From 33a5f6261a61af28f7b4c86f9f958da0f206c914 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 11 Oct 2012 17:52:56 +0200
Subject: nohz: Add API to check tick state

We need some quick way to check if the CPU has stopped
its tick. This will be useful to implement the printk tick
using the irq work subsystem.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/tick.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index f37fceb69b73..2307dd31b966 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -8,6 +8,8 @@
 
 #include <linux/clockchips.h>
 #include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/hrtimer.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
@@ -122,13 +124,26 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
+DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
+
+static inline int tick_nohz_tick_stopped(void)
+{
+	return __this_cpu_read(tick_cpu_sched.tick_stopped);
+}
+
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-# else
+
+# else /* !CONFIG_NO_HZ */
+static inline int tick_nohz_tick_stopped(void)
+{
+	return 0;
+}
+
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
 
-- 
cgit v1.2.3


From 00b42959106a9ca1c2899e591ae4e9a83ad6af05 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 7 Nov 2012 21:03:07 +0100
Subject: irq_work: Don't stop the tick with pending works

Don't stop the tick if we have pending irq works on the
queue, otherwise if the arch can't raise self-IPIs, we may not
find an opportunity to execute the pending works for a while.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/irq_work.h |  6 ++++++
 kernel/irq_work.c        | 11 +++++++++++
 kernel/time/tick-sched.c |  3 ++-
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 6a9e8f5399e2..a69704f37204 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -20,4 +20,10 @@ bool irq_work_queue(struct irq_work *work);
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
+#ifdef CONFIG_IRQ_WORK
+bool irq_work_needs_cpu(void);
+#else
+static bool irq_work_needs_cpu(void) { return false; }
+#endif
+
 #endif /* _LINUX_IRQ_WORK_H */
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 64eddd59ed83..b3c113a14727 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -99,6 +99,17 @@ bool irq_work_queue(struct irq_work *work)
 }
 EXPORT_SYMBOL_GPL(irq_work_queue);
 
+bool irq_work_needs_cpu(void)
+{
+	struct llist_head *this_list;
+
+	this_list = &__get_cpu_var(irq_work_list);
+	if (llist_empty(this_list))
+		return false;
+
+	return true;
+}
+
 /*
  * Run the irq_work entries on this cpu. Requires to be ran from hardirq
  * context with local IRQs disabled.
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9e945aa090ac..f249e8c3e58e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -20,6 +20,7 @@
 #include <linux/profile.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/irq_work.h>
 
 #include <asm/irq_regs.h>
 
@@ -289,7 +290,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	} while (read_seqretry(&xtime_lock, seq));
 
 	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
-	    arch_needs_cpu(cpu)) {
+	    arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
 	} else {
-- 
cgit v1.2.3


From bc6679aef673f9dcb8f718528fc3df49ff661af9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 19 Oct 2012 16:43:41 -0400
Subject: irq_work: Make self-IPIs optable

On irq work initialization, let the user choose to define it
as "lazy" or not. "Lazy" means that we don't want to send
an IPI (provided the arch can anyway) when we enqueue this
work but we rather prefer to wait for the next timer tick
to execute our work if possible.

This is going to be a benefit for non-urgent enqueuers
(like printk in the future) that may prefer not to raise
an IPI storm in case of frequent enqueuing on short periods
of time.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/irq_work.h | 14 ++++++++++++++
 kernel/irq_work.c        | 47 +++++++++++++++++++++++++++--------------------
 2 files changed, 41 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index a69704f37204..b28eb60c8bf6 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,20 @@
 
 #include <linux/llist.h>
 
+/*
+ * An entry can be in one of four states:
+ *
+ * free	     NULL, 0 -> {claimed}       : free to be used
+ * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
+ * pending   next, 3 -> {busy}          : queued, pending callback
+ * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+ */
+
+#define IRQ_WORK_PENDING	1UL
+#define IRQ_WORK_BUSY		2UL
+#define IRQ_WORK_FLAGS		3UL
+#define IRQ_WORK_LAZY		4UL /* Doesn't want IPI, wait for tick */
+
 struct irq_work {
 	unsigned long flags;
 	struct llist_node llnode;
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 480f74715ba9..7f3a59bc8e3d 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -12,24 +12,15 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/irqflags.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <asm/processor.h>
 
-/*
- * An entry can be in one of four states:
- *
- * free	     NULL, 0 -> {claimed}       : free to be used
- * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
- * pending   next, 3 -> {busy}          : queued, pending callback
- * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
- */
-
-#define IRQ_WORK_PENDING	1UL
-#define IRQ_WORK_BUSY		2UL
-#define IRQ_WORK_FLAGS		3UL
 
 static DEFINE_PER_CPU(struct llist_head, irq_work_list);
+static DEFINE_PER_CPU(int, irq_work_raised);
 
 /*
  * Claim the entry so that no one else will poke at it.
@@ -69,14 +60,19 @@ void __weak arch_irq_work_raise(void)
  */
 static void __irq_work_queue(struct irq_work *work)
 {
-	bool empty;
-
 	preempt_disable();
 
-	empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-	/* The list was empty, raise self-interrupt to start processing. */
-	if (empty)
-		arch_irq_work_raise();
+	llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+
+	/*
+	 * If the work is not "lazy" or the tick is stopped, raise the irq
+	 * work interrupt (if supported by the arch), otherwise, just wait
+	 * for the next tick.
+	 */
+	if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
+		if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+			arch_irq_work_raise();
+	}
 
 	preempt_enable();
 }
@@ -117,10 +113,19 @@ bool irq_work_needs_cpu(void)
 
 static void __irq_work_run(void)
 {
+	unsigned long flags;
 	struct irq_work *work;
 	struct llist_head *this_list;
 	struct llist_node *llnode;
 
+
+	/*
+	 * Reset the "raised" state right before we check the list because
+	 * an NMI may enqueue after we find the list empty from the runner.
+	 */
+	__this_cpu_write(irq_work_raised, 0);
+	barrier();
+
 	this_list = &__get_cpu_var(irq_work_list);
 	if (llist_empty(this_list))
 		return;
@@ -140,13 +145,15 @@ static void __irq_work_run(void)
 		 * to claim that work don't rely on us to handle their data
 		 * while we are in the middle of the func.
 		 */
-		xchg(&work->flags, IRQ_WORK_BUSY);
+		flags = work->flags & ~IRQ_WORK_PENDING;
+		xchg(&work->flags, flags);
+
 		work->func(work);
 		/*
 		 * Clear the BUSY bit and return to the free state if
 		 * no-one else claimed it meanwhile.
 		 */
-		(void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
+		(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
 	}
 }
 
-- 
cgit v1.2.3


From 74876a98a87a115254b3a66a14b27320b7f0acaa Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 12 Oct 2012 18:00:23 +0200
Subject: printk: Wake up klogd using irq_work

klogd is woken up asynchronously from the tick in order
to do it safely.

However if printk is called when the tick is stopped, the reader
won't be woken up until the next interrupt, which might not fire
for a while. As a result, the user may miss some message.

To fix this, lets implement the printk tick using a lazy irq work.
This subsystem takes care of the timer tick state and can
fix up accordingly.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/printk.h   |  3 ---
 init/Kconfig             |  1 +
 kernel/printk.c          | 36 ++++++++++++++++++++----------------
 kernel/time/tick-sched.c |  2 +-
 kernel/timer.c           |  1 -
 5 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9afc01e5a0a6..86c4b6294713 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -98,9 +98,6 @@ int no_printk(const char *fmt, ...)
 extern asmlinkage __printf(1, 2)
 void early_printk(const char *fmt, ...);
 
-extern int printk_needs_cpu(int cpu);
-extern void printk_tick(void);
-
 #ifdef CONFIG_PRINTK
 asmlinkage __printf(5, 0)
 int vprintk_emit(int facility, int level,
diff --git a/init/Kconfig b/init/Kconfig
index cdc152c75727..c575566be47d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1196,6 +1196,7 @@ config HOTPLUG
 config PRINTK
 	default y
 	bool "Enable support for printk" if EXPERT
+	select IRQ_WORK
 	help
 	  This option enables normal printk support. Removing it
 	  eliminates most of the message strings from the kernel image
diff --git a/kernel/printk.c b/kernel/printk.c
index 2d607f4d1797..c9104feba5ec 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -42,6 +42,7 @@
 #include <linux/notifier.h>
 #include <linux/rculist.h>
 #include <linux/poll.h>
+#include <linux/irq_work.h>
 
 #include <asm/uaccess.h>
 
@@ -1955,30 +1956,32 @@ int is_console_locked(void)
 static DEFINE_PER_CPU(int, printk_pending);
 static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
 
-void printk_tick(void)
+static void wake_up_klogd_work_func(struct irq_work *irq_work)
 {
-	if (__this_cpu_read(printk_pending)) {
-		int pending = __this_cpu_xchg(printk_pending, 0);
-		if (pending & PRINTK_PENDING_SCHED) {
-			char *buf = __get_cpu_var(printk_sched_buf);
-			printk(KERN_WARNING "[sched_delayed] %s", buf);
-		}
-		if (pending & PRINTK_PENDING_WAKEUP)
-			wake_up_interruptible(&log_wait);
+	int pending = __this_cpu_xchg(printk_pending, 0);
+
+	if (pending & PRINTK_PENDING_SCHED) {
+		char *buf = __get_cpu_var(printk_sched_buf);
+		printk(KERN_WARNING "[sched_delayed] %s", buf);
 	}
-}
 
-int printk_needs_cpu(int cpu)
-{
-	if (cpu_is_offline(cpu))
-		printk_tick();
-	return __this_cpu_read(printk_pending);
+	if (pending & PRINTK_PENDING_WAKEUP)
+		wake_up_interruptible(&log_wait);
 }
 
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+	.func = wake_up_klogd_work_func,
+	.flags = IRQ_WORK_LAZY,
+};
+
 void wake_up_klogd(void)
 {
-	if (waitqueue_active(&log_wait))
+	preempt_disable();
+	if (waitqueue_active(&log_wait)) {
 		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+		irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
+	}
+	preempt_enable();
 }
 
 static void console_cont_flush(char *text, size_t size)
@@ -2458,6 +2461,7 @@ int printk_sched(const char *fmt, ...)
 	va_end(args);
 
 	__this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+	irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
 	local_irq_restore(flags);
 
 	return r;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f249e8c3e58e..822d7572bf2d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -289,7 +289,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		time_delta = timekeeping_max_deferment();
 	} while (read_seqretry(&xtime_lock, seq));
 
-	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
+	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
 	    arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
diff --git a/kernel/timer.c b/kernel/timer.c
index 367d00858482..ff3b5165737b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1351,7 +1351,6 @@ void update_process_times(int user_tick)
 	account_process_tick(p, user_tick);
 	run_local_timers();
 	rcu_check_callbacks(cpu, user_tick);
-	printk_tick();
 #ifdef CONFIG_IRQ_WORK
 	if (in_irq())
 		irq_work_run();
-- 
cgit v1.2.3


From b0284de05e07d56ff7de154d0c9263788755f5eb Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 30 Nov 2012 10:09:42 +0000
Subject: ab8500_bm: Rename battery management platform data to something more
 logical

The platform specific battery management configuration data structure
is currently called 'bat' short for 'battery'; however, it contains
information for all components of the battery management group, rather
than information pertaining to the battery itself - there are other
structures for that. So, in keeping with its structure namesake
'abx500_bm_data', we rename it to 'bm' here. Using similar logic,
we're also renaming 'bmdevs_of_probe' to the more device specific
'ab8500_bm_of_probe'.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/power/ab8500_bmdata.c   |   6 +--
 drivers/power/ab8500_btemp.c    |  60 ++++++++++-----------
 drivers/power/ab8500_charger.c  |  24 ++++-----
 drivers/power/ab8500_fg.c       |  96 +++++++++++++++++-----------------
 drivers/power/abx500_chargalg.c | 112 ++++++++++++++++++++--------------------
 include/linux/mfd/abx500.h      |   6 +--
 6 files changed, 152 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c
index df5a590d760e..c2fb2c554019 100644
--- a/drivers/power/ab8500_bmdata.c
+++ b/drivers/power/ab8500_bmdata.c
@@ -452,9 +452,9 @@ struct abx500_bm_data ab8500_bm_data = {
 	.fg_params              = &fg,
 };
 
-int __devinit bmdevs_of_probe(struct device *dev,
-			      struct device_node *np,
-			      struct abx500_bm_data **battery)
+int __devinit ab8500_bm_of_probe(struct device *dev,
+				 struct device_node *np,
+				 struct abx500_bm_data **battery)
 {
 	struct batres_vs_temp *tmp_batres_tbl;
 	struct device_node *np_bat_supply;
diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index 5d1bf0bd6fa5..33ed0fccbd0e 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -78,7 +78,7 @@ struct ab8500_btemp_ranges {
  * @parent:		Pointer to the struct ab8500
  * @gpadc:		Pointer to the struct gpadc
  * @fg:			Pointer to the struct fg
- * @bat:		Pointer to the abx500_bm platform data
+ * @bm:           	Platform specific battery management information
  * @btemp_psy:		Structure for BTEMP specific battery properties
  * @events:		Structure for information about events triggered
  * @btemp_ranges:	Battery temperature range structure
@@ -95,7 +95,7 @@ struct ab8500_btemp {
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
 	struct ab8500_fg *fg;
-	struct abx500_bm_data *bat;
+	struct abx500_bm_data *bm;
 	struct power_supply btemp_psy;
 	struct ab8500_btemp_events events;
 	struct ab8500_btemp_ranges btemp_ranges;
@@ -149,13 +149,13 @@ static int ab8500_btemp_batctrl_volt_to_res(struct ab8500_btemp *di,
 		return (450000 * (v_batctrl)) / (1800 - v_batctrl);
 	}
 
-	if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL) {
+	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL) {
 		/*
 		 * If the battery has internal NTC, we use the current
 		 * source to calculate the resistance, 7uA or 20uA
 		 */
 		rbs = (v_batctrl * 1000
-		       - di->bat->gnd_lift_resistance * inst_curr)
+		       - di->bm->gnd_lift_resistance * inst_curr)
 		      / di->curr_source;
 	} else {
 		/*
@@ -211,7 +211,7 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 		return 0;
 
 	/* Only do this for batteries with internal NTC */
-	if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) {
+	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) {
 		if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_7UA)
 			curr = BAT_CTRL_7U_ENA;
 		else
@@ -243,7 +243,7 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 				__func__);
 			goto disable_curr_source;
 		}
-	} else if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) {
+	} else if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) {
 		dev_dbg(di->dev, "Disable BATCTRL curr source\n");
 
 		/* Write 0 to the curr bits */
@@ -459,9 +459,9 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di)
 	int rbat, rntc, vntc;
 	u8 id;
 
-	id = di->bat->batt_id;
+	id = di->bm->batt_id;
 
-	if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL &&
+	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL &&
 			id != BATTERY_UNKNOWN) {
 
 		rbat = ab8500_btemp_get_batctrl_res(di);
@@ -476,8 +476,8 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di)
 		}
 
 		temp = ab8500_btemp_res_to_temp(di,
-			di->bat->bat_type[id].r_to_t_tbl,
-			di->bat->bat_type[id].n_temp_tbl_elements, rbat);
+			di->bm->bat_type[id].r_to_t_tbl,
+			di->bm->bat_type[id].n_temp_tbl_elements, rbat);
 	} else {
 		vntc = ab8500_gpadc_convert(di->gpadc, BTEMP_BALL);
 		if (vntc < 0) {
@@ -493,8 +493,8 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di)
 		rntc = 230000 * vntc / (VTVOUT_V - vntc);
 
 		temp = ab8500_btemp_res_to_temp(di,
-			di->bat->bat_type[id].r_to_t_tbl,
-			di->bat->bat_type[id].n_temp_tbl_elements, rntc);
+			di->bm->bat_type[id].r_to_t_tbl,
+			di->bm->bat_type[id].n_temp_tbl_elements, rntc);
 		prev = temp;
 	}
 	dev_dbg(di->dev, "Battery temperature is %d\n", temp);
@@ -515,7 +515,7 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 	u8 i;
 
 	di->curr_source = BTEMP_BATCTRL_CURR_SRC_7UA;
-	di->bat->batt_id = BATTERY_UNKNOWN;
+	di->bm->batt_id = BATTERY_UNKNOWN;
 
 	res =  ab8500_btemp_get_batctrl_res(di);
 	if (res < 0) {
@@ -524,23 +524,23 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 	}
 
 	/* BATTERY_UNKNOWN is defined on position 0, skip it! */
-	for (i = BATTERY_UNKNOWN + 1; i < di->bat->n_btypes; i++) {
-		if ((res <= di->bat->bat_type[i].resis_high) &&
-			(res >= di->bat->bat_type[i].resis_low)) {
+	for (i = BATTERY_UNKNOWN + 1; i < di->bm->n_btypes; i++) {
+		if ((res <= di->bm->bat_type[i].resis_high) &&
+			(res >= di->bm->bat_type[i].resis_low)) {
 			dev_dbg(di->dev, "Battery detected on %s"
 				" low %d < res %d < high: %d"
 				" index: %d\n",
-				di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL ?
+				di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL ?
 				"BATCTRL" : "BATTEMP",
-				di->bat->bat_type[i].resis_low, res,
-				di->bat->bat_type[i].resis_high, i);
+				di->bm->bat_type[i].resis_low, res,
+				di->bm->bat_type[i].resis_high, i);
 
-			di->bat->batt_id = i;
+			di->bm->batt_id = i;
 			break;
 		}
 	}
 
-	if (di->bat->batt_id == BATTERY_UNKNOWN) {
+	if (di->bm->batt_id == BATTERY_UNKNOWN) {
 		dev_warn(di->dev, "Battery identified as unknown"
 			", resistance %d Ohm\n", res);
 		return -ENXIO;
@@ -550,13 +550,13 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 	 * We only have to change current source if the
 	 * detected type is Type 1, else we use the 7uA source
 	 */
-	if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL &&
-			di->bat->batt_id == 1) {
+	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL &&
+			di->bm->batt_id == 1) {
 		dev_dbg(di->dev, "Set BATCTRL current source to 20uA\n");
 		di->curr_source = BTEMP_BATCTRL_CURR_SRC_20UA;
 	}
 
-	return di->bat->batt_id;
+	return di->bm->batt_id;
 }
 
 /**
@@ -586,9 +586,9 @@ static void ab8500_btemp_periodic_work(struct work_struct *work)
 	}
 
 	if (di->events.ac_conn || di->events.usb_conn)
-		interval = di->bat->temp_interval_chg;
+		interval = di->bm->temp_interval_chg;
 	else
-		interval = di->bat->temp_interval_nochg;
+		interval = di->bm->temp_interval_nochg;
 
 	/* Schedule a new measurement */
 	queue_delayed_work(di->btemp_wq,
@@ -815,7 +815,7 @@ static int ab8500_btemp_get_property(struct power_supply *psy,
 			val->intval = 1;
 		break;
 	case POWER_SUPPLY_PROP_TECHNOLOGY:
-		val->intval = di->bat->bat_type[di->bat->batt_id].name;
+		val->intval = di->bm->bat_type[di->bm->batt_id].name;
 		break;
 	case POWER_SUPPLY_PROP_TEMP:
 		val->intval = ab8500_btemp_get_temp(di);
@@ -985,10 +985,10 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "%s no mem for ab8500_btemp\n", __func__);
 		return -ENOMEM;
 	}
-	di->bat = pdev->mfd_cell->platform_data;
-	if (!di->bat) {
+	di->bm = pdev->mfd_cell->platform_data;
+	if (!di->bm) {
 		if (np) {
-			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index d27dd7fec163..21dc8422778d 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -189,7 +189,7 @@ struct ab8500_charger_usb_state {
  * @autopower_cfg	platform specific power config support for "pwron after pwrloss"
  * @parent:		Pointer to the struct ab8500
  * @gpadc:		Pointer to the struct gpadc
- * @bat:		Pointer to the abx500_bm platform data
+ * @bm:           	Platform specific battery management information
  * @flags:		Structure for information about events triggered
  * @usb_state:		Structure for usb stack information
  * @ac_chg:		AC charger power supply
@@ -226,7 +226,7 @@ struct ab8500_charger {
 	bool autopower_cfg;
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
-	struct abx500_bm_data *bat;
+	struct abx500_bm_data *bm;
 	struct ab8500_charger_event_flags flags;
 	struct ab8500_charger_usb_state usb_state;
 	struct ux500_charger ac_chg;
@@ -1034,7 +1034,7 @@ static int ab8500_charger_set_vbus_in_curr(struct ab8500_charger *di,
 	int min_value;
 
 	/* We should always use to lowest current limit */
-	min_value = min(di->bat->chg_params->usb_curr_max, ich_in);
+	min_value = min(di->bm->chg_params->usb_curr_max, ich_in);
 
 	switch (min_value) {
 	case 100:
@@ -1176,7 +1176,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger,
 		volt_index = ab8500_voltage_to_regval(vset);
 		curr_index = ab8500_current_to_regval(iset);
 		input_curr_index = ab8500_current_to_regval(
-			di->bat->chg_params->ac_curr_max);
+			di->bm->chg_params->ac_curr_max);
 		if (volt_index < 0 || curr_index < 0 || input_curr_index < 0) {
 			dev_err(di->dev,
 				"Charger voltage or current too high, "
@@ -1193,7 +1193,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger,
 		}
 		/* MainChInputCurr: current that can be drawn from the charger*/
 		ret = ab8500_charger_set_main_in_curr(di,
-			di->bat->chg_params->ac_curr_max);
+			di->bm->chg_params->ac_curr_max);
 		if (ret) {
 			dev_err(di->dev, "%s Failed to set MainChInputCurr\n",
 				__func__);
@@ -1209,7 +1209,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger,
 		}
 
 		/* Check if VBAT overshoot control should be enabled */
-		if (!di->bat->enable_overshoot)
+		if (!di->bm->enable_overshoot)
 			overshoot = MAIN_CH_NO_OVERSHOOT_ENA_N;
 
 		/* Enable Main Charger */
@@ -1376,7 +1376,7 @@ static int ab8500_charger_usb_en(struct ux500_charger *charger,
 			return ret;
 		}
 		/* Check if VBAT overshoot control should be enabled */
-		if (!di->bat->enable_overshoot)
+		if (!di->bm->enable_overshoot)
 			overshoot = USB_CHG_NO_OVERSHOOT_ENA_N;
 
 		/* Enable USB Charger */
@@ -2454,8 +2454,8 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di)
 	ret = abx500_set_register_interruptible(di->dev,
 		AB8500_RTC,
 		AB8500_RTC_BACKUP_CHG_REG,
-		di->bat->bkup_bat_v |
-		di->bat->bkup_bat_i);
+		di->bm->bkup_bat_v |
+		di->bm->bkup_bat_i);
 	if (ret) {
 		dev_err(di->dev, "failed to setup backup battery charging\n");
 		goto out;
@@ -2644,10 +2644,10 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "%s no mem for ab8500_charger\n", __func__);
 		return -ENOMEM;
 	}
-	di->bat = pdev->mfd_cell->platform_data;
-	if (!di->bat) {
+	di->bm = pdev->mfd_cell->platform_data;
+	if (!di->bm) {
 		if (np) {
-			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 5a9f58d4c0fb..4cf231375de3 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -173,7 +173,7 @@ struct inst_curr_result_list {
  * @avg_cap:		Average capacity filter
  * @parent:		Pointer to the struct ab8500
  * @gpadc:		Pointer to the struct gpadc
- * @bat:		Pointer to the abx500_bm platform data
+ * @bm:           	Platform specific battery management information
  * @fg_psy:		Structure that holds the FG specific battery properties
  * @fg_wq:		Work queue for running the FG algorithm
  * @fg_periodic_work:	Work to run the FG algorithm periodically
@@ -212,7 +212,7 @@ struct ab8500_fg {
 	struct ab8500_fg_avg_cap avg_cap;
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
-	struct abx500_bm_data *bat;
+	struct abx500_bm_data *bm;
 	struct power_supply fg_psy;
 	struct workqueue_struct *fg_wq;
 	struct delayed_work fg_periodic_work;
@@ -355,7 +355,7 @@ static int ab8500_fg_is_low_curr(struct ab8500_fg *di, int curr)
 	/*
 	 * We want to know if we're in low current mode
 	 */
-	if (curr > -di->bat->fg_params->high_curr_threshold)
+	if (curr > -di->bm->fg_params->high_curr_threshold)
 		return true;
 	else
 		return false;
@@ -648,7 +648,7 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res)
 	 * 112.9nAh assumes 10mOhm, but fg_res is in 0.1mOhm
 	 */
 	val = (val * QLSB_NANO_AMP_HOURS_X10 * 36 * 4) /
-		(1000 * di->bat->fg_res);
+		(1000 * di->bm->fg_res);
 
 	if (di->turn_off_fg) {
 		dev_dbg(di->dev, "%s Disable FG\n", __func__);
@@ -751,7 +751,7 @@ static void ab8500_fg_acc_cur_work(struct work_struct *work)
 	 * 112.9nAh assumes 10mOhm, but fg_res is in 0.1mOhm
 	 */
 	di->accu_charge = (val * QLSB_NANO_AMP_HOURS_X10) /
-		(100 * di->bat->fg_res);
+		(100 * di->bm->fg_res);
 
 	/*
 	 * Convert to unit value in mA
@@ -763,7 +763,7 @@ static void ab8500_fg_acc_cur_work(struct work_struct *work)
 	 * 112.9nAh assumes 10mOhm, but fg_res is in 0.1mOhm
 	 */
 	di->avg_curr = (val * QLSB_NANO_AMP_HOURS_X10 * 36) /
-		(1000 * di->bat->fg_res * (di->fg_samples / 4));
+		(1000 * di->bm->fg_res * (di->fg_samples / 4));
 
 	di->flags.conv_done = true;
 
@@ -815,8 +815,8 @@ static int ab8500_fg_volt_to_capacity(struct ab8500_fg *di, int voltage)
 	struct abx500_v_to_cap *tbl;
 	int cap = 0;
 
-	tbl = di->bat->bat_type[di->bat->batt_id].v_to_cap_tbl,
-	tbl_size = di->bat->bat_type[di->bat->batt_id].n_v_cap_tbl_elements;
+	tbl = di->bm->bat_type[di->bm->batt_id].v_to_cap_tbl,
+	tbl_size = di->bm->bat_type[di->bm->batt_id].n_v_cap_tbl_elements;
 
 	for (i = 0; i < tbl_size; ++i) {
 		if (voltage > tbl[i].voltage)
@@ -867,8 +867,8 @@ static int ab8500_fg_battery_resistance(struct ab8500_fg *di)
 	struct batres_vs_temp *tbl;
 	int resist = 0;
 
-	tbl = di->bat->bat_type[di->bat->batt_id].batres_tbl;
-	tbl_size = di->bat->bat_type[di->bat->batt_id].n_batres_tbl_elements;
+	tbl = di->bm->bat_type[di->bm->batt_id].batres_tbl;
+	tbl_size = di->bm->bat_type[di->bm->batt_id].n_batres_tbl_elements;
 
 	for (i = 0; i < tbl_size; ++i) {
 		if (di->bat_temp / 10 > tbl[i].temp)
@@ -889,11 +889,11 @@ static int ab8500_fg_battery_resistance(struct ab8500_fg *di)
 
 	dev_dbg(di->dev, "%s Temp: %d battery internal resistance: %d"
 	    " fg resistance %d, total: %d (mOhm)\n",
-		__func__, di->bat_temp, resist, di->bat->fg_res / 10,
-		(di->bat->fg_res / 10) + resist);
+		__func__, di->bat_temp, resist, di->bm->fg_res / 10,
+		(di->bm->fg_res / 10) + resist);
 
 	/* fg_res variable is in 0.1mOhm */
-	resist += di->bat->fg_res / 10;
+	resist += di->bm->fg_res / 10;
 
 	return resist;
 }
@@ -1111,14 +1111,14 @@ static int ab8500_fg_capacity_level(struct ab8500_fg *di)
 
 	percent = di->bat_cap.permille / 10;
 
-	if (percent <= di->bat->cap_levels->critical ||
+	if (percent <= di->bm->cap_levels->critical ||
 		di->flags.low_bat)
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL;
-	else if (percent <= di->bat->cap_levels->low)
+	else if (percent <= di->bm->cap_levels->low)
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_LOW;
-	else if (percent <= di->bat->cap_levels->normal)
+	else if (percent <= di->bm->cap_levels->normal)
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL;
-	else if (percent <= di->bat->cap_levels->high)
+	else if (percent <= di->bm->cap_levels->high)
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_HIGH;
 	else
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_FULL;
@@ -1183,7 +1183,7 @@ static void ab8500_fg_check_capacity_limits(struct ab8500_fg *di, bool init)
 			di->bat_cap.prev_percent !=
 			(di->bat_cap.permille) / 10 &&
 			(di->bat_cap.permille / 10) <
-			di->bat->fg_params->maint_thres) {
+			di->bm->fg_params->maint_thres) {
 			dev_dbg(di->dev,
 				"battery reported full "
 				"but capacity dropping: %d\n",
@@ -1285,7 +1285,7 @@ static void ab8500_fg_algorithm_charging(struct ab8500_fg *di)
 	switch (di->charge_state) {
 	case AB8500_FG_CHARGE_INIT:
 		di->fg_samples = SEC_TO_SAMPLE(
-			di->bat->fg_params->accu_charging);
+			di->bm->fg_params->accu_charging);
 
 		ab8500_fg_coulomb_counter(di, true);
 		ab8500_fg_charge_state_to(di, AB8500_FG_CHARGE_READOUT);
@@ -1347,8 +1347,8 @@ static bool check_sysfs_capacity(struct ab8500_fg *di)
 	cap_permille = ab8500_fg_convert_mah_to_permille(di,
 		di->bat_cap.user_mah);
 
-	lower = di->bat_cap.permille - di->bat->fg_params->user_cap_limit * 10;
-	upper = di->bat_cap.permille + di->bat->fg_params->user_cap_limit * 10;
+	lower = di->bat_cap.permille - di->bm->fg_params->user_cap_limit * 10;
+	upper = di->bat_cap.permille + di->bm->fg_params->user_cap_limit * 10;
 
 	if (lower < 0)
 		lower = 0;
@@ -1388,7 +1388,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 	case AB8500_FG_DISCHARGE_INIT:
 		/* We use the FG IRQ to work on */
 		di->init_cnt = 0;
-		di->fg_samples = SEC_TO_SAMPLE(di->bat->fg_params->init_timer);
+		di->fg_samples = SEC_TO_SAMPLE(di->bm->fg_params->init_timer);
 		ab8500_fg_coulomb_counter(di, true);
 		ab8500_fg_discharge_state_to(di,
 			AB8500_FG_DISCHARGE_INITMEASURING);
@@ -1401,17 +1401,17 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 		 * samples to get an initial capacity.
 		 * Then go to READOUT
 		 */
-		sleep_time = di->bat->fg_params->init_timer;
+		sleep_time = di->bm->fg_params->init_timer;
 
 		/* Discard the first [x] seconds */
-		if (di->init_cnt > di->bat->fg_params->init_discard_time) {
+		if (di->init_cnt > di->bm->fg_params->init_discard_time) {
 			ab8500_fg_calc_cap_discharge_voltage(di, true);
 
 			ab8500_fg_check_capacity_limits(di, true);
 		}
 
 		di->init_cnt += sleep_time;
-		if (di->init_cnt > di->bat->fg_params->init_total_time)
+		if (di->init_cnt > di->bm->fg_params->init_total_time)
 			ab8500_fg_discharge_state_to(di,
 				AB8500_FG_DISCHARGE_READOUT_INIT);
 
@@ -1426,7 +1426,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 		/* Intentional fallthrough */
 
 	case AB8500_FG_DISCHARGE_RECOVERY:
-		sleep_time = di->bat->fg_params->recovery_sleep_timer;
+		sleep_time = di->bm->fg_params->recovery_sleep_timer;
 
 		/*
 		 * We should check the power consumption
@@ -1438,9 +1438,9 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 
 		if (ab8500_fg_is_low_curr(di, di->inst_curr)) {
 			if (di->recovery_cnt >
-				di->bat->fg_params->recovery_total_time) {
+				di->bm->fg_params->recovery_total_time) {
 				di->fg_samples = SEC_TO_SAMPLE(
-					di->bat->fg_params->accu_high_curr);
+					di->bm->fg_params->accu_high_curr);
 				ab8500_fg_coulomb_counter(di, true);
 				ab8500_fg_discharge_state_to(di,
 					AB8500_FG_DISCHARGE_READOUT);
@@ -1453,7 +1453,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 			di->recovery_cnt += sleep_time;
 		} else {
 			di->fg_samples = SEC_TO_SAMPLE(
-				di->bat->fg_params->accu_high_curr);
+				di->bm->fg_params->accu_high_curr);
 			ab8500_fg_coulomb_counter(di, true);
 			ab8500_fg_discharge_state_to(di,
 				AB8500_FG_DISCHARGE_READOUT);
@@ -1462,7 +1462,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 
 	case AB8500_FG_DISCHARGE_READOUT_INIT:
 		di->fg_samples = SEC_TO_SAMPLE(
-			di->bat->fg_params->accu_high_curr);
+			di->bm->fg_params->accu_high_curr);
 		ab8500_fg_coulomb_counter(di, true);
 		ab8500_fg_discharge_state_to(di,
 				AB8500_FG_DISCHARGE_READOUT);
@@ -1509,9 +1509,9 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 			}
 
 			di->high_curr_cnt +=
-				di->bat->fg_params->accu_high_curr;
+				di->bm->fg_params->accu_high_curr;
 			if (di->high_curr_cnt >
-				di->bat->fg_params->high_curr_time)
+				di->bm->fg_params->high_curr_time)
 				di->recovery_needed = true;
 
 			ab8500_fg_calc_cap_discharge_fg(di);
@@ -1528,7 +1528,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 		ab8500_fg_calc_cap_discharge_voltage(di, true);
 
 		di->fg_samples = SEC_TO_SAMPLE(
-			di->bat->fg_params->accu_high_curr);
+			di->bm->fg_params->accu_high_curr);
 		ab8500_fg_coulomb_counter(di, true);
 		ab8500_fg_discharge_state_to(di,
 				AB8500_FG_DISCHARGE_READOUT);
@@ -1721,7 +1721,7 @@ static void ab8500_fg_low_bat_work(struct work_struct *work)
 	vbat = ab8500_fg_bat_voltage(di);
 
 	/* Check if LOW_BAT still fulfilled */
-	if (vbat < di->bat->fg_params->lowbat_threshold) {
+	if (vbat < di->bm->fg_params->lowbat_threshold) {
 		di->flags.low_bat = true;
 		dev_warn(di->dev, "Battery voltage still LOW\n");
 
@@ -1779,8 +1779,8 @@ static int ab8500_fg_battok_init_hw_register(struct ab8500_fg *di)
 	int ret;
 	int new_val;
 
-	sel0 = di->bat->fg_params->battok_falling_th_sel0;
-	sel1 = di->bat->fg_params->battok_raising_th_sel1;
+	sel0 = di->bm->fg_params->battok_falling_th_sel0;
+	sel1 = di->bm->fg_params->battok_raising_th_sel1;
 
 	cbp_sel0 = ab8500_fg_battok_calc(di, sel0);
 	cbp_sel1 = ab8500_fg_battok_calc(di, sel1);
@@ -1963,7 +1963,7 @@ static int ab8500_fg_get_property(struct power_supply *psy,
 				di->bat_cap.max_mah);
 		break;
 	case POWER_SUPPLY_PROP_ENERGY_NOW:
-		if (di->flags.batt_unknown && !di->bat->chg_unknown_bat &&
+		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = ab8500_fg_convert_mah_to_uwh(di,
 					di->bat_cap.max_mah);
@@ -1978,21 +1978,21 @@ static int ab8500_fg_get_property(struct power_supply *psy,
 		val->intval = di->bat_cap.max_mah;
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_NOW:
-		if (di->flags.batt_unknown && !di->bat->chg_unknown_bat &&
+		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = di->bat_cap.max_mah;
 		else
 			val->intval = di->bat_cap.prev_mah;
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY:
-		if (di->flags.batt_unknown && !di->bat->chg_unknown_bat &&
+		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = 100;
 		else
 			val->intval = di->bat_cap.prev_percent;
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY_LEVEL:
-		if (di->flags.batt_unknown && !di->bat->chg_unknown_bat &&
+		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN;
 		else
@@ -2078,7 +2078,7 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data)
 				if (!di->flags.batt_id_received) {
 					const struct abx500_battery_type *b;
 
-					b = &(di->bat->bat_type[di->bat->batt_id]);
+					b = &(di->bm->bat_type[di->bm->batt_id]);
 
 					di->flags.batt_id_received = true;
 
@@ -2155,7 +2155,7 @@ static int ab8500_fg_init_hw_registers(struct ab8500_fg *di)
 		AB8500_SYS_CTRL2_BLOCK,
 		AB8500_LOW_BAT_REG,
 		ab8500_volt_to_regval(
-			di->bat->fg_params->lowbat_threshold) << 1 |
+			di->bm->fg_params->lowbat_threshold) << 1 |
 		LOW_BAT_ENABLE);
 	if (ret) {
 		dev_err(di->dev, "%s write failed\n", __func__);
@@ -2457,10 +2457,10 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "%s no mem for ab8500_fg\n", __func__);
 		return -ENOMEM;
 	}
-	di->bat = pdev->mfd_cell->platform_data;
-	if (!di->bat) {
+	di->bm = pdev->mfd_cell->platform_data;
+	if (!di->bm) {
 		if (np) {
-			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
@@ -2491,11 +2491,11 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 	di->fg_psy.external_power_changed = ab8500_fg_external_power_changed;
 
 	di->bat_cap.max_mah_design = MILLI_TO_MICRO *
-		di->bat->bat_type[di->bat->batt_id].charge_full_design;
+		di->bm->bat_type[di->bm->batt_id].charge_full_design;
 
 	di->bat_cap.max_mah = di->bat_cap.max_mah_design;
 
-	di->vbat_nom = di->bat->bat_type[di->bat->batt_id].nominal_voltage;
+	di->vbat_nom = di->bm->bat_type[di->bm->batt_id].nominal_voltage;
 
 	di->init_capacity = true;
 
@@ -2549,7 +2549,7 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 		goto free_inst_curr_wq;
 	}
 
-	di->fg_samples = SEC_TO_SAMPLE(di->bat->fg_params->init_timer);
+	di->fg_samples = SEC_TO_SAMPLE(di->bm->fg_params->init_timer);
 	ab8500_fg_coulomb_counter(di, true);
 
 	/* Initialize completion used to notify completion of inst current */
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index dcdc4393b9e7..ea2e2eb652ef 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -207,7 +207,7 @@ enum maxim_ret {
  * @chg_info:		information about connected charger types
  * @batt_data:		data of the battery
  * @susp_status:	current charger suspension status
- * @bat:		pointer to the abx500_bm platform data
+ * @bm:           	Platform specific battery management information
  * @chargalg_psy:	structure that holds the battery properties exposed by
  *			the charging algorithm
  * @events:		structure for information about events triggered
@@ -232,7 +232,7 @@ struct abx500_chargalg {
 	struct abx500_chargalg_charger_info chg_info;
 	struct abx500_chargalg_battery_data batt_data;
 	struct abx500_chargalg_suspension_status susp_status;
-	struct abx500_bm_data *bat;
+	struct abx500_bm_data *bm;
 	struct power_supply chargalg_psy;
 	struct ux500_charger *ac_chg;
 	struct ux500_charger *usb_chg;
@@ -367,13 +367,13 @@ static void abx500_chargalg_start_safety_timer(struct abx500_chargalg *di)
 	case AC_CHG:
 		timer_expiration =
 		round_jiffies(jiffies +
-			(di->bat->main_safety_tmr_h * 3600 * HZ));
+			(di->bm->main_safety_tmr_h * 3600 * HZ));
 		break;
 
 	case USB_CHG:
 		timer_expiration =
 		round_jiffies(jiffies +
-			(di->bat->usb_safety_tmr_h * 3600 * HZ));
+			(di->bm->usb_safety_tmr_h * 3600 * HZ));
 		break;
 
 	default:
@@ -638,32 +638,32 @@ static void abx500_chargalg_start_charging(struct abx500_chargalg *di,
  */
 static void abx500_chargalg_check_temp(struct abx500_chargalg *di)
 {
-	if (di->batt_data.temp > (di->bat->temp_low + di->t_hyst_norm) &&
-		di->batt_data.temp < (di->bat->temp_high - di->t_hyst_norm)) {
+	if (di->batt_data.temp > (di->bm->temp_low + di->t_hyst_norm) &&
+		di->batt_data.temp < (di->bm->temp_high - di->t_hyst_norm)) {
 		/* Temp OK! */
 		di->events.btemp_underover = false;
 		di->events.btemp_lowhigh = false;
 		di->t_hyst_norm = 0;
 		di->t_hyst_lowhigh = 0;
 	} else {
-		if (((di->batt_data.temp >= di->bat->temp_high) &&
+		if (((di->batt_data.temp >= di->bm->temp_high) &&
 			(di->batt_data.temp <
-				(di->bat->temp_over - di->t_hyst_lowhigh))) ||
+				(di->bm->temp_over - di->t_hyst_lowhigh))) ||
 			((di->batt_data.temp >
-				(di->bat->temp_under + di->t_hyst_lowhigh)) &&
-			(di->batt_data.temp <= di->bat->temp_low))) {
+				(di->bm->temp_under + di->t_hyst_lowhigh)) &&
+			(di->batt_data.temp <= di->bm->temp_low))) {
 			/* TEMP minor!!!!! */
 			di->events.btemp_underover = false;
 			di->events.btemp_lowhigh = true;
-			di->t_hyst_norm = di->bat->temp_hysteresis;
+			di->t_hyst_norm = di->bm->temp_hysteresis;
 			di->t_hyst_lowhigh = 0;
-		} else if (di->batt_data.temp <= di->bat->temp_under ||
-			di->batt_data.temp >= di->bat->temp_over) {
+		} else if (di->batt_data.temp <= di->bm->temp_under ||
+			di->batt_data.temp >= di->bm->temp_over) {
 			/* TEMP major!!!!! */
 			di->events.btemp_underover = true;
 			di->events.btemp_lowhigh = false;
 			di->t_hyst_norm = 0;
-			di->t_hyst_lowhigh = di->bat->temp_hysteresis;
+			di->t_hyst_lowhigh = di->bm->temp_hysteresis;
 		} else {
 		/* Within hysteresis */
 		dev_dbg(di->dev, "Within hysteresis limit temp: %d "
@@ -682,12 +682,12 @@ static void abx500_chargalg_check_temp(struct abx500_chargalg *di)
  */
 static void abx500_chargalg_check_charger_voltage(struct abx500_chargalg *di)
 {
-	if (di->chg_info.usb_volt > di->bat->chg_params->usb_volt_max)
+	if (di->chg_info.usb_volt > di->bm->chg_params->usb_volt_max)
 		di->chg_info.usb_chg_ok = false;
 	else
 		di->chg_info.usb_chg_ok = true;
 
-	if (di->chg_info.ac_volt > di->bat->chg_params->ac_volt_max)
+	if (di->chg_info.ac_volt > di->bm->chg_params->ac_volt_max)
 		di->chg_info.ac_chg_ok = false;
 	else
 		di->chg_info.ac_chg_ok = true;
@@ -707,10 +707,10 @@ static void abx500_chargalg_end_of_charge(struct abx500_chargalg *di)
 	if (di->charge_status == POWER_SUPPLY_STATUS_CHARGING &&
 		di->charge_state == STATE_NORMAL &&
 		!di->maintenance_chg && (di->batt_data.volt >=
-		di->bat->bat_type[di->bat->batt_id].termination_vol ||
+		di->bm->bat_type[di->bm->batt_id].termination_vol ||
 		di->events.usb_cv_active || di->events.ac_cv_active) &&
 		di->batt_data.avg_curr <
-		di->bat->bat_type[di->bat->batt_id].termination_curr &&
+		di->bm->bat_type[di->bm->batt_id].termination_curr &&
 		di->batt_data.avg_curr > 0) {
 		if (++di->eoc_cnt >= EOC_COND_CNT) {
 			di->eoc_cnt = 0;
@@ -733,12 +733,12 @@ static void abx500_chargalg_end_of_charge(struct abx500_chargalg *di)
 static void init_maxim_chg_curr(struct abx500_chargalg *di)
 {
 	di->ccm.original_iset =
-		di->bat->bat_type[di->bat->batt_id].normal_cur_lvl;
+		di->bm->bat_type[di->bm->batt_id].normal_cur_lvl;
 	di->ccm.current_iset =
-		di->bat->bat_type[di->bat->batt_id].normal_cur_lvl;
-	di->ccm.test_delta_i = di->bat->maxi->charger_curr_step;
-	di->ccm.max_current = di->bat->maxi->chg_curr;
-	di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+		di->bm->bat_type[di->bm->batt_id].normal_cur_lvl;
+	di->ccm.test_delta_i = di->bm->maxi->charger_curr_step;
+	di->ccm.max_current = di->bm->maxi->chg_curr;
+	di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 	di->ccm.level = 0;
 }
 
@@ -755,7 +755,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 {
 	int delta_i;
 
-	if (!di->bat->maxi->ena_maxi)
+	if (!di->bm->maxi->ena_maxi)
 		return MAXIM_RET_NOACTION;
 
 	delta_i = di->ccm.original_iset - di->batt_data.inst_curr;
@@ -766,7 +766,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 		if (di->ccm.wait_cnt == 0) {
 			dev_dbg(di->dev, "lowering current\n");
 			di->ccm.wait_cnt++;
-			di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+			di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 			di->ccm.max_current =
 				di->ccm.current_iset - di->ccm.test_delta_i;
 			di->ccm.current_iset = di->ccm.max_current;
@@ -791,7 +791,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 		if (di->ccm.current_iset == di->ccm.original_iset)
 			return MAXIM_RET_NOACTION;
 
-		di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+		di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 		di->ccm.current_iset = di->ccm.original_iset;
 		di->ccm.level = 0;
 
@@ -803,7 +803,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 		di->ccm.max_current) {
 		if (di->ccm.condition_cnt-- == 0) {
 			/* Increse the iset with cco.test_delta_i */
-			di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+			di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 			di->ccm.current_iset += di->ccm.test_delta_i;
 			di->ccm.level++;
 			dev_dbg(di->dev, " Maximization needed, increase"
@@ -818,7 +818,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 			return MAXIM_RET_NOACTION;
 		}
 	}  else {
-		di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+		di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 		return MAXIM_RET_NOACTION;
 	}
 }
@@ -838,7 +838,7 @@ static void handle_maxim_chg_curr(struct abx500_chargalg *di)
 		break;
 	case MAXIM_RET_IBAT_TOO_HIGH:
 		result = abx500_chargalg_update_chg_curr(di,
-			di->bat->bat_type[di->bat->batt_id].normal_cur_lvl);
+			di->bm->bat_type[di->bm->batt_id].normal_cur_lvl);
 		if (result)
 			dev_err(di->dev, "failed to set chg curr\n");
 		break;
@@ -1210,7 +1210,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 	 * this way
 	 */
 	if (!charger_status ||
-		(di->events.batt_unknown && !di->bat->chg_unknown_bat)) {
+		(di->events.batt_unknown && !di->bm->chg_unknown_bat)) {
 		if (di->charge_state != STATE_HANDHELD) {
 			di->events.safety_timer_expired = false;
 			abx500_chargalg_state_to(di, STATE_HANDHELD_INIT);
@@ -1394,8 +1394,8 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 
 	case STATE_NORMAL_INIT:
 		abx500_chargalg_start_charging(di,
-			di->bat->bat_type[di->bat->batt_id].normal_vol_lvl,
-			di->bat->bat_type[di->bat->batt_id].normal_cur_lvl);
+			di->bm->bat_type[di->bm->batt_id].normal_vol_lvl,
+			di->bm->bat_type[di->bm->batt_id].normal_cur_lvl);
 		abx500_chargalg_state_to(di, STATE_NORMAL);
 		abx500_chargalg_start_safety_timer(di);
 		abx500_chargalg_stop_maintenance_timer(di);
@@ -1411,7 +1411,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 		handle_maxim_chg_curr(di);
 		if (di->charge_status == POWER_SUPPLY_STATUS_FULL &&
 			di->maintenance_chg) {
-			if (di->bat->no_maintenance)
+			if (di->bm->no_maintenance)
 				abx500_chargalg_state_to(di,
 					STATE_WAIT_FOR_RECHARGE_INIT);
 			else
@@ -1429,7 +1429,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 
 	case STATE_WAIT_FOR_RECHARGE:
 		if (di->batt_data.volt <=
-			di->bat->bat_type[di->bat->batt_id].recharge_vol) {
+			di->bm->bat_type[di->bm->batt_id].recharge_vol) {
 			if (di->rch_cnt-- == 0)
 				abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
 		} else
@@ -1439,13 +1439,13 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 	case STATE_MAINTENANCE_A_INIT:
 		abx500_chargalg_stop_safety_timer(di);
 		abx500_chargalg_start_maintenance_timer(di,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_a_chg_timer_h);
+			di->bm->bat_type[
+				di->bm->batt_id].maint_a_chg_timer_h);
 		abx500_chargalg_start_charging(di,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_a_vol_lvl,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_a_cur_lvl);
+			di->bm->bat_type[
+				di->bm->batt_id].maint_a_vol_lvl,
+			di->bm->bat_type[
+				di->bm->batt_id].maint_a_cur_lvl);
 		abx500_chargalg_state_to(di, STATE_MAINTENANCE_A);
 		power_supply_changed(&di->chargalg_psy);
 		/* Intentional fallthrough*/
@@ -1459,13 +1459,13 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 
 	case STATE_MAINTENANCE_B_INIT:
 		abx500_chargalg_start_maintenance_timer(di,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_b_chg_timer_h);
+			di->bm->bat_type[
+				di->bm->batt_id].maint_b_chg_timer_h);
 		abx500_chargalg_start_charging(di,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_b_vol_lvl,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_b_cur_lvl);
+			di->bm->bat_type[
+				di->bm->batt_id].maint_b_vol_lvl,
+			di->bm->bat_type[
+				di->bm->batt_id].maint_b_cur_lvl);
 		abx500_chargalg_state_to(di, STATE_MAINTENANCE_B);
 		power_supply_changed(&di->chargalg_psy);
 		/* Intentional fallthrough*/
@@ -1479,10 +1479,10 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 
 	case STATE_TEMP_LOWHIGH_INIT:
 		abx500_chargalg_start_charging(di,
-			di->bat->bat_type[
-				di->bat->batt_id].low_high_vol_lvl,
-			di->bat->bat_type[
-				di->bat->batt_id].low_high_cur_lvl);
+			di->bm->bat_type[
+				di->bm->batt_id].low_high_vol_lvl,
+			di->bm->bat_type[
+				di->bm->batt_id].low_high_cur_lvl);
 		abx500_chargalg_stop_maintenance_timer(di);
 		di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
 		abx500_chargalg_state_to(di, STATE_TEMP_LOWHIGH);
@@ -1543,11 +1543,11 @@ static void abx500_chargalg_periodic_work(struct work_struct *work)
 	if (di->chg_info.conn_chg)
 		queue_delayed_work(di->chargalg_wq,
 			&di->chargalg_periodic_work,
-			di->bat->interval_charging * HZ);
+			di->bm->interval_charging * HZ);
 	else
 		queue_delayed_work(di->chargalg_wq,
 			&di->chargalg_periodic_work,
-			di->bat->interval_not_charging * HZ);
+			di->bm->interval_not_charging * HZ);
 }
 
 /**
@@ -1614,7 +1614,7 @@ static int abx500_chargalg_get_property(struct power_supply *psy,
 		if (di->events.batt_ovv) {
 			val->intval = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
 		} else if (di->events.btemp_underover) {
-			if (di->batt_data.temp <= di->bat->temp_under)
+			if (di->batt_data.temp <= di->bm->temp_under)
 				val->intval = POWER_SUPPLY_HEALTH_COLD;
 			else
 				val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
@@ -1814,10 +1814,10 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "%s no mem for ab8500_chargalg\n", __func__);
 		return -ENOMEM;
 	}
-	di->bat = pdev->mfd_cell->platform_data;
-	if (!di->bat) {
+	di->bm = pdev->mfd_cell->platform_data;
+	if (!di->bm) {
 		if (np) {
-			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 2138bd33021a..6ce749a0e9d4 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -279,9 +279,9 @@ enum {
 	NTC_INTERNAL,
 };
 
-int bmdevs_of_probe(struct device *dev,
-		struct device_node *np,
-		struct abx500_bm_data **battery);
+int ab8500_bm_of_probe(struct device *dev,
+		       struct device_node *np,
+		       struct abx500_bm_data **battery);
 
 int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg,
 	u8 value);
-- 
cgit v1.2.3


From 23a04f9f40f2b32ee593b768483105b1c776814d Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 29 Nov 2012 15:08:41 +0000
Subject: ab8500_bm: Always send platform specific battery information via
 pdata

Currently the AB8500 battery management subsystem receives platform
specific information via two different means depending on how the
platform is booted. If DT is not enabled, a reference to a *_bm_data
data structure containing each platform specific attribute is passed
though platform_data. However, if DT is enabled, then platform_data
is empty and the reference is gained though a DT specific probe
function. There are two issues here 1) the same reference is
being collected each time and 2) the DT way doesn't allow any
provisions to select different platform specific attributes, which
kind of defeats the object.

Cc: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/ab8500-core.c       |  8 --------
 drivers/power/ab8500_bmdata.c   | 33 ++++++++++++++-------------------
 drivers/power/ab8500_btemp.c    |  2 +-
 drivers/power/ab8500_charger.c  |  2 +-
 drivers/power/ab8500_fg.c       |  2 +-
 drivers/power/abx500_chargalg.c |  2 +-
 include/linux/mfd/abx500.h      |  2 +-
 7 files changed, 19 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 5ec70f26b9d5..bbd49d796902 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -1044,40 +1044,32 @@ static struct mfd_cell __devinitdata ab8500_bm_devs[] = {
 		.of_compatible = "stericsson,ab8500-charger",
 		.num_resources = ARRAY_SIZE(ab8500_charger_resources),
 		.resources = ab8500_charger_resources,
-#ifndef CONFIG_OF
 		.platform_data = &ab8500_bm_data,
 		.pdata_size = sizeof(ab8500_bm_data),
-#endif
 	},
 	{
 		.name = "ab8500-btemp",
 		.of_compatible = "stericsson,ab8500-btemp",
 		.num_resources = ARRAY_SIZE(ab8500_btemp_resources),
 		.resources = ab8500_btemp_resources,
-#ifndef CONFIG_OF
 		.platform_data = &ab8500_bm_data,
 		.pdata_size = sizeof(ab8500_bm_data),
-#endif
 	},
 	{
 		.name = "ab8500-fg",
 		.of_compatible = "stericsson,ab8500-fg",
 		.num_resources = ARRAY_SIZE(ab8500_fg_resources),
 		.resources = ab8500_fg_resources,
-#ifndef CONFIG_OF
 		.platform_data = &ab8500_bm_data,
 		.pdata_size = sizeof(ab8500_bm_data),
-#endif
 	},
 	{
 		.name = "ab8500-chargalg",
 		.of_compatible = "stericsson,ab8500-chargalg",
 		.num_resources = ARRAY_SIZE(ab8500_chargalg_resources),
 		.resources = ab8500_chargalg_resources,
-#ifndef CONFIG_OF
 		.platform_data = &ab8500_bm_data,
 		.pdata_size = sizeof(ab8500_bm_data),
-#endif
 	},
 };
 
diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c
index c2fb2c554019..6b772e5f515f 100644
--- a/drivers/power/ab8500_bmdata.c
+++ b/drivers/power/ab8500_bmdata.c
@@ -454,16 +454,13 @@ struct abx500_bm_data ab8500_bm_data = {
 
 int __devinit ab8500_bm_of_probe(struct device *dev,
 				 struct device_node *np,
-				 struct abx500_bm_data **battery)
+				 struct abx500_bm_data *bm)
 {
 	struct batres_vs_temp *tmp_batres_tbl;
 	struct device_node *np_bat_supply;
-	struct abx500_bm_data *bat;
 	const char *btech;
 	int i;
 
-	*battery = &ab8500_bm_data;
-
 	/* get phandle to 'battery-info' node */
 	np_bat_supply = of_parse_phandle(np, "battery", 0);
 	if (!np_bat_supply) {
@@ -477,16 +474,14 @@ int __devinit ab8500_bm_of_probe(struct device *dev,
 		return -EINVAL;
 	}
 
-	bat = *battery;
-
 	if (strncmp(btech, "LION", 4) == 0) {
-		bat->no_maintenance  = true;
-		bat->chg_unknown_bat = true;
-		bat->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600;
-		bat->bat_type[BATTERY_UNKNOWN].termination_vol    = 4150;
-		bat->bat_type[BATTERY_UNKNOWN].recharge_vol	  = 4130;
-		bat->bat_type[BATTERY_UNKNOWN].normal_cur_lvl	  = 520;
-		bat->bat_type[BATTERY_UNKNOWN].normal_vol_lvl	  = 4200;
+		bm->no_maintenance  = true;
+		bm->chg_unknown_bat = true;
+		bm->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600;
+		bm->bat_type[BATTERY_UNKNOWN].termination_vol    = 4150;
+		bm->bat_type[BATTERY_UNKNOWN].recharge_vol       = 4130;
+		bm->bat_type[BATTERY_UNKNOWN].normal_cur_lvl     = 520;
+		bm->bat_type[BATTERY_UNKNOWN].normal_vol_lvl     = 4200;
 	}
 
 	if (of_property_read_bool(np_bat_supply, "thermistor-on-batctrl")) {
@@ -495,15 +490,15 @@ int __devinit ab8500_bm_of_probe(struct device *dev,
 		else
 			tmp_batres_tbl = temp_to_batres_tbl_thermistor;
 	} else {
-		bat->n_btypes   = 4;
-		bat->bat_type   = bat_type_ext_thermistor;
-		bat->adc_therm  = ABx500_ADC_THERM_BATTEMP;
-		tmp_batres_tbl  = temp_to_batres_tbl_ext_thermistor;
+		bm->n_btypes   = 4;
+		bm->bat_type   = bat_type_ext_thermistor;
+		bm->adc_therm  = ABx500_ADC_THERM_BATTEMP;
+		tmp_batres_tbl = temp_to_batres_tbl_ext_thermistor;
 	}
 
 	/* select the battery resolution table */
-	for (i = 0; i < bat->n_btypes; ++i)
-		bat->bat_type[i]->batres_tbl = tmp_batres_tbl;
+	for (i = 0; i < bm->n_btypes; ++i)
+		bm->bat_type[i].batres_tbl = tmp_batres_tbl;
 
 	of_node_put(np_bat_supply);
 
diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index 33ed0fccbd0e..c0f7dcceae45 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -988,7 +988,7 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev)
 	di->bm = pdev->mfd_cell->platform_data;
 	if (!di->bm) {
 		if (np) {
-			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 21dc8422778d..c1077df26783 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -2647,7 +2647,7 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 	di->bm = pdev->mfd_cell->platform_data;
 	if (!di->bm) {
 		if (np) {
-			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 4cf231375de3..8bb9df92627d 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -2460,7 +2460,7 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 	di->bm = pdev->mfd_cell->platform_data;
 	if (!di->bm) {
 		if (np) {
-			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index ea2e2eb652ef..22a511c2a719 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -1817,7 +1817,7 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 	di->bm = pdev->mfd_cell->platform_data;
 	if (!di->bm) {
 		if (np) {
-			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 6ce749a0e9d4..4906b1842d2f 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -281,7 +281,7 @@ enum {
 
 int ab8500_bm_of_probe(struct device *dev,
 		       struct device_node *np,
-		       struct abx500_bm_data **battery);
+		       struct abx500_bm_data *bm);
 
 int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg,
 	u8 value);
-- 
cgit v1.2.3


From b59320cc5a5e6ceaa17f0895ffbe0711ebad7adf Mon Sep 17 00:00:00 2001
From: Daniel Jeong <gshark.jeong@gmail.com>
Date: Mon, 17 Dec 2012 10:24:06 +0900
Subject: regulator: lp8755: new driver for LP8755

This patch is for new lp8755 regulator dirver and
several unsed variables were deleted and then test was done.

LP8755 :
The LP8755 is a high performance power management unit.It contains
six step-down DC-DC converters which can can be filexibly bundled
together in multiphase converters as required by application.
www.ti.com

Signed-off-by: Daniel Jeong <gshark.jeong@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/Kconfig            |   9 +
 drivers/regulator/Makefile           |   1 +
 drivers/regulator/lp8755.c           | 580 +++++++++++++++++++++++++++++++++++
 include/linux/platform_data/lp8755.h |  71 +++++
 4 files changed, 661 insertions(+)
 create mode 100644 drivers/regulator/lp8755.c
 create mode 100644 include/linux/platform_data/lp8755.h

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 551a22b07538..f37a1c8bb929 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -277,6 +277,15 @@ config REGULATOR_LP872X
 	help
 	  This driver supports LP8720/LP8725 PMIC
 
+config REGULATOR_LP8755
+	tristate "TI LP8755 High Performance PMU driver"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  This driver supports LP8755 High Performance PMU driver. This
+	  chip contains six step-down DC/DC converters which can support
+	  9 mode multiphase configuration.
+
 config REGULATOR_LP8788
 	bool "TI LP8788 Power Regulators"
 	depends on MFD_LP8788
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index b802b0c7fb02..6e8250382def 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o
 obj-$(CONFIG_REGULATOR_LP872X) += lp872x.o
 obj-$(CONFIG_REGULATOR_LP8788) += lp8788-buck.o
 obj-$(CONFIG_REGULATOR_LP8788) += lp8788-ldo.o
+obj-$(CONFIG_REGULATOR_LP8755) += lp8755.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_MAX8649)	+= max8649.o
 obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o
diff --git a/drivers/regulator/lp8755.c b/drivers/regulator/lp8755.c
new file mode 100644
index 000000000000..dbc4d1254ca1
--- /dev/null
+++ b/drivers/regulator/lp8755.c
@@ -0,0 +1,580 @@
+/*
+ * LP8755 High Performance Power Management Unit : System Interface Driver
+ * (based on rev. 0.26)
+ * Copyright 2012 Texas Instruments
+ *
+ * Author: Daniel(Geon Si) Jeong <daniel.jeong@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <linux/regmap.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/platform_data/lp8755.h>
+
+#define LP8755_REG_BUCK0	0x00
+#define LP8755_REG_BUCK1	0x03
+#define LP8755_REG_BUCK2	0x04
+#define LP8755_REG_BUCK3	0x01
+#define LP8755_REG_BUCK4	0x05
+#define LP8755_REG_BUCK5	0x02
+#define LP8755_REG_MAX		0xFF
+
+#define LP8755_BUCK_EN_M	BIT(7)
+#define LP8755_BUCK_LINEAR_OUT_MAX	0x76
+#define LP8755_BUCK_VOUT_M	0x7F
+
+enum bucks {
+	BUCK0 = 0,
+	BUCK1,
+	BUCK2,
+	BUCK3,
+	BUCK4,
+	BUCK5,
+};
+
+struct lp8755_mphase {
+	int nreg;
+	int buck_num[LP8755_BUCK_MAX];
+};
+
+struct lp8755_chip {
+	struct device *dev;
+	struct regmap *regmap;
+	struct lp8755_platform_data *pdata;
+
+	int irq;
+	unsigned int irqmask;
+
+	int mphase;
+	struct regulator_dev *rdev[LP8755_BUCK_MAX];
+};
+
+/**
+ *lp8755_read : read a single register value from lp8755.
+ *@pchip : device to read from
+ *@reg   : register to read from
+ *@val   : pointer to store read value
+ */
+static int lp8755_read(struct lp8755_chip *pchip, unsigned int reg,
+		       unsigned int *val)
+{
+	return regmap_read(pchip->regmap, reg, val);
+}
+
+/**
+ *lp8755_write : write a single register value to lp8755.
+ *@pchip : device to write to
+ *@reg   : register to write to
+ *@val   : value to be written
+ */
+static int lp8755_write(struct lp8755_chip *pchip, unsigned int reg,
+			unsigned int val)
+{
+	return regmap_write(pchip->regmap, reg, val);
+}
+
+/**
+ *lp8755_update_bits : set the values of bit fields in lp8755 register.
+ *@pchip : device to read from
+ *@reg   : register to update
+ *@mask  : bitmask to be changed
+ *@val   : value for bitmask
+ */
+static int lp8755_update_bits(struct lp8755_chip *pchip, unsigned int reg,
+			      unsigned int mask, unsigned int val)
+{
+	return regmap_update_bits(pchip->regmap, reg, mask, val);
+}
+
+static int lp8755_buck_enable_time(struct regulator_dev *rdev)
+{
+	int ret;
+	unsigned int regval;
+	enum lp8755_bucks id = rdev_get_id(rdev);
+	struct lp8755_chip *pchip = rdev_get_drvdata(rdev);
+
+	ret = lp8755_read(pchip, 0x12 + id, &regval);
+	if (ret < 0) {
+		dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+		return ret;
+	}
+	return (regval & 0xff) * 100;
+}
+
+static int lp8755_buck_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	int ret;
+	unsigned int regbval = 0x0;
+	enum lp8755_bucks id = rdev_get_id(rdev);
+	struct lp8755_chip *pchip = rdev_get_drvdata(rdev);
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		/* forced pwm mode */
+		regbval = (0x01 << id);
+		break;
+	case REGULATOR_MODE_NORMAL:
+		/* enable automatic pwm/pfm mode */
+		ret = lp8755_update_bits(pchip, 0x08 + id, 0x20, 0x00);
+		if (ret < 0)
+			goto err_i2c;
+		break;
+	case REGULATOR_MODE_IDLE:
+		/* enable automatic pwm/pfm/lppfm mode */
+		ret = lp8755_update_bits(pchip, 0x08 + id, 0x20, 0x20);
+		if (ret < 0)
+			goto err_i2c;
+
+		ret = lp8755_update_bits(pchip, 0x10, 0x01, 0x01);
+		if (ret < 0)
+			goto err_i2c;
+		break;
+	default:
+		dev_err(pchip->dev, "Not supported buck mode %s\n", __func__);
+		/* forced pwm mode */
+		regbval = (0x01 << id);
+	}
+
+	ret = lp8755_update_bits(pchip, 0x06, 0x01 << id, regbval);
+	if (ret < 0)
+		goto err_i2c;
+	return ret;
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return ret;
+}
+
+static unsigned int lp8755_buck_get_mode(struct regulator_dev *rdev)
+{
+	int ret;
+	unsigned int regval;
+	enum lp8755_bucks id = rdev_get_id(rdev);
+	struct lp8755_chip *pchip = rdev_get_drvdata(rdev);
+
+	ret = lp8755_read(pchip, 0x06, &regval);
+	if (ret < 0)
+		goto err_i2c;
+
+	/* mode fast means forced pwm mode */
+	if (regval & (0x01 << id))
+		return REGULATOR_MODE_FAST;
+
+	ret = lp8755_read(pchip, 0x08 + id, &regval);
+	if (ret < 0)
+		goto err_i2c;
+
+	/* mode idle means automatic pwm/pfm/lppfm mode */
+	if (regval & 0x20)
+		return REGULATOR_MODE_IDLE;
+
+	/* mode normal means automatic pwm/pfm mode */
+	return REGULATOR_MODE_NORMAL;
+
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return 0;
+}
+
+static int lp8755_buck_set_ramp(struct regulator_dev *rdev, int ramp)
+{
+	int ret;
+	unsigned int regval = 0x00;
+	enum lp8755_bucks id = rdev_get_id(rdev);
+	struct lp8755_chip *pchip = rdev_get_drvdata(rdev);
+
+	/* uV/us */
+	switch (ramp) {
+	case 0 ... 230:
+		regval = 0x07;
+		break;
+	case 231 ... 470:
+		regval = 0x06;
+		break;
+	case 471 ... 940:
+		regval = 0x05;
+		break;
+	case 941 ... 1900:
+		regval = 0x04;
+		break;
+	case 1901 ... 3800:
+		regval = 0x03;
+		break;
+	case 3801 ... 7500:
+		regval = 0x02;
+		break;
+	case 7501 ... 15000:
+		regval = 0x01;
+		break;
+	case 15001 ... 30000:
+		regval = 0x00;
+		break;
+	default:
+		dev_err(pchip->dev,
+			"Not supported ramp value %d %s\n", ramp, __func__);
+		return -EINVAL;
+	}
+
+	ret = lp8755_update_bits(pchip, 0x07 + id, 0x07, regval);
+	if (ret < 0)
+		goto err_i2c;
+	return ret;
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return ret;
+}
+
+static struct regulator_ops lp8755_buck_ops = {
+	.list_voltage = regulator_list_voltage_linear,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.enable_time = lp8755_buck_enable_time,
+	.set_mode = lp8755_buck_set_mode,
+	.get_mode = lp8755_buck_get_mode,
+	.set_ramp_delay = lp8755_buck_set_ramp,
+};
+
+#define lp8755_rail(_id) "lp8755_buck"#_id
+#define lp8755_buck_init(_id)\
+{\
+	.constraints = {\
+		.name = lp8755_rail(_id),\
+		.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE,\
+		.min_uV = 500000,\
+		.max_uV = 1675000,\
+	},\
+}
+
+static struct regulator_init_data lp8755_reg_default[LP8755_BUCK_MAX] = {
+	[BUCK0] = lp8755_buck_init(0),
+	[BUCK1] = lp8755_buck_init(1),
+	[BUCK2] = lp8755_buck_init(2),
+	[BUCK3] = lp8755_buck_init(3),
+	[BUCK4] = lp8755_buck_init(4),
+	[BUCK5] = lp8755_buck_init(5),
+};
+
+static const struct lp8755_mphase mphase_buck[MPHASE_CONF_MAX] = {
+	{3, {BUCK0, BUCK3, BUCK5}
+	 },
+	{6, {BUCK0, BUCK1, BUCK2, BUCK3, BUCK4, BUCK5}
+	 },
+	{5, {BUCK0, BUCK2, BUCK3, BUCK4, BUCK5}
+	 },
+	{4, {BUCK0, BUCK3, BUCK4, BUCK5}
+	 },
+	{3, {BUCK0, BUCK4, BUCK5}
+	 },
+	{2, {BUCK0, BUCK5}
+	 },
+	{1, {BUCK0}
+	 },
+	{2, {BUCK0, BUCK3}
+	 },
+	{4, {BUCK0, BUCK2, BUCK3, BUCK5}
+	 },
+};
+
+static int lp8755_init_data(struct lp8755_chip *pchip)
+{
+	unsigned int regval;
+	int ret, icnt, buck_num;
+	struct lp8755_platform_data *pdata = pchip->pdata;
+
+	/* read back  muti-phase configuration */
+	ret = lp8755_read(pchip, 0x3D, &regval);
+	if (ret < 0)
+		goto out_i2c_error;
+	pchip->mphase = regval & 0x07;
+
+	/* set default data based on multi-phase config */
+	for (icnt = 0; icnt < mphase_buck[pchip->mphase].nreg; icnt++) {
+		buck_num = mphase_buck[pchip->mphase].buck_num[icnt];
+		pdata->buck_data[buck_num] = &lp8755_reg_default[buck_num];
+	}
+	return ret;
+
+out_i2c_error:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return ret;
+}
+
+#define lp8755_buck_desc(_id)\
+{\
+	.name = lp8755_rail(_id),\
+	.id   = LP8755_BUCK##_id,\
+	.ops  = &lp8755_buck_ops,\
+	.n_voltages = LP8755_BUCK_LINEAR_OUT_MAX+1,\
+	.uV_step = 10000,\
+	.min_uV = 500000,\
+	.type = REGULATOR_VOLTAGE,\
+	.owner = THIS_MODULE,\
+	.enable_reg = LP8755_REG_BUCK##_id,\
+	.enable_mask = LP8755_BUCK_EN_M,\
+	.vsel_reg = LP8755_REG_BUCK##_id,\
+	.vsel_mask = LP8755_BUCK_VOUT_M,\
+}
+
+static struct regulator_desc lp8755_regulators[] = {
+	lp8755_buck_desc(0),
+	lp8755_buck_desc(1),
+	lp8755_buck_desc(2),
+	lp8755_buck_desc(3),
+	lp8755_buck_desc(4),
+	lp8755_buck_desc(5),
+};
+
+static int lp8755_regulator_init(struct lp8755_chip *pchip)
+{
+	int ret, icnt, buck_num;
+	struct lp8755_platform_data *pdata = pchip->pdata;
+	struct regulator_config rconfig = { };
+
+	rconfig.regmap = pchip->regmap;
+	rconfig.dev = pchip->dev;
+	rconfig.driver_data = pchip;
+
+	for (icnt = 0; icnt < mphase_buck[pchip->mphase].nreg; icnt++) {
+		buck_num = mphase_buck[pchip->mphase].buck_num[icnt];
+		rconfig.init_data = pdata->buck_data[buck_num];
+		rconfig.of_node = pchip->dev->of_node;
+		pchip->rdev[buck_num] =
+		    regulator_register(&lp8755_regulators[buck_num], &rconfig);
+		if (IS_ERR(pchip->rdev[buck_num])) {
+			ret = PTR_ERR(pchip->rdev[buck_num]);
+			dev_err(pchip->dev, "regulator init failed: buck 0\n");
+			goto err_buck;
+		}
+	}
+
+	return 0;
+
+err_buck:
+	for (icnt = 0; icnt < LP8755_BUCK_MAX; icnt++)
+		if (pchip->rdev[icnt] != NULL)
+			regulator_unregister(pchip->rdev[icnt]);
+	return ret;
+}
+
+static irqreturn_t lp8755_irq_handler(int irq, void *data)
+{
+	int ret, icnt;
+	unsigned int flag0, flag1;
+	struct lp8755_chip *pchip = data;
+
+	/* read flag0 register */
+	ret = lp8755_read(pchip, 0x0D, &flag0);
+	if (ret < 0)
+		goto err_i2c;
+	/* clear flag register to pull up int. pin */
+	ret = lp8755_write(pchip, 0x0D, 0x00);
+	if (ret < 0)
+		goto err_i2c;
+
+	/* sent power fault detection event to specific regulator */
+	for (icnt = 0; icnt < 6; icnt++)
+		if ((flag0 & (0x4 << icnt))
+		    && (pchip->irqmask & (0x04 << icnt))
+		    && (pchip->rdev[icnt] != NULL))
+			regulator_notifier_call_chain(pchip->rdev[icnt],
+						      LP8755_EVENT_PWR_FAULT,
+						      NULL);
+
+	/* read flag1 register */
+	ret = lp8755_read(pchip, 0x0E, &flag1);
+	if (ret < 0)
+		goto err_i2c;
+	/* clear flag register to pull up int. pin */
+	ret = lp8755_write(pchip, 0x0E, 0x00);
+	if (ret < 0)
+		goto err_i2c;
+
+	/* send OCP event to all regualtor devices */
+	if ((flag1 & 0x01) && (pchip->irqmask & 0x01))
+		for (icnt = 0; icnt < LP8755_BUCK_MAX; icnt++)
+			if (pchip->rdev[icnt] != NULL)
+				regulator_notifier_call_chain(pchip->rdev[icnt],
+							      LP8755_EVENT_OCP,
+							      NULL);
+
+	/* send OVP event to all regualtor devices */
+	if ((flag1 & 0x02) && (pchip->irqmask & 0x02))
+		for (icnt = 0; icnt < LP8755_BUCK_MAX; icnt++)
+			if (pchip->rdev[icnt] != NULL)
+				regulator_notifier_call_chain(pchip->rdev[icnt],
+							      LP8755_EVENT_OVP,
+							      NULL);
+	return IRQ_HANDLED;
+
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return IRQ_NONE;
+}
+
+static int lp8755_int_config(struct lp8755_chip *pchip)
+{
+	int ret;
+	unsigned int regval;
+
+	if (pchip->irq == 0) {
+		dev_warn(pchip->dev, "not use interrupt : %s\n", __func__);
+		return 0;
+	}
+
+	ret = lp8755_read(pchip, 0x0F, &regval);
+	if (ret < 0)
+		goto err_i2c;
+	pchip->irqmask = regval;
+	ret = request_threaded_irq(pchip->irq, NULL, lp8755_irq_handler,
+				   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+				   "lp8755-irq", pchip);
+	if (ret)
+		return ret;
+
+	return ret;
+
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return ret;
+}
+
+static const struct regmap_config lp8755_regmap = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = LP8755_REG_MAX,
+};
+
+static int lp8755_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	int ret, icnt;
+	struct lp8755_chip *pchip;
+	struct lp8755_platform_data *pdata = client->dev.platform_data;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev, "i2c functionality check fail.\n");
+		return -EOPNOTSUPP;
+	}
+
+	pchip = devm_kzalloc(&client->dev,
+			     sizeof(struct lp8755_chip), GFP_KERNEL);
+	if (!pchip)
+		return -ENOMEM;
+
+	pchip->dev = &client->dev;
+	pchip->regmap = devm_regmap_init_i2c(client, &lp8755_regmap);
+	if (IS_ERR(pchip->regmap)) {
+		ret = PTR_ERR(pchip->regmap);
+		dev_err(&client->dev, "fail to allocate regmap %d\n", ret);
+		return ret;
+	}
+	i2c_set_clientdata(client, pchip);
+
+	if (pdata != NULL) {
+		pchip->pdata = pdata;
+		pchip->mphase = pdata->mphase;
+	} else {
+		pchip->pdata = devm_kzalloc(pchip->dev,
+					    sizeof(struct lp8755_platform_data),
+					    GFP_KERNEL);
+		if (!pchip->pdata)
+			return -ENOMEM;
+		ret = lp8755_init_data(pchip);
+		if (ret < 0)
+			goto err_chip_init;
+	}
+
+	ret = lp8755_regulator_init(pchip);
+	if (ret < 0)
+		goto err_regulator;
+
+	pchip->irq = client->irq;
+	ret = lp8755_int_config(pchip);
+	if (ret < 0)
+		goto err_irq;
+
+	return ret;
+
+err_irq:
+	dev_err(&client->dev, "fail to irq config\n");
+
+	for (icnt = 0; icnt < mphase_buck[pchip->mphase].nreg; icnt++)
+		regulator_unregister(pchip->rdev[icnt]);
+
+err_regulator:
+	dev_err(&client->dev, "fail to initialize regulators\n");
+	/* output disable */
+	for (icnt = 0; icnt < 0x06; icnt++)
+		lp8755_write(pchip, icnt, 0x00);
+
+err_chip_init:
+	dev_err(&client->dev, "fail to initialize chip\n");
+	return ret;
+}
+
+static int lp8755_remove(struct i2c_client *client)
+{
+	int icnt;
+	struct lp8755_chip *pchip = i2c_get_clientdata(client);
+
+	for (icnt = 0; icnt < mphase_buck[pchip->mphase].nreg; icnt++)
+		regulator_unregister(pchip->rdev[icnt]);
+
+	for (icnt = 0; icnt < 0x06; icnt++)
+		lp8755_write(pchip, icnt, 0x00);
+
+	if (pchip->irq != 0)
+		free_irq(pchip->irq, pchip);
+
+	return 0;
+}
+
+static const struct i2c_device_id lp8755_id[] = {
+	{LP8755_NAME, 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, lp8755_id);
+
+static struct i2c_driver lp8755_i2c_driver = {
+	.driver = {
+		   .name = LP8755_NAME,
+		   },
+	.probe = lp8755_probe,
+	.remove = __devexit_p(lp8755_remove),
+	.id_table = lp8755_id,
+};
+
+static int __init lp8755_init(void)
+{
+	return i2c_add_driver(&lp8755_i2c_driver);
+}
+
+subsys_initcall(lp8755_init);
+
+static void __exit lp8755_exit(void)
+{
+	i2c_del_driver(&lp8755_i2c_driver);
+}
+
+module_exit(lp8755_exit);
+
+MODULE_DESCRIPTION("Texas Instruments lp8755 driver");
+MODULE_AUTHOR("Daniel Jeong <daniel.jeong@ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/lp8755.h b/include/linux/platform_data/lp8755.h
new file mode 100644
index 000000000000..a7fd0776c9bf
--- /dev/null
+++ b/include/linux/platform_data/lp8755.h
@@ -0,0 +1,71 @@
+/*
+ * LP8755 High Performance Power Management Unit Driver:System Interface Driver
+ *
+ *			Copyright (C) 2012 Texas Instruments
+ *
+ * Author: Daniel(Geon Si) Jeong <daniel.jeong@ti.com>
+ *             G.Shark Jeong <gshark.jeong@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _LP8755_H
+#define _LP8755_H
+
+#include <linux/regulator/consumer.h>
+
+#define LP8755_NAME "lp8755-regulator"
+/*
+ *PWR FAULT : power fault detected
+ *OCP : over current protect activated
+ *OVP : over voltage protect activated
+ *TEMP_WARN : thermal warning
+ *TEMP_SHDN : thermal shutdonw detected
+ *I_LOAD : current measured
+ */
+#define LP8755_EVENT_PWR_FAULT REGULATOR_EVENT_FAIL
+#define LP8755_EVENT_OCP REGULATOR_EVENT_OVER_CURRENT
+#define LP8755_EVENT_OVP 0x10000
+#define LP8755_EVENT_TEMP_WARN 0x2000
+#define LP8755_EVENT_TEMP_SHDN REGULATOR_EVENT_OVER_TEMP
+#define LP8755_EVENT_I_LOAD	0x40000
+
+enum lp8755_bucks {
+	LP8755_BUCK0 = 0,
+	LP8755_BUCK1,
+	LP8755_BUCK2,
+	LP8755_BUCK3,
+	LP8755_BUCK4,
+	LP8755_BUCK5,
+	LP8755_BUCK_MAX,
+};
+
+/**
+ * multiphase configuration options
+ */
+enum lp8755_mphase_config {
+	MPHASE_CONF0,
+	MPHASE_CONF1,
+	MPHASE_CONF2,
+	MPHASE_CONF3,
+	MPHASE_CONF4,
+	MPHASE_CONF5,
+	MPHASE_CONF6,
+	MPHASE_CONF7,
+	MPHASE_CONF8,
+	MPHASE_CONF_MAX
+};
+
+/**
+ * struct lp8755_platform_data
+ * @mphase_type : Multiphase Switcher Configurations.
+ * @buck_data   : buck0~6 init voltage in uV
+ */
+struct lp8755_platform_data {
+	int mphase;
+	struct regulator_init_data *buck_data[LP8755_BUCK_MAX];
+};
+#endif
-- 
cgit v1.2.3


From c8520b4c5d25eb7b8b54f1ae9ba7da71375f2b2c Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 18 Dec 2012 09:30:10 +0800
Subject: regulator: core: Allow specify apply_[reg|bit] for regmap based
 voltage_sel operations

Some DVM regulators needs to update apply_bit after setting vsel_reg to
initiate voltage change on the output.  This patch adds apply_reg and
apply_bit to struct regulator_desc and update
regulator_set_voltage_sel_regmap() to set apply_bit of apply_reg when
apply_bit is set.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/core.c         | 12 +++++++++++-
 include/linux/regulator/driver.h |  6 ++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 0f65b246cc0c..571ce0f1c06e 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2074,10 +2074,20 @@ EXPORT_SYMBOL_GPL(regulator_get_voltage_sel_regmap);
  */
 int regulator_set_voltage_sel_regmap(struct regulator_dev *rdev, unsigned sel)
 {
+	int ret;
+
 	sel <<= ffs(rdev->desc->vsel_mask) - 1;
 
-	return regmap_update_bits(rdev->regmap, rdev->desc->vsel_reg,
+	ret = regmap_update_bits(rdev->regmap, rdev->desc->vsel_reg,
 				  rdev->desc->vsel_mask, sel);
+	if (ret)
+		return ret;
+
+	if (rdev->desc->apply_bit)
+		ret = regmap_update_bits(rdev->regmap, rdev->desc->apply_reg,
+					 rdev->desc->apply_bit,
+					 rdev->desc->apply_bit);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(regulator_set_voltage_sel_regmap);
 
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index d10bb0f39c5e..23070fd83872 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -193,6 +193,10 @@ enum regulator_type {
  *
  * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_
  * @vsel_mask: Mask for register bitfield used for selector
+ * @apply_reg: Register for initiate voltage change on the output when
+ *                using regulator_set_voltage_sel_regmap
+ * @apply_bit: Register bitfield used for initiate voltage change on the
+ *                output when using regulator_set_voltage_sel_regmap
  * @enable_reg: Register for control when using regmap enable/disable ops
  * @enable_mask: Mask for control when using regmap enable/disable ops
  *
@@ -218,6 +222,8 @@ struct regulator_desc {
 
 	unsigned int vsel_reg;
 	unsigned int vsel_mask;
+	unsigned int apply_reg;
+	unsigned int apply_bit;
 	unsigned int enable_reg;
 	unsigned int enable_mask;
 	unsigned int bypass_reg;
-- 
cgit v1.2.3


From 176eb8dc91bd91e7b1beb1761ee29f016e8c09e8 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Mon, 24 Dec 2012 09:49:55 -0800
Subject: Input: adxl34x - make platform_data include self contained

Since it suggests to use defines from input.h (ABS_X, ...), also include
the file to make them available.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input/adxl34x.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input/adxl34x.h b/include/linux/input/adxl34x.h
index 57e01a7cb006..010d98175efa 100644
--- a/include/linux/input/adxl34x.h
+++ b/include/linux/input/adxl34x.h
@@ -13,6 +13,8 @@
 #ifndef __LINUX_INPUT_ADXL34X_H__
 #define __LINUX_INPUT_ADXL34X_H__
 
+#include <linux/input.h>
+
 struct adxl34x_platform_data {
 
 	/*
-- 
cgit v1.2.3


From 4bf84c35c65f36a344fb7a6cde6274df4120efb8 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 27 Dec 2012 23:49:37 +0000
Subject: net: add change_carrier netdev op

This allows a driver to register change_carrier callback which will be
called whenever user will like to change carrier state. This is useful
for devices like dummy, gre, team and so on.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 12 ++++++++++++
 net/core/dev.c            | 19 +++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c599e4782d45..0e1b92a0c1ec 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -891,6 +891,14 @@ struct netdev_fcoe_hbainfo {
  * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh)
  * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
  *			     struct net_device *dev)
+ *
+ * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
+ *	Called to change device carrier. Soft-devices (like dummy, team, etc)
+ *	which do not represent real hardware may define this to allow their
+ *	userspace components to manage their virtual carrier state. Devices
+ *	that determine carrier state from physical hardware properties (eg
+ *	network cables) or protocol-dependent mechanisms (eg
+ *	USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1008,6 +1016,8 @@ struct net_device_ops {
 	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
 						      u32 pid, u32 seq,
 						      struct net_device *dev);
+	int			(*ndo_change_carrier)(struct net_device *dev,
+						      bool new_carrier);
 };
 
 /*
@@ -2194,6 +2204,8 @@ extern int		dev_set_mtu(struct net_device *, int);
 extern void		dev_set_group(struct net_device *, int);
 extern int		dev_set_mac_address(struct net_device *,
 					    struct sockaddr *);
+extern int		dev_change_carrier(struct net_device *,
+					   bool new_carrier);
 extern int		dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev,
 					    struct netdev_queue *txq);
diff --git a/net/core/dev.c b/net/core/dev.c
index 515473ee52cb..21c5b976dcf3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5027,6 +5027,25 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 }
 EXPORT_SYMBOL(dev_set_mac_address);
 
+/**
+ *	dev_change_carrier - Change device carrier
+ *	@dev: device
+ *	@new_carries: new value
+ *
+ *	Change device carrier
+ */
+int dev_change_carrier(struct net_device *dev, bool new_carrier)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (!ops->ndo_change_carrier)
+		return -EOPNOTSUPP;
+	if (!netif_device_present(dev))
+		return -ENODEV;
+	return ops->ndo_change_carrier(dev, new_carrier);
+}
+EXPORT_SYMBOL(dev_change_carrier);
+
 /*
  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
  */
-- 
cgit v1.2.3


From 2681128f0ced8aa4e66f221197e183cc16d244fe Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 29 Dec 2012 16:02:43 +0000
Subject: veth: reduce stat overhead

veth stats are a bit bloated. There is no need to account transmit
and receive stats, since they are absolutely symmetric.

Also use a per device atomic64_t for the dropped counter, as it
should never be used in fast path.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c        | 115 +++++++++++++++++++---------------------------
 include/linux/netdevice.h |   1 +
 2 files changed, 48 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 95814d9747ef..c048f8d27bbf 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -25,18 +25,15 @@
 #define MIN_MTU 68		/* Min L3 MTU */
 #define MAX_MTU 65535		/* Max L3 MTU (arbitrary) */
 
-struct veth_net_stats {
-	u64			rx_packets;
-	u64			rx_bytes;
-	u64			tx_packets;
-	u64			tx_bytes;
-	u64			rx_dropped;
+struct pcpu_vstats {
+	u64			packets;
+	u64			bytes;
 	struct u64_stats_sync	syncp;
 };
 
 struct veth_priv {
-	struct net_device *peer;
-	struct veth_net_stats __percpu *stats;
+	struct net_device	*peer;
+	atomic64_t		dropped;
 };
 
 /*
@@ -107,50 +104,30 @@ static const struct ethtool_ops veth_ethtool_ops = {
 	.get_ethtool_stats	= veth_get_ethtool_stats,
 };
 
-/*
- * xmit
- */
-
 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device *rcv = NULL;
-	struct veth_priv *priv, *rcv_priv;
-	struct veth_net_stats *stats, *rcv_stats;
-	int length;
-
-	priv = netdev_priv(dev);
-	rcv = priv->peer;
-	rcv_priv = netdev_priv(rcv);
-
-	stats = this_cpu_ptr(priv->stats);
-	rcv_stats = this_cpu_ptr(rcv_priv->stats);
+	struct veth_priv *priv = netdev_priv(dev);
+	struct net_device *rcv = priv->peer;
+	int length = skb->len;
 
 	/* don't change ip_summed == CHECKSUM_PARTIAL, as that
-	   will cause bad checksum on forwarded packets */
+	 * will cause bad checksum on forwarded packets
+	 */
 	if (skb->ip_summed == CHECKSUM_NONE &&
 	    rcv->features & NETIF_F_RXCSUM)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	length = skb->len;
-	if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
-		goto rx_drop;
-
-	u64_stats_update_begin(&stats->syncp);
-	stats->tx_bytes += length;
-	stats->tx_packets++;
-	u64_stats_update_end(&stats->syncp);
+	if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
+		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
 
-	u64_stats_update_begin(&rcv_stats->syncp);
-	rcv_stats->rx_bytes += length;
-	rcv_stats->rx_packets++;
-	u64_stats_update_end(&rcv_stats->syncp);
-
-	return NETDEV_TX_OK;
+		u64_stats_update_begin(&stats->syncp);
+		stats->bytes += length;
+		stats->packets++;
+		u64_stats_update_end(&stats->syncp);
+	} else {
+		atomic64_inc(&priv->dropped);
+	}
 
-rx_drop:
-	u64_stats_update_begin(&rcv_stats->syncp);
-	rcv_stats->rx_dropped++;
-	u64_stats_update_end(&rcv_stats->syncp);
 	return NETDEV_TX_OK;
 }
 
@@ -158,32 +135,42 @@ rx_drop:
  * general routines
  */
 
-static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
-						  struct rtnl_link_stats64 *tot)
+static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int cpu;
 
+	result->packets = 0;
+	result->bytes = 0;
 	for_each_possible_cpu(cpu) {
-		struct veth_net_stats *stats = per_cpu_ptr(priv->stats, cpu);
-		u64 rx_packets, rx_bytes, rx_dropped;
-		u64 tx_packets, tx_bytes;
+		struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
+		u64 packets, bytes;
 		unsigned int start;
 
 		do {
 			start = u64_stats_fetch_begin_bh(&stats->syncp);
-			rx_packets = stats->rx_packets;
-			tx_packets = stats->tx_packets;
-			rx_bytes = stats->rx_bytes;
-			tx_bytes = stats->tx_bytes;
-			rx_dropped = stats->rx_dropped;
+			packets = stats->packets;
+			bytes = stats->bytes;
 		} while (u64_stats_fetch_retry_bh(&stats->syncp, start));
-		tot->rx_packets += rx_packets;
-		tot->tx_packets += tx_packets;
-		tot->rx_bytes   += rx_bytes;
-		tot->tx_bytes   += tx_bytes;
-		tot->rx_dropped += rx_dropped;
+		result->packets += packets;
+		result->bytes += bytes;
 	}
+	return atomic64_read(&priv->dropped);
+}
+
+static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
+						  struct rtnl_link_stats64 *tot)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	struct pcpu_vstats one;
+
+	tot->tx_dropped = veth_stats_one(&one, dev);
+	tot->tx_bytes = one.bytes;
+	tot->tx_packets = one.packets;
+
+	tot->rx_dropped = veth_stats_one(&one, priv->peer);
+	tot->rx_bytes = one.bytes;
+	tot->rx_packets = one.packets;
 
 	return tot;
 }
@@ -228,24 +215,16 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu)
 
 static int veth_dev_init(struct net_device *dev)
 {
-	struct veth_net_stats __percpu *stats;
-	struct veth_priv *priv;
-
-	stats = alloc_percpu(struct veth_net_stats);
-	if (stats == NULL)
+	dev->vstats = alloc_percpu(struct pcpu_vstats);
+	if (!dev->vstats)
 		return -ENOMEM;
 
-	priv = netdev_priv(dev);
-	priv->stats = stats;
 	return 0;
 }
 
 static void veth_dev_free(struct net_device *dev)
 {
-	struct veth_priv *priv;
-
-	priv = netdev_priv(dev);
-	free_percpu(priv->stats);
+	free_percpu(dev->vstats);
 	free_netdev(dev);
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0e1b92a0c1ec..6835b5837f93 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1284,6 +1284,7 @@ struct net_device {
 		struct pcpu_lstats __percpu	*lstats; /* loopback stats */
 		struct pcpu_tstats __percpu	*tstats; /* tunnel stats */
 		struct pcpu_dstats __percpu	*dstats; /* dummy stats */
+		struct pcpu_vstats __percpu	*vstats; /* veth stats */
 	};
 	/* GARP */
 	struct garp_port __rcu	*garp_port;
-- 
cgit v1.2.3


From 2ac902ce17f9dfa0d4d1f0818be147b5d2515fb7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 19 Dec 2012 14:51:55 +0000
Subject: regmap: flat: Add flat cache type

While for I2C and SPI devices the overhead of using rbtree for devices with
only one block of registers is negligible the same isn't always going to
be true for MMIO devices where the I/O costs are very much lower. Cater
for these devices by adding a simple flat array type for them where the
lookups are simple array accesses, taking us right back to the original
ASoC cache implementation.

Thanks to Magnus Damm for the discussion which prompted this.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/Makefile        |  2 +-
 drivers/base/regmap/internal.h      |  1 +
 drivers/base/regmap/regcache-flat.c | 72 +++++++++++++++++++++++++++++++++++++
 drivers/base/regmap/regcache.c      |  1 +
 include/linux/regmap.h              |  3 +-
 5 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 drivers/base/regmap/regcache-flat.c

(limited to 'include/linux')

diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index 5e75d1b683e2..cf129980abd0 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_REGMAP) += regmap.o regcache.o
-obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-lzo.o
+obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-lzo.o regcache-flat.o
 obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
 obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
 obj-$(CONFIG_REGMAP_SPI) += regmap-spi.o
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 401d1919635a..e22bb80edff1 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -177,5 +177,6 @@ int regcache_lookup_reg(struct regmap *map, unsigned int reg);
 
 extern struct regcache_ops regcache_rbtree_ops;
 extern struct regcache_ops regcache_lzo_ops;
+extern struct regcache_ops regcache_flat_ops;
 
 #endif
diff --git a/drivers/base/regmap/regcache-flat.c b/drivers/base/regmap/regcache-flat.c
new file mode 100644
index 000000000000..d9762e41959b
--- /dev/null
+++ b/drivers/base/regmap/regcache-flat.c
@@ -0,0 +1,72 @@
+/*
+ * Register cache access API - flat caching support
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/seq_file.h>
+
+#include "internal.h"
+
+static int regcache_flat_init(struct regmap *map)
+{
+	int i;
+	unsigned int *cache;
+
+	map->cache = kzalloc(sizeof(unsigned int) * (map->max_register + 1),
+			     GFP_KERNEL);
+	if (!map->cache)
+		return -ENOMEM;
+
+	cache = map->cache;
+
+	for (i = 0; i < map->num_reg_defaults; i++)
+		cache[map->reg_defaults[i].reg] = map->reg_defaults[i].def;
+
+	return 0;
+}
+
+static int regcache_flat_exit(struct regmap *map)
+{
+	kfree(map->cache);
+	map->cache = NULL;
+
+	return 0;
+}
+
+static int regcache_flat_read(struct regmap *map,
+			      unsigned int reg, unsigned int *value)
+{
+	unsigned int *cache = map->cache;
+
+	*value = cache[reg];
+
+	return 0;
+}
+
+static int regcache_flat_write(struct regmap *map, unsigned int reg,
+			       unsigned int value)
+{
+	unsigned int *cache = map->cache;
+
+	cache[reg] = value;
+
+	return 0;
+}
+
+struct regcache_ops regcache_flat_ops = {
+	.type = REGCACHE_FLAT,
+	.name = "flat",
+	.init = regcache_flat_init,
+	.exit = regcache_flat_exit,
+	.read = regcache_flat_read,
+	.write = regcache_flat_write,
+};
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 835883bda977..e69ff3e4742c 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -22,6 +22,7 @@
 static const struct regcache_ops *cache_types[] = {
 	&regcache_rbtree_ops,
 	&regcache_lzo_ops,
+	&regcache_flat_ops,
 };
 
 static int regcache_hw_init(struct regmap *map)
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..390d879d473a 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -28,7 +28,8 @@ struct regmap_range_cfg;
 enum regcache_type {
 	REGCACHE_NONE,
 	REGCACHE_RBTREE,
-	REGCACHE_COMPRESSED
+	REGCACHE_COMPRESSED,
+	REGCACHE_FLAT,
 };
 
 /**
-- 
cgit v1.2.3


From 0f6dfcee2e081f47a3e97cb8984fb4d62217e6f7 Mon Sep 17 00:00:00 2001
From: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Date: Tue, 18 Dec 2012 09:55:33 +0200
Subject: wireless: more 'capability info' bits

define bits for 'capability info', as in recent spec edition
IEEE802.11-2012

Also, add mask for 2-bit field 'bss type', as it is in 802.11ad

Signed-off-by: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f0859cc73861..09879eb24380 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1311,16 +1311,21 @@ struct ieee80211_vht_operation {
 #define WLAN_CAPABILITY_SPECTRUM_MGMT	(1<<8)
 #define WLAN_CAPABILITY_QOS		(1<<9)
 #define WLAN_CAPABILITY_SHORT_SLOT_TIME	(1<<10)
+#define WLAN_CAPABILITY_APSD		(1<<11)
+#define WLAN_CAPABILITY_RADIO_MEASURE	(1<<12)
 #define WLAN_CAPABILITY_DSSS_OFDM	(1<<13)
+#define WLAN_CAPABILITY_DEL_BACK	(1<<14)
+#define WLAN_CAPABILITY_IMM_BACK	(1<<15)
 
 /* DMG (60gHz) 802.11ad */
 /* type - bits 0..1 */
+#define WLAN_CAPABILITY_DMG_TYPE_MASK		(3<<0)
 #define WLAN_CAPABILITY_DMG_TYPE_IBSS		(1<<0) /* Tx by: STA */
 #define WLAN_CAPABILITY_DMG_TYPE_PBSS		(2<<0) /* Tx by: PCP */
 #define WLAN_CAPABILITY_DMG_TYPE_AP		(3<<0) /* Tx by: AP */
 
 #define WLAN_CAPABILITY_DMG_CBAP_ONLY		(1<<2)
-#define WLAN_CAPABILITY_DMG_CBAP_SOURCE	(1<<3)
+#define WLAN_CAPABILITY_DMG_CBAP_SOURCE		(1<<3)
 #define WLAN_CAPABILITY_DMG_PRIVACY		(1<<4)
 #define WLAN_CAPABILITY_DMG_ECPAC		(1<<5)
 
-- 
cgit v1.2.3


From 598a5938e04ce30d837dca4c3c3326c69435342a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 28 Dec 2012 12:00:40 +0100
Subject: wireless: use __packed in ieee80211.h

Use __packed instead of __attribute__((packed)).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 84 +++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 09879eb24380..5db76ebe8810 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -180,7 +180,7 @@ struct ieee80211_hdr {
 	u8 addr3[6];
 	__le16 seq_ctrl;
 	u8 addr4[6];
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_hdr_3addr {
 	__le16 frame_control;
@@ -189,7 +189,7 @@ struct ieee80211_hdr_3addr {
 	u8 addr2[6];
 	u8 addr3[6];
 	__le16 seq_ctrl;
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_qos_hdr {
 	__le16 frame_control;
@@ -199,7 +199,7 @@ struct ieee80211_qos_hdr {
 	u8 addr3[6];
 	__le16 seq_ctrl;
 	__le16 qos_ctrl;
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set
@@ -576,7 +576,7 @@ struct ieee80211s_hdr {
 	__le32 seqnum;
 	u8 eaddr1[6];
 	u8 eaddr2[6];
-} __attribute__ ((packed));
+} __packed;
 
 /* Mesh flags */
 #define MESH_FLAGS_AE_A4 	0x1
@@ -614,7 +614,7 @@ struct ieee80211_quiet_ie {
 	u8 period;
 	__le16 duration;
 	__le16 offset;
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * struct ieee80211_msrment_ie
@@ -626,7 +626,7 @@ struct ieee80211_msrment_ie {
 	u8 mode;
 	u8 type;
 	u8 request[0];
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * struct ieee80211_channel_sw_ie
@@ -637,7 +637,7 @@ struct ieee80211_channel_sw_ie {
 	u8 mode;
 	u8 new_ch_num;
 	u8 count;
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * struct ieee80211_tim
@@ -650,7 +650,7 @@ struct ieee80211_tim_ie {
 	u8 bitmap_ctrl;
 	/* variable size: 1 - 251 bytes */
 	u8 virtual_map[1];
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * struct ieee80211_meshconf_ie
@@ -665,7 +665,7 @@ struct ieee80211_meshconf_ie {
 	u8 meshconf_auth;
 	u8 meshconf_form;
 	u8 meshconf_cap;
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * enum mesh_config_capab_flags - Mesh Configuration IE capability field flags
@@ -695,7 +695,7 @@ struct ieee80211_rann_ie {
 	__le32 rann_seq;
 	__le32 rann_interval;
 	__le32 rann_metric;
-} __attribute__ ((packed));
+} __packed;
 
 enum ieee80211_rann_flags {
 	RANN_FLAG_IS_GATE = 1 << 0,
@@ -717,33 +717,33 @@ struct ieee80211_mgmt {
 			__le16 status_code;
 			/* possibly followed by Challenge text */
 			u8 variable[0];
-		} __attribute__ ((packed)) auth;
+		} __packed auth;
 		struct {
 			__le16 reason_code;
-		} __attribute__ ((packed)) deauth;
+		} __packed deauth;
 		struct {
 			__le16 capab_info;
 			__le16 listen_interval;
 			/* followed by SSID and Supported rates */
 			u8 variable[0];
-		} __attribute__ ((packed)) assoc_req;
+		} __packed assoc_req;
 		struct {
 			__le16 capab_info;
 			__le16 status_code;
 			__le16 aid;
 			/* followed by Supported rates */
 			u8 variable[0];
-		} __attribute__ ((packed)) assoc_resp, reassoc_resp;
+		} __packed assoc_resp, reassoc_resp;
 		struct {
 			__le16 capab_info;
 			__le16 listen_interval;
 			u8 current_ap[6];
 			/* followed by SSID and Supported rates */
 			u8 variable[0];
-		} __attribute__ ((packed)) reassoc_req;
+		} __packed reassoc_req;
 		struct {
 			__le16 reason_code;
-		} __attribute__ ((packed)) disassoc;
+		} __packed disassoc;
 		struct {
 			__le64 timestamp;
 			__le16 beacon_int;
@@ -751,11 +751,11 @@ struct ieee80211_mgmt {
 			/* followed by some of SSID, Supported rates,
 			 * FH Params, DS Params, CF Params, IBSS Params, TIM */
 			u8 variable[0];
-		} __attribute__ ((packed)) beacon;
+		} __packed beacon;
 		struct {
 			/* only variable items: SSID, Supported rates */
 			u8 variable[0];
-		} __attribute__ ((packed)) probe_req;
+		} __packed probe_req;
 		struct {
 			__le64 timestamp;
 			__le16 beacon_int;
@@ -763,7 +763,7 @@ struct ieee80211_mgmt {
 			/* followed by some of SSID, Supported rates,
 			 * FH Params, DS Params, CF Params, IBSS Params */
 			u8 variable[0];
-		} __attribute__ ((packed)) probe_resp;
+		} __packed probe_resp;
 		struct {
 			u8 category;
 			union {
@@ -772,55 +772,55 @@ struct ieee80211_mgmt {
 					u8 dialog_token;
 					u8 status_code;
 					u8 variable[0];
-				} __attribute__ ((packed)) wme_action;
+				} __packed wme_action;
 				struct{
 					u8 action_code;
 					u8 element_id;
 					u8 length;
 					struct ieee80211_channel_sw_ie sw_elem;
-				} __attribute__((packed)) chan_switch;
+				} __packed chan_switch;
 				struct{
 					u8 action_code;
 					u8 dialog_token;
 					u8 element_id;
 					u8 length;
 					struct ieee80211_msrment_ie msr_elem;
-				} __attribute__((packed)) measurement;
+				} __packed measurement;
 				struct{
 					u8 action_code;
 					u8 dialog_token;
 					__le16 capab;
 					__le16 timeout;
 					__le16 start_seq_num;
-				} __attribute__((packed)) addba_req;
+				} __packed addba_req;
 				struct{
 					u8 action_code;
 					u8 dialog_token;
 					__le16 status;
 					__le16 capab;
 					__le16 timeout;
-				} __attribute__((packed)) addba_resp;
+				} __packed addba_resp;
 				struct{
 					u8 action_code;
 					__le16 params;
 					__le16 reason_code;
-				} __attribute__((packed)) delba;
+				} __packed delba;
 				struct {
 					u8 action_code;
 					u8 variable[0];
-				} __attribute__((packed)) self_prot;
+				} __packed self_prot;
 				struct{
 					u8 action_code;
 					u8 variable[0];
-				} __attribute__((packed)) mesh_action;
+				} __packed mesh_action;
 				struct {
 					u8 action;
 					u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN];
-				} __attribute__ ((packed)) sa_query;
+				} __packed sa_query;
 				struct {
 					u8 action;
 					u8 smps_control;
-				} __attribute__ ((packed)) ht_smps;
+				} __packed ht_smps;
 				struct {
 					u8 action_code;
 					u8 dialog_token;
@@ -828,9 +828,9 @@ struct ieee80211_mgmt {
 					u8 variable[0];
 				} __packed tdls_discover_resp;
 			} u;
-		} __attribute__ ((packed)) action;
+		} __packed action;
 	} u;
-} __attribute__ ((packed));
+} __packed;
 
 /* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */
 #define BSS_MEMBERSHIP_SELECTOR_HT_PHY	127
@@ -846,7 +846,7 @@ struct ieee80211_mmie {
 	__le16 key_id;
 	u8 sequence_number[6];
 	u8 mic[8];
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_vendor_ie {
 	u8 element_id;
@@ -861,20 +861,20 @@ struct ieee80211_rts {
 	__le16 duration;
 	u8 ra[6];
 	u8 ta[6];
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_cts {
 	__le16 frame_control;
 	__le16 duration;
 	u8 ra[6];
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_pspoll {
 	__le16 frame_control;
 	__le16 aid;
 	u8 bssid[6];
 	u8 ta[6];
-} __attribute__ ((packed));
+} __packed;
 
 /* TDLS */
 
@@ -967,7 +967,7 @@ struct ieee80211_bar {
 	__u8 ta[6];
 	__le16 control;
 	__le16 start_seq_num;
-} __attribute__((packed));
+} __packed;
 
 /* 802.11 BAR control masks */
 #define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL	0x0000
@@ -992,7 +992,7 @@ struct ieee80211_mcs_info {
 	__le16 rx_highest;
 	u8 tx_params;
 	u8 reserved[3];
-} __attribute__((packed));
+} __packed;
 
 /* 802.11n HT capability MSC set */
 #define IEEE80211_HT_MCS_RX_HIGHEST_MASK	0x3ff
@@ -1031,7 +1031,7 @@ struct ieee80211_ht_cap {
 	__le16 extended_ht_cap_info;
 	__le32 tx_BF_cap_info;
 	u8 antenna_selection_info;
-} __attribute__ ((packed));
+} __packed;
 
 /* 802.11n HT capabilities masks (for cap_info) */
 #define IEEE80211_HT_CAP_LDPC_CODING		0x0001
@@ -1102,7 +1102,7 @@ struct ieee80211_ht_operation {
 	__le16 operation_mode;
 	__le16 stbc_param;
 	u8 basic_set[16];
-} __attribute__ ((packed));
+} __packed;
 
 /* for ht_param */
 #define IEEE80211_HT_PARAM_CHA_SEC_OFFSET		0x03
@@ -1839,14 +1839,14 @@ struct ieee80211_country_ie_triplet {
 			u8 first_channel;
 			u8 num_channels;
 			s8 max_power;
-		} __attribute__ ((packed)) chans;
+		} __packed chans;
 		struct {
 			u8 reg_extension_id;
 			u8 reg_class;
 			u8 coverage_class;
-		} __attribute__ ((packed)) ext;
+		} __packed ext;
 	};
-} __attribute__ ((packed));
+} __packed;
 
 enum ieee80211_timeout_interval_type {
 	WLAN_TIMEOUT_REASSOC_DEADLINE = 1 /* 802.11r */,
-- 
cgit v1.2.3


From ec61cd63dd3f3bf982180b2bcc1b325160d73837 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 28 Dec 2012 12:12:10 +0100
Subject: mac80211: support HT notify channel width action

Support the HT notify channel width action frame
to update the rate scaling about the bandwidth
the peer can receive in.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h |  9 +++++++++
 net/mac80211/rx.c         | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 5db76ebe8810..ccf9ee1dca8c 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -701,6 +701,11 @@ enum ieee80211_rann_flags {
 	RANN_FLAG_IS_GATE = 1 << 0,
 };
 
+enum ieee80211_ht_chanwidth_values {
+	IEEE80211_HT_CHANWIDTH_20MHZ = 0,
+	IEEE80211_HT_CHANWIDTH_ANY = 1,
+};
+
 #define WLAN_SA_QUERY_TR_ID_LEN 2
 
 struct ieee80211_mgmt {
@@ -821,6 +826,10 @@ struct ieee80211_mgmt {
 					u8 action;
 					u8 smps_control;
 				} __packed ht_smps;
+				struct {
+					u8 action_code;
+					u8 chanwidth;
+				} __packed ht_notify_cw;
 				struct {
 					u8 action_code;
 					u8 dialog_token;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 580704eba8b8..a19089565c4b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2353,7 +2353,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		    sdata->vif.type != NL80211_IFTYPE_ADHOC)
 			break;
 
-		/* verify action & smps_control are present */
+		/* verify action & smps_control/chanwidth are present */
 		if (len < IEEE80211_MIN_ACTION_SIZE + 2)
 			goto invalid;
 
@@ -2392,6 +2392,35 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 						 IEEE80211_RC_SMPS_CHANGED);
 			goto handled;
 		}
+		case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
+			struct ieee80211_supported_band *sband;
+			u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
+			bool old_40mhz, new_40mhz;
+
+			/* If it doesn't support 40 MHz it can't change ... */
+			if (!rx->sta->supports_40mhz)
+				goto handled;
+
+			old_40mhz = rx->sta->sta.ht_cap.cap &
+					IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+			new_40mhz = chanwidth == IEEE80211_HT_CHANWIDTH_ANY;
+
+			if (old_40mhz == new_40mhz)
+				goto handled;
+
+			if (new_40mhz)
+				rx->sta->sta.ht_cap.cap |=
+					IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+			else
+				rx->sta->sta.ht_cap.cap &=
+					~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+
+			sband = rx->local->hw.wiphy->bands[status->band];
+
+			rate_control_rate_update(local, sband, rx->sta,
+						 IEEE80211_RC_BW_CHANGED);
+			goto handled;
+		}
 		default:
 			goto invalid;
 		}
-- 
cgit v1.2.3


From d2e5f0c16ad60a7208fd371233e63b73c990ece2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 23 Dec 2012 00:02:44 +0100
Subject: ACPI / PCI: Rework the setup and cleanup of device wakeup

Currently, the ACPI wakeup capability of PCI devices is set up
in two different places, partially in acpi_pci_bind() where
runtime wakeup is initialized and partially in
platform_pci_wakeup_init(), where system wakeup is initialized.
The cleanup is only done in acpi_pci_unbind() and it only covers
runtime wakeup.

Use the new .setup() and .cleanup() callbacks in struct acpi_bus_type
to consolidate that code and do the setup and the cleanup each in one
place.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Toshi Kani <toshi.kani@hp.com>
---
 drivers/acpi/device_pm.c |  2 +-
 drivers/acpi/pci_bind.c  |  5 -----
 drivers/pci/pci-acpi.c   | 30 +++++++++++++++++++++++++++++-
 drivers/pci/pci.c        | 26 +-------------------------
 drivers/pci/pci.h        |  5 -----
 drivers/pci/probe.c      |  1 -
 include/linux/acpi.h     |  5 +++++
 7 files changed, 36 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index f09dc987cf17..e4f6ac95595c 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -353,7 +353,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
  * acpi_dev_pm_get_node - Get ACPI device node for the given physical device.
  * @dev: Device to get the ACPI node for.
  */
-static struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
+struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
 {
 	acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
 	struct acpi_device *adev;
diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index a1dee29beed3..bbddcc9c894f 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -43,8 +43,6 @@ static int acpi_pci_unbind(struct acpi_device *device)
 	if (!dev)
 		goto out;
 
-	device_set_run_wake(&dev->dev, false);
-	pci_acpi_remove_pm_notifier(device);
 	acpi_power_resource_unregister_device(&dev->dev, device->handle);
 
 	if (!dev->subordinate)
@@ -71,10 +69,7 @@ static int acpi_pci_bind(struct acpi_device *device)
 	if (!dev)
 		return 0;
 
-	pci_acpi_add_pm_notifier(device, dev);
 	acpi_power_resource_register_device(&dev->dev, device->handle);
-	if (device->wakeup.flags.run_wake)
-		device_set_run_wake(&dev->dev, true);
 
 	/*
 	 * Install the 'bind' function to facilitate callbacks for
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 1af4008182fd..b98106c110e7 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -283,7 +283,6 @@ static struct pci_platform_pm_ops acpi_pci_platform_pm = {
 	.is_manageable = acpi_pci_power_manageable,
 	.set_state = acpi_pci_set_power_state,
 	.choose_state = acpi_pci_choose_state,
-	.can_wakeup = acpi_pci_can_wakeup,
 	.sleep_wake = acpi_pci_sleep_wake,
 	.run_wake = acpi_pci_run_wake,
 };
@@ -321,10 +320,39 @@ static int acpi_pci_find_root_bridge(struct device *dev, acpi_handle *handle)
 	return 0;
 }
 
+static void acpi_pci_wakeup_setup(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
+	if (!adev || !adev->wakeup.flags.valid)
+		return;
+
+	device_set_wakeup_capable(dev, true);
+	acpi_pci_sleep_wake(pci_dev, false);
+
+	pci_acpi_add_pm_notifier(adev, pci_dev);
+	if (adev->wakeup.flags.run_wake)
+		device_set_run_wake(dev, true);
+}
+
+static void acpi_pci_wakeup_cleanup(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+
+	if (adev && adev->wakeup.flags.valid) {
+		device_set_wakeup_capable(dev, false);
+		device_set_run_wake(dev, false);
+		pci_acpi_remove_pm_notifier(adev);
+	}
+}
+
 static struct acpi_bus_type acpi_pci_bus = {
 	.bus = &pci_bus_type,
 	.find_device = acpi_pci_find_device,
 	.find_bridge = acpi_pci_find_root_bridge,
+	.setup = acpi_pci_wakeup_setup,
+	.cleanup = acpi_pci_wakeup_cleanup,
 };
 
 static int __init acpi_pci_init(void)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 5cb5820fae40..0c4f641b7be1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -450,7 +450,7 @@ static struct pci_platform_pm_ops *pci_platform_pm;
 int pci_set_platform_pm(struct pci_platform_pm_ops *ops)
 {
 	if (!ops->is_manageable || !ops->set_state || !ops->choose_state
-	    || !ops->sleep_wake || !ops->can_wakeup)
+	    || !ops->sleep_wake)
 		return -EINVAL;
 	pci_platform_pm = ops;
 	return 0;
@@ -473,11 +473,6 @@ static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev)
 			pci_platform_pm->choose_state(dev) : PCI_POWER_ERROR;
 }
 
-static inline bool platform_pci_can_wakeup(struct pci_dev *dev)
-{
-	return pci_platform_pm ? pci_platform_pm->can_wakeup(dev) : false;
-}
-
 static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable)
 {
 	return pci_platform_pm ?
@@ -1985,25 +1980,6 @@ void pci_pm_init(struct pci_dev *dev)
 	}
 }
 
-/**
- * platform_pci_wakeup_init - init platform wakeup if present
- * @dev: PCI device
- *
- * Some devices don't have PCI PM caps but can still generate wakeup
- * events through platform methods (like ACPI events).  If @dev supports
- * platform wakeup events, set the device flag to indicate as much.  This
- * may be redundant if the device also supports PCI PM caps, but double
- * initialization should be safe in that case.
- */
-void platform_pci_wakeup_init(struct pci_dev *dev)
-{
-	if (!platform_pci_can_wakeup(dev))
-		return;
-
-	device_set_wakeup_capable(&dev->dev, true);
-	platform_pci_sleep_wake(dev, false);
-}
-
 static void pci_add_saved_cap(struct pci_dev *pci_dev,
 	struct pci_cap_saved_state *new_cap)
 {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index e8518292826f..adfd172c5b9b 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -43,9 +43,6 @@ int pci_probe_reset_function(struct pci_dev *dev);
  *                platform; to be used during system-wide transitions from a
  *                sleeping state to the working state and vice versa
  *
- * @can_wakeup: returns 'true' if given device is capable of waking up the
- *              system from a sleeping state
- *
  * @sleep_wake: enables/disables the system wake up capability of given device
  *
  * @run_wake: enables/disables the platform to generate run-time wake-up events
@@ -59,7 +56,6 @@ struct pci_platform_pm_ops {
 	bool (*is_manageable)(struct pci_dev *dev);
 	int (*set_state)(struct pci_dev *dev, pci_power_t state);
 	pci_power_t (*choose_state)(struct pci_dev *dev);
-	bool (*can_wakeup)(struct pci_dev *dev);
 	int (*sleep_wake)(struct pci_dev *dev, bool enable);
 	int (*run_wake)(struct pci_dev *dev, bool enable);
 };
@@ -74,7 +70,6 @@ extern void pci_wakeup_bus(struct pci_bus *bus);
 extern void pci_config_pm_runtime_get(struct pci_dev *dev);
 extern void pci_config_pm_runtime_put(struct pci_dev *dev);
 extern void pci_pm_init(struct pci_dev *dev);
-extern void platform_pci_wakeup_init(struct pci_dev *dev);
 extern void pci_allocate_cap_save_buffers(struct pci_dev *dev);
 void pci_free_cap_save_buffers(struct pci_dev *dev);
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 6186f03d84f3..2dcd22d9c816 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1280,7 +1280,6 @@ static void pci_init_capabilities(struct pci_dev *dev)
 
 	/* Power Management */
 	pci_pm_init(dev);
-	platform_pci_wakeup_init(dev);
 
 	/* Vital Product Data */
 	pci_vpd_pci22_init(dev);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3994d7790b23..8c1d6f2a2193 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -526,9 +526,14 @@ static inline int acpi_subsys_resume_early(struct device *dev) { return 0; }
 #endif
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM)
+struct acpi_device *acpi_dev_pm_get_node(struct device *dev);
 int acpi_dev_pm_attach(struct device *dev, bool power_on);
 void acpi_dev_pm_detach(struct device *dev, bool power_off);
 #else
+static inline struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
+{
+	return NULL;
+}
 static inline int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From 1d5210008bd3a26daf4b06aed9d6c330dd4c83e2 Mon Sep 17 00:00:00 2001
From: Lance Ortiz <lance.ortiz@hp.com>
Date: Thu, 3 Jan 2013 15:34:08 -0700
Subject: aerdrv: Enhanced AER logging

This patch will provide a more reliable and easy way for user-space
applications to have access to AER logs rather than reading them from the
message buffer. It also provides a way to notify user-space when an AER
event occurs.

The aer driver is updated to generate a trace event of function 'aer_event'
when a PCIe error is reported over the AER interface.  The trace event was
added to both the interrupt based aer path and the firmware first path.

Signed-off-by: Lance Ortiz <lance.ortiz@hp.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Boris Petkov <bp@alien8.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/cper.c               | 19 ++++++++++++++++---
 drivers/pci/pcie/aer/aerdrv_errprint.c |  9 ++++++++-
 include/linux/aer.h                    |  4 ++--
 3 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index e6defd86b424..1e5d8a40101e 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -29,6 +29,7 @@
 #include <linux/time.h>
 #include <linux/cper.h>
 #include <linux/acpi.h>
+#include <linux/pci.h>
 #include <linux/aer.h>
 
 /*
@@ -249,6 +250,10 @@ static const char *cper_pcie_port_type_strs[] = {
 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 			    const struct acpi_hest_generic_data *gdata)
 {
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+	struct pci_dev *dev;
+#endif
+
 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
 		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
@@ -281,10 +286,18 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
-		struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
-		cper_print_aer(pfx, gdata->error_severity, aer_regs);
+	dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
+			pcie->device_id.bus, pcie->device_id.function);
+	if (!dev) {
+		pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
+			pcie->device_id.segment, pcie->device_id.bus,
+			pcie->device_id.slot, pcie->device_id.function);
+		return;
 	}
+	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
+		cper_print_aer(pfx, dev, gdata->error_severity,
+				(struct aer_capability_regs *) pcie->aer_info);
+	pci_dev_put(dev);
 #endif
 }
 
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 3ea51736f18d..d3e5fc5a2de9 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -23,6 +23,9 @@
 
 #include "aerdrv.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/ras.h>
+
 #define AER_AGENT_RECEIVER		0
 #define AER_AGENT_REQUESTER		1
 #define AER_AGENT_COMPLETER		2
@@ -194,6 +197,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 	if (info->id && info->error_dev_num > 1 && info->id == id)
 		printk("%s""  Error of this Agent(%04x) is reported first\n",
 			prefix, id);
+	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
+			info->severity);
 }
 
 void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -217,7 +222,7 @@ int cper_severity_to_aer(int cper_severity)
 }
 EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 
-void cper_print_aer(const char *prefix, int cper_severity,
+void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
 		    struct aer_capability_regs *aer)
 {
 	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
@@ -259,5 +264,7 @@ void cper_print_aer(const char *prefix, int cper_severity,
 			*(tlp + 8), *(tlp + 15), *(tlp + 14),
 			*(tlp + 13), *(tlp + 12));
 	}
+	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
+			aer_severity);
 }
 #endif
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 544abdb2238c..ec10e1b24c1c 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 }
 #endif
 
-extern void cper_print_aer(const char *prefix, int cper_severity,
-			   struct aer_capability_regs *aer);
+extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
+			   int cper_severity, struct aer_capability_regs *aer);
 extern int cper_severity_to_aer(int cper_severity);
 extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
 			      int severity);
-- 
cgit v1.2.3


From e41b2d7fe7803e85e1202d0eb172717d7bf1bbaf Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 1 Jan 2013 03:30:15 +0000
Subject: net: set dev->addr_assign_type correctly

Not a bitfield, but a plain value.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +-
 drivers/net/ethernet/atheros/atlx/atl1.c        | 2 +-
 drivers/net/ethernet/ethoc.c                    | 2 +-
 drivers/net/ethernet/lantiq_etop.c              | 2 +-
 include/linux/etherdevice.h                     | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 56d3f697e0c7..17651c779680 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2540,7 +2540,7 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 	if (atl1c_read_mac_addr(&adapter->hw)) {
 		/* got a random MAC address, set NET_ADDR_RANDOM to netdev */
-		netdev->addr_assign_type |= NET_ADDR_RANDOM;
+		netdev->addr_assign_type = NET_ADDR_RANDOM;
 	}
 	memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len);
 	memcpy(netdev->perm_addr, adapter->hw.mac_addr, netdev->addr_len);
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index 71b3d7daa21d..5b0d9931c720 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -3053,7 +3053,7 @@ static int atl1_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* copy the MAC address out of the EEPROM */
 	if (atl1_read_mac_addr(&adapter->hw)) {
 		/* mark random mac */
-		netdev->addr_assign_type |= NET_ADDR_RANDOM;
+		netdev->addr_assign_type = NET_ADDR_RANDOM;
 	}
 	memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len);
 
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 8db1c06008de..f380bb7653dd 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1068,7 +1068,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	}
 
 	if (random_mac)
-		netdev->addr_assign_type |= NET_ADDR_RANDOM;
+		netdev->addr_assign_type = NET_ADDR_RANDOM;
 
 	/* register MII bus */
 	priv->mdio = mdiobus_alloc();
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index c124e67a1a1c..cd3d2c09cdd0 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -655,7 +655,7 @@ ltq_etop_init(struct net_device *dev)
 
 	/* Set addr_assign_type here, ltq_etop_set_mac_address would reset it. */
 	if (random_mac)
-		dev->addr_assign_type |= NET_ADDR_RANDOM;
+		dev->addr_assign_type = NET_ADDR_RANDOM;
 
 	ltq_etop_set_multicast_list(dev);
 	err = ltq_etop_mdio_init(dev);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 243eea1e33d8..1a43e1b4f7ad 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -192,7 +192,7 @@ static inline void eth_zero_addr(u8 *addr)
  */
 static inline void eth_hw_addr_random(struct net_device *dev)
 {
-	dev->addr_assign_type |= NET_ADDR_RANDOM;
+	dev->addr_assign_type = NET_ADDR_RANDOM;
 	eth_random_addr(dev->dev_addr);
 }
 
-- 
cgit v1.2.3


From fbdeca2d7753aa1ab929aeb77ccc46489eed02b9 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 1 Jan 2013 03:30:16 +0000
Subject: net: add address assign type "SET"

This is the way to indicate that mac address of a device has been set by
dev_set_mac_address()

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 net/core/dev.c            | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6835b5837f93..c5031a45e185 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -64,6 +64,8 @@ struct wireless_dev;
 #define NET_ADDR_PERM		0	/* address is permanent (default) */
 #define NET_ADDR_RANDOM		1	/* address is generated randomly */
 #define NET_ADDR_STOLEN		2	/* address is stolen from other device */
+#define NET_ADDR_SET		3	/* address is set using
+					 * dev_set_mac_address() */
 
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
diff --git a/net/core/dev.c b/net/core/dev.c
index c85e32b30f04..bddb2f2ccaa9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5022,6 +5022,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 	err = ops->ndo_set_mac_address(dev, sa);
 	if (err)
 		return err;
+	dev->addr_assign_type = NET_ADDR_SET;
 	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	add_device_randomness(dev->dev_addr, dev->addr_len);
 	return 0;
-- 
cgit v1.2.3


From 9442490a02867088bcd5ed6231fa3b35a733c2b8 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 4 Jan 2013 16:35:07 +0000
Subject: regmap: irq: Support wake IRQ mask inversion

Support devices which have an enable rather than mask register for wake
sources.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap-irq.c | 21 +++++++++++++++++----
 include/linux/regmap.h           |  1 +
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index be5579964be0..4706c63d0bc6 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -92,8 +92,14 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
 		reg = d->chip->wake_base +
 			(i * map->reg_stride * d->irq_reg_stride);
 		if (d->wake_buf) {
-			ret = regmap_update_bits(d->map, reg,
-					 d->mask_buf_def[i], d->wake_buf[i]);
+			if (d->chip->wake_invert)
+				ret = regmap_update_bits(d->map, reg,
+							 d->mask_buf_def[i],
+							 ~d->wake_buf[i]);
+			else
+				ret = regmap_update_bits(d->map, reg,
+							 d->mask_buf_def[i],
+							 d->wake_buf[i]);
 			if (ret != 0)
 				dev_err(d->map->dev,
 					"Failed to sync wakes in %x: %d\n",
@@ -419,8 +425,15 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
 			d->wake_buf[i] = d->mask_buf_def[i];
 			reg = chip->wake_base +
 				(i * map->reg_stride * d->irq_reg_stride);
-			ret = regmap_update_bits(map, reg, d->wake_buf[i],
-						 d->wake_buf[i]);
+
+			if (chip->wake_invert)
+				ret = regmap_update_bits(map, reg,
+							 d->mask_buf_def[i],
+							 0);
+			else
+				ret = regmap_update_bits(map, reg,
+							 d->mask_buf_def[i],
+							 d->wake_buf[i]);
 			if (ret != 0) {
 				dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
 					reg, ret);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..8c0b50df3d59 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -381,6 +381,7 @@ struct regmap_irq_chip {
 	unsigned int wake_base;
 	unsigned int irq_reg_stride;
 	unsigned int mask_invert;
+	unsigned int wake_invert;
 	bool runtime_pm;
 
 	int num_regs;
-- 
cgit v1.2.3


From 9ff162a8b96c96238773972e26288a366e403b0c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 3 Jan 2013 22:48:49 +0000
Subject: net: introduce upper device lists

This lists are supposed to serve for storing pointers to all upper devices.
Eventually it will replace dev->master pointer which is used for
bonding, bridge, team but it cannot be used for vlan, macvlan where
there might be multiple upper present. In case the upper link is
replacement for dev->master, it is marked with "master" flag.

New upper device list resolves this limitation. Also, the information
stored in lists is used for preventing looping setups like
"bond->somethingelse->samebond"

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  14 +++
 net/core/dev.c            | 239 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 249 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c5031a45e185..e324601f48e8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1174,6 +1174,8 @@ struct net_device {
 					  * which this device is member of.
 					  */
 
+	struct list_head	upper_dev_list; /* List of upper devices */
+
 	/* Interface address info used in eth_type_trans() */
 	unsigned char		*dev_addr;	/* hw address, (before bcast
 						   because most packets are
@@ -2636,6 +2638,18 @@ extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
 extern int		bpf_jit_enable;
+
+extern bool netdev_has_upper_dev(struct net_device *dev,
+				 struct net_device *upper_dev);
+extern bool netdev_has_any_upper_dev(struct net_device *dev);
+extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
+extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
+extern int netdev_upper_dev_link(struct net_device *dev,
+				 struct net_device *upper_dev);
+extern int netdev_master_upper_dev_link(struct net_device *dev,
+					struct net_device *upper_dev);
+extern void netdev_upper_dev_unlink(struct net_device *dev,
+				    struct net_device *upper_dev);
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
 extern int netdev_set_bond_master(struct net_device *dev,
 				  struct net_device *master);
diff --git a/net/core/dev.c b/net/core/dev.c
index bddb2f2ccaa9..53a9fefbc9af 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4600,6 +4600,232 @@ static int __init dev_proc_init(void)
 #endif	/* CONFIG_PROC_FS */
 
 
+struct netdev_upper {
+	struct net_device *dev;
+	bool master;
+	struct list_head list;
+	struct rcu_head rcu;
+	struct list_head search_list;
+};
+
+static void __append_search_uppers(struct list_head *search_list,
+				   struct net_device *dev)
+{
+	struct netdev_upper *upper;
+
+	list_for_each_entry(upper, &dev->upper_dev_list, list) {
+		/* check if this upper is not already in search list */
+		if (list_empty(&upper->search_list))
+			list_add_tail(&upper->search_list, search_list);
+	}
+}
+
+static bool __netdev_search_upper_dev(struct net_device *dev,
+				      struct net_device *upper_dev)
+{
+	LIST_HEAD(search_list);
+	struct netdev_upper *upper;
+	struct netdev_upper *tmp;
+	bool ret = false;
+
+	__append_search_uppers(&search_list, dev);
+	list_for_each_entry(upper, &search_list, search_list) {
+		if (upper->dev == upper_dev) {
+			ret = true;
+			break;
+		}
+		__append_search_uppers(&search_list, upper->dev);
+	}
+	list_for_each_entry_safe(upper, tmp, &search_list, search_list)
+		INIT_LIST_HEAD(&upper->search_list);
+	return ret;
+}
+
+static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
+						struct net_device *upper_dev)
+{
+	struct netdev_upper *upper;
+
+	list_for_each_entry(upper, &dev->upper_dev_list, list) {
+		if (upper->dev == upper_dev)
+			return upper;
+	}
+	return NULL;
+}
+
+/**
+ * netdev_has_upper_dev - Check if device is linked to an upper device
+ * @dev: device
+ * @upper_dev: upper device to check
+ *
+ * Find out if a device is linked to specified upper device and return true
+ * in case it is. Note that this checks only immediate upper device,
+ * not through a complete stack of devices. The caller must hold the RTNL lock.
+ */
+bool netdev_has_upper_dev(struct net_device *dev,
+			  struct net_device *upper_dev)
+{
+	ASSERT_RTNL();
+
+	return __netdev_find_upper(dev, upper_dev);
+}
+EXPORT_SYMBOL(netdev_has_upper_dev);
+
+/**
+ * netdev_has_any_upper_dev - Check if device is linked to some device
+ * @dev: device
+ *
+ * Find out if a device is linked to an upper device and return true in case
+ * it is. The caller must hold the RTNL lock.
+ */
+bool netdev_has_any_upper_dev(struct net_device *dev)
+{
+	ASSERT_RTNL();
+
+	return !list_empty(&dev->upper_dev_list);
+}
+EXPORT_SYMBOL(netdev_has_any_upper_dev);
+
+/**
+ * netdev_master_upper_dev_get - Get master upper device
+ * @dev: device
+ *
+ * Find a master upper device and return pointer to it or NULL in case
+ * it's not there. The caller must hold the RTNL lock.
+ */
+struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
+{
+	struct netdev_upper *upper;
+
+	ASSERT_RTNL();
+
+	if (list_empty(&dev->upper_dev_list))
+		return NULL;
+
+	upper = list_first_entry(&dev->upper_dev_list,
+				 struct netdev_upper, list);
+	if (likely(upper->master))
+		return upper->dev;
+	return NULL;
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_get);
+
+/**
+ * netdev_master_upper_dev_get_rcu - Get master upper device
+ * @dev: device
+ *
+ * Find a master upper device and return pointer to it or NULL in case
+ * it's not there. The caller must hold the RCU read lock.
+ */
+struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
+{
+	struct netdev_upper *upper;
+
+	upper = list_first_or_null_rcu(&dev->upper_dev_list,
+				       struct netdev_upper, list);
+	if (upper && likely(upper->master))
+		return upper->dev;
+	return NULL;
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
+
+static int __netdev_upper_dev_link(struct net_device *dev,
+				   struct net_device *upper_dev, bool master)
+{
+	struct netdev_upper *upper;
+
+	ASSERT_RTNL();
+
+	if (dev == upper_dev)
+		return -EBUSY;
+
+	/* To prevent loops, check if dev is not upper device to upper_dev. */
+	if (__netdev_search_upper_dev(upper_dev, dev))
+		return -EBUSY;
+
+	if (__netdev_find_upper(dev, upper_dev))
+		return -EEXIST;
+
+	if (master && netdev_master_upper_dev_get(dev))
+		return -EBUSY;
+
+	upper = kmalloc(sizeof(*upper), GFP_KERNEL);
+	if (!upper)
+		return -ENOMEM;
+
+	upper->dev = upper_dev;
+	upper->master = master;
+	INIT_LIST_HEAD(&upper->search_list);
+
+	/* Ensure that master upper link is always the first item in list. */
+	if (master)
+		list_add_rcu(&upper->list, &dev->upper_dev_list);
+	else
+		list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
+	dev_hold(upper_dev);
+
+	return 0;
+}
+
+/**
+ * netdev_upper_dev_link - Add a link to the upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Adds a link to device which is upper to this one. The caller must hold
+ * the RTNL lock. On a failure a negative errno code is returned.
+ * On success the reference counts are adjusted and the function
+ * returns zero.
+ */
+int netdev_upper_dev_link(struct net_device *dev,
+			  struct net_device *upper_dev)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, false);
+}
+EXPORT_SYMBOL(netdev_upper_dev_link);
+
+/**
+ * netdev_master_upper_dev_link - Add a master link to the upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Adds a link to device which is upper to this one. In this case, only
+ * one master upper device can be linked, although other non-master devices
+ * might be linked as well. The caller must hold the RTNL lock.
+ * On a failure a negative errno code is returned. On success the reference
+ * counts are adjusted and the function returns zero.
+ */
+int netdev_master_upper_dev_link(struct net_device *dev,
+				 struct net_device *upper_dev)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, true);
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_link);
+
+/**
+ * netdev_upper_dev_unlink - Removes a link to upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Removes a link to device which is upper to this one. The caller must hold
+ * the RTNL lock.
+ */
+void netdev_upper_dev_unlink(struct net_device *dev,
+			     struct net_device *upper_dev)
+{
+	struct netdev_upper *upper;
+
+	ASSERT_RTNL();
+
+	upper = __netdev_find_upper(dev, upper_dev);
+	if (!upper)
+		return;
+	list_del_rcu(&upper->list);
+	dev_put(upper_dev);
+	kfree_rcu(upper, rcu);
+}
+EXPORT_SYMBOL(netdev_upper_dev_unlink);
+
 /**
  *	netdev_set_master	-	set up master pointer
  *	@slave: slave device
@@ -4613,19 +4839,23 @@ static int __init dev_proc_init(void)
 int netdev_set_master(struct net_device *slave, struct net_device *master)
 {
 	struct net_device *old = slave->master;
+	int err;
 
 	ASSERT_RTNL();
 
 	if (master) {
 		if (old)
 			return -EBUSY;
-		dev_hold(master);
+		err = netdev_master_upper_dev_link(slave, master);
+		if (err)
+			return err;
 	}
 
 	slave->master = master;
 
 	if (old)
-		dev_put(old);
+		netdev_upper_dev_unlink(slave, master);
+
 	return 0;
 }
 EXPORT_SYMBOL(netdev_set_master);
@@ -5503,8 +5733,8 @@ static void rollback_registered_many(struct list_head *head)
 		if (dev->netdev_ops->ndo_uninit)
 			dev->netdev_ops->ndo_uninit(dev);
 
-		/* Notifier chain MUST detach us from master device. */
-		WARN_ON(dev->master);
+		/* Notifier chain MUST detach us all upper devices. */
+		WARN_ON(netdev_has_any_upper_dev(dev));
 
 		/* Remove entries from kobject tree */
 		netdev_unregister_kobject(dev);
@@ -6212,6 +6442,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
+	INIT_LIST_HEAD(&dev->upper_dev_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 
-- 
cgit v1.2.3


From 8b98a70c28a607a02b3c3d41bc9a4c141f421052 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 3 Jan 2013 22:49:02 +0000
Subject: net: remove no longer used netdev_set_bond_master() and
 netdev_set_master()

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 +----
 net/core/dev.c            | 63 -----------------------------------------------
 2 files changed, 1 insertion(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e324601f48e8..3cad8eab02b6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -858,8 +858,7 @@ struct netdev_fcoe_hbainfo {
  *	flow_id is a flow ID to be passed to rps_may_expire_flow() later.
  *	Return the filter ID on success, or a negative error code.
  *
- *	Slave management functions (for bridge, bonding, etc). User should
- *	call netdev_set_master() to set dev->master properly.
+ *	Slave management functions (for bridge, bonding, etc).
  * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev);
  *	Called to make another netdev an underling.
  *
@@ -2650,9 +2649,6 @@ extern int netdev_master_upper_dev_link(struct net_device *dev,
 					struct net_device *upper_dev);
 extern void netdev_upper_dev_unlink(struct net_device *dev,
 				    struct net_device *upper_dev);
-extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
-extern int netdev_set_bond_master(struct net_device *dev,
-				  struct net_device *master);
 extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features);
diff --git a/net/core/dev.c b/net/core/dev.c
index 53a9fefbc9af..a51ccf46e8b7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4826,69 +4826,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
-/**
- *	netdev_set_master	-	set up master pointer
- *	@slave: slave device
- *	@master: new master device
- *
- *	Changes the master device of the slave. Pass %NULL to break the
- *	bonding. The caller must hold the RTNL semaphore. On a failure
- *	a negative errno code is returned. On success the reference counts
- *	are adjusted and the function returns zero.
- */
-int netdev_set_master(struct net_device *slave, struct net_device *master)
-{
-	struct net_device *old = slave->master;
-	int err;
-
-	ASSERT_RTNL();
-
-	if (master) {
-		if (old)
-			return -EBUSY;
-		err = netdev_master_upper_dev_link(slave, master);
-		if (err)
-			return err;
-	}
-
-	slave->master = master;
-
-	if (old)
-		netdev_upper_dev_unlink(slave, master);
-
-	return 0;
-}
-EXPORT_SYMBOL(netdev_set_master);
-
-/**
- *	netdev_set_bond_master	-	set up bonding master/slave pair
- *	@slave: slave device
- *	@master: new master device
- *
- *	Changes the master device of the slave. Pass %NULL to break the
- *	bonding. The caller must hold the RTNL semaphore. On a failure
- *	a negative errno code is returned. On success %RTM_NEWLINK is sent
- *	to the routing socket and the function returns zero.
- */
-int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	err = netdev_set_master(slave, master);
-	if (err)
-		return err;
-	if (master)
-		slave->flags |= IFF_SLAVE;
-	else
-		slave->flags &= ~IFF_SLAVE;
-
-	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
-	return 0;
-}
-EXPORT_SYMBOL(netdev_set_bond_master);
-
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-- 
cgit v1.2.3


From 85464ef271a0f5496f404c6a2f2dfbf1d76e1a49 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 3 Jan 2013 22:49:03 +0000
Subject: net: kill dev->master

Nobody uses this now. Remove it.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3cad8eab02b6..0209ac328e8a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1169,10 +1169,6 @@ struct net_device {
 						 * avoid dirtying this cache line.
 						 */
 
-	struct net_device	*master; /* Pointer to master device of a group,
-					  * which this device is member of.
-					  */
-
 	struct list_head	upper_dev_list; /* List of upper devices */
 
 	/* Interface address info used in eth_type_trans() */
-- 
cgit v1.2.3


From a05be99174edc9f258ee68140b71b9645ad977ee Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Fri, 30 Nov 2012 13:57:46 +0530
Subject: power_supply: Add watchdog and safety timer expiries under
 PROP_HEALTH_*

As most of the charger chips come with two kinds of safety features
related to timing:

1. Watchdog Timer (interms of seconds/mins)
2. Safety Timer (interms of hours)

This patch adds these to fault causes in POWER_SUPPLY_PROP_HEALTH_* enums
so that whenever there is either watchdog timeout or safety timer timeout
driver could notify the user space accurately about the fault and will
also be helpful for debug.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/power_supply_sysfs.c | 3 ++-
 include/linux/power_supply.h       | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 40fa3b7cae54..29178f78d73c 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -55,7 +55,8 @@ static ssize_t power_supply_show_property(struct device *dev,
 	};
 	static char *health_text[] = {
 		"Unknown", "Good", "Overheat", "Dead", "Over voltage",
-		"Unspecified failure", "Cold",
+		"Unspecified failure", "Cold", "Watchdog timer expire",
+		"Safety timer expire"
 	};
 	static char *technology_text[] = {
 		"Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", "NiCd",
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 1f0ab90aff00..25c0982eb9b1 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -54,6 +54,8 @@ enum {
 	POWER_SUPPLY_HEALTH_OVERVOLTAGE,
 	POWER_SUPPLY_HEALTH_UNSPEC_FAILURE,
 	POWER_SUPPLY_HEALTH_COLD,
+	POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE,
+	POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE,
 };
 
 enum {
-- 
cgit v1.2.3


From 80524f083e2c3e70057f5bb476db92baa14cb22b Mon Sep 17 00:00:00 2001
From: Kamal Mostafa <kamal@canonical.com>
Date: Tue, 20 Nov 2012 23:04:35 -0800
Subject: Input: increase struct ps2dev cmdbuf[] to 8 bytes

Cypress PS/2 Trackpad (drivers/input/mouse/cypress_ps2.c) needs
this larger cmdbuf[] to handle 8-byte packet responses.

Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/libps2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libps2.h b/include/linux/libps2.h
index 79603a6c356f..4ad06e824f76 100644
--- a/include/linux/libps2.h
+++ b/include/linux/libps2.h
@@ -36,7 +36,7 @@ struct ps2dev {
 	wait_queue_head_t wait;
 
 	unsigned long flags;
-	unsigned char cmdbuf[6];
+	unsigned char cmdbuf[8];
 	unsigned char cmdcnt;
 	unsigned char nak;
 };
-- 
cgit v1.2.3


From d5b1fe68baa7213f198e5be8cd1a1037258ab2c8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Dec 2012 13:18:28 -0800
Subject: cgroup: remove unused dummy cgroup_fork_callbacks()

5edee61ede ("cgroup: cgroup_subsys->fork() should be called after the
task is added to css_set") removed cgroup_fork_callbacks() but forgot
to remove its dummy version for !CONFIG_CGROUPS.  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com>
---
 include/linux/cgroup.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7d73905dcba2..942e68705577 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -706,7 +706,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 static inline void cgroup_fork(struct task_struct *p) {}
-static inline void cgroup_fork_callbacks(struct task_struct *p) {}
 static inline void cgroup_post_fork(struct task_struct *p) {}
 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
 
-- 
cgit v1.2.3


From 12a9d2fef1d35770d3cdc2cd1faabb83c45bc0fa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 7 Jan 2013 08:49:33 -0800
Subject: cgroup: implement cgroup_rightmost_descendant()

Implement cgroup_rightmost_descendant() which returns the right most
descendant of the specified cgroup.  This can be used to skip the
cgroup's subtree while iterating with
cgroup_for_each_descendant_pre().

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 942e68705577..8118a3120378 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -558,6 +558,7 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
 
 struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 					  struct cgroup *cgroup);
+struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
 
 /**
  * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4855892798fd..6643f7053454 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3017,6 +3017,32 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 }
 EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
 
+/**
+ * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup
+ * @pos: cgroup of interest
+ *
+ * Return the rightmost descendant of @pos.  If there's no descendant,
+ * @pos is returned.  This can be used during pre-order traversal to skip
+ * subtree of @pos.
+ */
+struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
+{
+	struct cgroup *last, *tmp;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	do {
+		last = pos;
+		/* ->prev isn't RCU safe, walk ->next till the end */
+		pos = NULL;
+		list_for_each_entry_rcu(tmp, &last->children, sibling)
+			pos = tmp;
+	} while (pos);
+
+	return last;
+}
+EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant);
+
 static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
 {
 	struct cgroup *last;
-- 
cgit v1.2.3


From 8f9dc85348ac37ff3b6b031d22e93a5b59d81f83 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Fri, 4 Jan 2013 00:51:24 +0100
Subject: bcma: mips: remove assigned_irqs from structure

This member is not needed any more.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_mips.c            | 2 --
 include/linux/bcma/bcma_driver_mips.h | 1 -
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index 69815079a6dd..c6d7be33b972 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -263,8 +263,6 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 
 	bcma_core_mips_early_init(mcore);
 
-	mcore->assigned_irqs = 1;
-
 	switch (bus->chipinfo.id) {
 	case BCMA_CHIP_ID_BCM4716:
 	case BCMA_CHIP_ID_BCM4748:
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index 0baf8a56b794..6495579e3f35 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -36,7 +36,6 @@ struct bcma_drv_mips {
 	struct bcma_device *core;
 	u8 setup_done:1;
 	u8 early_setup_done:1;
-	unsigned int assigned_irqs;
 };
 
 #ifdef CONFIG_BCMA_DRIVER_MIPS
-- 
cgit v1.2.3


From 851bacf5902cad15f9bb789d278a1ee9608c8f25 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 7 Jan 2013 12:44:33 +0200
Subject: spi/pxa2xx: embed the ssp_device to platform data

The spi-pxa2xx-pci glue driver had to implement pxa_ssp_request()/free() in
order to support the spi-pxa2xx platform driver. Since the ACPI enabled
platforms can use the same platform driver we would need to implement
pxa_ssp_request()/free() in some central place that can be shared by the
ACPI and PCI glue code.

Instead of doing that we can make pxa_ssp_request()/free() to be available
only when CONFIG_ARCH_PXA is set. On other arches these are being stubbed
out in preference to passing the ssp_device from the platform data
directly.

We also change the SPI bus number to be taken from ssp->port_id instead of
platform device id. This way the supporting code that passes the ssp can
decide the number (or it can set it to the same as pdev->id).

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/spi-pxa2xx-pci.c   | 73 +++---------------------------------------
 drivers/spi/spi-pxa2xx.c       | 15 ++++++---
 include/linux/pxa2xx_ssp.h     |  9 ++++++
 include/linux/spi/pxa2xx_spi.h |  3 ++
 4 files changed, 28 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index ece979e9c642..364964d2ed04 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -8,55 +8,11 @@
 #include <linux/module.h>
 #include <linux/spi/pxa2xx_spi.h>
 
-struct ce4100_info {
-	struct ssp_device ssp;
-	struct platform_device *spi_pdev;
-};
-
-static DEFINE_MUTEX(ssp_lock);
-static LIST_HEAD(ssp_list);
-
-struct ssp_device *pxa_ssp_request(int port, const char *label)
-{
-	struct ssp_device *ssp = NULL;
-
-	mutex_lock(&ssp_lock);
-
-	list_for_each_entry(ssp, &ssp_list, node) {
-		if (ssp->port_id == port && ssp->use_count == 0) {
-			ssp->use_count++;
-			ssp->label = label;
-			break;
-		}
-	}
-
-	mutex_unlock(&ssp_lock);
-
-	if (&ssp->node == &ssp_list)
-		return NULL;
-
-	return ssp;
-}
-EXPORT_SYMBOL_GPL(pxa_ssp_request);
-
-void pxa_ssp_free(struct ssp_device *ssp)
-{
-	mutex_lock(&ssp_lock);
-	if (ssp->use_count) {
-		ssp->use_count--;
-		ssp->label = NULL;
-	} else
-		dev_err(&ssp->pdev->dev, "device already free\n");
-	mutex_unlock(&ssp_lock);
-}
-EXPORT_SYMBOL_GPL(pxa_ssp_free);
-
 static int ce4100_spi_probe(struct pci_dev *dev,
 		const struct pci_device_id *ent)
 {
 	struct platform_device_info pi;
 	int ret;
-	struct ce4100_info *spi_info;
 	struct platform_device *pdev;
 	struct pxa2xx_spi_master spi_pdata;
 	struct ssp_device *ssp;
@@ -69,14 +25,10 @@ static int ce4100_spi_probe(struct pci_dev *dev,
 	if (!ret)
 		return ret;
 
-	spi_info = devm_kzalloc(&dev->dev, sizeof(*spi_info), GFP_KERNEL);
-	if (!spi_info)
-		return -ENOMEM;
-
 	memset(&spi_pdata, 0, sizeof(spi_pdata));
 	spi_pdata.num_chipselect = dev->devfn;
 
-	ssp = &spi_info->ssp;
+	ssp = &spi_pdata.ssp;
 	ssp->phys_base = pci_resource_start(dev, 0);
 	ssp->mmio_base = pcim_iomap_table(dev)[0];
 	if (!ssp->mmio_base) {
@@ -87,10 +39,6 @@ static int ce4100_spi_probe(struct pci_dev *dev,
 	ssp->port_id = dev->devfn;
 	ssp->type = PXA25x_SSP;
 
-	mutex_lock(&ssp_lock);
-	list_add(&ssp->node, &ssp_list);
-	mutex_unlock(&ssp_lock);
-
 	memset(&pi, 0, sizeof(pi));
 	pi.parent = &dev->dev;
 	pi.name = "pxa2xx-spi";
@@ -99,30 +47,19 @@ static int ce4100_spi_probe(struct pci_dev *dev,
 	pi.size_data = sizeof(spi_pdata);
 
 	pdev = platform_device_register_full(&pi);
-	if (!pdev) {
-		mutex_lock(&ssp_lock);
-		list_del(&ssp->node);
-		mutex_unlock(&ssp_lock);
-
+	if (!pdev)
 		return -ENOMEM;
-	}
 
-	spi_info->spi_pdev = pdev;
-	pci_set_drvdata(dev, spi_info);
+	pci_set_drvdata(dev, pdev);
 
 	return 0;
 }
 
 static void ce4100_spi_remove(struct pci_dev *dev)
 {
-	struct ce4100_info *spi_info = pci_get_drvdata(dev);
-	struct ssp_device *ssp = &spi_info->ssp;
-
-	platform_device_unregister(spi_info->spi_pdev);
+	struct platform_device *pdev = pci_get_drvdata(dev);
 
-	mutex_lock(&ssp_lock);
-	list_del(&ssp->node);
-	mutex_unlock(&ssp_lock);
+	platform_device_unregister(pdev);
 }
 
 static DEFINE_PCI_DEVICE_TABLE(ce4100_spi_devices) = {
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 5c8c4f5883c4..54097ad76356 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1535,11 +1535,18 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	struct ssp_device *ssp;
 	int status;
 
-	platform_info = dev->platform_data;
+	platform_info = dev_get_platdata(dev);
+	if (!platform_info) {
+		dev_err(&pdev->dev, "missing platform data\n");
+		return -ENODEV;
+	}
 
 	ssp = pxa_ssp_request(pdev->id, pdev->name);
-	if (ssp == NULL) {
-		dev_err(&pdev->dev, "failed to request SSP%d\n", pdev->id);
+	if (!ssp)
+		ssp = &platform_info->ssp;
+
+	if (!ssp->mmio_base) {
+		dev_err(&pdev->dev, "failed to get ssp\n");
 		return -ENODEV;
 	}
 
@@ -1561,7 +1568,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 
-	master->bus_num = pdev->id;
+	master->bus_num = ssp->port_id;
 	master->num_chipselect = platform_info->num_chipselect;
 	master->dma_alignment = DMA_ALIGNMENT;
 	master->cleanup = cleanup;
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index f36632061c66..065e7f6c3ad7 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -206,6 +206,15 @@ static inline u32 pxa_ssp_read_reg(struct ssp_device *dev, u32 reg)
 	return __raw_readl(dev->mmio_base + reg);
 }
 
+#ifdef CONFIG_ARCH_PXA
 struct ssp_device *pxa_ssp_request(int port, const char *label);
 void pxa_ssp_free(struct ssp_device *);
+#else
+static inline struct ssp_device *pxa_ssp_request(int port, const char *label)
+{
+	return NULL;
+}
+static inline void pxa_ssp_free(struct ssp_device *ssp) {}
+#endif
+
 #endif
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index c73d1445c77e..6b99f09b717d 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -28,6 +28,9 @@ struct pxa2xx_spi_master {
 	u32 clock_enable;
 	u16 num_chipselect;
 	u8 enable_dma;
+
+	/* For non-PXA arches */
+	struct ssp_device ssp;
 };
 
 /* spi_board_info.controller_data for SPI slave devices,
-- 
cgit v1.2.3


From 52494535103986dbbf689b44d8c2c7efe2132b16 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 14 Nov 2012 16:26:40 -0800
Subject: rcu: Reduce rcutorture tracing

Currently, rcutorture traces every read-side access.  This can be
problematic because even a two-minute rcutorture run on a two-CPU system
can generate 28,853,363 reads.  Normally, only a failing read is of
interest, so this commit traces adjusts rcutorture's tracing to only
trace failing reads.  The resulting event tracing records the time
and the ->completed value captured at the beginning of the RCU read-side
critical section, allowing correlation with other event-tracing messages.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
[ paulmck: Add fix to build problem located by Randy Dunlap based on
  diagnosis by Steven Rostedt. ]
---
 include/linux/rcupdate.h   | 13 ++++++++++---
 include/trace/events/rcu.h | 19 ++++++++++++++-----
 kernel/rcupdate.c          |  9 ++++++---
 kernel/rcutorture.c        | 31 ++++++++++++++++++++++++-------
 lib/Kconfig.debug          |  1 +
 5 files changed, 55 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 275aa3f1062d..7f89cea596e1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -53,7 +53,10 @@ extern int rcutorture_runnable; /* for sysctl */
 extern void rcutorture_record_test_transition(void);
 extern void rcutorture_record_progress(unsigned long vernum);
 extern void do_trace_rcu_torture_read(char *rcutorturename,
-				      struct rcu_head *rhp);
+				      struct rcu_head *rhp,
+				      unsigned long secs,
+				      unsigned long c_old,
+				      unsigned long c);
 #else
 static inline void rcutorture_record_test_transition(void)
 {
@@ -63,9 +66,13 @@ static inline void rcutorture_record_progress(unsigned long vernum)
 }
 #ifdef CONFIG_RCU_TRACE
 extern void do_trace_rcu_torture_read(char *rcutorturename,
-				      struct rcu_head *rhp);
+				      struct rcu_head *rhp,
+				      unsigned long secs,
+				      unsigned long c_old,
+				      unsigned long c);
 #else
-#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
 #endif
 #endif
 
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d4f559b1ec34..09af021c8e96 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -523,22 +523,30 @@ TRACE_EVENT(rcu_batch_end,
  */
 TRACE_EVENT(rcu_torture_read,
 
-	TP_PROTO(char *rcutorturename, struct rcu_head *rhp),
+	TP_PROTO(char *rcutorturename, struct rcu_head *rhp,
+		 unsigned long secs, unsigned long c_old, unsigned long c),
 
-	TP_ARGS(rcutorturename, rhp),
+	TP_ARGS(rcutorturename, rhp, secs, c_old, c),
 
 	TP_STRUCT__entry(
 		__field(char *, rcutorturename)
 		__field(struct rcu_head *, rhp)
+		__field(unsigned long, secs)
+		__field(unsigned long, c_old)
+		__field(unsigned long, c)
 	),
 
 	TP_fast_assign(
 		__entry->rcutorturename = rcutorturename;
 		__entry->rhp = rhp;
+		__entry->secs = secs;
+		__entry->c_old = c_old;
+		__entry->c = c;
 	),
 
-	TP_printk("%s torture read %p",
-		  __entry->rcutorturename, __entry->rhp)
+	TP_printk("%s torture read %p %luus c: %lu %lu",
+		  __entry->rcutorturename, __entry->rhp,
+		  __entry->secs, __entry->c_old, __entry->c)
 );
 
 /*
@@ -608,7 +616,8 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
 #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
 	do { } while (0)
-#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
 #define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
 
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a2cf76177b44..303359d1ca88 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -404,11 +404,14 @@ EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
-void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp)
+void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp,
+			       unsigned long secs,
+			       unsigned long c_old, unsigned long c)
 {
-	trace_rcu_torture_read(rcutorturename, rhp);
+	trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
 }
 EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
 #else
-#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
 #endif
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 31dea01c85fd..a583f1ce713d 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -46,6 +46,7 @@
 #include <linux/stat.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/trace_clock.h>
 #include <asm/byteorder.h>
 
 MODULE_LICENSE("GPL");
@@ -1028,7 +1029,6 @@ void rcutorture_trace_dump(void)
 		return;
 	if (atomic_xchg(&beenhere, 1) != 0)
 		return;
-	do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
 	ftrace_dump(DUMP_ALL);
 }
 
@@ -1042,13 +1042,16 @@ static void rcu_torture_timer(unsigned long unused)
 {
 	int idx;
 	int completed;
+	int completed_end;
 	static DEFINE_RCU_RANDOM(rand);
 	static DEFINE_SPINLOCK(rand_lock);
 	struct rcu_torture *p;
 	int pipe_count;
+	unsigned long long ts;
 
 	idx = cur_ops->readlock();
 	completed = cur_ops->completed();
+	ts = trace_clock_local();
 	p = rcu_dereference_check(rcu_torture_current,
 				  rcu_read_lock_bh_held() ||
 				  rcu_read_lock_sched_held() ||
@@ -1058,7 +1061,6 @@ static void rcu_torture_timer(unsigned long unused)
 		cur_ops->readunlock(idx);
 		return;
 	}
-	do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 	if (p->rtort_mbtest == 0)
 		atomic_inc(&n_rcu_torture_mberror);
 	spin_lock(&rand_lock);
@@ -1071,10 +1073,16 @@ static void rcu_torture_timer(unsigned long unused)
 		/* Should not happen, but... */
 		pipe_count = RCU_TORTURE_PIPE_LEN;
 	}
-	if (pipe_count > 1)
+	completed_end = cur_ops->completed();
+	if (pipe_count > 1) {
+		unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
+
+		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
+					  completed, completed_end);
 		rcutorture_trace_dump();
+	}
 	__this_cpu_inc(rcu_torture_count[pipe_count]);
-	completed = cur_ops->completed() - completed;
+	completed = completed_end - completed;
 	if (completed > RCU_TORTURE_PIPE_LEN) {
 		/* Should not happen, but... */
 		completed = RCU_TORTURE_PIPE_LEN;
@@ -1094,11 +1102,13 @@ static int
 rcu_torture_reader(void *arg)
 {
 	int completed;
+	int completed_end;
 	int idx;
 	DEFINE_RCU_RANDOM(rand);
 	struct rcu_torture *p;
 	int pipe_count;
 	struct timer_list t;
+	unsigned long long ts;
 
 	VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
 	set_user_nice(current, 19);
@@ -1112,6 +1122,7 @@ rcu_torture_reader(void *arg)
 		}
 		idx = cur_ops->readlock();
 		completed = cur_ops->completed();
+		ts = trace_clock_local();
 		p = rcu_dereference_check(rcu_torture_current,
 					  rcu_read_lock_bh_held() ||
 					  rcu_read_lock_sched_held() ||
@@ -1122,7 +1133,6 @@ rcu_torture_reader(void *arg)
 			schedule_timeout_interruptible(HZ);
 			continue;
 		}
-		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 		if (p->rtort_mbtest == 0)
 			atomic_inc(&n_rcu_torture_mberror);
 		cur_ops->read_delay(&rand);
@@ -1132,10 +1142,17 @@ rcu_torture_reader(void *arg)
 			/* Should not happen, but... */
 			pipe_count = RCU_TORTURE_PIPE_LEN;
 		}
-		if (pipe_count > 1)
+		completed_end = cur_ops->completed();
+		if (pipe_count > 1) {
+			unsigned long __maybe_unused ts_rem =
+					do_div(ts, NSEC_PER_USEC);
+
+			do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
+						  ts, completed, completed_end);
 			rcutorture_trace_dump();
+		}
 		__this_cpu_inc(rcu_torture_count[pipe_count]);
-		completed = cur_ops->completed() - completed;
+		completed = completed_end - completed;
 		if (completed > RCU_TORTURE_PIPE_LEN) {
 			/* Should not happen, but... */
 			completed = RCU_TORTURE_PIPE_LEN;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3a353091a903..7d83f52fbade 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1008,6 +1008,7 @@ config RCU_CPU_STALL_INFO
 config RCU_TRACE
 	bool "Enable tracing for RCU"
 	depends on DEBUG_KERNEL
+	select TRACE_CLOCK
 	help
 	  This option provides tracing in RCU which presents stats
 	  in debugfs for debugging RCU implementation.
-- 
cgit v1.2.3


From fda55eca5a33f33ffcd4192c6b2d75179714a52c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 7 Jan 2013 09:28:21 +0000
Subject: net: introduce skb_transport_header_was_set()

We have skb_mac_header_was_set() helper to tell if mac_header
was set on a skb. We would like the same for transport_header.

__netif_receive_skb() doesn't reset the transport header if already
set by GRO layer.

Note that network stacks usually reset the transport header anyway,
after pulling the network header, so this change only allows
a followup patch to have more precise qdisc pkt_len computation
for GSO packets at ingress side.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 10 ++++++++++
 net/core/dev.c         |  3 ++-
 net/core/skbuff.c      |  2 ++
 3 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 320e976d5ab8..8b2256e880e0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1492,6 +1492,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb,
 	skb->inner_network_header += offset;
 }
 
+static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
+{
+	return skb->transport_header != ~0U;
+}
+
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
 	return skb->head + skb->transport_header;
@@ -1580,6 +1585,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb,
 	skb->inner_network_header = skb->data + offset;
 }
 
+static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
+{
+	return skb->transport_header != NULL;
+}
+
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
 	return skb->transport_header;
diff --git a/net/core/dev.c b/net/core/dev.c
index a51ccf46e8b7..2e2448201a76 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3352,7 +3352,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
+	if (!skb_transport_header_was_set(skb))
+		skb_reset_transport_header(skb);
 	skb_reset_mac_len(skb);
 
 	pt_prev = NULL;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b03fc0c6a952..1e1b9ea0296d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -260,6 +260,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->end = skb->tail + size;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->mac_header = ~0U;
+	skb->transport_header = ~0U;
 #endif
 
 	/* make sure we initialize shinfo sequentially */
@@ -328,6 +329,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 	skb->end = skb->tail + size;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->mac_header = ~0U;
+	skb->transport_header = ~0U;
 #endif
 
 	/* make sure we initialize shinfo sequentially */
-- 
cgit v1.2.3


From b7394d2429c198b1da3d46ac39192e891029ec0f Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Mon, 7 Jan 2013 20:52:39 +0000
Subject: netpoll: prepare for ipv6

This patch adjusts some struct and functions, to prepare
for supporting IPv6.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netconsole.c |  12 +-
 include/linux/netpoll.h  |  13 +-
 net/core/netpoll.c       | 402 ++++++++++++++++++++++++++---------------------
 3 files changed, 243 insertions(+), 184 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 6989ebe2bc79..998fa0257a92 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -269,12 +269,14 @@ static ssize_t show_remote_port(struct netconsole_target *nt, char *buf)
 
 static ssize_t show_local_ip(struct netconsole_target *nt, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.local_ip);
+	if (!nt->np.ipv6)
+		return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.local_ip);
 }
 
 static ssize_t show_remote_ip(struct netconsole_target *nt, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.remote_ip);
+	if (!nt->np.ipv6)
+		return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.remote_ip);
 }
 
 static ssize_t show_local_mac(struct netconsole_target *nt, char *buf)
@@ -410,7 +412,8 @@ static ssize_t store_local_ip(struct netconsole_target *nt,
 		return -EINVAL;
 	}
 
-	nt->np.local_ip = in_aton(buf);
+	if (!strnchr(buf, count, ':'))
+		nt->np.local_ip.ip = in_aton(buf);
 
 	return strnlen(buf, count);
 }
@@ -426,7 +429,8 @@ static ssize_t store_remote_ip(struct netconsole_target *nt,
 		return -EINVAL;
 	}
 
-	nt->np.remote_ip = in_aton(buf);
+	if (!strnchr(buf, count, ':'))
+		nt->np.remote_ip.ip = in_aton(buf);
 
 	return strnlen(buf, count);
 }
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 66d5379c305e..f54c3bb6a22b 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -12,13 +12,22 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 
+union inet_addr {
+	__u32		all[4];
+	__be32		ip;
+	__be32		ip6[4];
+	struct in_addr	in;
+	struct in6_addr	in6;
+};
+
 struct netpoll {
 	struct net_device *dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
 	void (*rx_hook)(struct netpoll *, int, char *, int);
 
-	__be32 local_ip, remote_ip;
+	union inet_addr local_ip, remote_ip;
+	bool ipv6;
 	u16 local_port, remote_port;
 	u8 remote_mac[ETH_ALEN];
 
@@ -33,7 +42,7 @@ struct netpoll_info {
 	spinlock_t rx_lock;
 	struct list_head rx_np; /* netpolls that registered an rx_hook */
 
-	struct sk_buff_head arp_tx; /* list of arp requests to reply to */
+	struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */
 	struct sk_buff_head txq;
 
 	struct delayed_work tx_work;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d2bda8eb08ec..6bd073688f68 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -55,7 +55,7 @@ static atomic_t trapped;
 	 MAX_UDP_CHUNK)
 
 static void zap_completion_queue(void);
-static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
+static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -181,13 +181,13 @@ static void poll_napi(struct net_device *dev)
 	}
 }
 
-static void service_arp_queue(struct netpoll_info *npi)
+static void service_neigh_queue(struct netpoll_info *npi)
 {
 	if (npi) {
 		struct sk_buff *skb;
 
-		while ((skb = skb_dequeue(&npi->arp_tx)))
-			netpoll_arp_reply(skb, npi);
+		while ((skb = skb_dequeue(&npi->neigh_tx)))
+			netpoll_neigh_reply(skb, npi);
 	}
 }
 
@@ -216,14 +216,14 @@ static void netpoll_poll_dev(struct net_device *dev)
 
 			bond_dev = netdev_master_upper_dev_get_rcu(dev);
 			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
-			while ((skb = skb_dequeue(&ni->arp_tx))) {
+			while ((skb = skb_dequeue(&ni->neigh_tx))) {
 				skb->dev = bond_dev;
-				skb_queue_tail(&bond_ni->arp_tx, skb);
+				skb_queue_tail(&bond_ni->neigh_tx, skb);
 			}
 		}
 	}
 
-	service_arp_queue(ni);
+	service_neigh_queue(ni);
 
 	zap_completion_queue();
 }
@@ -386,7 +386,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	static atomic_t ip_ident;
 
 	udp_len = len + sizeof(*udph);
-	ip_len = udp_len + sizeof(*iph);
+	if (!np->ipv6)
+		ip_len = udp_len + sizeof(*iph);
+
 	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
 
 	skb = find_skb(np, total_len + np->dev->needed_tailroom,
@@ -403,34 +405,38 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	udph->source = htons(np->local_port);
 	udph->dest = htons(np->remote_port);
 	udph->len = htons(udp_len);
-	udph->check = 0;
-	udph->check = csum_tcpudp_magic(np->local_ip,
-					np->remote_ip,
-					udp_len, IPPROTO_UDP,
-					csum_partial(udph, udp_len, 0));
-	if (udph->check == 0)
-		udph->check = CSUM_MANGLED_0;
-
-	skb_push(skb, sizeof(*iph));
-	skb_reset_network_header(skb);
-	iph = ip_hdr(skb);
-
-	/* iph->version = 4; iph->ihl = 5; */
-	put_unaligned(0x45, (unsigned char *)iph);
-	iph->tos      = 0;
-	put_unaligned(htons(ip_len), &(iph->tot_len));
-	iph->id       = htons(atomic_inc_return(&ip_ident));
-	iph->frag_off = 0;
-	iph->ttl      = 64;
-	iph->protocol = IPPROTO_UDP;
-	iph->check    = 0;
-	put_unaligned(np->local_ip, &(iph->saddr));
-	put_unaligned(np->remote_ip, &(iph->daddr));
-	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
-
-	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
-	skb_reset_mac_header(skb);
-	skb->protocol = eth->h_proto = htons(ETH_P_IP);
+
+	if (!np->ipv6) {
+		udph->check = 0;
+		udph->check = csum_tcpudp_magic(np->local_ip.ip,
+						np->remote_ip.ip,
+						udp_len, IPPROTO_UDP,
+						csum_partial(udph, udp_len, 0));
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+
+		skb_push(skb, sizeof(*iph));
+		skb_reset_network_header(skb);
+		iph = ip_hdr(skb);
+
+		/* iph->version = 4; iph->ihl = 5; */
+		put_unaligned(0x45, (unsigned char *)iph);
+		iph->tos      = 0;
+		put_unaligned(htons(ip_len), &(iph->tot_len));
+		iph->id       = htons(atomic_inc_return(&ip_ident));
+		iph->frag_off = 0;
+		iph->ttl      = 64;
+		iph->protocol = IPPROTO_UDP;
+		iph->check    = 0;
+		put_unaligned(np->local_ip.ip, &(iph->saddr));
+		put_unaligned(np->remote_ip.ip, &(iph->daddr));
+		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+		skb_reset_mac_header(skb);
+		skb->protocol = eth->h_proto = htons(ETH_P_IP);
+	}
+
 	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
 	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
 
@@ -440,7 +446,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 }
 EXPORT_SYMBOL(netpoll_send_udp);
 
-static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
+static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
@@ -451,7 +457,7 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 	struct netpoll *np, *tmp;
 	unsigned long flags;
 	int hlen, tlen;
-	int hits = 0;
+	int hits = 0, proto;
 
 	if (list_empty(&npinfo->rx_np))
 		return;
@@ -469,94 +475,97 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 	if (!hits)
 		return;
 
-	/* No arp on this interface */
-	if (skb->dev->flags & IFF_NOARP)
-		return;
-
-	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
-		return;
+	proto = ntohs(eth_hdr(skb)->h_proto);
+	if (proto == ETH_P_IP) {
+		/* No arp on this interface */
+		if (skb->dev->flags & IFF_NOARP)
+			return;
 
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	arp = arp_hdr(skb);
+		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
+			return;
 
-	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
-	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
-	    arp->ar_pro != htons(ETH_P_IP) ||
-	    arp->ar_op != htons(ARPOP_REQUEST))
-		return;
+		skb_reset_network_header(skb);
+		skb_reset_transport_header(skb);
+		arp = arp_hdr(skb);
 
-	arp_ptr = (unsigned char *)(arp+1);
-	/* save the location of the src hw addr */
-	sha = arp_ptr;
-	arp_ptr += skb->dev->addr_len;
-	memcpy(&sip, arp_ptr, 4);
-	arp_ptr += 4;
-	/* If we actually cared about dst hw addr,
-	   it would get copied here */
-	arp_ptr += skb->dev->addr_len;
-	memcpy(&tip, arp_ptr, 4);
-
-	/* Should we ignore arp? */
-	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
-		return;
+		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
+		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
+		    arp->ar_pro != htons(ETH_P_IP) ||
+		    arp->ar_op != htons(ARPOP_REQUEST))
+			return;
 
-	size = arp_hdr_len(skb->dev);
+		arp_ptr = (unsigned char *)(arp+1);
+		/* save the location of the src hw addr */
+		sha = arp_ptr;
+		arp_ptr += skb->dev->addr_len;
+		memcpy(&sip, arp_ptr, 4);
+		arp_ptr += 4;
+		/* If we actually cared about dst hw addr,
+		   it would get copied here */
+		arp_ptr += skb->dev->addr_len;
+		memcpy(&tip, arp_ptr, 4);
 
-	spin_lock_irqsave(&npinfo->rx_lock, flags);
-	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
-		if (tip != np->local_ip)
-			continue;
+		/* Should we ignore arp? */
+		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
+			return;
 
-		hlen = LL_RESERVED_SPACE(np->dev);
-		tlen = np->dev->needed_tailroom;
-		send_skb = find_skb(np, size + hlen + tlen, hlen);
-		if (!send_skb)
-			continue;
+		size = arp_hdr_len(skb->dev);
 
-		skb_reset_network_header(send_skb);
-		arp = (struct arphdr *) skb_put(send_skb, size);
-		send_skb->dev = skb->dev;
-		send_skb->protocol = htons(ETH_P_ARP);
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+			if (tip != np->local_ip.ip)
+				continue;
+
+			hlen = LL_RESERVED_SPACE(np->dev);
+			tlen = np->dev->needed_tailroom;
+			send_skb = find_skb(np, size + hlen + tlen, hlen);
+			if (!send_skb)
+				continue;
+
+			skb_reset_network_header(send_skb);
+			arp = (struct arphdr *) skb_put(send_skb, size);
+			send_skb->dev = skb->dev;
+			send_skb->protocol = htons(ETH_P_ARP);
+
+			/* Fill the device header for the ARP frame */
+			if (dev_hard_header(send_skb, skb->dev, ptype,
+					    sha, np->dev->dev_addr,
+					    send_skb->len) < 0) {
+				kfree_skb(send_skb);
+				continue;
+			}
 
-		/* Fill the device header for the ARP frame */
-		if (dev_hard_header(send_skb, skb->dev, ptype,
-				    sha, np->dev->dev_addr,
-				    send_skb->len) < 0) {
-			kfree_skb(send_skb);
-			continue;
+			/*
+			 * Fill out the arp protocol part.
+			 *
+			 * we only support ethernet device type,
+			 * which (according to RFC 1390) should
+			 * always equal 1 (Ethernet).
+			 */
+
+			arp->ar_hrd = htons(np->dev->type);
+			arp->ar_pro = htons(ETH_P_IP);
+			arp->ar_hln = np->dev->addr_len;
+			arp->ar_pln = 4;
+			arp->ar_op = htons(type);
+
+			arp_ptr = (unsigned char *)(arp + 1);
+			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
+			arp_ptr += np->dev->addr_len;
+			memcpy(arp_ptr, &tip, 4);
+			arp_ptr += 4;
+			memcpy(arp_ptr, sha, np->dev->addr_len);
+			arp_ptr += np->dev->addr_len;
+			memcpy(arp_ptr, &sip, 4);
+
+			netpoll_send_skb(np, send_skb);
+
+			/* If there are several rx_hooks for the same address,
+			   we're fine by sending a single reply */
+			break;
 		}
-
-		/*
-		 * Fill out the arp protocol part.
-		 *
-		 * we only support ethernet device type,
-		 * which (according to RFC 1390) should
-		 * always equal 1 (Ethernet).
-		 */
-
-		arp->ar_hrd = htons(np->dev->type);
-		arp->ar_pro = htons(ETH_P_IP);
-		arp->ar_hln = np->dev->addr_len;
-		arp->ar_pln = 4;
-		arp->ar_op = htons(type);
-
-		arp_ptr = (unsigned char *)(arp + 1);
-		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
-		arp_ptr += np->dev->addr_len;
-		memcpy(arp_ptr, &tip, 4);
-		arp_ptr += 4;
-		memcpy(arp_ptr, sha, np->dev->addr_len);
-		arp_ptr += np->dev->addr_len;
-		memcpy(arp_ptr, &sip, 4);
-
-		netpoll_send_skb(np, send_skb);
-
-		/* If there are several rx_hooks for the same address,
-		   we're fine by sending a single reply */
-		break;
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 	}
-	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 }
 
 int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
@@ -576,7 +585,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 	/* check if netpoll clients need ARP */
 	if (skb->protocol == htons(ETH_P_ARP) &&
 	    atomic_read(&trapped)) {
-		skb_queue_tail(&npinfo->arp_tx, skb);
+		skb_queue_tail(&npinfo->neigh_tx, skb);
 		return 1;
 	}
 
@@ -587,60 +596,61 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 	}
 
 	proto = ntohs(eth_hdr(skb)->h_proto);
-	if (proto != ETH_P_IP)
+	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
 		goto out;
 	if (skb->pkt_type == PACKET_OTHERHOST)
 		goto out;
 	if (skb_shared(skb))
 		goto out;
 
-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-		goto out;
-	iph = (struct iphdr *)skb->data;
-	if (iph->ihl < 5 || iph->version != 4)
-		goto out;
-	if (!pskb_may_pull(skb, iph->ihl*4))
-		goto out;
-	iph = (struct iphdr *)skb->data;
-	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
-		goto out;
-
-	len = ntohs(iph->tot_len);
-	if (skb->len < len || len < iph->ihl*4)
-		goto out;
-
-	/*
-	 * Our transport medium may have padded the buffer out.
-	 * Now We trim to the true length of the frame.
-	 */
-	if (pskb_trim_rcsum(skb, len))
-		goto out;
+	if (proto == ETH_P_IP) {
+		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+			goto out;
+		iph = (struct iphdr *)skb->data;
+		if (iph->ihl < 5 || iph->version != 4)
+			goto out;
+		if (!pskb_may_pull(skb, iph->ihl*4))
+			goto out;
+		iph = (struct iphdr *)skb->data;
+		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+			goto out;
 
-	iph = (struct iphdr *)skb->data;
-	if (iph->protocol != IPPROTO_UDP)
-		goto out;
+		len = ntohs(iph->tot_len);
+		if (skb->len < len || len < iph->ihl*4)
+			goto out;
 
-	len -= iph->ihl*4;
-	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
-	ulen = ntohs(uh->len);
+		/*
+		 * Our transport medium may have padded the buffer out.
+		 * Now We trim to the true length of the frame.
+		 */
+		if (pskb_trim_rcsum(skb, len))
+			goto out;
 
-	if (ulen != len)
-		goto out;
-	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
-		goto out;
+		iph = (struct iphdr *)skb->data;
+		if (iph->protocol != IPPROTO_UDP)
+			goto out;
 
-	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
-		if (np->local_ip && np->local_ip != iph->daddr)
-			continue;
-		if (np->remote_ip && np->remote_ip != iph->saddr)
-			continue;
-		if (np->local_port && np->local_port != ntohs(uh->dest))
-			continue;
+		len -= iph->ihl*4;
+		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+		ulen = ntohs(uh->len);
 
-		np->rx_hook(np, ntohs(uh->source),
-			       (char *)(uh+1),
-			       ulen - sizeof(struct udphdr));
-		hits++;
+		if (ulen != len)
+			goto out;
+		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
+			goto out;
+		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
+				continue;
+			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
+				continue;
+			if (np->local_port && np->local_port != ntohs(uh->dest))
+				continue;
+
+			np->rx_hook(np, ntohs(uh->source),
+				       (char *)(uh+1),
+				       ulen - sizeof(struct udphdr));
+			hits++;
+		}
 	}
 
 	if (!hits)
@@ -661,17 +671,40 @@ out:
 void netpoll_print_options(struct netpoll *np)
 {
 	np_info(np, "local port %d\n", np->local_port);
-	np_info(np, "local IP %pI4\n", &np->local_ip);
+	if (!np->ipv6)
+		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
 	np_info(np, "interface '%s'\n", np->dev_name);
 	np_info(np, "remote port %d\n", np->remote_port);
-	np_info(np, "remote IP %pI4\n", &np->remote_ip);
+	if (!np->ipv6)
+		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
 	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
 }
 EXPORT_SYMBOL(netpoll_print_options);
 
+static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
+{
+	const char *end;
+
+	if (!strchr(str, ':') &&
+	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
+		if (!*end)
+			return 0;
+	}
+	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
+#if IS_ENABLED(CONFIG_IPV6)
+		if (!*end)
+			return 1;
+#else
+		return -1;
+#endif
+	}
+	return -1;
+}
+
 int netpoll_parse_options(struct netpoll *np, char *opt)
 {
 	char *cur=opt, *delim;
+	int ipv6;
 
 	if (*cur != '@') {
 		if ((delim = strchr(cur, '@')) == NULL)
@@ -687,7 +720,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 		if ((delim = strchr(cur, '/')) == NULL)
 			goto parse_failed;
 		*delim = 0;
-		np->local_ip = in_aton(cur);
+		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
+		if (ipv6 < 0)
+			goto parse_failed;
+		else
+			np->ipv6 = (bool)ipv6;
 		cur = delim;
 	}
 	cur++;
@@ -719,7 +756,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 	if ((delim = strchr(cur, '/')) == NULL)
 		goto parse_failed;
 	*delim = 0;
-	np->remote_ip = in_aton(cur);
+	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
+	if (ipv6 < 0)
+		goto parse_failed;
+	else if (np->ipv6 != (bool)ipv6)
+		goto parse_failed;
+	else
+		np->ipv6 = (bool)ipv6;
 	cur = delim + 1;
 
 	if (*cur != 0) {
@@ -767,7 +810,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 		INIT_LIST_HEAD(&npinfo->rx_np);
 
 		spin_lock_init(&npinfo->rx_lock);
-		skb_queue_head_init(&npinfo->arp_tx);
+		skb_queue_head_init(&npinfo->neigh_tx);
 		skb_queue_head_init(&npinfo->txq);
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
 
@@ -859,21 +902,24 @@ int netpoll_setup(struct netpoll *np)
 		}
 	}
 
-	if (!np->local_ip) {
-		rcu_read_lock();
-		in_dev = __in_dev_get_rcu(ndev);
+	if (!np->local_ip.ip) {
+		if (!np->ipv6) {
+			rcu_read_lock();
+			in_dev = __in_dev_get_rcu(ndev);
 
-		if (!in_dev || !in_dev->ifa_list) {
+
+			if (!in_dev || !in_dev->ifa_list) {
+				rcu_read_unlock();
+				np_err(np, "no IP address for %s, aborting\n",
+				       np->dev_name);
+				err = -EDESTADDRREQ;
+				goto put;
+			}
+
+			np->local_ip.ip = in_dev->ifa_list->ifa_local;
 			rcu_read_unlock();
-			np_err(np, "no IP address for %s, aborting\n",
-			       np->dev_name);
-			err = -EDESTADDRREQ;
-			goto put;
+			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
 		}
-
-		np->local_ip = in_dev->ifa_list->ifa_local;
-		rcu_read_unlock();
-		np_info(np, "local IP %pI4\n", &np->local_ip);
 	}
 
 	/* fill up the skb queue */
@@ -906,7 +952,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
 	struct netpoll_info *npinfo =
 			container_of(rcu_head, struct netpoll_info, rcu);
 
-	skb_queue_purge(&npinfo->arp_tx);
+	skb_queue_purge(&npinfo->neigh_tx);
 	skb_queue_purge(&npinfo->txq);
 
 	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
-- 
cgit v1.2.3


From dd4544f05469aaaeee891d7dc54d66430344321e Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Tue, 8 Jan 2013 20:06:23 +0000
Subject: bgmac: driver for GBit MAC core on BCMA bus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCMA is a Broadcom specific bus with devices AKA cores. All recent BCMA
based SoCs have gigabit ethernet provided by the GBit MAC core. This
patch adds driver for such a cores registering itself as a netdev. It
has been tested on a BCM4706 and BCM4718 chipsets.

In the kernel tree there is already b44 driver which has some common
things with bgmac, however there are many differences that has led to
the decision or writing a new driver:
1) GBit MAC cores appear on BCMA bus (not SSB as in case of b44)
2) There is 64bit DMA engine which differs from 32bit one
3) There is no CAM (Content Addressable Memory) in GBit MAC
4) We have 4 TX queues on GBit MAC devices (instead of 1)
5) Many registers have different addresses/values
6) RX header flags are also different

The driver in it's state is functional how, however there is of course
place for improvements:
1) Supporting more net_device_ops
2) SUpporting more ethtool_ops
3) Unaligned addressing in DMA
4) Writing separated PHY driver

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/bcma/driver_chipcommon_pmu.c        |    3 +-
 drivers/net/ethernet/broadcom/Kconfig       |    9 +
 drivers/net/ethernet/broadcom/Makefile      |    1 +
 drivers/net/ethernet/broadcom/bgmac.c       | 1422 +++++++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bgmac.h       |  456 +++++++++
 include/linux/bcma/bcma_driver_chipcommon.h |    2 +
 6 files changed, 1892 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/broadcom/bgmac.c
 create mode 100644 drivers/net/ethernet/broadcom/bgmac.h

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c
index c62c788b3289..932b101dee36 100644
--- a/drivers/bcma/driver_chipcommon_pmu.c
+++ b/drivers/bcma/driver_chipcommon_pmu.c
@@ -264,7 +264,7 @@ static u32 bcma_pmu_pll_clock_bcm4706(struct bcma_drv_cc *cc, u32 pll0, u32 m)
 }
 
 /* query bus clock frequency for PMU-enabled chipcommon */
-static u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc)
+u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc)
 {
 	struct bcma_bus *bus = cc->core->bus;
 
@@ -293,6 +293,7 @@ static u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc)
 	}
 	return BCMA_CC_PMU_HT_CLOCK;
 }
+EXPORT_SYMBOL_GPL(bcma_pmu_get_bus_clock);
 
 /* query cpu clock frequency for PMU-enabled chipcommon */
 u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc)
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 3b3bf0dd0f1a..3e69b3f88099 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -130,4 +130,13 @@ config BNX2X_SRIOV
 	  Virtualization support in the 578xx and 57712 products. This
 	  allows for virtual function acceleration in virtual environments.
 
+config BGMAC
+	tristate "BCMA bus GBit core support"
+	depends on BCMA_HOST_SOC && HAS_DMA
+	---help---
+	  This driver supports GBit MAC and BCM4706 GBit MAC cores on BCMA bus.
+	  They can be found on BCM47xx SoCs and provide gigabit ethernet.
+	  In case of using this driver on BCM4706 it's also requires to enable
+	  BCMA_DRIVER_GMAC_CMN to make it work.
+
 endif # NET_VENDOR_BROADCOM
diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile
index b7896051d54e..68efa1a3fb88 100644
--- a/drivers/net/ethernet/broadcom/Makefile
+++ b/drivers/net/ethernet/broadcom/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_CNIC) += cnic.o
 obj-$(CONFIG_BNX2X) += bnx2x/
 obj-$(CONFIG_SB1250_MAC) += sb1250-mac.o
 obj-$(CONFIG_TIGON3) += tg3.o
+obj-$(CONFIG_BGMAC) += bgmac.o
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
new file mode 100644
index 000000000000..9bd33db7fddd
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -0,0 +1,1422 @@
+/*
+ * Driver for (BCM4706)? GBit MAC core on BCMA bus.
+ *
+ * Copyright (C) 2012 Rafał Miłecki <zajec5@gmail.com>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include "bgmac.h"
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/mii.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <asm/mach-bcm47xx/nvram.h>
+
+static const struct bcma_device_id bgmac_bcma_tbl[] = {
+	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS),
+	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS),
+	BCMA_CORETABLE_END
+};
+MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl);
+
+static bool bgmac_wait_value(struct bcma_device *core, u16 reg, u32 mask,
+			     u32 value, int timeout)
+{
+	u32 val;
+	int i;
+
+	for (i = 0; i < timeout / 10; i++) {
+		val = bcma_read32(core, reg);
+		if ((val & mask) == value)
+			return true;
+		udelay(10);
+	}
+	pr_err("Timeout waiting for reg 0x%X\n", reg);
+	return false;
+}
+
+/**************************************************
+ * DMA
+ **************************************************/
+
+static void bgmac_dma_tx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
+{
+	u32 val;
+	int i;
+
+	if (!ring->mmio_base)
+		return;
+
+	/* Suspend DMA TX ring first.
+	 * bgmac_wait_value doesn't support waiting for any of few values, so
+	 * implement whole loop here.
+	 */
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL,
+		    BGMAC_DMA_TX_SUSPEND);
+	for (i = 0; i < 10000 / 10; i++) {
+		val = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS);
+		val &= BGMAC_DMA_TX_STAT;
+		if (val == BGMAC_DMA_TX_STAT_DISABLED ||
+		    val == BGMAC_DMA_TX_STAT_IDLEWAIT ||
+		    val == BGMAC_DMA_TX_STAT_STOPPED) {
+			i = 0;
+			break;
+		}
+		udelay(10);
+	}
+	if (i)
+		bgmac_err(bgmac, "Timeout suspending DMA TX ring 0x%X (BGMAC_DMA_TX_STAT: 0x%08X)\n",
+			  ring->mmio_base, val);
+
+	/* Remove SUSPEND bit */
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, 0);
+	if (!bgmac_wait_value(bgmac->core,
+			      ring->mmio_base + BGMAC_DMA_TX_STATUS,
+			      BGMAC_DMA_TX_STAT, BGMAC_DMA_TX_STAT_DISABLED,
+			      10000)) {
+		bgmac_warn(bgmac, "DMA TX ring 0x%X wasn't disabled on time, waiting additional 300us\n",
+			   ring->mmio_base);
+		udelay(300);
+		val = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS);
+		if ((val & BGMAC_DMA_TX_STAT) != BGMAC_DMA_TX_STAT_DISABLED)
+			bgmac_err(bgmac, "Reset of DMA TX ring 0x%X failed\n",
+				  ring->mmio_base);
+	}
+}
+
+static void bgmac_dma_tx_enable(struct bgmac *bgmac,
+				struct bgmac_dma_ring *ring)
+{
+	u32 ctl;
+
+	ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL);
+	ctl |= BGMAC_DMA_TX_ENABLE;
+	ctl |= BGMAC_DMA_TX_PARITY_DISABLE;
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
+}
+
+static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
+				    struct bgmac_dma_ring *ring,
+				    struct sk_buff *skb)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	struct net_device *net_dev = bgmac->net_dev;
+	struct bgmac_dma_desc *dma_desc;
+	struct bgmac_slot_info *slot;
+	u32 ctl0, ctl1;
+	int free_slots;
+
+	if (skb->len > BGMAC_DESC_CTL1_LEN) {
+		bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
+		goto err_stop_drop;
+	}
+
+	if (ring->start <= ring->end)
+		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+	else
+		free_slots = ring->start - ring->end;
+	if (free_slots == 1) {
+		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+		netif_stop_queue(net_dev);
+		return NETDEV_TX_BUSY;
+	}
+
+	slot = &ring->slots[ring->end];
+	slot->skb = skb;
+	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+					DMA_TO_DEVICE);
+	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+		bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+			  ring->mmio_base);
+		goto err_stop_drop;
+	}
+
+	ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
+	if (ring->end == ring->num_slots - 1)
+		ctl0 |= BGMAC_DESC_CTL0_EOT;
+	ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+
+	dma_desc = ring->cpu_base;
+	dma_desc += ring->end;
+	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+	dma_desc->ctl0 = cpu_to_le32(ctl0);
+	dma_desc->ctl1 = cpu_to_le32(ctl1);
+
+	wmb();
+
+	/* Increase ring->end to point empty slot. We tell hardware the first
+	 * slot it should *not* read.
+	 */
+	if (++ring->end >= BGMAC_TX_RING_SLOTS)
+		ring->end = 0;
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+		    ring->end * sizeof(struct bgmac_dma_desc));
+
+	/* Always keep one slot free to allow detecting bugged calls. */
+	if (--free_slots == 1)
+		netif_stop_queue(net_dev);
+
+	return NETDEV_TX_OK;
+
+err_stop_drop:
+	netif_stop_queue(net_dev);
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+/* Free transmitted packets */
+static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	int empty_slot;
+	bool freed = false;
+
+	/* The last slot that hardware didn't consume yet */
+	empty_slot = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS);
+	empty_slot &= BGMAC_DMA_TX_STATDPTR;
+	empty_slot /= sizeof(struct bgmac_dma_desc);
+
+	while (ring->start != empty_slot) {
+		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+
+		if (slot->skb) {
+			/* Unmap no longer used buffer */
+			dma_unmap_single(dma_dev, slot->dma_addr,
+					 slot->skb->len, DMA_TO_DEVICE);
+			slot->dma_addr = 0;
+
+			/* Free memory! :) */
+			dev_kfree_skb(slot->skb);
+			slot->skb = NULL;
+		} else {
+			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+				  ring->start, ring->end);
+		}
+
+		if (++ring->start >= BGMAC_TX_RING_SLOTS)
+			ring->start = 0;
+		freed = true;
+	}
+
+	if (freed && netif_queue_stopped(bgmac->net_dev))
+		netif_wake_queue(bgmac->net_dev);
+}
+
+static void bgmac_dma_rx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
+{
+	if (!ring->mmio_base)
+		return;
+
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL, 0);
+	if (!bgmac_wait_value(bgmac->core,
+			      ring->mmio_base + BGMAC_DMA_RX_STATUS,
+			      BGMAC_DMA_RX_STAT, BGMAC_DMA_RX_STAT_DISABLED,
+			      10000))
+		bgmac_err(bgmac, "Reset of ring 0x%X RX failed\n",
+			  ring->mmio_base);
+}
+
+static void bgmac_dma_rx_enable(struct bgmac *bgmac,
+				struct bgmac_dma_ring *ring)
+{
+	u32 ctl;
+
+	ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
+	ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
+	ctl |= BGMAC_DMA_RX_ENABLE;
+	ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
+	ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
+	ctl |= BGMAC_RX_FRAME_OFFSET << BGMAC_DMA_RX_FRAME_OFFSET_SHIFT;
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL, ctl);
+}
+
+static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
+				     struct bgmac_slot_info *slot)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_rx_header *rx;
+
+	/* Alloc skb */
+	slot->skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE);
+	if (!slot->skb) {
+		bgmac_err(bgmac, "Allocation of skb failed!\n");
+		return -ENOMEM;
+	}
+
+	/* Poison - if everything goes fine, hardware will overwrite it */
+	rx = (struct bgmac_rx_header *)slot->skb->data;
+	rx->len = cpu_to_le16(0xdead);
+	rx->flags = cpu_to_le16(0xbeef);
+
+	/* Map skb for the DMA */
+	slot->dma_addr = dma_map_single(dma_dev, slot->skb->data,
+					BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+		bgmac_err(bgmac, "DMA mapping error\n");
+		return -ENOMEM;
+	}
+	if (slot->dma_addr & 0xC0000000)
+		bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
+
+	return 0;
+}
+
+static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+			     int weight)
+{
+	u32 end_slot;
+	int handled = 0;
+
+	end_slot = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_STATUS);
+	end_slot &= BGMAC_DMA_RX_STATDPTR;
+	end_slot /= sizeof(struct bgmac_dma_desc);
+
+	ring->end = end_slot;
+
+	while (ring->start != ring->end) {
+		struct device *dma_dev = bgmac->core->dma_dev;
+		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+		struct sk_buff *skb = slot->skb;
+		struct sk_buff *new_skb;
+		struct bgmac_rx_header *rx;
+		u16 len, flags;
+
+		/* Unmap buffer to make it accessible to the CPU */
+		dma_sync_single_for_cpu(dma_dev, slot->dma_addr,
+					BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+
+		/* Get info from the header */
+		rx = (struct bgmac_rx_header *)skb->data;
+		len = le16_to_cpu(rx->len);
+		flags = le16_to_cpu(rx->flags);
+
+		/* Check for poison and drop or pass the packet */
+		if (len == 0xdead && flags == 0xbeef) {
+			bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n",
+				  ring->start);
+		} else {
+			new_skb = netdev_alloc_skb(bgmac->net_dev, len);
+			if (new_skb) {
+				skb_put(new_skb, len);
+				skb_copy_from_linear_data_offset(skb, BGMAC_RX_FRAME_OFFSET,
+								 new_skb->data,
+								 len);
+				new_skb->protocol =
+					eth_type_trans(new_skb, bgmac->net_dev);
+				netif_receive_skb(new_skb);
+				handled++;
+			} else {
+				bgmac->net_dev->stats.rx_dropped++;
+				bgmac_err(bgmac, "Allocation of skb for copying packet failed!\n");
+			}
+
+			/* Poison the old skb */
+			rx->len = cpu_to_le16(0xdead);
+			rx->flags = cpu_to_le16(0xbeef);
+		}
+
+		/* Make it back accessible to the hardware */
+		dma_sync_single_for_device(dma_dev, slot->dma_addr,
+					   BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+
+		if (++ring->start >= BGMAC_RX_RING_SLOTS)
+			ring->start = 0;
+
+		if (handled >= weight) /* Should never be greater */
+			break;
+	}
+
+	return handled;
+}
+
+/* Does ring support unaligned addressing? */
+static bool bgmac_dma_unaligned(struct bgmac *bgmac,
+				struct bgmac_dma_ring *ring,
+				enum bgmac_dma_ring_type ring_type)
+{
+	switch (ring_type) {
+	case BGMAC_DMA_RING_TX:
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGLO,
+			    0xff0);
+		if (bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGLO))
+			return true;
+		break;
+	case BGMAC_DMA_RING_RX:
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO,
+			    0xff0);
+		if (bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO))
+			return true;
+		break;
+	}
+	return false;
+}
+
+static void bgmac_dma_ring_free(struct bgmac *bgmac,
+				struct bgmac_dma_ring *ring)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_slot_info *slot;
+	int size;
+	int i;
+
+	for (i = 0; i < ring->num_slots; i++) {
+		slot = &ring->slots[i];
+		if (slot->skb) {
+			if (slot->dma_addr)
+				dma_unmap_single(dma_dev, slot->dma_addr,
+						 slot->skb->len, DMA_TO_DEVICE);
+			dev_kfree_skb(slot->skb);
+		}
+	}
+
+	if (ring->cpu_base) {
+		/* Free ring of descriptors */
+		size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+		dma_free_coherent(dma_dev, size, ring->cpu_base,
+				  ring->dma_base);
+	}
+}
+
+static void bgmac_dma_free(struct bgmac *bgmac)
+{
+	int i;
+
+	for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
+		bgmac_dma_ring_free(bgmac, &bgmac->tx_ring[i]);
+	for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
+		bgmac_dma_ring_free(bgmac, &bgmac->rx_ring[i]);
+}
+
+static int bgmac_dma_alloc(struct bgmac *bgmac)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_dma_ring *ring;
+	static const u16 ring_base[] = { BGMAC_DMA_BASE0, BGMAC_DMA_BASE1,
+					 BGMAC_DMA_BASE2, BGMAC_DMA_BASE3, };
+	int size; /* ring size: different for Tx and Rx */
+	int err;
+	int i;
+
+	BUILD_BUG_ON(BGMAC_MAX_TX_RINGS > ARRAY_SIZE(ring_base));
+	BUILD_BUG_ON(BGMAC_MAX_RX_RINGS > ARRAY_SIZE(ring_base));
+
+	if (!(bcma_aread32(bgmac->core, BCMA_IOST) & BCMA_IOST_DMA64)) {
+		bgmac_err(bgmac, "Core does not report 64-bit DMA\n");
+		return -ENOTSUPP;
+	}
+
+	for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
+		ring = &bgmac->tx_ring[i];
+		ring->num_slots = BGMAC_TX_RING_SLOTS;
+		ring->mmio_base = ring_base[i];
+		if (bgmac_dma_unaligned(bgmac, ring, BGMAC_DMA_RING_TX))
+			bgmac_warn(bgmac, "TX on ring 0x%X supports unaligned addressing but this feature is not implemented\n",
+				   ring->mmio_base);
+
+		/* Alloc ring of descriptors */
+		size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+		ring->cpu_base = dma_zalloc_coherent(dma_dev, size,
+						     &ring->dma_base,
+						     GFP_KERNEL);
+		if (!ring->cpu_base) {
+			bgmac_err(bgmac, "Allocation of TX ring 0x%X failed\n",
+				  ring->mmio_base);
+			goto err_dma_free;
+		}
+		if (ring->dma_base & 0xC0000000)
+			bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
+
+		/* No need to alloc TX slots yet */
+	}
+
+	for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
+		ring = &bgmac->rx_ring[i];
+		ring->num_slots = BGMAC_RX_RING_SLOTS;
+		ring->mmio_base = ring_base[i];
+		if (bgmac_dma_unaligned(bgmac, ring, BGMAC_DMA_RING_RX))
+			bgmac_warn(bgmac, "RX on ring 0x%X supports unaligned addressing but this feature is not implemented\n",
+				   ring->mmio_base);
+
+		/* Alloc ring of descriptors */
+		size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+		ring->cpu_base = dma_zalloc_coherent(dma_dev, size,
+						     &ring->dma_base,
+						     GFP_KERNEL);
+		if (!ring->cpu_base) {
+			bgmac_err(bgmac, "Allocation of RX ring 0x%X failed\n",
+				  ring->mmio_base);
+			err = -ENOMEM;
+			goto err_dma_free;
+		}
+		if (ring->dma_base & 0xC0000000)
+			bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
+
+		/* Alloc RX slots */
+		for (i = 0; i < ring->num_slots; i++) {
+			err = bgmac_dma_rx_skb_for_slot(bgmac, &ring->slots[i]);
+			if (err) {
+				bgmac_err(bgmac, "Can't allocate skb for slot in RX ring\n");
+				goto err_dma_free;
+			}
+		}
+	}
+
+	return 0;
+
+err_dma_free:
+	bgmac_dma_free(bgmac);
+	return -ENOMEM;
+}
+
+static void bgmac_dma_init(struct bgmac *bgmac)
+{
+	struct bgmac_dma_ring *ring;
+	struct bgmac_dma_desc *dma_desc;
+	u32 ctl0, ctl1;
+	int i;
+
+	for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
+		ring = &bgmac->tx_ring[i];
+
+		/* We don't implement unaligned addressing, so enable first */
+		bgmac_dma_tx_enable(bgmac, ring);
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGLO,
+			    lower_32_bits(ring->dma_base));
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGHI,
+			    upper_32_bits(ring->dma_base));
+
+		ring->start = 0;
+		ring->end = 0;	/* Points the slot that should *not* be read */
+	}
+
+	for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
+		ring = &bgmac->rx_ring[i];
+
+		/* We don't implement unaligned addressing, so enable first */
+		bgmac_dma_rx_enable(bgmac, ring);
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO,
+			    lower_32_bits(ring->dma_base));
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGHI,
+			    upper_32_bits(ring->dma_base));
+
+		for (i = 0, dma_desc = ring->cpu_base; i < ring->num_slots;
+		     i++, dma_desc++) {
+			ctl0 = ctl1 = 0;
+
+			if (i == ring->num_slots - 1)
+				ctl0 |= BGMAC_DESC_CTL0_EOT;
+			ctl1 |= BGMAC_RX_BUF_SIZE & BGMAC_DESC_CTL1_LEN;
+			/* Is there any BGMAC device that requires extension? */
+			/* ctl1 |= (addrext << B43_DMA64_DCTL1_ADDREXT_SHIFT) &
+			 * B43_DMA64_DCTL1_ADDREXT_MASK;
+			 */
+
+			dma_desc->addr_low = cpu_to_le32(lower_32_bits(ring->slots[i].dma_addr));
+			dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[i].dma_addr));
+			dma_desc->ctl0 = cpu_to_le32(ctl0);
+			dma_desc->ctl1 = cpu_to_le32(ctl1);
+		}
+
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
+			    ring->num_slots * sizeof(struct bgmac_dma_desc));
+
+		ring->start = 0;
+		ring->end = 0;
+	}
+}
+
+/**************************************************
+ * PHY ops
+ **************************************************/
+
+u16 bgmac_phy_read(struct bgmac *bgmac, u8 phyaddr, u8 reg)
+{
+	struct bcma_device *core;
+	u16 phy_access_addr;
+	u16 phy_ctl_addr;
+	u32 tmp;
+
+	BUILD_BUG_ON(BGMAC_PA_DATA_MASK != BCMA_GMAC_CMN_PA_DATA_MASK);
+	BUILD_BUG_ON(BGMAC_PA_ADDR_MASK != BCMA_GMAC_CMN_PA_ADDR_MASK);
+	BUILD_BUG_ON(BGMAC_PA_ADDR_SHIFT != BCMA_GMAC_CMN_PA_ADDR_SHIFT);
+	BUILD_BUG_ON(BGMAC_PA_REG_MASK != BCMA_GMAC_CMN_PA_REG_MASK);
+	BUILD_BUG_ON(BGMAC_PA_REG_SHIFT != BCMA_GMAC_CMN_PA_REG_SHIFT);
+	BUILD_BUG_ON(BGMAC_PA_WRITE != BCMA_GMAC_CMN_PA_WRITE);
+	BUILD_BUG_ON(BGMAC_PA_START != BCMA_GMAC_CMN_PA_START);
+	BUILD_BUG_ON(BGMAC_PC_EPA_MASK != BCMA_GMAC_CMN_PC_EPA_MASK);
+	BUILD_BUG_ON(BGMAC_PC_MCT_MASK != BCMA_GMAC_CMN_PC_MCT_MASK);
+	BUILD_BUG_ON(BGMAC_PC_MCT_SHIFT != BCMA_GMAC_CMN_PC_MCT_SHIFT);
+	BUILD_BUG_ON(BGMAC_PC_MTE != BCMA_GMAC_CMN_PC_MTE);
+
+	if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+		core = bgmac->core->bus->drv_gmac_cmn.core;
+		phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
+		phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
+	} else {
+		core = bgmac->core;
+		phy_access_addr = BGMAC_PHY_ACCESS;
+		phy_ctl_addr = BGMAC_PHY_CNTL;
+	}
+
+	tmp = bcma_read32(core, phy_ctl_addr);
+	tmp &= ~BGMAC_PC_EPA_MASK;
+	tmp |= phyaddr;
+	bcma_write32(core, phy_ctl_addr, tmp);
+
+	tmp = BGMAC_PA_START;
+	tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
+	tmp |= reg << BGMAC_PA_REG_SHIFT;
+	bcma_write32(core, phy_access_addr, tmp);
+
+	if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000)) {
+		bgmac_err(bgmac, "Reading PHY %d register 0x%X failed\n",
+			  phyaddr, reg);
+		return 0xffff;
+	}
+
+	return bcma_read32(core, phy_access_addr) & BGMAC_PA_DATA_MASK;
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphywr */
+void bgmac_phy_write(struct bgmac *bgmac, u8 phyaddr, u8 reg, u16 value)
+{
+	struct bcma_device *core;
+	u16 phy_access_addr;
+	u16 phy_ctl_addr;
+	u32 tmp;
+
+	if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+		core = bgmac->core->bus->drv_gmac_cmn.core;
+		phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
+		phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
+	} else {
+		core = bgmac->core;
+		phy_access_addr = BGMAC_PHY_ACCESS;
+		phy_ctl_addr = BGMAC_PHY_CNTL;
+	}
+
+	tmp = bcma_read32(core, phy_ctl_addr);
+	tmp &= ~BGMAC_PC_EPA_MASK;
+	tmp |= phyaddr;
+	bcma_write32(core, phy_ctl_addr, tmp);
+
+	bgmac_write(bgmac, BGMAC_INT_STATUS, BGMAC_IS_MDIO);
+	if (bgmac_read(bgmac, BGMAC_INT_STATUS) & BGMAC_IS_MDIO)
+		bgmac_warn(bgmac, "Error setting MDIO int\n");
+
+	tmp = BGMAC_PA_START;
+	tmp |= BGMAC_PA_WRITE;
+	tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
+	tmp |= reg << BGMAC_PA_REG_SHIFT;
+	tmp |= value;
+	bcma_write32(core, phy_access_addr, tmp);
+
+	if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000))
+		bgmac_err(bgmac, "Writing to PHY %d register 0x%X failed\n",
+			  phyaddr, reg);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyforce */
+static void bgmac_phy_force(struct bgmac *bgmac)
+{
+	u16 ctl;
+	u16 mask = ~(BGMAC_PHY_CTL_SPEED | BGMAC_PHY_CTL_SPEED_MSB |
+		     BGMAC_PHY_CTL_ANENAB | BGMAC_PHY_CTL_DUPLEX);
+
+	if (bgmac->phyaddr == BGMAC_PHY_NOREGS)
+		return;
+
+	if (bgmac->autoneg)
+		return;
+
+	ctl = bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL);
+	ctl &= mask;
+	if (bgmac->full_duplex)
+		ctl |= BGMAC_PHY_CTL_DUPLEX;
+	if (bgmac->speed == BGMAC_SPEED_100)
+		ctl |= BGMAC_PHY_CTL_SPEED_100;
+	else if (bgmac->speed == BGMAC_SPEED_1000)
+		ctl |= BGMAC_PHY_CTL_SPEED_1000;
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL, ctl);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyadvertise */
+static void bgmac_phy_advertise(struct bgmac *bgmac)
+{
+	u16 adv;
+
+	if (bgmac->phyaddr == BGMAC_PHY_NOREGS)
+		return;
+
+	if (!bgmac->autoneg)
+		return;
+
+	/* Adv selected 10/100 speeds */
+	adv = bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV);
+	adv &= ~(BGMAC_PHY_ADV_10HALF | BGMAC_PHY_ADV_10FULL |
+		 BGMAC_PHY_ADV_100HALF | BGMAC_PHY_ADV_100FULL);
+	if (!bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_10)
+		adv |= BGMAC_PHY_ADV_10HALF;
+	if (!bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_100)
+		adv |= BGMAC_PHY_ADV_100HALF;
+	if (bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_10)
+		adv |= BGMAC_PHY_ADV_10FULL;
+	if (bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_100)
+		adv |= BGMAC_PHY_ADV_100FULL;
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV, adv);
+
+	/* Adv selected 1000 speeds */
+	adv = bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV2);
+	adv &= ~(BGMAC_PHY_ADV2_1000HALF | BGMAC_PHY_ADV2_1000FULL);
+	if (!bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_1000)
+		adv |= BGMAC_PHY_ADV2_1000HALF;
+	if (bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_1000)
+		adv |= BGMAC_PHY_ADV2_1000FULL;
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV2, adv);
+
+	/* Restart */
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL,
+			bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL) |
+			BGMAC_PHY_CTL_RESTART);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyinit */
+static void bgmac_phy_init(struct bgmac *bgmac)
+{
+	struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo;
+	struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc;
+	u8 i;
+
+	if (ci->id == BCMA_CHIP_ID_BCM5356) {
+		for (i = 0; i < 5; i++) {
+			bgmac_phy_write(bgmac, i, 0x1f, 0x008b);
+			bgmac_phy_write(bgmac, i, 0x15, 0x0100);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
+			bgmac_phy_write(bgmac, i, 0x12, 0x2aaa);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
+		}
+	}
+	if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg != 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg != 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg != 9)) {
+		bcma_chipco_chipctl_maskset(cc, 2, ~0xc0000000, 0);
+		bcma_chipco_chipctl_maskset(cc, 4, ~0x80000000, 0);
+		for (i = 0; i < 5; i++) {
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
+			bgmac_phy_write(bgmac, i, 0x16, 0x5284);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
+			bgmac_phy_write(bgmac, i, 0x17, 0x0010);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
+			bgmac_phy_write(bgmac, i, 0x16, 0x5296);
+			bgmac_phy_write(bgmac, i, 0x17, 0x1073);
+			bgmac_phy_write(bgmac, i, 0x17, 0x9073);
+			bgmac_phy_write(bgmac, i, 0x16, 0x52b6);
+			bgmac_phy_write(bgmac, i, 0x17, 0x9273);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
+		}
+	}
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyreset */
+static void bgmac_phy_reset(struct bgmac *bgmac)
+{
+	if (bgmac->phyaddr == BGMAC_PHY_NOREGS)
+		return;
+
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL,
+			BGMAC_PHY_CTL_RESET);
+	udelay(100);
+	if (bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL) &
+	    BGMAC_PHY_CTL_RESET)
+		bgmac_err(bgmac, "PHY reset failed\n");
+	bgmac_phy_init(bgmac);
+}
+
+/**************************************************
+ * Chip ops
+ **************************************************/
+
+/* TODO: can we just drop @force? Can we don't reset MAC at all if there is
+ * nothing to change? Try if after stabilizng driver.
+ */
+static void bgmac_cmdcfg_maskset(struct bgmac *bgmac, u32 mask, u32 set,
+				 bool force)
+{
+	u32 cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
+	u32 new_val = (cmdcfg & mask) | set;
+
+	bgmac_set(bgmac, BGMAC_CMDCFG, BGMAC_CMDCFG_SR);
+	udelay(2);
+
+	if (new_val != cmdcfg || force)
+		bgmac_write(bgmac, BGMAC_CMDCFG, new_val);
+
+	bgmac_mask(bgmac, BGMAC_CMDCFG, ~BGMAC_CMDCFG_SR);
+	udelay(2);
+}
+
+#if 0 /* We don't use that regs yet */
+static void bgmac_chip_stats_update(struct bgmac *bgmac)
+{
+	int i;
+
+	if (bgmac->core->id.id != BCMA_CORE_4706_MAC_GBIT) {
+		for (i = 0; i < BGMAC_NUM_MIB_TX_REGS; i++)
+			bgmac->mib_tx_regs[i] =
+				bgmac_read(bgmac,
+					   BGMAC_TX_GOOD_OCTETS + (i * 4));
+		for (i = 0; i < BGMAC_NUM_MIB_RX_REGS; i++)
+			bgmac->mib_rx_regs[i] =
+				bgmac_read(bgmac,
+					   BGMAC_RX_GOOD_OCTETS + (i * 4));
+	}
+
+	/* TODO: what else? how to handle BCM4706? Specs are needed */
+}
+#endif
+
+static void bgmac_clear_mib(struct bgmac *bgmac)
+{
+	int i;
+
+	if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT)
+		return;
+
+	bgmac_set(bgmac, BGMAC_DEV_CTL, BGMAC_DC_MROR);
+	for (i = 0; i < BGMAC_NUM_MIB_TX_REGS; i++)
+		bgmac_read(bgmac, BGMAC_TX_GOOD_OCTETS + (i * 4));
+	for (i = 0; i < BGMAC_NUM_MIB_RX_REGS; i++)
+		bgmac_read(bgmac, BGMAC_RX_GOOD_OCTETS + (i * 4));
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_speed */
+static void bgmac_speed(struct bgmac *bgmac, int speed)
+{
+	u32 mask = ~(BGMAC_CMDCFG_ES_MASK | BGMAC_CMDCFG_HD);
+	u32 set = 0;
+
+	if (speed & BGMAC_SPEED_10)
+		set |= BGMAC_CMDCFG_ES_10;
+	if (speed & BGMAC_SPEED_100)
+		set |= BGMAC_CMDCFG_ES_100;
+	if (speed & BGMAC_SPEED_1000)
+		set |= BGMAC_CMDCFG_ES_1000;
+	if (!bgmac->full_duplex)
+		set |= BGMAC_CMDCFG_HD;
+	bgmac_cmdcfg_maskset(bgmac, mask, set, true);
+}
+
+static void bgmac_miiconfig(struct bgmac *bgmac)
+{
+	u8 imode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
+			BGMAC_DS_MM_SHIFT;
+	if (imode == 0 || imode == 1) {
+		if (bgmac->autoneg)
+			bgmac_speed(bgmac, BGMAC_SPEED_100);
+		else
+			bgmac_speed(bgmac, bgmac->speed);
+	}
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipreset */
+static void bgmac_chip_reset(struct bgmac *bgmac)
+{
+	struct bcma_device *core = bgmac->core;
+	struct bcma_bus *bus = core->bus;
+	struct bcma_chipinfo *ci = &bus->chipinfo;
+	u32 flags = 0;
+	u32 iost;
+	int i;
+
+	if (bcma_core_is_enabled(core)) {
+		if (!bgmac->stats_grabbed) {
+			/* bgmac_chip_stats_update(bgmac); */
+			bgmac->stats_grabbed = true;
+		}
+
+		for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
+			bgmac_dma_tx_reset(bgmac, &bgmac->tx_ring[i]);
+
+		bgmac_cmdcfg_maskset(bgmac, ~0, BGMAC_CMDCFG_ML, false);
+		udelay(1);
+
+		for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
+			bgmac_dma_rx_reset(bgmac, &bgmac->rx_ring[i]);
+
+		/* TODO: Clear software multicast filter list */
+	}
+
+	iost = bcma_aread32(core, BCMA_IOST);
+	if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg == 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == 9))
+		iost &= ~BGMAC_BCMA_IOST_ATTACHED;
+
+	if (iost & BGMAC_BCMA_IOST_ATTACHED) {
+		flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
+		if (!bgmac->has_robosw)
+			flags |= BGMAC_BCMA_IOCTL_SW_RESET;
+	}
+
+	bcma_core_enable(core, flags);
+
+	if (core->id.rev > 2) {
+		bgmac_set(bgmac, BCMA_CLKCTLST, 1 << 8);
+		bgmac_wait_value(bgmac->core, BCMA_CLKCTLST, 1 << 24, 1 << 24,
+				 1000);
+	}
+
+	if (ci->id == BCMA_CHIP_ID_BCM5357 || ci->id == BCMA_CHIP_ID_BCM4749 ||
+	    ci->id == BCMA_CHIP_ID_BCM53572) {
+		struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc;
+		u8 et_swtype = 0;
+		u8 sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHY |
+			     BGMAC_CHIPCTL_1_IF_TYPE_RMII;
+		char buf[2];
+
+		if (nvram_getenv("et_swtype", buf, 1) > 0) {
+			if (kstrtou8(buf, 0, &et_swtype))
+				bgmac_err(bgmac, "Failed to parse et_swtype (%s)\n",
+					  buf);
+			et_swtype &= 0x0f;
+			et_swtype <<= 4;
+			sw_type = et_swtype;
+		} else if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == 9) {
+			sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII;
+		} else if (0) {
+			/* TODO */
+		}
+		bcma_chipco_chipctl_maskset(cc, 1,
+					    ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK |
+					      BGMAC_CHIPCTL_1_SW_TYPE_MASK),
+					    sw_type);
+	}
+
+	if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
+		bcma_awrite32(core, BCMA_IOCTL,
+			      bcma_aread32(core, BCMA_IOCTL) &
+			      ~BGMAC_BCMA_IOCTL_SW_RESET);
+
+	/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_reset
+	 * Specs don't say about using BGMAC_CMDCFG_SR, but in this routine
+	 * BGMAC_CMDCFG is read _after_ putting chip in a reset. So it has to
+	 * be keps until taking MAC out of the reset.
+	 */
+	bgmac_cmdcfg_maskset(bgmac,
+			     ~(BGMAC_CMDCFG_TE |
+			       BGMAC_CMDCFG_RE |
+			       BGMAC_CMDCFG_RPI |
+			       BGMAC_CMDCFG_TAI |
+			       BGMAC_CMDCFG_HD |
+			       BGMAC_CMDCFG_ML |
+			       BGMAC_CMDCFG_CFE |
+			       BGMAC_CMDCFG_RL |
+			       BGMAC_CMDCFG_RED |
+			       BGMAC_CMDCFG_PE |
+			       BGMAC_CMDCFG_TPI |
+			       BGMAC_CMDCFG_PAD_EN |
+			       BGMAC_CMDCFG_PF),
+			     BGMAC_CMDCFG_PROM |
+			     BGMAC_CMDCFG_NLC |
+			     BGMAC_CMDCFG_CFE |
+			     BGMAC_CMDCFG_SR,
+			     false);
+
+	bgmac_clear_mib(bgmac);
+	if (core->id.id == BCMA_CORE_4706_MAC_GBIT)
+		bcma_maskset32(bgmac->cmn, BCMA_GMAC_CMN_PHY_CTL, ~0,
+			       BCMA_GMAC_CMN_PC_MTE);
+	else
+		bgmac_set(bgmac, BGMAC_PHY_CNTL, BGMAC_PC_MTE);
+	bgmac_miiconfig(bgmac);
+	bgmac_phy_init(bgmac);
+
+	bgmac->int_status = 0;
+}
+
+static void bgmac_chip_intrs_on(struct bgmac *bgmac)
+{
+	bgmac_write(bgmac, BGMAC_INT_MASK, bgmac->int_mask);
+}
+
+static void bgmac_chip_intrs_off(struct bgmac *bgmac)
+{
+	bgmac_write(bgmac, BGMAC_INT_MASK, 0);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_enable */
+static void bgmac_enable(struct bgmac *bgmac)
+{
+	struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo;
+	u32 cmdcfg;
+	u32 mode;
+	u32 rxq_ctl;
+	u32 fl_ctl;
+	u16 bp_clk;
+	u8 mdp;
+
+	cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
+	bgmac_cmdcfg_maskset(bgmac, ~(BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE),
+			     BGMAC_CMDCFG_SR, true);
+	udelay(2);
+	cmdcfg |= BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE;
+	bgmac_write(bgmac, BGMAC_CMDCFG, cmdcfg);
+
+	mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
+		BGMAC_DS_MM_SHIFT;
+	if (ci->id != BCMA_CHIP_ID_BCM47162 || mode != 0)
+		bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT);
+	if (ci->id == BCMA_CHIP_ID_BCM47162 && mode == 2)
+		bcma_chipco_chipctl_maskset(&bgmac->core->bus->drv_cc, 1, ~0,
+					    BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
+
+	switch (ci->id) {
+	case BCMA_CHIP_ID_BCM5357:
+	case BCMA_CHIP_ID_BCM4749:
+	case BCMA_CHIP_ID_BCM53572:
+	case BCMA_CHIP_ID_BCM4716:
+	case BCMA_CHIP_ID_BCM47162:
+		fl_ctl = 0x03cb04cb;
+		if (ci->id == BCMA_CHIP_ID_BCM5357 ||
+		    ci->id == BCMA_CHIP_ID_BCM4749 ||
+		    ci->id == BCMA_CHIP_ID_BCM53572)
+			fl_ctl = 0x2300e1;
+		bgmac_write(bgmac, BGMAC_FLOW_CTL_THRESH, fl_ctl);
+		bgmac_write(bgmac, BGMAC_PAUSE_CTL, 0x27fff);
+		break;
+	}
+
+	rxq_ctl = bgmac_read(bgmac, BGMAC_RXQ_CTL);
+	rxq_ctl &= ~BGMAC_RXQ_CTL_MDP_MASK;
+	bp_clk = bcma_pmu_get_bus_clock(&bgmac->core->bus->drv_cc) / 1000000;
+	mdp = (bp_clk * 128 / 1000) - 3;
+	rxq_ctl |= (mdp << BGMAC_RXQ_CTL_MDP_SHIFT);
+	bgmac_write(bgmac, BGMAC_RXQ_CTL, rxq_ctl);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipinit */
+static void bgmac_chip_init(struct bgmac *bgmac, bool full_init)
+{
+	struct bgmac_dma_ring *ring;
+	u8 *mac = bgmac->net_dev->dev_addr;
+	u32 tmp;
+	int i;
+
+	/* 1 interrupt per received frame */
+	bgmac_write(bgmac, BGMAC_INT_RECV_LAZY, 1 << BGMAC_IRL_FC_SHIFT);
+
+	/* Enable 802.3x tx flow control (honor received PAUSE frames) */
+	bgmac_cmdcfg_maskset(bgmac, ~BGMAC_CMDCFG_RPI, 0, true);
+
+	if (bgmac->net_dev->flags & IFF_PROMISC)
+		bgmac_cmdcfg_maskset(bgmac, ~0, BGMAC_CMDCFG_PROM, false);
+	else
+		bgmac_cmdcfg_maskset(bgmac, ~BGMAC_CMDCFG_PROM, 0, false);
+
+	/* Set MAC addr */
+	tmp = (mac[0] << 24) | (mac[1] << 16) | (mac[2] << 8) | mac[3];
+	bgmac_write(bgmac, BGMAC_MACADDR_HIGH, tmp);
+	tmp = (mac[4] << 8) | mac[5];
+	bgmac_write(bgmac, BGMAC_MACADDR_LOW, tmp);
+
+	if (bgmac->loopback)
+		bgmac_cmdcfg_maskset(bgmac, ~0, BGMAC_CMDCFG_ML, true);
+	else
+		bgmac_cmdcfg_maskset(bgmac, ~BGMAC_CMDCFG_ML, 0, true);
+
+	bgmac_write(bgmac, BGMAC_RXMAX_LENGTH, 32 + ETHER_MAX_LEN);
+
+	if (!bgmac->autoneg) {
+		bgmac_speed(bgmac, bgmac->speed);
+		bgmac_phy_force(bgmac);
+	} else if (bgmac->speed) { /* if there is anything to adv */
+		bgmac_phy_advertise(bgmac);
+	}
+
+	if (full_init) {
+		bgmac_dma_init(bgmac);
+		if (1) /* FIXME: is there any case we don't want IRQs? */
+			bgmac_chip_intrs_on(bgmac);
+	} else {
+		for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
+			ring = &bgmac->rx_ring[i];
+			bgmac_dma_rx_enable(bgmac, ring);
+		}
+	}
+
+	bgmac_enable(bgmac);
+}
+
+static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
+{
+	struct bgmac *bgmac = netdev_priv(dev_id);
+
+	u32 int_status = bgmac_read(bgmac, BGMAC_INT_STATUS);
+	int_status &= bgmac->int_mask;
+
+	if (!int_status)
+		return IRQ_NONE;
+
+	/* Ack */
+	bgmac_write(bgmac, BGMAC_INT_STATUS, int_status);
+
+	/* Disable new interrupts until handling existing ones */
+	bgmac_chip_intrs_off(bgmac);
+
+	bgmac->int_status = int_status;
+
+	napi_schedule(&bgmac->napi);
+
+	return IRQ_HANDLED;
+}
+
+static int bgmac_poll(struct napi_struct *napi, int weight)
+{
+	struct bgmac *bgmac = container_of(napi, struct bgmac, napi);
+	struct bgmac_dma_ring *ring;
+	int handled = 0;
+
+	if (bgmac->int_status & BGMAC_IS_TX0) {
+		ring = &bgmac->tx_ring[0];
+		bgmac_dma_tx_free(bgmac, ring);
+		bgmac->int_status &= ~BGMAC_IS_TX0;
+	}
+
+	if (bgmac->int_status & BGMAC_IS_RX) {
+		ring = &bgmac->rx_ring[0];
+		handled += bgmac_dma_rx_read(bgmac, ring, weight);
+		bgmac->int_status &= ~BGMAC_IS_RX;
+	}
+
+	if (bgmac->int_status) {
+		bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", bgmac->int_status);
+		bgmac->int_status = 0;
+	}
+
+	if (handled < weight)
+		napi_complete(napi);
+
+	bgmac_chip_intrs_on(bgmac);
+
+	return handled;
+}
+
+/**************************************************
+ * net_device_ops
+ **************************************************/
+
+static int bgmac_open(struct net_device *net_dev)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+	int err = 0;
+
+	bgmac_chip_reset(bgmac);
+	/* Specs say about reclaiming rings here, but we do that in DMA init */
+	bgmac_chip_init(bgmac, true);
+
+	err = request_irq(bgmac->core->irq, bgmac_interrupt, IRQF_SHARED,
+			  KBUILD_MODNAME, net_dev);
+	if (err < 0) {
+		bgmac_err(bgmac, "IRQ request error: %d!\n", err);
+		goto err_out;
+	}
+	napi_enable(&bgmac->napi);
+
+	netif_carrier_on(net_dev);
+
+err_out:
+	return err;
+}
+
+static int bgmac_stop(struct net_device *net_dev)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+
+	netif_carrier_off(net_dev);
+
+	napi_disable(&bgmac->napi);
+	bgmac_chip_intrs_off(bgmac);
+	free_irq(bgmac->core->irq, net_dev);
+
+	bgmac_chip_reset(bgmac);
+
+	return 0;
+}
+
+static netdev_tx_t bgmac_start_xmit(struct sk_buff *skb,
+				    struct net_device *net_dev)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+	struct bgmac_dma_ring *ring;
+
+	/* No QOS support yet */
+	ring = &bgmac->tx_ring[0];
+	return bgmac_dma_tx_add(bgmac, ring, skb);
+}
+
+static int bgmac_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+	struct mii_ioctl_data *data = if_mii(ifr);
+
+	switch (cmd) {
+	case SIOCGMIIPHY:
+		data->phy_id = bgmac->phyaddr;
+		/* fallthru */
+	case SIOCGMIIREG:
+		if (!netif_running(net_dev))
+			return -EAGAIN;
+		data->val_out = bgmac_phy_read(bgmac, data->phy_id,
+					       data->reg_num & 0x1f);
+		return 0;
+	case SIOCSMIIREG:
+		if (!netif_running(net_dev))
+			return -EAGAIN;
+		bgmac_phy_write(bgmac, data->phy_id, data->reg_num & 0x1f,
+				data->val_in);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct net_device_ops bgmac_netdev_ops = {
+	.ndo_open		= bgmac_open,
+	.ndo_stop		= bgmac_stop,
+	.ndo_start_xmit		= bgmac_start_xmit,
+	.ndo_set_mac_address	= eth_mac_addr, /* generic, sets dev_addr */
+	.ndo_do_ioctl           = bgmac_ioctl,
+};
+
+/**************************************************
+ * ethtool_ops
+ **************************************************/
+
+static int bgmac_get_settings(struct net_device *net_dev,
+			      struct ethtool_cmd *cmd)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+
+	cmd->supported = SUPPORTED_10baseT_Half |
+			 SUPPORTED_10baseT_Full |
+			 SUPPORTED_100baseT_Half |
+			 SUPPORTED_100baseT_Full |
+			 SUPPORTED_1000baseT_Half |
+			 SUPPORTED_1000baseT_Full |
+			 SUPPORTED_Autoneg;
+
+	if (bgmac->autoneg) {
+		WARN_ON(cmd->advertising);
+		if (bgmac->full_duplex) {
+			if (bgmac->speed & BGMAC_SPEED_10)
+				cmd->advertising |= ADVERTISED_10baseT_Full;
+			if (bgmac->speed & BGMAC_SPEED_100)
+				cmd->advertising |= ADVERTISED_100baseT_Full;
+			if (bgmac->speed & BGMAC_SPEED_1000)
+				cmd->advertising |= ADVERTISED_1000baseT_Full;
+		} else {
+			if (bgmac->speed & BGMAC_SPEED_10)
+				cmd->advertising |= ADVERTISED_10baseT_Half;
+			if (bgmac->speed & BGMAC_SPEED_100)
+				cmd->advertising |= ADVERTISED_100baseT_Half;
+			if (bgmac->speed & BGMAC_SPEED_1000)
+				cmd->advertising |= ADVERTISED_1000baseT_Half;
+		}
+	} else {
+		switch (bgmac->speed) {
+		case BGMAC_SPEED_10:
+			ethtool_cmd_speed_set(cmd, SPEED_10);
+			break;
+		case BGMAC_SPEED_100:
+			ethtool_cmd_speed_set(cmd, SPEED_100);
+			break;
+		case BGMAC_SPEED_1000:
+			ethtool_cmd_speed_set(cmd, SPEED_1000);
+			break;
+		}
+	}
+
+	cmd->duplex = bgmac->full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
+
+	cmd->autoneg = bgmac->autoneg;
+
+	return 0;
+}
+
+#if 0
+static int bgmac_set_settings(struct net_device *net_dev,
+			      struct ethtool_cmd *cmd)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+
+	return -1;
+}
+#endif
+
+static void bgmac_get_drvinfo(struct net_device *net_dev,
+			      struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+	strlcpy(info->bus_info, "BCMA", sizeof(info->bus_info));
+}
+
+static const struct ethtool_ops bgmac_ethtool_ops = {
+	.get_settings		= bgmac_get_settings,
+	.get_drvinfo		= bgmac_get_drvinfo,
+};
+
+/**************************************************
+ * BCMA bus ops
+ **************************************************/
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */
+static int bgmac_probe(struct bcma_device *core)
+{
+	struct net_device *net_dev;
+	struct bgmac *bgmac;
+	struct ssb_sprom *sprom = &core->bus->sprom;
+	u8 *mac = core->core_unit ? sprom->et1mac : sprom->et0mac;
+	int err;
+
+	/* We don't support 2nd, 3rd, ... units, SPROM has to be adjusted */
+	if (core->core_unit > 1) {
+		pr_err("Unsupported core_unit %d\n", core->core_unit);
+		return -ENOTSUPP;
+	}
+
+	/* Allocation and references */
+	net_dev = alloc_etherdev(sizeof(*bgmac));
+	if (!net_dev)
+		return -ENOMEM;
+	net_dev->netdev_ops = &bgmac_netdev_ops;
+	net_dev->irq = core->irq;
+	SET_ETHTOOL_OPS(net_dev, &bgmac_ethtool_ops);
+	bgmac = netdev_priv(net_dev);
+	bgmac->net_dev = net_dev;
+	bgmac->core = core;
+	bcma_set_drvdata(core, bgmac);
+
+	/* Defaults */
+	bgmac->autoneg = true;
+	bgmac->full_duplex = true;
+	bgmac->speed = BGMAC_SPEED_10 | BGMAC_SPEED_100 | BGMAC_SPEED_1000;
+	memcpy(bgmac->net_dev->dev_addr, mac, ETH_ALEN);
+
+	/* On BCM4706 we need common core to access PHY */
+	if (core->id.id == BCMA_CORE_4706_MAC_GBIT &&
+	    !core->bus->drv_gmac_cmn.core) {
+		bgmac_err(bgmac, "GMAC CMN core not found (required for BCM4706)\n");
+		err = -ENODEV;
+		goto err_netdev_free;
+	}
+	bgmac->cmn = core->bus->drv_gmac_cmn.core;
+
+	bgmac->phyaddr = core->core_unit ? sprom->et1phyaddr :
+			 sprom->et0phyaddr;
+	bgmac->phyaddr &= BGMAC_PHY_MASK;
+	if (bgmac->phyaddr == BGMAC_PHY_MASK) {
+		bgmac_err(bgmac, "No PHY found\n");
+		err = -ENODEV;
+		goto err_netdev_free;
+	}
+	bgmac_info(bgmac, "Found PHY addr: %d%s\n", bgmac->phyaddr,
+		   bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : "");
+
+	if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
+		bgmac_err(bgmac, "PCI setup not implemented\n");
+		err = -ENOTSUPP;
+		goto err_netdev_free;
+	}
+
+	bgmac_chip_reset(bgmac);
+
+	err = bgmac_dma_alloc(bgmac);
+	if (err) {
+		bgmac_err(bgmac, "Unable to alloc memory for DMA\n");
+		goto err_netdev_free;
+	}
+
+	bgmac->int_mask = BGMAC_IS_ERRMASK | BGMAC_IS_RX | BGMAC_IS_TX_MASK;
+	if (nvram_getenv("et0_no_txint", NULL, 0) == 0)
+		bgmac->int_mask &= ~BGMAC_IS_TX_MASK;
+
+	/* TODO: reset the external phy. Specs are needed */
+	bgmac_phy_reset(bgmac);
+
+	bgmac->has_robosw = !!(core->bus->sprom.boardflags_lo &
+			       BGMAC_BFL_ENETROBO);
+	if (bgmac->has_robosw)
+		bgmac_warn(bgmac, "Support for Roboswitch not implemented\n");
+
+	if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM)
+		bgmac_warn(bgmac, "Support for ADMtek ethernet switch not implemented\n");
+
+	err = register_netdev(bgmac->net_dev);
+	if (err) {
+		bgmac_err(bgmac, "Cannot register net device\n");
+		err = -ENOTSUPP;
+		goto err_dma_free;
+	}
+
+	netif_carrier_off(net_dev);
+
+	netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT);
+
+	return 0;
+
+err_dma_free:
+	bgmac_dma_free(bgmac);
+
+err_netdev_free:
+	bcma_set_drvdata(core, NULL);
+	free_netdev(net_dev);
+
+	return err;
+}
+
+static void bgmac_remove(struct bcma_device *core)
+{
+	struct bgmac *bgmac = bcma_get_drvdata(core);
+
+	netif_napi_del(&bgmac->napi);
+	unregister_netdev(bgmac->net_dev);
+	bgmac_dma_free(bgmac);
+	bcma_set_drvdata(core, NULL);
+	free_netdev(bgmac->net_dev);
+}
+
+static struct bcma_driver bgmac_bcma_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= bgmac_bcma_tbl,
+	.probe		= bgmac_probe,
+	.remove		= bgmac_remove,
+};
+
+static int __init bgmac_init(void)
+{
+	int err;
+
+	err = bcma_driver_register(&bgmac_bcma_driver);
+	if (err)
+		return err;
+	pr_info("Broadcom 47xx GBit MAC driver loaded\n");
+
+	return 0;
+}
+
+static void __exit bgmac_exit(void)
+{
+	bcma_driver_unregister(&bgmac_bcma_driver);
+}
+
+module_init(bgmac_init)
+module_exit(bgmac_exit)
+
+MODULE_AUTHOR("Rafał Miłecki");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
new file mode 100644
index 000000000000..129947017041
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -0,0 +1,456 @@
+#ifndef _BGMAC_H
+#define _BGMAC_H
+
+#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
+
+#define bgmac_err(bgmac, fmt, ...) \
+	dev_err(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
+#define bgmac_warn(bgmac, fmt, ...) \
+	dev_warn(&(bgmac)->core->dev, fmt,  ##__VA_ARGS__)
+#define bgmac_info(bgmac, fmt, ...) \
+	dev_info(&(bgmac)->core->dev, fmt,  ##__VA_ARGS__)
+#define bgmac_dbg(bgmac, fmt, ...) \
+	dev_dbg(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
+
+#include <linux/bcma/bcma.h>
+#include <linux/netdevice.h>
+
+#define BGMAC_DEV_CTL				0x000
+#define  BGMAC_DC_TSM				0x00000002
+#define  BGMAC_DC_CFCO				0x00000004
+#define  BGMAC_DC_RLSS				0x00000008
+#define  BGMAC_DC_MROR				0x00000010
+#define  BGMAC_DC_FCM_MASK			0x00000060
+#define  BGMAC_DC_FCM_SHIFT			5
+#define  BGMAC_DC_NAE				0x00000080
+#define  BGMAC_DC_TF				0x00000100
+#define  BGMAC_DC_RDS_MASK			0x00030000
+#define  BGMAC_DC_RDS_SHIFT			16
+#define  BGMAC_DC_TDS_MASK			0x000c0000
+#define  BGMAC_DC_TDS_SHIFT			18
+#define BGMAC_DEV_STATUS			0x004		/* Configuration of the interface */
+#define  BGMAC_DS_RBF				0x00000001
+#define  BGMAC_DS_RDF				0x00000002
+#define  BGMAC_DS_RIF				0x00000004
+#define  BGMAC_DS_TBF				0x00000008
+#define  BGMAC_DS_TDF				0x00000010
+#define  BGMAC_DS_TIF				0x00000020
+#define  BGMAC_DS_PO				0x00000040
+#define  BGMAC_DS_MM_MASK			0x00000300	/* Mode of the interface */
+#define  BGMAC_DS_MM_SHIFT			8
+#define BGMAC_BIST_STATUS			0x00c
+#define BGMAC_INT_STATUS			0x020		/* Interrupt status */
+#define  BGMAC_IS_MRO				0x00000001
+#define  BGMAC_IS_MTO				0x00000002
+#define  BGMAC_IS_TFD				0x00000004
+#define  BGMAC_IS_LS				0x00000008
+#define  BGMAC_IS_MDIO				0x00000010
+#define  BGMAC_IS_MR				0x00000020
+#define  BGMAC_IS_MT				0x00000040
+#define  BGMAC_IS_TO				0x00000080
+#define  BGMAC_IS_DESC_ERR			0x00000400	/* Descriptor error */
+#define  BGMAC_IS_DATA_ERR			0x00000800	/* Data error */
+#define  BGMAC_IS_DESC_PROT_ERR			0x00001000	/* Descriptor protocol error */
+#define  BGMAC_IS_RX_DESC_UNDERF		0x00002000	/* Receive descriptor underflow */
+#define  BGMAC_IS_RX_F_OVERF			0x00004000	/* Receive FIFO overflow */
+#define  BGMAC_IS_TX_F_UNDERF			0x00008000	/* Transmit FIFO underflow */
+#define  BGMAC_IS_RX				0x00010000	/* Interrupt for RX queue 0 */
+#define  BGMAC_IS_TX0				0x01000000	/* Interrupt for TX queue 0 */
+#define  BGMAC_IS_TX1				0x02000000	/* Interrupt for TX queue 1 */
+#define  BGMAC_IS_TX2				0x04000000	/* Interrupt for TX queue 2 */
+#define  BGMAC_IS_TX3				0x08000000	/* Interrupt for TX queue 3 */
+#define  BGMAC_IS_TX_MASK			0x0f000000
+#define  BGMAC_IS_INTMASK			0x0f01fcff
+#define  BGMAC_IS_ERRMASK			0x0000fc00
+#define BGMAC_INT_MASK				0x024		/* Interrupt mask */
+#define BGMAC_GP_TIMER				0x028
+#define BGMAC_INT_RECV_LAZY			0x100
+#define  BGMAC_IRL_TO_MASK			0x00ffffff
+#define  BGMAC_IRL_FC_MASK			0xff000000
+#define  BGMAC_IRL_FC_SHIFT			24		/* Shift the number of interrupts triggered per received frame */
+#define BGMAC_FLOW_CTL_THRESH			0x104		/* Flow control thresholds */
+#define BGMAC_WRRTHRESH				0x108
+#define BGMAC_GMAC_IDLE_CNT_THRESH		0x10c
+#define BGMAC_PHY_ACCESS			0x180		/* PHY access address */
+#define  BGMAC_PA_DATA_MASK			0x0000ffff
+#define  BGMAC_PA_ADDR_MASK			0x001f0000
+#define  BGMAC_PA_ADDR_SHIFT			16
+#define  BGMAC_PA_REG_MASK			0x1f000000
+#define  BGMAC_PA_REG_SHIFT			24
+#define  BGMAC_PA_WRITE				0x20000000
+#define  BGMAC_PA_START				0x40000000
+#define BGMAC_PHY_CNTL				0x188		/* PHY control address */
+#define  BGMAC_PC_EPA_MASK			0x0000001f
+#define  BGMAC_PC_MCT_MASK			0x007f0000
+#define  BGMAC_PC_MCT_SHIFT			16
+#define  BGMAC_PC_MTE				0x00800000
+#define BGMAC_TXQ_CTL				0x18c
+#define  BGMAC_TXQ_CTL_DBT_MASK			0x00000fff
+#define  BGMAC_TXQ_CTL_DBT_SHIFT		0
+#define BGMAC_RXQ_CTL				0x190
+#define  BGMAC_RXQ_CTL_DBT_MASK			0x00000fff
+#define  BGMAC_RXQ_CTL_DBT_SHIFT		0
+#define  BGMAC_RXQ_CTL_PTE			0x00001000
+#define  BGMAC_RXQ_CTL_MDP_MASK			0x3f000000
+#define  BGMAC_RXQ_CTL_MDP_SHIFT		24
+#define BGMAC_GPIO_SELECT			0x194
+#define BGMAC_GPIO_OUTPUT_EN			0x198
+/* For 0x1e0 see BCMA_CLKCTLST */
+#define BGMAC_HW_WAR				0x1e4
+#define BGMAC_PWR_CTL				0x1e8
+#define BGMAC_DMA_BASE0				0x200		/* Tx and Rx controller */
+#define BGMAC_DMA_BASE1				0x240		/* Tx controller only */
+#define BGMAC_DMA_BASE2				0x280		/* Tx controller only */
+#define BGMAC_DMA_BASE3				0x2C0		/* Tx controller only */
+#define BGMAC_TX_GOOD_OCTETS			0x300
+#define BGMAC_TX_GOOD_OCTETS_HIGH		0x304
+#define BGMAC_TX_GOOD_PKTS			0x308
+#define BGMAC_TX_OCTETS				0x30c
+#define BGMAC_TX_OCTETS_HIGH			0x310
+#define BGMAC_TX_PKTS				0x314
+#define BGMAC_TX_BROADCAST_PKTS			0x318
+#define BGMAC_TX_MULTICAST_PKTS			0x31c
+#define BGMAC_TX_LEN_64				0x320
+#define BGMAC_TX_LEN_65_TO_127			0x324
+#define BGMAC_TX_LEN_128_TO_255			0x328
+#define BGMAC_TX_LEN_256_TO_511			0x32c
+#define BGMAC_TX_LEN_512_TO_1023		0x330
+#define BGMAC_TX_LEN_1024_TO_1522		0x334
+#define BGMAC_TX_LEN_1523_TO_2047		0x338
+#define BGMAC_TX_LEN_2048_TO_4095		0x33c
+#define BGMAC_TX_LEN_4095_TO_8191		0x340
+#define BGMAC_TX_LEN_8192_TO_MAX		0x344
+#define BGMAC_TX_JABBER_PKTS			0x348		/* Error */
+#define BGMAC_TX_OVERSIZE_PKTS			0x34c		/* Error */
+#define BGMAC_TX_FRAGMENT_PKTS			0x350
+#define BGMAC_TX_UNDERRUNS			0x354		/* Error */
+#define BGMAC_TX_TOTAL_COLS			0x358
+#define BGMAC_TX_SINGLE_COLS			0x35c
+#define BGMAC_TX_MULTIPLE_COLS			0x360
+#define BGMAC_TX_EXCESSIVE_COLS			0x364		/* Error */
+#define BGMAC_TX_LATE_COLS			0x368		/* Error */
+#define BGMAC_TX_DEFERED			0x36c
+#define BGMAC_TX_CARRIER_LOST			0x370
+#define BGMAC_TX_PAUSE_PKTS			0x374
+#define BGMAC_TX_UNI_PKTS			0x378
+#define BGMAC_TX_Q0_PKTS			0x37c
+#define BGMAC_TX_Q0_OCTETS			0x380
+#define BGMAC_TX_Q0_OCTETS_HIGH			0x384
+#define BGMAC_TX_Q1_PKTS			0x388
+#define BGMAC_TX_Q1_OCTETS			0x38c
+#define BGMAC_TX_Q1_OCTETS_HIGH			0x390
+#define BGMAC_TX_Q2_PKTS			0x394
+#define BGMAC_TX_Q2_OCTETS			0x398
+#define BGMAC_TX_Q2_OCTETS_HIGH			0x39c
+#define BGMAC_TX_Q3_PKTS			0x3a0
+#define BGMAC_TX_Q3_OCTETS			0x3a4
+#define BGMAC_TX_Q3_OCTETS_HIGH			0x3a8
+#define BGMAC_RX_GOOD_OCTETS			0x3b0
+#define BGMAC_RX_GOOD_OCTETS_HIGH		0x3b4
+#define BGMAC_RX_GOOD_PKTS			0x3b8
+#define BGMAC_RX_OCTETS				0x3bc
+#define BGMAC_RX_OCTETS_HIGH			0x3c0
+#define BGMAC_RX_PKTS				0x3c4
+#define BGMAC_RX_BROADCAST_PKTS			0x3c8
+#define BGMAC_RX_MULTICAST_PKTS			0x3cc
+#define BGMAC_RX_LEN_64				0x3d0
+#define BGMAC_RX_LEN_65_TO_127			0x3d4
+#define BGMAC_RX_LEN_128_TO_255			0x3d8
+#define BGMAC_RX_LEN_256_TO_511			0x3dc
+#define BGMAC_RX_LEN_512_TO_1023		0x3e0
+#define BGMAC_RX_LEN_1024_TO_1522		0x3e4
+#define BGMAC_RX_LEN_1523_TO_2047		0x3e8
+#define BGMAC_RX_LEN_2048_TO_4095		0x3ec
+#define BGMAC_RX_LEN_4095_TO_8191		0x3f0
+#define BGMAC_RX_LEN_8192_TO_MAX		0x3f4
+#define BGMAC_RX_JABBER_PKTS			0x3f8		/* Error */
+#define BGMAC_RX_OVERSIZE_PKTS			0x3fc		/* Error */
+#define BGMAC_RX_FRAGMENT_PKTS			0x400
+#define BGMAC_RX_MISSED_PKTS			0x404		/* Error */
+#define BGMAC_RX_CRC_ALIGN_ERRS			0x408		/* Error */
+#define BGMAC_RX_UNDERSIZE			0x40c		/* Error */
+#define BGMAC_RX_CRC_ERRS			0x410		/* Error */
+#define BGMAC_RX_ALIGN_ERRS			0x414		/* Error */
+#define BGMAC_RX_SYMBOL_ERRS			0x418		/* Error */
+#define BGMAC_RX_PAUSE_PKTS			0x41c
+#define BGMAC_RX_NONPAUSE_PKTS			0x420
+#define BGMAC_RX_SACHANGES			0x424
+#define BGMAC_RX_UNI_PKTS			0x428
+#define BGMAC_UNIMAC_VERSION			0x800
+#define BGMAC_HDBKP_CTL				0x804
+#define BGMAC_CMDCFG				0x808		/* Configuration */
+#define  BGMAC_CMDCFG_TE			0x00000001	/* Set to activate TX */
+#define  BGMAC_CMDCFG_RE			0x00000002	/* Set to activate RX */
+#define  BGMAC_CMDCFG_ES_MASK			0x0000000c	/* Ethernet speed see gmac_speed */
+#define   BGMAC_CMDCFG_ES_10			0x00000000
+#define   BGMAC_CMDCFG_ES_100			0x00000004
+#define   BGMAC_CMDCFG_ES_1000			0x00000008
+#define  BGMAC_CMDCFG_PROM			0x00000010	/* Set to activate promiscuous mode */
+#define  BGMAC_CMDCFG_PAD_EN			0x00000020
+#define  BGMAC_CMDCFG_CF			0x00000040
+#define  BGMAC_CMDCFG_PF			0x00000080
+#define  BGMAC_CMDCFG_RPI			0x00000100	/* Unset to enable 802.3x tx flow control */
+#define  BGMAC_CMDCFG_TAI			0x00000200
+#define  BGMAC_CMDCFG_HD			0x00000400	/* Set if in half duplex mode */
+#define  BGMAC_CMDCFG_HD_SHIFT			10
+#define  BGMAC_CMDCFG_SR			0x00000800	/* Set to reset mode */
+#define  BGMAC_CMDCFG_ML			0x00008000	/* Set to activate mac loopback mode */
+#define  BGMAC_CMDCFG_AE			0x00400000
+#define  BGMAC_CMDCFG_CFE			0x00800000
+#define  BGMAC_CMDCFG_NLC			0x01000000
+#define  BGMAC_CMDCFG_RL			0x02000000
+#define  BGMAC_CMDCFG_RED			0x04000000
+#define  BGMAC_CMDCFG_PE			0x08000000
+#define  BGMAC_CMDCFG_TPI			0x10000000
+#define  BGMAC_CMDCFG_AT			0x20000000
+#define BGMAC_MACADDR_HIGH			0x80c		/* High 4 octets of own mac address */
+#define BGMAC_MACADDR_LOW			0x810		/* Low 2 octets of own mac address */
+#define BGMAC_RXMAX_LENGTH			0x814		/* Max receive frame length with vlan tag */
+#define BGMAC_PAUSEQUANTA			0x818
+#define BGMAC_MAC_MODE				0x844
+#define BGMAC_OUTERTAG				0x848
+#define BGMAC_INNERTAG				0x84c
+#define BGMAC_TXIPG				0x85c
+#define BGMAC_PAUSE_CTL				0xb30
+#define BGMAC_TX_FLUSH				0xb34
+#define BGMAC_RX_STATUS				0xb38
+#define BGMAC_TX_STATUS				0xb3c
+
+#define BGMAC_PHY_CTL				0x00
+#define  BGMAC_PHY_CTL_SPEED_MSB		0x0040
+#define  BGMAC_PHY_CTL_DUPLEX			0x0100		/* duplex mode */
+#define  BGMAC_PHY_CTL_RESTART			0x0200		/* restart autonegotiation */
+#define  BGMAC_PHY_CTL_ANENAB			0x1000		/* enable autonegotiation */
+#define  BGMAC_PHY_CTL_SPEED			0x2000
+#define  BGMAC_PHY_CTL_LOOP			0x4000		/* loopback */
+#define  BGMAC_PHY_CTL_RESET			0x8000		/* reset */
+/* Helpers */
+#define  BGMAC_PHY_CTL_SPEED_10			0
+#define  BGMAC_PHY_CTL_SPEED_100		BGMAC_PHY_CTL_SPEED
+#define  BGMAC_PHY_CTL_SPEED_1000		BGMAC_PHY_CTL_SPEED_MSB
+#define BGMAC_PHY_ADV				0x04
+#define  BGMAC_PHY_ADV_10HALF			0x0020		/* advertise 10MBits/s half duplex */
+#define  BGMAC_PHY_ADV_10FULL			0x0040		/* advertise 10MBits/s full duplex */
+#define  BGMAC_PHY_ADV_100HALF			0x0080		/* advertise 100MBits/s half duplex */
+#define  BGMAC_PHY_ADV_100FULL			0x0100		/* advertise 100MBits/s full duplex */
+#define BGMAC_PHY_ADV2				0x09
+#define  BGMAC_PHY_ADV2_1000HALF		0x0100		/* advertise 1000MBits/s half duplex */
+#define  BGMAC_PHY_ADV2_1000FULL		0x0200		/* advertise 1000MBits/s full duplex */
+
+/* BCMA GMAC core specific IO Control (BCMA_IOCTL) flags */
+#define BGMAC_BCMA_IOCTL_SW_CLKEN		0x00000004	/* PHY Clock Enable */
+#define BGMAC_BCMA_IOCTL_SW_RESET		0x00000008	/* PHY Reset */
+
+/* BCMA GMAC core specific IO status (BCMA_IOST) flags */
+#define BGMAC_BCMA_IOST_ATTACHED		0x00000800
+
+#define BGMAC_NUM_MIB_TX_REGS	\
+		(((BGMAC_TX_Q3_OCTETS_HIGH - BGMAC_TX_GOOD_OCTETS) / 4) + 1)
+#define BGMAC_NUM_MIB_RX_REGS	\
+		(((BGMAC_RX_UNI_PKTS - BGMAC_RX_GOOD_OCTETS) / 4) + 1)
+
+#define BGMAC_DMA_TX_CTL			0x00
+#define  BGMAC_DMA_TX_ENABLE			0x00000001
+#define  BGMAC_DMA_TX_SUSPEND			0x00000002
+#define  BGMAC_DMA_TX_LOOPBACK			0x00000004
+#define  BGMAC_DMA_TX_FLUSH			0x00000010
+#define  BGMAC_DMA_TX_PARITY_DISABLE		0x00000800
+#define  BGMAC_DMA_TX_ADDREXT_MASK		0x00030000
+#define  BGMAC_DMA_TX_ADDREXT_SHIFT		16
+#define BGMAC_DMA_TX_INDEX			0x04
+#define BGMAC_DMA_TX_RINGLO			0x08
+#define BGMAC_DMA_TX_RINGHI			0x0C
+#define BGMAC_DMA_TX_STATUS			0x10
+#define  BGMAC_DMA_TX_STATDPTR			0x00001FFF
+#define  BGMAC_DMA_TX_STAT			0xF0000000
+#define   BGMAC_DMA_TX_STAT_DISABLED		0x00000000
+#define   BGMAC_DMA_TX_STAT_ACTIVE		0x10000000
+#define   BGMAC_DMA_TX_STAT_IDLEWAIT		0x20000000
+#define   BGMAC_DMA_TX_STAT_STOPPED		0x30000000
+#define   BGMAC_DMA_TX_STAT_SUSP		0x40000000
+#define BGMAC_DMA_TX_ERROR			0x14
+#define  BGMAC_DMA_TX_ERRDPTR			0x0001FFFF
+#define  BGMAC_DMA_TX_ERR			0xF0000000
+#define   BGMAC_DMA_TX_ERR_NOERR		0x00000000
+#define   BGMAC_DMA_TX_ERR_PROT			0x10000000
+#define   BGMAC_DMA_TX_ERR_UNDERRUN		0x20000000
+#define   BGMAC_DMA_TX_ERR_TRANSFER		0x30000000
+#define   BGMAC_DMA_TX_ERR_DESCREAD		0x40000000
+#define   BGMAC_DMA_TX_ERR_CORE			0x50000000
+#define BGMAC_DMA_RX_CTL			0x20
+#define  BGMAC_DMA_RX_ENABLE			0x00000001
+#define  BGMAC_DMA_RX_FRAME_OFFSET_MASK		0x000000FE
+#define  BGMAC_DMA_RX_FRAME_OFFSET_SHIFT	1
+#define  BGMAC_DMA_RX_DIRECT_FIFO		0x00000100
+#define  BGMAC_DMA_RX_OVERFLOW_CONT		0x00000400
+#define  BGMAC_DMA_RX_PARITY_DISABLE		0x00000800
+#define  BGMAC_DMA_RX_ADDREXT_MASK		0x00030000
+#define  BGMAC_DMA_RX_ADDREXT_SHIFT		16
+#define BGMAC_DMA_RX_INDEX			0x24
+#define BGMAC_DMA_RX_RINGLO			0x28
+#define BGMAC_DMA_RX_RINGHI			0x2C
+#define BGMAC_DMA_RX_STATUS			0x30
+#define  BGMAC_DMA_RX_STATDPTR			0x00001FFF
+#define  BGMAC_DMA_RX_STAT			0xF0000000
+#define   BGMAC_DMA_RX_STAT_DISABLED		0x00000000
+#define   BGMAC_DMA_RX_STAT_ACTIVE		0x10000000
+#define   BGMAC_DMA_RX_STAT_IDLEWAIT		0x20000000
+#define   BGMAC_DMA_RX_STAT_STOPPED		0x30000000
+#define   BGMAC_DMA_RX_STAT_SUSP		0x40000000
+#define BGMAC_DMA_RX_ERROR			0x34
+#define  BGMAC_DMA_RX_ERRDPTR			0x0001FFFF
+#define  BGMAC_DMA_RX_ERR			0xF0000000
+#define   BGMAC_DMA_RX_ERR_NOERR		0x00000000
+#define   BGMAC_DMA_RX_ERR_PROT			0x10000000
+#define   BGMAC_DMA_RX_ERR_UNDERRUN		0x20000000
+#define   BGMAC_DMA_RX_ERR_TRANSFER		0x30000000
+#define   BGMAC_DMA_RX_ERR_DESCREAD		0x40000000
+#define   BGMAC_DMA_RX_ERR_CORE			0x50000000
+
+#define BGMAC_DESC_CTL0_EOT			0x10000000	/* End of ring */
+#define BGMAC_DESC_CTL0_IOC			0x20000000	/* IRQ on complete */
+#define BGMAC_DESC_CTL0_SOF			0x40000000	/* Start of frame */
+#define BGMAC_DESC_CTL0_EOF			0x80000000	/* End of frame */
+#define BGMAC_DESC_CTL1_LEN			0x00001FFF
+
+#define BGMAC_PHY_NOREGS			0x1E
+#define BGMAC_PHY_MASK				0x1F
+
+#define BGMAC_MAX_TX_RINGS			4
+#define BGMAC_MAX_RX_RINGS			1
+
+#define BGMAC_TX_RING_SLOTS			128
+#define BGMAC_RX_RING_SLOTS			512 - 1		/* Why -1? Well, Broadcom does that... */
+
+#define BGMAC_RX_HEADER_LEN			28		/* Last 24 bytes are unused. Well... */
+#define BGMAC_RX_FRAME_OFFSET			30		/* There are 2 unused bytes between header and real data */
+#define BGMAC_RX_MAX_FRAME_SIZE			1536		/* Copied from b44/tg3 */
+#define BGMAC_RX_BUF_SIZE			(BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
+
+#define BGMAC_BFL_ENETROBO			0x0010		/* has ephy roboswitch spi */
+#define BGMAC_BFL_ENETADM			0x0080		/* has ADMtek switch */
+#define BGMAC_BFL_ENETVLAN			0x0100		/* can do vlan */
+
+#define BGMAC_CHIPCTL_1_IF_TYPE_MASK		0x00000030
+#define BGMAC_CHIPCTL_1_IF_TYPE_RMII		0x00000000
+#define BGMAC_CHIPCTL_1_IF_TYPE_MI		0x00000010
+#define BGMAC_CHIPCTL_1_IF_TYPE_RGMII		0x00000020
+#define BGMAC_CHIPCTL_1_SW_TYPE_MASK		0x000000C0
+#define BGMAC_CHIPCTL_1_SW_TYPE_EPHY		0x00000000
+#define BGMAC_CHIPCTL_1_SW_TYPE_EPHYMII		0x00000040
+#define BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII	0x00000080
+#define BGMAC_CHIPCTL_1_SW_TYPE_RGMI		0x000000C0
+#define BGMAC_CHIPCTL_1_RXC_DLL_BYPASS		0x00010000
+
+#define BGMAC_SPEED_10				0x0001
+#define BGMAC_SPEED_100				0x0002
+#define BGMAC_SPEED_1000			0x0004
+
+#define BGMAC_WEIGHT	64
+
+#define ETHER_MAX_LEN   1518
+
+struct bgmac_slot_info {
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+};
+
+struct bgmac_dma_desc {
+	__le32 ctl0;
+	__le32 ctl1;
+	__le32 addr_low;
+	__le32 addr_high;
+} __packed;
+
+enum bgmac_dma_ring_type {
+	BGMAC_DMA_RING_TX,
+	BGMAC_DMA_RING_RX,
+};
+
+/**
+ * bgmac_dma_ring - contains info about DMA ring (either TX or RX one)
+ * @start: index of the first slot containing data
+ * @end: index of a slot that can *not* be read (yet)
+ *
+ * Be really aware of the specific @end meaning. It's an index of a slot *after*
+ * the one containing data that can be read. If @start equals @end the ring is
+ * empty.
+ */
+struct bgmac_dma_ring {
+	u16 num_slots;
+	u16 start;
+	u16 end;
+
+	u16 mmio_base;
+	struct bgmac_dma_desc *cpu_base;
+	dma_addr_t dma_base;
+
+	struct bgmac_slot_info slots[BGMAC_RX_RING_SLOTS];
+};
+
+struct bgmac_rx_header {
+	__le16 len;
+	__le16 flags;
+	__le16 pad[12];
+};
+
+struct bgmac {
+	struct bcma_device *core;
+	struct bcma_device *cmn; /* Reference to CMN core for BCM4706 */
+	struct net_device *net_dev;
+	struct napi_struct napi;
+
+	/* DMA */
+	struct bgmac_dma_ring tx_ring[BGMAC_MAX_TX_RINGS];
+	struct bgmac_dma_ring rx_ring[BGMAC_MAX_RX_RINGS];
+
+	/* Stats */
+	bool stats_grabbed;
+	u32 mib_tx_regs[BGMAC_NUM_MIB_TX_REGS];
+	u32 mib_rx_regs[BGMAC_NUM_MIB_RX_REGS];
+
+	/* Int */
+	u32 int_mask;
+	u32 int_status;
+
+	/* Speed-related */
+	int speed;
+	bool autoneg;
+	bool full_duplex;
+
+	u8 phyaddr;
+	bool has_robosw;
+
+	bool loopback;
+};
+
+static inline u32 bgmac_read(struct bgmac *bgmac, u16 offset)
+{
+	return bcma_read32(bgmac->core, offset);
+}
+
+static inline void bgmac_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	bcma_write32(bgmac->core, offset, value);
+}
+
+static inline void bgmac_maskset(struct bgmac *bgmac, u16 offset, u32 mask,
+				   u32 set)
+{
+	bgmac_write(bgmac, offset, (bgmac_read(bgmac, offset) & mask) | set);
+}
+
+static inline void bgmac_mask(struct bgmac *bgmac, u16 offset, u32 mask)
+{
+	bgmac_maskset(bgmac, offset, mask, 0);
+}
+
+static inline void bgmac_set(struct bgmac *bgmac, u16 offset, u32 set)
+{
+	bgmac_maskset(bgmac, offset, ~0, set);
+}
+
+u16 bgmac_phy_read(struct bgmac *bgmac, u8 phyaddr, u8 reg);
+void bgmac_phy_write(struct bgmac *bgmac, u8 phyaddr, u8 reg, u16 value);
+
+#endif /* _BGMAC_H */
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 9a0e3fa3ca95..ee332fab825b 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -634,4 +634,6 @@ extern void bcma_chipco_regctl_maskset(struct bcma_drv_cc *cc,
 				       u32 offset, u32 mask, u32 set);
 extern void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid);
 
+extern u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc);
+
 #endif /* LINUX_BCMA_DRIVER_CC_H_ */
-- 
cgit v1.2.3


From 416186fbf8c5b4e4465a10c6ac7a45b6c47144b2 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 10 Jan 2013 08:56:51 +0000
Subject: net: Split core bits of netdev_pick_tx into __netdev_pick_tx

This change splits the core bits of netdev_pick_tx into a separate function.
The main idea behind this is to make this code accessible to select queue
functions when they decide to process the standard path instead of their
own custom path in their select queue routine.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 57 ++++++++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0209ac328e8a..608c3ac4d045 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1403,6 +1403,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 
 extern struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 					   struct sk_buff *skb);
+extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
 
 /*
  * Net namespace inlines
diff --git a/net/core/dev.c b/net/core/dev.c
index 4794cae84939..81ff67149f62 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2495,37 +2495,44 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb)
+u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 {
-	int queue_index;
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	if (dev->real_num_tx_queues == 1)
-		queue_index = 0;
-	else if (ops->ndo_select_queue) {
-		queue_index = ops->ndo_select_queue(dev, skb);
-		queue_index = dev_cap_txqueue(dev, queue_index);
-	} else {
-		struct sock *sk = skb->sk;
-		queue_index = sk_tx_queue_get(sk);
+	struct sock *sk = skb->sk;
+	int queue_index = sk_tx_queue_get(sk);
 
-		if (queue_index < 0 || skb->ooo_okay ||
-		    queue_index >= dev->real_num_tx_queues) {
-			int old_index = queue_index;
+	if (queue_index < 0 || skb->ooo_okay ||
+	    queue_index >= dev->real_num_tx_queues) {
+		int new_index = get_xps_queue(dev, skb);
+		if (new_index < 0)
+			new_index = skb_tx_hash(dev, skb);
 
-			queue_index = get_xps_queue(dev, skb);
-			if (queue_index < 0)
-				queue_index = skb_tx_hash(dev, skb);
-
-			if (queue_index != old_index && sk) {
-				struct dst_entry *dst =
+		if (queue_index != new_index && sk) {
+			struct dst_entry *dst =
 				    rcu_dereference_check(sk->sk_dst_cache, 1);
 
-				if (dst && skb_dst(skb) == dst)
-					sk_tx_queue_set(sk, queue_index);
-			}
+			if (dst && skb_dst(skb) == dst)
+				sk_tx_queue_set(sk, queue_index);
+
 		}
+
+		queue_index = new_index;
+	}
+
+	return queue_index;
+}
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb)
+{
+	int queue_index = 0;
+
+	if (dev->real_num_tx_queues != 1) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+		if (ops->ndo_select_queue)
+			queue_index = ops->ndo_select_queue(dev, skb);
+		else
+			queue_index = __netdev_pick_tx(dev, skb);
+		queue_index = dev_cap_txqueue(dev, queue_index);
 	}
 
 	skb_set_queue_mapping(skb, queue_index);
-- 
cgit v1.2.3


From 537c00de1c9ba9876b91d869e84caceefe2b8bf9 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 10 Jan 2013 08:57:02 +0000
Subject: net: Add functions netif_reset_xps_queue and netif_set_xps_queue

This patch adds two functions, netif_reset_xps_queue and
netif_set_xps_queue.  The main idea behind these two functions is to
provide a mechanism through which drivers can update their defaults in
regards to XPS.

Currently no such mechanism exists and as a result we cannot use XPS for
things such as ATR which would require a basic configuration to start in
which the Tx queues are mapped to CPUs via a 1:1 mapping.  With this change
I am making it possible for drivers such as ixgbe to be able to use the XPS
feature by controlling the default configuration.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  13 ++++
 net/core/dev.c            | 155 ++++++++++++++++++++++++++++++++++++++++++++++
 net/core/net-sysfs.c      | 148 ++-----------------------------------------
 3 files changed, 173 insertions(+), 143 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 608c3ac4d045..59fe9da4e315 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2103,6 +2103,19 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 		__netif_schedule(txq->qdisc);
 }
 
+#ifdef CONFIG_XPS
+extern void netif_reset_xps_queue(struct net_device *dev, u16 index);
+extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
+			       u16 index);
+#else
+static inline int netif_set_xps_queue(struct net_device *dev,
+				      struct cpumask *mask,
+				      u16 index)
+{
+	return 0;
+}
+#endif
+
 /*
  * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
  * as a distribution range limit for the returned value.
diff --git a/net/core/dev.c b/net/core/dev.c
index 81ff67149f62..257b29516f69 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1857,6 +1857,161 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
 	}
 }
 
+#ifdef CONFIG_XPS
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+void netif_reset_xps_queue(struct net_device *dev, u16 index)
+{
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int i, pos, nonempty = 0;
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	for_each_possible_cpu(i) {
+		map = xmap_dereference(dev_maps->cpu_map[i]);
+		if (!map)
+			continue;
+
+		for (pos = 0; pos < map->len; pos++)
+			if (map->queues[pos] == index)
+				break;
+
+		if (pos < map->len) {
+			if (map->len > 1) {
+				map->queues[pos] = map->queues[--map->len];
+			} else {
+				RCU_INIT_POINTER(dev_maps->cpu_map[i], NULL);
+				kfree_rcu(map, rcu);
+				map = NULL;
+			}
+		}
+		if (map)
+			nonempty = 1;
+	}
+
+	if (!nonempty) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+}
+
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+{
+	int i, cpu, pos, map_len, alloc_len, need_set;
+	struct xps_map *map, *new_map;
+	struct xps_dev_maps *dev_maps, *new_dev_maps;
+	int nonempty = 0;
+	int numa_node_id = -2;
+	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
+
+	new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+	if (!new_dev_maps)
+		return -ENOMEM;
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		new_map = map;
+		if (map) {
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+			map_len = map->len;
+			alloc_len = map->alloc_len;
+		} else
+			pos = map_len = alloc_len = 0;
+
+		need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
+#ifdef CONFIG_NUMA
+		if (need_set) {
+			if (numa_node_id == -2)
+				numa_node_id = cpu_to_node(cpu);
+			else if (numa_node_id != cpu_to_node(cpu))
+				numa_node_id = -1;
+		}
+#endif
+		if (need_set && pos >= map_len) {
+			/* Need to add queue to this CPU's map */
+			if (map_len >= alloc_len) {
+				alloc_len = alloc_len ?
+				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
+				new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
+						       GFP_KERNEL,
+						       cpu_to_node(cpu));
+				if (!new_map)
+					goto error;
+				new_map->alloc_len = alloc_len;
+				for (i = 0; i < map_len; i++)
+					new_map->queues[i] = map->queues[i];
+				new_map->len = map_len;
+			}
+			new_map->queues[new_map->len++] = index;
+		} else if (!need_set && pos < map_len) {
+			/* Need to remove queue from this CPU's map */
+			if (map_len > 1)
+				new_map->queues[pos] =
+				    new_map->queues[--new_map->len];
+			else
+				new_map = NULL;
+		}
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
+	}
+
+	/* Cleanup old maps */
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
+			kfree_rcu(map, rcu);
+		if (new_dev_maps->cpu_map[cpu])
+			nonempty = 1;
+	}
+
+	if (nonempty) {
+		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	} else {
+		kfree(new_dev_maps);
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+	}
+
+	if (dev_maps)
+		kfree_rcu(dev_maps, rcu);
+
+	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+				     (numa_node_id >= 0) ? numa_node_id :
+				     NUMA_NO_NODE);
+
+	mutex_unlock(&xps_map_mutex);
+
+	return 0;
+error:
+	mutex_unlock(&xps_map_mutex);
+
+	if (new_dev_maps)
+		for_each_possible_cpu(i)
+			kfree(rcu_dereference_protected(
+				new_dev_maps->cpu_map[i],
+				1));
+	kfree(new_dev_maps);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(netif_set_xps_queue);
+
+#endif
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 29c884a74c38..5ad489d5d062 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1002,54 +1002,14 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 	return len;
 }
 
-static DEFINE_MUTEX(xps_map_mutex);
-#define xmap_dereference(P)		\
-	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
-
 static void xps_queue_release(struct netdev_queue *queue)
 {
 	struct net_device *dev = queue->dev;
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
 	unsigned long index;
-	int i, pos, nonempty = 0;
 
 	index = get_netdev_queue_index(queue);
 
-	mutex_lock(&xps_map_mutex);
-	dev_maps = xmap_dereference(dev->xps_maps);
-
-	if (dev_maps) {
-		for_each_possible_cpu(i) {
-			map = xmap_dereference(dev_maps->cpu_map[i]);
-			if (!map)
-				continue;
-
-			for (pos = 0; pos < map->len; pos++)
-				if (map->queues[pos] == index)
-					break;
-
-			if (pos < map->len) {
-				if (map->len > 1)
-					map->queues[pos] =
-					    map->queues[--map->len];
-				else {
-					RCU_INIT_POINTER(dev_maps->cpu_map[i],
-					    NULL);
-					kfree_rcu(map, rcu);
-					map = NULL;
-				}
-			}
-			if (map)
-				nonempty = 1;
-		}
-
-		if (!nonempty) {
-			RCU_INIT_POINTER(dev->xps_maps, NULL);
-			kfree_rcu(dev_maps, rcu);
-		}
-	}
-	mutex_unlock(&xps_map_mutex);
+	netif_reset_xps_queue(dev, index);
 }
 
 static ssize_t store_xps_map(struct netdev_queue *queue,
@@ -1057,13 +1017,9 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 		      const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
-	cpumask_var_t mask;
-	int err, i, cpu, pos, map_len, alloc_len, need_set;
 	unsigned long index;
-	struct xps_map *map, *new_map;
-	struct xps_dev_maps *dev_maps, *new_dev_maps;
-	int nonempty = 0;
-	int numa_node_id = -2;
+	cpumask_var_t mask;
+	int err;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1079,105 +1035,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 		return err;
 	}
 
-	new_dev_maps = kzalloc(max_t(unsigned int,
-	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
-	if (!new_dev_maps) {
-		free_cpumask_var(mask);
-		return -ENOMEM;
-	}
-
-	mutex_lock(&xps_map_mutex);
-
-	dev_maps = xmap_dereference(dev->xps_maps);
-
-	for_each_possible_cpu(cpu) {
-		map = dev_maps ?
-			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
-		new_map = map;
-		if (map) {
-			for (pos = 0; pos < map->len; pos++)
-				if (map->queues[pos] == index)
-					break;
-			map_len = map->len;
-			alloc_len = map->alloc_len;
-		} else
-			pos = map_len = alloc_len = 0;
-
-		need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
-#ifdef CONFIG_NUMA
-		if (need_set) {
-			if (numa_node_id == -2)
-				numa_node_id = cpu_to_node(cpu);
-			else if (numa_node_id != cpu_to_node(cpu))
-				numa_node_id = -1;
-		}
-#endif
-		if (need_set && pos >= map_len) {
-			/* Need to add queue to this CPU's map */
-			if (map_len >= alloc_len) {
-				alloc_len = alloc_len ?
-				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
-				new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
-						       GFP_KERNEL,
-						       cpu_to_node(cpu));
-				if (!new_map)
-					goto error;
-				new_map->alloc_len = alloc_len;
-				for (i = 0; i < map_len; i++)
-					new_map->queues[i] = map->queues[i];
-				new_map->len = map_len;
-			}
-			new_map->queues[new_map->len++] = index;
-		} else if (!need_set && pos < map_len) {
-			/* Need to remove queue from this CPU's map */
-			if (map_len > 1)
-				new_map->queues[pos] =
-				    new_map->queues[--new_map->len];
-			else
-				new_map = NULL;
-		}
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
-	}
-
-	/* Cleanup old maps */
-	for_each_possible_cpu(cpu) {
-		map = dev_maps ?
-			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
-		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
-			kfree_rcu(map, rcu);
-		if (new_dev_maps->cpu_map[cpu])
-			nonempty = 1;
-	}
-
-	if (nonempty) {
-		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
-	} else {
-		kfree(new_dev_maps);
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
-	}
-
-	if (dev_maps)
-		kfree_rcu(dev_maps, rcu);
-
-	netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
-					    NUMA_NO_NODE);
-
-	mutex_unlock(&xps_map_mutex);
+	err = netif_set_xps_queue(dev, mask, index);
 
 	free_cpumask_var(mask);
-	return len;
 
-error:
-	mutex_unlock(&xps_map_mutex);
-
-	if (new_dev_maps)
-		for_each_possible_cpu(i)
-			kfree(rcu_dereference_protected(
-				new_dev_maps->cpu_map[i],
-				1));
-	kfree(new_dev_maps);
-	free_cpumask_var(mask);
-	return -ENOMEM;
+	return err ? : len;
 }
 
 static struct netdev_queue_attribute xps_cpus_attribute =
-- 
cgit v1.2.3


From 024e9679a2daaa67642693366fb63a6b8c61b9f3 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 10 Jan 2013 08:57:46 +0000
Subject: net: Add support for XPS without sysfs being defined

This patch makes it so that we can support transmit packet steering without
sysfs needing to be enabled.  The reason for making this change is to make
it so that a driver can make use of the XPS even while the sysfs portion of
the interface is not present.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 net/Kconfig               |  2 +-
 net/core/dev.c            | 26 ++++++++++++++++++++------
 net/core/net-sysfs.c      | 14 --------------
 4 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 59fe9da4e315..aa7ad8a96e70 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2104,7 +2104,6 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 }
 
 #ifdef CONFIG_XPS
-extern void netif_reset_xps_queue(struct net_device *dev, u16 index);
 extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
 			       u16 index);
 #else
diff --git a/net/Kconfig b/net/Kconfig
index 30b48f523135..3cc5be0fe420 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,7 +232,7 @@ config RFS_ACCEL
 
 config XPS
 	boolean
-	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+	depends on SMP && USE_GENERIC_SMP_HELPERS
 	default y
 
 config NETPRIO_CGROUP
diff --git a/net/core/dev.c b/net/core/dev.c
index 41d5120df469..95de4c011808 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1887,10 +1887,10 @@ static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
 	return map;
 }
 
-void netif_reset_xps_queue(struct net_device *dev, u16 index)
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
 {
 	struct xps_dev_maps *dev_maps;
-	int cpu;
+	int cpu, i;
 	bool active = false;
 
 	mutex_lock(&xps_map_mutex);
@@ -1900,7 +1900,11 @@ void netif_reset_xps_queue(struct net_device *dev, u16 index)
 		goto out_no_maps;
 
 	for_each_possible_cpu(cpu) {
-		if (remove_xps_queue(dev_maps, cpu, index))
+		for (i = index; i < dev->num_tx_queues; i++) {
+			if (!remove_xps_queue(dev_maps, cpu, i))
+				break;
+		}
+		if (i == dev->num_tx_queues)
 			active = true;
 	}
 
@@ -1909,8 +1913,10 @@ void netif_reset_xps_queue(struct net_device *dev, u16 index)
 		kfree_rcu(dev_maps, rcu);
 	}
 
-	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
-				     NUMA_NO_NODE);
+	for (i = index; i < dev->num_tx_queues; i++)
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+					     NUMA_NO_NODE);
+
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
 }
@@ -2096,8 +2102,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (dev->num_tc)
 			netif_setup_tc(dev, txq);
 
-		if (txq < dev->real_num_tx_queues)
+		if (txq < dev->real_num_tx_queues) {
 			qdisc_reset_all_tx_gt(dev, txq);
+#ifdef CONFIG_XPS
+			netif_reset_xps_queues_gt(dev, txq);
+#endif
+		}
 	}
 
 	dev->real_num_tx_queues = txq;
@@ -5919,6 +5929,10 @@ static void rollback_registered_many(struct list_head *head)
 
 		/* Remove entries from kobject tree */
 		netdev_unregister_kobject(dev);
+#ifdef CONFIG_XPS
+		/* Remove XPS queueing entries */
+		netif_reset_xps_queues_gt(dev, 0);
+#endif
 	}
 
 	synchronize_net();
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5ad489d5d062..a5b89a6fec6d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1002,16 +1002,6 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 	return len;
 }
 
-static void xps_queue_release(struct netdev_queue *queue)
-{
-	struct net_device *dev = queue->dev;
-	unsigned long index;
-
-	index = get_netdev_queue_index(queue);
-
-	netif_reset_xps_queue(dev, index);
-}
-
 static ssize_t store_xps_map(struct netdev_queue *queue,
 		      struct netdev_queue_attribute *attribute,
 		      const char *buf, size_t len)
@@ -1058,10 +1048,6 @@ static void netdev_queue_release(struct kobject *kobj)
 {
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 
-#ifdef CONFIG_XPS
-	xps_queue_release(queue);
-#endif
-
 	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
 }
-- 
cgit v1.2.3


From b43e1065cab4b5be90c016b2f076086b70cd1556 Mon Sep 17 00:00:00 2001
From: Lv Zheng <lv.zheng@intel.com>
Date: Sat, 12 Jan 2013 15:29:38 +0000
Subject: ACPICA: Cleanup table handler naming conflicts.

This is a cosmetic patch only. Comparison of the resulting binary showed
only line number differences.

This patch does not affect the generation of the Linux binary.
This patch decreases 44 lines of 20121114 divergence.diff.

There are naming conflicts between Linux and ACPICA on table handlers. This
patch cleans up this conflicts to reduce the source code diff between Linux
and ACPICA.

Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/acglobal.h |  2 +-
 drivers/acpi/acpica/tbxface.c  |  4 ++--
 drivers/acpi/numa.c            |  2 +-
 drivers/acpi/tables.c          |  6 +++---
 include/acpi/acpixf.h          |  4 ++--
 include/acpi/actypes.h         |  2 +-
 include/linux/acpi.h           | 15 ++++++++++-----
 7 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 864806e1ac54..585d364fb7e5 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -252,7 +252,7 @@ ACPI_EXTERN acpi_cache_t *acpi_gbl_operand_cache;
 ACPI_EXTERN struct acpi_global_notify_handler acpi_gbl_global_notify[2];
 ACPI_EXTERN acpi_exception_handler acpi_gbl_exception_handler;
 ACPI_EXTERN acpi_init_handler acpi_gbl_init_handler;
-ACPI_EXTERN acpi_tbl_handler acpi_gbl_table_handler;
+ACPI_EXTERN acpi_table_handler acpi_gbl_table_handler;
 ACPI_EXTERN void *acpi_gbl_table_handler_context;
 ACPI_EXTERN struct acpi_walk_state *acpi_gbl_breakpoint_walk;
 ACPI_EXTERN acpi_interface_handler acpi_gbl_interface_handler;
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index d102fe7f709b..2115f2242a29 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -436,7 +436,7 @@ ACPI_EXPORT_SYMBOL(acpi_get_table_by_index)
  *
  ******************************************************************************/
 acpi_status
-acpi_install_table_handler(acpi_tbl_handler handler, void *context)
+acpi_install_table_handler(acpi_table_handler handler, void *context)
 {
 	acpi_status status;
 
@@ -482,7 +482,7 @@ ACPI_EXPORT_SYMBOL(acpi_install_table_handler)
  * DESCRIPTION: Remove table event handler
  *
  ******************************************************************************/
-acpi_status acpi_remove_table_handler(acpi_tbl_handler handler)
+acpi_status acpi_remove_table_handler(acpi_table_handler handler)
 {
 	acpi_status status;
 
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index cb31298ca684..5ddbc65e0f6e 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -273,7 +273,7 @@ static int __init acpi_parse_srat(struct acpi_table_header *table)
 
 static int __init
 acpi_table_parse_srat(enum acpi_srat_type id,
-		      acpi_table_entry_handler handler, unsigned int max_entries)
+		      acpi_tbl_entry_handler handler, unsigned int max_entries)
 {
 	return acpi_table_parse_entries(ACPI_SIG_SRAT,
 					    sizeof(struct acpi_table_srat), id,
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 2572d9715bda..d67a1fe07f0e 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -204,7 +204,7 @@ int __init
 acpi_table_parse_entries(char *id,
 			     unsigned long table_size,
 			     int entry_id,
-			     acpi_table_entry_handler handler,
+			     acpi_tbl_entry_handler handler,
 			     unsigned int max_entries)
 {
 	struct acpi_table_header *table_header = NULL;
@@ -269,7 +269,7 @@ err:
 
 int __init
 acpi_table_parse_madt(enum acpi_madt_type id,
-		      acpi_table_entry_handler handler, unsigned int max_entries)
+		      acpi_tbl_entry_handler handler, unsigned int max_entries)
 {
 	return acpi_table_parse_entries(ACPI_SIG_MADT,
 					    sizeof(struct acpi_table_madt), id,
@@ -285,7 +285,7 @@ acpi_table_parse_madt(enum acpi_madt_type id,
  * Scan the ACPI System Descriptor Table (STD) for a table matching @id,
  * run @handler on it.  Return 0 if table found, return on if not.
  */
-int __init acpi_table_parse(char *id, acpi_table_handler handler)
+int __init acpi_table_parse(char *id, acpi_tbl_table_handler handler)
 {
 	struct acpi_table_header *table = NULL;
 	acpi_size tbl_size;
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index b86364d48645..d8b2ea673fc6 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -197,9 +197,9 @@ acpi_status
 acpi_get_table_by_index(u32 table_index, struct acpi_table_header **out_table);
 
 acpi_status
-acpi_install_table_handler(acpi_tbl_handler handler, void *context);
+acpi_install_table_handler(acpi_table_handler handler, void *context);
 
-acpi_status acpi_remove_table_handler(acpi_tbl_handler handler);
+acpi_status acpi_remove_table_handler(acpi_table_handler handler);
 
 /*
  * Namespace and name interfaces
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index cd89810a4387..3d4e09c60e2b 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -984,7 +984,7 @@ acpi_status(*acpi_exception_handler) (acpi_status aml_status,
 /* Table Event handler (Load, load_table, etc.) and types */
 
 typedef
-acpi_status(*acpi_tbl_handler) (u32 event, void *table, void *context);
+acpi_status(*acpi_table_handler) (u32 event, void *table, void *context);
 
 #define ACPI_TABLE_LOAD             0x0
 #define ACPI_TABLE_UNLOAD           0x1
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3994d7790b23..6b795bd36383 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -74,9 +74,10 @@ enum acpi_address_range_id {
 
 /* Table Handlers */
 
-typedef int (*acpi_table_handler) (struct acpi_table_header *table);
+typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table);
 
-typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end);
+typedef int (*acpi_tbl_entry_handler)(struct acpi_subtable_header *header,
+				      const unsigned long end);
 
 #ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
 void acpi_initrd_override(void *data, size_t size);
@@ -95,10 +96,14 @@ int acpi_mps_check (void);
 int acpi_numa_init (void);
 
 int acpi_table_init (void);
-int acpi_table_parse (char *id, acpi_table_handler handler);
+int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
 int __init acpi_table_parse_entries(char *id, unsigned long table_size,
-	int entry_id, acpi_table_entry_handler handler, unsigned int max_entries);
-int acpi_table_parse_madt (enum acpi_madt_type id, acpi_table_entry_handler handler, unsigned int max_entries);
+				    int entry_id,
+				    acpi_tbl_entry_handler handler,
+				    unsigned int max_entries);
+int acpi_table_parse_madt(enum acpi_madt_type id,
+			  acpi_tbl_entry_handler handler,
+			  unsigned int max_entries);
 int acpi_parse_mcfg (struct acpi_table_header *header);
 void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
 
-- 
cgit v1.2.3


From e2aa19fadd718d7dd920a3994118863861a4b61e Mon Sep 17 00:00:00 2001
From: Nathan Hintz <nlhintz@hotmail.com>
Date: Thu, 10 Jan 2013 17:54:09 +0100
Subject: bcma: return the mips irq number in bcma_core_irq

The irq signal numbers that are send by the cpu are increased by 2 from
the number programmed into the mips core by bcma.
Return the irq number on which the irqs are send in bcma_core_irq() now.

Signed-off-by: Nathan Hintz <nlhintz@hotmail.com>
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 arch/mips/bcm47xx/serial.c            |  2 +-
 drivers/bcma/driver_chipcommon.c      |  2 +-
 drivers/bcma/driver_mips.c            | 12 +++++++++---
 drivers/bcma/driver_pci_host.c        |  4 ++--
 include/linux/bcma/bcma_driver_mips.h |  2 +-
 5 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/bcm47xx/serial.c b/arch/mips/bcm47xx/serial.c
index 57981e4fe2bc..b8ef965705cf 100644
--- a/arch/mips/bcm47xx/serial.c
+++ b/arch/mips/bcm47xx/serial.c
@@ -62,7 +62,7 @@ static int __init uart8250_init_bcma(void)
 
 		p->mapbase = (unsigned int) bcma_port->regs;
 		p->membase = (void *) bcma_port->regs;
-		p->irq = bcma_port->irq + 2;
+		p->irq = bcma_port->irq;
 		p->uartclk = bcma_port->baud_base;
 		p->regshift = bcma_port->reg_shift;
 		p->iotype = UPIO_MEM;
diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index e461ad25fda4..28fa50ad87be 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -329,7 +329,7 @@ void bcma_chipco_serial_init(struct bcma_drv_cc *cc)
 		return;
 	}
 
-	irq = bcma_core_mips_irq(cc->core);
+	irq = bcma_core_irq(cc->core);
 
 	/* Determine the registers of the UARTs */
 	cc->nr_serial_ports = (cc->capabilities & BCMA_CC_CAP_NRUART);
diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index 90f20a247725..a808404db4b1 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -85,7 +85,7 @@ static u32 bcma_core_mips_irqflag(struct bcma_device *dev)
  * If disabled, 5 is returned.
  * If not supported, 6 is returned.
  */
-unsigned int bcma_core_mips_irq(struct bcma_device *dev)
+static unsigned int bcma_core_mips_irq(struct bcma_device *dev)
 {
 	struct bcma_device *mdev = dev->bus->drv_mips.core;
 	u32 irqflag;
@@ -102,7 +102,13 @@ unsigned int bcma_core_mips_irq(struct bcma_device *dev)
 
 	return 5;
 }
-EXPORT_SYMBOL(bcma_core_mips_irq);
+
+unsigned int bcma_core_irq(struct bcma_device *dev)
+{
+	unsigned int mips_irq = bcma_core_mips_irq(dev);
+	return mips_irq <= 4 ? mips_irq + 2 : 0;
+}
+EXPORT_SYMBOL(bcma_core_irq);
 
 static void bcma_core_mips_set_irq(struct bcma_device *dev, unsigned int irq)
 {
@@ -299,7 +305,7 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 		break;
 	default:
 		list_for_each_entry(core, &bus->cores, list) {
-			core->irq = bcma_core_mips_irq(core) + 2;
+			core->irq = bcma_core_irq(core);
 		}
 		bcma_err(bus,
 			 "Unknown device (0x%x) found, can not configure IRQs\n",
diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c
index e6b5c89469dc..ef9f0938da77 100644
--- a/drivers/bcma/driver_pci_host.c
+++ b/drivers/bcma/driver_pci_host.c
@@ -577,7 +577,7 @@ int bcma_core_pci_plat_dev_init(struct pci_dev *dev)
 	pr_info("PCI: Fixing up device %s\n", pci_name(dev));
 
 	/* Fix up interrupt lines */
-	dev->irq = bcma_core_mips_irq(pc_host->pdev->core) + 2;
+	dev->irq = bcma_core_irq(pc_host->pdev->core);
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
 
 	return 0;
@@ -596,6 +596,6 @@ int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev)
 
 	pc_host = container_of(dev->bus->ops, struct bcma_drv_pci_host,
 			       pci_ops);
-	return bcma_core_mips_irq(pc_host->pdev->core) + 2;
+	return bcma_core_irq(pc_host->pdev->core);
 }
 EXPORT_SYMBOL(bcma_core_pci_pcibios_map_irq);
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index 6495579e3f35..73c7f4b882cc 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -48,6 +48,6 @@ static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { }
 
 extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore);
 
-extern unsigned int bcma_core_mips_irq(struct bcma_device *dev);
+extern unsigned int bcma_core_irq(struct bcma_device *core);
 
 #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */
-- 
cgit v1.2.3


From 990debe2ca8379863709721926550a55f47f3880 Mon Sep 17 00:00:00 2001
From: Nathan Hintz <nlhintz@hotmail.com>
Date: Thu, 10 Jan 2013 22:24:03 -0800
Subject: bcma: update pci configuration for bcm4706/bcm4716

Update the PCI configuration for BCM4706 and BCM4716 per the 2011
Broadcom SDK.

Signed-off-by: Nathan Hintz <nlhintz@hotmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_pci_host.c       | 13 ++++++++++++-
 include/linux/bcma/bcma_driver_pci.h |  2 ++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c
index ef9f0938da77..37d1777dcd47 100644
--- a/drivers/bcma/driver_pci_host.c
+++ b/drivers/bcma/driver_pci_host.c
@@ -427,7 +427,7 @@ void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 	/* Reset RC */
 	usleep_range(3000, 5000);
 	pcicore_write32(pc, BCMA_CORE_PCI_CTL, BCMA_CORE_PCI_CTL_RST_OE);
-	usleep_range(1000, 2000);
+	msleep(50);
 	pcicore_write32(pc, BCMA_CORE_PCI_CTL, BCMA_CORE_PCI_CTL_RST |
 			BCMA_CORE_PCI_CTL_RST_OE);
 
@@ -489,6 +489,17 @@ void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 
 	bcma_core_pci_enable_crs(pc);
 
+	if (bus->chipinfo.id == BCMA_CHIP_ID_BCM4706 ||
+	    bus->chipinfo.id == BCMA_CHIP_ID_BCM4716) {
+		u16 val16;
+		bcma_extpci_read_config(pc, 0, 0, BCMA_CORE_PCI_CFG_DEVCTRL,
+					&val16, sizeof(val16));
+		val16 |= (2 << 5);	/* Max payload size of 512 */
+		val16 |= (2 << 12);	/* MRRS 512 */
+		bcma_extpci_write_config(pc, 0, 0, BCMA_CORE_PCI_CFG_DEVCTRL,
+					 &val16, sizeof(val16));
+	}
+
 	/* Enable PCI bridge BAR0 memory & master access */
 	tmp = PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY;
 	bcma_extpci_write_config(pc, 0, 0, PCI_COMMAND, &tmp, sizeof(tmp));
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 41da581e1612..31232247a1ee 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -179,6 +179,8 @@ struct pci_dev;
 #define BCMA_CORE_PCI_CFG_FUN_MASK		7	/* Function mask */
 #define BCMA_CORE_PCI_CFG_OFF_MASK		0xfff	/* Register mask */
 
+#define BCMA_CORE_PCI_CFG_DEVCTRL		0xd8
+
 /* PCIE Root Capability Register bits (Host mode only) */
 #define BCMA_CORE_PCI_RC_CRS_VISIBILITY		0x0001
 
-- 
cgit v1.2.3


From 483f33f63c1cb3c6becb465bac7b75d7ff5e3b8f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 4 Jan 2013 17:57:40 +0100
Subject: pinctrl: add pinconf-generic defines for output

This adds a definition of a generic output configuration
for a certain pin when using the generic pin configuration
library. Whereas driving pins low/high is usually a GPIO
business, you may want to set up pins into a default state
using hogs, and never touch them again. This helps out
with that scenario.

Based on a patch from Patrice Chotard.

Signed-off-by: Patrice Chotard <patrice.chotard@stericsson.com>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinconf-generic.c       | 1 +
 include/linux/pinctrl/pinconf-generic.h | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 833a36458157..bcf8157ceea7 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -46,6 +46,7 @@ struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "time units"),
 	PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector"),
 	PCONFDUMP(PIN_CONFIG_LOW_POWER_MODE, "pin low power", "mode"),
+	PCONFDUMP(PIN_CONFIG_OUTPUT, "pin output", "level"),
 };
 
 void pinconf_generic_dump_pin(struct pinctrl_dev *pctldev,
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 47a1bdd88878..b23d99da3b4b 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -62,6 +62,8 @@
  *	operation, if several modes of operation are supported these can be
  *	passed in the argument on a custom form, else just use argument 1
  *	to indicate low power mode, argument 0 turns low power mode off.
+ * @PIN_CONFIG_OUTPUT: this will configure the pin in output, use argument
+ *	1 to indicate high level, argument 0 to indicate low level.
  * @PIN_CONFIG_END: this is the last enumerator for pin configurations, if
  *	you need to pass in custom configurations to the pin controller, use
  *	PIN_CONFIG_END+1 as the base offset.
@@ -79,6 +81,7 @@ enum pin_config_param {
 	PIN_CONFIG_INPUT_DEBOUNCE,
 	PIN_CONFIG_POWER_SOURCE,
 	PIN_CONFIG_LOW_POWER_MODE,
+	PIN_CONFIG_OUTPUT,
 	PIN_CONFIG_END = 0x7FFF,
 };
 
-- 
cgit v1.2.3


From e7219858ac1f98213a4714d0e24e7a003e1bf6a2 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sun, 13 Jan 2013 05:02:01 +0000
Subject: ipv6: Use ipv6_get_dsfield() instead of ipv6_tclass().

Commit 7a3198a8 ("ipv6: helper function to get tclass") introduced
ipv6_tclass(), but similar function is already available as
ipv6_get_dsfield().

We might be able to call ipv6_tclass() from ipv6_get_dsfield(),
but it is confusing to have two versions.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 5 -----
 net/ipv6/datagram.c  | 3 ++-
 net/ipv6/tcp_ipv6.c  | 6 +++---
 3 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index faed1e357dd6..304a9f46b578 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -77,11 +77,6 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 	return (struct ipv6hdr *)skb_transport_header(skb);
 }
 
-static inline __u8 ipv6_tclass(const struct ipv6hdr *iph)
-{
-	return (ntohl(*(__be32 *)iph) >> 20) & 0xff;
-}
-
 /* 
    This structure contains results of exthdrs parsing
    as offsets from skb->nh.
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 7f65df41c396..33be36398a78 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -30,6 +30,7 @@
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
+#include <net/dsfield.h>
 
 #include <linux/errqueue.h>
 #include <asm/uaccess.h>
@@ -487,7 +488,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 	}
 
 	if (np->rxopt.bits.rxtclass) {
-		int tclass = ipv6_tclass(ipv6_hdr(skb));
+		int tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 		put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3164ad272a74..3701c3c6e2eb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1163,7 +1163,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newnp->opt	   = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
 		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
-		newnp->rcv_tclass  = ipv6_tclass(ipv6_hdr(skb));
+		newnp->rcv_tclass  = ipv6_get_dsfield(ipv6_hdr(skb));
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1243,7 +1243,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
-	newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+	newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 
 	/* Clone native IPv6 options from listening socket (if any)
 
@@ -1456,7 +1456,7 @@ ipv6_pktoptions:
 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
 		if (np->rxopt.bits.rxtclass)
-			np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+			np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 		if (ipv6_opt_accepted(sk, opt_skb)) {
 			skb_set_owner_r(opt_skb, sk);
 			opt_skb = xchg(&np->pktoptions, opt_skb);
-- 
cgit v1.2.3


From dd3332bfcb2223458f553f341d3388cb84040e6a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sun, 13 Jan 2013 05:02:45 +0000
Subject: ipv6: Store Router Alert option in IP6CB directly.

Router Alert option is very small and we can store the value
itself in the skb.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h      | 3 ++-
 include/uapi/linux/ipv6.h | 2 ++
 net/ipv6/exthdrs.c        | 3 ++-
 net/ipv6/ip6_input.c      | 5 ++---
 4 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 304a9f46b578..e971e3742172 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -84,7 +84,7 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 
 struct inet6_skb_parm {
 	int			iif;
-	__u16			ra;
+	__be16			ra;
 	__u16			hop;
 	__u16			dst0;
 	__u16			srcrt;
@@ -100,6 +100,7 @@ struct inet6_skb_parm {
 #define IP6SKB_XFRM_TRANSFORMED	1
 #define IP6SKB_FORWARDED	2
 #define IP6SKB_REROUTED		4
+#define IP6SKB_ROUTERALERT	8
 };
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 5a2991cf0251..4bda4cf5b0f5 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -63,6 +63,8 @@ struct ipv6_opt_hdr {
 #define ipv6_destopt_hdr ipv6_opt_hdr
 #define ipv6_hopopt_hdr  ipv6_opt_hdr
 
+/* Router Alert option values (RFC2711) */
+#define IPV6_OPT_ROUTERALERT_MLD	0x0000	/* MLD(RFC2710) */
 
 /*
  *	routing header type 0 (used in cmsghdr struct)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 473f628f9f20..07a7d65a7cb6 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
 	const unsigned char *nh = skb_network_header(skb);
 
 	if (nh[optoff + 1] == 2) {
-		IP6CB(skb)->ra = optoff;
+		IP6CB(skb)->flags |= IP6SKB_ROUTERALERT;
+		memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra));
 		return true;
 	}
 	LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 2ccd35ec3628..4ac5bf30e16a 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -280,9 +280,8 @@ int ip6_mc_input(struct sk_buff *skb)
 		struct inet6_skb_parm *opt = IP6CB(skb);
 
 		/* Check for MLD */
-		if (unlikely(opt->ra)) {
+		if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 			/* Check if this is a mld message */
-			u8 *ptr = skb_network_header(skb) + opt->ra;
 			u8 nexthdr = hdr->nexthdr;
 			__be16 frag_off;
 			int offset;
@@ -290,7 +289,7 @@ int ip6_mc_input(struct sk_buff *skb)
 			/* Check if the value of Router Alert
 			 * is for MLD (0x0000).
 			 */
-			if ((ptr[2] | ptr[3]) == 0) {
+			if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) {
 				deliver = false;
 
 				if (!ipv6_ext_hdr(nexthdr)) {
-- 
cgit v1.2.3


From 25d46f43a911b08c5aa8c8fd4fe7fa9b36445068 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sun, 13 Jan 2013 16:02:06 +0000
Subject: ipv6: Move comment to right place.

IN6ADDR_* and in6addr_* are not exported to userspace, and are defined
in include/linux/in6.h.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h      | 4 ++++
 include/uapi/linux/in6.h | 5 -----
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index 9e2ae26fb598..a16e19349ec0 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -22,6 +22,10 @@
 
 #include <uapi/linux/in6.h>
 
+/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553
+ * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined
+ * in network byte order, not in host byte order as are the IPv4 equivalents
+ */
 extern const struct in6_addr in6addr_any;
 #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } }
 extern const struct in6_addr in6addr_loopback;
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index f79c3721da6e..5673b97dcf54 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -38,11 +38,6 @@ struct in6_addr {
 #define s6_addr32		in6_u.u6_addr32
 };
 
-/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553
- * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined
- * in network byte order, not in host byte order as are the IPv4 equivalents
- */
-
 struct sockaddr_in6 {
 	unsigned short int	sin6_family;    /* AF_INET6 */
 	__be16			sin6_port;      /* Transport layer port # */
-- 
cgit v1.2.3


From 6bf2e5461479c4511f59946a7378db576b04dbc5 Mon Sep 17 00:00:00 2001
From: Nathan Hintz <nlhintz@hotmail.com>
Date: Fri, 11 Jan 2013 22:07:22 -0800
Subject: bcma: fix bcm4716/bcm4748 i2s irqflag

The default irqflag assignment for the I2S core on some Broadcom
4716/4748 devices is invalid and needs to be corrected (from the
Broadcom SDK).

Signed-off-by: Nathan Hintz <nlhintz@hotmail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_mips.c            | 28 ++++++++++++++++++++++++++++
 include/linux/bcma/bcma_driver_mips.h |  1 +
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index a808404db4b1..9fe86ee16c66 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -256,6 +256,32 @@ void bcma_core_mips_early_init(struct bcma_drv_mips *mcore)
 	mcore->early_setup_done = true;
 }
 
+static void bcma_fix_i2s_irqflag(struct bcma_bus *bus)
+{
+	struct bcma_device *cpu, *pcie, *i2s;
+
+	/* Fixup the interrupts in 4716/4748 for i2s core (2010 Broadcom SDK)
+	 * (IRQ flags > 7 are ignored when setting the interrupt masks)
+	 */
+	if (bus->chipinfo.id != BCMA_CHIP_ID_BCM4716 &&
+	    bus->chipinfo.id != BCMA_CHIP_ID_BCM4748)
+		return;
+
+	cpu = bcma_find_core(bus, BCMA_CORE_MIPS_74K);
+	pcie = bcma_find_core(bus, BCMA_CORE_PCIE);
+	i2s = bcma_find_core(bus, BCMA_CORE_I2S);
+	if (cpu && pcie && i2s &&
+	    bcma_aread32(cpu, BCMA_MIPS_OOBSELINA74) == 0x08060504 &&
+	    bcma_aread32(pcie, BCMA_MIPS_OOBSELINA74) == 0x08060504 &&
+	    bcma_aread32(i2s, BCMA_MIPS_OOBSELOUTA30) == 0x88) {
+		bcma_awrite32(cpu, BCMA_MIPS_OOBSELINA74, 0x07060504);
+		bcma_awrite32(pcie, BCMA_MIPS_OOBSELINA74, 0x07060504);
+		bcma_awrite32(i2s, BCMA_MIPS_OOBSELOUTA30, 0x87);
+		bcma_debug(bus,
+			   "Moved i2s interrupt to oob line 7 instead of 8\n");
+	}
+}
+
 void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 {
 	struct bcma_bus *bus;
@@ -269,6 +295,8 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 
 	bcma_core_mips_early_init(mcore);
 
+	bcma_fix_i2s_irqflag(bus);
+
 	switch (bus->chipinfo.id) {
 	case BCMA_CHIP_ID_BCM4716:
 	case BCMA_CHIP_ID_BCM4748:
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index 73c7f4b882cc..0d1ea297851a 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -28,6 +28,7 @@
 #define BCMA_MIPS_MIPS74K_GPIOEN	0x0048
 #define BCMA_MIPS_MIPS74K_CLKCTLST	0x01E0
 
+#define BCMA_MIPS_OOBSELINA74		0x004
 #define BCMA_MIPS_OOBSELOUTA30		0x100
 
 struct bcma_device;
-- 
cgit v1.2.3


From f9a8f83b04e0c362a2fc660dbad980d24af209fc Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian@openwrt.org>
Date: Mon, 14 Jan 2013 00:52:52 +0000
Subject: net: phy: remove flags argument from phy_{attach, connect,
 connect_direct}

The flags argument of the phy_{attach,connect,connect_direct} functions
is then used to assign a struct phy_device dev_flags with its value.
All callers but the tg3 driver pass the flag 0, which results in the
underlying PHY drivers in drivers/net/phy/ not being able to actually
use any of the flags they would set in dev_flags. This patch gets rid of
the flags argument, and passes phydev->dev_flags to the internal PHY
library call phy_attach_direct() such that drivers which actually modify
a phy device dev_flags get the value preserved for use by the underlying
phy driver.

Acked-by: Kosta Zertsekel <konszert@marvell.com>
Signed-off-by: Florian Fainelli <florian@openwrt.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/phy.txt                  | 11 ++++++-----
 drivers/net/ethernet/8390/ax88796.c               |  2 +-
 drivers/net/ethernet/adi/bfin_mac.c               |  4 ++--
 drivers/net/ethernet/aeroflex/greth.c             |  4 +---
 drivers/net/ethernet/amd/au1000_eth.c             |  4 ++--
 drivers/net/ethernet/broadcom/bcm63xx_enet.c      |  2 +-
 drivers/net/ethernet/broadcom/sb1250-mac.c        |  2 +-
 drivers/net/ethernet/broadcom/tg3.c               |  4 ++--
 drivers/net/ethernet/cadence/macb.c               |  2 +-
 drivers/net/ethernet/dnet.c                       |  4 ++--
 drivers/net/ethernet/ethoc.c                      |  4 ++--
 drivers/net/ethernet/faraday/ftgmac100.c          |  3 +--
 drivers/net/ethernet/freescale/fec.c              |  2 +-
 drivers/net/ethernet/lantiq_etop.c                |  4 ++--
 drivers/net/ethernet/marvell/mv643xx_eth.c        |  2 +-
 drivers/net/ethernet/marvell/pxa168_eth.c         |  2 +-
 drivers/net/ethernet/nxp/lpc_eth.c                |  2 +-
 drivers/net/ethernet/rdc/r6040.c                  |  2 +-
 drivers/net/ethernet/renesas/sh_eth.c             |  2 +-
 drivers/net/ethernet/s6gmac.c                     |  2 +-
 drivers/net/ethernet/smsc/smsc911x.c              |  5 ++---
 drivers/net/ethernet/smsc/smsc9420.c              |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  3 +--
 drivers/net/ethernet/ti/cpmac.c                   |  4 ++--
 drivers/net/ethernet/ti/cpsw.c                    |  2 +-
 drivers/net/ethernet/ti/davinci_emac.c            |  2 +-
 drivers/net/ethernet/toshiba/tc35815.c            |  5 ++---
 drivers/net/ethernet/xscale/ixp4xx_eth.c          |  2 +-
 drivers/net/phy/phy_device.c                      | 15 ++++++---------
 drivers/net/usb/ax88172a.c                        |  2 +-
 drivers/of/of_mdio.c                              |  4 ++--
 drivers/staging/et131x/et131x.c                   |  2 +-
 include/linux/phy.h                               |  6 +++---
 net/dsa/slave.c                                   |  2 +-
 34 files changed, 56 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt
index 95e5f5985a2a..d5b1a3935245 100644
--- a/Documentation/networking/phy.txt
+++ b/Documentation/networking/phy.txt
@@ -103,7 +103,7 @@ Letting the PHY Abstraction Layer do Everything
  
  Now, to connect, just call this function:
  
-   phydev = phy_connect(dev, phy_name, &adjust_link, flags, interface);
+   phydev = phy_connect(dev, phy_name, &adjust_link, interface);
 
  phydev is a pointer to the phy_device structure which represents the PHY.  If
  phy_connect is successful, it will return the pointer.  dev, here, is the
@@ -113,7 +113,9 @@ Letting the PHY Abstraction Layer do Everything
  current state, though the PHY will not yet be truly operational at this
  point.
 
- flags is a u32 which can optionally contain phy-specific flags.
+ PHY-specific flags should be set in phydev->dev_flags prior to the call
+ to phy_connect() such that the underlying PHY driver can check for flags
+ and perform specific operations based on them.
  This is useful if the system has put hardware restrictions on
  the PHY/controller, of which the PHY needs to be aware.
 
@@ -185,11 +187,10 @@ Doing it all yourself
    start, or disables then frees them for stop.
 
  struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
-		 u32 flags, phy_interface_t interface);
+		 phy_interface_t interface);
 
    Attaches a network device to a particular PHY, binding the PHY to a generic
-   driver if none was found during bus initialization.  Passes in
-   any phy-specific flags as needed.
+   driver if none was found during bus initialization.
 
  int phy_start_aneg(struct phy_device *phydev);
    
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 7eeddf01307f..cab306a9888e 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -358,7 +358,7 @@ static int ax_mii_probe(struct net_device *dev)
 		return -ENODEV;
 	}
 
-	ret = phy_connect_direct(dev, phy_dev, ax_handle_link_change, 0,
+	ret = phy_connect_direct(dev, phy_dev, ax_handle_link_change,
 				 PHY_INTERFACE_MODE_MII);
 	if (ret) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index c7a83f6f2382..a175d0be1ae1 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -425,8 +425,8 @@ static int mii_probe(struct net_device *dev, int phy_mode)
 		return -EINVAL;
 	}
 
-	phydev = phy_connect(dev, dev_name(&phydev->dev), &bfin_mac_adjust_link,
-			0, phy_mode);
+	phydev = phy_connect(dev, dev_name(&phydev->dev),
+			     &bfin_mac_adjust_link, phy_mode);
 
 	if (IS_ERR(phydev)) {
 		netdev_err(dev, "could not attach PHY\n");
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index 480662ba5227..0be2195e5034 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1288,9 +1288,7 @@ static int greth_mdio_probe(struct net_device *dev)
 	}
 
 	ret = phy_connect_direct(dev, phy, &greth_link_change,
-			0, greth->gbit_mac ?
-			PHY_INTERFACE_MODE_GMII :
-			PHY_INTERFACE_MODE_MII);
+				 greth->gbit_mac ? PHY_INTERFACE_MODE_GMII : PHY_INTERFACE_MODE_MII);
 	if (ret) {
 		if (netif_msg_ifup(greth))
 			dev_err(&dev->dev, "could not attach to PHY\n");
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index 65b865a0cc78..de774d419144 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -437,8 +437,8 @@ static int au1000_mii_probe(struct net_device *dev)
 	/* now we are supposed to have a proper phydev, to attach to... */
 	BUG_ON(phydev->attached_dev);
 
-	phydev = phy_connect(dev, dev_name(&phydev->dev), &au1000_adjust_link,
-			0, PHY_INTERFACE_MODE_MII);
+	phydev = phy_connect(dev, dev_name(&phydev->dev),
+			     &au1000_adjust_link, PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(phydev)) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index d8a151046728..f5b6b4715d45 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -799,7 +799,7 @@ static int bcm_enet_open(struct net_device *dev)
 		snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT,
 			 priv->mii_bus->id, priv->phy_id);
 
-		phydev = phy_connect(dev, phy_id, bcm_enet_adjust_phy_link, 0,
+		phydev = phy_connect(dev, phy_id, bcm_enet_adjust_phy_link,
 				     PHY_INTERFACE_MODE_MII);
 
 		if (IS_ERR(phydev)) {
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 3a1c8a3cf7c9..e9b35da375cb 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2385,7 +2385,7 @@ static int sbmac_mii_probe(struct net_device *dev)
 		return -ENXIO;
 	}
 
-	phy_dev = phy_connect(dev, dev_name(&phy_dev->dev), &sbmac_mii_poll, 0,
+	phy_dev = phy_connect(dev, dev_name(&phy_dev->dev), &sbmac_mii_poll,
 			      PHY_INTERFACE_MODE_GMII);
 	if (IS_ERR(phy_dev)) {
 		printk(KERN_ERR "%s: could not attach to PHY\n", dev->name);
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 88f2d41c009b..227749107789 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -2004,8 +2004,8 @@ static int tg3_phy_init(struct tg3 *tp)
 	phydev = tp->mdio_bus->phy_map[TG3_PHY_MII_ADDR];
 
 	/* Attach the MAC to the PHY. */
-	phydev = phy_connect(tp->dev, dev_name(&phydev->dev), tg3_adjust_link,
-			     phydev->dev_flags, phydev->interface);
+	phydev = phy_connect(tp->dev, dev_name(&phydev->dev),
+			     tg3_adjust_link, phydev->interface);
 	if (IS_ERR(phydev)) {
 		dev_err(&tp->pdev->dev, "Could not attach to PHY\n");
 		return PTR_ERR(phydev);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index a9b0830fb39d..352190b9ebe7 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -287,7 +287,7 @@ static int macb_mii_probe(struct net_device *dev)
 	}
 
 	/* attach the mac to the phy */
-	ret = phy_connect_direct(dev, phydev, &macb_handle_link_change, 0,
+	ret = phy_connect_direct(dev, phydev, &macb_handle_link_change,
 				 bp->phy_interface);
 	if (ret) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 2c177b329c8b..f3d60eb13c3a 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -281,11 +281,11 @@ static int dnet_mii_probe(struct net_device *dev)
 	/* attach the mac to the phy */
 	if (bp->capabilities & DNET_HAS_RMII) {
 		phydev = phy_connect(dev, dev_name(&phydev->dev),
-				     &dnet_handle_link_change, 0,
+				     &dnet_handle_link_change,
 				     PHY_INTERFACE_MODE_RMII);
 	} else {
 		phydev = phy_connect(dev, dev_name(&phydev->dev),
-				     &dnet_handle_link_change, 0,
+				     &dnet_handle_link_change,
 				     PHY_INTERFACE_MODE_MII);
 	}
 
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index b51c81ac0b6f..aa47ef9689a8 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -682,8 +682,8 @@ static int ethoc_mdio_probe(struct net_device *dev)
 		return -ENXIO;
 	}
 
-	err = phy_connect_direct(dev, phy, ethoc_mdio_poll, 0,
-			PHY_INTERFACE_MODE_GMII);
+	err = phy_connect_direct(dev, phy, ethoc_mdio_poll,
+				 PHY_INTERFACE_MODE_GMII);
 	if (err) {
 		dev_err(&dev->dev, "could not attach to PHY\n");
 		return err;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 96454b5fca63..7c361d1db94c 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -858,8 +858,7 @@ static int ftgmac100_mii_probe(struct ftgmac100 *priv)
 	}
 
 	phydev = phy_connect(netdev, dev_name(&phydev->dev),
-			     &ftgmac100_adjust_link, 0,
-			     PHY_INTERFACE_MODE_GMII);
+			     &ftgmac100_adjust_link, PHY_INTERFACE_MODE_GMII);
 
 	if (IS_ERR(phydev)) {
 		netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name);
diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
index 5f2b4acf4836..1b7684a8851e 100644
--- a/drivers/net/ethernet/freescale/fec.c
+++ b/drivers/net/ethernet/freescale/fec.c
@@ -1008,7 +1008,7 @@ static int fec_enet_mii_probe(struct net_device *ndev)
 	}
 
 	snprintf(phy_name, sizeof(phy_name), PHY_ID_FMT, mdio_bus_id, phy_id);
-	phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link, 0,
+	phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link,
 			      fep->phy_interface);
 	if (IS_ERR(phy_dev)) {
 		printk(KERN_ERR "%s: could not attach to PHY\n", ndev->name);
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 8ead46adc21e..6a2127489af7 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -393,8 +393,8 @@ ltq_etop_mdio_probe(struct net_device *dev)
 		return -ENODEV;
 	}
 
-	phydev = phy_connect(dev, dev_name(&phydev->dev), &ltq_etop_mdio_link,
-			0, priv->pldata->mii_mode);
+	phydev = phy_connect(dev, dev_name(&phydev->dev),
+			     &ltq_etop_mdio_link, priv->pldata->mii_mode);
 
 	if (IS_ERR(phydev)) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 84c13263c514..c27b23d8f4fc 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2789,7 +2789,7 @@ static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex)
 
 	phy_reset(mp);
 
-	phy_attach(mp->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_GMII);
+	phy_attach(mp->dev, dev_name(&phy->dev), PHY_INTERFACE_MODE_GMII);
 
 	if (speed == 0) {
 		phy->autoneg = AUTONEG_ENABLE;
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index c7f2fa60fe6f..037ed866c22f 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1390,7 +1390,7 @@ static void phy_init(struct pxa168_eth_private *pep, int speed, int duplex)
 	struct phy_device *phy = pep->phy;
 	ethernet_phy_reset(pep);
 
-	phy_attach(pep->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_MII);
+	phy_attach(pep->dev, dev_name(&phy->dev), PHY_INTERFACE_MODE_MII);
 
 	if (speed == 0) {
 		phy->autoneg = AUTONEG_ENABLE;
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 6fda51ebcc76..c4122c86f829 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -800,7 +800,7 @@ static int lpc_mii_probe(struct net_device *ndev)
 	else
 		netdev_info(ndev, "using RMII interface\n");
 	phydev = phy_connect(ndev, dev_name(&phydev->dev),
-			     &lpc_handle_link_change, 0,
+			     &lpc_handle_link_change,
 			     lpc_phy_interface_mode(&pldat->pdev->dev));
 
 	if (IS_ERR(phydev)) {
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index be3616d060d9..34f76e99dc8a 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -1042,7 +1042,7 @@ static int r6040_mii_probe(struct net_device *dev)
 	}
 
 	phydev = phy_connect(dev, dev_name(&phydev->dev), &r6040_adjust_link,
-				0, PHY_INTERFACE_MODE_MII);
+			     PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(phydev)) {
 		dev_err(&lp->pdev->dev, "could not attach to PHY\n");
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 3d705862bd7d..e195c1e89d61 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1422,7 +1422,7 @@ static int sh_eth_phy_init(struct net_device *ndev)
 
 	/* Try connect to PHY */
 	phydev = phy_connect(ndev, phy_id, sh_eth_adjust_link,
-				0, mdp->phy_interface);
+			     mdp->phy_interface);
 	if (IS_ERR(phydev)) {
 		dev_err(&ndev->dev, "phy_connect failed\n");
 		return PTR_ERR(phydev);
diff --git a/drivers/net/ethernet/s6gmac.c b/drivers/net/ethernet/s6gmac.c
index 72fc57dd084d..21683e2b1ff4 100644
--- a/drivers/net/ethernet/s6gmac.c
+++ b/drivers/net/ethernet/s6gmac.c
@@ -795,7 +795,7 @@ static inline int s6gmac_phy_start(struct net_device *dev)
 	struct phy_device *p = NULL;
 	while ((i < PHY_MAX_ADDR) && (!(p = pd->mii.bus->phy_map[i])))
 		i++;
-	p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link, 0,
+	p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link,
 			PHY_INTERFACE_MODE_RGMII);
 	if (IS_ERR(p)) {
 		printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 04ff63cb6544..da5cc9a3b34c 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -997,9 +997,8 @@ static int smsc911x_mii_probe(struct net_device *dev)
 	SMSC_TRACE(pdata, probe, "PHY: addr %d, phy_id 0x%08X",
 		   phydev->addr, phydev->phy_id);
 
-	ret = phy_connect_direct(dev, phydev,
-			&smsc911x_phy_adjust_link, 0,
-			pdata->config.phy_interface);
+	ret = phy_connect_direct(dev, phydev, &smsc911x_phy_adjust_link,
+				 pdata->config.phy_interface);
 
 	if (ret) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index 3c586585e1b3..ecfb43614d7b 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -1179,7 +1179,7 @@ static int smsc9420_mii_probe(struct net_device *dev)
 		phydev->phy_id);
 
 	phydev = phy_connect(dev, dev_name(&phydev->dev),
-		smsc9420_phy_adjust_link, 0, PHY_INTERFACE_MODE_MII);
+			     smsc9420_phy_adjust_link, PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(phydev)) {
 		pr_err("%s: Could not attach to PHY\n", dev->name);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f07c0612abf6..8c657294ce56 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -428,8 +428,7 @@ static int stmmac_init_phy(struct net_device *dev)
 		 priv->plat->phy_addr);
 	pr_debug("stmmac_init_phy:  trying to attach to %s\n", phy_id_fmt);
 
-	phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, 0,
-			     interface);
+	phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, interface);
 
 	if (IS_ERR(phydev)) {
 		pr_err("%s: Could not attach to PHY\n", dev->name);
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 70d1920cac97..31bbbca341a7 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -1172,8 +1172,8 @@ static int cpmac_probe(struct platform_device *pdev)
 	snprintf(priv->phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT,
 						mdio_bus_id, phy_id);
 
-	priv->phy = phy_connect(dev, priv->phy_name, cpmac_adjust_link, 0,
-						PHY_INTERFACE_MODE_MII);
+	priv->phy = phy_connect(dev, priv->phy_name, cpmac_adjust_link,
+				PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(priv->phy)) {
 		if (netif_msg_drv(priv))
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index bea736b8c3ec..3772804fb697 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -592,7 +592,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 			   1 << slave_port, 0, ALE_MCAST_FWD_2);
 
 	slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
-				 &cpsw_adjust_link, 0, slave->data->phy_if);
+				 &cpsw_adjust_link, slave->data->phy_if);
 	if (IS_ERR(slave->phy)) {
 		dev_err(priv->dev, "phy %s not found on slave %d\n",
 			slave->data->phy_id, slave->slave_num);
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 6621ae3a98d9..8478d98c1092 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1599,7 +1599,7 @@ static int emac_dev_open(struct net_device *ndev)
 
 	if (priv->phy_id && *priv->phy_id) {
 		priv->phydev = phy_connect(ndev, priv->phy_id,
-					   &emac_adjust_link, 0,
+					   &emac_adjust_link,
 					   PHY_INTERFACE_MODE_MII);
 
 		if (IS_ERR(priv->phydev)) {
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index f16410e599f4..fe256094db35 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -633,9 +633,8 @@ static int tc_mii_probe(struct net_device *dev)
 
 	/* attach the mac to the phy */
 	phydev = phy_connect(dev, dev_name(&phydev->dev),
-			     &tc_handle_link_change, 0,
-			     lp->chiptype == TC35815_TX4939 ?
-			     PHY_INTERFACE_MODE_RMII : PHY_INTERFACE_MODE_MII);
+			     &tc_handle_link_change,
+			     lp->chiptype == TC35815_TX4939 ? PHY_INTERFACE_MODE_RMII : PHY_INTERFACE_MODE_MII);
 	if (IS_ERR(phydev)) {
 		printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
 		return PTR_ERR(phydev);
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index a4be1ad886c5..6958a5e87703 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1451,7 +1451,7 @@ static int eth_init_one(struct platform_device *pdev)
 
 	snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT,
 		mdio_bus->id, plat->phy);
-	port->phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link, 0,
+	port->phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link,
 				   PHY_INTERFACE_MODE_MII);
 	if (IS_ERR(port->phydev)) {
 		err = PTR_ERR(port->phydev);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8af46e88a181..9930f9999561 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -416,16 +416,15 @@ static void phy_prepare_link(struct phy_device *phydev,
  * @dev: the network device to connect
  * @phydev: the pointer to the phy device
  * @handler: callback function for state change notifications
- * @flags: PHY device's dev_flags
  * @interface: PHY device's interface
  */
 int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
-		       void (*handler)(struct net_device *), u32 flags,
+		       void (*handler)(struct net_device *),
 		       phy_interface_t interface)
 {
 	int rc;
 
-	rc = phy_attach_direct(dev, phydev, flags, interface);
+	rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);
 	if (rc)
 		return rc;
 
@@ -443,7 +442,6 @@ EXPORT_SYMBOL(phy_connect_direct);
  * @dev: the network device to connect
  * @bus_id: the id string of the PHY device to connect
  * @handler: callback function for state change notifications
- * @flags: PHY device's dev_flags
  * @interface: PHY device's interface
  *
  * Description: Convenience function for connecting ethernet
@@ -455,7 +453,7 @@ EXPORT_SYMBOL(phy_connect_direct);
  *   the desired functionality.
  */
 struct phy_device * phy_connect(struct net_device *dev, const char *bus_id,
-		void (*handler)(struct net_device *), u32 flags,
+		void (*handler)(struct net_device *),
 		phy_interface_t interface)
 {
 	struct phy_device *phydev;
@@ -471,7 +469,7 @@ struct phy_device * phy_connect(struct net_device *dev, const char *bus_id,
 	}
 	phydev = to_phy_device(d);
 
-	rc = phy_connect_direct(dev, phydev, handler, flags, interface);
+	rc = phy_connect_direct(dev, phydev, handler, interface);
 	if (rc)
 		return ERR_PTR(rc);
 
@@ -576,14 +574,13 @@ static int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
  * phy_attach - attach a network device to a particular PHY device
  * @dev: network device to attach
  * @bus_id: Bus ID of PHY device to attach
- * @flags: PHY device's dev_flags
  * @interface: PHY device's interface
  *
  * Description: Same as phy_attach_direct() except that a PHY bus_id
  *     string is passed instead of a pointer to a struct phy_device.
  */
 struct phy_device *phy_attach(struct net_device *dev,
-		const char *bus_id, u32 flags, phy_interface_t interface)
+		const char *bus_id, phy_interface_t interface)
 {
 	struct bus_type *bus = &mdio_bus_type;
 	struct phy_device *phydev;
@@ -599,7 +596,7 @@ struct phy_device *phy_attach(struct net_device *dev,
 	}
 	phydev = to_phy_device(d);
 
-	rc = phy_attach_direct(dev, phydev, flags, interface);
+	rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);
 	if (rc)
 		return ERR_PTR(rc);
 
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index c8e0aa85fb8e..fdbab72926bd 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -377,7 +377,7 @@ static int ax88172a_reset(struct usbnet *dev)
 
 	priv->phydev = phy_connect(dev->net, priv->phy_name,
 				   &ax88172a_adjust_link,
-				   0, PHY_INTERFACE_MODE_MII);
+				   PHY_INTERFACE_MODE_MII);
 	if (IS_ERR(priv->phydev)) {
 		netdev_err(dev->net, "Could not connect to PHY device %s\n",
 			   priv->phy_name);
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 83ca06f4312b..e3a8b22ef9dd 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -157,7 +157,7 @@ struct phy_device *of_phy_connect(struct net_device *dev,
 	if (!phy)
 		return NULL;
 
-	return phy_connect_direct(dev, phy, hndlr, flags, iface) ? NULL : phy;
+	return phy_connect_direct(dev, phy, hndlr, iface) ? NULL : phy;
 }
 EXPORT_SYMBOL(of_phy_connect);
 
@@ -194,7 +194,7 @@ struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
 
 	sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0]));
 
-	phy = phy_connect(dev, bus_id, hndlr, 0, iface);
+	phy = phy_connect(dev, bus_id, hndlr, iface);
 	return IS_ERR(phy) ? NULL : phy;
 }
 EXPORT_SYMBOL(of_phy_connect_fixed_link);
diff --git a/drivers/staging/et131x/et131x.c b/drivers/staging/et131x/et131x.c
index f15059ca3781..a0a30b3f2dcd 100644
--- a/drivers/staging/et131x/et131x.c
+++ b/drivers/staging/et131x/et131x.c
@@ -3917,7 +3917,7 @@ static int et131x_mii_probe(struct net_device *netdev)
 	}
 
 	phydev = phy_connect(netdev, dev_name(&phydev->dev),
-			&et131x_adjust_link, 0, PHY_INTERFACE_MODE_MII);
+			     &et131x_adjust_link, PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(phydev)) {
 		dev_err(&adapter->pdev->dev, "Could not attach to PHY\n");
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 93b3cf77f564..33999adbf8c8 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -506,13 +506,13 @@ struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
 int phy_init_hw(struct phy_device *phydev);
 struct phy_device * phy_attach(struct net_device *dev,
-		const char *bus_id, u32 flags, phy_interface_t interface);
+		const char *bus_id, phy_interface_t interface);
 struct phy_device *phy_find_first(struct mii_bus *bus);
 int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
-		void (*handler)(struct net_device *), u32 flags,
+		void (*handler)(struct net_device *),
 		phy_interface_t interface);
 struct phy_device * phy_connect(struct net_device *dev, const char *bus_id,
-		void (*handler)(struct net_device *), u32 flags,
+		void (*handler)(struct net_device *),
 		phy_interface_t interface);
 void phy_disconnect(struct phy_device *phydev);
 void phy_detach(struct phy_device *phydev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f795b0ca7ee6..f4345582a6b9 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -391,7 +391,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 
 	if (p->phy != NULL) {
 		phy_attach(slave_dev, dev_name(&p->phy->dev),
-			   0, PHY_INTERFACE_MODE_GMII);
+			   PHY_INTERFACE_MODE_GMII);
 
 		p->phy->autoneg = AUTONEG_ENABLE;
 		p->phy->speed = 0;
-- 
cgit v1.2.3


From 3988a4df3499e604a3f2ae979372d27fc5664f77 Mon Sep 17 00:00:00 2001
From: Johan Bjornstedt <johan.bjornstedt@stericsson.com>
Date: Fri, 11 Jan 2013 13:12:50 +0000
Subject: ab8500_bm: Skip first CCEOC irq for instant current

When enabling the CCEOC irq we might get false interrupt
from ab8500-driver due to the latched value will be saved
and interpreted as an IRQ when enabled

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Johan Bjornstedt <johan.bjornstedt@stericsson.com>
Signed-off-by: Henrik Solver <henrik.solver@stericsson.com>
Reviewed-by: Karl KOMIEROWSKI <karl.komierowski@stericsson.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/ab8500_btemp.c         | 11 +++----
 drivers/power/ab8500_fg.c            | 63 +++++++++++++++++++++++++++++++-----
 include/linux/mfd/abx500/ab8500-bm.h |  7 ++++
 3 files changed, 66 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index e1d28039ce7b..4a570b6c9e47 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -374,13 +374,10 @@ static int ab8500_btemp_get_batctrl_res(struct ab8500_btemp *di)
 		return ret;
 	}
 
-	/*
-	 * Since there is no interrupt when current measurement is done,
-	 * loop for over 250ms (250ms is one sample conversion time
-	 * with 32.768 Khz RTC clock). Note that a stop time must be set
-	 * since the ab8500_btemp_read_batctrl_voltage call can block and
-	 * take an unknown amount of time to complete.
-	 */
+	do {
+		msleep(20);
+	} while (!ab8500_fg_inst_curr_started(di->fg));
+
 	i = 0;
 
 	do {
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 3d05c73813c8..0f40c968286c 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -160,6 +160,7 @@ struct inst_curr_result_list {
  * @recovery_cnt:	Counter for recovery mode
  * @high_curr_cnt:	Counter for high current mode
  * @init_cnt:		Counter for init mode
+ * @nbr_cceoc_irq_cnt	Counter for number of CCEOC irqs received since enabled
  * @recovery_needed:	Indicate if recovery is needed
  * @high_curr_mode:	Indicate if we're in high current mode
  * @init_capacity:	Indicate if initial capacity measuring should be done
@@ -167,6 +168,7 @@ struct inst_curr_result_list {
  * @calib_state		State during offset calibration
  * @discharge_state:	Current discharge state
  * @charge_state:	Current charge state
+ * @ab8500_fg_started	Completion struct used for the instant current start
  * @ab8500_fg_complete	Completion struct used for the instant current reading
  * @flags:		Structure for information about events triggered
  * @bat_cap:		Structure for battery capacity specific parameters
@@ -199,6 +201,7 @@ struct ab8500_fg {
 	int recovery_cnt;
 	int high_curr_cnt;
 	int init_cnt;
+	int nbr_cceoc_irq_cnt;
 	bool recovery_needed;
 	bool high_curr_mode;
 	bool init_capacity;
@@ -206,6 +209,7 @@ struct ab8500_fg {
 	enum ab8500_fg_calibration_state calib_state;
 	enum ab8500_fg_discharge_state discharge_state;
 	enum ab8500_fg_charge_state charge_state;
+	struct completion ab8500_fg_started;
 	struct completion ab8500_fg_complete;
 	struct ab8500_fg_flags flags;
 	struct ab8500_fg_battery_capacity bat_cap;
@@ -524,13 +528,14 @@ cc_err:
  * Note: This is part "one" and has to be called before
  * ab8500_fg_inst_curr_finalize()
  */
- int ab8500_fg_inst_curr_start(struct ab8500_fg *di)
+int ab8500_fg_inst_curr_start(struct ab8500_fg *di)
 {
 	u8 reg_val;
 	int ret;
 
 	mutex_lock(&di->cc_lock);
 
+	di->nbr_cceoc_irq_cnt = 0;
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 		AB8500_RTC_CC_CONF_REG, &reg_val);
 	if (ret < 0)
@@ -558,6 +563,7 @@ cc_err:
 	}
 
 	/* Return and WFI */
+	INIT_COMPLETION(di->ab8500_fg_started);
 	INIT_COMPLETION(di->ab8500_fg_complete);
 	enable_irq(di->irq);
 
@@ -568,6 +574,17 @@ fail:
 	return ret;
 }
 
+/**
+ * ab8500_fg_inst_curr_started() - check if fg conversion has started
+ * @di:         pointer to the ab8500_fg structure
+ *
+ * Returns 1 if conversion started, 0 if still waiting
+ */
+int ab8500_fg_inst_curr_started(struct ab8500_fg *di)
+{
+	return completion_done(&di->ab8500_fg_started);
+}
+
 /**
  * ab8500_fg_inst_curr_done() - check if fg conversion is done
  * @di:         pointer to the ab8500_fg structure
@@ -596,13 +613,15 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res)
 	int timeout;
 
 	if (!completion_done(&di->ab8500_fg_complete)) {
-		timeout = wait_for_completion_timeout(&di->ab8500_fg_complete,
+		timeout = wait_for_completion_timeout(
+			&di->ab8500_fg_complete,
 			INS_CURR_TIMEOUT);
 		dev_dbg(di->dev, "Finalize time: %d ms\n",
 			((INS_CURR_TIMEOUT - timeout) * 1000) / HZ);
 		if (!timeout) {
 			ret = -ETIME;
 			disable_irq(di->irq);
+			di->nbr_cceoc_irq_cnt = 0;
 			dev_err(di->dev, "completion timed out [%d]\n",
 				__LINE__);
 			goto fail;
@@ -610,6 +629,7 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res)
 	}
 
 	disable_irq(di->irq);
+	di->nbr_cceoc_irq_cnt = 0;
 
 	ret = abx500_mask_and_set_register_interruptible(di->dev,
 			AB8500_GAS_GAUGE, AB8500_GASG_CC_CTRL_REG,
@@ -684,6 +704,7 @@ fail:
 int ab8500_fg_inst_curr_blocking(struct ab8500_fg *di)
 {
 	int ret;
+	int timeout;
 	int res = 0;
 
 	ret = ab8500_fg_inst_curr_start(di);
@@ -692,13 +713,32 @@ int ab8500_fg_inst_curr_blocking(struct ab8500_fg *di)
 		return 0;
 	}
 
+	/* Wait for CC to actually start */
+	if (!completion_done(&di->ab8500_fg_started)) {
+		timeout = wait_for_completion_timeout(
+			&di->ab8500_fg_started,
+			INS_CURR_TIMEOUT);
+		dev_dbg(di->dev, "Start time: %d ms\n",
+			((INS_CURR_TIMEOUT - timeout) * 1000) / HZ);
+		if (!timeout) {
+			ret = -ETIME;
+			dev_err(di->dev, "completion timed out [%d]\n",
+				__LINE__);
+			goto fail;
+		}
+	}
+
 	ret = ab8500_fg_inst_curr_finalize(di, &res);
 	if (ret) {
 		dev_err(di->dev, "Failed to finalize fg_inst\n");
 		return 0;
 	}
 
+	dev_dbg(di->dev, "%s instant current: %d", __func__, res);
 	return res;
+fail:
+	mutex_unlock(&di->cc_lock);
+	return ret;
 }
 
 /**
@@ -1523,8 +1563,6 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 
 	case AB8500_FG_DISCHARGE_WAKEUP:
 		ab8500_fg_coulomb_counter(di, true);
-		di->inst_curr = ab8500_fg_inst_curr_blocking(di);
-
 		ab8500_fg_calc_cap_discharge_voltage(di, true);
 
 		di->fg_samples = SEC_TO_SAMPLE(
@@ -1641,8 +1679,6 @@ static void ab8500_fg_periodic_work(struct work_struct *work)
 		fg_periodic_work.work);
 
 	if (di->init_capacity) {
-		/* A dummy read that will return 0 */
-		di->inst_curr = ab8500_fg_inst_curr_blocking(di);
 		/* Get an initial capacity calculation */
 		ab8500_fg_calc_cap_discharge_voltage(di, true);
 		ab8500_fg_check_capacity_limits(di, true);
@@ -1828,7 +1864,13 @@ static void ab8500_fg_instant_work(struct work_struct *work)
 static irqreturn_t ab8500_fg_cc_data_end_handler(int irq, void *_di)
 {
 	struct ab8500_fg *di = _di;
-	complete(&di->ab8500_fg_complete);
+	if (!di->nbr_cceoc_irq_cnt) {
+		di->nbr_cceoc_irq_cnt++;
+		complete(&di->ab8500_fg_started);
+	} else {
+		di->nbr_cceoc_irq_cnt = 0;
+		complete(&di->ab8500_fg_complete);
+	}
 	return IRQ_HANDLED;
 }
 
@@ -2551,7 +2593,11 @@ static int ab8500_fg_probe(struct platform_device *pdev)
 	di->fg_samples = SEC_TO_SAMPLE(di->bm->fg_params->init_timer);
 	ab8500_fg_coulomb_counter(di, true);
 
-	/* Initialize completion used to notify completion of inst current */
+	/*
+	 * Initialize completion used to notify completion and start
+	 * of inst current
+	 */
+	init_completion(&di->ab8500_fg_started);
 	init_completion(&di->ab8500_fg_complete);
 
 	/* Register interrupts */
@@ -2571,6 +2617,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
 	}
 	di->irq = platform_get_irq_byname(pdev, "CCEOC");
 	disable_irq(di->irq);
+	di->nbr_cceoc_irq_cnt = 0;
 
 	platform_set_drvdata(pdev, di);
 
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index 44310c98ee6e..6c6a02e53cd9 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -431,11 +431,18 @@ struct ab8500_fg *ab8500_fg_get(void);
 int ab8500_fg_inst_curr_blocking(struct ab8500_fg *dev);
 int ab8500_fg_inst_curr_start(struct ab8500_fg *di);
 int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res);
+int ab8500_fg_inst_curr_started(struct ab8500_fg *di);
 int ab8500_fg_inst_curr_done(struct ab8500_fg *di);
 
 #else
+int ab8500_fg_inst_curr_started(struct ab8500_fg *di)
+{
+	return 0;
+}
+
 int ab8500_fg_inst_curr_done(struct ab8500_fg *di)
 {
+	return 0;
 }
 static void ab8500_fg_reinit(void)
 {
-- 
cgit v1.2.3


From ea4024017831d61874351defe8f8c58ae73f8009 Mon Sep 17 00:00:00 2001
From: Marcus Cooper <marcus.xm.cooper@stericsson.com>
Date: Fri, 11 Jan 2013 13:12:54 +0000
Subject: ab8500_bm: Recharge condition not optimal for battery

Today the battery recharge is determined with a voltage threshold. This
voltage threshold is only valid when the battery is relaxed. In charging
algorithm the voltage read is the loaded battery voltage and no
compensation is done to get the relaxed voltage. When maintenance
charging is not selected, this makes the recharging condition to almost
immediately activate when there is a discharge present on the battery.

Depending on which vendor the battery comes from this behavior can wear
out the battery much faster than normal.

The fuelgauge driver is responsible to monitor the actual battery
capacity and is able to estimate the remaining capacity. It is better to
use the remaining capacity as a limit to determine when battery should
be recharged.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Marcus Cooper <marcus.xm.cooper@stericsson.com>
Reviewed-by: Hakan BERG <hakan.berg@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/ab8500_bmdata.c        |  19 ++---
 drivers/power/ab8500_fg.c            | 146 ++++++++++++++++++++++++++++++++---
 drivers/power/abx500_chargalg.c      |  30 +++----
 include/linux/mfd/abx500.h           |   6 +-
 include/linux/mfd/abx500/ab8500-bm.h |   2 +
 5 files changed, 168 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c
index 0bf52369d6a5..20c157b4fe99 100644
--- a/drivers/power/ab8500_bmdata.c
+++ b/drivers/power/ab8500_bmdata.c
@@ -192,7 +192,7 @@ static struct abx500_battery_type bat_type_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4050,
 		.termination_curr = 200,
-		.recharge_vol = 3990,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 400,
 		.normal_vol_lvl = 4100,
 		.maint_a_cur_lvl = 400,
@@ -219,7 +219,7 @@ static struct abx500_battery_type bat_type_thermistor[] = {
 		.nominal_voltage = 3600,
 		.termination_vol = 4150,
 		.termination_curr = 80,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -247,7 +247,7 @@ static struct abx500_battery_type bat_type_thermistor[] = {
 		.nominal_voltage = 3600,
 		.termination_vol = 4150,
 		.termination_curr = 80,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -278,7 +278,7 @@ static struct abx500_battery_type bat_type_ext_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4050,
 		.termination_curr = 200,
-		.recharge_vol = 3990,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 400,
 		.normal_vol_lvl = 4100,
 		.maint_a_cur_lvl = 400,
@@ -310,7 +310,7 @@ static struct abx500_battery_type bat_type_ext_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4150,
 		.termination_curr = 100,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -337,7 +337,7 @@ static struct abx500_battery_type bat_type_ext_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4150,
 		.termination_curr = 100,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -364,7 +364,7 @@ static struct abx500_battery_type bat_type_ext_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4150,
 		.termination_curr = 100,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -405,8 +405,8 @@ static const struct abx500_fg_parameters fg = {
 	.lowbat_threshold = 3100,
 	.battok_falling_th_sel0 = 2860,
 	.battok_raising_th_sel1 = 2860,
+	.maint_thres = 95,
 	.user_cap_limit = 15,
-	.maint_thres = 97,
 };
 
 static const struct abx500_maxim_parameters maxi_params = {
@@ -435,6 +435,7 @@ struct abx500_bm_data ab8500_bm_data = {
 	.bkup_bat_v             = BUP_VCH_SEL_2P6V,
 	.bkup_bat_i             = BUP_ICH_SEL_150UA,
 	.no_maintenance         = false,
+	.capacity_scaling       = false,
 	.adc_therm              = ABx500_ADC_THERM_BATCTRL,
 	.chg_unknown_bat        = false,
 	.enable_overshoot       = false,
@@ -479,7 +480,7 @@ int __devinit ab8500_bm_of_probe(struct device *dev,
 		bm->chg_unknown_bat = true;
 		bm->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600;
 		bm->bat_type[BATTERY_UNKNOWN].termination_vol    = 4150;
-		bm->bat_type[BATTERY_UNKNOWN].recharge_vol       = 4130;
+		bm->bat_type[BATTERY_UNKNOWN].recharge_cap       = 95;
 		bm->bat_type[BATTERY_UNKNOWN].normal_cur_lvl     = 520;
 		bm->bat_type[BATTERY_UNKNOWN].normal_vol_lvl     = 4200;
 	}
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index f3dbba8b3925..76fab6b5c541 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -113,6 +113,13 @@ struct ab8500_fg_avg_cap {
 	int sum;
 };
 
+struct ab8500_fg_cap_scaling {
+	bool enable;
+	int cap_to_scale[2];
+	int disable_cap_level;
+	int scaled_cap;
+};
+
 struct ab8500_fg_battery_capacity {
 	int max_mah_design;
 	int max_mah;
@@ -123,6 +130,7 @@ struct ab8500_fg_battery_capacity {
 	int prev_percent;
 	int prev_level;
 	int user_mah;
+	struct ab8500_fg_cap_scaling cap_scale;
 };
 
 struct ab8500_fg_flags {
@@ -1166,6 +1174,99 @@ static int ab8500_fg_capacity_level(struct ab8500_fg *di)
 	return ret;
 }
 
+/**
+ * ab8500_fg_calculate_scaled_capacity() - Capacity scaling
+ * @di:		pointer to the ab8500_fg structure
+ *
+ * Calculates the capacity to be shown to upper layers. Scales the capacity
+ * to have 100% as a reference from the actual capacity upon removal of charger
+ * when charging is in maintenance mode.
+ */
+static int ab8500_fg_calculate_scaled_capacity(struct ab8500_fg *di)
+{
+	struct ab8500_fg_cap_scaling *cs = &di->bat_cap.cap_scale;
+	int capacity = di->bat_cap.prev_percent;
+
+	if (!cs->enable)
+		return capacity;
+
+	/*
+	 * As long as we are in fully charge mode scale the capacity
+	 * to show 100%.
+	 */
+	if (di->flags.fully_charged) {
+		cs->cap_to_scale[0] = 100;
+		cs->cap_to_scale[1] =
+			max(capacity, di->bm->fg_params->maint_thres);
+		dev_dbg(di->dev, "Scale cap with %d/%d\n",
+			 cs->cap_to_scale[0], cs->cap_to_scale[1]);
+	}
+
+	/* Calculates the scaled capacity. */
+	if ((cs->cap_to_scale[0] != cs->cap_to_scale[1])
+					&& (cs->cap_to_scale[1] > 0))
+		capacity = min(100,
+				 DIV_ROUND_CLOSEST(di->bat_cap.prev_percent *
+						 cs->cap_to_scale[0],
+						 cs->cap_to_scale[1]));
+
+	if (di->flags.charging) {
+		if (capacity < cs->disable_cap_level) {
+			cs->disable_cap_level = capacity;
+			dev_dbg(di->dev, "Cap to stop scale lowered %d%%\n",
+				cs->disable_cap_level);
+		} else if (!di->flags.fully_charged) {
+			if (di->bat_cap.prev_percent >=
+			    cs->disable_cap_level) {
+				dev_dbg(di->dev, "Disabling scaled capacity\n");
+				cs->enable = false;
+				capacity = di->bat_cap.prev_percent;
+			} else {
+				dev_dbg(di->dev,
+					"Waiting in cap to level %d%%\n",
+					cs->disable_cap_level);
+				capacity = cs->disable_cap_level;
+			}
+		}
+	}
+
+	return capacity;
+}
+
+/**
+ * ab8500_fg_update_cap_scalers() - Capacity scaling
+ * @di:		pointer to the ab8500_fg structure
+ *
+ * To be called when state change from charge<->discharge to update
+ * the capacity scalers.
+ */
+static void ab8500_fg_update_cap_scalers(struct ab8500_fg *di)
+{
+	struct ab8500_fg_cap_scaling *cs = &di->bat_cap.cap_scale;
+
+	if (!cs->enable)
+		return;
+	if (di->flags.charging) {
+		di->bat_cap.cap_scale.disable_cap_level =
+			di->bat_cap.cap_scale.scaled_cap;
+		dev_dbg(di->dev, "Cap to stop scale at charge %d%%\n",
+				di->bat_cap.cap_scale.disable_cap_level);
+	} else {
+		if (cs->scaled_cap != 100) {
+			cs->cap_to_scale[0] = cs->scaled_cap;
+			cs->cap_to_scale[1] = di->bat_cap.prev_percent;
+		} else {
+			cs->cap_to_scale[0] = 100;
+			cs->cap_to_scale[1] =
+				max(di->bat_cap.prev_percent,
+				    di->bm->fg_params->maint_thres);
+		}
+
+		dev_dbg(di->dev, "Cap to scale at discharge %d/%d\n",
+				cs->cap_to_scale[0], cs->cap_to_scale[1]);
+	}
+}
+
 /**
  * ab8500_fg_check_capacity_limits() - Check if capacity has changed
  * @di:		pointer to the ab8500_fg structure
@@ -1214,16 +1315,24 @@ static void ab8500_fg_check_capacity_limits(struct ab8500_fg *di, bool init)
 	} else if (di->flags.fully_charged) {
 		/*
 		 * We report 100% if algorithm reported fully charged
-		 * unless capacity drops too much
+		 * and show 100% during maintenance charging (scaling).
 		 */
 		if (di->flags.force_full) {
 			di->bat_cap.prev_percent = di->bat_cap.permille / 10;
 			di->bat_cap.prev_mah = di->bat_cap.mah;
-		} else if (!di->flags.force_full &&
-			di->bat_cap.prev_percent !=
-			(di->bat_cap.permille) / 10 &&
-			(di->bat_cap.permille / 10) <
-			di->bm->fg_params->maint_thres) {
+
+			changed = true;
+
+			if (!di->bat_cap.cap_scale.enable &&
+						di->bm->capacity_scaling) {
+				di->bat_cap.cap_scale.enable = true;
+				di->bat_cap.cap_scale.cap_to_scale[0] = 100;
+				di->bat_cap.cap_scale.cap_to_scale[1] =
+						di->bat_cap.prev_percent;
+				di->bat_cap.cap_scale.disable_cap_level = 100;
+			}
+		} else if ( di->bat_cap.prev_percent !=
+			(di->bat_cap.permille) / 10) {
 			dev_dbg(di->dev,
 				"battery reported full "
 				"but capacity dropping: %d\n",
@@ -1272,6 +1381,14 @@ static void ab8500_fg_check_capacity_limits(struct ab8500_fg *di, bool init)
 	}
 
 	if (changed) {
+		if (di->bm->capacity_scaling) {
+			di->bat_cap.cap_scale.scaled_cap =
+				ab8500_fg_calculate_scaled_capacity(di);
+
+			dev_info(di->dev, "capacity=%d (%d)\n",
+				di->bat_cap.prev_percent,
+				di->bat_cap.cap_scale.scaled_cap);
+		}
 		power_supply_changed(&di->fg_psy);
 		if (di->flags.fully_charged && di->flags.force_full) {
 			dev_dbg(di->dev, "Battery full, notifying.\n");
@@ -1337,7 +1454,7 @@ static void ab8500_fg_algorithm_charging(struct ab8500_fg *di)
 		 * Read the FG and calculate the new capacity
 		 */
 		mutex_lock(&di->cc_lock);
-		if (!di->flags.conv_done) {
+		if (!di->flags.conv_done && !di->flags.force_full) {
 			/* Wasn't the CC IRQ that got us here */
 			mutex_unlock(&di->cc_lock);
 			dev_dbg(di->dev, "%s CC conv not done\n",
@@ -2027,7 +2144,9 @@ static int ab8500_fg_get_property(struct power_supply *psy,
 			val->intval = di->bat_cap.prev_mah;
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY:
-		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
+		if (di->bm->capacity_scaling)
+			val->intval = di->bat_cap.cap_scale.scaled_cap;
+		else if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = 100;
 		else
@@ -2091,6 +2210,8 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data)
 						break;
 					di->flags.charging = false;
 					di->flags.fully_charged = false;
+					if (di->bm->capacity_scaling)
+						ab8500_fg_update_cap_scalers(di);
 					queue_work(di->fg_wq, &di->fg_work);
 					break;
 				case POWER_SUPPLY_STATUS_FULL:
@@ -2103,10 +2224,13 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data)
 					queue_work(di->fg_wq, &di->fg_work);
 					break;
 				case POWER_SUPPLY_STATUS_CHARGING:
-					if (di->flags.charging)
+					if (di->flags.charging &&
+						!di->flags.fully_charged)
 						break;
 					di->flags.charging = true;
 					di->flags.fully_charged = false;
+					if (di->bm->capacity_scaling)
+						ab8500_fg_update_cap_scalers(di);
 					queue_work(di->fg_wq, &di->fg_work);
 					break;
 				};
@@ -2146,8 +2270,8 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data)
 		case POWER_SUPPLY_PROP_TEMP:
 			switch (ext->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
-			    if (di->flags.batt_id_received)
-				di->bat_temp = ret.intval;
+				if (di->flags.batt_id_received)
+					di->bat_temp = ret.intval;
 				break;
 			default:
 				break;
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index 8b69da0ae5af..78b623572b52 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -33,9 +33,6 @@
 /* End-of-charge criteria counter */
 #define EOC_COND_CNT			10
 
-/* Recharge criteria counter */
-#define RCH_COND_CNT			3
-
 #define to_abx500_chargalg_device_info(x) container_of((x), \
 	struct abx500_chargalg, chargalg_psy);
 
@@ -196,7 +193,6 @@ enum maxim_ret {
  * @dev:		pointer to the structure device
  * @charge_status:	battery operating status
  * @eoc_cnt:		counter used to determine end-of_charge
- * @rch_cnt:		counter used to determine start of recharge
  * @maintenance_chg:	indicate if maintenance charge is active
  * @t_hyst_norm		temperature hysteresis when the temperature has been
  *			over or under normal limits
@@ -223,7 +219,6 @@ struct abx500_chargalg {
 	struct device *dev;
 	int charge_status;
 	int eoc_cnt;
-	int rch_cnt;
 	bool maintenance_chg;
 	int t_hyst_norm;
 	int t_hyst_lowhigh;
@@ -858,6 +853,7 @@ static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data)
 	union power_supply_propval ret;
 	int i, j;
 	bool psy_found = false;
+	bool capacity_updated = false;
 
 	psy = (struct power_supply *)data;
 	ext = dev_get_drvdata(dev);
@@ -870,6 +866,16 @@ static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data)
 	if (!psy_found)
 		return 0;
 
+	/*
+	 *  If external is not registering 'POWER_SUPPLY_PROP_CAPACITY' to its
+	 * property because of handling that sysfs entry on its own, this is
+	 * the place to get the battery capacity.
+	 */
+	if (!ext->get_property(ext, POWER_SUPPLY_PROP_CAPACITY, &ret)) {
+		di->batt_data.percent = ret.intval;
+		capacity_updated = true;
+	}
+
 	/* Go through all properties for the psy */
 	for (j = 0; j < ext->num_properties; j++) {
 		enum power_supply_property prop;
@@ -1154,7 +1160,8 @@ static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data)
 			}
 			break;
 		case POWER_SUPPLY_PROP_CAPACITY:
-			di->batt_data.percent = ret.intval;
+			if (!capacity_updated)
+				di->batt_data.percent = ret.intval;
 			break;
 		default:
 			break;
@@ -1424,16 +1431,13 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 	case STATE_WAIT_FOR_RECHARGE_INIT:
 		abx500_chargalg_hold_charging(di);
 		abx500_chargalg_state_to(di, STATE_WAIT_FOR_RECHARGE);
-		di->rch_cnt = RCH_COND_CNT;
 		/* Intentional fallthrough */
 
 	case STATE_WAIT_FOR_RECHARGE:
-		if (di->batt_data.volt <=
-			di->bm->bat_type[di->bm->batt_id].recharge_vol) {
-			if (di->rch_cnt-- == 0)
-				abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
-		} else
-			di->rch_cnt = RCH_COND_CNT;
+		if (di->batt_data.percent <=
+		    di->bm->bat_type[di->bm->batt_id].
+		    recharge_cap)
+			abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
 		break;
 
 	case STATE_MAINTENANCE_A_INIT:
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 4906b1842d2f..0e6e90badfca 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -131,7 +131,7 @@ struct abx500_maxim_parameters {
  * @nominal_voltage:		Nominal voltage of the battery in mV
  * @termination_vol:		max voltage upto which battery can be charged
  * @termination_curr		battery charging termination current in mA
- * @recharge_vol		battery voltage limit that will trigger a new
+ * @recharge_cap		battery capacity limit that will trigger a new
  *				full charging cycle in the case where maintenan-
  *				-ce charging has been disabled
  * @normal_cur_lvl:		charger current in normal state in mA
@@ -160,7 +160,7 @@ struct abx500_battery_type {
 	int nominal_voltage;
 	int termination_vol;
 	int termination_curr;
-	int recharge_vol;
+	int recharge_cap;
 	int normal_cur_lvl;
 	int normal_vol_lvl;
 	int maint_a_cur_lvl;
@@ -224,6 +224,7 @@ struct abx500_bm_charger_parameters {
  * @bkup_bat_v		voltage which we charge the backup battery with
  * @bkup_bat_i		current which we charge the backup battery with
  * @no_maintenance	indicates that maintenance charging is disabled
+ * @capacity_scaling    indicates whether capacity scaling is to be used
  * @abx500_adc_therm	placement of thermistor, batctrl or battemp adc
  * @chg_unknown_bat	flag to enable charging of unknown batteries
  * @enable_overshoot	flag to enable VBAT overshoot control
@@ -254,6 +255,7 @@ struct abx500_bm_data {
 	int bkup_bat_v;
 	int bkup_bat_i;
 	bool no_maintenance;
+	bool capacity_scaling;
 	bool chg_unknown_bat;
 	bool enable_overshoot;
 	bool auto_trig;
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index 6c6a02e53cd9..fac684a6f346 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -355,6 +355,7 @@ struct ab8500_bm_charger_parameters {
  * @bkup_bat_v		voltage which we charge the backup battery with
  * @bkup_bat_i		current which we charge the backup battery with
  * @no_maintenance	indicates that maintenance charging is disabled
+ * @capacity_scaling    indicates whether capacity scaling is to be used
  * @adc_therm		placement of thermistor, batctrl or battemp adc
  * @chg_unknown_bat	flag to enable charging of unknown batteries
  * @enable_overshoot	flag to enable VBAT overshoot control
@@ -383,6 +384,7 @@ struct ab8500_bm_data {
 	int bkup_bat_v;
 	int bkup_bat_i;
 	bool no_maintenance;
+	bool capacity_scaling;
 	bool chg_unknown_bat;
 	bool enable_overshoot;
 	enum abx500_adc_therm adc_therm;
-- 
cgit v1.2.3


From d36e3e6d50ccdb5cdef6da0a01dedddd317f23fc Mon Sep 17 00:00:00 2001
From: Hakan Berg <hakan.berg@stericsson.com>
Date: Fri, 11 Jan 2013 13:12:57 +0000
Subject: ab8500_btemp: Ignore false btemp low interrupt

Ignore the low btemp interrupts for ab8500 3.0 and 3.3

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Hakan Berg <hakan.berg@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Tested-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/ab8500_btemp.c      | 8 ++++----
 include/linux/mfd/abx500/ab8500.h | 5 +++++
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index adebf6c6d146..b3f6467da7f3 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -622,9 +622,9 @@ static irqreturn_t ab8500_btemp_templow_handler(int irq, void *_di)
 {
 	struct ab8500_btemp *di = _di;
 
-	if (is_ab8500_2p0_or_earlier(di->parent)) {
+	if (is_ab8500_3p3_or_earlier(di->parent)) {
 		dev_dbg(di->dev, "Ignore false btemp low irq"
-			" for ABB cut 1.0, 1.1 and 2.0\n");
+			" for ABB cut 1.0, 1.1, 2.0 and 3.3\n");
 	} else {
 		dev_crit(di->dev, "Battery temperature lower than -10deg c\n");
 
@@ -738,10 +738,10 @@ static int ab8500_btemp_get_temp(struct ab8500_btemp *di)
 	int temp = 0;
 
 	/*
-	 * The BTEMP events are not reliabe on AB8500 cut2.0
+	 * The BTEMP events are not reliabe on AB8500 cut3.3
 	 * and prior versions
 	 */
-	if (is_ab8500_2p0_or_earlier(di->parent)) {
+	if (is_ab8500_3p3_or_earlier(di->parent)) {
 		temp = di->bat_temp * 10;
 	} else {
 		if (di->events.btemp_low) {
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 1cb5698b4d76..9dd9b99099df 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -335,6 +335,11 @@ static inline int is_ab8500_2p0_or_earlier(struct ab8500 *ab)
 	return (is_ab8500(ab) && (ab->chip_id <= AB8500_CUT2P0));
 }
 
+static inline int is_ab8500_3p3_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab8500(ab) && (ab->chip_id <= AB8500_CUT3P3));
+}
+
 /* exclude also ab8505, ab9540... */
 static inline int is_ab8500_2p0(struct ab8500 *ab)
 {
-- 
cgit v1.2.3


From 74a8e349b1c882e34419877207ae850ed87dddf7 Mon Sep 17 00:00:00 2001
From: Hakan Berg <hakan.berg@stericsson.com>
Date: Fri, 11 Jan 2013 13:12:58 +0000
Subject: ab8500_bm: Adds support for Car/Travel Adapters

The Travel and Carkit adapter should be handled directly by
the charger driver.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Marcus Cooper <marcus.xm.cooper@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/ab8500_charger.c       | 92 ++++++++++++++++++++++++++----------
 include/linux/mfd/abx500/ab8500-bm.h |  1 +
 2 files changed, 69 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 126b325cddc9..a0aeef2d2e05 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -108,6 +108,18 @@ enum ab8500_charger_link_status {
 	USB_STAT_HM_IDGND,
 	USB_STAT_RESERVED,
 	USB_STAT_NOT_VALID_LINK,
+	USB_STAT_PHY_EN,
+	USB_STAT_SUP_NO_IDGND_VBUS,
+	USB_STAT_SUP_IDGND_VBUS,
+	USB_STAT_CHARGER_LINE_1,
+	USB_STAT_CARKIT_1,
+	USB_STAT_CARKIT_2,
+	USB_STAT_ACA_DOCK_CHARGER,
+	USB_STAT_SAMSUNG_USB_PHY_DIS,
+	USB_STAT_SAMSUNG_USB_PHY_ENA,
+	USB_STAT_SAMSUNG_UART_PHY_DIS,
+	USB_STAT_SAMSUNG_UART_PHY_ENA,
+	USB_STAT_MOTOROLA_USB_PHY_ENA,
 };
 
 enum ab8500_usb_state {
@@ -586,7 +598,7 @@ static int ab8500_charger_detect_chargers(struct ab8500_charger *di)
  * Returns error code in case of failure else 0 on success
  */
 static int ab8500_charger_max_usb_curr(struct ab8500_charger *di,
-	enum ab8500_charger_link_status link_status)
+		enum ab8500_charger_link_status link_status)
 {
 	int ret = 0;
 
@@ -595,15 +607,19 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di,
 	case USB_STAT_STD_HOST_C_NS:
 	case USB_STAT_STD_HOST_C_S:
 		dev_dbg(di->dev, "USB Type - Standard host is "
-			"detected through USB driver\n");
+				"detected through USB driver\n");
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P09;
 		break;
 	case USB_STAT_HOST_CHG_HS_CHIRP:
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P5;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
 	case USB_STAT_HOST_CHG_HS:
 	case USB_STAT_ACA_RID_C_HS:
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P9;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
 	case USB_STAT_ACA_RID_A:
 		/*
@@ -611,6 +627,8 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di,
 		 * can consume (300mA). Closest level is 1100mA
 		 */
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_1P1;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
 	case USB_STAT_ACA_RID_B:
 		/*
@@ -618,34 +636,50 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di,
 		 * 100mA for potential accessory). Closest level is 1300mA
 		 */
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_1P3;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
-	case USB_STAT_DEDICATED_CHG:
 	case USB_STAT_HOST_CHG_NM:
-	case USB_STAT_ACA_RID_C_HS_CHIRP:
+	case USB_STAT_DEDICATED_CHG:
 	case USB_STAT_ACA_RID_C_NM:
+	case USB_STAT_ACA_RID_C_HS_CHIRP:
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_1P5;
-		break;
-	case USB_STAT_RESERVED:
-		/*
-		 * This state is used to indicate that VBUS has dropped below
-		 * the detection level 4 times in a row. This is due to the
-		 * charger output current is set to high making the charger
-		 * voltage collapse. This have to be propagated through to
-		 * chargalg. This is done using the property
-		 * POWER_SUPPLY_PROP_CURRENT_AVG = 1
-		 */
-		di->flags.vbus_collapse = true;
-		dev_dbg(di->dev, "USB Type - USB_STAT_RESERVED "
-			"VBUS has collapsed\n");
-		ret = -1;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
 	case USB_STAT_HM_IDGND:
-	case USB_STAT_NOT_CONFIGURED:
 	case USB_STAT_NOT_VALID_LINK:
+	case USB_STAT_NOT_CONFIGURED:
 		dev_err(di->dev, "USB Type - Charging not allowed\n");
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P05;
 		ret = -ENXIO;
 		break;
+	case USB_STAT_RESERVED:
+		if (is_ab8500(di->parent)) {
+			di->flags.vbus_collapse = true;
+			dev_err(di->dev, "USB Type - USB_STAT_RESERVED "
+						"VBUS has collapsed\n");
+			ret = -ENXIO;
+			break;
+		}
+		if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+			dev_dbg(di->dev, "USB Type - Charging not allowed\n");
+			di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P05;
+			dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d",
+					link_status, di->max_usb_in_curr);
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	case USB_STAT_CARKIT_1:
+	case USB_STAT_CARKIT_2:
+	case USB_STAT_ACA_DOCK_CHARGER:
+	case USB_STAT_CHARGER_LINE_1:
+		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P5;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
+		break;
+
 	default:
 		dev_err(di->dev, "USB Type - Unknown\n");
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P05;
@@ -677,8 +711,14 @@ static int ab8500_charger_read_usb_type(struct ab8500_charger *di)
 		dev_err(di->dev, "%s ab8500 read failed\n", __func__);
 		return ret;
 	}
-	ret = abx500_get_register_interruptible(di->dev, AB8500_USB,
-		AB8500_USB_LINE_STAT_REG, &val);
+	if (is_ab8500(di->parent)) {
+		ret = abx500_get_register_interruptible(di->dev, AB8500_USB,
+				AB8500_USB_LINE_STAT_REG, &val);
+	} else {
+		if (is_ab9540(di->parent) || is_ab8505(di->parent))
+			ret = abx500_get_register_interruptible(di->dev,
+				AB8500_USB, AB8500_USB_LINK1_STAT_REG, &val);
+	}
 	if (ret < 0) {
 		dev_err(di->dev, "%s ab8500 read failed\n", __func__);
 		return ret;
@@ -718,8 +758,13 @@ static int ab8500_charger_detect_usb_type(struct ab8500_charger *di)
 			dev_err(di->dev, "%s ab8500 read failed\n", __func__);
 			return ret;
 		}
-		ret = abx500_get_register_interruptible(di->dev, AB8500_USB,
-			AB8500_USB_LINE_STAT_REG, &val);
+
+		if (is_ab8500(di->parent))
+			ret = abx500_get_register_interruptible(di->dev,
+				AB8500_USB, AB8500_USB_LINE_STAT_REG, &val);
+		else
+			ret = abx500_get_register_interruptible(di->dev,
+				AB8500_USB, AB8500_USB_LINK1_STAT_REG, &val);
 		if (ret < 0) {
 			dev_err(di->dev, "%s ab8500 read failed\n", __func__);
 			return ret;
@@ -2944,7 +2989,6 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 	}
 
 	if (charger_status & USB_PW_CONN) {
-		dev_dbg(di->dev, "VBUS Detect during startup\n");
 		di->vbus_detected = true;
 		di->vbus_detected_start = true;
 		queue_work(di->charger_wq,
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index fac684a6f346..e2a1e6d84f72 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -23,6 +23,7 @@
  * Bank : 0x5
  */
 #define AB8500_USB_LINE_STAT_REG	0x80
+#define AB8500_USB_LINK1_STAT_REG	0x94
 
 /*
  * Charger / status register offfsets
-- 
cgit v1.2.3


From 1e817fb62cd185a2232ad4302579491805609489 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 19 Nov 2012 10:26:16 -0800
Subject: time: create __getnstimeofday for WARNless calls

The pstore RAM backend can get called during resume, and must be defensive
against a suspended time source. Expose getnstimeofday logic that returns
an error instead of a WARN. This can be detected and the timestamp can
be zeroed out.

Reported-by: Doug Anderson <dianders@chromium.org>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 fs/pstore/ram.c           | 10 +++++++---
 include/linux/time.h      |  1 +
 kernel/time/timekeeping.c | 29 ++++++++++++++++++++++++-----
 3 files changed, 32 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 1a4f6da58eab..dacfe78aee7e 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -168,12 +168,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
 {
 	char *hdr;
-	struct timeval timestamp;
+	struct timespec timestamp;
 	size_t len;
 
-	do_gettimeofday(&timestamp);
+	/* Report zeroed timestamp if called before timekeeping has resumed. */
+	if (__getnstimeofday(&timestamp)) {
+		timestamp.tv_sec = 0;
+		timestamp.tv_nsec = 0;
+	}
 	hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
-		(long)timestamp.tv_sec, (long)timestamp.tv_usec);
+		(long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
 	WARN_ON_ONCE(!hdr);
 	len = hdr ? strlen(hdr) : 0;
 	persistent_ram_write(prz, hdr, len);
diff --git a/include/linux/time.h b/include/linux/time.h
index 4d358e9d10f1..0015aea4c4a7 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -158,6 +158,7 @@ extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);
 extern unsigned int alarm_setitimer(unsigned int seconds);
 extern int do_getitimer(int which, struct itimerval *value);
+extern int __getnstimeofday(struct timespec *tv);
 extern void getnstimeofday(struct timespec *tv);
 extern void getrawmonotonic(struct timespec *ts);
 extern void getnstime_raw_and_real(struct timespec *ts_raw,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4c7de02eacdc..dfc7f87eb332 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -214,19 +214,18 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 }
 
 /**
- * getnstimeofday - Returns the time of day in a timespec
+ * __getnstimeofday - Returns the time of day in a timespec.
  * @ts:		pointer to the timespec to be set
  *
- * Returns the time of day in a timespec.
+ * Updates the time of day in the timespec.
+ * Returns 0 on success, or -ve when suspended (timespec will be undefined).
  */
-void getnstimeofday(struct timespec *ts)
+int __getnstimeofday(struct timespec *ts)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long seq;
 	s64 nsecs = 0;
 
-	WARN_ON(timekeeping_suspended);
-
 	do {
 		seq = read_seqbegin(&tk->lock);
 
@@ -237,6 +236,26 @@ void getnstimeofday(struct timespec *ts)
 
 	ts->tv_nsec = 0;
 	timespec_add_ns(ts, nsecs);
+
+	/*
+	 * Do not bail out early, in case there were callers still using
+	 * the value, even in the face of the WARN_ON.
+	 */
+	if (unlikely(timekeeping_suspended))
+		return -EAGAIN;
+	return 0;
+}
+EXPORT_SYMBOL(__getnstimeofday);
+
+/**
+ * getnstimeofday - Returns the time of day in a timespec.
+ * @ts:		pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec (WARN if suspended).
+ */
+void getnstimeofday(struct timespec *ts)
+{
+	WARN_ON(__getnstimeofday(ts));
 }
 EXPORT_SYMBOL(getnstimeofday);
 
-- 
cgit v1.2.3


From 023f333a99cee9b5cd3268ff87298eb01a31f78e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Mon, 17 Dec 2012 14:30:53 -0700
Subject: NTP: Add a CONFIG_RTC_SYSTOHC configuration

The purpose of this option is to allow ARM/etc systems that rely on the
class RTC subsystem to have the same kind of automatic NTP based
synchronization that we have on PC platforms. Today ARM does not
implement update_persistent_clock and makes extensive use of the class
RTC system.

When enabled CONFIG_RTC_SYSTOHC will provide a generic
rtc_update_persistent_clock that stores the current time in the RTC and
is intended complement the existing CONFIG_RTC_HCTOSYS option that loads
the RTC at boot.

Like with RTC_HCTOSYS the platform's update_persistent_clock is used
first, if it works. Platforms with mixed class RTC and non-RTC drivers
need to return ENODEV when class RTC should be used. Such an update for
PPC is included in this patch.

Long term, implementations of update_persistent_clock should migrate to
proper class RTC drivers and use CONFIG_RTC_SYSTOHC instead.

Tested on ARM kirkwood and PPC405

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/powerpc/kernel/time.c |  2 +-
 drivers/rtc/Kconfig        | 10 +++++++++-
 drivers/rtc/Makefile       |  1 +
 drivers/rtc/systohc.c      | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rtc.h        |  1 +
 kernel/time/ntp.c          | 16 ++++++++++++----
 6 files changed, 68 insertions(+), 6 deletions(-)
 create mode 100644 drivers/rtc/systohc.c

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index ce4cb772dc78..bc844a857e05 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -667,7 +667,7 @@ int update_persistent_clock(struct timespec now)
 	struct rtc_time tm;
 
 	if (!ppc_md.set_rtc_time)
-		return 0;
+		return -ENODEV;
 
 	to_tm(now.tv_sec + 1 + timezone_offset, &tm);
 	tm.tm_year -= 1900;
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 19c03ab2bdcb..b377e9672497 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -25,9 +25,17 @@ config RTC_HCTOSYS
 	  the value read from a specified RTC device. This is useful to avoid
 	  unnecessary fsck runs at boot time, and to network better.
 
+config RTC_SYSTOHC
+	bool "Set the RTC time based on NTP synchronization"
+	default y
+	help
+	  If you say yes here, the system time (wall clock) will be stored
+	  in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11
+	  minutes if userspace reports synchronized NTP status.
+
 config RTC_HCTOSYS_DEVICE
 	string "RTC used to set the system time"
-	depends on RTC_HCTOSYS = y
+	depends on RTC_HCTOSYS = y || RTC_SYSTOHC = y
 	default "rtc0"
 	help
 	  The RTC device that will be used to (re)initialize the system
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 56297f0fd388..69d11f1d76e4 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -6,6 +6,7 @@ ccflags-$(CONFIG_RTC_DEBUG)	:= -DDEBUG
 
 obj-$(CONFIG_RTC_LIB)		+= rtc-lib.o
 obj-$(CONFIG_RTC_HCTOSYS)	+= hctosys.o
+obj-$(CONFIG_RTC_SYSTOHC)	+= systohc.o
 obj-$(CONFIG_RTC_CLASS)		+= rtc-core.o
 rtc-core-y			:= class.o interface.o
 
diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c
new file mode 100644
index 000000000000..bf3e242ccc5c
--- /dev/null
+++ b/drivers/rtc/systohc.c
@@ -0,0 +1,44 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ */
+#include <linux/rtc.h>
+#include <linux/time.h>
+
+/**
+ * rtc_set_ntp_time - Save NTP synchronized time to the RTC
+ * @now: Current time of day
+ *
+ * Replacement for the NTP platform function update_persistent_clock
+ * that stores time for later retrieval by rtc_hctosys.
+ *
+ * Returns 0 on successful RTC update, -ENODEV if a RTC update is not
+ * possible at all, and various other -errno for specific temporary failure
+ * cases.
+ *
+ * If temporary failure is indicated the caller should try again 'soon'
+ */
+int rtc_set_ntp_time(struct timespec now)
+{
+	struct rtc_device *rtc;
+	struct rtc_time tm;
+	int err = -ENODEV;
+
+	if (now.tv_nsec < (NSEC_PER_SEC >> 1))
+		rtc_time_to_tm(now.tv_sec, &tm);
+	else
+		rtc_time_to_tm(now.tv_sec + 1, &tm);
+
+	rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE);
+	if (rtc) {
+		/* rtc_hctosys exclusively uses UTC, so we call set_time here,
+		 * not set_mmss. */
+		if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss))
+			err = rtc_set_time(rtc, &tm);
+		rtc_class_close(rtc);
+	}
+
+	return err;
+}
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 9531845c419f..11d05f9fe8b6 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -138,6 +138,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc);
 extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
 extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
 extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
+extern int rtc_set_ntp_time(struct timespec now);
 int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
 extern int rtc_read_alarm(struct rtc_device *rtc,
 			struct rtc_wkalrm *alrm);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 24174b4d669b..313b161504b7 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -15,6 +15,7 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/rtc.h>
 
 #include "tick-internal.h"
 
@@ -483,8 +484,7 @@ out:
 	return leap;
 }
 
-#ifdef CONFIG_GENERIC_CMOS_UPDATE
-
+#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
 static void sync_cmos_clock(struct work_struct *work);
 
 static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
@@ -510,14 +510,22 @@ static void sync_cmos_clock(struct work_struct *work)
 	}
 
 	getnstimeofday(&now);
-	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
+	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) {
+		fail = -ENODEV;
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
 		fail = update_persistent_clock(now);
+#endif
+#ifdef CONFIG_RTC_SYSTOHC
+		if (fail == -ENODEV)
+			fail = rtc_set_ntp_time(now);
+#endif
+	}
 
 	next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
 	if (next.tv_nsec <= 0)
 		next.tv_nsec += NSEC_PER_SEC;
 
-	if (!fail)
+	if (!fail || fail == -ENODEV)
 		next.tv_sec = 659;
 	else
 		next.tv_sec = 0;
-- 
cgit v1.2.3


From 31ade30692dc9680bfc95700d794818fa3f754ac Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 16 Jan 2013 00:09:47 +0800
Subject: timekeeping: Add persistent_clock_exist flag

In current kernel, there are several places which need to check
whether there is a persistent clock for the platform. Current check
is done by calling the read_persistent_clock() and validating its
return value.

So one optimization is to do the check only once in timekeeping_init(),
and use a flag persistent_clock_exist to record it.

v2: Add a has_persistent_clock() helper function, as suggested by John.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/time.h      |  6 ++++++
 kernel/time/timekeeping.c | 16 +++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 0015aea4c4a7..dfbc4e82e8ba 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -115,6 +115,12 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
 	return true;
 }
 
+extern bool persistent_clock_exist;
+static inline bool has_persistent_clock(void)
+{
+	return persistent_clock_exist;
+}
+
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
 extern int update_persistent_clock(struct timespec now);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index dfc7f87eb332..b7a584177618 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -28,6 +28,9 @@ static struct timekeeper timekeeper;
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
+/* Flag for if there is a persistent clock on this platform */
+bool __read_mostly persistent_clock_exist = false;
+
 static inline void tk_normalize_xtime(struct timekeeper *tk)
 {
 	while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
@@ -609,12 +612,14 @@ void __init timekeeping_init(void)
 	struct timespec now, boot, tmp;
 
 	read_persistent_clock(&now);
+
 	if (!timespec_valid_strict(&now)) {
 		pr_warn("WARNING: Persistent clock returned invalid value!\n"
 			"         Check your CMOS/BIOS settings.\n");
 		now.tv_sec = 0;
 		now.tv_nsec = 0;
-	}
+	} else if (now.tv_sec || now.tv_nsec)
+		persistent_clock_exist = true;
 
 	read_boot_clock(&boot);
 	if (!timespec_valid_strict(&boot)) {
@@ -687,11 +692,12 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
-	struct timespec ts;
 
-	/* Make sure we don't set the clock twice */
-	read_persistent_clock(&ts);
-	if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
+	/*
+	 * Make sure we don't set the clock twice, as timekeeping_resume()
+	 * already did it
+	 */
+	if (has_persistent_clock())
 		return;
 
 	write_seqlock_irqsave(&tk->lock, flags);
-- 
cgit v1.2.3


From 05ad717c77b1b8e98a1dd768c3700036d634629e Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 16 Jan 2013 00:09:49 +0800
Subject: timekeeping: Add CONFIG_HAS_PERSISTENT_CLOCK option

Make the persistent clock check a kernel config option, so that some
platform can explicitely select it, also make CONFIG_RTC_HCTOSYS and
RTC_SYSTOHC depend on its non-existence, which could prevent the
persistent clock and RTC code from doing similar thing twice during
system's init/suspend/resume phases.

If the CONFIG_HAS_PERSISTENT_CLOCK=n, then no change happens for kernel
which still does the persistent clock check in timekeeping_init().

Cc: Thomas Gleixner <tglx@linutronix.de>
Suggested-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Feng Tang <feng.tang@intel.com>
[jstultz: Added dependency for RTC_SYSTOHC as well]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/rtc/Kconfig  | 2 ++
 include/linux/time.h | 5 +++++
 kernel/time/Kconfig  | 5 +++++
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index b377e9672497..05761de7929b 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -20,6 +20,7 @@ if RTC_CLASS
 config RTC_HCTOSYS
 	bool "Set system time from RTC on startup and resume"
 	default y
+	depends on !HAS_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be set using
 	  the value read from a specified RTC device. This is useful to avoid
@@ -28,6 +29,7 @@ config RTC_HCTOSYS
 config RTC_SYSTOHC
 	bool "Set the RTC time based on NTP synchronization"
 	default y
+	depends on !HAS_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be stored
 	  in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11
diff --git a/include/linux/time.h b/include/linux/time.h
index dfbc4e82e8ba..369b6e3b87d8 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -116,10 +116,15 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
 }
 
 extern bool persistent_clock_exist;
+
+#ifdef CONFIG_HAS_PERSISTENT_CLOCK
+#define has_persistent_clock()	true
+#else
 static inline bool has_persistent_clock(void)
 {
 	return persistent_clock_exist;
 }
+#endif
 
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 8601f0db1261..f7e45b9b142b 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -12,6 +12,11 @@ config CLOCKSOURCE_WATCHDOG
 config ARCH_CLOCKSOURCE_DATA
 	bool
 
+# Platforms has a persistent clock
+config HAS_PERSISTENT_CLOCK
+	bool
+	default n
+
 # Timekeeping vsyscall support
 config GENERIC_TIME_VSYSCALL
 	bool
-- 
cgit v1.2.3


From 7266507d89991fa1e989283e4e032c6d9357fe26 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Mon, 17 Dec 2012 18:33:58 +0000
Subject: netfilter: nf_ct_sip: support Cisco 7941/7945 IP phones

Most SIP devices use a source port of 5060/udp on SIP requests, so the
response automatically comes back to port 5060:

    phone_ip:5060 -> proxy_ip:5060   REGISTER
    proxy_ip:5060 -> phone_ip:5060   100 Trying

The newer Cisco IP phones, however, use a randomly chosen high source
port for the SIP request but expect the response on port 5060:

    phone_ip:49173 -> proxy_ip:5060  REGISTER
    proxy_ip:5060 -> phone_ip:5060   100 Trying

Standard Linux NAT, with or without nf_nat_sip, will send the reply back
to port 49173, not 5060:

    phone_ip:49173 -> proxy_ip:5060  REGISTER
    proxy_ip:5060 -> phone_ip:49173  100 Trying

But the phone is not listening on 49173, so it will never see the reply.

This patch modifies nf_*_sip to work around this quirk by extracting
the SIP response port from the Via: header, iff the source IP in the
packet header matches the source IP in the SIP request.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_sip.h |  3 +++
 net/netfilter/nf_conntrack_sip.c           | 17 +++++++++++++++++
 net/netfilter/nf_nat_sip.c                 | 27 ++++++++++++++++++++++++---
 3 files changed, 44 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index 387bdd02945d..ba7f571a2b1c 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -4,12 +4,15 @@
 
 #include <net/netfilter/nf_conntrack_expect.h>
 
+#include <linux/types.h>
+
 #define SIP_PORT	5060
 #define SIP_TIMEOUT	3600
 
 struct nf_ct_sip_master {
 	unsigned int	register_cseq;
 	unsigned int	invite_cseq;
+	__be16		forced_dport;
 };
 
 enum sip_expectation_classes {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index df8f4f284481..72a67bbe3518 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1440,8 +1440,25 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned int matchoff, matchlen;
 	unsigned int cseq, i;
+	union nf_inet_addr addr;
+	__be16 port;
+
+	/* Many Cisco IP phones use a high source port for SIP requests, but
+	 * listen for the response on port 5060.  If we are the local
+	 * router for one of these phones, save the port number from the
+	 * Via: header so that nf_nat_sip can redirect the responses to
+	 * the correct port.
+	 */
+	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+				    SIP_HDR_VIA_UDP, NULL, &matchoff,
+				    &matchlen, &addr, &port) > 0 &&
+	    port != ct->tuplehash[dir].tuple.src.u.udp.port &&
+	    nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3))
+		ct_sip_info->forced_dport = port;
 
 	for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
 		const struct sip_handler *handler;
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 16303c752213..5951146e7688 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -95,6 +95,7 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
 	char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
 	unsigned int buflen;
 	union nf_inet_addr newaddr;
@@ -107,7 +108,8 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff,
 	} else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) &&
 		   ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
 		newaddr = ct->tuplehash[!dir].tuple.src.u3;
-		newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
+		newport = ct_sip_info->forced_dport ? :
+			  ct->tuplehash[!dir].tuple.src.u.udp.port;
 	} else
 		return 1;
 
@@ -144,6 +146,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
 	unsigned int coff, matchoff, matchlen;
 	enum sip_header_types hdr;
 	union nf_inet_addr addr;
@@ -258,6 +261,21 @@ next:
 	    !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO))
 		return NF_DROP;
 
+	/* Mangle destination port for Cisco phones, then fix up checksums */
+	if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) {
+		struct udphdr *uh;
+
+		if (!skb_make_writable(skb, skb->len))
+			return NF_DROP;
+
+		uh = (void *)skb->data + protoff;
+		uh->dest = ct_sip_info->forced_dport;
+
+		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff,
+					      0, 0, NULL, 0))
+			return NF_DROP;
+	}
+
 	return NF_ACCEPT;
 }
 
@@ -311,8 +329,10 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
 	union nf_inet_addr newaddr;
 	u_int16_t port;
+	__be16 srcport;
 	char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
 	unsigned int buflen;
 
@@ -326,8 +346,9 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 	/* If the signalling port matches the connection's source port in the
 	 * original direction, try to use the destination port in the opposite
 	 * direction. */
-	if (exp->tuple.dst.u.udp.port ==
-	    ct->tuplehash[dir].tuple.src.u.udp.port)
+	srcport = ct_sip_info->forced_dport ? :
+		  ct->tuplehash[dir].tuple.src.u.udp.port;
+	if (exp->tuple.dst.u.udp.port == srcport)
 		port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
 	else
 		port = ntohs(exp->tuple.dst.u.udp.port);
-- 
cgit v1.2.3


From f868ef995b32f0f08d7b6ce700bbe25de3ad65ac Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Tue, 8 Jan 2013 22:43:12 +0100
Subject: pinctrl: pinconf-generic: add drive strength parameter

Some pin configurations IP allows to set the current output to the pin.
This patch adds such a parameter to the pinconf-generic mechanism.

This parameter takes as argument the drive strength in mA.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinconf-generic.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index b23d99da3b4b..40d7bb9c7562 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -46,6 +46,8 @@
  * @PIN_CONFIG_DRIVE_OPEN_SOURCE: the pin will be driven with open source
  *	(open emitter). Sending this config will enabale open drain mode, the
  *	argument is ignored.
+ * @PIN_CONFIG_DRIVE_STRENGTH: the pin will output the current passed as
+ * 	argument. The argument is in mA.
  * @PIN_CONFIG_INPUT_SCHMITT_DISABLE: disable schmitt-trigger mode on the pin.
  * @PIN_CONFIG_INPUT_SCHMITT: this will configure an input pin to run in
  *	schmitt-trigger mode. If the schmitt-trigger has adjustable hysteresis,
@@ -76,6 +78,7 @@ enum pin_config_param {
 	PIN_CONFIG_DRIVE_PUSH_PULL,
 	PIN_CONFIG_DRIVE_OPEN_DRAIN,
 	PIN_CONFIG_DRIVE_OPEN_SOURCE,
+	PIN_CONFIG_DRIVE_STRENGTH,
 	PIN_CONFIG_INPUT_SCHMITT_DISABLE,
 	PIN_CONFIG_INPUT_SCHMITT,
 	PIN_CONFIG_INPUT_DEBOUNCE,
-- 
cgit v1.2.3


From bb813f4c933ae9f887a014483690d5f8b8ec05e1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 18 Jan 2013 14:05:56 -0800
Subject: init, block: try to load default elevator module early during boot

This patch adds default module loading and uses it to load the default
block elevator.  During boot, it's called right after initramfs or
initrd is made available and right before control is passed to
userland.  This ensures that as long as the modules are available in
the usual places in initramfs, initrd or the root filesystem, the
default modules are loaded as soon as possible.

This will replace the on-demand elevator module loading from elevator
init path.

v2: Fixed build breakage when !CONFIG_BLOCK.  Reported by kbuild test
    robot.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alex Riesen <raa.lkml@gmail.com>
Cc: Fengguang We <fengguang.wu@intel.com>
---
 block/elevator.c         | 16 ++++++++++++++++
 include/linux/elevator.h |  5 +++++
 include/linux/init.h     |  1 +
 init/do_mounts_initrd.c  |  3 +++
 init/initramfs.c         |  8 +++++++-
 init/main.c              | 16 ++++++++++++++++
 6 files changed, 48 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index 9edba1b8323e..c2d61d56e0b7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -136,6 +136,22 @@ static int __init elevator_setup(char *str)
 
 __setup("elevator=", elevator_setup);
 
+/* called during boot to load the elevator chosen by the elevator param */
+void __init load_default_elevator_module(void)
+{
+	struct elevator_type *e;
+
+	if (!chosen_elevator[0])
+		return;
+
+	spin_lock(&elv_list_lock);
+	e = elevator_find(chosen_elevator);
+	spin_unlock(&elv_list_lock);
+
+	if (!e)
+		request_module("%s-iosched", chosen_elevator);
+}
+
 static struct kobj_type elv_ktype;
 
 static struct elevator_queue *elevator_alloc(struct request_queue *q,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c03af7687bb4..186620631750 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -138,6 +138,7 @@ extern void elv_drain_elevator(struct request_queue *);
 /*
  * io scheduler registration
  */
+extern void __init load_default_elevator_module(void);
 extern int elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
@@ -206,5 +207,9 @@ enum {
 	INIT_LIST_HEAD(&(rq)->csd.list);	\
 	} while (0)
 
+#else /* CONFIG_BLOCK */
+
+static inline void load_default_elevator_module(void) { }
+
 #endif /* CONFIG_BLOCK */
 #endif
diff --git a/include/linux/init.h b/include/linux/init.h
index a799273714ac..9230c9408d8b 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -161,6 +161,7 @@ extern unsigned int reset_devices;
 /* used by init/main.c */
 void setup_arch(char **);
 void prepare_namespace(void);
+void __init load_default_modules(void);
 
 extern void (*late_time_init)(void);
 
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 5e4ded51788e..dfe606a7dd61 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -57,6 +57,9 @@ static void __init handle_initrd(void)
 	sys_mkdir("/old", 0700);
 	sys_chdir("/old");
 
+	/* try loading default modules from initrd */
+	load_default_modules();
+
 	/*
 	 * In case that a resume from disk is carried out by linuxrc or one of
 	 * its children, we need to tell the freezer not to wait for us.
diff --git a/init/initramfs.c b/init/initramfs.c
index 84c6bf111300..a67ef9dbda9d 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -592,7 +592,7 @@ static int __init populate_rootfs(void)
 			initrd_end - initrd_start);
 		if (!err) {
 			free_initrd();
-			return 0;
+			goto done;
 		} else {
 			clean_rootfs();
 			unpack_to_rootfs(__initramfs_start, __initramfs_size);
@@ -607,6 +607,7 @@ static int __init populate_rootfs(void)
 			sys_close(fd);
 			free_initrd();
 		}
+	done:
 #else
 		printk(KERN_INFO "Unpacking initramfs...\n");
 		err = unpack_to_rootfs((char *)initrd_start,
@@ -615,6 +616,11 @@ static int __init populate_rootfs(void)
 			printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
 		free_initrd();
 #endif
+		/*
+		 * Try loading default modules from initramfs.  This gives
+		 * us a chance to load before device_initcalls.
+		 */
+		load_default_modules();
 	}
 	return 0;
 }
diff --git a/init/main.c b/init/main.c
index baf1f0f5c461..18efadb11cf6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,8 @@
 #include <linux/perf_event.h>
 #include <linux/file.h>
 #include <linux/ptrace.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -794,6 +796,17 @@ static void __init do_pre_smp_initcalls(void)
 		do_one_initcall(*fn);
 }
 
+/*
+ * This function requests modules which should be loaded by default and is
+ * called twice right after initrd is mounted and right before init is
+ * exec'd.  If such modules are on either initrd or rootfs, they will be
+ * loaded before control is passed to userland.
+ */
+void __init load_default_modules(void)
+{
+	load_default_elevator_module();
+}
+
 static int run_init_process(const char *init_filename)
 {
 	argv_init[0] = init_filename;
@@ -898,4 +911,7 @@ static void __init kernel_init_freeable(void)
 	 * we're essentially up and running. Get rid of the
 	 * initmem segments and start the user-mode stuff..
 	 */
+
+	/* rootfs is available now, try loading default modules */
+	load_default_modules();
 }
-- 
cgit v1.2.3


From 84b233adcca3cacd5cfa8013a5feda7a3db4a9af Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 18 Jan 2013 14:05:56 -0800
Subject: workqueue: implement current_is_async()

This function queries whether %current is an async worker executing an
async item.  This will be used to implement warning on synchronous
request_module() from async workers.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/async.h       |  1 +
 kernel/async.c              | 14 ++++++++++++++
 kernel/workqueue_internal.h | 11 +++++++++++
 3 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/async.h b/include/linux/async.h
index 7a24fe9b44b4..345169cfa304 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -52,4 +52,5 @@ extern void async_synchronize_full_domain(struct async_domain *domain);
 extern void async_synchronize_cookie(async_cookie_t cookie);
 extern void async_synchronize_cookie_domain(async_cookie_t cookie,
 					    struct async_domain *domain);
+extern bool current_is_async(void);
 #endif
diff --git a/kernel/async.c b/kernel/async.c
index 9d3118384858..d9bf2a9b5cee 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -57,6 +57,8 @@ asynchronous and synchronous parts of the kernel.
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 
+#include "workqueue_internal.h"
+
 static async_cookie_t next_cookie = 1;
 
 #define MAX_WORK	32768
@@ -337,3 +339,15 @@ void async_synchronize_cookie(async_cookie_t cookie)
 	async_synchronize_cookie_domain(cookie, &async_running);
 }
 EXPORT_SYMBOL_GPL(async_synchronize_cookie);
+
+/**
+ * current_is_async - is %current an async worker task?
+ *
+ * Returns %true if %current is an async worker task.
+ */
+bool current_is_async(void)
+{
+	struct worker *worker = current_wq_worker();
+
+	return worker && worker->current_func == async_run_entry_fn;
+}
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 02549fa04587..cc35e7e62091 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -8,6 +8,7 @@
 #define _KERNEL_WORKQUEUE_INTERNAL_H
 
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 
 struct global_cwq;
 struct worker_pool;
@@ -44,6 +45,16 @@ struct worker {
 	struct workqueue_struct	*rescue_wq;	/* I: the workqueue to rescue */
 };
 
+/**
+ * current_wq_worker - return struct worker if %current is a workqueue worker
+ */
+static inline struct worker *current_wq_worker(void)
+{
+	if (current->flags & PF_WQ_WORKER)
+		return kthread_data(current);
+	return NULL;
+}
+
 /*
  * Scheduler hooks for concurrency managed workqueue.  Only to be used from
  * sched.c and workqueue.c.
-- 
cgit v1.2.3


From d79beb39922e41083e8bbbb3de084a6ca958e25f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 19 Jan 2013 14:29:31 +0100
Subject: ACPI / PM: Fix build for unusual combination of Kconfig options

CONFIG_PM_SLEEP may be set even if CONFIG_ACPI_SLEEP is unset,
although that is unusual.  For this reason, make the headers of
functions built for both CONFIG_ACPI and CONFIG_PM_SLEEP set
simultaneously depend on that combination of Kconfig options
instead of CONFIG_ACPI_SLEEP.

This fixes a build problem reported by Randy Dunlap.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 8c1d6f2a2193..5f2be833aaf8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -511,7 +511,7 @@ static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; }
 static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; }
 #endif
 
-#ifdef CONFIG_ACPI_SLEEP
+#if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP)
 int acpi_dev_suspend_late(struct device *dev);
 int acpi_dev_resume_early(struct device *dev);
 int acpi_subsys_prepare(struct device *dev);
-- 
cgit v1.2.3


From d6e99abb424a916ecbb127dba065a379b460a062 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@linaro.org>
Date: Fri, 18 Jan 2013 15:31:06 +0800
Subject: pinctrl: core: get devname from pinctrl_dev

Add new function to get devname from pinctrl_dev. pinctrl_dev_get_name()
can only get pinctrl description name. If we want to use gpio driver to
find pinctrl device node, we need to fetch the pinctrl device name.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/core.c          | 6 ++++++
 include/linux/pinctrl/pinctrl.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 361175372598..e6373f30aeeb 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -83,6 +83,12 @@ const char *pinctrl_dev_get_name(struct pinctrl_dev *pctldev)
 }
 EXPORT_SYMBOL_GPL(pinctrl_dev_get_name);
 
+const char *pinctrl_dev_get_devname(struct pinctrl_dev *pctldev)
+{
+	return dev_name(pctldev->dev);
+}
+EXPORT_SYMBOL_GPL(pinctrl_dev_get_devname);
+
 void *pinctrl_dev_get_drvdata(struct pinctrl_dev *pctldev)
 {
 	return pctldev->driver_data;
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 04d6700d99af..778804df293f 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -154,6 +154,7 @@ struct pinctrl_dev *of_pinctrl_get(struct device_node *np)
 #endif /* CONFIG_OF */
 
 extern const char *pinctrl_dev_get_name(struct pinctrl_dev *pctldev);
+extern const char *pinctrl_dev_get_devname(struct pinctrl_dev *pctldev);
 extern void *pinctrl_dev_get_drvdata(struct pinctrl_dev *pctldev);
 #else
 
-- 
cgit v1.2.3


From 684697cbbcd076b8fde78d8863e341700533b542 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@linaro.org>
Date: Fri, 18 Jan 2013 15:31:15 +0800
Subject: pinctrl: generic: add slew rate config parameter

Add PIN_CONFIG_SLEW_RATE parameter into pinconf-generic driver.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinconf-generic.c       | 1 +
 include/linux/pinctrl/pinconf-generic.h | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index bcf8157ceea7..e5948f8172af 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -45,6 +45,7 @@ struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT, "input schmitt trigger", NULL),
 	PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "time units"),
 	PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector"),
+	PCONFDUMP(PIN_CONFIG_SLEW_RATE, "slew rate", NULL),
 	PCONFDUMP(PIN_CONFIG_LOW_POWER_MODE, "pin low power", "mode"),
 	PCONFDUMP(PIN_CONFIG_OUTPUT, "pin output", "level"),
 };
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 40d7bb9c7562..3e7909aa5c03 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -60,6 +60,9 @@
  * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power
  *	supplies, the argument to this parameter (on a custom format) tells
  *	the driver which alternative power source to use.
+ * @PIN_CONFIG_SLEW_RATE: if the pin can select slew rate, the argument to
+ * 	this parameter (on a custom format) tells the driver which alternative
+ * 	slew rate to use.
  * @PIN_CONFIG_LOW_POWER_MODE: this will configure the pin for low power
  *	operation, if several modes of operation are supported these can be
  *	passed in the argument on a custom form, else just use argument 1
@@ -83,6 +86,7 @@ enum pin_config_param {
 	PIN_CONFIG_INPUT_SCHMITT,
 	PIN_CONFIG_INPUT_DEBOUNCE,
 	PIN_CONFIG_POWER_SOURCE,
+	PIN_CONFIG_SLEW_RATE,
 	PIN_CONFIG_LOW_POWER_MODE,
 	PIN_CONFIG_OUTPUT,
 	PIN_CONFIG_END = 0x7FFF,
-- 
cgit v1.2.3


From 418c59e49ddc77fcb7054f2c8d52c9d47403b43e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 14 Dec 2012 13:15:08 -0500
Subject: tracing: Fix sparse warning with is_signed_type() macro

Sparse complains when is_signed_type() is used on a pointer.
This macro is needed for the format output used for ftrace
and perf, to know if a binary field is a signed type or not.
The is_signed_type() macro is used against all fields that are
recorded by events to automate the operation.

The problem sparse has is with the current way is_signed_type()
works:

  ((type)-1 < 0)

If "type" is a poiner, than sparse does not like it being compared
to an integer (zero). The simple fix is to just give zero the
same type. The runtime result stays the same.

Reported-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index a3d489531d83..43ef8b6cce7f 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -272,7 +272,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 extern int trace_add_event_call(struct ftrace_event_call *call);
 extern void trace_remove_event_call(struct ftrace_event_call *call);
 
-#define is_signed_type(type)	(((type)(-1)) < 0)
+#define is_signed_type(type)	(((type)(-1)) < (type)0)
 
 int trace_set_clr_event(const char *system, const char *event, int set);
 
-- 
cgit v1.2.3


From 0f1ac8fd254b6c3e77950a1c4ee67be5dc88f7e0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 15 Jan 2013 22:11:19 -0500
Subject: tracing/lockdep: Disable lockdep first in entering NMI

When function tracing with either debug locks enabled or tracing
preempt disabled, the add_preempt_count() is traced. This is an
issue with lockdep and function tracing. As function tracing
can disable interrupts, and lockdep records that change,
lockdep may not be able to handle this recursion if it happens from
an NMI context.

The first thing that an NMI does is:

 #define nmi_enter()					\
	do {							\
		ftrace_nmi_enter();				\
		BUG_ON(in_nmi());				\
		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
		lockdep_off();					\
		rcu_nmi_enter();				\
		trace_hardirq_enter();				\
	} while (0)

When the add_preempt_count() is traced, and the tracing callback
disables interrupts, it will jump into the lockdep code. There's
some places in lockdep that can't handle this re-entrance, and
causes lockdep to fail.

As the lockdep_off() (and lockdep_on) is a simple:

void lockdep_off(void)
{
	current->lockdep_recursion++;
}

and is never traced, it can be called first in nmi_enter()
and lockdep_on() last in nmi_exit().

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/hardirq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 624ef3f45c8e..57bfdce8fb90 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -180,10 +180,10 @@ extern void irq_exit(void);
 
 #define nmi_enter()						\
 	do {							\
+		lockdep_off();					\
 		ftrace_nmi_enter();				\
 		BUG_ON(in_nmi());				\
 		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
-		lockdep_off();					\
 		rcu_nmi_enter();				\
 		trace_hardirq_enter();				\
 	} while (0)
@@ -192,10 +192,10 @@ extern void irq_exit(void);
 	do {							\
 		trace_hardirq_exit();				\
 		rcu_nmi_exit();					\
-		lockdep_on();					\
 		BUG_ON(!in_nmi());				\
 		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		ftrace_nmi_exit();				\
+		lockdep_on();					\
 	} while (0)
 
 #endif /* LINUX_HARDIRQ_H */
-- 
cgit v1.2.3


From 06aeaaeabf69da4a3e86df532425640f51b01cef Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 28 Sep 2012 17:15:17 +0900
Subject: ftrace: Move ARCH_SUPPORTS_FTRACE_SAVE_REGS in Kconfig

Move SAVE_REGS support flag into Kconfig and rename
it to CONFIG_DYNAMIC_FTRACE_WITH_REGS. This also introduces
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS which indicates
the architecture depending part of ftrace has a code
that saves full registers.
On the other hand, CONFIG_DYNAMIC_FTRACE_WITH_REGS indicates
the code is enabled.

Link: http://lkml.kernel.org/r/20120928081516.3560.72534.stgit@ltc138.sdl.hitachi.co.jp

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/Kconfig              | 1 +
 arch/x86/include/asm/ftrace.h | 1 -
 include/linux/ftrace.h        | 6 +++---
 kernel/trace/Kconfig          | 8 ++++++++
 kernel/trace/ftrace.c         | 6 +++---
 kernel/trace/trace_selftest.c | 2 +-
 6 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 79795af59810..996ccecc694c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -44,6 +44,7 @@ config X86
 	select HAVE_FENTRY if X86_64
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_FP_TEST
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9a25b522d377..86cb51e1ca96 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define ARCH_SUPPORTS_FTRACE_OPS 1
-#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 92691d85c320..e5ca8ef50e9b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  * SAVE_REGS - The ftrace_ops wants regs saved at each function called
  *            and passed to the callback. If this flag is set, but the
  *            architecture does not support passing regs
- *            (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
+ *            (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
  *            ftrace_ops will fail to register, unless the next flag
  *            is set.
  * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
@@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command);
 #endif
 
 #ifndef FTRACE_REGS_ADDR
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
 #else
 # define FTRACE_REGS_ADDR FTRACE_ADDR
@@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod,
  */
 extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
 
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 /**
  * ftrace_modify_call - convert from one addr to another (no nop)
  * @rec: the mcount call site record
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d89335a485f..cdc9d284d24e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE
 	help
 	  See Documentation/trace/ftrace-design.txt
 
+config HAVE_DYNAMIC_FTRACE_WITH_REGS
+	bool
+
 config HAVE_FTRACE_MCOUNT_RECORD
 	bool
 	help
@@ -434,6 +437,11 @@ config DYNAMIC_FTRACE
 	  were made. If so, it runs stop_machine (stops all CPUS)
 	  and modifies the code to jump over the call to ftrace.
 
+config DYNAMIC_FTRACE_WITH_REGS
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
+
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
 	depends on FUNCTION_TRACER
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 41473b4ad7a4..6e34dc162fe1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -337,7 +337,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
 		return -EINVAL;
 
-#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	/*
 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
 	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
@@ -4143,8 +4143,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
  * Archs are to support both the regs and ftrace_ops at the same time.
  * If they support ftrace_ops, it is assumed they support regs.
  * If call backs want to use regs, they must either check for regs
- * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
- * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
+ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
+ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
  * An architecture can pass partial regs with ftrace_ops and still
  * set the ARCH_SUPPORT_FTARCE_OPS.
  */
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 47623169a815..6c62d58d8e87 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -568,7 +568,7 @@ trace_selftest_function_regs(void)
 	int ret;
 	int supported = 0;
 
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	supported = 1;
 #endif
 
-- 
cgit v1.2.3


From e7dbfe349d12eabb7783b117e0c115f6f3d9ef9e Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 28 Sep 2012 17:15:20 +0900
Subject: kprobes/x86: Move ftrace-based kprobe code into kprobes-ftrace.c

Split ftrace-based kprobes code from kprobes, and introduce
CONFIG_(HAVE_)KPROBES_ON_FTRACE Kconfig flags.
For the cleanup reason, this also moves kprobe_ftrace check
into skip_singlestep.

Link: http://lkml.kernel.org/r/20120928081520.3560.25624.stgit@ltc138.sdl.hitachi.co.jp

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/Kconfig                     | 12 ++++++
 arch/x86/Kconfig                 |  1 +
 arch/x86/kernel/Makefile         |  1 +
 arch/x86/kernel/kprobes-common.h | 11 +++++
 arch/x86/kernel/kprobes-ftrace.c | 93 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/kprobes.c        | 70 +-----------------------------
 include/linux/kprobes.h          | 12 +-----
 kernel/kprobes.c                 |  8 ++--
 8 files changed, 125 insertions(+), 83 deletions(-)
 create mode 100644 arch/x86/kernel/kprobes-ftrace.c

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 7f8f281f2585..97fb7d0365d1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -76,6 +76,15 @@ config OPTPROBES
 	depends on KPROBES && HAVE_OPTPROBES
 	depends on !PREEMPT
 
+config KPROBES_ON_FTRACE
+	def_bool y
+	depends on KPROBES && HAVE_KPROBES_ON_FTRACE
+	depends on DYNAMIC_FTRACE_WITH_REGS
+	help
+	 If function tracer is enabled and the arch supports full
+	 passing of pt_regs to function tracing, then kprobes can
+	 optimize on top of function tracing.
+
 config UPROBES
 	bool "Transparent user-space probes (EXPERIMENTAL)"
 	depends on UPROBE_EVENT && PERF_EVENTS
@@ -158,6 +167,9 @@ config HAVE_KRETPROBES
 config HAVE_OPTPROBES
 	bool
 
+config HAVE_KPROBES_ON_FTRACE
+	bool
+
 config HAVE_NMI_WATCHDOG
 	bool
 #
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 996ccecc694c..be8b2b3ab979 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,6 +40,7 @@ config X86
 	select HAVE_DMA_CONTIGUOUS if !SWIOTLB
 	select HAVE_KRETPROBES
 	select HAVE_OPTPROBES
+	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FENTRY if X86_64
 	select HAVE_C_RECORDMCOUNT
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34e923a53762..cc5d31f8830c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_OPTPROBES)		+= kprobes-opt.o
+obj-$(CONFIG_KPROBES_ON_FTRACE)	+= kprobes-ftrace.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
index 3230b68ef29a..2e9d4b5af036 100644
--- a/arch/x86/kernel/kprobes-common.h
+++ b/arch/x86/kernel/kprobes-common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
 	return addr;
 }
 #endif
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+			   struct kprobe_ctlblk *kcb);
+#else
+static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+				  struct kprobe_ctlblk *kcb)
+{
+	return 0;
+}
+#endif
 #endif
diff --git a/arch/x86/kernel/kprobes-ftrace.c b/arch/x86/kernel/kprobes-ftrace.c
new file mode 100644
index 000000000000..70a81c7aa0a7
--- /dev/null
+++ b/arch/x86/kernel/kprobes-ftrace.c
@@ -0,0 +1,93 @@
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+#include "kprobes-common.h"
+
+static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+			     struct kprobe_ctlblk *kcb)
+{
+	/*
+	 * Emulate singlestep (and also recover regs->ip)
+	 * as if there is a 5byte nop
+	 */
+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+	if (unlikely(p->post_handler)) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+	__this_cpu_write(current_kprobe, NULL);
+	return 1;
+}
+
+int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+			      struct kprobe_ctlblk *kcb)
+{
+	if (kprobe_ftrace(p))
+		return __skip_singlestep(p, regs, kcb);
+	else
+		return 0;
+}
+
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+				     struct ftrace_ops *ops, struct pt_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+	unsigned long flags;
+
+	/* Disable irq for emulating a breakpoint and avoiding preempt */
+	local_irq_save(flags);
+
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		goto end;
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+		regs->ip = ip + sizeof(kprobe_opcode_t);
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (!p->pre_handler || !p->pre_handler(p, regs))
+			__skip_singlestep(p, regs, kcb);
+		/*
+		 * If pre_handler returns !0, it sets regs->ip and
+		 * resets current kprobe.
+		 */
+	}
+end:
+	local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	p->ainsn.boostable = -1;
+	return 0;
+}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 57916c0d3cf6..18114bfb10f3 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 	return 1;
 }
 
-#ifdef KPROBES_CAN_USE_FTRACE
-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-				      struct kprobe_ctlblk *kcb)
-{
-	/*
-	 * Emulate singlestep (and also recover regs->ip)
-	 * as if there is a 5byte nop
-	 */
-	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
-	if (unlikely(p->post_handler)) {
-		kcb->kprobe_status = KPROBE_HIT_SSDONE;
-		p->post_handler(p, regs, 0);
-	}
-	__this_cpu_write(current_kprobe, NULL);
-}
-#endif
-
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	} else if (kprobe_running()) {
 		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
-#ifdef KPROBES_CAN_USE_FTRACE
-			if (kprobe_ftrace(p)) {
-				skip_singlestep(p, regs, kcb);
-				return 1;
-			}
-#endif
-			setup_singlestep(p, regs, kcb, 0);
+			if (!skip_singlestep(p, regs, kcb))
+				setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
 	} /* else: not a kprobe fault; let the kernel handle it */
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-#ifdef KPROBES_CAN_USE_FTRACE
-/* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
-				     struct ftrace_ops *ops, struct pt_regs *regs)
-{
-	struct kprobe *p;
-	struct kprobe_ctlblk *kcb;
-	unsigned long flags;
-
-	/* Disable irq for emulating a breakpoint and avoiding preempt */
-	local_irq_save(flags);
-
-	p = get_kprobe((kprobe_opcode_t *)ip);
-	if (unlikely(!p) || kprobe_disabled(p))
-		goto end;
-
-	kcb = get_kprobe_ctlblk();
-	if (kprobe_running()) {
-		kprobes_inc_nmissed_count(p);
-	} else {
-		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
-		regs->ip = ip + sizeof(kprobe_opcode_t);
-
-		__this_cpu_write(current_kprobe, p);
-		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-		if (!p->pre_handler || !p->pre_handler(p, regs))
-			skip_singlestep(p, regs, kcb);
-		/*
-		 * If pre_handler returns !0, it sets regs->ip and
-		 * resets current kprobe.
-		 */
-	}
-end:
-	local_irq_restore(flags);
-}
-
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
-{
-	p->ainsn.insn = NULL;
-	p->ainsn.boostable = -1;
-	return 0;
-}
-#endif
-
 int __init arch_init_kprobes(void)
 {
 	return arch_init_optprobes();
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 23755ba42abc..4b6ef4d33cc2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -49,16 +49,6 @@
 #define KPROBE_REENTER		0x00000004
 #define KPROBE_HIT_SSDONE	0x00000008
 
-/*
- * If function tracer is enabled and the arch supports full
- * passing of pt_regs to function tracing, then kprobes can
- * optimize on top of function tracing.
- */
-#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
-	&& defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
-# define KPROBES_CAN_USE_FTRACE
-#endif
-
 /* Attach to insert probes on any functions which should be ignored*/
 #define __kprobes	__attribute__((__section__(".kprobes.text")))
 
@@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 #endif
 
 #endif /* CONFIG_OPTPROBES */
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				  struct ftrace_ops *ops, struct pt_regs *regs);
 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 098f396aa409..f423c3ef4a82 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 }
 #endif /* CONFIG_OPTPROBES */
 
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
 	.func = kprobe_ftrace_handler,
 	.flags = FTRACE_OPS_FL_SAVE_REGS,
@@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
 			   (unsigned long)p->addr, 1, 0);
 	WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
 }
-#else	/* !KPROBES_CAN_USE_FTRACE */
+#else	/* !CONFIG_KPROBES_ON_FTRACE */
 #define prepare_kprobe(p)	arch_prepare_kprobe(p)
 #define arm_kprobe_ftrace(p)	do {} while (0)
 #define disarm_kprobe_ftrace(p)	do {} while (0)
@@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 	 */
 	ftrace_addr = ftrace_location((unsigned long)p->addr);
 	if (ftrace_addr) {
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
 		/* Given address is not on the instruction boundary */
 		if ((unsigned long)p->addr != ftrace_addr)
 			return -EILSEQ;
 		p->flags |= KPROBE_FLAG_FTRACE;
-#else	/* !KPROBES_CAN_USE_FTRACE */
+#else	/* !CONFIG_KPROBES_ON_FTRACE */
 		return -EINVAL;
 #endif
 	}
-- 
cgit v1.2.3


From bbb923a4c2d17ebd5ec34755fe19a33914cbd86f Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 21 Jan 2013 06:00:25 +0000
Subject: mcast: define and use MRT[6]_MAX in ip[6]_mroute_opt()

This will ease further addition of new MRT[6]_* values and avoid to update
in6.h each time.
Note that we reduce the maximum value from 210 to 209, but 210 does not match
any known value in ip[6]_mroute_setsockopt().

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h       |  2 +-
 include/linux/mroute6.h      |  2 +-
 include/uapi/linux/in6.h     | 15 ++++-----------
 include/uapi/linux/mroute.h  |  1 +
 include/uapi/linux/mroute6.h |  1 +
 5 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index ea00d9162ee5..79aaa9fc1a15 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -9,7 +9,7 @@
 #ifdef CONFIG_IP_MROUTE
 static inline int ip_mroute_opt(int opt)
 {
-	return (opt >= MRT_BASE) && (opt <= MRT_BASE + 10);
+	return (opt >= MRT_BASE) && (opt <= MRT_MAX);
 }
 #else
 static inline int ip_mroute_opt(int opt)
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index a223561ba12e..66982e764051 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -10,7 +10,7 @@
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
 {
-	return (opt >= MRT6_BASE) && (opt <= MRT6_BASE + 10);
+	return (opt >= MRT6_BASE) && (opt <= MRT6_MAX);
 }
 #else
 static inline int ip6_mroute_opt(int opt)
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 5673b97dcf54..53b1d56a6e7f 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -259,17 +259,10 @@ struct in6_flowlabel_req {
 
 /*
  * Multicast Routing:
- * see include/linux/mroute6.h.
+ * see include/uapi/linux/mroute6.h.
  *
- * MRT6_INIT			200
- * MRT6_DONE			201
- * MRT6_ADD_MIF			202
- * MRT6_DEL_MIF			203
- * MRT6_ADD_MFC			204
- * MRT6_DEL_MFC			205
- * MRT6_VERSION			206
- * MRT6_ASSERT			207
- * MRT6_PIM			208
- * (reserved)			209
+ * MRT6_BASE			200
+ * ...
+ * MRT6_MAX
  */
 #endif /* _UAPI_LINUX_IN6_H */
diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 16929993acc4..1c11004af5db 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -26,6 +26,7 @@
 #define MRT_ASSERT	(MRT_BASE+7)	/* Activate PIM assert mode		*/
 #define MRT_PIM		(MRT_BASE+8)	/* enable PIM code			*/
 #define MRT_TABLE	(MRT_BASE+9)	/* Specify mroute table ID		*/
+#define MRT_MAX		(MRT_BASE+9)
 
 #define SIOCGETVIFCNT	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT	(SIOCPROTOPRIVATE+1)
diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
index 3e89b5e7f9e3..c206ae3a2327 100644
--- a/include/uapi/linux/mroute6.h
+++ b/include/uapi/linux/mroute6.h
@@ -26,6 +26,7 @@
 #define MRT6_ASSERT	(MRT6_BASE+7)	/* Activate PIM assert mode		*/
 #define MRT6_PIM	(MRT6_BASE+8)	/* enable PIM code			*/
 #define MRT6_TABLE	(MRT6_BASE+9)	/* Specify mroute table ID		*/
+#define MRT6_MAX	(MRT6_BASE+9)
 
 #define SIOCGETMIFCNT_IN6	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT_IN6	(SIOCPROTOPRIVATE+1)
-- 
cgit v1.2.3


From fa0879e37b59e8e3f130a30a9e6fa515717c5bdd Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@gmail.com>
Date: Mon, 21 Jan 2013 01:17:22 +0000
Subject: net: split eth_mac_addr for better error handling

When we set mac address, software mac address in system and hardware mac
address all need to be updated. Current eth_mac_addr() doesn't allow
callers to implement error handling nicely.

This patch split eth_mac_addr() to prepare part and real commit part,
then we can prepare first, and try to change hardware address, then do
the real commit if hardware address is set successfully.

Signed-off-by: Stefan Hajnoczi <stefanha@gmail.com>
Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  2 ++
 net/ethernet/eth.c          | 41 +++++++++++++++++++++++++++++++++++------
 2 files changed, 37 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 1a43e1b4f7ad..c623861964e4 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -40,6 +40,8 @@ extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
 extern void eth_header_cache_update(struct hh_cache *hh,
 				    const struct net_device *dev,
 				    const unsigned char *haddr);
+extern int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
+extern void eth_commit_mac_addr_change(struct net_device *dev, void *p);
 extern int eth_mac_addr(struct net_device *dev, void *p);
 extern int eth_change_mtu(struct net_device *dev, int new_mtu);
 extern int eth_validate_addr(struct net_device *dev);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index bc39c8c8f589..a36c85eab5b4 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -271,6 +271,36 @@ void eth_header_cache_update(struct hh_cache *hh,
 }
 EXPORT_SYMBOL(eth_header_cache_update);
 
+/**
+ * eth_prepare_mac_addr_change - prepare for mac change
+ * @dev: network device
+ * @p: socket address
+ */
+int eth_prepare_mac_addr_change(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
+		return -EBUSY;
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+	return 0;
+}
+EXPORT_SYMBOL(eth_prepare_mac_addr_change);
+
+/**
+ * eth_commit_mac_addr_change - commit mac change
+ * @dev: network device
+ * @p: socket address
+ */
+void eth_commit_mac_addr_change(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+}
+EXPORT_SYMBOL(eth_commit_mac_addr_change);
+
 /**
  * eth_mac_addr - set new Ethernet hardware address
  * @dev: network device
@@ -283,13 +313,12 @@ EXPORT_SYMBOL(eth_header_cache_update);
  */
 int eth_mac_addr(struct net_device *dev, void *p)
 {
-	struct sockaddr *addr = p;
+	int ret;
 
-	if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
-		return -EBUSY;
-	if (!is_valid_ether_addr(addr->sa_data))
-		return -EADDRNOTAVAIL;
-	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	ret = eth_prepare_mac_addr_change(dev, p);
+	if (ret < 0)
+		return ret;
+	eth_commit_mac_addr_change(dev, p);
 	return 0;
 }
 EXPORT_SYMBOL(eth_mac_addr);
-- 
cgit v1.2.3


From b000c8065a92b0fe0e1694f41b2c8d8ba7b7b1ec Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 18 Jan 2013 10:31:20 -0500
Subject: tracing: Remove the extra 4 bytes of padding in events

Due to a userspace issue with PowerTop v2beta, which hardcoded
the offset of event fields that it was using, it broke when
we removed the Big Kernel Lock counter from the event header.

 (commit e6e1e2593 "tracing: Remove lock_depth from event entry")

Because this broke userspace, it was determined that we must
keep those 4 bytes around.

 (commit a3a4a5acd "Regression: partial revert "tracing: Remove lock_depth from event entry"")

This unfortunately wastes space in the ring buffer. 4 bytes per
event, where a lot of events are just 24 bytes. That's 16% of the
buffer wasted. A million events will add 4 megs of white space
into the buffer.

It was later noticed that PowerTop v2beta could not work on systems
where the kernel was 64 bit but the userspace was 32 bits.
The reason was because the offsets are different between the
two and the hard coded offset of one would not work with the other.

With PowerTop v2 final, it implemented the same interface that both
perf and trace-cmd use. That is, it reads the format file of
the event to find the offsets of the fields it needs. This fixes
the problem with running powertop on a 32 bit userspace running
on a 64 bit kernel. It also no longer requires the 4 byte padding.

As PowerTop v2 has been out for a while, and is included in all
major distributions, it is time that we can safely remove the
4 bytes of padding. Users of PowerTop v2beta should upgrade to
PowerTop v2 final.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 1 -
 kernel/trace/trace.c         | 1 -
 kernel/trace/trace_events.c  | 1 -
 3 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 43ef8b6cce7f..6f8d0b77006b 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -49,7 +49,6 @@ struct trace_entry {
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
-	int			padding;
 };
 
 #define FTRACE_MAX_EVENT						\
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d62248dfda76..a387bd271e71 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1173,7 +1173,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
 	entry->preempt_count		= pc & 0xff;
 	entry->pid			= (tsk) ? tsk->pid : 0;
-	entry->padding			= 0;
 	entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 880073d0b946..57e9b284250c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
 	__common_field(unsigned char, flags);
 	__common_field(unsigned char, preempt_count);
 	__common_field(int, pid);
-	__common_field(int, padding);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 01ec8c5423901c4fe8d97f786ed9a0c31215b53a Mon Sep 17 00:00:00 2001
From: Michel JAOUEN <michel.jaouen@stericsson.com>
Date: Thu, 26 Apr 2012 10:00:04 +0200
Subject: pm2301: Provide u9540 support for the pm2301 charger

AC charger driver for the DB9540 based platforms.

Signed-off-by: Rajkumar Kasirajan <rajkumar.kasirajan@stericsson.com>
Signed-off-by: Loic Pallardy <loic.pallardy@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Tested-by: Michel JAOUEN <michel.jaouen@stericsson.com>
---
 drivers/power/Kconfig                     |    7 +
 drivers/power/Makefile                    |    1 +
 drivers/power/ab8500_charger.c            |   36 +-
 drivers/power/pm2301_charger.c            | 1455 +++++++++++++++++++++++++++++
 include/linux/mfd/abx500/ab8500-bm.h      |    2 +
 include/linux/mfd/abx500/ux500_chargalg.h |    2 +
 include/linux/pm2301_charger.h            |   60 ++
 7 files changed, 1550 insertions(+), 13 deletions(-)
 create mode 100644 drivers/power/pm2301_charger.c
 create mode 100644 include/linux/pm2301_charger.h

(limited to 'include/linux')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 9f45e2f77d53..4811b59ba730 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -346,6 +346,13 @@ config AB8500_BM
 	help
 	  Say Y to include support for AB8500 battery management.
 
+config CHARGER_PM2301
+	bool "PM2301 Battery Charger Driver"
+	depends on AB8500_BM
+	help
+	  Say Y to include support for PM2301 charger driver.
+	  Depends on AB8500 battery management core.
+
 source "drivers/power/reset/Kconfig"
 
 endif # POWER_SUPPLY
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index b11e0c7ea0f1..aa966e806834 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_CHARGER_LP8727)	+= lp8727_charger.o
 obj-$(CONFIG_CHARGER_LP8788)	+= lp8788-charger.o
 obj-$(CONFIG_CHARGER_GPIO)	+= gpio-charger.o
 obj-$(CONFIG_CHARGER_MANAGER)	+= charger-manager.o
+obj-$(CONFIG_CHARGER_PM2301)	+= pm2301_charger.o
 obj-$(CONFIG_CHARGER_MAX8997)	+= max8997_charger.o
 obj-$(CONFIG_CHARGER_MAX8998)	+= max8998_charger.o
 obj-$(CONFIG_CHARGER_BQ2415X)	+= bq2415x_charger.o
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index d5a8bdadb49a..43ec82ba4275 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -2830,8 +2830,11 @@ static int ab8500_charger_remove(struct platform_device *pdev)
 	destroy_workqueue(di->charger_wq);
 
 	flush_scheduled_work();
-	power_supply_unregister(&di->usb_chg.psy);
-	power_supply_unregister(&di->ac_chg.psy);
+	if(di->usb_chg.enabled)
+		power_supply_unregister(&di->usb_chg.psy);
+	if(di->ac_chg.enabled)
+		power_supply_unregister(&di->ac_chg.psy);
+
 	platform_set_drvdata(pdev, NULL);
 
 	return 0;
@@ -2899,6 +2902,7 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 		ARRAY_SIZE(ab8500_charger_voltage_map) - 1];
 	di->ac_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
+	di->ac_chg.enabled = di->pdata->ac_enabled;
 
 	/* USB supply */
 	/* power_supply base class */
@@ -2917,7 +2921,7 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 		ARRAY_SIZE(ab8500_charger_voltage_map) - 1];
 	di->usb_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
-
+	di->usb_chg.enabled = di->pdata->usb_enabled;
 
 	/* Create a work queue for the charger */
 	di->charger_wq =
@@ -2995,17 +2999,21 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 	}
 
 	/* Register AC charger class */
-	ret = power_supply_register(di->dev, &di->ac_chg.psy);
-	if (ret) {
-		dev_err(di->dev, "failed to register AC charger\n");
-		goto free_charger_wq;
+	if(di->ac_chg.enabled) {
+		ret = power_supply_register(di->dev, &di->ac_chg.psy);
+		if (ret) {
+			dev_err(di->dev, "failed to register AC charger\n");
+			goto free_charger_wq;
+		}
 	}
 
 	/* Register USB charger class */
-	ret = power_supply_register(di->dev, &di->usb_chg.psy);
-	if (ret) {
-		dev_err(di->dev, "failed to register USB charger\n");
-		goto free_ac;
+	if(di->usb_chg.enabled) {
+		ret = power_supply_register(di->dev, &di->usb_chg.psy);
+		if (ret) {
+			dev_err(di->dev, "failed to register USB charger\n");
+			goto free_ac;
+		}
 	}
 
 	di->usb_phy = usb_get_phy(USB_PHY_TYPE_USB2);
@@ -3085,9 +3093,11 @@ free_irq:
 put_usb_phy:
 	usb_put_phy(di->usb_phy);
 free_usb:
-	power_supply_unregister(&di->usb_chg.psy);
+	if(di->usb_chg.enabled)
+		power_supply_unregister(&di->usb_chg.psy);
 free_ac:
-	power_supply_unregister(&di->ac_chg.psy);
+	if(di->ac_chg.enabled)
+		power_supply_unregister(&di->ac_chg.psy);
 free_charger_wq:
 	destroy_workqueue(di->charger_wq);
 	return ret;
diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
new file mode 100644
index 000000000000..60a30323151e
--- /dev/null
+++ b/drivers/power/pm2301_charger.c
@@ -0,0 +1,1455 @@
+/*
+ * Power supply driver for ST Ericsson pm2xxx_charger charger
+ *
+ * Copyright 2012 ST Ericsson.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/completion.h>
+#include <linux/regulator/consumer.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/workqueue.h>
+#include <linux/kobject.h>
+#include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-bm.h>
+#include <linux/mfd/abx500/ab8500-gpadc.h>
+#include <linux/mfd/abx500/ux500_chargalg.h>
+#include <linux/pm2301_charger.h>
+
+#define MAIN_WDOG_ENA			0x01
+#define MAIN_WDOG_KICK			0x02
+#define MAIN_WDOG_DIS			0x00
+#define CHARG_WD_KICK			0x01
+#define MAIN_CH_ENA			0x01
+#define MAIN_CH_NO_OVERSHOOT_ENA_N	0x02
+#define MAIN_CH_DET			0x01
+#define MAIN_CH_CV_ON			0x04
+#define OTP_ENABLE_WD			0x01
+
+#define MAIN_CH_INPUT_CURR_SHIFT	4
+
+#define LED_INDICATOR_PWM_ENA		0x01
+#define LED_INDICATOR_PWM_DIS		0x00
+#define LED_IND_CUR_5MA			0x04
+#define LED_INDICATOR_PWM_DUTY_252_256	0xBF
+
+/* HW failure constants */
+#define MAIN_CH_TH_PROT			0x02
+#define MAIN_CH_NOK			0x01
+
+/* Watchdog timeout constant */
+#define WD_TIMER			0x30 /* 4min */
+#define WD_KICK_INTERVAL		(60 * HZ)
+
+/* Constant voltage/current */
+#define PM2XXX_CONST_CURR		0x0
+#define PM2XXX_CONST_VOLT		0x1
+
+/* Lowest charger voltage is 3.39V -> 0x4E */
+#define LOW_VOLT_REG			0x4E
+
+#define PM2XXX_BATT_CTRL_REG1		0x00
+#define PM2XXX_BATT_CTRL_REG2		0x01
+#define PM2XXX_BATT_CTRL_REG3		0x02
+#define PM2XXX_BATT_CTRL_REG4		0x03
+#define PM2XXX_BATT_CTRL_REG5		0x04
+#define PM2XXX_BATT_CTRL_REG6		0x05
+#define PM2XXX_BATT_CTRL_REG7		0x06
+#define PM2XXX_BATT_CTRL_REG8		0x07
+#define PM2XXX_NTC_CTRL_REG1		0x08
+#define PM2XXX_NTC_CTRL_REG2		0x09
+#define PM2XXX_BATT_CTRL_REG9		0x0A
+#define PM2XXX_BATT_STAT_REG1		0x0B
+#define PM2XXX_INP_VOLT_VPWR2		0x11
+#define PM2XXX_INP_DROP_VPWR2		0x13
+#define PM2XXX_INP_VOLT_VPWR1		0x15
+#define PM2XXX_INP_DROP_VPWR1		0x17
+#define PM2XXX_INP_MODE_VPWR		0x18
+#define PM2XXX_BATT_WD_KICK		0x70
+#define PM2XXX_DEV_VER_STAT		0x0C
+#define PM2XXX_THERM_WARN_CTRL_REG	0x20
+#define PM2XXX_BATT_DISC_REG		0x21
+#define PM2XXX_BATT_LOW_LEV_COMP_REG	0x22
+#define PM2XXX_BATT_LOW_LEV_VAL_REG	0x23
+#define PM2XXX_I2C_PAD_CTRL_REG		0x24
+#define PM2XXX_SW_CTRL_REG		0x26
+#define PM2XXX_LED_CTRL_REG		0x28
+
+#define PM2XXX_REG_INT1		0x40
+#define PM2XXX_MASK_REG_INT1	0x50
+#define PM2XXX_SRCE_REG_INT1	0x60
+#define PM2XXX_REG_INT2		0x41
+#define PM2XXX_MASK_REG_INT2	0x51
+#define PM2XXX_SRCE_REG_INT2	0x61
+#define PM2XXX_REG_INT3		0x42
+#define PM2XXX_MASK_REG_INT3	0x52
+#define PM2XXX_SRCE_REG_INT3	0x62
+#define PM2XXX_REG_INT4		0x43
+#define PM2XXX_MASK_REG_INT4	0x53
+#define PM2XXX_SRCE_REG_INT4	0x63
+#define PM2XXX_REG_INT5		0x44
+#define PM2XXX_MASK_REG_INT5	0x54
+#define PM2XXX_SRCE_REG_INT5	0x64
+#define PM2XXX_REG_INT6		0x45
+#define PM2XXX_MASK_REG_INT6	0x55
+#define PM2XXX_SRCE_REG_INT6	0x65
+
+#define VPWR_OVV 0x0
+#define VSYSTEM_OVV 0x1
+
+/* control Reg 1 */
+#define PM2XXX_CH_RESUME_EN	     0x1
+#define PM2XXX_CH_RESUME_DIS		0x0
+
+/* control Reg 2 */
+#define PM2XXX_CH_AUTO_RESUME_EN	0X2
+#define PM2XXX_CH_AUTO_RESUME_DIS	0X0
+#define PM2XXX_CHARGER_ENA		0x4
+#define PM2XXX_CHARGER_DIS		0x0
+
+/* control Reg 3 */
+#define PM2XXX_CH_WD_CC_PHASE_OFF	0x0
+#define PM2XXX_CH_WD_CC_PHASE_5MIN	0x1
+#define PM2XXX_CH_WD_CC_PHASE_10MIN	0x2
+#define PM2XXX_CH_WD_CC_PHASE_30MIN	0x3
+#define PM2XXX_CH_WD_CC_PHASE_60MIN	0x4
+#define PM2XXX_CH_WD_CC_PHASE_120MIN	0x5
+#define PM2XXX_CH_WD_CC_PHASE_240MIN	0x6
+#define PM2XXX_CH_WD_CC_PHASE_360MIN	0x7
+
+#define PM2XXX_CH_WD_CV_PHASE_OFF	(0x0<<3)
+#define PM2XXX_CH_WD_CV_PHASE_5MIN	(0x1<<3)
+#define PM2XXX_CH_WD_CV_PHASE_10MIN	(0x2<<3)
+#define PM2XXX_CH_WD_CV_PHASE_30MIN	(0x3<<3)
+#define PM2XXX_CH_WD_CV_PHASE_60MIN	(0x4<<3)
+#define PM2XXX_CH_WD_CV_PHASE_120MIN	(0x5<<3)
+#define PM2XXX_CH_WD_CV_PHASE_240MIN	(0x6<<3)
+#define PM2XXX_CH_WD_CV_PHASE_360MIN	(0x7<<3)
+
+/* control Reg 4 */
+#define PM2XXX_CH_WD_PRECH_PHASE_OFF	0x0
+#define PM2XXX_CH_WD_PRECH_PHASE_1MIN	0x1
+#define PM2XXX_CH_WD_PRECH_PHASE_5MIN	0x2
+#define PM2XXX_CH_WD_PRECH_PHASE_10MIN	0x3
+#define PM2XXX_CH_WD_PRECH_PHASE_30MIN	0x4
+#define PM2XXX_CH_WD_PRECH_PHASE_60MIN	0x5
+#define PM2XXX_CH_WD_PRECH_PHASE_120MIN	0x6
+#define PM2XXX_CH_WD_PRECH_PHASE_240MIN	0x7
+
+/* control Reg 5 */
+#define PM2XXX_CH_WD_AUTO_TIMEOUT_NONE	0x0
+#define PM2XXX_CH_WD_AUTO_TIMEOUT_20MIN	0x1
+
+/* control Reg 6 */
+#define PM2XXX_DIR_CH_CC_CURRENT_MASK	0x0F
+#define PM2XXX_DIR_CH_CC_CURRENT_200MA	0x0
+#define PM2XXX_DIR_CH_CC_CURRENT_400MA	0x2
+#define PM2XXX_DIR_CH_CC_CURRENT_600MA	0x3
+#define PM2XXX_DIR_CH_CC_CURRENT_800MA	0x4
+#define PM2XXX_DIR_CH_CC_CURRENT_1000MA	0x5
+#define PM2XXX_DIR_CH_CC_CURRENT_1200MA	0x6
+#define PM2XXX_DIR_CH_CC_CURRENT_1400MA	0x7
+#define PM2XXX_DIR_CH_CC_CURRENT_1600MA	0x8
+#define PM2XXX_DIR_CH_CC_CURRENT_1800MA	0x9
+#define PM2XXX_DIR_CH_CC_CURRENT_2000MA	0xA
+#define PM2XXX_DIR_CH_CC_CURRENT_2200MA	0xB
+#define PM2XXX_DIR_CH_CC_CURRENT_2400MA	0xC
+#define PM2XXX_DIR_CH_CC_CURRENT_2600MA	0xD
+#define PM2XXX_DIR_CH_CC_CURRENT_2800MA	0xE
+#define PM2XXX_DIR_CH_CC_CURRENT_3000MA	0xF
+
+#define PM2XXX_CH_PRECH_CURRENT_MASK	0x30
+#define PM2XXX_CH_PRECH_CURRENT_25MA	(0x0<<4)
+#define PM2XXX_CH_PRECH_CURRENT_50MA	(0x1<<4)
+#define PM2XXX_CH_PRECH_CURRENT_75MA	(0x2<<4)
+#define PM2XXX_CH_PRECH_CURRENT_100MA	(0x3<<4)
+
+#define PM2XXX_CH_EOC_CURRENT_MASK	0xC0
+#define PM2XXX_CH_EOC_CURRENT_100MA	(0x0<<6)
+#define PM2XXX_CH_EOC_CURRENT_150MA	(0x1<<6)
+#define PM2XXX_CH_EOC_CURRENT_300MA	(0x2<<6)
+#define PM2XXX_CH_EOC_CURRENT_400MA	(0x3<<6)
+
+/* control Reg 7 */
+#define PM2XXX_CH_PRECH_VOL_2_5		0x0
+#define PM2XXX_CH_PRECH_VOL_2_7		0x1
+#define PM2XXX_CH_PRECH_VOL_2_9		0x2
+#define PM2XXX_CH_PRECH_VOL_3_1		0x3
+
+#define PM2XXX_CH_VRESUME_VOL_3_2	(0x0<<2)
+#define PM2XXX_CH_VRESUME_VOL_3_4	(0x1<<2)
+#define PM2XXX_CH_VRESUME_VOL_3_6	(0x2<<2)
+#define PM2XXX_CH_VRESUME_VOL_3_8	(0x3<<2)
+
+/* control Reg 8 */
+#define PM2XXX_CH_VOLT_MASK		0x3F
+#define PM2XXX_CH_VOLT_3_5		0x0
+#define PM2XXX_CH_VOLT_3_5225		0x1
+#define PM2XXX_CH_VOLT_3_6		0x4
+#define PM2XXX_CH_VOLT_3_7		0x8
+#define PM2XXX_CH_VOLT_4_0		0x14
+#define PM2XXX_CH_VOLT_4_175		0x1B
+#define PM2XXX_CH_VOLT_4_2		0x1C
+#define PM2XXX_CH_VOLT_4_275		0x1F
+#define PM2XXX_CH_VOLT_4_3		0x20
+
+/*NTC control register 1*/
+#define PM2XXX_BTEMP_HIGH_TH_45		0x0
+#define PM2XXX_BTEMP_HIGH_TH_50		0x1
+#define PM2XXX_BTEMP_HIGH_TH_55		0x2
+#define PM2XXX_BTEMP_HIGH_TH_60		0x3
+#define PM2XXX_BTEMP_HIGH_TH_65		0x4
+
+#define PM2XXX_BTEMP_LOW_TH_N5		(0x0<<3)
+#define PM2XXX_BTEMP_LOW_TH_0		(0x1<<3)
+#define PM2XXX_BTEMP_LOW_TH_5		(0x2<<3)
+#define PM2XXX_BTEMP_LOW_TH_10		(0x3<<3)
+
+/*NTC control register 2*/
+#define PM2XXX_NTC_BETA_COEFF_3477	0x0
+#define PM2XXX_NTC_BETA_COEFF_3964	0x1
+
+#define PM2XXX_NTC_RES_10K		(0x0<<2)
+#define PM2XXX_NTC_RES_47K		(0x1<<2)
+#define PM2XXX_NTC_RES_100K		(0x2<<2)
+#define PM2XXX_NTC_RES_NO_NTC		(0x3<<2)
+
+/* control Reg 9 */
+#define PM2XXX_CH_CC_MODEDROP_EN	1
+#define PM2XXX_CH_CC_MODEDROP_DIS	0
+
+#define PM2XXX_CH_CC_REDUCED_CURRENT_100MA	(0x0<<1)
+#define PM2XXX_CH_CC_REDUCED_CURRENT_200MA	(0x1<<1)
+#define PM2XXX_CH_CC_REDUCED_CURRENT_400MA	(0x2<<1)
+#define PM2XXX_CH_CC_REDUCED_CURRENT_IDENT	(0x3<<1)
+
+#define PM2XXX_CHARCHING_INFO_DIS	(0<<3)
+#define PM2XXX_CHARCHING_INFO_EN	(1<<3)
+
+#define PM2XXX_CH_150MV_DROP_300MV	(0<<4)
+#define PM2XXX_CH_150MV_DROP_150MV	(1<<4)
+
+
+/* charger status register */
+#define PM2XXX_CHG_STATUS_OFF		0x0
+#define PM2XXX_CHG_STATUS_ON		0x1
+#define PM2XXX_CHG_STATUS_FULL		0x2
+#define PM2XXX_CHG_STATUS_ERR		0x3
+#define PM2XXX_CHG_STATUS_WAIT		0x4
+#define PM2XXX_CHG_STATUS_NOBAT		0x5
+
+/* Input charger voltage VPWR2 */
+#define PM2XXX_VPWR2_OVV_6_0		0x0
+#define PM2XXX_VPWR2_OVV_6_3		0x1
+#define PM2XXX_VPWR2_OVV_10		0x2
+#define PM2XXX_VPWR2_OVV_NONE		0x3
+
+/* Input charger voltage VPWR1 */
+#define PM2XXX_VPWR1_OVV_6_0		0x0
+#define PM2XXX_VPWR1_OVV_6_3		0x1
+#define PM2XXX_VPWR1_OVV_10		0x2
+#define PM2XXX_VPWR1_OVV_NONE		0x3
+
+/* Battery low level comparator control register */
+#define PM2XXX_VBAT_LOW_MONITORING_DIS	0x0
+#define PM2XXX_VBAT_LOW_MONITORING_ENA	0x1
+
+/* Battery low level value control register */
+#define PM2XXX_VBAT_LOW_LEVEL_2_3	0x0
+#define PM2XXX_VBAT_LOW_LEVEL_2_4	0x1
+#define PM2XXX_VBAT_LOW_LEVEL_2_5	0x2
+#define PM2XXX_VBAT_LOW_LEVEL_2_6	0x3
+#define PM2XXX_VBAT_LOW_LEVEL_2_7	0x4
+#define PM2XXX_VBAT_LOW_LEVEL_2_8	0x5
+#define PM2XXX_VBAT_LOW_LEVEL_2_9	0x6
+#define PM2XXX_VBAT_LOW_LEVEL_3_0	0x7
+#define PM2XXX_VBAT_LOW_LEVEL_3_1	0x8
+#define PM2XXX_VBAT_LOW_LEVEL_3_2	0x9
+#define PM2XXX_VBAT_LOW_LEVEL_3_3	0xA
+#define PM2XXX_VBAT_LOW_LEVEL_3_4	0xB
+#define PM2XXX_VBAT_LOW_LEVEL_3_5	0xC
+#define PM2XXX_VBAT_LOW_LEVEL_3_6	0xD
+#define PM2XXX_VBAT_LOW_LEVEL_3_7	0xE
+#define PM2XXX_VBAT_LOW_LEVEL_3_8	0xF
+#define PM2XXX_VBAT_LOW_LEVEL_3_9	0x10
+#define PM2XXX_VBAT_LOW_LEVEL_4_0	0x11
+#define PM2XXX_VBAT_LOW_LEVEL_4_1	0x12
+#define PM2XXX_VBAT_LOW_LEVEL_4_2	0x13
+
+/* SW CTRL */
+#define PM2XXX_SWCTRL_HW		0x0
+#define PM2XXX_SWCTRL_SW		0x1
+
+
+/* LED Driver Control */
+#define PM2XXX_LED_CURRENT_MASK		0x0C
+#define PM2XXX_LED_CURRENT_2_5MA	(0X0<<2)
+#define PM2XXX_LED_CURRENT_1MA		(0X1<<2)
+#define PM2XXX_LED_CURRENT_5MA		(0X2<<2)
+#define PM2XXX_LED_CURRENT_10MA		(0X3<<2)
+
+#define PM2XXX_LED_SELECT_MASK		0x02
+#define PM2XXX_LED_SELECT_EN		(0X0<<1)
+#define PM2XXX_LED_SELECT_DIS		(0X1<<1)
+
+#define PM2XXX_ANTI_OVERSHOOT_MASK	0x01
+#define PM2XXX_ANTI_OVERSHOOT_DIS	0X0
+#define PM2XXX_ANTI_OVERSHOOT_EN	0X1
+
+#define to_pm2xxx_charger_ac_device_info(x) container_of((x), \
+		struct pm2xxx_charger, ac_chg)
+
+static int pm2xxx_interrupt_registers[] = {
+	PM2XXX_REG_INT1,
+	PM2XXX_REG_INT2,
+	PM2XXX_REG_INT3,
+	PM2XXX_REG_INT4,
+	PM2XXX_REG_INT5,
+	PM2XXX_REG_INT6,
+};
+
+enum pm2xxx_reg_int1 {
+	PM2XXX_INT1_ITVBATDISCONNECT	= 0x02,
+	PM2XXX_INT1_ITVBATLOWR		= 0x04,
+	PM2XXX_INT1_ITVBATLOWF		= 0x08,
+};
+
+enum pm2xxx_mask_reg_int1 {
+	PM2XXX_INT1_M_ITVBATDISCONNECT	= 0x02,
+	PM2XXX_INT1_M_ITVBATLOWR	= 0x04,
+	PM2XXX_INT1_M_ITVBATLOWF	= 0x08,
+};
+
+enum pm2xxx_source_reg_int1 {
+	PM2XXX_INT1_S_ITVBATDISCONNECT	= 0x02,
+	PM2XXX_INT1_S_ITVBATLOWR	= 0x04,
+	PM2XXX_INT1_S_ITVBATLOWF	= 0x08,
+};
+
+enum pm2xxx_reg_int2 {
+	PM2XXX_INT2_ITVPWR2PLUG		= 0x01,
+	PM2XXX_INT2_ITVPWR2UNPLUG	= 0x02,
+	PM2XXX_INT2_ITVPWR1PLUG		= 0x04,
+	PM2XXX_INT2_ITVPWR1UNPLUG	= 0x08,
+};
+
+enum pm2xxx_mask_reg_int2 {
+	PM2XXX_INT2_M_ITVPWR2PLUG	= 0x01,
+	PM2XXX_INT2_M_ITVPWR2UNPLUG	= 0x02,
+	PM2XXX_INT2_M_ITVPWR1PLUG	= 0x04,
+	PM2XXX_INT2_M_ITVPWR1UNPLUG	= 0x08,
+};
+
+enum pm2xxx_source_reg_int2 {
+	PM2XXX_INT2_S_ITVPWR2PLUG	= 0x03,
+	PM2XXX_INT2_S_ITVPWR1PLUG	= 0x0c,
+};
+
+enum pm2xxx_reg_int3 {
+	PM2XXX_INT3_ITCHPRECHARGEWD	= 0x01,
+	PM2XXX_INT3_ITCHCCWD		= 0x02,
+	PM2XXX_INT3_ITCHCVWD		= 0x04,
+	PM2XXX_INT3_ITAUTOTIMEOUTWD	= 0x08,
+};
+
+enum pm2xxx_mask_reg_int3 {
+	PM2XXX_INT3_M_ITCHPRECHARGEWD	= 0x01,
+	PM2XXX_INT3_M_ITCHCCWD		= 0x02,
+	PM2XXX_INT3_M_ITCHCVWD		= 0x04,
+	PM2XXX_INT3_M_ITAUTOTIMEOUTWD	= 0x08,
+};
+
+enum pm2xxx_source_reg_int3 {
+	PM2XXX_INT3_S_ITCHPRECHARGEWD	= 0x01,
+	PM2XXX_INT3_S_ITCHCCWD		= 0x02,
+	PM2XXX_INT3_S_ITCHCVWD		= 0x04,
+	PM2XXX_INT3_S_ITAUTOTIMEOUTWD	= 0x08,
+};
+
+enum pm2xxx_reg_int4 {
+	PM2XXX_INT4_ITBATTEMPCOLD	= 0x01,
+	PM2XXX_INT4_ITBATTEMPHOT	= 0x02,
+	PM2XXX_INT4_ITVPWR2OVV		= 0x04,
+	PM2XXX_INT4_ITVPWR1OVV		= 0x08,
+	PM2XXX_INT4_ITCHARGINGON	= 0x10,
+	PM2XXX_INT4_ITVRESUME		= 0x20,
+	PM2XXX_INT4_ITBATTFULL		= 0x40,
+	PM2XXX_INT4_ITCVPHASE		= 0x80,
+};
+
+enum pm2xxx_mask_reg_int4 {
+	PM2XXX_INT4_M_ITBATTEMPCOLD	= 0x01,
+	PM2XXX_INT4_M_ITBATTEMPHOT	= 0x02,
+	PM2XXX_INT4_M_ITVPWR2OVV	= 0x04,
+	PM2XXX_INT4_M_ITVPWR1OVV	= 0x08,
+	PM2XXX_INT4_M_ITCHARGINGON	= 0x10,
+	PM2XXX_INT4_M_ITVRESUME		= 0x20,
+	PM2XXX_INT4_M_ITBATTFULL	= 0x40,
+	PM2XXX_INT4_M_ITCVPHASE		= 0x80,
+};
+
+enum pm2xxx_source_reg_int4 {
+	PM2XXX_INT4_S_ITBATTEMPCOLD	= 0x01,
+	PM2XXX_INT4_S_ITBATTEMPHOT	= 0x02,
+	PM2XXX_INT4_S_ITVPWR2OVV	= 0x04,
+	PM2XXX_INT4_S_ITVPWR1OVV	= 0x08,
+	PM2XXX_INT4_S_ITCHARGINGON	= 0x10,
+	PM2XXX_INT4_S_ITVRESUME		= 0x20,
+	PM2XXX_INT4_S_ITBATTFULL	= 0x40,
+	PM2XXX_INT4_S_ITCVPHASE		= 0x80,
+};
+
+enum pm2xxx_reg_int5 {
+	PM2XXX_INT5_ITTHERMALSHUTDOWNRISE	= 0x01,
+	PM2XXX_INT5_ITTHERMALSHUTDOWNFALL	= 0x02,
+	PM2XXX_INT5_ITTHERMALWARNINGRISE	= 0x04,
+	PM2XXX_INT5_ITTHERMALWARNINGFALL	= 0x08,
+	PM2XXX_INT5_ITVSYSTEMOVV		= 0x10,
+};
+
+enum pm2xxx_mask_reg_int5 {
+	PM2XXX_INT5_M_ITTHERMALSHUTDOWNRISE	= 0x01,
+	PM2XXX_INT5_M_ITTHERMALSHUTDOWNFALL	= 0x02,
+	PM2XXX_INT5_M_ITTHERMALWARNINGRISE	= 0x04,
+	PM2XXX_INT5_M_ITTHERMALWARNINGFALL	= 0x08,
+	PM2XXX_INT5_M_ITVSYSTEMOVV		= 0x10,
+};
+
+enum pm2xxx_source_reg_int5 {
+	PM2XXX_INT5_S_ITTHERMALSHUTDOWNRISE	= 0x01,
+	PM2XXX_INT5_S_ITTHERMALSHUTDOWNFALL	= 0x02,
+	PM2XXX_INT5_S_ITTHERMALWARNINGRISE	= 0x04,
+	PM2XXX_INT5_S_ITTHERMALWARNINGFALL	= 0x08,
+	PM2XXX_INT5_S_ITVSYSTEMOVV		= 0x10,
+};
+
+enum pm2xxx_reg_int6 {
+	PM2XXX_INT6_ITVPWR2DROP		= 0x01,
+	PM2XXX_INT6_ITVPWR1DROP		= 0x02,
+	PM2XXX_INT6_ITVPWR2VALIDRISE	= 0x04,
+	PM2XXX_INT6_ITVPWR2VALIDFALL	= 0x08,
+	PM2XXX_INT6_ITVPWR1VALIDRISE	= 0x10,
+	PM2XXX_INT6_ITVPWR1VALIDFALL	= 0x20,
+};
+
+enum pm2xxx_mask_reg_int6 {
+	PM2XXX_INT6_M_ITVPWR2DROP	= 0x01,
+	PM2XXX_INT6_M_ITVPWR1DROP	= 0x02,
+	PM2XXX_INT6_M_ITVPWR2VALIDRISE	= 0x04,
+	PM2XXX_INT6_M_ITVPWR2VALIDFALL	= 0x08,
+	PM2XXX_INT6_M_ITVPWR1VALIDRISE	= 0x10,
+	PM2XXX_INT6_M_ITVPWR1VALIDFALL	= 0x20,
+};
+
+enum pm2xxx_source_reg_int6 {
+	PM2XXX_INT6_S_ITVPWR2DROP	= 0x01,
+	PM2XXX_INT6_S_ITVPWR1DROP	= 0x02,
+	PM2XXX_INT6_S_ITVPWR2VALIDRISE	= 0x04,
+	PM2XXX_INT6_S_ITVPWR2VALIDFALL	= 0x08,
+	PM2XXX_INT6_S_ITVPWR1VALIDRISE	= 0x10,
+	PM2XXX_INT6_S_ITVPWR1VALIDFALL	= 0x20,
+};
+
+static enum power_supply_property pm2xxx_charger_ac_props[] = {
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_VOLTAGE_AVG,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+};
+
+static int pm2xxx_charger_voltage_map[] = {
+	3500,
+	3525,
+	3550,
+	3575,
+	3600,
+	3625,
+	3650,
+	3675,
+	3700,
+	3725,
+	3750,
+	3775,
+	3800,
+	3825,
+	3850,
+	3875,
+	3900,
+	3925,
+	3950,
+	3975,
+	4000,
+	4025,
+	4050,
+	4075,
+	4100,
+	4125,
+	4150,
+	4175,
+	4200,
+	4225,
+	4250,
+	4275,
+	4300,
+};
+
+static int pm2xxx_charger_current_map[] = {
+	200,
+	200,
+	400,
+	600,
+	800,
+	1000,
+	1200,
+	1400,
+	1600,
+	1800,
+	2000,
+	2200,
+	2400,
+	2600,
+	2800,
+	3000,
+};
+
+struct pm2xxx_irq {
+	char *name;
+	irqreturn_t (*isr)(int irq, void *data);
+};
+
+struct pm2xxx_charger_info {
+	int charger_connected;
+	int charger_online;
+	int charger_voltage;
+	int cv_active;
+	bool wd_expired;
+};
+
+struct pm2xxx_charger_event_flags {
+	bool mainextchnotok;
+	bool main_thermal_prot;
+	bool ovv;
+	bool chgwdexp;
+};
+
+struct pm2xxx_config {
+	struct i2c_client *pm2xxx_i2c;
+	struct i2c_device_id *pm2xxx_id;
+};
+
+struct pm2xxx_charger {
+	struct device *dev;
+	u8 chip_id;
+	bool vddadc_en_ac;
+	struct pm2xxx_config config;
+	bool ac_conn;
+	unsigned int gpio_irq;
+	int vbat;
+	int old_vbat;
+	int failure_case;
+	int failure_input_ovv;
+	u8 pm2_int[6];
+	struct ab8500_gpadc *gpadc;
+	struct regulator *regu;
+	struct pm2xxx_bm_data *bat;
+	struct mutex lock;
+	struct ab8500 *parent;
+	struct pm2xxx_charger_info ac;
+	struct pm2xxx_charger_platform_data *pdata;
+	struct workqueue_struct *charger_wq;
+	struct delayed_work check_vbat_work;
+	struct work_struct ac_work;
+	struct work_struct check_main_thermal_prot_work;
+	struct ux500_charger ac_chg;
+	struct pm2xxx_charger_event_flags flags;
+};
+
+static const struct i2c_device_id pm2xxx_ident[] = {
+	{ "pm2301", 0 },
+	{ }
+};
+
+static int pm2xxx_reg_read(struct pm2xxx_charger *pm2, int reg, u8 *val)
+{
+	int ret;
+
+	ret = i2c_smbus_read_i2c_block_data(pm2->config.pm2xxx_i2c, reg,
+				1, val);
+	if (ret < 0)
+		dev_err(pm2->dev, "Error reading register at 0x%x\n", reg);
+
+	return ret;
+}
+
+static int pm2xxx_reg_write(struct pm2xxx_charger *pm2, int reg, u8 val)
+{
+	int ret;
+
+	ret = i2c_smbus_write_i2c_block_data(pm2->config.pm2xxx_i2c, reg,
+				1, &val);
+	if (ret < 0)
+		dev_err(pm2->dev, "Error writing register at 0x%x\n", reg);
+
+	return ret;
+}
+
+static int pm2xxx_charging_enable_mngt(struct pm2xxx_charger *pm2)
+{
+	int ret;
+
+	/* Enable charging */
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG2,
+			(PM2XXX_CH_AUTO_RESUME_EN | PM2XXX_CHARGER_ENA));
+
+	return ret;
+}
+
+static int pm2xxx_charging_disable_mngt(struct pm2xxx_charger *pm2)
+{
+	int ret;
+
+	/* Disable charging */
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG2,
+			(PM2XXX_CH_AUTO_RESUME_DIS | PM2XXX_CHARGER_DIS));
+
+	return ret;
+}
+
+static int pm2xxx_charger_batt_therm_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	queue_work(pm2->charger_wq, &pm2->check_main_thermal_prot_work);
+
+	return 0;
+}
+
+
+int pm2xxx_charger_die_therm_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	queue_work(pm2->charger_wq, &pm2->check_main_thermal_prot_work);
+
+	return 0;
+}
+
+static int pm2xxx_charger_ovv_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	int ret = 0;
+
+	pm2->failure_input_ovv++;
+	if (pm2->failure_input_ovv < 4) {
+		ret = pm2xxx_charging_enable_mngt(pm2);
+		goto out;
+	} else {
+		pm2->failure_input_ovv = 0;
+		dev_err(pm2->dev, "Overvoltage detected\n");
+		pm2->flags.ovv = true;
+		power_supply_changed(&pm2->ac_chg.psy);
+	}
+
+out:
+	return ret;
+}
+
+static int pm2xxx_charger_wd_exp_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	dev_dbg(pm2->dev , "20 minutes watchdog occured\n");
+
+	pm2->ac.wd_expired = true;
+	power_supply_changed(&pm2->ac_chg.psy);
+
+	return 0;
+}
+
+static int pm2xxx_charger_vbat_lsig_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	switch (val) {
+	case PM2XXX_INT1_ITVBATLOWR:
+		dev_dbg(pm2->dev, "VBAT grows above VBAT_LOW level\n");
+		break;
+
+	case PM2XXX_INT1_ITVBATLOWF:
+		dev_dbg(pm2->dev, "VBAT drops below VBAT_LOW level\n");
+		break;
+
+	default:
+		dev_err(pm2->dev, "Unknown VBAT level\n");
+	}
+
+	return 0;
+}
+
+static int pm2xxx_charger_bat_disc_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	dev_dbg(pm2->dev, "battery disconnected\n");
+
+	return (pm2xxx_charging_disable_mngt(pm2));
+}
+
+static int pm2xxx_charger_detection(struct pm2xxx_charger *pm2, u8 *val)
+{
+	int ret = 0;
+
+	ret = pm2xxx_reg_read(pm2, PM2XXX_SRCE_REG_INT2, val);
+
+	if (ret < 0) {
+		dev_err(pm2->dev, "Charger detection failed\n");
+		goto out;
+	}
+
+	*val &= (PM2XXX_INT2_S_ITVPWR1PLUG | PM2XXX_INT2_S_ITVPWR2PLUG);
+out:
+	return ret;
+}
+
+static int pm2xxx_charger_itv_pwr_plug_mngt(struct pm2xxx_charger *pm2, int val)
+{
+
+	int ret;
+	u8 read_val;
+
+	/*
+	 * Since we can't be sure that the events are received
+	 * synchronously, we have the check if the main charger is
+	 * connected by reading the interrupt source register.
+	 */
+	ret = pm2xxx_charger_detection(pm2, &read_val);
+
+	if ((ret == 0) && read_val) {
+		pm2->ac.charger_connected = 1;
+		pm2->ac_conn = true;
+		queue_work(pm2->charger_wq, &pm2->ac_work);
+	}
+
+
+	return ret;
+}
+
+static int pm2xxx_charger_itv_pwr_unplug_mngt(struct pm2xxx_charger *pm2,
+								int val)
+{
+	pm2->ac.charger_connected = 0;
+	queue_work(pm2->charger_wq, &pm2->ac_work);
+
+	return 0;
+}
+
+static int pm2_int_reg0(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[0] &
+			(PM2XXX_INT1_ITVBATLOWR | PM2XXX_INT1_ITVBATLOWF)) {
+		ret = pm2xxx_charger_vbat_lsig_mngt(pm2, pm2->pm2_int[0] &
+			(PM2XXX_INT1_ITVBATLOWR | PM2XXX_INT1_ITVBATLOWF));
+	}
+
+	if (pm2->pm2_int[0] & PM2XXX_INT1_ITVBATDISCONNECT) {
+		ret = pm2xxx_charger_bat_disc_mngt(pm2,
+				PM2XXX_INT1_ITVBATDISCONNECT);
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg1(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[1] &
+		(PM2XXX_INT2_ITVPWR1PLUG | PM2XXX_INT2_ITVPWR2PLUG)) {
+		dev_dbg(pm2->dev , "Main charger plugged\n");
+		ret = pm2xxx_charger_itv_pwr_plug_mngt(pm2, pm2->pm2_int[1] &
+			(PM2XXX_INT2_ITVPWR1PLUG | PM2XXX_INT2_ITVPWR2PLUG));
+	}
+
+	if (pm2->pm2_int[1] &
+		(PM2XXX_INT2_ITVPWR1UNPLUG | PM2XXX_INT2_ITVPWR2UNPLUG)) {
+		dev_dbg(pm2->dev , "Main charger unplugged\n");
+		ret = pm2xxx_charger_itv_pwr_unplug_mngt(pm2, pm2->pm2_int[1] &
+						(PM2XXX_INT2_ITVPWR1UNPLUG |
+						PM2XXX_INT2_ITVPWR2UNPLUG));
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg2(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[2] & PM2XXX_INT3_ITAUTOTIMEOUTWD)
+		ret = pm2xxx_charger_wd_exp_mngt(pm2, pm2->pm2_int[2]);
+
+	if (pm2->pm2_int[2] & (PM2XXX_INT3_ITCHPRECHARGEWD |
+				PM2XXX_INT3_ITCHCCWD | PM2XXX_INT3_ITCHCVWD)) {
+		dev_dbg(pm2->dev,
+			"Watchdog occured for precharge, CC and CV charge\n");
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg3(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_ITCHARGINGON)) {
+		dev_dbg(pm2->dev ,
+			"chargind operation has started\n");
+	}
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_ITVRESUME)) {
+		dev_dbg(pm2->dev,
+			"battery discharged down to VResume threshold\n");
+	}
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_ITBATTFULL)) {
+		dev_dbg(pm2->dev , "battery fully detected\n");
+	}
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_ITCVPHASE)) {
+		dev_dbg(pm2->dev, "CV phase enter with 0.5C charging\n");
+	}
+
+	if (pm2->pm2_int[3] &
+			(PM2XXX_INT4_ITVPWR2OVV | PM2XXX_INT4_ITVPWR1OVV)) {
+		pm2->failure_case = VPWR_OVV;
+		ret = pm2xxx_charger_ovv_mngt(pm2, pm2->pm2_int[3] &
+			(PM2XXX_INT4_ITVPWR2OVV | PM2XXX_INT4_ITVPWR1OVV));
+		dev_dbg(pm2->dev, "VPWR/VSYSTEM overvoltage detected\n");
+	}
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_S_ITBATTEMPCOLD |
+				PM2XXX_INT4_S_ITBATTEMPHOT)) {
+		ret = pm2xxx_charger_batt_therm_mngt(pm2,
+			pm2->pm2_int[3] & (PM2XXX_INT4_S_ITBATTEMPCOLD |
+					PM2XXX_INT4_S_ITBATTEMPHOT));
+		dev_dbg(pm2->dev, "BTEMP is too Low/High\n");
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg4(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[4] & PM2XXX_INT5_ITVSYSTEMOVV) {
+		pm2->failure_case = VSYSTEM_OVV;
+		ret = pm2xxx_charger_ovv_mngt(pm2, pm2->pm2_int[4] &
+						PM2XXX_INT5_ITVSYSTEMOVV);
+		dev_dbg(pm2->dev, "VSYSTEM overvoltage detected\n");
+	}
+
+	if (pm2->pm2_int[4] & (PM2XXX_INT5_ITTHERMALWARNINGFALL |
+				PM2XXX_INT5_ITTHERMALWARNINGRISE |
+				PM2XXX_INT5_ITTHERMALSHUTDOWNFALL |
+				PM2XXX_INT5_ITTHERMALSHUTDOWNRISE)) {
+		dev_dbg(pm2->dev, "BTEMP die temperature is too Low/High\n");
+		ret = pm2xxx_charger_die_therm_mngt(pm2, pm2->pm2_int[4] &
+			(PM2XXX_INT5_ITTHERMALWARNINGFALL |
+			PM2XXX_INT5_ITTHERMALWARNINGRISE |
+			PM2XXX_INT5_ITTHERMALSHUTDOWNFALL |
+			PM2XXX_INT5_ITTHERMALSHUTDOWNRISE));
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg5(struct pm2xxx_charger *pm2)
+{
+
+	if (pm2->pm2_int[5]
+		& (PM2XXX_INT6_ITVPWR2DROP | PM2XXX_INT6_ITVPWR1DROP)) {
+		dev_dbg(pm2->dev, "VMPWR drop to VBAT level\n");
+	}
+
+	if (pm2->pm2_int[5] & (PM2XXX_INT6_ITVPWR2VALIDRISE |
+				PM2XXX_INT6_ITVPWR1VALIDRISE |
+				PM2XXX_INT6_ITVPWR2VALIDFALL |
+				PM2XXX_INT6_ITVPWR1VALIDFALL)) {
+		dev_dbg(pm2->dev, "Falling/Rising edge on WPWR1/2\n");
+	}
+
+	return 0;
+}
+
+static irqreturn_t  pm2xxx_irq_int(int irq, void *data)
+{
+	struct pm2xxx_charger *pm2 = data;
+	int ret, i;
+
+	for (i = 0; i < ARRAY_SIZE(pm2->pm2_int); i++) {
+		ret = pm2xxx_reg_read(pm2, pm2xxx_interrupt_registers[i],
+				&(pm2->pm2_int[i]));
+	}
+
+	pm2_int_reg0(pm2);
+	pm2_int_reg1(pm2);
+	pm2_int_reg2(pm2);
+	pm2_int_reg3(pm2);
+	pm2_int_reg4(pm2);
+	pm2_int_reg5(pm2);
+
+	return IRQ_HANDLED;
+}
+
+static int pm2xxx_charger_get_ac_voltage(struct pm2xxx_charger *pm2)
+{
+	int vch = 0;
+
+	if (pm2->ac.charger_connected) {
+		vch = ab8500_gpadc_convert(pm2->gpadc, MAIN_CHARGER_V);
+		if (vch < 0)
+			dev_err(pm2->dev, "%s gpadc conv failed,\n", __func__);
+	}
+
+	return vch;
+}
+
+static int pm2xxx_charger_get_ac_cv(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+	u8 val;
+
+	if (pm2->ac.charger_connected && pm2->ac.charger_online) {
+
+		ret = pm2xxx_reg_read(pm2, PM2XXX_SRCE_REG_INT4, &val);
+		if (ret < 0) {
+			dev_err(pm2->dev, "%s pm2xxx read failed\n", __func__);
+			goto out;
+		}
+
+		if (val & PM2XXX_INT4_S_ITCVPHASE)
+			ret = PM2XXX_CONST_VOLT;
+		else
+			ret = PM2XXX_CONST_CURR;
+	}
+out:
+	return ret;
+}
+
+static int pm2xxx_charger_get_ac_current(struct pm2xxx_charger *pm2)
+{
+	int ich = 0;
+
+	if (pm2->ac.charger_online) {
+		ich = ab8500_gpadc_convert(pm2->gpadc, MAIN_CHARGER_C);
+		if (ich < 0)
+			dev_err(pm2->dev, "%s gpadc conv failed\n", __func__);
+	}
+
+	return ich;
+}
+
+static int pm2xxx_current_to_regval(int curr)
+{
+	int i;
+
+	if (curr < pm2xxx_charger_current_map[0])
+		return 0;
+
+	for (i = 1; i < ARRAY_SIZE(pm2xxx_charger_current_map); i++) {
+		if (curr < pm2xxx_charger_current_map[i])
+			return (i - 1);
+	}
+
+	i = ARRAY_SIZE(pm2xxx_charger_current_map) - 1;
+	if (curr == pm2xxx_charger_current_map[i])
+		return i;
+	else
+		return -EINVAL;
+}
+
+static int pm2xxx_voltage_to_regval(int curr)
+{
+	int i;
+
+	if (curr < pm2xxx_charger_voltage_map[0])
+		return 0;
+
+	for (i = 1; i < ARRAY_SIZE(pm2xxx_charger_voltage_map); i++) {
+		if (curr < pm2xxx_charger_voltage_map[i])
+			return i - 1;
+	}
+
+	i = ARRAY_SIZE(pm2xxx_charger_voltage_map) - 1;
+	if (curr == pm2xxx_charger_voltage_map[i])
+		return i;
+	else
+		return -EINVAL;
+}
+
+static int pm2xxx_charger_update_charger_current(struct ux500_charger *charger,
+		int ich_out)
+{
+	int ret;
+	int curr_index;
+	struct pm2xxx_charger *pm2;
+	u8 val;
+
+	if (charger->psy.type == POWER_SUPPLY_TYPE_MAINS)
+		pm2 = to_pm2xxx_charger_ac_device_info(charger);
+	else
+		return -ENXIO;
+
+	curr_index = pm2xxx_current_to_regval(ich_out);
+	if (curr_index < 0) {
+		dev_err(pm2->dev,
+			"Charger current too high: charging not started\n");
+		return -ENXIO;
+	}
+
+	ret = pm2xxx_reg_read(pm2, PM2XXX_BATT_CTRL_REG6, &val);
+	if (ret >= 0) {
+		val &= ~PM2XXX_DIR_CH_CC_CURRENT_MASK;
+		val |= curr_index;
+		ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG6, val);
+		if (ret < 0) {
+			dev_err(pm2->dev,
+				"%s write failed\n", __func__);
+		}
+	}
+	else
+		dev_err(pm2->dev, "%s read failed\n", __func__);
+
+	return ret;
+}
+
+static int pm2xxx_charger_ac_get_property(struct power_supply *psy,
+	enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	struct pm2xxx_charger *pm2;
+
+	pm2 = to_pm2xxx_charger_ac_device_info(psy_to_ux500_charger(psy));
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_HEALTH:
+		if (pm2->flags.mainextchnotok)
+			val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+		else if (pm2->ac.wd_expired)
+			val->intval = POWER_SUPPLY_HEALTH_DEAD;
+		else if (pm2->flags.main_thermal_prot)
+			val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+		else
+			val->intval = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = pm2->ac.charger_online;
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		val->intval = pm2->ac.charger_connected;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		pm2->ac.charger_voltage = pm2xxx_charger_get_ac_voltage(pm2);
+		val->intval = pm2->ac.charger_voltage * 1000;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_AVG:
+		pm2->ac.cv_active = pm2xxx_charger_get_ac_cv(pm2);
+		val->intval = pm2->ac.cv_active;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		val->intval = pm2xxx_charger_get_ac_current(pm2) * 1000;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int pm2xxx_charging_init(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	/* enable CC and CV watchdog */
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG3,
+		(PM2XXX_CH_WD_CV_PHASE_60MIN | PM2XXX_CH_WD_CC_PHASE_60MIN));
+	if( ret < 0)
+		return ret;
+
+	/* enable precharge watchdog */
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG4,
+					PM2XXX_CH_WD_PRECH_PHASE_60MIN);
+
+	return ret;
+}
+
+static int pm2xxx_charger_ac_en(struct ux500_charger *charger,
+	int enable, int vset, int iset)
+{
+	int ret;
+	int volt_index;
+	int curr_index;
+	u8 val;
+
+	struct pm2xxx_charger *pm2 = to_pm2xxx_charger_ac_device_info(charger);
+
+	if (enable) {
+		if (!pm2->ac.charger_connected) {
+			dev_dbg(pm2->dev, "AC charger not connected\n");
+			return -ENXIO;
+		}
+
+		dev_dbg(pm2->dev, "Enable AC: %dmV %dmA\n", vset, iset);
+		if (!pm2->vddadc_en_ac) {
+			regulator_enable(pm2->regu);
+			pm2->vddadc_en_ac = true;
+		}
+
+		ret = pm2xxx_charging_init(pm2);
+		if (ret < 0) {
+			dev_err(pm2->dev, "%s charging init failed\n",
+					__func__);
+			goto error_occured;
+		}
+
+		volt_index = pm2xxx_voltage_to_regval(vset);
+		curr_index = pm2xxx_current_to_regval(iset);
+
+		if (volt_index < 0 || curr_index < 0) {
+			dev_err(pm2->dev,
+				"Charger voltage or current too high, "
+				"charging not started\n");
+			return -ENXIO;
+		}
+
+		ret = pm2xxx_reg_read(pm2, PM2XXX_BATT_CTRL_REG8, &val);
+		if (ret >= 0) {
+			val &= ~PM2XXX_CH_VOLT_MASK;
+			val |= volt_index;
+			ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG8, val);
+
+			if (ret < 0) {
+				dev_err(pm2->dev,
+					"%s write failed\n", __func__);
+				goto error_occured;
+			}
+		else
+			dev_err(pm2->dev, "%s read failed\n", __func__);
+		}
+
+		ret = pm2xxx_reg_read(pm2, PM2XXX_BATT_CTRL_REG6, &val);
+		if (ret >= 0) {
+			val &= ~PM2XXX_DIR_CH_CC_CURRENT_MASK;
+			val |= curr_index;
+			ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG6, val);
+			if (ret < 0) {
+				dev_err(pm2->dev,
+					"%s write failed\n", __func__);
+				goto error_occured;
+			}
+		else
+			dev_err(pm2->dev, "%s read failed\n", __func__);
+		}
+
+		if (!pm2->bat->enable_overshoot) {
+			ret = pm2xxx_reg_read(pm2, PM2XXX_LED_CTRL_REG, &val);
+			if (ret >= 0) {
+				val |= PM2XXX_ANTI_OVERSHOOT_EN;
+				ret = pm2xxx_reg_write(pm2, PM2XXX_LED_CTRL_REG,
+							val);
+				if (ret < 0){
+					dev_err(pm2->dev, "%s write failed\n",
+							__func__);
+					goto error_occured;
+				}
+			}
+		else
+			dev_err(pm2->dev, "%s read failed\n", __func__);
+		}
+
+		ret = pm2xxx_charging_enable_mngt(pm2);
+		if (ret) {
+			dev_err(pm2->dev, "%s write failed\n", __func__);
+			goto error_occured;
+		}
+
+		pm2->ac.charger_online = 1;
+	} else {
+		pm2->ac.charger_online = 0;
+		pm2->ac.wd_expired = false;
+
+		/* Disable regulator if enabled */
+		if (pm2->vddadc_en_ac) {
+			regulator_disable(pm2->regu);
+			pm2->vddadc_en_ac = false;
+		}
+
+		ret = pm2xxx_charging_disable_mngt(pm2);
+		if (ret) {
+			dev_err(pm2->dev, "%s write failed\n", __func__);
+			return ret;
+		}
+
+		dev_dbg(pm2->dev, "PM2301: " "Disabled AC charging\n");
+	}
+	power_supply_changed(&pm2->ac_chg.psy);
+
+error_occured:
+	return ret;
+}
+
+static int pm2xxx_charger_watchdog_kick(struct ux500_charger *charger)
+{
+	int ret;
+	struct pm2xxx_charger *pm2;
+
+	if (charger->psy.type == POWER_SUPPLY_TYPE_MAINS)
+		pm2 = to_pm2xxx_charger_ac_device_info(charger);
+	else
+		return -ENXIO;
+
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_WD_KICK, WD_TIMER);
+	if (ret)
+		dev_err(pm2->dev, "Failed to kick WD!\n");
+
+	return ret;
+}
+
+static void pm2xxx_charger_ac_work(struct work_struct *work)
+{
+	struct pm2xxx_charger *pm2 = container_of(work,
+		struct pm2xxx_charger, ac_work);
+
+
+	power_supply_changed(&pm2->ac_chg.psy);
+	sysfs_notify(&pm2->ac_chg.psy.dev->kobj, NULL, "present");
+};
+
+static void pm2xxx_charger_check_main_thermal_prot_work(
+	struct work_struct *work)
+{
+};
+
+static struct pm2xxx_irq pm2xxx_charger_irq[] = {
+	{"PM2XXX_IRQ_INT", pm2xxx_irq_int},
+};
+
+static int pm2xxx_wall_charger_resume(struct i2c_client *i2c_client)
+{
+	return 0;
+}
+
+static int pm2xxx_wall_charger_suspend(struct i2c_client *i2c_client,
+	pm_message_t state)
+{
+	return 0;
+}
+
+static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
+		const struct i2c_device_id *id)
+{
+	struct pm2xxx_platform_data *pl_data = i2c_client->dev.platform_data;
+	struct pm2xxx_charger *pm2;
+	int ret = 0;
+	u8 val;
+
+	pm2 = kzalloc(sizeof(struct pm2xxx_charger), GFP_KERNEL);
+	if (!pm2) {
+		dev_err(pm2->dev, "pm2xxx_charger allocation failed\n");
+		return -ENOMEM;
+	}
+
+	/* get parent data */
+	pm2->dev = &i2c_client->dev;
+	pm2->gpadc = ab8500_gpadc_get("ab8500-gpadc.0");
+
+	/* get charger spcific platform data */
+	if (!pl_data->wall_charger) {
+		dev_err(pm2->dev, "no charger platform data supplied\n");
+		ret = -EINVAL;
+		goto free_device_info;
+	}
+
+	pm2->pdata = pl_data->wall_charger;
+
+	/* get battery specific platform data */
+	if (!pl_data->battery) {
+		dev_err(pm2->dev, "no battery platform data supplied\n");
+		ret = -EINVAL;
+		goto free_device_info;
+	}
+
+	pm2->bat = pl_data->battery;
+
+	if (!i2c_check_functionality(i2c_client->adapter,
+			I2C_FUNC_SMBUS_BYTE_DATA |
+			I2C_FUNC_SMBUS_READ_WORD_DATA)) {
+		ret = -ENODEV;
+		dev_info(pm2->dev, "pm2301 i2c_check_functionality failed\n");
+		goto free_device_info;
+	}
+
+	pm2->config.pm2xxx_i2c = i2c_client;
+	pm2->config.pm2xxx_id = (struct i2c_device_id *) id;
+	i2c_set_clientdata(i2c_client, pm2);
+
+	/* AC supply */
+	/* power_supply base class */
+	pm2->ac_chg.psy.name = pm2->pdata->label;
+	pm2->ac_chg.psy.type = POWER_SUPPLY_TYPE_MAINS;
+	pm2->ac_chg.psy.properties = pm2xxx_charger_ac_props;
+	pm2->ac_chg.psy.num_properties = ARRAY_SIZE(pm2xxx_charger_ac_props);
+	pm2->ac_chg.psy.get_property = pm2xxx_charger_ac_get_property;
+	pm2->ac_chg.psy.supplied_to = pm2->pdata->supplied_to;
+	pm2->ac_chg.psy.num_supplicants = pm2->pdata->num_supplicants;
+	/* pm2xxx_charger sub-class */
+	pm2->ac_chg.ops.enable = &pm2xxx_charger_ac_en;
+	pm2->ac_chg.ops.kick_wd = &pm2xxx_charger_watchdog_kick;
+	pm2->ac_chg.ops.update_curr = &pm2xxx_charger_update_charger_current;
+	pm2->ac_chg.max_out_volt = pm2xxx_charger_voltage_map[
+		ARRAY_SIZE(pm2xxx_charger_voltage_map) - 1];
+	pm2->ac_chg.max_out_curr = pm2xxx_charger_current_map[
+		ARRAY_SIZE(pm2xxx_charger_current_map) - 1];
+
+	/* Create a work queue for the charger */
+	pm2->charger_wq =
+		create_singlethread_workqueue("pm2xxx_charger_wq");
+	if (pm2->charger_wq == NULL) {
+		dev_err(pm2->dev, "failed to create work queue\n");
+		goto free_device_info;
+	}
+
+	/* Init work for charger detection */
+	INIT_WORK(&pm2->ac_work, pm2xxx_charger_ac_work);
+
+	/* Init work for checking HW status */
+	INIT_WORK(&pm2->check_main_thermal_prot_work,
+		pm2xxx_charger_check_main_thermal_prot_work);
+
+	/*
+	 * VDD ADC supply needs to be enabled from this driver when there
+	 * is a charger connected to avoid erroneous BTEMP_HIGH/LOW
+	 * interrupts during charging
+	 */
+	pm2->regu = regulator_get(pm2->dev, "vddadc");
+	if (IS_ERR(pm2->regu)) {
+		ret = PTR_ERR(pm2->regu);
+		dev_err(pm2->dev, "failed to get vddadc regulator\n");
+		goto free_charger_wq;
+	}
+
+	/* Register AC charger class */
+	ret = power_supply_register(pm2->dev, &pm2->ac_chg.psy);
+	if (ret) {
+		dev_err(pm2->dev, "failed to register AC charger\n");
+		goto free_regulator;
+	}
+
+	/* Register interrupts */
+	ret = request_threaded_irq(pm2->pdata->irq_number, NULL,
+				pm2xxx_charger_irq[0].isr,
+				pm2->pdata->irq_type,
+				pm2xxx_charger_irq[0].name, pm2);
+
+	if (ret != 0) {
+		dev_err(pm2->dev, "failed to request %s IRQ %d: %d\n",
+		pm2xxx_charger_irq[0].name, pm2->pdata->irq_number, ret);
+		goto unregister_pm2xxx_charger;
+	}
+
+	/*
+	 * I2C Read/Write will fail, if AC adaptor is not connected.
+	 * fix the charger detection mechanism.
+	 */
+	ret = pm2xxx_charger_detection(pm2, &val);
+
+	if ((ret == 0) && val) {
+		pm2->ac.charger_connected = 1;
+		pm2->ac_conn = true;
+		power_supply_changed(&pm2->ac_chg.psy);
+		sysfs_notify(&pm2->ac_chg.psy.dev->kobj, NULL, "present");
+	}
+
+	return 0;
+
+unregister_pm2xxx_charger:
+	/* unregister power supply */
+	power_supply_unregister(&pm2->ac_chg.psy);
+free_regulator:
+	/* disable the regulator */
+	regulator_put(pm2->regu);
+free_charger_wq:
+	destroy_workqueue(pm2->charger_wq);
+free_device_info:
+	kfree(pm2);
+	return ret;
+}
+
+static int __devexit pm2xxx_wall_charger_remove(struct i2c_client *i2c_client)
+{
+	struct pm2xxx_charger *pm2 = i2c_get_clientdata(i2c_client);
+
+	/* Disable AC charging */
+	pm2xxx_charger_ac_en(&pm2->ac_chg, false, 0, 0);
+
+	/* Disable interrupts */
+	free_irq(pm2->pdata->irq_number, pm2);
+
+	/* Delete the work queue */
+	destroy_workqueue(pm2->charger_wq);
+
+	flush_scheduled_work();
+
+	/* disable the regulator */
+	regulator_put(pm2->regu);
+
+	power_supply_unregister(&pm2->ac_chg.psy);
+
+	kfree(pm2);
+
+	return 0;
+}
+
+static const struct i2c_device_id pm2xxx_id[] = {
+	{ "pm2301", 0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, pm2xxx_id);
+
+static struct i2c_driver pm2xxx_charger_driver = {
+	.probe = pm2xxx_wall_charger_probe,
+	.remove = __devexit_p(pm2xxx_wall_charger_remove),
+	.suspend = pm2xxx_wall_charger_suspend,
+	.resume = pm2xxx_wall_charger_resume,
+	.driver = {
+		.name = "pm2xxx-wall_charger",
+		.owner = THIS_MODULE,
+	},
+	.id_table = pm2xxx_id,
+};
+
+static int __init pm2xxx_charger_init(void)
+{
+	return i2c_add_driver(&pm2xxx_charger_driver);
+}
+
+static void __exit pm2xxx_charger_exit(void)
+{
+	i2c_del_driver(&pm2xxx_charger_driver);
+}
+
+subsys_initcall_sync(pm2xxx_charger_init);
+module_exit(pm2xxx_charger_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Rajkumar kasirajan, Olivier Launay");
+MODULE_ALIAS("platform:pm2xxx-charger");
+MODULE_DESCRIPTION("PM2xxx charger management driver");
+
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index e2a1e6d84f72..a03d4fdf9c2c 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -406,6 +406,8 @@ struct ab8500_charger_platform_data {
 	char **supplied_to;
 	size_t num_supplicants;
 	bool autopower_cfg;
+	bool ac_enabled;
+	bool usb_enabled;
 };
 
 struct ab8500_btemp_platform_data {
diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h
index 9b07725750c9..5b77a610c6b6 100644
--- a/include/linux/mfd/abx500/ux500_chargalg.h
+++ b/include/linux/mfd/abx500/ux500_chargalg.h
@@ -27,12 +27,14 @@ struct ux500_charger_ops {
  * @ops			ux500 charger operations
  * @max_out_volt	maximum output charger voltage in mV
  * @max_out_curr	maximum output charger current in mA
+ * @enabled		indicates if this charger is used or not
  */
 struct ux500_charger {
 	struct power_supply psy;
 	struct ux500_charger_ops ops;
 	int max_out_volt;
 	int max_out_curr;
+	bool enabled;
 };
 
 #endif
diff --git a/include/linux/pm2301_charger.h b/include/linux/pm2301_charger.h
new file mode 100644
index 000000000000..16bb1d34b9d5
--- /dev/null
+++ b/include/linux/pm2301_charger.h
@@ -0,0 +1,60 @@
+/*
+ * PM2301 charger driver.
+ *
+ * Copyright (C) 2012 ST Ericsson Corporation
+ *
+ * Contact: Olivier LAUNAY (olivier.launay@stericsson.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef __LINUX_PM2301_H
+#define __LINUX_PM2301_H
+
+/**
+ * struct pm2xxx_bm_charger_parameters - Charger specific parameters
+ * @ac_volt_max:	maximum allowed AC charger voltage in mV
+ * @ac_curr_max:	maximum allowed AC charger current in mA
+ */
+struct pm2xxx_bm_charger_parameters {
+	int ac_volt_max;
+	int ac_curr_max;
+};
+
+/**
+ * struct pm2xxx_bm_data - pm2xxx battery management data
+ * @enable_overshoot    flag to enable VBAT overshoot control
+ * @chg_params	  charger parameters
+ */
+struct pm2xxx_bm_data {
+	bool enable_overshoot;
+	const struct pm2xxx_bm_charger_parameters *chg_params;
+};
+
+struct pm2xxx_charger_platform_data {
+	char **supplied_to;
+	size_t num_supplicants;
+	int i2c_bus;
+	const char *label;
+	int irq_number;
+	int irq_type;
+};
+
+struct pm2xxx_platform_data {
+	struct pm2xxx_charger_platform_data *wall_charger;
+	struct pm2xxx_bm_data *battery;
+};
+
+#endif /* __LINUX_PM2301_H */
-- 
cgit v1.2.3


From e6aac611b86956bdd981f30e8862ee2ac8aaf664 Mon Sep 17 00:00:00 2001
From: Michel JAOUEN <michel.jaouen@stericsson.com>
Date: Tue, 22 May 2012 15:46:46 +0200
Subject: ab8500-btemp: Adaptation to AB8505 and AB9540 platforms

Add AB9540 and AB8505 support to ABx500 BTEMP driver.

Signed-off-by: Rajkumar Kasirajan <rajkumar.kasirajan@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Reviewed-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Tested-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Tested-by: Jonas ABERG <jonas.aberg@stericsson.com>
---
 drivers/power/ab8500_btemp.c         | 67 ++++++++++++++++++++++++++++--------
 include/linux/mfd/abx500/ab8500-bm.h |  2 ++
 2 files changed, 55 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index 8ccf3590fd76..301a9ad3474d 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -39,6 +39,9 @@
 #define BTEMP_BATCTRL_CURR_SRC_7UA	7
 #define BTEMP_BATCTRL_CURR_SRC_20UA	20
 
+#define BTEMP_BATCTRL_CURR_SRC_16UA	16
+#define BTEMP_BATCTRL_CURR_SRC_18UA	18
+
 #define to_ab8500_btemp_device_info(x) container_of((x), \
 	struct ab8500_btemp, btemp_psy);
 
@@ -212,10 +215,18 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 
 	/* Only do this for batteries with internal NTC */
 	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) {
-		if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_7UA)
-			curr = BAT_CTRL_7U_ENA;
-		else
-			curr = BAT_CTRL_20U_ENA;
+
+		if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+			if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_16UA)
+				curr = BAT_CTRL_16U_ENA;
+			else
+				curr = BAT_CTRL_18U_ENA;
+		} else {
+			if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_7UA)
+				curr = BAT_CTRL_7U_ENA;
+			else
+				curr = BAT_CTRL_20U_ENA;
+		}
 
 		dev_dbg(di->dev, "Set BATCTRL %duA\n", di->curr_source);
 
@@ -246,11 +257,22 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 	} else if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) {
 		dev_dbg(di->dev, "Disable BATCTRL curr source\n");
 
-		/* Write 0 to the curr bits */
-		ret = abx500_mask_and_set_register_interruptible(di->dev,
-			AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
-			BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA,
-			~(BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA));
+		if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+			/* Write 0 to the curr bits */
+			ret = abx500_mask_and_set_register_interruptible(
+				di->dev,
+				AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
+				BAT_CTRL_16U_ENA | BAT_CTRL_18U_ENA,
+				~(BAT_CTRL_16U_ENA | BAT_CTRL_18U_ENA));
+		} else {
+			/* Write 0 to the curr bits */
+			ret = abx500_mask_and_set_register_interruptible(
+				di->dev,
+				AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
+				BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA,
+				~(BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA));
+		}
+
 		if (ret) {
 			dev_err(di->dev, "%s failed disabling current source\n",
 				__func__);
@@ -292,11 +314,20 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 	 * if we got an error above
 	 */
 disable_curr_source:
-	/* Write 0 to the curr bits */
-	ret = abx500_mask_and_set_register_interruptible(di->dev,
+	if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+		/* Write 0 to the curr bits */
+		ret = abx500_mask_and_set_register_interruptible(di->dev,
+			AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
+			BAT_CTRL_16U_ENA | BAT_CTRL_18U_ENA,
+			~(BAT_CTRL_16U_ENA | BAT_CTRL_18U_ENA));
+	} else {
+		/* Write 0 to the curr bits */
+		ret = abx500_mask_and_set_register_interruptible(di->dev,
 			AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
 			BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA,
 			~(BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA));
+	}
+
 	if (ret) {
 		dev_err(di->dev, "%s failed disabling current source\n",
 			__func__);
@@ -510,8 +541,11 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 {
 	int res;
 	u8 i;
+	if (is_ab9540(di->parent) || is_ab8505(di->parent))
+		di->curr_source = BTEMP_BATCTRL_CURR_SRC_16UA;
+	else
+		di->curr_source = BTEMP_BATCTRL_CURR_SRC_7UA;
 
-	di->curr_source = BTEMP_BATCTRL_CURR_SRC_7UA;
 	di->bm->batt_id = BATTERY_UNKNOWN;
 
 	res =  ab8500_btemp_get_batctrl_res(di);
@@ -549,8 +583,13 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 	 */
 	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL &&
 			di->bm->batt_id == 1) {
-		dev_dbg(di->dev, "Set BATCTRL current source to 20uA\n");
-		di->curr_source = BTEMP_BATCTRL_CURR_SRC_20UA;
+		if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+			dev_dbg(di->dev, "Set BATCTRL current source to 16uA\n");
+			di->curr_source = BTEMP_BATCTRL_CURR_SRC_16UA;
+		} else {
+			dev_dbg(di->dev, "Set BATCTRL current source to 20uA\n");
+			di->curr_source = BTEMP_BATCTRL_CURR_SRC_20UA;
+		}
 	}
 
 	return di->bm->batt_id;
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index a03d4fdf9c2c..ec796c700e4c 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -226,6 +226,8 @@
 /* BatCtrl Current Source Constants */
 #define BAT_CTRL_7U_ENA			0x01
 #define BAT_CTRL_20U_ENA		0x02
+#define BAT_CTRL_18U_ENA		0x01
+#define BAT_CTRL_16U_ENA		0x02
 #define BAT_CTRL_CMP_ENA		0x04
 #define FORCE_BAT_CTRL_CMP_HIGH		0x08
 #define BAT_CTRL_PULL_UP_ENA		0x10
-- 
cgit v1.2.3


From e07a56453b14b929cf01bf032cc3e3220094609c Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@stericsson.com>
Date: Thu, 10 May 2012 15:33:56 +0200
Subject: pm2301: Update watchdog for pm2xxx support

AB and PMxxx doesn't have same watchdog refresh period. Add watchdog
to refresh period parameters in x500 charger structure, this should
kick watchdog every 30sec. The AC charging should also kick both
pm2xxx and the AB charger watchdog.

Signed-off-by: Rajkumar Kasirajan <rajkumar.kasirajan@stericsson.com>
Signed-off-by: Loic Pallardy <loic.pallardy@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Reviewed-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Tested-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Tested-by: Jonas ABERG <jonas.aberg@stericsson.com>
---
 drivers/power/ab8500_charger.c            |  6 ++++++
 drivers/power/abx500_chargalg.c           | 12 +++++++++++-
 drivers/power/pm2301_charger.c            |  2 ++
 drivers/power/pm2301_charger.h            |  2 +-
 include/linux/mfd/abx500/ux500_chargalg.h |  3 +++
 5 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 4c66172aaa64..0483e7cb2268 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -93,6 +93,8 @@
 
 #define CHARGER_STATUS_POLL 10 /* in ms */
 
+#define CHG_WD_INTERVAL			(60 * HZ)
+
 /* UsbLineStatus register - usb types */
 enum ab8500_charger_link_status {
 	USB_STAT_NOT_CONFIGURED,
@@ -2953,7 +2955,9 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 		ARRAY_SIZE(ab8500_charger_voltage_map) - 1];
 	di->ac_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
+	di->ac_chg.wdt_refresh = CHG_WD_INTERVAL;
 	di->ac_chg.enabled = di->pdata->ac_enabled;
+	di->ac_chg.external = false;
 
 	/* USB supply */
 	/* power_supply base class */
@@ -2972,7 +2976,9 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 		ARRAY_SIZE(ab8500_charger_voltage_map) - 1];
 	di->usb_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
+	di->usb_chg.wdt_refresh = CHG_WD_INTERVAL;
 	di->usb_chg.enabled = di->pdata->usb_enabled;
+	di->usb_chg.external = false;
 
 	/* Create a work queue for the charger */
 	di->charger_wq =
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index f59bc025ca58..7defb3e91d59 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -445,8 +445,18 @@ static int abx500_chargalg_kick_watchdog(struct abx500_chargalg *di)
 {
 	/* Check if charger exists and kick watchdog if charging */
 	if (di->ac_chg && di->ac_chg->ops.kick_wd &&
-			di->chg_info.online_chg & AC_CHG)
+	    di->chg_info.online_chg & AC_CHG) {
+		/*
+		 * If AB charger watchdog expired, pm2xxx charging
+		 * gets disabled. To be safe, kick both AB charger watchdog
+		 * and pm2xxx watchdog.
+		 */
+		if (di->ac_chg->external &&
+		    di->usb_chg && di->usb_chg->ops.kick_wd)
+			di->usb_chg->ops.kick_wd(di->usb_chg);
+
 		return di->ac_chg->ops.kick_wd(di->ac_chg);
+	}
 	else if (di->usb_chg && di->usb_chg->ops.kick_wd &&
 			di->chg_info.online_chg & USB_CHG)
 		return di->usb_chg->ops.kick_wd(di->usb_chg);
diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
index 085e468d8a2b..8c1dd5628d16 100644
--- a/drivers/power/pm2301_charger.c
+++ b/drivers/power/pm2301_charger.c
@@ -875,7 +875,9 @@ static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
 		ARRAY_SIZE(pm2xxx_charger_voltage_map) - 1];
 	pm2->ac_chg.max_out_curr = pm2xxx_charger_current_map[
 		ARRAY_SIZE(pm2xxx_charger_current_map) - 1];
+	pm2->ac_chg.wdt_refresh = WD_KICK_INTERVAL;
 	pm2->ac_chg.enabled = true;
+	pm2->ac_chg.external = true;
 
 	/* Create a work queue for the charger */
 	pm2->charger_wq =
diff --git a/drivers/power/pm2301_charger.h b/drivers/power/pm2301_charger.h
index da804716aa82..3531cc5a9056 100644
--- a/drivers/power/pm2301_charger.h
+++ b/drivers/power/pm2301_charger.h
@@ -32,7 +32,7 @@
 
 /* Watchdog timeout constant */
 #define WD_TIMER			0x30 /* 4min */
-#define WD_KICK_INTERVAL		(60 * HZ)
+#define WD_KICK_INTERVAL		(30 * HZ)
 
 #define PM2XXX_NUM_INT_REG		0x6
 
diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h
index 5b77a610c6b6..d43ac0f35526 100644
--- a/include/linux/mfd/abx500/ux500_chargalg.h
+++ b/include/linux/mfd/abx500/ux500_chargalg.h
@@ -28,13 +28,16 @@ struct ux500_charger_ops {
  * @max_out_volt	maximum output charger voltage in mV
  * @max_out_curr	maximum output charger current in mA
  * @enabled		indicates if this charger is used or not
+ * @external		external charger unit (pm2xxx)
  */
 struct ux500_charger {
 	struct power_supply psy;
 	struct ux500_charger_ops ops;
 	int max_out_volt;
 	int max_out_curr;
+	int wdt_refresh;
 	bool enabled;
+	bool external;
 };
 
 #endif
-- 
cgit v1.2.3


From 3988043b0ee1104d4cca7c57bbc23b16ea798b6f Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Wed, 23 Jan 2013 14:38:15 +0000
Subject: pm2301: LPN mode control support

The AC charger plug-in detection while booting causes I2C read
failure if AC charger is not connected. Now the LPN pin is enabled
for every PM2301 register access, which solves the issue.

Signed-off-by: Rupesh Kumar <rupesh.kumar@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Reviewed-by: Vijaya Kumar K-1 <vijay.kilari@stericsson.com>
Reviewed-by: Rabin VINCENT <rabin.vincent@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Tested-by: Jonas ABERG <jonas.aberg@stericsson.com>
---
 drivers/power/pm2301_charger.c | 71 ++++++++++++++++++++++++++++++++++++++++--
 drivers/power/pm2301_charger.h |  1 +
 include/linux/pm2301_charger.h |  1 +
 3 files changed, 71 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
index 9b2e8943f944..ed48d75bb786 100644
--- a/drivers/power/pm2301_charger.c
+++ b/drivers/power/pm2301_charger.c
@@ -28,6 +28,7 @@
 #include <linux/mfd/abx500/ab8500-gpadc.h>
 #include <linux/mfd/abx500/ux500_chargalg.h>
 #include <linux/pm2301_charger.h>
+#include <linux/gpio.h>
 
 #include "pm2301_charger.h"
 
@@ -110,9 +111,35 @@ static const struct i2c_device_id pm2xxx_ident[] = {
 	{ }
 };
 
+static void set_lpn_pin(struct pm2xxx_charger *pm2)
+{
+	if (pm2->ac.charger_connected)
+		return;
+	gpio_set_value(pm2->lpn_pin, 1);
+
+	return;
+}
+
+static void clear_lpn_pin(struct pm2xxx_charger *pm2)
+{
+	if (pm2->ac.charger_connected)
+		return;
+	gpio_set_value(pm2->lpn_pin, 0);
+
+	return;
+}
+
 static int pm2xxx_reg_read(struct pm2xxx_charger *pm2, int reg, u8 *val)
 {
 	int ret;
+	/*
+	 * When AC adaptor is unplugged, the host
+	 * must put LPN high to be able to
+	 * communicate by I2C with PM2301
+	 * and receive I2C "acknowledge" from PM2301.
+	 */
+	mutex_lock(&pm2->lock);
+	set_lpn_pin(pm2);
 
 	ret = i2c_smbus_read_i2c_block_data(pm2->config.pm2xxx_i2c, reg,
 				1, val);
@@ -120,6 +147,8 @@ static int pm2xxx_reg_read(struct pm2xxx_charger *pm2, int reg, u8 *val)
 		dev_err(pm2->dev, "Error reading register at 0x%x\n", reg);
 	else
 		ret = 0;
+	clear_lpn_pin(pm2);
+	mutex_unlock(&pm2->lock);
 
 	return ret;
 }
@@ -127,6 +156,14 @@ static int pm2xxx_reg_read(struct pm2xxx_charger *pm2, int reg, u8 *val)
 static int pm2xxx_reg_write(struct pm2xxx_charger *pm2, int reg, u8 val)
 {
 	int ret;
+	/*
+	 * When AC adaptor is unplugged, the host
+	 * must put LPN high to be able to
+	 * communicate by I2C with PM2301
+	 * and receive I2C "acknowledge" from PM2301.
+	 */
+	mutex_lock(&pm2->lock);
+	set_lpn_pin(pm2);
 
 	ret = i2c_smbus_write_i2c_block_data(pm2->config.pm2xxx_i2c, reg,
 				1, &val);
@@ -134,6 +171,8 @@ static int pm2xxx_reg_write(struct pm2xxx_charger *pm2, int reg, u8 val)
 		dev_err(pm2->dev, "Error writing register at 0x%x\n", reg);
 	else
 		ret = 0;
+	clear_lpn_pin(pm2);
+	mutex_unlock(&pm2->lock);
 
 	return ret;
 }
@@ -850,6 +889,14 @@ static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
 
 	pm2->bat = pl_data->battery;
 
+	/*get lpn GPIO from platform data*/
+	if (!pm2->pdata->lpn_gpio) {
+		dev_err(pm2->dev, "no lpn gpio data supplied\n");
+		ret = -EINVAL;
+		goto free_device_info;
+	}
+	pm2->lpn_pin = pm2->pdata->lpn_gpio;
+
 	if (!i2c_check_functionality(i2c_client->adapter,
 			I2C_FUNC_SMBUS_BYTE_DATA |
 			I2C_FUNC_SMBUS_READ_WORD_DATA)) {
@@ -929,10 +976,25 @@ static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
 		goto unregister_pm2xxx_charger;
 	}
 
+	/*Initialize lock*/
+	mutex_init(&pm2->lock);
+
 	/*
-	 * I2C Read/Write will fail, if AC adaptor is not connected.
-	 * fix the charger detection mechanism.
+	 * Charger detection mechanism requires pulling up the LPN pin
+	 * while i2c communication if Charger is not connected
+	 * LPN pin of PM2301 is GPIO60 of AB9540
 	 */
+	ret = gpio_request(pm2->lpn_pin, "pm2301_lpm_gpio");
+	if (ret < 0) {
+		dev_err(pm2->dev, "pm2301_lpm_gpio request failed\n");
+		goto unregister_pm2xxx_charger;
+	}
+	ret = gpio_direction_output(pm2->lpn_pin, 0);
+	if (ret < 0) {
+		dev_err(pm2->dev, "pm2301_lpm_gpio direction failed\n");
+		goto free_gpio;
+	}
+
 	ret = pm2xxx_charger_detection(pm2, &val);
 
 	if ((ret == 0) && val) {
@@ -944,6 +1006,8 @@ static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
 
 	return 0;
 
+free_gpio:
+	gpio_free(pm2->lpn_pin);
 unregister_pm2xxx_charger:
 	/* unregister power supply */
 	power_supply_unregister(&pm2->ac_chg.psy);
@@ -977,6 +1041,9 @@ static int __devexit pm2xxx_wall_charger_remove(struct i2c_client *i2c_client)
 
 	power_supply_unregister(&pm2->ac_chg.psy);
 
+	/*Free GPIO60*/
+	gpio_free(pm2->lpn_pin);
+
 	kfree(pm2);
 
 	return 0;
diff --git a/drivers/power/pm2301_charger.h b/drivers/power/pm2301_charger.h
index 3531cc5a9056..e6319cdbc94f 100644
--- a/drivers/power/pm2301_charger.h
+++ b/drivers/power/pm2301_charger.h
@@ -493,6 +493,7 @@ struct pm2xxx_charger {
 	int old_vbat;
 	int failure_case;
 	int failure_input_ovv;
+	unsigned int lpn_pin;
 	struct pm2xxx_interrupts *pm2_int;
 	struct ab8500_gpadc *gpadc;
 	struct regulator *regu;
diff --git a/include/linux/pm2301_charger.h b/include/linux/pm2301_charger.h
index 16bb1d34b9d5..fc3f026922ae 100644
--- a/include/linux/pm2301_charger.h
+++ b/include/linux/pm2301_charger.h
@@ -49,6 +49,7 @@ struct pm2xxx_charger_platform_data {
 	int i2c_bus;
 	const char *label;
 	int irq_number;
+	unsigned int lpn_gpio;
 	int irq_type;
 };
 
-- 
cgit v1.2.3


From 0ed5107fa86013c91b1752230d44b79dffee0cda Mon Sep 17 00:00:00 2001
From: Jonas Aaberg <jonas.aberg@stericsson.com>
Date: Fri, 11 May 2012 12:42:25 +0200
Subject: ab8500-charger: Do not touch VBUSOVV bits

Do not touch the VBUSOVV in USBCHTRL2 when running on AB8505.

Signed-off-by: Jonas Aaberg <jonas.aberg@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Tested-by: Mian Yousaf KAUKAB <mian.yousaf.kaukab@stericsson.com>
---
 drivers/power/ab8500_charger.c    | 22 ++++++++++++++++------
 include/linux/mfd/abx500/ab8500.h | 19 +++++++++++++++++++
 2 files changed, 35 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index a632b94e38b9..871bf5a7c42a 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -2671,13 +2671,23 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di)
 		}
 	}
 
-	/* VBUS OVV set to 6.3V and enable automatic current limitiation */
-	ret = abx500_set_register_interruptible(di->dev,
-		AB8500_CHARGER,
-		AB8500_USBCH_CTRL2_REG,
-		VBUS_OVV_SELECT_6P3V | VBUS_AUTO_IN_CURR_LIM_ENA);
+	if (is_ab9540_2p0(di->parent) || is_ab8505_2p0(di->parent))
+		ret = abx500_mask_and_set_register_interruptible(di->dev,
+			AB8500_CHARGER,
+			AB8500_USBCH_CTRL2_REG,
+			VBUS_AUTO_IN_CURR_LIM_ENA,
+			VBUS_AUTO_IN_CURR_LIM_ENA);
+	else
+		/*
+		 * VBUS OVV set to 6.3V and enable automatic current limitation
+		 */
+		ret = abx500_set_register_interruptible(di->dev,
+			AB8500_CHARGER,
+			AB8500_USBCH_CTRL2_REG,
+			VBUS_OVV_SELECT_6P3V | VBUS_AUTO_IN_CURR_LIM_ENA);
 	if (ret) {
-		dev_err(di->dev, "failed to set VBUS OVV\n");
+		dev_err(di->dev,
+			"failed to set automatic current limitation\n");
 		goto out;
 	}
 
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 9dd9b99099df..b9a6a847ff61 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -346,4 +346,23 @@ static inline int is_ab8500_2p0(struct ab8500 *ab)
 	return (is_ab8500(ab) && (ab->chip_id == AB8500_CUT2P0));
 }
 
+static inline int is_ab8505_1p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id <= AB8500_CUT1P0));
+}
+
+static inline int is_ab8505_2p0(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id == AB8500_CUT2P0));
+}
+
+static inline int is_ab9540_1p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id <= AB8500_CUT1P0));
+}
+
+static inline int is_ab9540_2p0(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id == AB8500_CUT2P0));
+}
 #endif /* MFD_AB8500_H */
-- 
cgit v1.2.3


From 97034a1e042d4316a83a3f68d61edf6c42e3f265 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 17 Jan 2013 16:08:42 +0000
Subject: ab8500-bm: Remove individual [charger|btemp|fg|chargalg] pdata
 structures

None of the aforementioned components have their own dedicated
platform data structures anymore. Instead they have all been
merged into one big Battery Management container. Let's remove
them and place all the nice newly added attributes into the core
container.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/power/ab8500_charger.c       |  4 ++--
 include/linux/mfd/abx500.h           |  3 +++
 include/linux/mfd/abx500/ab8500-bm.h | 22 ----------------------
 3 files changed, 5 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 871bf5a7c42a..432f6bc48764 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -2998,7 +2998,7 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 	di->ac_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
 	di->ac_chg.wdt_refresh = CHG_WD_INTERVAL;
-	di->ac_chg.enabled = di->pdata->ac_enabled;
+	di->ac_chg.enabled = di->bm->ac_enabled;
 	di->ac_chg.external = false;
 
 	/* USB supply */
@@ -3019,7 +3019,7 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 	di->usb_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
 	di->usb_chg.wdt_refresh = CHG_WD_INTERVAL;
-	di->usb_chg.enabled = di->pdata->usb_enabled;
+	di->usb_chg.enabled = di->bm->usb_enabled;
 	di->usb_chg.external = false;
 
 	/* Create a work queue for the charger */
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 0e6e90badfca..1beaa056f195 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -254,6 +254,9 @@ struct abx500_bm_data {
 	int usb_safety_tmr_h;
 	int bkup_bat_v;
 	int bkup_bat_i;
+	bool autopower_cfg;
+	bool ac_enabled;
+	bool usb_enabled;
 	bool no_maintenance;
 	bool capacity_scaling;
 	bool chg_unknown_bat;
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index ec796c700e4c..345bc159f978 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -404,28 +404,6 @@ struct ab8500_bm_data {
 	const struct ab8500_fg_parameters *fg_params;
 };
 
-struct ab8500_charger_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-	bool autopower_cfg;
-	bool ac_enabled;
-	bool usb_enabled;
-};
-
-struct ab8500_btemp_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-};
-
-struct ab8500_fg_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-};
-
-struct ab8500_chargalg_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-};
 struct ab8500_btemp;
 struct ab8500_gpadc;
 struct ab8500_fg;
-- 
cgit v1.2.3


From ab78029ecc347debbd737f06688d788bd9d60c1d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 22 Jan 2013 10:56:14 -0700
Subject: drivers/pinctrl: grab default handles from device core

This makes the device core auto-grab the pinctrl handle and set
the "default" (PINCTRL_STATE_DEFAULT) state for every device
that is present in the device model right before probe. This will
account for the lion's share of embedded silicon devcies.

A modification of the semantics for pinctrl_get() is also done:
previously if the pinctrl handle for a certain device was already
taken, the pinctrl core would return an error. Now, since the
core may have already default-grabbed the handle and set its
state to "default", if the handle was already taken, this will
be disregarded and the located, previously instanitated handle
will be returned to the caller.

This way all code in drivers explicitly requesting their pinctrl
handlers will still be functional, and drivers that want to
explicitly retrieve and switch their handles can still do that.
But if the desired functionality is just boilerplate of this
type in the probe() function:

struct pinctrl  *p;

p = devm_pinctrl_get_select_default(&dev);
if (IS_ERR(p)) {
   if (PTR_ERR(p) == -EPROBE_DEFER)
        return -EPROBE_DEFER;
        dev_warn(&dev, "no pinctrl handle\n");
}

The discussion began with the addition of such boilerplate
to the omap4 keypad driver:
http://marc.info/?l=linux-input&m=135091157719300&w=2

A previous approach using notifiers was discussed:
http://marc.info/?l=linux-kernel&m=135263661110528&w=2
This failed because it could not handle deferred probes.

This patch alone does not solve the entire dilemma faced:
whether code should be distributed into the drivers or
if it should be centralized to e.g. a PM domain. But it
solves the immediate issue of the addition of boilerplate
to a lot of drivers that just want to grab the default
state. As mentioned, they can later explicitly retrieve
the handle and set different states, and this could as
well be done by e.g. PM domains as it is only related
to a certain struct device * pointer.

ChangeLog v4->v5 (Stephen):
- Simplified the devicecore grab code.
- Deleted a piece of documentation recommending that pins
  be mapped to a device rather than hogged.
ChangeLog v3->v4 (Linus):
- Drop overzealous NULL checks.
- Move kref initialization to pinctrl_create().
- Seeking Tested-by from Stephen Warren so we do not disturb
  the Tegra platform.
- Seeking ACK on this from Greg (and others who like it) so I
  can merge it through the pinctrl subsystem.
ChangeLog v2->v3 (Linus):
- Abstain from using IS_ERR_OR_NULL() in the driver core,
  Russell recently sent a patch to remove it. Handle the
  NULL case explicitly even though it's a bogus case.
- Make sure we handle probe deferral correctly in the device
  core file. devm_kfree() the container on error so we don't
  waste memory for devices without pinctrl handles.
- Introduce reference counting into the pinctrl core using
  <linux/kref.h> so that we don't release pinctrl handles
  that have been obtained for two or more places.
ChangeLog v1->v2 (Linus):
- Only store a pointer in the device struct, and only allocate
  this if it's really used by the device.

Cc: Felipe Balbi <balbi@ti.com>
Cc: Benoit Cousson <b-cousson@ti.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Mitch Bradley <wmb@firmworks.com>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: Rickard Andersson <rickard.andersson@stericsson.com>
Cc: Russell King <linux@arm.linux.org.uk>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
[swarren: fixed and simplified error-handling in pinctrl_bind_pins(), to
correctly handle deferred probe. Removed admonition from docs not to use
pinctrl hogs for devices]
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt       | 18 +++++++++--
 drivers/base/Makefile           |  1 +
 drivers/base/dd.c               |  7 +++++
 drivers/base/pinctrl.c          | 69 +++++++++++++++++++++++++++++++++++++++++
 drivers/pinctrl/core.c          | 30 +++++++++++++++---
 drivers/pinctrl/core.h          |  3 ++
 include/linux/device.h          |  7 +++++
 include/linux/pinctrl/devinfo.h | 45 +++++++++++++++++++++++++++
 8 files changed, 173 insertions(+), 7 deletions(-)
 create mode 100644 drivers/base/pinctrl.c
 create mode 100644 include/linux/pinctrl/devinfo.h

(limited to 'include/linux')

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index da40efbef6ec..a2b57e0a1db0 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -972,6 +972,18 @@ pinmux core.
 Pin control requests from drivers
 =================================
 
+When a device driver is about to probe the device core will automatically
+attempt to issue pinctrl_get_select_default() on these devices.
+This way driver writers do not need to add any of the boilerplate code
+of the type found below. However when doing fine-grained state selection
+and not using the "default" state, you may have to do some device driver
+handling of the pinctrl handles and states.
+
+So if you just want to put the pins for a certain device into the default
+state and be done with it, there is nothing you need to do besides
+providing the proper mapping table. The device core will take care of
+the rest.
+
 Generally it is discouraged to let individual drivers get and enable pin
 control. So if possible, handle the pin control in platform code or some other
 place where you have access to all the affected struct device * pointers. In
@@ -1097,9 +1109,9 @@ situations that can be electrically unpleasant, you will certainly want to
 mux in and bias pins in a certain way before the GPIO subsystems starts to
 deal with them.
 
-The above can be hidden: using pinctrl hogs, the pin control driver may be
-setting up the config and muxing for the pins when it is probing,
-nevertheless orthogonal to the GPIO subsystem.
+The above can be hidden: using the device core, the pinctrl core may be
+setting up the config and muxing for the pins right before the device is
+probing, nevertheless orthogonal to the GPIO subsystem.
 
 But there are also situations where it makes sense for the GPIO subsystem
 to communicate directly with with the pinctrl subsystem, using the latter
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 5aa2d703d19f..4e22ce3ed73d 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -21,6 +21,7 @@ endif
 obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o
 obj-$(CONFIG_REGMAP)	+= regmap/
 obj-$(CONFIG_SOC_BUS) += soc.o
+obj-$(CONFIG_PINCTRL) += pinctrl.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index e3bbed8a617c..656310156dde 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -24,6 +24,7 @@
 #include <linux/wait.h>
 #include <linux/async.h>
 #include <linux/pm_runtime.h>
+#include <linux/pinctrl/devinfo.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -269,6 +270,12 @@ static int really_probe(struct device *dev, struct device_driver *drv)
 	WARN_ON(!list_empty(&dev->devres_head));
 
 	dev->driver = drv;
+
+	/* If using pinctrl, bind pins now before probing */
+	ret = pinctrl_bind_pins(dev);
+	if (ret)
+		goto probe_failed;
+
 	if (driver_sysfs_add(dev)) {
 		printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n",
 			__func__, dev_name(dev));
diff --git a/drivers/base/pinctrl.c b/drivers/base/pinctrl.c
new file mode 100644
index 000000000000..67a274e86727
--- /dev/null
+++ b/drivers/base/pinctrl.c
@@ -0,0 +1,69 @@
+/*
+ * Driver core interface to the pinctrl subsystem.
+ *
+ * Copyright (C) 2012 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ * Based on bits of regulator core, gpio core and clk core
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/device.h>
+#include <linux/pinctrl/devinfo.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/slab.h>
+
+/**
+ * pinctrl_bind_pins() - called by the device core before probe
+ * @dev: the device that is just about to probe
+ */
+int pinctrl_bind_pins(struct device *dev)
+{
+	int ret;
+
+	dev->pins = devm_kzalloc(dev, sizeof(*(dev->pins)), GFP_KERNEL);
+	if (!dev->pins)
+		return -ENOMEM;
+
+	dev->pins->p = devm_pinctrl_get(dev);
+	if (IS_ERR(dev->pins->p)) {
+		dev_dbg(dev, "no pinctrl handle\n");
+		ret = PTR_ERR(dev->pins->p);
+		goto cleanup_alloc;
+	}
+
+	dev->pins->default_state = pinctrl_lookup_state(dev->pins->p,
+					PINCTRL_STATE_DEFAULT);
+	if (IS_ERR(dev->pins->default_state)) {
+		dev_dbg(dev, "no default pinctrl state\n");
+		ret = 0;
+		goto cleanup_get;
+	}
+
+	ret = pinctrl_select_state(dev->pins->p, dev->pins->default_state);
+	if (ret) {
+		dev_dbg(dev, "failed to activate default pinctrl state\n");
+		goto cleanup_get;
+	}
+
+	return 0;
+
+	/*
+	 * If no pinctrl handle or default state was found for this device,
+	 * let's explicitly free the pin container in the device, there is
+	 * no point in keeping it around.
+	 */
+cleanup_get:
+	devm_pinctrl_put(dev->pins->p);
+cleanup_alloc:
+	devm_kfree(dev, dev->pins);
+	dev->pins = NULL;
+
+	/* Only return deferrals */
+	if (ret != -EPROBE_DEFER)
+		ret = 0;
+
+	return ret;
+}
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index e6373f30aeeb..5a2fe9aae20f 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) "pinctrl core: " fmt
 
 #include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/device.h>
@@ -727,6 +728,8 @@ static struct pinctrl *create_pinctrl(struct device *dev)
 		return ERR_PTR(ret);
 	}
 
+	kref_init(&p->users);
+
 	/* Add the pinctrl handle to the global list */
 	list_add_tail(&p->node, &pinctrl_list);
 
@@ -740,9 +743,17 @@ static struct pinctrl *pinctrl_get_locked(struct device *dev)
 	if (WARN_ON(!dev))
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * See if somebody else (such as the device core) has already
+	 * obtained a handle to the pinctrl for this device. In that case,
+	 * return another pointer to it.
+	 */
 	p = find_pinctrl(dev);
-	if (p != NULL)
-		return ERR_PTR(-EBUSY);
+	if (p != NULL) {
+		dev_dbg(dev, "obtain a copy of previously claimed pinctrl\n");
+		kref_get(&p->users);
+		return p;
+	}
 
 	return create_pinctrl(dev);
 }
@@ -798,13 +809,24 @@ static void pinctrl_put_locked(struct pinctrl *p, bool inlist)
 }
 
 /**
- * pinctrl_put() - release a previously claimed pinctrl handle
+ * pinctrl_release() - release the pinctrl handle
+ * @kref: the kref in the pinctrl being released
+ */
+void pinctrl_release(struct kref *kref)
+{
+	struct pinctrl *p = container_of(kref, struct pinctrl, users);
+
+	pinctrl_put_locked(p, true);
+}
+
+/**
+ * pinctrl_put() - decrease use count on a previously claimed pinctrl handle
  * @p: the pinctrl handle to release
  */
 void pinctrl_put(struct pinctrl *p)
 {
 	mutex_lock(&pinctrl_mutex);
-	pinctrl_put_locked(p, true);
+	kref_put(&p->users, pinctrl_release);
 	mutex_unlock(&pinctrl_mutex);
 }
 EXPORT_SYMBOL_GPL(pinctrl_put);
diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h
index 232a9f6db4aa..fdd350d639f6 100644
--- a/drivers/pinctrl/core.h
+++ b/drivers/pinctrl/core.h
@@ -9,6 +9,7 @@
  * License terms: GNU General Public License (GPL) version 2
  */
 
+#include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
 #include <linux/pinctrl/pinconf.h>
@@ -58,6 +59,7 @@ struct pinctrl_dev {
  * @state: the current state
  * @dt_maps: the mapping table chunks dynamically parsed from device tree for
  *	this device, if any
+ * @users: reference count
  */
 struct pinctrl {
 	struct list_head node;
@@ -65,6 +67,7 @@ struct pinctrl {
 	struct list_head states;
 	struct pinctrl_state *state;
 	struct list_head dt_maps;
+	struct kref users;
 };
 
 /**
diff --git a/include/linux/device.h b/include/linux/device.h
index 43dcda937ddf..001f6637aa47 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -21,6 +21,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/mutex.h>
+#include <linux/pinctrl/devinfo.h>
 #include <linux/pm.h>
 #include <linux/atomic.h>
 #include <linux/ratelimit.h>
@@ -620,6 +621,8 @@ struct acpi_dev_node {
  * @pm_domain:	Provide callbacks that are executed during system suspend,
  * 		hibernation, system resume and during runtime PM transitions
  * 		along with subsystem-level and driver-level callbacks.
+ * @pins:	For device pin management.
+ *		See Documentation/pinctrl.txt for details.
  * @numa_node:	NUMA node this device is close to.
  * @dma_mask:	Dma mask (if dma'ble device).
  * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all
@@ -672,6 +675,10 @@ struct device {
 	struct dev_pm_info	power;
 	struct dev_pm_domain	*pm_domain;
 
+#ifdef CONFIG_PINCTRL
+	struct dev_pin_info	*pins;
+#endif
+
 #ifdef CONFIG_NUMA
 	int		numa_node;	/* NUMA node this device is close to */
 #endif
diff --git a/include/linux/pinctrl/devinfo.h b/include/linux/pinctrl/devinfo.h
new file mode 100644
index 000000000000..6e5f8a985ea7
--- /dev/null
+++ b/include/linux/pinctrl/devinfo.h
@@ -0,0 +1,45 @@
+/*
+ * Per-device information from the pin control system.
+ * This is the stuff that get included into the device
+ * core.
+ *
+ * Copyright (C) 2012 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ * This interface is used in the core to keep track of pins.
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifndef PINCTRL_DEVINFO_H
+#define PINCTRL_DEVINFO_H
+
+#ifdef CONFIG_PINCTRL
+
+/* The device core acts as a consumer toward pinctrl */
+#include <linux/pinctrl/consumer.h>
+
+/**
+ * struct dev_pin_info - pin state container for devices
+ * @p: pinctrl handle for the containing device
+ * @default_state: the default state for the handle, if found
+ */
+struct dev_pin_info {
+	struct pinctrl *p;
+	struct pinctrl_state *default_state;
+};
+
+extern int pinctrl_bind_pins(struct device *dev);
+
+#else
+
+/* Stubs if we're not using pinctrl */
+
+static inline int pinctrl_bind_pins(struct device *dev)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PINCTRL */
+#endif /* PINCTRL_DEVINFO_H */
-- 
cgit v1.2.3


From 8723d5037cafea09c7242303c6c8e5d7058cec61 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Jan 2013 09:32:30 -0800
Subject: async: bring sanity to the use of words domain and running

In the beginning, running lists were literal struct list_heads.  Later
on, struct async_domain was added.  For some reason, while the
conversion substituted list_heads with async_domains, the variable
names weren't fully converted.  In more places, "running" was used for
struct async_domain while other places adopted new "domain" name.

The situation is made much worse by having async_domain's running list
named "domain" and async_entry's field pointing to async_domain named
"running".

So, we end up with mix of "running" and "domain" for variable names
for async_domain, with the field names of async_domain and async_entry
swapped between "running" and "domain".

It feels almost intentionally made to be as confusing as possible.
Bring some sanity by

* Renaming all async_domain variables "domain".

* s/async_running/async_dfl_domain/

* s/async_domain->domain/async_domain->running/

* s/async_entry->running/async_entry->domain/

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Dan Williams <djbw@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/async.h |  6 ++---
 kernel/async.c        | 68 +++++++++++++++++++++++++--------------------------
 2 files changed, 37 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/async.h b/include/linux/async.h
index 345169cfa304..34ff5c610e0e 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -19,7 +19,7 @@ typedef u64 async_cookie_t;
 typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
 struct async_domain {
 	struct list_head node;
-	struct list_head domain;
+	struct list_head running;
 	int count;
 	unsigned registered:1;
 };
@@ -29,7 +29,7 @@ struct async_domain {
  */
 #define ASYNC_DOMAIN(_name) \
 	struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
-				      .domain = LIST_HEAD_INIT(_name.domain), \
+				      .running = LIST_HEAD_INIT(_name.running), \
 				      .count = 0, \
 				      .registered = 1 }
 
@@ -39,7 +39,7 @@ struct async_domain {
  */
 #define ASYNC_DOMAIN_EXCLUSIVE(_name) \
 	struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
-				      .domain = LIST_HEAD_INIT(_name.domain), \
+				      .running = LIST_HEAD_INIT(_name.running), \
 				      .count = 0, \
 				      .registered = 0 }
 
diff --git a/kernel/async.c b/kernel/async.c
index 6c68fc3fae7b..29d51d483bee 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -64,7 +64,7 @@ static async_cookie_t next_cookie = 1;
 #define MAX_WORK	32768
 
 static LIST_HEAD(async_pending);
-static ASYNC_DOMAIN(async_running);
+static ASYNC_DOMAIN(async_dfl_domain);
 static LIST_HEAD(async_domains);
 static DEFINE_SPINLOCK(async_lock);
 static DEFINE_MUTEX(async_register_mutex);
@@ -75,7 +75,7 @@ struct async_entry {
 	async_cookie_t		cookie;
 	async_func_ptr		*func;
 	void			*data;
-	struct async_domain	*running;
+	struct async_domain	*domain;
 };
 
 static DECLARE_WAIT_QUEUE_HEAD(async_done);
@@ -86,7 +86,7 @@ static atomic_t entry_count;
 /*
  * MUST be called with the lock held!
  */
-static async_cookie_t  __lowest_in_progress(struct async_domain *running)
+static async_cookie_t __lowest_in_progress(struct async_domain *domain)
 {
 	async_cookie_t first_running = next_cookie;	/* infinity value */
 	async_cookie_t first_pending = next_cookie;	/* ditto */
@@ -96,13 +96,13 @@ static async_cookie_t  __lowest_in_progress(struct async_domain *running)
 	 * Both running and pending lists are sorted but not disjoint.
 	 * Take the first cookies from both and return the min.
 	 */
-	if (!list_empty(&running->domain)) {
-		entry = list_first_entry(&running->domain, typeof(*entry), list);
+	if (!list_empty(&domain->running)) {
+		entry = list_first_entry(&domain->running, typeof(*entry), list);
 		first_running = entry->cookie;
 	}
 
 	list_for_each_entry(entry, &async_pending, list) {
-		if (entry->running == running) {
+		if (entry->domain == domain) {
 			first_pending = entry->cookie;
 			break;
 		}
@@ -111,13 +111,13 @@ static async_cookie_t  __lowest_in_progress(struct async_domain *running)
 	return min(first_running, first_pending);
 }
 
-static async_cookie_t  lowest_in_progress(struct async_domain *running)
+static async_cookie_t lowest_in_progress(struct async_domain *domain)
 {
 	unsigned long flags;
 	async_cookie_t ret;
 
 	spin_lock_irqsave(&async_lock, flags);
-	ret = __lowest_in_progress(running);
+	ret = __lowest_in_progress(domain);
 	spin_unlock_irqrestore(&async_lock, flags);
 	return ret;
 }
@@ -132,11 +132,11 @@ static void async_run_entry_fn(struct work_struct *work)
 	struct async_entry *pos;
 	unsigned long flags;
 	ktime_t uninitialized_var(calltime), delta, rettime;
-	struct async_domain *running = entry->running;
+	struct async_domain *domain = entry->domain;
 
 	/* 1) move self to the running queue, make sure it stays sorted */
 	spin_lock_irqsave(&async_lock, flags);
-	list_for_each_entry_reverse(pos, &running->domain, list)
+	list_for_each_entry_reverse(pos, &domain->running, list)
 		if (entry->cookie < pos->cookie)
 			break;
 	list_move_tail(&entry->list, &pos->list);
@@ -162,8 +162,8 @@ static void async_run_entry_fn(struct work_struct *work)
 	/* 3) remove self from the running queue */
 	spin_lock_irqsave(&async_lock, flags);
 	list_del(&entry->list);
-	if (running->registered && --running->count == 0)
-		list_del_init(&running->node);
+	if (domain->registered && --domain->count == 0)
+		list_del_init(&domain->node);
 
 	/* 4) free the entry */
 	kfree(entry);
@@ -175,7 +175,7 @@ static void async_run_entry_fn(struct work_struct *work)
 	wake_up(&async_done);
 }
 
-static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct async_domain *running)
+static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct async_domain *domain)
 {
 	struct async_entry *entry;
 	unsigned long flags;
@@ -201,13 +201,13 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 	INIT_WORK(&entry->work, async_run_entry_fn);
 	entry->func = ptr;
 	entry->data = data;
-	entry->running = running;
+	entry->domain = domain;
 
 	spin_lock_irqsave(&async_lock, flags);
 	newcookie = entry->cookie = next_cookie++;
 	list_add_tail(&entry->list, &async_pending);
-	if (running->registered && running->count++ == 0)
-		list_add_tail(&running->node, &async_domains);
+	if (domain->registered && domain->count++ == 0)
+		list_add_tail(&domain->node, &async_domains);
 	atomic_inc(&entry_count);
 	spin_unlock_irqrestore(&async_lock, flags);
 
@@ -230,7 +230,7 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
  */
 async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
 {
-	return __async_schedule(ptr, data, &async_running);
+	return __async_schedule(ptr, data, &async_dfl_domain);
 }
 EXPORT_SYMBOL_GPL(async_schedule);
 
@@ -238,18 +238,18 @@ EXPORT_SYMBOL_GPL(async_schedule);
  * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
  * @ptr: function to execute asynchronously
  * @data: data pointer to pass to the function
- * @running: running list for the domain
+ * @domain: the domain
  *
  * Returns an async_cookie_t that may be used for checkpointing later.
- * @running may be used in the async_synchronize_*_domain() functions
- * to wait within a certain synchronization domain rather than globally.
- * A synchronization domain is specified via the running queue @running to use.
- * Note: This function may be called from atomic or non-atomic contexts.
+ * @domain may be used in the async_synchronize_*_domain() functions to
+ * wait within a certain synchronization domain rather than globally.  A
+ * synchronization domain is specified via @domain.  Note: This function
+ * may be called from atomic or non-atomic contexts.
  */
 async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
-				     struct async_domain *running)
+				     struct async_domain *domain)
 {
-	return __async_schedule(ptr, data, running);
+	return __async_schedule(ptr, data, domain);
 }
 EXPORT_SYMBOL_GPL(async_schedule_domain);
 
@@ -289,7 +289,7 @@ void async_unregister_domain(struct async_domain *domain)
 	mutex_lock(&async_register_mutex);
 	spin_lock_irq(&async_lock);
 	WARN_ON(!domain->registered || !list_empty(&domain->node) ||
-		!list_empty(&domain->domain));
+		!list_empty(&domain->running));
 	domain->registered = 0;
 	spin_unlock_irq(&async_lock);
 	mutex_unlock(&async_register_mutex);
@@ -298,10 +298,10 @@ EXPORT_SYMBOL_GPL(async_unregister_domain);
 
 /**
  * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
- * @domain: running list to synchronize on
+ * @domain: the domain to synchronize
  *
  * This function waits until all asynchronous function calls for the
- * synchronization domain specified by the running list @domain have been done.
+ * synchronization domain specified by @domain have been done.
  */
 void async_synchronize_full_domain(struct async_domain *domain)
 {
@@ -312,17 +312,17 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
 /**
  * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
  * @cookie: async_cookie_t to use as checkpoint
- * @running: running list to synchronize on
+ * @domain: the domain to synchronize
  *
  * This function waits until all asynchronous function calls for the
- * synchronization domain specified by running list @running submitted
- * prior to @cookie have been done.
+ * synchronization domain specified by @domain submitted prior to @cookie
+ * have been done.
  */
-void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *running)
+void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain)
 {
 	ktime_t uninitialized_var(starttime), delta, endtime;
 
-	if (!running)
+	if (!domain)
 		return;
 
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
@@ -330,7 +330,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain
 		starttime = ktime_get();
 	}
 
-	wait_event(async_done, lowest_in_progress(running) >= cookie);
+	wait_event(async_done, lowest_in_progress(domain) >= cookie);
 
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		endtime = ktime_get();
@@ -352,7 +352,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
  */
 void async_synchronize_cookie(async_cookie_t cookie)
 {
-	async_synchronize_cookie_domain(cookie, &async_running);
+	async_synchronize_cookie_domain(cookie, &async_dfl_domain);
 }
 EXPORT_SYMBOL_GPL(async_synchronize_cookie);
 
-- 
cgit v1.2.3


From 52722794d6a48162fd8906d54618ae60a4abdb21 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Jan 2013 09:32:30 -0800
Subject: async: keep pending tasks on async_domain and remove async_pending

Async kept single global pending list and per-domain running lists.
When an async item is queued, it's put on the global pending list.
The item is moved to the per-domain running list when its execution
starts.

At this point, this design complicates execution and synchronization
without bringing any benefit.  The list only matters for
synchronization which doesn't care whether a given async item is
pending or executing.  Also, global synchronization is done by
iterating through all active registered async_domains, so the global
async_pending list doesn't help anything either.

Rename async_domain->running to async_domain->pending and put async
items directly there and remove when execution completes.  This
simplifies lowest_in_progress() a lot - the first item on the pending
list is the one with the lowest cookie, and async_run_entry_fn()
doesn't have to mess with moving the item from pending to running.

After the change, whether a domain is empty or not can be trivially
determined by looking at async_domain->pending.  Remove
async_domain->count and use list_empty() on pending instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Dan Williams <djbw@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/async.h |  9 +++-----
 kernel/async.c        | 63 ++++++++++++---------------------------------------
 2 files changed, 17 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/async.h b/include/linux/async.h
index 34ff5c610e0e..a2e3f18b2ad6 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -19,8 +19,7 @@ typedef u64 async_cookie_t;
 typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
 struct async_domain {
 	struct list_head node;
-	struct list_head running;
-	int count;
+	struct list_head pending;
 	unsigned registered:1;
 };
 
@@ -29,8 +28,7 @@ struct async_domain {
  */
 #define ASYNC_DOMAIN(_name) \
 	struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
-				      .running = LIST_HEAD_INIT(_name.running), \
-				      .count = 0, \
+				      .pending = LIST_HEAD_INIT(_name.pending), \
 				      .registered = 1 }
 
 /*
@@ -39,8 +37,7 @@ struct async_domain {
  */
 #define ASYNC_DOMAIN_EXCLUSIVE(_name) \
 	struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
-				      .running = LIST_HEAD_INIT(_name.running), \
-				      .count = 0, \
+				      .pending = LIST_HEAD_INIT(_name.pending), \
 				      .registered = 0 }
 
 extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data);
diff --git a/kernel/async.c b/kernel/async.c
index a4c1a9e63b2e..7c9f50f436d6 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -64,7 +64,6 @@ static async_cookie_t next_cookie = 1;
 #define MAX_WORK		32768
 #define ASYNC_COOKIE_MAX	ULLONG_MAX	/* infinity cookie */
 
-static LIST_HEAD(async_pending);
 static ASYNC_DOMAIN(async_dfl_domain);
 static LIST_HEAD(async_domains);
 static DEFINE_SPINLOCK(async_lock);
@@ -83,42 +82,17 @@ static DECLARE_WAIT_QUEUE_HEAD(async_done);
 
 static atomic_t entry_count;
 
-
-/*
- * MUST be called with the lock held!
- */
-static async_cookie_t __lowest_in_progress(struct async_domain *domain)
-{
-	async_cookie_t first_running = ASYNC_COOKIE_MAX;
-	async_cookie_t first_pending = ASYNC_COOKIE_MAX;
-	struct async_entry *entry;
-
-	/*
-	 * Both running and pending lists are sorted but not disjoint.
-	 * Take the first cookies from both and return the min.
-	 */
-	if (!list_empty(&domain->running)) {
-		entry = list_first_entry(&domain->running, typeof(*entry), list);
-		first_running = entry->cookie;
-	}
-
-	list_for_each_entry(entry, &async_pending, list) {
-		if (entry->domain == domain) {
-			first_pending = entry->cookie;
-			break;
-		}
-	}
-
-	return min(first_running, first_pending);
-}
-
 static async_cookie_t lowest_in_progress(struct async_domain *domain)
 {
+	async_cookie_t ret = ASYNC_COOKIE_MAX;
 	unsigned long flags;
-	async_cookie_t ret;
 
 	spin_lock_irqsave(&async_lock, flags);
-	ret = __lowest_in_progress(domain);
+	if (!list_empty(&domain->pending)) {
+		struct async_entry *first = list_first_entry(&domain->pending,
+						struct async_entry, list);
+		ret = first->cookie;
+	}
 	spin_unlock_irqrestore(&async_lock, flags);
 	return ret;
 }
@@ -130,20 +104,11 @@ static void async_run_entry_fn(struct work_struct *work)
 {
 	struct async_entry *entry =
 		container_of(work, struct async_entry, work);
-	struct async_entry *pos;
 	unsigned long flags;
 	ktime_t uninitialized_var(calltime), delta, rettime;
 	struct async_domain *domain = entry->domain;
 
-	/* 1) move self to the running queue, make sure it stays sorted */
-	spin_lock_irqsave(&async_lock, flags);
-	list_for_each_entry_reverse(pos, &domain->running, list)
-		if (entry->cookie < pos->cookie)
-			break;
-	list_move_tail(&entry->list, &pos->list);
-	spin_unlock_irqrestore(&async_lock, flags);
-
-	/* 2) run (and print duration) */
+	/* 1) run (and print duration) */
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		printk(KERN_DEBUG "calling  %lli_%pF @ %i\n",
 			(long long)entry->cookie,
@@ -160,19 +125,19 @@ static void async_run_entry_fn(struct work_struct *work)
 			(long long)ktime_to_ns(delta) >> 10);
 	}
 
-	/* 3) remove self from the running queue */
+	/* 2) remove self from the pending queues */
 	spin_lock_irqsave(&async_lock, flags);
 	list_del(&entry->list);
-	if (domain->registered && --domain->count == 0)
+	if (domain->registered && list_empty(&domain->pending))
 		list_del_init(&domain->node);
 
-	/* 4) free the entry */
+	/* 3) free the entry */
 	kfree(entry);
 	atomic_dec(&entry_count);
 
 	spin_unlock_irqrestore(&async_lock, flags);
 
-	/* 5) wake up any waiters */
+	/* 4) wake up any waiters */
 	wake_up(&async_done);
 }
 
@@ -206,9 +171,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 
 	spin_lock_irqsave(&async_lock, flags);
 	newcookie = entry->cookie = next_cookie++;
-	list_add_tail(&entry->list, &async_pending);
-	if (domain->registered && domain->count++ == 0)
+	if (domain->registered && list_empty(&domain->pending))
 		list_add_tail(&domain->node, &async_domains);
+	list_add_tail(&entry->list, &domain->pending);
 	atomic_inc(&entry_count);
 	spin_unlock_irqrestore(&async_lock, flags);
 
@@ -290,7 +255,7 @@ void async_unregister_domain(struct async_domain *domain)
 	mutex_lock(&async_register_mutex);
 	spin_lock_irq(&async_lock);
 	WARN_ON(!domain->registered || !list_empty(&domain->node) ||
-		!list_empty(&domain->running));
+		!list_empty(&domain->pending));
 	domain->registered = 0;
 	spin_unlock_irq(&async_lock);
 	mutex_unlock(&async_register_mutex);
-- 
cgit v1.2.3


From 055dc21a1d1d219608cd4baac7d0683fb2cbbe8a Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 22 Jan 2013 09:49:50 +0000
Subject: soreuseport: infrastructure

Definitions and macros for implementing soreusport.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h   | 2 +-
 arch/avr32/include/uapi/asm/socket.h   | 2 +-
 arch/cris/include/uapi/asm/socket.h    | 2 +-
 arch/frv/include/uapi/asm/socket.h     | 2 +-
 arch/h8300/include/uapi/asm/socket.h   | 2 +-
 arch/ia64/include/uapi/asm/socket.h    | 2 +-
 arch/m32r/include/uapi/asm/socket.h    | 2 +-
 arch/mips/include/uapi/asm/socket.h    | 3 +--
 arch/mn10300/include/uapi/asm/socket.h | 2 +-
 arch/parisc/include/uapi/asm/socket.h  | 2 +-
 arch/powerpc/include/uapi/asm/socket.h | 2 +-
 arch/s390/include/uapi/asm/socket.h    | 2 +-
 arch/sparc/include/uapi/asm/socket.h   | 2 +-
 arch/xtensa/include/uapi/asm/socket.h  | 2 +-
 include/linux/random.h                 | 6 ++++++
 include/net/sock.h                     | 5 ++++-
 include/uapi/asm-generic/socket.h      | 3 +--
 net/core/sock.c                        | 7 +++++++
 18 files changed, 32 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 755702eefd9c..c5195524d1ef 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -19,7 +19,7 @@
 #define SO_BROADCAST	0x0020
 #define SO_LINGER	0x0080
 #define SO_OOBINLINE	0x0100
-/* To add :#define SO_REUSEPORT 0x0200 */
+#define SO_REUSEPORT	0x0200
 
 #define SO_TYPE		0x1008
 #define SO_ERROR	0x1007
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index f3f38a0e2ef9..51c6401582ea 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index 406b5838defd..50692b738c75 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -24,7 +24,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index d8e1132a1ab6..595391f0f98c 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h
index c8b87a828206..43e32621da7d 100644
--- a/arch/h8300/include/uapi/asm/socket.h
+++ b/arch/h8300/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index f390896c3104..c567adc8bea5 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -31,7 +31,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 6a895155e7a3..519afa2755db 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 9d11a7713923..7e2723637b35 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -28,8 +28,7 @@
 #define SO_LINGER	0x0080	/* Block on close of a reliable
 				   socket to transmit pending data.  */
 #define SO_OOBINLINE 0x0100	/* Receive out-of-band data in-band.  */
-#if 0
-To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
+#define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
 #endif
 
 #define SO_TYPE		0x1008	/* Compatible name for SO_STYLE.  */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index ab702c40b30e..5c7c7c988544 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index da2c8d3c209e..526e4b9aece0 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -13,7 +13,7 @@
 #define SO_BROADCAST	0x0020
 #define SO_LINGER	0x0080
 #define SO_OOBINLINE	0x0100
-/* To add :#define SO_REUSEPORT 0x0200 */
+#define SO_REUSEPORT	0x0200
 #define SO_SNDBUF	0x1001
 #define SO_RCVBUF	0x1002
 #define SO_SNDBUFFORCE	0x100a
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index e6ca31816cc9..a26dcaece509 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -29,7 +29,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_RCVLOWAT	16
 #define SO_SNDLOWAT	17
 #define SO_RCVTIMEO	18
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 9ce60b68f070..f99eea7fff0f 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -28,7 +28,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index fbbba57547d1..cbbad74b2e06 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -15,7 +15,7 @@
 #define SO_PEERCRED	0x0040
 #define SO_LINGER	0x0080
 #define SO_OOBINLINE	0x0100
-/* To add :#define SO_REUSEPORT 0x0200 */
+#define SO_REUSEPORT	0x0200
 #define SO_BSDCOMPAT    0x0400
 #define SO_RCVLOWAT     0x0800
 #define SO_SNDLOWAT     0x1000
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index dbf316487b51..35905cb6e419 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -32,7 +32,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/include/linux/random.h b/include/linux/random.h
index d9846088c2c5..347ce553a306 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -74,4 +74,10 @@ static inline int arch_get_random_int(unsigned int *v)
 }
 #endif
 
+/* Pseudo random number generator from numerical recipes. */
+static inline u32 next_pseudo_random32(u32 seed)
+{
+	return seed * 1664525 + 1013904223;
+}
+
 #endif /* _LINUX_RANDOM_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 5a34e2f03657..581dc6bd7dc6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -140,6 +140,7 @@ typedef __u64 __bitwise __addrpair;
  *	@skc_family: network address family
  *	@skc_state: Connection state
  *	@skc_reuse: %SO_REUSEADDR setting
+ *	@skc_reuseport: %SO_REUSEPORT setting
  *	@skc_bound_dev_if: bound device index if != 0
  *	@skc_bind_node: bind hash linkage for various protocol lookup tables
  *	@skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
@@ -179,7 +180,8 @@ struct sock_common {
 
 	unsigned short		skc_family;
 	volatile unsigned char	skc_state;
-	unsigned char		skc_reuse;
+	unsigned char		skc_reuse:4;
+	unsigned char		skc_reuseport:4;
 	int			skc_bound_dev_if;
 	union {
 		struct hlist_node	skc_bind_node;
@@ -297,6 +299,7 @@ struct sock {
 #define sk_family		__sk_common.skc_family
 #define sk_state		__sk_common.skc_state
 #define sk_reuse		__sk_common.skc_reuse
+#define sk_reuseport		__sk_common.skc_reuseport
 #define sk_bound_dev_if		__sk_common.skc_bound_dev_if
 #define sk_bind_node		__sk_common.skc_bind_node
 #define sk_prot			__sk_common.skc_prot
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 3f6a99201410..4ef3acbba5da 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -22,8 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
-
+#define SO_REUSEPORT	15
 #ifndef SO_PASSCRED /* powerpc only differs in these */
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
diff --git a/net/core/sock.c b/net/core/sock.c
index 8258fb741e9a..235fb89e8973 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -665,6 +665,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	case SO_REUSEADDR:
 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
 		break;
+	case SO_REUSEPORT:
+		sk->sk_reuseport = valbool;
+		break;
 	case SO_TYPE:
 	case SO_PROTOCOL:
 	case SO_DOMAIN:
@@ -972,6 +975,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_reuse;
 		break;
 
+	case SO_REUSEPORT:
+		v.val = sk->sk_reuseport;
+		break;
+
 	case SO_KEEPALIVE:
 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
 		break;
-- 
cgit v1.2.3


From ba6fdda46b377034c782c0b89c8f1090b31eabd8 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 22 Dec 2012 16:59:51 +0100
Subject: profiling: Remove unused timer hook

The last remaining user was oprofile and its use has been
removed a while ago in commit bc078e4eab65f11bba
("oprofile: convert oprofile from timer_hook to hrtimer").

There doesn't seem to be any upstream user of this hook
for about two years now. And I'm not even aware of any out of
tree user.

Let's remove it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1356191991-2251-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/profile.h | 13 -------------
 kernel/profile.c        | 24 ------------------------
 2 files changed, 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/profile.h b/include/linux/profile.h
index a0fc32279fc0..21123902366d 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -82,9 +82,6 @@ int task_handoff_unregister(struct notifier_block * n);
 int profile_event_register(enum profile_type, struct notifier_block * n);
 int profile_event_unregister(enum profile_type, struct notifier_block * n);
 
-int register_timer_hook(int (*hook)(struct pt_regs *));
-void unregister_timer_hook(int (*hook)(struct pt_regs *));
-
 struct pt_regs;
 
 #else
@@ -135,16 +132,6 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_
 #define profile_handoff_task(a) (0)
 #define profile_munmap(a) do { } while (0)
 
-static inline int register_timer_hook(int (*hook)(struct pt_regs *))
-{
-	return -ENOSYS;
-}
-
-static inline void unregister_timer_hook(int (*hook)(struct pt_regs *))
-{
-	return;
-}
-
 #endif /* CONFIG_PROFILING */
 
 #endif /* _LINUX_PROFILE_H */
diff --git a/kernel/profile.c b/kernel/profile.c
index 1f391819c42f..dc3384ee874e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -37,9 +37,6 @@ struct profile_hit {
 #define NR_PROFILE_HIT		(PAGE_SIZE/sizeof(struct profile_hit))
 #define NR_PROFILE_GRP		(NR_PROFILE_HIT/PROFILE_GRPSZ)
 
-/* Oprofile timer tick hook */
-static int (*timer_hook)(struct pt_regs *) __read_mostly;
-
 static atomic_t *prof_buffer;
 static unsigned long prof_len, prof_shift;
 
@@ -208,25 +205,6 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n)
 }
 EXPORT_SYMBOL_GPL(profile_event_unregister);
 
-int register_timer_hook(int (*hook)(struct pt_regs *))
-{
-	if (timer_hook)
-		return -EBUSY;
-	timer_hook = hook;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(register_timer_hook);
-
-void unregister_timer_hook(int (*hook)(struct pt_regs *))
-{
-	WARN_ON(hook != timer_hook);
-	timer_hook = NULL;
-	/* make sure all CPUs see the NULL hook */
-	synchronize_sched();  /* Allow ongoing interrupts to complete. */
-}
-EXPORT_SYMBOL_GPL(unregister_timer_hook);
-
-
 #ifdef CONFIG_SMP
 /*
  * Each cpu has a pair of open-addressed hashtables for pending
@@ -436,8 +414,6 @@ void profile_tick(int type)
 {
 	struct pt_regs *regs = get_irq_regs();
 
-	if (type == CPU_PROFILING && timer_hook)
-		timer_hook(regs);
 	if (!user_mode(regs) && prof_cpu_mask != NULL &&
 	    cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
 		profile_hit(type, (void *)profile_pc(regs));
-- 
cgit v1.2.3


From d437c86baacf265a640dfc462c75941d02c0e153 Mon Sep 17 00:00:00 2001
From: Bing Zhao <bzhao@marvell.com>
Date: Wed, 23 Jan 2013 20:33:58 -0800
Subject: ieee80211: define AKM suite selectors type 5, 6 and 7

Reference: IEEE 802.11-2012 8.4.2.27.3 "AKM suites"

Signed-off-by: Bing Zhao <bzhao@marvell.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ccf9ee1dca8c..11c8bc87fdcb 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1898,7 +1898,10 @@ enum ieee80211_sa_query_action {
 /* AKM suite selectors */
 #define WLAN_AKM_SUITE_8021X		0x000FAC01
 #define WLAN_AKM_SUITE_PSK		0x000FAC02
-#define WLAN_AKM_SUITE_SAE			0x000FAC08
+#define WLAN_AKM_SUITE_8021X_SHA256	0x000FAC05
+#define WLAN_AKM_SUITE_PSK_SHA256	0x000FAC06
+#define WLAN_AKM_SUITE_TDLS		0x000FAC07
+#define WLAN_AKM_SUITE_SAE		0x000FAC08
 #define WLAN_AKM_SUITE_FT_OVER_SAE	0x000FAC09
 
 #define WLAN_MAX_KEY_LEN		32
-- 
cgit v1.2.3


From 51906e779f2b13b38f8153774c4c7163d412ffd9 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Mon, 19 Nov 2012 16:01:29 +0100
Subject: x86/MSI: Support multiple MSIs in presense of IRQ remapping

The MSI specification has several constraints in comparison with
MSI-X, most notable of them is the inability to configure MSIs
independently. As a result, it is impossible to dispatch
interrupts from different queues to different CPUs. This is
largely devalues the support of multiple MSIs in SMP systems.

Also, a necessity to allocate a contiguous block of vector
numbers for devices capable of multiple MSIs might cause a
considerable pressure on x86 interrupt vector allocator and
could lead to fragmentation of the interrupt vectors space.

This patch overcomes both drawbacks in presense of IRQ remapping
and lets devices take advantage of multiple queues and per-IRQ
affinity assignments.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/c8bd86ff56b5fc118257436768aaa04489ac0a4c.1353324359.git.agordeev@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/io_apic.c | 165 +++++++++++++++++++++++++++++++++--------
 include/linux/irq.h            |   5 ++
 kernel/irq/chip.c              |  30 ++++++--
 3 files changed, 160 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d398bb29..2016f9dabd72 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -300,9 +300,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 	return cfg;
 }
 
-static int alloc_irq_from(unsigned int from, int node)
+static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
 {
-	return irq_alloc_desc_from(from, node);
+	return irq_alloc_descs_from(from, count, node);
 }
 
 static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -2982,37 +2982,58 @@ device_initcall(ioapic_init_ops);
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int from, int node)
+unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
 {
-	struct irq_cfg *cfg;
+	struct irq_cfg **cfg;
 	unsigned long flags;
-	unsigned int ret = 0;
-	int irq;
+	int irq, i;
 
 	if (from < nr_irqs_gsi)
 		from = nr_irqs_gsi;
 
-	irq = alloc_irq_from(from, node);
-	if (irq < 0)
-		return 0;
-	cfg = alloc_irq_cfg(irq, node);
-	if (!cfg) {
-		free_irq_at(irq, NULL);
+	cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
+	if (!cfg)
 		return 0;
+
+	irq = alloc_irqs_from(from, count, node);
+	if (irq < 0)
+		goto out_cfgs;
+
+	for (i = 0; i < count; i++) {
+		cfg[i] = alloc_irq_cfg(irq + i, node);
+		if (!cfg[i])
+			goto out_irqs;
 	}
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
-	if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
-		ret = irq;
+	for (i = 0; i < count; i++)
+		if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
+			goto out_vecs;
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 
-	if (ret) {
-		irq_set_chip_data(irq, cfg);
-		irq_clear_status_flags(irq, IRQ_NOREQUEST);
-	} else {
-		free_irq_at(irq, cfg);
+	for (i = 0; i < count; i++) {
+		irq_set_chip_data(irq + i, cfg[i]);
+		irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
 	}
-	return ret;
+
+	kfree(cfg);
+	return irq;
+
+out_vecs:
+	for (i--; i >= 0; i--)
+		__clear_irq_vector(irq + i, cfg[i]);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+out_irqs:
+	for (i = 0; i < count; i++)
+		free_irq_at(irq + i, cfg[i]);
+out_cfgs:
+	kfree(cfg);
+	return 0;
+}
+
+unsigned int create_irq_nr(unsigned int from, int node)
+{
+	return __create_irqs(from, 1, node);
 }
 
 int create_irq(void)
@@ -3045,6 +3066,14 @@ void destroy_irq(unsigned int irq)
 	free_irq_at(irq, cfg);
 }
 
+static inline void destroy_irqs(unsigned int irq, unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++)
+		destroy_irq(irq + i);
+}
+
 /*
  * MSI message composition
  */
@@ -3071,7 +3100,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
 
 	if (irq_remapped(cfg)) {
 		compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
-		return err;
+		return 0;
 	}
 
 	if (x2apic_enabled())
@@ -3098,7 +3127,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
 			MSI_DATA_DELIVERY_LOWPRI) |
 		MSI_DATA_VECTOR(cfg->vector);
 
-	return err;
+	return 0;
 }
 
 static int
@@ -3136,18 +3165,26 @@ static struct irq_chip msi_chip = {
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+			 unsigned int irq_base, unsigned int irq_offset)
 {
 	struct irq_chip *chip = &msi_chip;
 	struct msi_msg msg;
+	unsigned int irq = irq_base + irq_offset;
 	int ret;
 
 	ret = msi_compose_msg(dev, irq, &msg, -1);
 	if (ret < 0)
 		return ret;
 
-	irq_set_msi_desc(irq, msidesc);
-	write_msi_msg(irq, &msg);
+	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
+
+	/*
+	 * MSI-X message is written per-IRQ, the offset is always 0.
+	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+	 */
+	if (!irq_offset)
+		write_msi_msg(irq, &msg);
 
 	if (irq_remapped(irq_get_chip_data(irq))) {
 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
@@ -3161,23 +3198,19 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 	return 0;
 }
 
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int setup_msix_irqs(struct pci_dev *dev, int nvec)
 {
 	int node, ret, sub_handle, index = 0;
 	unsigned int irq, irq_want;
 	struct msi_desc *msidesc;
 
-	/* x86 doesn't support multiple MSI yet */
-	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
-
 	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
 		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
-			return -1;
+			return -ENOSPC;
 		irq_want = irq + 1;
 		if (!irq_remapping_enabled)
 			goto no_ir;
@@ -3199,7 +3232,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 				goto error;
 		}
 no_ir:
-		ret = setup_msi_irq(dev, msidesc, irq);
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
 		if (ret < 0)
 			goto error;
 		sub_handle++;
@@ -3211,6 +3244,74 @@ error:
 	return ret;
 }
 
+int setup_msi_irqs(struct pci_dev *dev, int nvec)
+{
+	int node, ret, sub_handle, index = 0;
+	unsigned int irq;
+	struct msi_desc *msidesc;
+
+	if (nvec > 1 && !irq_remapping_enabled)
+		return 1;
+
+	nvec = __roundup_pow_of_two(nvec);
+
+	WARN_ON(!list_is_singular(&dev->msi_list));
+	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+	WARN_ON(msidesc->irq);
+	WARN_ON(msidesc->msi_attrib.multiple);
+
+	node = dev_to_node(&dev->dev);
+	irq = __create_irqs(nr_irqs_gsi, nvec, node);
+	if (irq == 0)
+		return -ENOSPC;
+
+	if (!irq_remapping_enabled) {
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (ret < 0)
+			goto error;
+		return 0;
+	}
+
+	msidesc->msi_attrib.multiple = ilog2(nvec);
+	for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
+		if (!sub_handle) {
+			index = msi_alloc_remapped_irq(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			ret = msi_setup_remapped_irq(dev, irq + sub_handle,
+						     index, sub_handle);
+			if (ret < 0)
+				goto error;
+		}
+		ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
+		if (ret < 0)
+			goto error;
+	}
+	return 0;
+
+error:
+	destroy_irqs(irq, nvec);
+
+	/*
+	 * Restore altered MSI descriptor fields and prevent just destroyed
+	 * IRQs from tearing down again in default_teardown_msi_irqs()
+	 */
+	msidesc->irq = 0;
+	msidesc->msi_attrib.multiple = 0;
+
+	return ret;
+}
+
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	if (type == PCI_CAP_ID_MSI)
+		return setup_msi_irqs(dev, nvec);
+	return setup_msix_irqs(dev, nvec);
+}
+
 void native_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fdf2c4a238cc..1eab99111e94 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -528,6 +528,8 @@ extern int irq_set_handler_data(unsigned int irq, void *data);
 extern int irq_set_chip_data(unsigned int irq, void *data);
 extern int irq_set_irq_type(unsigned int irq, unsigned int type);
 extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
+extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
+				struct msi_desc *entry);
 extern struct irq_data *irq_get_irq_data(unsigned int irq);
 
 static inline struct irq_chip *irq_get_chip(unsigned int irq)
@@ -590,6 +592,9 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
 #define irq_alloc_desc_from(from, node)		\
 	irq_alloc_descs(-1, from, 1, node)
 
+#define irq_alloc_descs_from(from, cnt, node)	\
+	irq_alloc_descs(-1, from, cnt, node)
+
 void irq_free_descs(unsigned int irq, unsigned int cnt);
 int irq_reserve_irqs(unsigned int from, unsigned int cnt);
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3aca9f29d30e..cbd97ce0b000 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -90,26 +90,40 @@ int irq_set_handler_data(unsigned int irq, void *data)
 EXPORT_SYMBOL(irq_set_handler_data);
 
 /**
- *	irq_set_msi_desc - set MSI descriptor data for an irq
- *	@irq:	Interrupt number
- *	@entry:	Pointer to MSI descriptor data
+ *	irq_set_msi_desc_off - set MSI descriptor data for an irq at offset
+ *	@irq_base:	Interrupt number base
+ *	@irq_offset:	Interrupt number offset
+ *	@entry:		Pointer to MSI descriptor data
  *
- *	Set the MSI descriptor entry for an irq
+ *	Set the MSI descriptor entry for an irq at offset
  */
-int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
+int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
+			 struct msi_desc *entry)
 {
 	unsigned long flags;
-	struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+	struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
 
 	if (!desc)
 		return -EINVAL;
 	desc->irq_data.msi_desc = entry;
-	if (entry)
-		entry->irq = irq;
+	if (entry && !irq_offset)
+		entry->irq = irq_base;
 	irq_put_desc_unlock(desc, flags);
 	return 0;
 }
 
+/**
+ *	irq_set_msi_desc - set MSI descriptor data for an irq
+ *	@irq:	Interrupt number
+ *	@entry:	Pointer to MSI descriptor data
+ *
+ *	Set the MSI descriptor entry for an irq
+ */
+int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
+{
+	return irq_set_msi_desc_off(irq, 0, entry);
+}
+
 /**
  *	irq_set_chip_data - set irq chip data for an irq
  *	@irq:	Interrupt number
-- 
cgit v1.2.3


From 08261d87f7d1b6253ab3223756625a5c74532293 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Mon, 19 Nov 2012 16:02:10 +0100
Subject: PCI/MSI: Enable multiple MSIs with pci_enable_msi_block_auto()

The new function pci_enable_msi_block_auto() tries to allocate
maximum possible number of MSIs up to the number the device
supports. It generalizes a pattern when pci_enable_msi_block()
is contiguously called until it succeeds or fails.

Opposite to pci_enable_msi_block() which takes the number of
MSIs to allocate as a input parameter,
pci_enable_msi_block_auto() could be used by device drivers to
obtain the number of assigned MSIs and the number of MSIs the
device supports.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/c3de2419df94a0f95ca1a6f755afc421486455e6.1353324359.git.agordeev@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/PCI/MSI-HOWTO.txt | 37 ++++++++++++++++++++++++++++++++-----
 drivers/pci/msi.c               | 26 ++++++++++++++++++++++++++
 include/linux/pci.h             |  7 +++++++
 3 files changed, 65 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 53e6fca146d7..a09178086c30 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -127,15 +127,42 @@ on the number of vectors that can be allocated; pci_enable_msi_block()
 returns as soon as it finds any constraint that doesn't allow the
 call to succeed.
 
-4.2.3 pci_disable_msi
+4.2.3 pci_enable_msi_block_auto
+
+int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *count)
+
+This variation on pci_enable_msi() call allows a device driver to request
+the maximum possible number of MSIs.  The MSI specification only allows
+interrupts to be allocated in powers of two, up to a maximum of 2^5 (32).
+
+If this function returns a positive number, it indicates that it has
+succeeded and the returned value is the number of allocated interrupts. In
+this case, the function enables MSI on this device and updates dev->irq to
+be the lowest of the new interrupts assigned to it.  The other interrupts
+assigned to the device are in the range dev->irq to dev->irq + returned
+value - 1.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to request any more MSI interrupts for
+this device.
+
+If the device driver needs to know the number of interrupts the device
+supports it can pass the pointer count where that number is stored. The
+device driver must decide what action to take if pci_enable_msi_block_auto()
+succeeds, but returns a value less than the number of interrupts supported.
+If the device driver does not need to know the number of interrupts
+supported, it can set the pointer count to NULL.
+
+4.2.4 pci_disable_msi
 
 void pci_disable_msi(struct pci_dev *dev)
 
 This function should be used to undo the effect of pci_enable_msi() or
-pci_enable_msi_block().  Calling it restores dev->irq to the pin-based
-interrupt number and frees the previously allocated message signaled
-interrupt(s).  The interrupt may subsequently be assigned to another
-device, so drivers should not cache the value of dev->irq.
+pci_enable_msi_block() or pci_enable_msi_block_auto().  Calling it restores
+dev->irq to the pin-based interrupt number and frees the previously
+allocated message signaled interrupt(s).  The interrupt may subsequently be
+assigned to another device, so drivers should not cache the value of
+dev->irq.
 
 Before calling this function, a device driver must always call free_irq()
 on any interrupt for which it previously called request_irq().
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5099636a6e5f..00cc78c7aa04 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -845,6 +845,32 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 }
 EXPORT_SYMBOL(pci_enable_msi_block);
 
+int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
+{
+	int ret, pos, nvec;
+	u16 msgctl;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	if (!pos)
+		return -EINVAL;
+
+	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
+	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+
+	if (maxvec)
+		*maxvec = ret;
+
+	do {
+		nvec = ret;
+		ret = pci_enable_msi_block(dev, nvec);
+	} while (ret > 0);
+
+	if (ret < 0)
+		return ret;
+	return nvec;
+}
+EXPORT_SYMBOL(pci_enable_msi_block_auto);
+
 void pci_msi_shutdown(struct pci_dev *dev)
 {
 	struct msi_desc *desc;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 15472d691ee6..6fa4dd2a3b9e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1101,6 +1101,12 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 	return -1;
 }
 
+static inline int
+pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
+{
+	return -1;
+}
+
 static inline void pci_msi_shutdown(struct pci_dev *dev)
 { }
 static inline void pci_disable_msi(struct pci_dev *dev)
@@ -1132,6 +1138,7 @@ static inline int pci_msi_enabled(void)
 }
 #else
 extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
+extern int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec);
 extern void pci_msi_shutdown(struct pci_dev *dev);
 extern void pci_disable_msi(struct pci_dev *dev);
 extern int pci_msix_table_size(struct pci_dev *dev);
-- 
cgit v1.2.3


From e2905b29122173b72b612c962b138e3fa07476b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 24 Jan 2013 11:01:32 -0800
Subject: workqueue: unexport work_cpu()

This function no longer has any external users.  Unexport it.  It will
be removed later on.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/linux/workqueue.h | 1 -
 kernel/workqueue.c        | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2b58905d3504..ff68b1d95b41 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -426,7 +426,6 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
 extern void workqueue_set_max_active(struct workqueue_struct *wq,
 				     int max_active);
 extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq);
-extern unsigned int work_cpu(struct work_struct *work);
 extern unsigned int work_busy(struct work_struct *work);
 
 /*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2ffa240052fa..fe0745f54fcd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -449,6 +449,7 @@ static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = {
 };
 
 static int worker_thread(void *__worker);
+static unsigned int work_cpu(struct work_struct *work);
 
 static int worker_pool_pri(struct worker_pool *pool)
 {
@@ -3419,13 +3420,12 @@ EXPORT_SYMBOL_GPL(workqueue_congested);
  * RETURNS:
  * CPU number if @work was ever queued.  WORK_CPU_NONE otherwise.
  */
-unsigned int work_cpu(struct work_struct *work)
+static unsigned int work_cpu(struct work_struct *work)
 {
 	struct global_cwq *gcwq = get_work_gcwq(work);
 
 	return gcwq ? gcwq->cpu : WORK_CPU_NONE;
 }
-EXPORT_SYMBOL_GPL(work_cpu);
 
 /**
  * work_busy - test whether a work is currently pending or running
-- 
cgit v1.2.3


From 715b06b864c99a18cb8368dfb187da4f569788cd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 24 Jan 2013 11:01:33 -0800
Subject: workqueue: introduce WORK_OFFQ_CPU_NONE

Currently, when a work item is off queue, high bits of its data
encodes the last CPU it was on.  This is scheduled to be changed to
pool ID, which will make it impossible to use WORK_CPU_NONE to
indicate no association.

This patch limits the number of bits which are used for off-queue cpu
number to 31 (so that the max fits in an int) and uses the highest
possible value - WORK_OFFQ_CPU_NONE - to indicate no association.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/linux/workqueue.h | 10 +++++++++-
 kernel/workqueue.c        |  4 ++--
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ff68b1d95b41..f8b35763e55f 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -73,13 +73,21 @@ enum {
 
 	WORK_OFFQ_CANCELING	= (1 << WORK_OFFQ_FLAG_BASE),
 
+	/*
+	 * When a work item is off queue, its high bits point to the last
+	 * cpu it was on.  Cap at 31 bits and use the highest number to
+	 * indicate that no cpu is associated.
+	 */
 	WORK_OFFQ_FLAG_BITS	= 1,
 	WORK_OFFQ_CPU_SHIFT	= WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
+	WORK_OFFQ_LEFT		= BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT,
+	WORK_OFFQ_CPU_BITS	= WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
+	WORK_OFFQ_CPU_NONE	= (1LU << WORK_OFFQ_CPU_BITS) - 1,
 
 	/* convenience constants */
 	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
-	WORK_STRUCT_NO_CPU	= (unsigned long)WORK_CPU_NONE << WORK_OFFQ_CPU_SHIFT,
+	WORK_STRUCT_NO_CPU	= (unsigned long)WORK_OFFQ_CPU_NONE << WORK_OFFQ_CPU_SHIFT,
 
 	/* bit mask for work_busy() return values */
 	WORK_BUSY_PENDING	= 1 << 0,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1a686e481132..405282d30046 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -573,7 +573,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
 			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq;
 
 	cpu = data >> WORK_OFFQ_CPU_SHIFT;
-	if (cpu == WORK_CPU_NONE)
+	if (cpu == WORK_OFFQ_CPU_NONE)
 		return NULL;
 
 	BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND);
@@ -583,7 +583,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
 static void mark_work_canceling(struct work_struct *work)
 {
 	struct global_cwq *gcwq = get_work_gcwq(work);
-	unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
+	unsigned long cpu = gcwq ? gcwq->cpu : WORK_OFFQ_CPU_NONE;
 
 	set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING,
 		      WORK_STRUCT_PENDING);
-- 
cgit v1.2.3


From 7c3eed5cd60d0f736516e6ade77d90c6255860bd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 24 Jan 2013 11:01:33 -0800
Subject: workqueue: record pool ID instead of CPU in work->data when off-queue

Currently, when a work item is off-queue, work->data records the CPU
it was last on, which is used to locate the last executing instance
for non-reentrance, flushing, etc.

We're in the process of removing global_cwq and making worker_pool the
top level abstraction.  This patch makes work->data point to the pool
it was last associated with instead of CPU.

After the previous WORK_OFFQ_POOL_CPU and worker_poo->id additions,
the conversion is fairly straight-forward.  WORK_OFFQ constants and
functions are modified to record and read back pool ID instead.
worker_pool_by_id() is added to allow looking up pool from ID.
get_work_pool() replaces get_work_gcwq(), which is reimplemented using
get_work_pool().  get_work_pool_id() replaces work_cpu().

This patch shouldn't introduce any observable behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/linux/workqueue.h |  18 ++++----
 kernel/workqueue.c        | 111 ++++++++++++++++++++++++++++------------------
 2 files changed, 76 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f8b35763e55f..a94e4e84e7b1 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -75,19 +75,19 @@ enum {
 
 	/*
 	 * When a work item is off queue, its high bits point to the last
-	 * cpu it was on.  Cap at 31 bits and use the highest number to
-	 * indicate that no cpu is associated.
+	 * pool it was on.  Cap at 31 bits and use the highest number to
+	 * indicate that no pool is associated.
 	 */
 	WORK_OFFQ_FLAG_BITS	= 1,
-	WORK_OFFQ_CPU_SHIFT	= WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
-	WORK_OFFQ_LEFT		= BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT,
-	WORK_OFFQ_CPU_BITS	= WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
-	WORK_OFFQ_CPU_NONE	= (1LU << WORK_OFFQ_CPU_BITS) - 1,
+	WORK_OFFQ_POOL_SHIFT	= WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
+	WORK_OFFQ_LEFT		= BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
+	WORK_OFFQ_POOL_BITS	= WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
+	WORK_OFFQ_POOL_NONE	= (1LU << WORK_OFFQ_POOL_BITS) - 1,
 
 	/* convenience constants */
 	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
-	WORK_STRUCT_NO_CPU	= (unsigned long)WORK_OFFQ_CPU_NONE << WORK_OFFQ_CPU_SHIFT,
+	WORK_STRUCT_NO_POOL	= (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,
 
 	/* bit mask for work_busy() return values */
 	WORK_BUSY_PENDING	= 1 << 0,
@@ -103,9 +103,9 @@ struct work_struct {
 #endif
 };
 
-#define WORK_DATA_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
+#define WORK_DATA_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL)
 #define WORK_DATA_STATIC_INIT()	\
-	ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC)
+	ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)
 
 struct delayed_work {
 	struct work_struct work;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9c6ad974bb9e..a4d7e3f0a874 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -451,7 +451,6 @@ static DEFINE_MUTEX(worker_pool_idr_mutex);
 static DEFINE_IDR(worker_pool_idr);
 
 static int worker_thread(void *__worker);
-static unsigned int work_cpu(struct work_struct *work);
 
 static int std_worker_pool_pri(struct worker_pool *pool)
 {
@@ -479,6 +478,15 @@ static int worker_pool_assign_id(struct worker_pool *pool)
 	return ret;
 }
 
+/*
+ * Lookup worker_pool by id.  The idr currently is built during boot and
+ * never modified.  Don't worry about locking for now.
+ */
+static struct worker_pool *worker_pool_by_id(int pool_id)
+{
+	return idr_find(&worker_pool_idr, pool_id);
+}
+
 static atomic_t *get_pool_nr_running(struct worker_pool *pool)
 {
 	int cpu = pool->gcwq->cpu;
@@ -520,17 +528,17 @@ static int work_next_color(int color)
 /*
  * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data
  * contain the pointer to the queued cwq.  Once execution starts, the flag
- * is cleared and the high bits contain OFFQ flags and CPU number.
+ * is cleared and the high bits contain OFFQ flags and pool ID.
  *
- * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling()
- * and clear_work_data() can be used to set the cwq, cpu or clear
+ * set_work_cwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
+ * and clear_work_data() can be used to set the cwq, pool or clear
  * work->data.  These functions should only be called while the work is
  * owned - ie. while the PENDING bit is set.
  *
- * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to
- * a work.  gcwq is available once the work has been queued anywhere after
- * initialization until it is sync canceled.  cwq is available only while
- * the work item is queued.
+ * get_work_pool() and get_work_cwq() can be used to obtain the pool or cwq
+ * corresponding to a work.  Pool is available once the work has been
+ * queued anywhere after initialization until it is sync canceled.  cwq is
+ * available only while the work item is queued.
  *
  * %WORK_OFFQ_CANCELING is used to mark a work item which is being
  * canceled.  While being canceled, a work item may have its PENDING set
@@ -552,8 +560,8 @@ static void set_work_cwq(struct work_struct *work,
 		      WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
 }
 
-static void set_work_cpu_and_clear_pending(struct work_struct *work,
-					   unsigned int cpu)
+static void set_work_pool_and_clear_pending(struct work_struct *work,
+					    int pool_id)
 {
 	/*
 	 * The following wmb is paired with the implied mb in
@@ -562,13 +570,13 @@ static void set_work_cpu_and_clear_pending(struct work_struct *work,
 	 * owner.
 	 */
 	smp_wmb();
-	set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0);
+	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
 }
 
 static void clear_work_data(struct work_struct *work)
 {
-	smp_wmb();	/* see set_work_cpu_and_clear_pending() */
-	set_work_data(work, WORK_STRUCT_NO_CPU, 0);
+	smp_wmb();	/* see set_work_pool_and_clear_pending() */
+	set_work_data(work, WORK_STRUCT_NO_POOL, 0);
 }
 
 static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
@@ -581,30 +589,58 @@ static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
 		return NULL;
 }
 
-static struct global_cwq *get_work_gcwq(struct work_struct *work)
+/**
+ * get_work_pool - return the worker_pool a given work was associated with
+ * @work: the work item of interest
+ *
+ * Return the worker_pool @work was last associated with.  %NULL if none.
+ */
+static struct worker_pool *get_work_pool(struct work_struct *work)
 {
 	unsigned long data = atomic_long_read(&work->data);
-	unsigned int cpu;
+	struct worker_pool *pool;
+	int pool_id;
 
 	if (data & WORK_STRUCT_CWQ)
 		return ((struct cpu_workqueue_struct *)
-			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq;
+			(data & WORK_STRUCT_WQ_DATA_MASK))->pool;
 
-	cpu = data >> WORK_OFFQ_CPU_SHIFT;
-	if (cpu == WORK_OFFQ_CPU_NONE)
+	pool_id = data >> WORK_OFFQ_POOL_SHIFT;
+	if (pool_id == WORK_OFFQ_POOL_NONE)
 		return NULL;
 
-	BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND);
-	return get_gcwq(cpu);
+	pool = worker_pool_by_id(pool_id);
+	WARN_ON_ONCE(!pool);
+	return pool;
+}
+
+/**
+ * get_work_pool_id - return the worker pool ID a given work is associated with
+ * @work: the work item of interest
+ *
+ * Return the worker_pool ID @work was last associated with.
+ * %WORK_OFFQ_POOL_NONE if none.
+ */
+static int get_work_pool_id(struct work_struct *work)
+{
+	struct worker_pool *pool = get_work_pool(work);
+
+	return pool ? pool->id : WORK_OFFQ_POOL_NONE;
+}
+
+static struct global_cwq *get_work_gcwq(struct work_struct *work)
+{
+	struct worker_pool *pool = get_work_pool(work);
+
+	return pool ? pool->gcwq : NULL;
 }
 
 static void mark_work_canceling(struct work_struct *work)
 {
-	struct global_cwq *gcwq = get_work_gcwq(work);
-	unsigned long cpu = gcwq ? gcwq->cpu : WORK_OFFQ_CPU_NONE;
+	unsigned long pool_id = get_work_pool_id(work);
 
-	set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING,
-		      WORK_STRUCT_PENDING);
+	pool_id <<= WORK_OFFQ_POOL_SHIFT;
+	set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
 }
 
 static bool work_is_canceling(struct work_struct *work)
@@ -2192,12 +2228,12 @@ __acquires(&gcwq->lock)
 		wake_up_worker(pool);
 
 	/*
-	 * Record the last CPU and clear PENDING which should be the last
+	 * Record the last pool and clear PENDING which should be the last
 	 * update to @work.  Also, do this inside @gcwq->lock so that
 	 * PENDING and queued state changes happen together while IRQ is
 	 * disabled.
 	 */
-	set_work_cpu_and_clear_pending(work, gcwq->cpu);
+	set_work_pool_and_clear_pending(work, pool->id);
 
 	spin_unlock_irq(&gcwq->lock);
 
@@ -2967,7 +3003,8 @@ bool cancel_delayed_work(struct delayed_work *dwork)
 	if (unlikely(ret < 0))
 		return false;
 
-	set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work));
+	set_work_pool_and_clear_pending(&dwork->work,
+					get_work_pool_id(&dwork->work));
 	local_irq_restore(flags);
 	return ret;
 }
@@ -3430,20 +3467,6 @@ bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL_GPL(workqueue_congested);
 
-/**
- * work_cpu - return the last known associated cpu for @work
- * @work: the work of interest
- *
- * RETURNS:
- * CPU number if @work was ever queued.  WORK_CPU_NONE otherwise.
- */
-static unsigned int work_cpu(struct work_struct *work)
-{
-	struct global_cwq *gcwq = get_work_gcwq(work);
-
-	return gcwq ? gcwq->cpu : WORK_CPU_NONE;
-}
-
 /**
  * work_busy - test whether a work is currently pending or running
  * @work: the work to be tested
@@ -3816,9 +3839,9 @@ static int __init init_workqueues(void)
 {
 	unsigned int cpu;
 
-	/* make sure we have enough bits for OFFQ CPU number */
-	BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) <
-		     WORK_CPU_LAST);
+	/* make sure we have enough bits for OFFQ pool ID */
+	BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
+		     WORK_CPU_LAST * NR_STD_WORKER_POOLS);
 
 	cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
 	hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
-- 
cgit v1.2.3


From f2f6c2556dcc432e50003bc8fa4d62d95906f149 Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Fri, 4 Jan 2013 12:30:52 +0530
Subject: clk: add common of_clk_init() function

Modify of_clk_init function so that it will determine which
driver to initialize based on device tree instead of each driver
registering to it.

Based on a similar patch for drivers/irqchip by Thomas Petazzoni and
drivers/clocksource by Stephen Warren.

Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Tested-by: Tony Prisk <linux@prisktech.co.nz>
Tested-by: Pawel Moll <pawel.moll@arm.com>
Tested-by: Rob Herring <rob.herring@calxeda.com>
Tested-by: Josh Cartwright <josh.cartwright@ni.com>
Reviewed-by: Josh Cartwright <josh.cartwright@ni.com>
Acked-by: Maxime Ripard <maxime.ripard@anandra.org>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
[mturquette@linaro.org: merge conflict from missing CLKSRC_OF_TABLES()]

Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-fixed-rate.c      |  1 +
 drivers/clk/clk.c                 |  9 +++++++++
 include/asm-generic/vmlinux.lds.h | 10 ++++++++++
 include/linux/clk-provider.h      |  6 ++++++
 4 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index a53b53be3d65..dc58fbd8516f 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c
@@ -101,4 +101,5 @@ void of_fixed_clk_setup(struct device_node *node)
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 }
 EXPORT_SYMBOL_GPL(of_fixed_clk_setup);
+CLK_OF_DECLARE(fixed_clk, "fixed-clock", of_fixed_clk_setup);
 #endif
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index ad2ac94fede8..fabbfe1a9253 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/device.h>
+#include <linux/init.h>
 
 static DEFINE_SPINLOCK(enable_lock);
 static DEFINE_MUTEX(prepare_lock);
@@ -1803,6 +1804,11 @@ struct of_clk_provider {
 	void *data;
 };
 
+extern struct of_device_id __clk_of_table[];
+
+static const struct of_device_id __clk_of_table_sentinel
+	__used __section(__clk_of_table_end);
+
 static LIST_HEAD(of_clk_providers);
 static DEFINE_MUTEX(of_clk_lock);
 
@@ -1931,6 +1937,9 @@ void __init of_clk_init(const struct of_device_id *matches)
 {
 	struct device_node *np;
 
+	if (!matches)
+		matches = __clk_of_table;
+
 	for_each_matching_node(np, matches) {
 		const struct of_device_id *match = of_match_node(matches, np);
 		of_clk_init_cb_t clk_init_cb = match->data;
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index d1ea7ce0b4cb..c1fe60ad1540 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -150,6 +150,15 @@
 #endif
 
 
+#ifdef CONFIG_COMMON_CLK
+#define CLK_OF_TABLES() . = ALIGN(8);				\
+			VMLINUX_SYMBOL(__clk_of_table) = .;	\
+			*(__clk_of_table)			\
+			*(__clk_of_table_end)
+#else
+#define CLK_OF_TABLES()
+#endif
+
 #define KERNEL_DTB()							\
 	STRUCT_ALIGN();							\
 	VMLINUX_SYMBOL(__dtb_start) = .;				\
@@ -493,6 +502,7 @@
 	DEV_DISCARD(init.rodata)					\
 	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.rodata)					\
+	CLK_OF_TABLES()							\
 	KERNEL_DTB()
 
 #define INIT_TEXT							\
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 4989b8a7bed1..7f197d7addb0 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -379,7 +379,13 @@ struct clk_onecell_data {
 };
 struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data);
 const char *of_clk_get_parent_name(struct device_node *np, int index);
+
 void of_clk_init(const struct of_device_id *matches);
 
+#define CLK_OF_DECLARE(name, compat, fn)			\
+	static const struct of_device_id __clk_of_table_##name	\
+		__used __section(__clk_of_table)		\
+		= { .compatible = compat, .data = fn };
+
 #endif /* CONFIG_COMMON_CLK */
 #endif /* CLK_PROVIDER_H */
-- 
cgit v1.2.3


From 85a181986c9cf8bbd2c4f2fb6f2add7ac5db1f76 Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Fri, 4 Jan 2013 12:30:54 +0530
Subject: clk: sunxi: Use common of_clk_init() function

Use common of_clk_init() function to initialize clocks.

Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Acked-by: Maxime Ripard <maxime.ripard@anandra.org>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-sunxi.c           | 30 ------------------------------
 drivers/clocksource/sunxi_timer.c |  4 ++--
 include/linux/clk/sunxi.h         | 22 ----------------------
 3 files changed, 2 insertions(+), 54 deletions(-)
 delete mode 100644 drivers/clk/clk-sunxi.c
 delete mode 100644 include/linux/clk/sunxi.h

(limited to 'include/linux')

diff --git a/drivers/clk/clk-sunxi.c b/drivers/clk/clk-sunxi.c
deleted file mode 100644
index 0e831b584ba7..000000000000
--- a/drivers/clk/clk-sunxi.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2012 Maxime Ripard
- *
- * Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/clk-provider.h>
-#include <linux/clkdev.h>
-#include <linux/clk/sunxi.h>
-#include <linux/of.h>
-
-static const __initconst struct of_device_id clk_match[] = {
-	{ .compatible = "fixed-clock", .data = of_fixed_clk_setup, },
-	{}
-};
-
-void __init sunxi_init_clocks(void)
-{
-	of_clk_init(clk_match);
-}
diff --git a/drivers/clocksource/sunxi_timer.c b/drivers/clocksource/sunxi_timer.c
index 3cd1bd3d7aee..93d09d0e009f 100644
--- a/drivers/clocksource/sunxi_timer.c
+++ b/drivers/clocksource/sunxi_timer.c
@@ -23,7 +23,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/sunxi_timer.h>
-#include <linux/clk/sunxi.h>
+#include <linux/clk-provider.h>
 
 #define TIMER_CTL_REG		0x00
 #define TIMER_CTL_ENABLE		(1 << 0)
@@ -124,7 +124,7 @@ static void __init sunxi_timer_init(void)
 	if (irq <= 0)
 		panic("Can't parse IRQ");
 
-	sunxi_init_clocks();
+	of_clk_init(NULL);
 
 	clk = of_clk_get(node, 0);
 	if (IS_ERR(clk))
diff --git a/include/linux/clk/sunxi.h b/include/linux/clk/sunxi.h
deleted file mode 100644
index e074fdd5a236..000000000000
--- a/include/linux/clk/sunxi.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 2012 Maxime Ripard
- *
- * Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __LINUX_CLK_SUNXI_H_
-#define __LINUX_CLK_SUNXI_H_
-
-void __init sunxi_init_clocks(void);
-
-#endif
-- 
cgit v1.2.3


From ace783b9bbfa2182b4a561498db3f09a0c56bc79 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Thu, 24 Jan 2013 14:30:48 +0800
Subject: sched: split out css_online/css_offline from tg creation/destruction

This is a preparaton for later patches.

- What do we gain from cpu_cgroup_css_online():

After ss->css_alloc() and before ss->css_online(), there's a small
window that tg->css.cgroup is NULL. With this change, tg won't be seen
before ss->css_online(), where it's added to the global list, so we're
guaranteed we'll never see NULL tg->css.cgroup.

- What do we gain from cpu_cgroup_css_offline():

tg is freed via RCU, so is cgroup. Without this change, This is how
synchronization works:

cgroup_rmdir()
  no ss->css_offline()
diput()
  syncornize_rcu()
  ss->css_free()       <-- unregister tg, and free it via call_rcu()
  kfree_rcu(cgroup)    <-- wait possible refs to cgroup, and free cgroup

We can't just kfree(cgroup), because tg might access tg->css.cgroup.

With this change:

cgroup_rmdir()
  ss->css_offline()    <-- unregister tg
diput()
  synchronize_rcu()    <-- wait possible refs to tg and cgroup
  ss->css_free()       <-- free tg
  kfree_rcu(cgroup)    <-- free cgroup

As you see, kfree_rcu() is redundant now.

Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h     |  3 +++
 kernel/sched/auto_group.c |  3 +++
 kernel/sched/core.c       | 49 +++++++++++++++++++++++++++++++++++++----------
 3 files changed, 45 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206bb089c06b..577eb973de7a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2750,7 +2750,10 @@ extern void normalize_rt_tasks(void);
 extern struct task_group root_task_group;
 
 extern struct task_group *sched_create_group(struct task_group *parent);
+extern void sched_online_group(struct task_group *tg,
+			       struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
+extern void sched_offline_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 0984a21076a3..64de5f8b0c9e 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -35,6 +35,7 @@ static inline void autogroup_destroy(struct kref *kref)
 	ag->tg->rt_se = NULL;
 	ag->tg->rt_rq = NULL;
 #endif
+	sched_offline_group(ag->tg);
 	sched_destroy_group(ag->tg);
 }
 
@@ -76,6 +77,8 @@ static inline struct autogroup *autogroup_create(void)
 	if (IS_ERR(tg))
 		goto out_free;
 
+	sched_online_group(tg, &root_task_group);
+
 	kref_init(&ag->kref);
 	init_rwsem(&ag->lock);
 	ag->id = atomic_inc_return(&autogroup_seq_nr);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb0..106167243d68 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7159,7 +7159,6 @@ static void free_sched_group(struct task_group *tg)
 struct task_group *sched_create_group(struct task_group *parent)
 {
 	struct task_group *tg;
-	unsigned long flags;
 
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
@@ -7171,6 +7170,17 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
+	return tg;
+
+err:
+	free_sched_group(tg);
+	return ERR_PTR(-ENOMEM);
+}
+
+void sched_online_group(struct task_group *tg, struct task_group *parent)
+{
+	unsigned long flags;
+
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_add_rcu(&tg->list, &task_groups);
 
@@ -7180,12 +7190,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 	INIT_LIST_HEAD(&tg->children);
 	list_add_rcu(&tg->siblings, &parent->children);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	return tg;
-
-err:
-	free_sched_group(tg);
-	return ERR_PTR(-ENOMEM);
 }
 
 /* rcu callback to free various structures associated with a task group */
@@ -7197,6 +7201,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
 
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
+{
+	/* wait for possible concurrent references to cfs_rqs complete */
+	call_rcu(&tg->rcu, free_sched_group_rcu);
+}
+
+void sched_offline_group(struct task_group *tg)
 {
 	unsigned long flags;
 	int i;
@@ -7209,9 +7219,6 @@ void sched_destroy_group(struct task_group *tg)
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
 }
 
 /* change task's runqueue when it moves between groups.
@@ -7563,6 +7570,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 	return &tg->css;
 }
 
+static int cpu_cgroup_css_online(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *parent;
+
+	if (!cgrp->parent)
+		return 0;
+
+	parent = cgroup_tg(cgrp->parent);
+	sched_online_group(tg, parent);
+	return 0;
+}
+
 static void cpu_cgroup_css_free(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
@@ -7570,6 +7590,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
 	sched_destroy_group(tg);
 }
 
+static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+
+	sched_offline_group(tg);
+}
+
 static int cpu_cgroup_can_attach(struct cgroup *cgrp,
 				 struct cgroup_taskset *tset)
 {
@@ -7925,6 +7952,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.css_alloc	= cpu_cgroup_css_alloc,
 	.css_free	= cpu_cgroup_css_free,
+	.css_online	= cpu_cgroup_css_online,
+	.css_offline	= cpu_cgroup_css_offline,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
-- 
cgit v1.2.3


From be44562613851235d801d41d5b3976dc4333f622 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Thu, 24 Jan 2013 14:31:42 +0800
Subject: cgroup: remove synchronize_rcu() from cgroup_diput()

Free cgroup via call_rcu(). The actual work is done through
workqueue.

Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 72 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 44 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8118a3120378..900af5964f55 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -203,6 +203,7 @@ struct cgroup {
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
+	struct work_struct free_work;
 
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index af993919aa04..02e4f201472e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -852,12 +852,52 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 	return inode;
 }
 
+static void cgroup_free_fn(struct work_struct *work)
+{
+	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
+	struct cgroup_subsys *ss;
+
+	mutex_lock(&cgroup_mutex);
+	/*
+	 * Release the subsystem state objects.
+	 */
+	for_each_subsys(cgrp->root, ss)
+		ss->css_free(cgrp);
+
+	cgrp->root->number_of_cgroups--;
+	mutex_unlock(&cgroup_mutex);
+
+	/*
+	 * Drop the active superblock reference that we took when we
+	 * created the cgroup
+	 */
+	deactivate_super(cgrp->root->sb);
+
+	/*
+	 * if we're getting rid of the cgroup, refcount should ensure
+	 * that there are no pidlists left.
+	 */
+	BUG_ON(!list_empty(&cgrp->pidlists));
+
+	simple_xattrs_free(&cgrp->xattrs);
+
+	ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+	kfree(cgrp);
+}
+
+static void cgroup_free_rcu(struct rcu_head *head)
+{
+	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
+
+	schedule_work(&cgrp->free_work);
+}
+
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
 	/* is dentry a directory ? if so, kfree() associated cgroup */
 	if (S_ISDIR(inode->i_mode)) {
 		struct cgroup *cgrp = dentry->d_fsdata;
-		struct cgroup_subsys *ss;
+
 		BUG_ON(!(cgroup_is_removed(cgrp)));
 		/* It's possible for external users to be holding css
 		 * reference counts on a cgroup; css_put() needs to
@@ -865,34 +905,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 * the reference count in order to know if it needs to
 		 * queue the cgroup to be handled by the release
 		 * agent */
-		synchronize_rcu();
-
-		mutex_lock(&cgroup_mutex);
-		/*
-		 * Release the subsystem state objects.
-		 */
-		for_each_subsys(cgrp->root, ss)
-			ss->css_free(cgrp);
-
-		cgrp->root->number_of_cgroups--;
-		mutex_unlock(&cgroup_mutex);
-
-		/*
-		 * Drop the active superblock reference that we took when we
-		 * created the cgroup
-		 */
-		deactivate_super(cgrp->root->sb);
-
-		/*
-		 * if we're getting rid of the cgroup, refcount should ensure
-		 * that there are no pidlists left.
-		 */
-		BUG_ON(!list_empty(&cgrp->pidlists));
-
-		simple_xattrs_free(&cgrp->xattrs);
-
-		ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
-		kfree(cgrp);
+		call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
 	} else {
 		struct cfent *cfe = __d_cfe(dentry);
 		struct cgroup *cgrp = dentry->d_parent->d_fsdata;
@@ -1391,6 +1404,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->allcg_node);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
+	INIT_WORK(&cgrp->free_work, cgroup_free_fn);
 	mutex_init(&cgrp->pidlist_mutex);
 	INIT_LIST_HEAD(&cgrp->event_list);
 	spin_lock_init(&cgrp->event_list_lock);
-- 
cgit v1.2.3


From 57d2aa00dcec67afa52478730f2b524521af14fb Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Tue, 17 Jul 2012 15:03:43 +0800
Subject: sched/rt: Avoid updating RT entry timeout twice within one tick
 period

The issue below was found in 2.6.34-rt rather than mainline rt
kernel, but the issue still exists upstream as well.

So please let me describe how it was noticed on 2.6.34-rt:

On this version, each softirq has its own thread, it means there
is at least one RT FIFO task per cpu. The priority of these
tasks is set to 49 by default. If user launches an RT FIFO task
with priority lower than 49 of softirq RT tasks, it's possible
there are two RT FIFO tasks enqueued one cpu runqueue at one
moment. By current strategy of balancing RT tasks, when it comes
to RT tasks, we really need to put them off to a CPU that they
can run on as soon as possible. Even if it means a bit of cache
line flushing, we want RT tasks to be run with the least latency.

When the user RT FIFO task which just launched before is
running, the sched timer tick of the current cpu happens. In this
tick period, the timeout value of the user RT task will be
updated once. Subsequently, we try to wake up one softirq RT
task on its local cpu. As the priority of current user RT task
is lower than the softirq RT task, the current task will be
preempted by the higher priority softirq RT task. Before
preemption, we check to see if current can readily move to a
different cpu. If so, we will reschedule to allow the RT push logic
to try to move current somewhere else. Whenever the woken
softirq RT task runs, it first tries to migrate the user FIFO RT
task over to a cpu that is running a task of lesser priority. If
migration is done, it will send a reschedule request to the found
cpu by IPI interrupt. Once the target cpu responds the IPI
interrupt, it will pick the migrated user RT task to preempt its
current task. When the user RT task is running on the new cpu,
the sched timer tick of the cpu fires. So it will tick the user
RT task again. This also means the RT task timeout value will be
updated again. As the migration may be done in one tick period,
it means the user RT task timeout value will be updated twice
within one tick.

If we set a limit on the amount of cpu time for the user RT task
by setrlimit(RLIMIT_RTTIME), the SIGXCPU signal should be posted
upon reaching the soft limit.

But exactly when the SIGXCPU signal should be sent depends on the
RT task timeout value. In fact the timeout mechanism of sending
the SIGXCPU signal assumes the RT task timeout is increased once
every tick.

However, currently the timeout value may be added twice per
tick. So it results in the SIGXCPU signal being sent earlier
than expected.

To solve this issue, we prevent the timeout value from increasing
twice within one tick time by remembering the jiffies value of
last updating the timeout. As long as the RT task's jiffies is
different with the global jiffies value, we allow its timeout to
be updated.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Fan Du <fan.du@windriver.com>
Reviewed-by: Yong Zhang <yong.zhang0@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1342508623-2887-1-git-send-email-ying.xue@windriver.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 kernel/sched/rt.c     | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2112477ff5e..924e42a8df58 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1208,6 +1208,7 @@ struct sched_entity {
 struct sched_rt_entity {
 	struct list_head run_list;
 	unsigned long timeout;
+	unsigned long watchdog_stamp;
 	unsigned int time_slice;
 
 	struct sched_rt_entity *back;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 29bda5bdf2a5..2f69ca997826 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1988,7 +1988,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 	if (soft != RLIM_INFINITY) {
 		unsigned long next;
 
-		p->rt.timeout++;
+		if (p->rt.watchdog_stamp != jiffies) {
+			p->rt.timeout++;
+			p->rt.watchdog_stamp = jiffies;
+		}
+
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
 			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
-- 
cgit v1.2.3


From 0bb8f3d6ae621945e6fa2102aa894f72b76a023e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 25 Jan 2013 21:51:13 +0100
Subject: sysfs: Functions for adding/removing symlinks to/from attribute
 groups

The most convenient way to expose ACPI power resources lists of a
device is to put symbolic links to sysfs directories representing
those resources into special attribute groups in the device's sysfs
directory.  For this purpose, it is necessary to be able to add
symbolic links to attribute groups.

For this reason, add sysfs helper functions for adding/removing
symbolic links to/from attribute groups, sysfs_add_link_to_group()
and sysfs_remove_link_from_group(), respectively.

This change set includes a build fix from David Rientjes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/group.c      | 42 ++++++++++++++++++++++++++++++++++++++++++
 fs/sysfs/symlink.c    | 45 ++++++++++++++++++++++++++++++++-------------
 fs/sysfs/sysfs.h      |  2 ++
 include/linux/sysfs.h | 16 ++++++++++++++++
 4 files changed, 92 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2df555c66d57..aec3d5c98c94 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -205,6 +205,48 @@ void sysfs_unmerge_group(struct kobject *kobj,
 }
 EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
 
+/**
+ * sysfs_add_link_to_group - add a symlink to an attribute group.
+ * @kobj:	The kobject containing the group.
+ * @group_name:	The name of the group.
+ * @target:	The target kobject of the symlink to create.
+ * @link_name:	The name of the symlink to create.
+ */
+int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
+			    struct kobject *target, const char *link_name)
+{
+	struct sysfs_dirent *dir_sd;
+	int error = 0;
+
+	dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+	if (!dir_sd)
+		return -ENOENT;
+
+	error = sysfs_create_link_sd(dir_sd, target, link_name);
+	sysfs_put(dir_sd);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
+
+/**
+ * sysfs_remove_link_from_group - remove a symlink from an attribute group.
+ * @kobj:	The kobject containing the group.
+ * @group_name:	The name of the group.
+ * @link_name:	The name of the symlink to remove.
+ */
+void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
+				  const char *link_name)
+{
+	struct sysfs_dirent *dir_sd;
+
+	dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+	if (dir_sd) {
+		sysfs_hash_and_remove(dir_sd, NULL, link_name);
+		sysfs_put(dir_sd);
+	}
+}
+EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
 
 EXPORT_SYMBOL_GPL(sysfs_create_group);
 EXPORT_SYMBOL_GPL(sysfs_update_group);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3c9eb5624f5e..8c940df97a52 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -21,26 +21,17 @@
 
 #include "sysfs.h"
 
-static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
-				const char *name, int warn)
+static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
+				   struct kobject *target,
+				   const char *name, int warn)
 {
-	struct sysfs_dirent *parent_sd = NULL;
 	struct sysfs_dirent *target_sd = NULL;
 	struct sysfs_dirent *sd = NULL;
 	struct sysfs_addrm_cxt acxt;
 	enum kobj_ns_type ns_type;
 	int error;
 
-	BUG_ON(!name);
-
-	if (!kobj)
-		parent_sd = &sysfs_root;
-	else
-		parent_sd = kobj->sd;
-
-	error = -EFAULT;
-	if (!parent_sd)
-		goto out_put;
+	BUG_ON(!name || !parent_sd);
 
 	/* target->sd can go away beneath us but is protected with
 	 * sysfs_assoc_lock.  Fetch target_sd from it.
@@ -95,6 +86,34 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
 	return error;
 }
 
+/**
+ *	sysfs_create_link_sd - create symlink to a given object.
+ *	@sd:		directory we're creating the link in.
+ *	@target:	object we're pointing to.
+ *	@name:		name of the symlink.
+ */
+int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
+			 const char *name)
+{
+	return sysfs_do_create_link_sd(sd, target, name, 1);
+}
+
+static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
+				const char *name, int warn)
+{
+	struct sysfs_dirent *parent_sd = NULL;
+
+	if (!kobj)
+		parent_sd = &sysfs_root;
+	else
+		parent_sd = kobj->sd;
+
+	if (!parent_sd)
+		return -EFAULT;
+
+	return sysfs_do_create_link_sd(parent_sd, target, name, warn);
+}
+
 /**
  *	sysfs_create_link - create symlink between two objects.
  *	@kobj:	object whose directory we're creating the link in.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d73c0932bbd6..d1e4043eb0c3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -240,3 +240,5 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd);
  * symlink.c
  */
 extern const struct inode_operations sysfs_symlink_inode_operations;
+int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
+			 const char *name);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 381f06db2fe5..e2cee22f578a 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -181,6 +181,10 @@ int sysfs_merge_group(struct kobject *kobj,
 		       const struct attribute_group *grp);
 void sysfs_unmerge_group(struct kobject *kobj,
 		       const struct attribute_group *grp);
+int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
+			    struct kobject *target, const char *link_name);
+void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
+				  const char *link_name);
 
 void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
 void sysfs_notify_dirent(struct sysfs_dirent *sd);
@@ -326,6 +330,18 @@ static inline void sysfs_unmerge_group(struct kobject *kobj,
 {
 }
 
+static inline int sysfs_add_link_to_group(struct kobject *kobj,
+		const char *group_name, struct kobject *target,
+		const char *link_name)
+{
+	return 0;
+}
+
+static inline void sysfs_remove_link_from_group(struct kobject *kobj,
+		const char *group_name, const char *link_name)
+{
+}
+
 static inline void sysfs_notify(struct kobject *kobj, const char *dir,
 				const char *attr)
 {
-- 
cgit v1.2.3


From fbadc58dd3a52c330c8f3926aa93011bf9d91fa0 Mon Sep 17 00:00:00 2001
From: ShuoX Liu <shuox.liu@intel.com>
Date: Wed, 23 Jan 2013 21:49:37 +0100
Subject: PM / Runtime: Add new helper function: pm_runtime_active()

This boolean function simply returns whether or not the runtime
status of the device is 'active'. The typical scenario is driver
calls pm_runtime_get firstly, then check pm_runtime_active in
atomic environment.

Also add entry to Documentation/power/runtime.txt

Signed-off-by: Yanmin Zhang <yanmin.zhang@intel.com>
Signed-off-by: ShuoX Liu <shuox.liu@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/runtime_pm.txt | 4 ++++
 include/linux/pm_runtime.h         | 7 +++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 03591a750f99..6c9f5d9aa115 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -426,6 +426,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
       'power.runtime_error' is set or 'power.disable_depth' is greater than
       zero)
 
+  bool pm_runtime_active(struct device *dev);
+    - return true if the device's runtime PM status is 'active' or its
+      'power.disable_depth' field is not equal to zero, or false otherwise
+
   bool pm_runtime_suspended(struct device *dev);
     - return true if the device's runtime PM status is 'suspended' and its
       'power.disable_depth' field is equal to zero, or false otherwise
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index f271860c78d5..c785c215abfc 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -80,6 +80,12 @@ static inline bool pm_runtime_suspended(struct device *dev)
 		&& !dev->power.disable_depth;
 }
 
+static inline bool pm_runtime_active(struct device *dev)
+{
+	return dev->power.runtime_status == RPM_ACTIVE
+		|| dev->power.disable_depth;
+}
+
 static inline bool pm_runtime_status_suspended(struct device *dev)
 {
 	return dev->power.runtime_status == RPM_SUSPENDED;
@@ -132,6 +138,7 @@ static inline void pm_runtime_put_noidle(struct device *dev) {}
 static inline bool device_run_wake(struct device *dev) { return false; }
 static inline void device_set_run_wake(struct device *dev, bool enable) {}
 static inline bool pm_runtime_suspended(struct device *dev) { return false; }
+static inline bool pm_runtime_active(struct device *dev) { return true; }
 static inline bool pm_runtime_status_suspended(struct device *dev) { return false; }
 static inline bool pm_runtime_enabled(struct device *dev) { return false; }
 
-- 
cgit v1.2.3


From c25fb816298138a4b12c606f0aaa018bdd3cc09c Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 9 Jan 2013 08:12:46 -0800
Subject: hwmon: Retire SENSORS_LIMIT

SENSORS_LIMIT and clamp_val have the same functionality, so retire SENSORS_LIMIT
as it is no longer needed.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/hwmon/sysfs-interface |  8 ++++----
 include/linux/hwmon.h               | 12 ------------
 2 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index 1f4dd855a299..79f8257dd790 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -722,14 +722,14 @@ add/subtract if it has been divided before the add/subtract.
 What to do if a value is found to be invalid, depends on the type of the
 sysfs attribute that is being set. If it is a continuous setting like a
 tempX_max or inX_max attribute, then the value should be clamped to its
-limits using SENSORS_LIMIT(value, min_limit, max_limit). If it is not
-continuous like for example a tempX_type, then when an invalid value is
-written, -EINVAL should be returned.
+limits using clamp_val(value, min_limit, max_limit). If it is not continuous
+like for example a tempX_type, then when an invalid value is written,
+-EINVAL should be returned.
 
 Example1, temp1_max, register is a signed 8 bit value (-128 - 127 degrees):
 
 	long v = simple_strtol(buf, NULL, 10) / 1000;
-	v = SENSORS_LIMIT(v, -128, 127);
+	v = clamp_val(v, -128, 127);
 	/* write v to register */
 
 Example2, fan divider setting, valid values 2, 4 and 8:
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 82b29ae6ebb0..b2514f70d591 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -20,16 +20,4 @@ struct device *hwmon_device_register(struct device *dev);
 
 void hwmon_device_unregister(struct device *dev);
 
-/* Scale user input to sensible values */
-static inline int SENSORS_LIMIT(long value, long low, long high)
-{
-	if (value < low)
-		return low;
-	else if (value > high)
-		return high;
-	else
-		return value;
-}
-
 #endif
-
-- 
cgit v1.2.3


From 3343b7a6d2cd0a980d0c4a0ce02ef48b6bfcc12a Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 22 Jan 2013 12:26:27 +0200
Subject: spi/pxa2xx: convert to the common clk framework

Convert clk_enable() to clk_prepare_enable() and clk_disable() to
clk_disable_unprepare() respectively in order to support the common clk
framework. Otherwise we get warnings on the console as the clock is not
prepared before it is enabled.

In addition we must cache the maximum clock rate to drv_data->max_clk_rate
at probe time because clk_get_rate() cannot be called in tasklet context.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/spi-pxa2xx.c       | 33 ++++++++++++++++++++-------------
 include/linux/spi/pxa2xx_spi.h | 18 ------------------
 2 files changed, 20 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index a241891355db..304cf6eb50e6 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/gpio.h>
 #include <linux/slab.h>
+#include <linux/clk.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -114,6 +115,9 @@ struct driver_data {
 	u32 clear_sr;
 	u32 mask_sr;
 
+	/* Maximun clock rate */
+	unsigned long max_clk_rate;
+
 	/* Message Transfer pump */
 	struct tasklet_struct pump_transfers;
 
@@ -891,9 +895,12 @@ static int set_dma_burst_and_threshold(struct chip_data *chip,
 	return retval;
 }
 
-static unsigned int ssp_get_clk_div(struct ssp_device *ssp, int rate)
+static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
 {
-	unsigned long ssp_clk = clk_get_rate(ssp->clk);
+	unsigned long ssp_clk = drv_data->max_clk_rate;
+	const struct ssp_device *ssp = drv_data->ssp;
+
+	rate = min_t(int, ssp_clk, rate);
 
 	if (ssp->type == PXA25x_SSP || ssp->type == CE4100_SSP)
 		return ((ssp_clk / (2 * rate) - 1) & 0xff) << 8;
@@ -908,7 +915,6 @@ static void pump_transfers(unsigned long data)
 	struct spi_transfer *transfer = NULL;
 	struct spi_transfer *previous = NULL;
 	struct chip_data *chip = NULL;
-	struct ssp_device *ssp = drv_data->ssp;
 	void __iomem *reg = drv_data->ioaddr;
 	u32 clk_div = 0;
 	u8 bits = 0;
@@ -1005,7 +1011,7 @@ static void pump_transfers(unsigned long data)
 		if (transfer->bits_per_word)
 			bits = transfer->bits_per_word;
 
-		clk_div = ssp_get_clk_div(ssp, speed);
+		clk_div = ssp_get_clk_div(drv_data, speed);
 
 		if (bits <= 8) {
 			drv_data->n_bytes = 1;
@@ -1214,7 +1220,6 @@ static int setup(struct spi_device *spi)
 	struct pxa2xx_spi_chip *chip_info = NULL;
 	struct chip_data *chip;
 	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
-	struct ssp_device *ssp = drv_data->ssp;
 	unsigned int clk_div;
 	uint tx_thres = TX_THRESH_DFLT;
 	uint rx_thres = RX_THRESH_DFLT;
@@ -1296,7 +1301,7 @@ static int setup(struct spi_device *spi)
 		}
 	}
 
-	clk_div = ssp_get_clk_div(ssp, spi->max_speed_hz);
+	clk_div = ssp_get_clk_div(drv_data, spi->max_speed_hz);
 	chip->speed_hz = spi->max_speed_hz;
 
 	chip->cr0 = clk_div
@@ -1312,12 +1317,12 @@ static int setup(struct spi_device *spi)
 	/* NOTE:  PXA25x_SSP _could_ use external clocking ... */
 	if (!pxa25x_ssp_comp(drv_data))
 		dev_dbg(&spi->dev, "%ld Hz actual, %s\n",
-			clk_get_rate(ssp->clk)
+			drv_data->max_clk_rate
 				/ (1 + ((chip->cr0 & SSCR0_SCR(0xfff)) >> 8)),
 			chip->enable_dma ? "DMA" : "PIO");
 	else
 		dev_dbg(&spi->dev, "%ld Hz actual, %s\n",
-			clk_get_rate(ssp->clk) / 2
+			drv_data->max_clk_rate / 2
 				/ (1 + ((chip->cr0 & SSCR0_SCR(0x0ff)) >> 8)),
 			chip->enable_dma ? "DMA" : "PIO");
 
@@ -1470,7 +1475,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	}
 
 	/* Enable SOC clock */
-	clk_enable(ssp->clk);
+	clk_prepare_enable(ssp->clk);
+
+	drv_data->max_clk_rate = clk_get_rate(ssp->clk);
 
 	/* Load default SSP configuration */
 	write_SSCR0(0, drv_data->ioaddr);
@@ -1499,7 +1506,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	return status;
 
 out_error_clock_enabled:
-	clk_disable(ssp->clk);
+	clk_disable_unprepare(ssp->clk);
 
 out_error_dma_alloc:
 	if (drv_data->tx_channel != -1)
@@ -1527,7 +1534,7 @@ static int pxa2xx_spi_remove(struct platform_device *pdev)
 
 	/* Disable the SSP at the peripheral and SOC level */
 	write_SSCR0(0, drv_data->ioaddr);
-	clk_disable(ssp->clk);
+	clk_disable_unprepare(ssp->clk);
 
 	/* Release DMA */
 	if (drv_data->master_info->enable_dma) {
@@ -1571,7 +1578,7 @@ static int pxa2xx_spi_suspend(struct device *dev)
 	if (status != 0)
 		return status;
 	write_SSCR0(0, drv_data->ioaddr);
-	clk_disable(ssp->clk);
+	clk_disable_unprepare(ssp->clk);
 
 	return 0;
 }
@@ -1590,7 +1597,7 @@ static int pxa2xx_spi_resume(struct device *dev)
 			DRCMR_MAPVLD | drv_data->tx_channel;
 
 	/* Enable the SSP clock */
-	clk_enable(ssp->clk);
+	clk_prepare_enable(ssp->clk);
 
 	/* Start the queue running */
 	status = spi_master_resume(drv_data->master);
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 6b99f09b717d..053b5ba51b25 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -133,23 +133,5 @@ static inline void pxa_free_dma(int dma_ch)
 {
 }
 
-/*
- * The CE4100 does not have the clk framework implemented and SPI clock can
- * not be switched on/off or the divider changed.
- */
-static inline void clk_disable(struct clk *clk)
-{
-}
-
-static inline int clk_enable(struct clk *clk)
-{
-	return 0;
-}
-
-static inline unsigned long clk_get_rate(struct clk *clk)
-{
-	return 3686400;
-}
-
 #endif
 #endif
-- 
cgit v1.2.3


From 5cbc7ca987fb3f293203dc14a6c53b91b7c978a5 Mon Sep 17 00:00:00 2001
From: Matthias Brugger <matthias.bgg@gmail.com>
Date: Thu, 24 Jan 2013 13:40:41 +0100
Subject: spi: spi-omap2-mcspi.c: Toggle CS after each word

This patch allows the board code to define SPI devices which needs to
toggle the chip select after every word send. This is needed to get a
better resolution reading e.g. an ADC data stream.
Apart from that, as in the normal code CS is controlled by software,
a transfer is done much faster.

Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/spi-omap2-mcspi.c                 | 18 ++++++++++++++++++
 include/linux/platform_data/spi-omap2-mcspi.h |  3 +++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 31f6e842ea86..12789fcfa980 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -927,6 +927,7 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 
 	struct spi_device		*spi;
 	struct spi_transfer		*t = NULL;
+	struct spi_master		*master;
 	int				cs_active = 0;
 	struct omap2_mcspi_cs		*cs;
 	struct omap2_mcspi_device_config *cd;
@@ -935,6 +936,7 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 	u32				chconf;
 
 	spi = m->spi;
+	master = spi->master;
 	cs = spi->controller_state;
 	cd = spi->controller_data;
 
@@ -952,6 +954,14 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 			if (!t->speed_hz && !t->bits_per_word)
 				par_override = 0;
 		}
+		if (cd && cd->cs_per_word) {
+			chconf = mcspi->ctx.modulctrl;
+			chconf &= ~OMAP2_MCSPI_MODULCTRL_SINGLE;
+			mcspi_write_reg(master, OMAP2_MCSPI_MODULCTRL, chconf);
+			mcspi->ctx.modulctrl =
+				mcspi_read_cs_reg(spi, OMAP2_MCSPI_MODULCTRL);
+		}
+
 
 		if (!cs_active) {
 			omap2_mcspi_force_cs(spi, 1);
@@ -1013,6 +1023,14 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 	if (cs_active)
 		omap2_mcspi_force_cs(spi, 0);
 
+	if (cd && cd->cs_per_word) {
+		chconf = mcspi->ctx.modulctrl;
+		chconf |= OMAP2_MCSPI_MODULCTRL_SINGLE;
+		mcspi_write_reg(master, OMAP2_MCSPI_MODULCTRL, chconf);
+		mcspi->ctx.modulctrl =
+			mcspi_read_cs_reg(spi, OMAP2_MCSPI_MODULCTRL);
+	}
+
 	omap2_mcspi_set_enable(spi, 0);
 
 	m->status = status;
diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h
index a65572d53211..c100456eab17 100644
--- a/include/linux/platform_data/spi-omap2-mcspi.h
+++ b/include/linux/platform_data/spi-omap2-mcspi.h
@@ -22,6 +22,9 @@ struct omap2_mcspi_dev_attr {
 
 struct omap2_mcspi_device_config {
 	unsigned turbo_mode:1;
+
+	/* toggle chip select after every word */
+	unsigned cs_per_word:1;
 };
 
 #endif
-- 
cgit v1.2.3


From 996a953de02ffb852c9ac736f4e892008ed68884 Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@gmail.com>
Date: Tue, 18 Dec 2012 18:50:55 +0100
Subject: can: add tx/rx LED trigger support

This patch implements the functions to add two LED triggers, named
<ifname>-tx and <ifname>-rx, to a canbus device driver.

Triggers are called from specific handlers by each CAN device driver and
can be disabled altogether with a Kconfig option.

The implementation keeps the LED on when the interface is UP and blinks
the LED on network activity at a configurable rate.

This only supports can-dev based drivers, as it uses some support field
in the can_priv structure.

Supported drivers should call devm_can_led_init() and can_led_event() as
needed.

Cleanup is handled automatically by devres, so no *_exit function is
needed.

Supported events are:
- CAN_LED_EVENT_OPEN: turn on tx/rx LEDs
- CAN_LED_EVENT_STOP: turn off tx/rx LEDs
- CAN_LED_EVENT_TX: trigger tx LED blink
- CAN_LED_EVENT_RX: trigger tx LED blink

Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/Kconfig  | 11 +++++++
 drivers/net/can/Makefile |  2 ++
 drivers/net/can/led.c    | 86 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/can/dev.h  |  8 +++++
 include/linux/can/led.h  | 42 +++++++++++++++++++++++
 5 files changed, 149 insertions(+)
 create mode 100644 drivers/net/can/led.c
 create mode 100644 include/linux/can/led.h

(limited to 'include/linux')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 0c5a65682d01..1cca19f1c490 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -51,6 +51,17 @@ config CAN_CALC_BITTIMING
 	  arguments "tq", "prop_seg", "phase_seg1", "phase_seg2" and "sjw".
 	  If unsure, say Y.
 
+config CAN_LEDS
+	bool "Enable LED triggers for Netlink based drivers"
+	depends on LEDS_CLASS
+	select LEDS_TRIGGERS
+	---help---
+	  This option adds two LED triggers for packet receive and transmit
+	  events on each supported CAN device.
+
+	  Say Y here if you are working on a system with led-class supported
+	  LEDs and you want to use them as canbus activity indicators.
+
 config CAN_AT91
 	tristate "Atmel AT91 onchip CAN controller"
 	depends on ARCH_AT91SAM9263 || ARCH_AT91SAM9X5
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 7de59862bbe9..c7440392adbb 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_CAN_SLCAN)		+= slcan.o
 obj-$(CONFIG_CAN_DEV)		+= can-dev.o
 can-dev-y			:= dev.o
 
+can-dev-$(CONFIG_CAN_LEDS)	+= led.o
+
 obj-y				+= usb/
 obj-y				+= softing/
 
diff --git a/drivers/net/can/led.c b/drivers/net/can/led.c
new file mode 100644
index 000000000000..c50a0d741c57
--- /dev/null
+++ b/drivers/net/can/led.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2012, Fabio Baltieri <fabio.baltieri@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/can/dev.h>
+
+#include <linux/can/led.h>
+
+static unsigned long led_delay = 50;
+module_param(led_delay, ulong, 0644);
+MODULE_PARM_DESC(led_delay,
+		"blink delay time for activity leds (msecs, default: 50).");
+
+/* Trigger a LED event in response to a CAN device event */
+void can_led_event(struct net_device *netdev, enum can_led_event event)
+{
+	struct can_priv *priv = netdev_priv(netdev);
+
+	switch (event) {
+	case CAN_LED_EVENT_OPEN:
+		led_trigger_event(priv->tx_led_trig, LED_FULL);
+		led_trigger_event(priv->rx_led_trig, LED_FULL);
+		break;
+	case CAN_LED_EVENT_STOP:
+		led_trigger_event(priv->tx_led_trig, LED_OFF);
+		led_trigger_event(priv->rx_led_trig, LED_OFF);
+		break;
+	case CAN_LED_EVENT_TX:
+		if (led_delay)
+			led_trigger_blink_oneshot(priv->tx_led_trig,
+						  &led_delay, &led_delay, 1);
+		break;
+	case CAN_LED_EVENT_RX:
+		if (led_delay)
+			led_trigger_blink_oneshot(priv->rx_led_trig,
+						  &led_delay, &led_delay, 1);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(can_led_event);
+
+static void can_led_release(struct device *gendev, void *res)
+{
+	struct can_priv *priv = netdev_priv(to_net_dev(gendev));
+
+	led_trigger_unregister_simple(priv->tx_led_trig);
+	led_trigger_unregister_simple(priv->rx_led_trig);
+}
+
+/* Register CAN LED triggers for a CAN device
+ *
+ * This is normally called from a driver's probe function
+ */
+void devm_can_led_init(struct net_device *netdev)
+{
+	struct can_priv *priv = netdev_priv(netdev);
+	void *res;
+
+	res = devres_alloc(can_led_release, 0, GFP_KERNEL);
+	if (!res) {
+		netdev_err(netdev, "cannot register LED triggers\n");
+		return;
+	}
+
+	snprintf(priv->tx_led_trig_name, sizeof(priv->tx_led_trig_name),
+		 "%s-tx", netdev->name);
+	snprintf(priv->rx_led_trig_name, sizeof(priv->rx_led_trig_name),
+		 "%s-rx", netdev->name);
+
+	led_trigger_register_simple(priv->tx_led_trig_name,
+				    &priv->tx_led_trig);
+	led_trigger_register_simple(priv->rx_led_trig_name,
+				    &priv->rx_led_trig);
+
+	devres_add(&netdev->dev, res);
+}
+EXPORT_SYMBOL_GPL(devm_can_led_init);
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 2b2fc345afca..7747d9bcdc84 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -16,6 +16,7 @@
 #include <linux/can.h>
 #include <linux/can/netlink.h>
 #include <linux/can/error.h>
+#include <linux/can/led.h>
 
 /*
  * CAN mode
@@ -52,6 +53,13 @@ struct can_priv {
 
 	unsigned int echo_skb_max;
 	struct sk_buff **echo_skb;
+
+#ifdef CONFIG_CAN_LEDS
+	struct led_trigger *tx_led_trig;
+	char tx_led_trig_name[CAN_LED_NAME_SZ];
+	struct led_trigger *rx_led_trig;
+	char rx_led_trig_name[CAN_LED_NAME_SZ];
+#endif
 };
 
 /*
diff --git a/include/linux/can/led.h b/include/linux/can/led.h
new file mode 100644
index 000000000000..12d5549abb95
--- /dev/null
+++ b/include/linux/can/led.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2012, Fabio Baltieri <fabio.baltieri@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CAN_LED_H
+#define CAN_LED_H
+
+#include <linux/if.h>
+#include <linux/leds.h>
+
+enum can_led_event {
+	CAN_LED_EVENT_OPEN,
+	CAN_LED_EVENT_STOP,
+	CAN_LED_EVENT_TX,
+	CAN_LED_EVENT_RX,
+};
+
+#ifdef CONFIG_CAN_LEDS
+
+/* keep space for interface name + "-tx"/"-rx" suffix and null terminator */
+#define CAN_LED_NAME_SZ (IFNAMSIZ + 4)
+
+void can_led_event(struct net_device *netdev, enum can_led_event event);
+void devm_can_led_init(struct net_device *netdev);
+
+#else
+
+static inline void can_led_event(struct net_device *netdev,
+				 enum can_led_event event)
+{
+}
+static inline void devm_can_led_init(struct net_device *netdev)
+{
+}
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From bf03a5379cd3492fbeca42111340581ba9dee0b8 Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <kurt.van.dijck@eia.be>
Date: Tue, 18 Dec 2012 18:50:56 +0100
Subject: can: export a safe netdev_priv wrapper for candev

In net_device notifier calls, it was impossible to determine
if a CAN device is based on candev in a safe way.
This patch adds such test in order to access candev storage
from within those notifiers.

Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   | 13 +++++++++++++
 include/linux/can/dev.h |  3 +++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 8233e5ed2939..13e738098fbe 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -794,6 +794,19 @@ void unregister_candev(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(unregister_candev);
 
+/*
+ * Test if a network device is a candev based device
+ * and return the can_priv* if so.
+ */
+struct can_priv *safe_candev_priv(struct net_device *dev)
+{
+	if ((dev->type != ARPHRD_CAN) || (dev->rtnl_link_ops != &can_link_ops))
+		return NULL;
+
+	return netdev_priv(dev);
+}
+EXPORT_SYMBOL_GPL(safe_candev_priv);
+
 static __init int can_dev_init(void)
 {
 	int err;
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 7747d9bcdc84..fb0ab651a041 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -106,6 +106,9 @@ u8 can_len2dlc(u8 len);
 struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max);
 void free_candev(struct net_device *dev);
 
+/* a candev safe wrapper around netdev_priv */
+struct can_priv *safe_candev_priv(struct net_device *dev);
+
 int open_candev(struct net_device *dev);
 void close_candev(struct net_device *dev);
 
-- 
cgit v1.2.3


From a1ef7bd9fce8aba8e4701e60208148fb3bc9bdd4 Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <kurt.van.dijck@eia.be>
Date: Tue, 18 Dec 2012 18:50:57 +0100
Subject: can: rename LED trigger name on netdev renames

The LED trigger name for CAN devices is based on the initial
CAN device name, but does never change. The LED trigger name
is not guaranteed to be unique in case of hotplugging CAN devices.

This patch tries to address this problem by modifying the
LED trigger name according to the CAN device name when
the latter changes.

v1 - Kurt Van Dijck
v2 - Fabio Baltieri
- remove rename blocking if trigger is bound
- use led-subsystem function for the actual rename (still WiP)
- call init/exit functions from dev.c
v3 - Kurt Van Dijck
- safe operation for non-candev based devices (vcan, slcan)
	based on earlier patch
v4 - Kurt Van Dijck
- trivial patch mistakes fixed

Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   |  5 +++++
 drivers/net/can/led.c   | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/can/led.h |  9 +++++++++
 3 files changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 13e738098fbe..6abc6e59778e 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -25,6 +25,7 @@
 #include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/netlink.h>
+#include <linux/can/led.h>
 #include <net/rtnetlink.h>
 
 #define MOD_DESC "CAN device driver interface"
@@ -811,6 +812,8 @@ static __init int can_dev_init(void)
 {
 	int err;
 
+	can_led_notifier_init();
+
 	err = rtnl_link_register(&can_link_ops);
 	if (!err)
 		printk(KERN_INFO MOD_DESC "\n");
@@ -822,6 +825,8 @@ module_init(can_dev_init);
 static __exit void can_dev_exit(void)
 {
 	rtnl_link_unregister(&can_link_ops);
+
+	can_led_notifier_exit();
 }
 module_exit(can_dev_exit);
 
diff --git a/drivers/net/can/led.c b/drivers/net/can/led.c
index c50a0d741c57..f27fca65dc4a 100644
--- a/drivers/net/can/led.c
+++ b/drivers/net/can/led.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2012, Fabio Baltieri <fabio.baltieri@gmail.com>
+ * Copyright 2012, Kurt Van Dijck <kurt.van.dijck@eia.be>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -84,3 +85,40 @@ void devm_can_led_init(struct net_device *netdev)
 	devres_add(&netdev->dev, res);
 }
 EXPORT_SYMBOL_GPL(devm_can_led_init);
+
+/* NETDEV rename notifier to rename the associated led triggers too */
+static int can_led_notifier(struct notifier_block *nb, unsigned long msg,
+			void *data)
+{
+	struct net_device *netdev = data;
+	struct can_priv *priv = safe_candev_priv(netdev);
+	char name[CAN_LED_NAME_SZ];
+
+	if (!priv)
+		return NOTIFY_DONE;
+
+	if (msg == NETDEV_CHANGENAME) {
+		snprintf(name, sizeof(name), "%s-tx", netdev->name);
+		led_trigger_rename_static(name, priv->tx_led_trig);
+
+		snprintf(name, sizeof(name), "%s-rx", netdev->name);
+		led_trigger_rename_static(name, priv->rx_led_trig);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* notifier block for netdevice event */
+static struct notifier_block can_netdev_notifier __read_mostly = {
+	.notifier_call = can_led_notifier,
+};
+
+int __init can_led_notifier_init(void)
+{
+	return register_netdevice_notifier(&can_netdev_notifier);
+}
+
+void __exit can_led_notifier_exit(void)
+{
+	unregister_netdevice_notifier(&can_netdev_notifier);
+}
diff --git a/include/linux/can/led.h b/include/linux/can/led.h
index 12d5549abb95..9c1167baf273 100644
--- a/include/linux/can/led.h
+++ b/include/linux/can/led.h
@@ -26,6 +26,8 @@ enum can_led_event {
 
 void can_led_event(struct net_device *netdev, enum can_led_event event);
 void devm_can_led_init(struct net_device *netdev);
+int __init can_led_notifier_init(void);
+void __exit can_led_notifier_exit(void);
 
 #else
 
@@ -36,6 +38,13 @@ static inline void can_led_event(struct net_device *netdev,
 static inline void devm_can_led_init(struct net_device *netdev)
 {
 }
+static inline int can_led_notifier_init(void)
+{
+	return 0;
+}
+static inline void can_led_notifier_exit(void)
+{
+}
 
 #endif
 
-- 
cgit v1.2.3


From 156c2bb9f88065c8da78814f98fde665a5cbb527 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Thu, 17 Jan 2013 18:43:39 +0100
Subject: can: add private data space for CAN sk_buffs

The struct can_skb_priv is used to transport additional information along
with the stored struct can(fd)_frame that can not be contained in existing
struct sk_buff elements.

can_skb_priv is located in the skb headroom, which does not touch the existing
CAN sk_buff usage with skb->data and skb->len, so that even out-of-tree
CAN drivers can be used without changes.

Btw. out-of-tree CAN drivers without can_skb_priv in the sk_buff headroom
would not support features based on can_skb_priv.

The can_skb_priv->ifindex contains the first interface where the CAN frame
appeared on the local host. Unfortunately skb->skb_iif can not be used as this
value is overwritten in every netif_receive_skb() call.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   |  8 +++++++-
 drivers/net/can/slcan.c |  8 +++++++-
 include/linux/can/skb.h | 35 +++++++++++++++++++++++++++++++++++
 net/can/bcm.c           | 12 +++++++++---
 net/can/raw.c           |  8 ++++++--
 5 files changed, 64 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/can/skb.h

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 6abc6e59778e..59ada082a994 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -24,6 +24,7 @@
 #include <linux/if_arp.h>
 #include <linux/can.h>
 #include <linux/can/dev.h>
+#include <linux/can/skb.h>
 #include <linux/can/netlink.h>
 #include <linux/can/led.h>
 #include <net/rtnetlink.h>
@@ -502,13 +503,18 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
 {
 	struct sk_buff *skb;
 
-	skb = netdev_alloc_skb(dev, sizeof(struct can_frame));
+	skb = netdev_alloc_skb(dev, sizeof(struct can_skb_priv) +
+			       sizeof(struct can_frame));
 	if (unlikely(!skb))
 		return NULL;
 
 	skb->protocol = htons(ETH_P_CAN);
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+
 	*cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
 	memset(*cf, 0, sizeof(struct can_frame));
 
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index adc3708d8829..e79a8d10e0fc 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -55,6 +55,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/can.h>
+#include <linux/can/skb.h>
 
 static __initconst const char banner[] =
 	KERN_INFO "slcan: serial line CAN interface driver\n";
@@ -184,7 +185,8 @@ static void slc_bump(struct slcan *sl)
 		cf.data[i] |= tmp;
 	}
 
-	skb = dev_alloc_skb(sizeof(struct can_frame));
+	skb = dev_alloc_skb(sizeof(struct can_frame) +
+			    sizeof(struct can_skb_priv));
 	if (!skb)
 		return;
 
@@ -192,6 +194,10 @@ static void slc_bump(struct slcan *sl)
 	skb->protocol = htons(ETH_P_CAN);
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+	((struct can_skb_priv *)(skb->head))->ifindex = sl->dev->ifindex;
+
 	memcpy(skb_put(skb, sizeof(struct can_frame)),
 	       &cf, sizeof(struct can_frame));
 	netif_rx_ni(skb);
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
new file mode 100644
index 000000000000..4b0f24d3a878
--- /dev/null
+++ b/include/linux/can/skb.h
@@ -0,0 +1,35 @@
+/*
+ * linux/can/skb.h
+ *
+ * Definitions for the CAN network socket buffer
+ *
+ * Copyright (C) 2012 Oliver Hartkopp <socketcan@hartkopp.net>
+ *
+ */
+
+#ifndef CAN_SKB_H
+#define CAN_SKB_H
+
+#include <linux/types.h>
+#include <linux/can.h>
+
+/*
+ * The struct can_skb_priv is used to transport additional information along
+ * with the stored struct can(fd)_frame that can not be contained in existing
+ * struct sk_buff elements.
+ * N.B. that this information must not be modified in cloned CAN sk_buffs.
+ * To modify the CAN frame content or the struct can_skb_priv content
+ * skb_copy() needs to be used instead of skb_clone().
+ */
+
+/**
+ * struct can_skb_priv - private additional data inside CAN sk_buffs
+ * @ifindex:	ifindex of the first interface the CAN frame appeared on
+ * @cf:		align to the following CAN frame at skb->data
+ */
+struct can_skb_priv {
+	int ifindex;
+	struct can_frame cf[0];
+};
+
+#endif /* CAN_SKB_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 969b7cdff59d..ccc27b9e8384 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -54,6 +54,7 @@
 #include <linux/skbuff.h>
 #include <linux/can.h>
 #include <linux/can/core.h>
+#include <linux/can/skb.h>
 #include <linux/can/bcm.h>
 #include <linux/slab.h>
 #include <net/sock.h>
@@ -256,10 +257,13 @@ static void bcm_can_tx(struct bcm_op *op)
 		return;
 	}
 
-	skb = alloc_skb(CFSIZ, gfp_any());
+	skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), gfp_any());
 	if (!skb)
 		goto out;
 
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+
 	memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
 
 	/* send with loopback */
@@ -1199,11 +1203,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 	if (!ifindex)
 		return -ENODEV;
 
-	skb = alloc_skb(CFSIZ, GFP_KERNEL);
-
+	skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+
 	err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ);
 	if (err < 0) {
 		kfree_skb(skb);
@@ -1216,6 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 		return -ENODEV;
 	}
 
+	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
 	skb->dev = dev;
 	skb->sk  = sk;
 	err = can_send(skb, 1); /* send with loopback */
diff --git a/net/can/raw.c b/net/can/raw.c
index 5b0e3e330d97..5d860e8dcc52 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -50,6 +50,7 @@
 #include <linux/skbuff.h>
 #include <linux/can.h>
 #include <linux/can/core.h>
+#include <linux/can/skb.h>
 #include <linux/can/raw.h>
 #include <net/sock.h>
 #include <net/net_namespace.h>
@@ -699,11 +700,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (!dev)
 		return -ENXIO;
 
-	skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT,
-				  &err);
+	skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv),
+				  msg->msg_flags & MSG_DONTWAIT, &err);
 	if (!skb)
 		goto put_dev;
 
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+
 	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 	if (err < 0)
 		goto free_skb;
-- 
cgit v1.2.3


From 95a79fd458b85132c25e351d45037ec9643312b2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 7 Jan 2013 18:12:14 +0100
Subject: context_tracking: Export context state for generic vtime

Export the context state: whether we run in user / kernel
from the context tracking subsystem point of view.

This is going to be used by the generic virtual cputime
accounting subsystem that is needed to implement the full
dynticks.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/context_tracking.h | 28 ++++++++++++++++++++++++++++
 kernel/context_tracking.c        | 16 +---------------
 2 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index e24339ccb7f0..b28d161c1091 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -3,12 +3,40 @@
 
 #ifdef CONFIG_CONTEXT_TRACKING
 #include <linux/sched.h>
+#include <linux/percpu.h>
+
+struct context_tracking {
+	/*
+	 * When active is false, probes are unset in order
+	 * to minimize overhead: TIF flags are cleared
+	 * and calls to user_enter/exit are ignored. This
+	 * may be further optimized using static keys.
+	 */
+	bool active;
+	enum {
+		IN_KERNEL = 0,
+		IN_USER,
+	} state;
+};
+
+DECLARE_PER_CPU(struct context_tracking, context_tracking);
+
+static inline bool context_tracking_in_user(void)
+{
+	return __this_cpu_read(context_tracking.state) == IN_USER;
+}
+
+static inline bool context_tracking_active(void)
+{
+	return __this_cpu_read(context_tracking.active);
+}
 
 extern void user_enter(void);
 extern void user_exit(void);
 extern void context_tracking_task_switch(struct task_struct *prev,
 					 struct task_struct *next);
 #else
+static inline bool context_tracking_in_user(void) { return false; }
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
 static inline void context_tracking_task_switch(struct task_struct *prev,
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e0e07fd55508..54f471e536dc 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,24 +1,10 @@
 #include <linux/context_tracking.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/hardirq.h>
 
-struct context_tracking {
-	/*
-	 * When active is false, hooks are not set to
-	 * minimize overhead: TIF flags are cleared
-	 * and calls to user_enter/exit are ignored. This
-	 * may be further optimized using static keys.
-	 */
-	bool active;
-	enum {
-		IN_KERNEL = 0,
-		IN_USER,
-	} state;
-};
 
-static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
+DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
 	.active = true,
 #endif
-- 
cgit v1.2.3


From 90f45e4e729a7ffaa3ed2423834aad612870b427 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 10 Jan 2013 05:24:49 -0800
Subject: rcu: Remove obsolete Kconfig option from comment

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 275aa3f1062d..7e12dbaa1457 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -749,7 +749,7 @@ static inline void rcu_preempt_sleep_check(void)
  * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
  * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
  * be preempted, but explicit blocking is illegal.  Finally, in preemptible
- * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
+ * RCU implementations in real-time (with -rt patchset) kernel builds,
  * RCU read-side critical sections may be preempted and they may also
  * block, but only when acquiring spinlocks that are subject to priority
  * inheritance.
-- 
cgit v1.2.3


From abf917cd91cbb73952758f9741e2fa65002a48ee Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 25 Jul 2012 07:56:04 +0200
Subject: cputime: Generic on-demand virtual cputime accounting

If we want to stop the tick further idle, we need to be
able to account the cputime without using the tick.

Virtual based cputime accounting solves that problem by
hooking into kernel/user boundaries.

However implementing CONFIG_VIRT_CPU_ACCOUNTING require
low level hooks and involves more overhead. But we already
have a generic context tracking subsystem that is required
for RCU needs by archs which plan to shut down the tick
outside idle.

This patch implements a generic virtual based cputime
accounting that relies on these generic kernel/user hooks.

There are some upsides of doing this:

- This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING
if context tracking is already built (already necessary for RCU in full
tickless mode).

- We can rely on the generic context tracking subsystem to dynamically
(de)activate the hooks, so that we can switch anytime between virtual
and tick based accounting. This way we don't have the overhead
of the virtual accounting when the tick is running periodically.

And one downside:

- There is probably more overhead than a native virtual based cputime
accounting. But this relies on hooks that are already set anyway.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/include/asm/cputime.h              |  6 +--
 arch/ia64/include/asm/thread_info.h          |  4 +-
 arch/ia64/include/asm/xen/minstate.h         |  2 +-
 arch/ia64/kernel/asm-offsets.c               |  2 +-
 arch/ia64/kernel/entry.S                     | 16 ++++----
 arch/ia64/kernel/fsys.S                      |  4 +-
 arch/ia64/kernel/head.S                      |  4 +-
 arch/ia64/kernel/ivt.S                       |  8 ++--
 arch/ia64/kernel/minstate.h                  |  2 +-
 arch/ia64/kernel/time.c                      |  4 +-
 arch/powerpc/configs/chroma_defconfig        |  2 +-
 arch/powerpc/configs/corenet64_smp_defconfig |  2 +-
 arch/powerpc/configs/pasemi_defconfig        |  2 +-
 arch/powerpc/include/asm/cputime.h           |  6 +--
 arch/powerpc/include/asm/lppaca.h            |  2 +-
 arch/powerpc/include/asm/ppc_asm.h           |  4 +-
 arch/powerpc/kernel/entry_64.S               |  4 +-
 arch/powerpc/kernel/time.c                   |  4 +-
 arch/powerpc/platforms/pseries/dtl.c         |  6 +--
 arch/powerpc/platforms/pseries/setup.c       |  6 +--
 include/asm-generic/cputime.h                |  8 +++-
 include/asm-generic/cputime_nsecs.h          |  8 ++++
 include/linux/kernel_stat.h                  |  2 +-
 include/linux/vtime.h                        | 16 ++++++++
 init/Kconfig                                 | 23 ++++++++++-
 kernel/context_tracking.c                    |  6 ++-
 kernel/sched/cputime.c                       | 61 ++++++++++++++++++++++++++--
 27 files changed, 160 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 040b31638001..e2d3f5baf265 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -11,19 +11,19 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
  * Otherwise we measure cpu time in jiffies using the generic definitions.
  */
 
 #ifndef __IA64_CPUTIME_H
 #define __IA64_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 # include <asm-generic/cputime.h>
 #else
 # include <asm/processor.h>
 # include <asm-generic/cputime_nsecs.h>
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index ff2ae4136584..020d655ed082 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -31,7 +31,7 @@ struct thread_info {
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	struct restart_block restart_block;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	__u64 ac_stamp;
 	__u64 ac_leave;
 	__u64 ac_stime;
@@ -69,7 +69,7 @@ struct thread_info {
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define setup_thread_stack(p, org)			\
 	*task_thread_info(p) = *task_thread_info(org);	\
 	task_thread_info(p)->ac_stime = 0;		\
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
index c57fa910f2c9..00cf03e0cb82 100644
--- a/arch/ia64/include/asm/xen/minstate.h
+++ b/arch/ia64/include/asm/xen/minstate.h
@@ -1,5 +1,5 @@
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define XEN_ACCOUNT_GET_STAMP		\
 	MOV_FROM_ITC(pUStk, p6, r20, r2);
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index a48bd9a9927b..46c9e3007315 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -41,7 +41,7 @@ void foo(void)
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
 	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
 	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6bfd8429ee0f..7a53530f22c2 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
 #endif
 .global __paravirt_work_processed_syscall;
 __paravirt_work_processed_syscall:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	adds r2=PT(LOADRS)+16,r12
 	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -762,7 +762,7 @@ __paravirt_work_processed_syscall:
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
@@ -793,7 +793,7 @@ __paravirt_work_processed_syscall:
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) st1 [r15]=r17				// M2|3
 #else
 (pUStk) st1 [r14]=r17				// M2|3
@@ -813,7 +813,7 @@ __paravirt_work_processed_syscall:
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
 	COVER				// B    add current frame into dirty partition & set cr.ifs
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	st8 [r14]=r22			// M	save time at leave
 	mov f10=f0			// F    clear f10
@@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	.pred.rel.mutex pUStk,pKStk
 	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
 	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
@@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
 #else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
@@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
 #endif
 	;;
@@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
 	//  mib  :  mov add br        ->  mib  : ld8 add br
 	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index e662f178b990..c4cd45d97749 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
 #else
 	nop.m 0
@@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	// mov.m r30=ar.itc is called in advance
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 4738ff7bd66a..9be4e497f3d3 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock)
 sched_clock = ia64_native_sched_clock
 #endif
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 GLOBAL_ENTRY(cycle_to_cputime)
 	alloc r16=ar.pfs,1,0,0,0
 	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
@@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
 	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
 	br.ret.sptk.many rp
 END(cycle_to_cputime)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_IA64_BRL_EMU
 
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index fa25689fc453..689ffcaa284e 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -784,7 +784,7 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8				// A    increment ei to next slot
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	;;
 	mov b6=r30				// I0   setup syscall handler branch reg early
 #else
@@ -801,7 +801,7 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	MOV_FROM_ITC(p0, p14, r30, r18)		// M    get cycle for accounting
 #else
 	mov b6=r30				// I0   setup syscall handler branch reg early
@@ -817,7 +817,7 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	// mov.m r30=ar.itc is called in advance, and r13 is current
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
@@ -1043,7 +1043,7 @@ END(ia64_syscall_setup)
 	DBG_FAULT(16)
 	FAULT(16)
 
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
 	/*
 	 * There is no particular reason for this code to be here, other than
 	 * that there happens to be space here that would go unused otherwise.
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index d56753a11636..cc82a7d744c9 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -4,7 +4,7 @@
 #include "entry.h"
 #include "paravirt_inst.h"
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define ACCOUNT_GET_STAMP				\
 (pUStk) mov.m r20=ar.itc;
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 88a794536bc0..a3a3f5a1cb3a 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 
 #include <linux/kernel_stat.h>
 
@@ -142,7 +142,7 @@ void vtime_account_idle(struct task_struct *tsk)
 	account_idle_time(vtime_delta(tsk));
 }
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
index 29bb11ec6c64..4f35fc462385 100644
--- a/arch/powerpc/configs/chroma_defconfig
+++ b/arch/powerpc/configs/chroma_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=256
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index 88fa5c46f66f..f7df8362911f 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 840a2c2d0430..bcedeea0df89 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_ALTIVEC=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 483733bd06d4..607559ab271f 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -8,7 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
  * the same units as the timebase.  Otherwise we measure cpu time
  * in jiffies using the generic definitions.
  */
@@ -16,7 +16,7 @@
 #ifndef __POWERPC_CPUTIME_H
 #define __POWERPC_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #include <asm-generic/cputime.h>
 #ifdef __KERNEL__
 static inline void setup_cputime_one_jiffy(void) { }
@@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
 
 #endif /* __KERNEL__ */
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 531fe0c3108f..b1e7f2af1016 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -145,7 +145,7 @@ struct dtl_entry {
 extern struct kmem_cache *dtl_cache;
 
 /*
- * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
  * reading from the dispatch trace log.  If other code wants to consume
  * DTL entries, it can set this pointer to a function that will get
  * called once for each DTL entry that gets processed.
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index ea2a86e8ff95..2d0e1f5d8339 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -24,7 +24,7 @@
  * user_time and system_time fields in the paca.
  */
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define ACCOUNT_CPU_USER_ENTRY(ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ra, rb)
 #define ACCOUNT_STOLEN_TIME
@@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 
 #endif /* CONFIG_PPC_SPLPAR */
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
  * Macros for storing registers into and loading registers from
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index b310a0573625..a0ca42fb1541 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -94,7 +94,7 @@ system_call_common:
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 	ld	r11,exception_marker@toc(r2)
 	std	r11,-16(r9)		/* "regshere" marker */
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
 BEGIN_FW_FTR_SECTION
 	beq	33f
 	/* if from user, see if there are any DTL entries to process */
@@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 33:
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
 
 	/*
 	 * A syscall should always be called with interrupts enabled
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6f6b1cccc916..22c9b67f9983 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 EXPORT_SYMBOL_GPL(ppc_tb_freq);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Factors for converting from cputime_t (timebase ticks) to
  * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
@@ -377,7 +377,7 @@ void vtime_account_user(struct task_struct *tsk)
 	account_user_time(tsk, utime, utimescaled);
 }
 
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #define calc_cputime_factors()
 #endif
 
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index a7648543c59e..0cc0ac07a55d 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7;
  */
 static int dtl_buf_entries = N_DISPATCH_LOG;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 struct dtl_ring {
 	u64	write_index;
 	struct dtl_entry *write_ptr;
@@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 	return per_cpu(dtl_rings, dtl->cpu).write_index;
 }
 
-#else /* CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_start(struct dtl *dtl)
 {
@@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 {
 	return lppaca_of(dtl->cpu).dtl_idx;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_enable(struct dtl *dtl)
 {
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ca55882465d6..527e12c9573b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
 
 struct kmem_cache *dtl_cache;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Allocate space for the dispatch trace log for all possible cpus
  * and register the buffers with the hypervisor.  This is used for
@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
 
 	return 0;
 }
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 static inline int alloc_dispatch_logs(void)
 {
 	return 0;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int alloc_dispatch_log_kmem_cache(void)
 {
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index c6eddf50eaf9..51969436b8b8 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -4,6 +4,12 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 
-#include <asm-generic/cputime_jiffies.h>
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+# include <asm-generic/cputime_jiffies.h>
+#endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# include <asm-generic/cputime_nsecs.h>
+#endif
 
 #endif
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
index c73d182f4751..b6485cafb7bd 100644
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -26,6 +26,7 @@ typedef u64 __nocast cputime64_t;
  */
 #define cputime_to_jiffies(__ct)	\
 	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define cputime_to_scaled(__ct)		(__ct)
 #define jiffies_to_cputime(__jif)	\
 	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
 #define cputime64_to_jiffies64(__ct)	\
@@ -33,6 +34,13 @@ typedef u64 __nocast cputime64_t;
 #define jiffies64_to_cputime64(__jif)	\
 	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
 
+
+/*
+ * Convert cputime <-> nanoseconds
+ */
+#define nsecs_to_cputime(__nsecs)	((__force u64)(__nsecs))
+
+
 /*
  * Convert cputime <-> microseconds
  */
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 66b70780e910..ed5f6ed6eb77 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline void account_process_tick(struct task_struct *tsk, int user)
 {
 	vtime_account_user(tsk);
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index ae30ab58431a..21ef703d1b25 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -14,9 +14,25 @@ extern void vtime_account(struct task_struct *tsk);
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
+static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_account(struct task_struct *tsk) { }
 #endif
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
+static inline void vtime_user_enter(struct task_struct *tsk)
+{
+	vtime_account_system(tsk);
+}
+static inline void vtime_user_exit(struct task_struct *tsk)
+{
+	vtime_account_user(tsk);
+}
+#else
+static inline void vtime_user_enter(struct task_struct *tsk) { }
+static inline void vtime_user_exit(struct task_struct *tsk) { }
+#endif
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 extern void irqtime_account_irq(struct task_struct *tsk);
 #else
diff --git a/init/Kconfig b/init/Kconfig
index be8b7f55312d..a05f843e7e52 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -326,6 +326,9 @@ source "kernel/time/Kconfig"
 
 menu "CPU/Task time and stats accounting"
 
+config VIRT_CPU_ACCOUNTING
+	bool
+
 choice
 	prompt "Cputime accounting"
 	default TICK_CPU_ACCOUNTING if !PPC64
@@ -342,9 +345,10 @@ config TICK_CPU_ACCOUNTING
 
 	  If unsure, say Y.
 
-config VIRT_CPU_ACCOUNTING
+config VIRT_CPU_ACCOUNTING_NATIVE
 	bool "Deterministic task and CPU time accounting"
 	depends on HAVE_VIRT_CPU_ACCOUNTING
+	select VIRT_CPU_ACCOUNTING
 	help
 	  Select this option to enable more accurate task and CPU time
 	  accounting.  This is done by reading a CPU counter on each
@@ -354,6 +358,23 @@ config VIRT_CPU_ACCOUNTING
 	  this also enables accounting of stolen time on logically-partitioned
 	  systems.
 
+config VIRT_CPU_ACCOUNTING_GEN
+	bool "Full dynticks CPU time accounting"
+	depends on HAVE_CONTEXT_TRACKING && 64BIT
+	select VIRT_CPU_ACCOUNTING
+	select CONTEXT_TRACKING
+	help
+	  Select this option to enable task and CPU time accounting on full
+	  dynticks systems. This accounting is implemented by watching every
+	  kernel-user boundaries using the context tracking subsystem.
+	  The accounting is thus performed at the expense of some significant
+	  overhead.
+
+	  For now this is only useful if you are working on the full
+	  dynticks subsystem development.
+
+	  If unsure, say N.
+
 config IRQ_TIME_ACCOUNTING
 	bool "Fine granularity task level IRQ time accounting"
 	depends on HAVE_IRQ_TIME_ACCOUNTING
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 54f471e536dc..9002e92e6372 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -30,8 +30,9 @@ void user_enter(void)
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.active) &&
 	    __this_cpu_read(context_tracking.state) != IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_USER);
+		vtime_user_enter(current);
 		rcu_user_enter();
+		__this_cpu_write(context_tracking.state, IN_USER);
 	}
 	local_irq_restore(flags);
 }
@@ -53,8 +54,9 @@ void user_exit(void)
 
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.state) == IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_KERNEL);
 		rcu_user_exit();
+		vtime_user_exit(current);
+		__this_cpu_write(context_tracking.state, IN_KERNEL);
 	}
 	local_irq_restore(flags);
 }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5849448b981e..1c964eced92c 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -3,6 +3,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kernel_stat.h>
 #include <linux/static_key.h>
+#include <linux/context_tracking.h>
 #include "sched.h"
 
 
@@ -479,7 +480,9 @@ void vtime_task_switch(struct task_struct *prev)
 	else
 		vtime_account_system(prev);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	vtime_account_user(prev);
+#endif
 	arch_vtime_task_switch(prev);
 }
 #endif
@@ -495,10 +498,24 @@ void vtime_task_switch(struct task_struct *prev)
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
 void vtime_account(struct task_struct *tsk)
 {
-	if (in_interrupt() || !is_idle_task(tsk))
-		vtime_account_system(tsk);
-	else
-		vtime_account_idle(tsk);
+	if (!in_interrupt()) {
+		/*
+		 * If we interrupted user, context_tracking_in_user()
+		 * is 1 because the context tracking don't hook
+		 * on irq entry/exit. This way we know if
+		 * we need to flush user time on kernel entry.
+		 */
+		if (context_tracking_in_user()) {
+			vtime_account_user(tsk);
+			return;
+		}
+
+		if (is_idle_task(tsk)) {
+			vtime_account_idle(tsk);
+			return;
+		}
+	}
+	vtime_account_system(tsk);
 }
 EXPORT_SYMBOL_GPL(vtime_account);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -583,3 +600,39 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
 #endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static DEFINE_PER_CPU(unsigned long long, cputime_snap);
+
+static cputime_t get_vtime_delta(void)
+{
+	unsigned long long delta;
+
+	delta = sched_clock() - __this_cpu_read(cputime_snap);
+	__this_cpu_add(cputime_snap, delta);
+
+	/* CHECKME: always safe to convert nsecs to cputime? */
+	return nsecs_to_cputime(delta);
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta();
+
+	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_user(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta();
+
+	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta();
+
+	account_idle_time(delta_cpu);
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-- 
cgit v1.2.3


From 3f4724ea85b7d9055a9976fa8f30b471bdfbca93 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 16 Jul 2012 18:00:34 +0200
Subject: cputime: Allow dynamic switch between tick/virtual based cputime
 accounting

Allow to dynamically switch between tick and virtual based
cputime accounting. This way we can provide a kind of "on-demand"
virtual based cputime accounting. In this mode, the kernel relies
on the context tracking subsystem to dynamically probe on kernel
boundaries.

This is in preparation for being able to stop the timer tick in
more places than just the idle state. Doing so will depend on
CONFIG_VIRT_CPU_ACCOUNTING_GEN which makes it possible to account
the cputime without the tick by hooking on kernel/user boundaries.

Depending whether the tick is stopped or not, we can switch between
tick and vtime based accounting anytime in order to minimize the
overhead associated to user hooks.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/vtime.h    |  8 ++++++++
 kernel/sched/cputime.c   | 41 +++++++++++++++++++++++++++++++----------
 kernel/time/tick-sched.c |  5 ++++-
 3 files changed, 43 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 21ef703d1b25..5368af9bdf06 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -10,12 +10,20 @@ extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
 extern void vtime_account(struct task_struct *tsk);
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern bool vtime_accounting_enabled(void);
 #else
+static inline bool vtime_accounting_enabled(void) { return true; }
+#endif
+
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
 static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_account(struct task_struct *tsk) { }
+static inline bool vtime_accounting_enabled(void) { return false; }
 #endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1c964eced92c..e1939d38bf73 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -317,8 +317,6 @@ out:
 	rcu_read_unlock();
 }
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 /*
  * Account a tick to a process and cpustat
@@ -383,11 +381,12 @@ static void irqtime_account_idle_ticks(int ticks)
 		irqtime_account_process_tick(current, 0, rq);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static void irqtime_account_idle_ticks(int ticks) {}
-static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+static inline void irqtime_account_idle_ticks(int ticks) {}
+static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 						struct rq *rq) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Account a single tick of cpu time.
  * @p: the process that the cpu time gets accounted to
@@ -398,6 +397,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
+	if (vtime_accounting_enabled())
+		return;
+
 	if (sched_clock_irqtime) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
@@ -439,8 +441,7 @@ void account_idle_ticks(unsigned long ticks)
 
 	account_idle_time(jiffies_to_cputime(ticks));
 }
-
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
  * Use precise platform statistics if available:
@@ -475,6 +476,9 @@ EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_task_switch(struct task_struct *prev)
 {
+	if (!vtime_accounting_enabled())
+		return;
+
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
@@ -498,6 +502,9 @@ void vtime_task_switch(struct task_struct *prev)
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
 void vtime_account(struct task_struct *tsk)
 {
+	if (!vtime_accounting_enabled())
+		return;
+
 	if (!in_interrupt()) {
 		/*
 		 * If we interrupted user, context_tracking_in_user()
@@ -520,7 +527,7 @@ void vtime_account(struct task_struct *tsk)
 EXPORT_SYMBOL_GPL(vtime_account);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 
-#else
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
 {
@@ -599,7 +606,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	thread_group_cputime(p, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 static DEFINE_PER_CPU(unsigned long long, cputime_snap);
@@ -617,14 +624,23 @@ static cputime_t get_vtime_delta(void)
 
 void vtime_account_system(struct task_struct *tsk)
 {
-	cputime_t delta_cpu = get_vtime_delta();
+	cputime_t delta_cpu;
+
+	if (!vtime_accounting_enabled())
+		return;
 
+	delta_cpu = get_vtime_delta();
 	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
 }
 
 void vtime_account_user(struct task_struct *tsk)
 {
-	cputime_t delta_cpu = get_vtime_delta();
+	cputime_t delta_cpu;
+
+	if (!vtime_accounting_enabled())
+		return;
+
+	delta_cpu = get_vtime_delta();
 
 	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
 }
@@ -635,4 +651,9 @@ void vtime_account_idle(struct task_struct *tsk)
 
 	account_idle_time(delta_cpu);
 }
+
+bool vtime_accounting_enabled(void)
+{
+	return context_tracking_active();
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d58e552d9fd1..46dfb6d94b1c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -631,8 +631,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 
 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 {
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	unsigned long ticks;
+
+	if (vtime_accounting_enabled())
+		return;
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
 	 * time we slept as update_process_times does only a 1 tick
-- 
cgit v1.2.3


From 6fac4829ce0ef9b7f24369086ce5f0e9f38d37bc Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 13 Nov 2012 14:20:55 +0100
Subject: cputime: Use accessors to read task cputime stats

This is in preparation for the full dynticks feature. While
remotely reading the cputime of a task running in a full
dynticks CPU, we'll need to do some extra-computation. This
way we can account the time it spent tickless in userspace
since its last cputime snapshot.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 arch/alpha/kernel/osf_sys.c |  6 ++++--
 arch/x86/kernel/apm_32.c    | 11 ++++++-----
 drivers/isdn/mISDN/stack.c  |  7 ++++++-
 fs/binfmt_elf.c             |  8 ++++++--
 fs/binfmt_elf_fdpic.c       |  7 +++++--
 fs/proc/array.c             |  4 ++--
 include/linux/sched.h       | 23 +++++++++++++++++++++++
 include/linux/tsacct_kern.h |  3 +++
 kernel/acct.c               |  6 ++++--
 kernel/cpu.c                |  4 +++-
 kernel/delayacct.c          |  7 +++++--
 kernel/exit.c               | 10 ++++++----
 kernel/posix-cpu-timers.c   | 28 ++++++++++++++++++++++------
 kernel/sched/cputime.c      | 13 +++++++------
 kernel/signal.c             | 12 ++++++++----
 kernel/tsacct.c             | 44 ++++++++++++++++++++++++++++++++++----------
 16 files changed, 144 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 14db93e4c8a8..dbc1760f418b 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1139,6 +1139,7 @@ struct rusage32 {
 SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 {
 	struct rusage32 r;
+	cputime_t utime, stime;
 
 	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
 		return -EINVAL;
@@ -1146,8 +1147,9 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 	memset(&r, 0, sizeof(r));
 	switch (who) {
 	case RUSAGE_SELF:
-		jiffies_to_timeval32(current->utime, &r.ru_utime);
-		jiffies_to_timeval32(current->stime, &r.ru_stime);
+		task_cputime(current, &utime, &stime);
+		jiffies_to_timeval32(utime, &r.ru_utime);
+		jiffies_to_timeval32(stime, &r.ru_stime);
 		r.ru_minflt = current->min_flt;
 		r.ru_majflt = current->maj_flt;
 		break;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d65464e43503..8d7012b7f402 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -899,6 +899,7 @@ static void apm_cpu_idle(void)
 	static int use_apm_idle; /* = 0 */
 	static unsigned int last_jiffies; /* = 0 */
 	static unsigned int last_stime; /* = 0 */
+	cputime_t stime;
 
 	int apm_idle_done = 0;
 	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
@@ -906,23 +907,23 @@ static void apm_cpu_idle(void)
 
 	WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
 recalc:
+	task_cputime(current, NULL, &stime);
 	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
 		use_apm_idle = 0;
-		last_jiffies = jiffies;
-		last_stime = current->stime;
 	} else if (jiffies_since_last_check > idle_period) {
 		unsigned int idle_percentage;
 
-		idle_percentage = current->stime - last_stime;
+		idle_percentage = stime - last_stime;
 		idle_percentage *= 100;
 		idle_percentage /= jiffies_since_last_check;
 		use_apm_idle = (idle_percentage > idle_threshold);
 		if (apm_info.forbid_idle)
 			use_apm_idle = 0;
-		last_jiffies = jiffies;
-		last_stime = current->stime;
 	}
 
+	last_jiffies = jiffies;
+	last_stime = stime;
+
 	bucket = IDLE_LEAKY_MAX;
 
 	while (!need_resched()) {
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index 5f21f629b7ae..deda591f70b9 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/mISDNif.h>
 #include <linux/kthread.h>
+#include <linux/sched.h>
 #include "core.h"
 
 static u_int	*debug;
@@ -202,6 +203,9 @@ static int
 mISDNStackd(void *data)
 {
 	struct mISDNstack *st = data;
+#ifdef MISDN_MSG_STATS
+	cputime_t utime, stime;
+#endif
 	int err = 0;
 
 	sigfillset(&current->blocked);
@@ -303,9 +307,10 @@ mISDNStackd(void *data)
 	       "msg %d sleep %d stopped\n",
 	       dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
 	       st->stopped_cnt);
+	task_cputime(st->thread, &utime, &stime);
 	printk(KERN_DEBUG
 	       "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
-	       dev_name(&st->dev->dev), st->thread->utime, st->thread->stime);
+	       dev_name(&st->dev->dev), utime, stime);
 	printk(KERN_DEBUG
 	       "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n",
 	       dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..49d0b43458b7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -33,6 +33,7 @@
 #include <linux/elf.h>
 #include <linux/utsname.h>
 #include <linux/coredump.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
-		cputime_to_timeval(p->utime, &prstatus->pr_utime);
-		cputime_to_timeval(p->stime, &prstatus->pr_stime);
+		cputime_t utime, stime;
+
+		task_cputime(p, &utime, &stime);
+		cputime_to_timeval(utime, &prstatus->pr_utime);
+		cputime_to_timeval(stime, &prstatus->pr_stime);
 	}
 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..cb240dd3b402 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
-		cputime_to_timeval(p->utime, &prstatus->pr_utime);
-		cputime_to_timeval(p->stime, &prstatus->pr_stime);
+		cputime_t utime, stime;
+
+		task_cputime(p, &utime, &stime);
+		cputime_to_timeval(utime, &prstatus->pr_utime);
+		cputime_to_timeval(stime, &prstatus->pr_stime);
 	}
 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..f7ed9ee46eb9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				gtime += t->gtime;
+				gtime += task_gtime(t);
 				t = next_thread(t);
 			} while (t != task);
 
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
 		task_cputime_adjusted(task, &utime, &stime);
-		gtime = task->gtime;
+		gtime = task_gtime(task);
 	}
 
 	/* scale priority and nice values from timeslices to -20..20 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6fc8f45de4e9..a9c608b6154e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1792,6 +1792,29 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+	return t->gtime;
+}
+
+static inline void task_cputime(struct task_struct *t,
+				cputime_t *utime, cputime_t *stime)
+{
+	if (utime)
+		*utime = t->utime;
+	if (stime)
+		*stime = t->stime;
+}
+
+static inline void task_cputime_scaled(struct task_struct *t,
+				       cputime_t *utimescaled,
+				       cputime_t *stimescaled)
+{
+	if (utimescaled)
+		*utimescaled = t->utimescaled;
+	if (stimescaled)
+		*stimescaled = t->stimescaled;
+}
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h
index 44893e5ec8f7..3251965bf4cc 100644
--- a/include/linux/tsacct_kern.h
+++ b/include/linux/tsacct_kern.h
@@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns,
 #ifdef CONFIG_TASK_XACCT
 extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
 extern void acct_update_integrals(struct task_struct *tsk);
+extern void acct_account_cputime(struct task_struct *tsk);
 extern void acct_clear_integrals(struct task_struct *tsk);
 #else
 static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 {}
 static inline void acct_update_integrals(struct task_struct *tsk)
 {}
+static inline void acct_account_cputime(struct task_struct *tsk)
+{}
 static inline void acct_clear_integrals(struct task_struct *tsk)
 {}
 #endif /* CONFIG_TASK_XACCT */
diff --git a/kernel/acct.c b/kernel/acct.c
index 051e071a06e7..e8b1627ab9c7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -566,6 +566,7 @@ out:
 void acct_collect(long exitcode, int group_dead)
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
+	cputime_t utime, stime;
 	unsigned long vsize = 0;
 
 	if (group_dead && current->mm) {
@@ -593,8 +594,9 @@ void acct_collect(long exitcode, int group_dead)
 		pacct->ac_flag |= ACORE;
 	if (current->flags & PF_SIGNALED)
 		pacct->ac_flag |= AXSIG;
-	pacct->ac_utime += current->utime;
-	pacct->ac_stime += current->stime;
+	task_cputime(current, &utime, &stime);
+	pacct->ac_utime += utime;
+	pacct->ac_stime += stime;
 	pacct->ac_minflt += current->min_flt;
 	pacct->ac_majflt += current->maj_flt;
 	spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3046a503242c..e5d5e8e1e030 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -224,11 +224,13 @@ void clear_tasks_mm_cpumask(int cpu)
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
+	cputime_t utime, stime;
 
 	write_lock_irq(&tasklist_lock);
 	for_each_process(p) {
+		task_cputime(p, &utime, &stime);
 		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
-		    (p->utime || p->stime))
+		    (utime || stime))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
 				"(state = %ld, flags = %x)\n",
 				p->comm, task_pid_nr(p), cpu,
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 418b3f7053aa..d473988c1d0b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -106,6 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	unsigned long long t2, t3;
 	unsigned long flags;
 	struct timespec ts;
+	cputime_t utime, stime, stimescaled, utimescaled;
 
 	/* Though tsk->delays accessed later, early exit avoids
 	 * unnecessary returning of other data
@@ -114,12 +115,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 		goto done;
 
 	tmp = (s64)d->cpu_run_real_total;
-	cputime_to_timespec(tsk->utime + tsk->stime, &ts);
+	task_cputime(tsk, &utime, &stime);
+	cputime_to_timespec(utime + stime, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
 
 	tmp = (s64)d->cpu_scaled_run_real_total;
-	cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts);
+	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+	cputime_to_timespec(utimescaled + stimescaled, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_scaled_run_real_total =
 		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
diff --git a/kernel/exit.c b/kernel/exit.c
index b4df21937216..7dd20408707c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,6 +85,7 @@ static void __exit_signal(struct task_struct *tsk)
 	bool group_dead = thread_group_leader(tsk);
 	struct sighand_struct *sighand;
 	struct tty_struct *uninitialized_var(tty);
+	cputime_t utime, stime;
 
 	sighand = rcu_dereference_check(tsk->sighand,
 					lockdep_tasklist_lock_is_held());
@@ -123,9 +124,10 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime += tsk->utime;
-		sig->stime += tsk->stime;
-		sig->gtime += tsk->gtime;
+		task_cputime(tsk, &utime, &stime);
+		sig->utime += utime;
+		sig->stime += stime;
+		sig->gtime += task_gtime(tsk);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
@@ -1092,7 +1094,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		sig = p->signal;
 		psig->cutime += tgutime + sig->cutime;
 		psig->cstime += tgstime + sig->cstime;
-		psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
+		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
 		psig->cmin_flt +=
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index a278cad1d5d6..165d47698477 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -155,11 +155,19 @@ static void bump_cpu_timer(struct k_itimer *timer,
 
 static inline cputime_t prof_ticks(struct task_struct *p)
 {
-	return p->utime + p->stime;
+	cputime_t utime, stime;
+
+	task_cputime(p, &utime, &stime);
+
+	return utime + stime;
 }
 static inline cputime_t virt_ticks(struct task_struct *p)
 {
-	return p->utime;
+	cputime_t utime;
+
+	task_cputime(p, &utime, NULL);
+
+	return utime;
 }
 
 static int
@@ -471,18 +479,23 @@ static void cleanup_timers(struct list_head *head,
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
+	cputime_t utime, stime;
+
 	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 						sizeof(unsigned long long));
+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->cpu_timers,
-		       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
+		       utime, stime, tsk->se.sum_exec_runtime);
 
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
 	struct signal_struct *const sig = tsk->signal;
+	cputime_t utime, stime;
 
+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->signal->cpu_timers,
-		       tsk->utime + sig->utime, tsk->stime + sig->stime,
+		       utime + sig->utime, stime + sig->stime,
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 
@@ -1226,11 +1239,14 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
 		struct task_cputime task_sample = {
-			.utime = tsk->utime,
-			.stime = tsk->stime,
+			.utime = utime,
+			.stime = stime,
 			.sum_exec_runtime = tsk->se.sum_exec_runtime
 		};
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e1939d38bf73..c533deaf06d5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -164,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for user time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }
 
 /*
@@ -214,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for system time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }
 
 /*
@@ -296,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct signal_struct *sig = tsk->signal;
+	cputime_t utime, stime;
 	struct task_struct *t;
 
 	times->utime = sig->utime;
@@ -309,8 +310,9 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 	t = tsk;
 	do {
-		times->utime += t->utime;
-		times->stime += t->stime;
+		task_cputime(tsk, &utime, &stime);
+		times->utime += utime;
+		times->stime += stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 out:
@@ -588,11 +590,10 @@ static void cputime_adjust(struct task_cputime *curr,
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime = {
-		.utime = p->utime,
-		.stime = p->stime,
 		.sum_exec_runtime = p->se.sum_exec_runtime,
 	};
 
+	task_cputime(p, &cputime.utime, &cputime.stime);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 372771e948c2..776a45a3661b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1638,6 +1638,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 	unsigned long flags;
 	struct sighand_struct *psig;
 	bool autoreap = false;
+	cputime_t utime, stime;
 
 	BUG_ON(sig == -1);
 
@@ -1675,8 +1676,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 				       task_uid(tsk));
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime);
-	info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime);
+	task_cputime(tsk, &utime, &stime);
+	info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime);
+	info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
@@ -1740,6 +1742,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	unsigned long flags;
 	struct task_struct *parent;
 	struct sighand_struct *sighand;
+	cputime_t utime, stime;
 
 	if (for_ptracer) {
 		parent = tsk->parent;
@@ -1758,8 +1761,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(tsk->utime);
-	info.si_stime = cputime_to_clock_t(tsk->stime);
+	task_cputime(tsk, &utime, &stime);
+	info.si_utime = cputime_to_clock_t(utime);
+	info.si_stime = cputime_to_clock_t(stime);
 
  	info.si_code = why;
  	switch (why) {
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 625df0b44690..a1dd9a1b1327 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -32,6 +32,7 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 {
 	const struct cred *tcred;
 	struct timespec uptime, ts;
+	cputime_t utime, stime, utimescaled, stimescaled;
 	u64 ac_etime;
 
 	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
@@ -65,10 +66,15 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 	stats->ac_ppid	 = pid_alive(tsk) ?
 		task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0;
 	rcu_read_unlock();
-	stats->ac_utime = cputime_to_usecs(tsk->utime);
-	stats->ac_stime = cputime_to_usecs(tsk->stime);
-	stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
-	stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
+
+	task_cputime(tsk, &utime, &stime);
+	stats->ac_utime = cputime_to_usecs(utime);
+	stats->ac_stime = cputime_to_usecs(stime);
+
+	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+	stats->ac_utimescaled = cputime_to_usecs(utimescaled);
+	stats->ac_stimescaled = cputime_to_usecs(stimescaled);
+
 	stats->ac_minflt = tsk->min_flt;
 	stats->ac_majflt = tsk->maj_flt;
 
@@ -115,11 +121,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 #undef KB
 #undef MB
 
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
+static void __acct_update_integrals(struct task_struct *tsk,
+				    cputime_t utime, cputime_t stime)
 {
 	if (likely(tsk->mm)) {
 		cputime_t time, dtime;
@@ -128,7 +131,7 @@ void acct_update_integrals(struct task_struct *tsk)
 		u64 delta;
 
 		local_irq_save(flags);
-		time = tsk->stime + tsk->utime;
+		time = stime + utime;
 		dtime = time - tsk->acct_timexpd;
 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
 		delta = value.tv_sec;
@@ -144,6 +147,27 @@ void acct_update_integrals(struct task_struct *tsk)
 	}
 }
 
+/**
+ * acct_update_integrals - update mm integral fields in task_struct
+ * @tsk: task_struct for accounting
+ */
+void acct_update_integrals(struct task_struct *tsk)
+{
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
+	__acct_update_integrals(tsk, utime, stime);
+}
+
+/**
+ * acct_account_cputime - update mm integral after cputime update
+ * @tsk: task_struct for accounting
+ */
+void acct_account_cputime(struct task_struct *tsk)
+{
+	__acct_update_integrals(tsk, tsk->utime, tsk->stime);
+}
+
 /**
  * acct_clear_integrals - clear the mm integral fields in task_struct
  * @tsk: task_struct whose accounting fields are cleared
-- 
cgit v1.2.3


From c11f11fcbdb5be790c565aed46411486a7586afc Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 21 Jan 2013 00:50:22 +0100
Subject: kvm: Prepare to add generic guest entry/exit callbacks

Do some ground preparatory work before adding guest_enter()
and guest_exit() context tracking callbacks. Those will
be later used to read the guest cputime safely when we
run in full dynticks mode.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/kernel/time.c    |  1 +
 arch/powerpc/kernel/time.c |  1 +
 include/linux/kvm_host.h   | 39 ++++++++++++++++++++++++++++++---------
 include/linux/vtime.h      |  2 --
 kernel/sched/cputime.c     | 10 ----------
 5 files changed, 32 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index a3a3f5a1cb3a..fbaac1afb844 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -136,6 +136,7 @@ void vtime_account_system(struct task_struct *tsk)
 
 	account_system_time(tsk, 0, delta, delta);
 }
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 22c9b67f9983..2e04b37f67f9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -347,6 +347,7 @@ void vtime_account_system(struct task_struct *tsk)
 	if (stolen)
 		account_steal_time(stolen);
 }
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c497ab0d03d..4fe2396401da 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -22,6 +22,7 @@
 #include <linux/rcupdate.h>
 #include <linux/ratelimit.h>
 #include <linux/err.h>
+#include <linux/irqflags.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -740,15 +741,36 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 }
 #endif /* CONFIG_IOMMU_API */
 
-static inline void kvm_guest_enter(void)
+static inline void guest_enter(void)
 {
-	BUG_ON(preemptible());
 	/*
 	 * This is running in ioctl context so we can avoid
 	 * the call to vtime_account() with its unnecessary idle check.
 	 */
-	vtime_account_system_irqsafe(current);
+	vtime_account_system(current);
 	current->flags |= PF_VCPU;
+}
+
+static inline void guest_exit(void)
+{
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system(current);
+	current->flags &= ~PF_VCPU;
+}
+
+static inline void kvm_guest_enter(void)
+{
+	unsigned long flags;
+
+	BUG_ON(preemptible());
+
+	local_irq_save(flags);
+	guest_enter();
+	local_irq_restore(flags);
+
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
 	 * is very similar to exiting to userspase from rcu point of view. In
@@ -761,12 +783,11 @@ static inline void kvm_guest_enter(void)
 
 static inline void kvm_guest_exit(void)
 {
-	/*
-	 * This is running in ioctl context so we can avoid
-	 * the call to vtime_account() with its unnecessary idle check.
-	 */
-	vtime_account_system_irqsafe(current);
-	current->flags &= ~PF_VCPU;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	guest_exit();
+	local_irq_restore(flags);
 }
 
 /*
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 5368af9bdf06..bb50c3ca0d79 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -6,7 +6,6 @@ struct task_struct;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
 extern void vtime_account(struct task_struct *tsk);
@@ -20,7 +19,6 @@ static inline bool vtime_accounting_enabled(void) { return true; }
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
-static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
 static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_account(struct task_struct *tsk) { }
 static inline bool vtime_accounting_enabled(void) { return false; }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index c533deaf06d5..a44ecdf809a1 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -465,16 +465,6 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	*st = cputime.stime;
 }
 
-void vtime_account_system_irqsafe(struct task_struct *tsk)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	vtime_account_system(tsk);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
-
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_task_switch(struct task_struct *prev)
 {
-- 
cgit v1.2.3


From 6a61671bb2f3a1bd12cd17b8fca811a624782632 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 16 Dec 2012 20:00:34 +0100
Subject: cputime: Safely read cputime of full dynticks CPUs

While remotely reading the cputime of a task running in a
full dynticks CPU, the values stored in utime/stime fields
of struct task_struct may be stale. Its values may be those
of the last kernel <-> user transition time snapshot and
we need to add the tickless time spent since this snapshot.

To fix this, flush the cputime of the dynticks CPUs on
kernel <-> user transition and record the time / context
where we did this. Then on top of this snapshot and the current
time, perform the fixup on the reader side from task_times()
accessors.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[fixed kvm module related build errors]
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
---
 arch/s390/kernel/vtime.c  |   6 +-
 include/linux/hardirq.h   |   4 +-
 include/linux/init_task.h |  11 +++
 include/linux/kvm_host.h  |  20 ++++-
 include/linux/sched.h     |  27 +++++--
 include/linux/vtime.h     |  47 ++++++-----
 kernel/context_tracking.c |  21 ++++-
 kernel/fork.c             |   6 ++
 kernel/sched/core.c       |   1 +
 kernel/sched/cputime.c    | 193 +++++++++++++++++++++++++++++++++++++++++++---
 kernel/softirq.c          |   6 +-
 11 files changed, 290 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e84b8b68444a..ce9cc5aa2033 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk)
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
  */
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	u64 timer, system;
@@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk)
 
 	virt_timer_forward(system);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 
 void vtime_account_system(struct task_struct *tsk)
-__attribute__((alias("vtime_account")));
+__attribute__((alias("vtime_account_irq_enter")));
 EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void __kprobes vtime_stop_cpu(void)
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 624ef3f45c8e..7105d5cbb762 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
  */
 #define __irq_enter()					\
 	do {						\
-		vtime_account_irq_enter(current);	\
+		account_irq_enter_time(current);	\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
 #define __irq_exit()					\
 	do {						\
 		trace_hardirq_exit();			\
-		vtime_account_irq_exit(current);	\
+		account_irq_exit_time(current);		\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6d087c5f57f7..cc898b871cef 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -10,6 +10,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include <linux/securebits.h>
+#include <linux/seqlock.h>
 #include <net/net_namespace.h>
 
 #ifdef CONFIG_SMP
@@ -141,6 +142,15 @@ extern struct task_group root_task_group;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# define INIT_VTIME(tsk)						\
+	.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock),	\
+	.vtime_snap = 0,				\
+	.vtime_snap_whence = VTIME_SYS,
+#else
+# define INIT_VTIME(tsk)
+#endif
+
 #define INIT_TASK_COMM "swapper"
 
 /*
@@ -210,6 +220,7 @@ extern struct task_group root_task_group;
 	INIT_TRACE_RECURSION						\
 	INIT_TASK_RCU_PREEMPT(tsk)					\
 	INIT_CPUSET_SEQ							\
+	INIT_VTIME(tsk)							\
 }
 
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4fe2396401da..b7996a768eb2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -741,7 +741,7 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 }
 #endif /* CONFIG_IOMMU_API */
 
-static inline void guest_enter(void)
+static inline void __guest_enter(void)
 {
 	/*
 	 * This is running in ioctl context so we can avoid
@@ -751,7 +751,7 @@ static inline void guest_enter(void)
 	current->flags |= PF_VCPU;
 }
 
-static inline void guest_exit(void)
+static inline void __guest_exit(void)
 {
 	/*
 	 * This is running in ioctl context so we can avoid
@@ -761,6 +761,22 @@ static inline void guest_exit(void)
 	current->flags &= ~PF_VCPU;
 }
 
+#ifdef CONFIG_CONTEXT_TRACKING
+extern void guest_enter(void);
+extern void guest_exit(void);
+
+#else /* !CONFIG_CONTEXT_TRACKING */
+static inline void guest_enter(void)
+{
+	__guest_enter();
+}
+
+static inline void guest_exit(void)
+{
+	__guest_exit();
+}
+#endif /* !CONFIG_CONTEXT_TRACKING */
+
 static inline void kvm_guest_enter(void)
 {
 	unsigned long flags;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a9c608b6154e..a9fa5145e1a7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1367,6 +1367,15 @@ struct task_struct {
 	cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	struct cputime prev_cputime;
+#endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+	seqlock_t vtime_seqlock;
+	unsigned long long vtime_snap;
+	enum {
+		VTIME_SLEEPING = 0,
+		VTIME_USER,
+		VTIME_SYS,
+	} vtime_snap_whence;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
@@ -1792,11 +1801,13 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-static inline cputime_t task_gtime(struct task_struct *t)
-{
-	return t->gtime;
-}
-
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void task_cputime(struct task_struct *t,
+			 cputime_t *utime, cputime_t *stime);
+extern void task_cputime_scaled(struct task_struct *t,
+				cputime_t *utimescaled, cputime_t *stimescaled);
+extern cputime_t task_gtime(struct task_struct *t);
+#else
 static inline void task_cputime(struct task_struct *t,
 				cputime_t *utime, cputime_t *stime)
 {
@@ -1815,6 +1826,12 @@ static inline void task_cputime_scaled(struct task_struct *t,
 	if (stimescaled)
 		*stimescaled = t->stimescaled;
 }
+
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+	return t->gtime;
+}
+#endif
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index bb50c3ca0d79..71a5782d8c59 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -8,35 +8,44 @@ extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
-extern void vtime_account(struct task_struct *tsk);
+extern void vtime_account_irq_enter(struct task_struct *tsk);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern bool vtime_accounting_enabled(void);
-#else
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline bool vtime_accounting_enabled(void) { return true; }
 #endif
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_user(struct task_struct *tsk) { }
-static inline void vtime_account(struct task_struct *tsk) { }
+static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
 static inline bool vtime_accounting_enabled(void) { return false; }
 #endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
-static inline void vtime_user_enter(struct task_struct *tsk)
-{
-	vtime_account_system(tsk);
-}
+extern void arch_vtime_task_switch(struct task_struct *tsk);
+extern void vtime_account_irq_exit(struct task_struct *tsk);
+extern bool vtime_accounting_enabled(void);
+extern void vtime_user_enter(struct task_struct *tsk);
 static inline void vtime_user_exit(struct task_struct *tsk)
 {
 	vtime_account_user(tsk);
 }
+extern void vtime_guest_enter(struct task_struct *tsk);
+extern void vtime_guest_exit(struct task_struct *tsk);
+extern void vtime_init_idle(struct task_struct *tsk);
 #else
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+	/* On hard|softirq exit we always account to hard|softirq cputime */
+	vtime_account_system(tsk);
+}
 static inline void vtime_user_enter(struct task_struct *tsk) { }
 static inline void vtime_user_exit(struct task_struct *tsk) { }
+static inline void vtime_guest_enter(struct task_struct *tsk) { }
+static inline void vtime_guest_exit(struct task_struct *tsk) { }
+static inline void vtime_init_idle(struct task_struct *tsk) { }
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -45,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk);
 static inline void irqtime_account_irq(struct task_struct *tsk) { }
 #endif
 
-static inline void vtime_account_irq_enter(struct task_struct *tsk)
+static inline void account_irq_enter_time(struct task_struct *tsk)
 {
-	/*
-	 * Hardirq can interrupt idle task anytime. So we need vtime_account()
-	 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
-	 * Softirq can also interrupt idle task directly if it calls
-	 * local_bh_enable(). Such case probably don't exist but we never know.
-	 * Ksoftirqd is not concerned because idle time is flushed on context
-	 * switch. Softirqs in the end of hardirqs are also not a problem because
-	 * the idle time is flushed on hardirq time already.
-	 */
-	vtime_account(tsk);
+	vtime_account_irq_enter(tsk);
 	irqtime_account_irq(tsk);
 }
 
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
+static inline void account_irq_exit_time(struct task_struct *tsk)
 {
-	/* On hard|softirq exit we always account to hard|softirq cputime */
-	vtime_account_system(tsk);
+	vtime_account_irq_exit(tsk);
 	irqtime_account_irq(tsk);
 }
 
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 9002e92e6372..74f68f4dc6c2 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,8 +1,9 @@
 #include <linux/context_tracking.h>
+#include <linux/kvm_host.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/hardirq.h>
-
+#include <linux/export.h>
 
 DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
@@ -61,6 +62,24 @@ void user_exit(void)
 	local_irq_restore(flags);
 }
 
+void guest_enter(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_enter(current);
+	else
+		__guest_enter();
+}
+EXPORT_SYMBOL_GPL(guest_enter);
+
+void guest_exit(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_exit(current);
+	else
+		__guest_exit();
+}
+EXPORT_SYMBOL_GPL(guest_exit);
+
 void context_tracking_task_switch(struct task_struct *prev,
 			     struct task_struct *next)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 65ca6d27f24e..e68a95b4cf26 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+	seqlock_init(&p->vtime_seqlock);
+	p->vtime_snap = 0;
+	p->vtime_snap_whence = VTIME_SLEEPING;
+#endif
+
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb0..261022d7e79d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4666,6 +4666,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+	vtime_init_idle(idle);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a44ecdf809a1..082e05d915b4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -492,7 +492,7 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
 	if (!vtime_accounting_enabled())
 		return;
@@ -516,7 +516,7 @@ void vtime_account(struct task_struct *tsk)
 	}
 	vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
@@ -600,28 +600,55 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static DEFINE_PER_CPU(unsigned long long, cputime_snap);
+static unsigned long long vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long clock;
+
+	clock = sched_clock();
+	if (clock < tsk->vtime_snap)
+		return 0;
 
-static cputime_t get_vtime_delta(void)
+	return clock - tsk->vtime_snap;
+}
+
+static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
-	unsigned long long delta;
+	unsigned long long delta = vtime_delta(tsk);
 
-	delta = sched_clock() - __this_cpu_read(cputime_snap);
-	__this_cpu_add(cputime_snap, delta);
+	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
+	tsk->vtime_snap += delta;
 
 	/* CHECKME: always safe to convert nsecs to cputime? */
 	return nsecs_to_cputime(delta);
 }
 
+static void __vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
 void vtime_account_system(struct task_struct *tsk)
 {
-	cputime_t delta_cpu;
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
 
+void vtime_account_irq_exit(struct task_struct *tsk)
+{
 	if (!vtime_accounting_enabled())
 		return;
 
-	delta_cpu = get_vtime_delta();
-	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+	write_seqlock(&tsk->vtime_seqlock);
+	if (context_tracking_in_user())
+		tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_account_user(struct task_struct *tsk)
@@ -631,14 +658,44 @@ void vtime_account_user(struct task_struct *tsk)
 	if (!vtime_accounting_enabled())
 		return;
 
-	delta_cpu = get_vtime_delta();
+	delta_cpu = get_vtime_delta(tsk);
 
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_SYS;
 	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_user_enter(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_enter(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags |= PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_exit(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags &= ~PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_account_idle(struct task_struct *tsk)
 {
-	cputime_t delta_cpu = get_vtime_delta();
+	cputime_t delta_cpu = get_vtime_delta(tsk);
 
 	account_idle_time(delta_cpu);
 }
@@ -647,4 +704,116 @@ bool vtime_accounting_enabled(void)
 {
 	return context_tracking_active();
 }
+
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+	write_seqlock(&prev->vtime_seqlock);
+	prev->vtime_snap_whence = VTIME_SLEEPING;
+	write_sequnlock(&prev->vtime_seqlock);
+
+	write_seqlock(&current->vtime_seqlock);
+	current->vtime_snap_whence = VTIME_SYS;
+	current->vtime_snap = sched_clock();
+	write_sequnlock(&current->vtime_seqlock);
+}
+
+void vtime_init_idle(struct task_struct *t)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&t->vtime_seqlock, flags);
+	t->vtime_snap_whence = VTIME_SYS;
+	t->vtime_snap = sched_clock();
+	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
+}
+
+cputime_t task_gtime(struct task_struct *t)
+{
+	unsigned long flags;
+	unsigned int seq;
+	cputime_t gtime;
+
+	do {
+		seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags);
+
+		gtime = t->gtime;
+		if (t->flags & PF_VCPU)
+			gtime += vtime_delta(t);
+
+	} while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags));
+
+	return gtime;
+}
+
+/*
+ * Fetch cputime raw values from fields of task_struct and
+ * add up the pending nohz execution time since the last
+ * cputime snapshot.
+ */
+static void
+fetch_task_cputime(struct task_struct *t,
+		   cputime_t *u_dst, cputime_t *s_dst,
+		   cputime_t *u_src, cputime_t *s_src,
+		   cputime_t *udelta, cputime_t *sdelta)
+{
+	unsigned long flags;
+	unsigned int seq;
+	unsigned long long delta;
+
+	do {
+		*udelta = 0;
+		*sdelta = 0;
+
+		seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags);
+
+		if (u_dst)
+			*u_dst = *u_src;
+		if (s_dst)
+			*s_dst = *s_src;
+
+		/* Task is sleeping, nothing to add */
+		if (t->vtime_snap_whence == VTIME_SLEEPING ||
+		    is_idle_task(t))
+			continue;
+
+		delta = vtime_delta(t);
+
+		/*
+		 * Task runs either in user or kernel space, add pending nohz time to
+		 * the right place.
+		 */
+		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
+			*udelta = delta;
+		} else {
+			if (t->vtime_snap_whence == VTIME_SYS)
+				*sdelta = delta;
+		}
+	} while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags));
+}
+
+
+void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utime, stime, &t->utime,
+			   &t->stime, &udelta, &sdelta);
+	if (utime)
+		*utime += udelta;
+	if (stime)
+		*stime += sdelta;
+}
+
+void task_cputime_scaled(struct task_struct *t,
+			 cputime_t *utimescaled, cputime_t *stimescaled)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utimescaled, stimescaled,
+			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
+	if (utimescaled)
+		*utimescaled += cputime_to_scaled(udelta);
+	if (stimescaled)
+		*stimescaled += cputime_to_scaled(sdelta);
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ed567babe789..f5cc25f147a6 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void)
 	current->flags &= ~PF_MEMALLOC;
 
 	pending = local_softirq_pending();
-	vtime_account_irq_enter(current);
+	account_irq_enter_time(current);
 
 	__local_bh_disable((unsigned long)__builtin_return_address(0),
 				SOFTIRQ_OFFSET);
@@ -272,7 +272,7 @@ restart:
 
 	lockdep_softirq_exit();
 
-	vtime_account_irq_exit(current);
+	account_irq_exit_time(current);
 	__local_bh_enable(SOFTIRQ_OFFSET);
 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
 }
@@ -341,7 +341,7 @@ static inline void invoke_softirq(void)
  */
 void irq_exit(void)
 {
-	vtime_account_irq_exit(current);
+	account_irq_exit_time(current);
 	trace_hardirq_exit();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
-- 
cgit v1.2.3


From cef401de7be8c4e155c6746bfccf721a4fa5fab9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 25 Jan 2013 20:34:37 +0000
Subject: net: fix possible wrong checksum generation

Pravin Shelar mentioned that GSO could potentially generate
wrong TX checksum if skb has fragments that are overwritten
by the user between the checksum computation and transmit.

He suggested to linearize skbs but this extra copy can be
avoided for normal tcp skbs cooked by tcp_sendmsg().

This patch introduces a new SKB_GSO_SHARED_FRAG flag, set
in skb_shinfo(skb)->gso_type if at least one frag can be
modified by the user.

Typical sources of such possible overwrites are {vm}splice(),
sendfile(), and macvtap/tun/virtio_net drivers.

Tested:

$ netperf -H 7.7.8.84
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
7.7.8.84 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3959.52

$ netperf -H 7.7.8.84 -t TCP_SENDFILE
TCP SENDFILE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 ()
port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3216.80

Performance of the SENDFILE is impacted by the extra allocation and
copy, and because we use order-0 pages, while the TCP_STREAM uses
bigger pages.

Reported-by: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c    |  3 ++-
 drivers/net/tun.c        | 12 ++++++++----
 drivers/net/virtio_net.c | 12 ++++++++----
 include/linux/skbuff.h   | 19 +++++++++++++++++++
 net/core/dev.c           |  9 +++++++++
 net/core/skbuff.c        |  4 ++++
 net/ipv4/af_inet.c       |  1 +
 net/ipv4/ip_gre.c        |  4 +++-
 net/ipv4/ipip.c          |  4 +++-
 net/ipv4/tcp.c           |  3 +++
 net/ipv4/tcp_input.c     |  4 ++--
 net/ipv4/tcp_output.c    |  4 ++--
 net/ipv6/ip6_offload.c   |  1 +
 13 files changed, 65 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 0f0f9ce3a776..b181dfb3d6d6 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -543,6 +543,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		skb->data_len += len;
 		skb->len += len;
 		skb->truesize += truesize;
+		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (len) {
 			int off = base & ~PAGE_MASK;
@@ -598,7 +599,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
 
 	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
-		skb_shinfo(skb)->gso_type = gso_type;
+		skb_shinfo(skb)->gso_type |= gso_type;
 
 		/* Header must be checked, and gso_segs computed. */
 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index c81680dc10eb..293ce8dfc9e6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1005,6 +1005,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		skb->data_len += len;
 		skb->len += len;
 		skb->truesize += truesize;
+		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (len) {
 			int off = base & ~PAGE_MASK;
@@ -1150,16 +1151,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	}
 
 	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		unsigned short gso_type = 0;
+
 		pr_debug("GSO!\n");
 		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 		case VIRTIO_NET_HDR_GSO_TCPV4:
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+			gso_type = SKB_GSO_TCPV4;
 			break;
 		case VIRTIO_NET_HDR_GSO_TCPV6:
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+			gso_type = SKB_GSO_TCPV6;
 			break;
 		case VIRTIO_NET_HDR_GSO_UDP:
-			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+			gso_type = SKB_GSO_UDP;
 			break;
 		default:
 			tun->dev->stats.rx_frame_errors++;
@@ -1168,9 +1171,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		}
 
 		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+			gso_type |= SKB_GSO_TCP_ECN;
 
 		skb_shinfo(skb)->gso_size = gso.gso_size;
+		skb_shinfo(skb)->gso_type |= gso_type;
 		if (skb_shinfo(skb)->gso_size == 0) {
 			tun->dev->stats.rx_frame_errors++;
 			kfree_skb(skb);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 701408a1ded6..58914c8ea68f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -220,6 +220,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
 	skb->len += size;
 	skb->truesize += PAGE_SIZE;
 	skb_shinfo(skb)->nr_frags++;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 	*len -= size;
 }
 
@@ -379,16 +380,18 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
 
 	if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		unsigned short gso_type = 0;
+
 		pr_debug("GSO!\n");
 		switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 		case VIRTIO_NET_HDR_GSO_TCPV4:
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+			gso_type = SKB_GSO_TCPV4;
 			break;
 		case VIRTIO_NET_HDR_GSO_UDP:
-			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+			gso_type = SKB_GSO_UDP;
 			break;
 		case VIRTIO_NET_HDR_GSO_TCPV6:
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+			gso_type = SKB_GSO_TCPV6;
 			break;
 		default:
 			net_warn_ratelimited("%s: bad gso type %u.\n",
@@ -397,7 +400,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		}
 
 		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+			gso_type |= SKB_GSO_TCP_ECN;
 
 		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
 		if (skb_shinfo(skb)->gso_size == 0) {
@@ -405,6 +408,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 			goto frame_err;
 		}
 
+		skb_shinfo(skb)->gso_type |= gso_type;
 		/* Header must be checked, and gso_segs computed. */
 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 		skb_shinfo(skb)->gso_segs = 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8b2256e880e0..0259b719bebf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -307,6 +307,13 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
+
+	/* This indicates at least one fragment might be overwritten
+	 * (as in vmsplice(), sendfile() ...)
+	 * If we need to compute a TX checksum, we'll need to copy
+	 * all frags to avoid possible bad checksum
+	 */
+	SKB_GSO_SHARED_FRAG = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2200,6 +2207,18 @@ static inline int skb_linearize(struct sk_buff *skb)
 	return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
 }
 
+/**
+ * skb_has_shared_frag - can any frag be overwritten
+ * @skb: buffer to test
+ *
+ * Return true if the skb has at least one frag that might be modified
+ * by an external entity (as in vmsplice()/sendfile())
+ */
+static inline bool skb_has_shared_frag(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG;
+}
+
 /**
  *	skb_linearize_cow - make sure skb is linear and writable
  *	@skb: buffer to process
diff --git a/net/core/dev.c b/net/core/dev.c
index c69cd8721b28..a83375d3af72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2271,6 +2271,15 @@ int skb_checksum_help(struct sk_buff *skb)
 		return -EINVAL;
 	}
 
+	/* Before computing a checksum, we should make sure no frag could
+	 * be modified by an external entity : checksum could be wrong.
+	 */
+	if (skb_has_shared_frag(skb)) {
+		ret = __skb_linearize(skb);
+		if (ret)
+			goto out;
+	}
+
 	offset = skb_checksum_start_offset(skb);
 	BUG_ON(offset >= skb_headlen(skb));
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2568c449fe36..bddc1dd2e7f2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2340,6 +2340,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 {
 	int pos = skb_headlen(skb);
 
+	skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type;
+
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
 	else		/* Second chunk has no header, nothing to copy. */
@@ -2845,6 +2847,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_copy_from_linear_data_offset(skb, offset,
 						 skb_put(nskb, hsize), hsize);
 
+		skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
+
 		while (pos < offset + len && i < nfrags) {
 			*frag = skb_shinfo(skb)->frags[i];
 			__skb_frag_ref(frag);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4b7053919976..49ddca31c4da 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1306,6 +1306,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_SHARED_FRAG |
 		       0)))
 		goto out;
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 303012adf9e6..af6be70821c4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -738,7 +738,7 @@ drop:
 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	const struct iphdr  *old_iph = ip_hdr(skb);
+	const struct iphdr  *old_iph;
 	const struct iphdr  *tiph;
 	struct flowi4 fl4;
 	u8     tos;
@@ -756,6 +756,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	    skb_checksum_help(skb))
 		goto tx_error;
 
+	old_iph = ip_hdr(skb);
+
 	if (dev->type == ARPHRD_ETHER)
 		IPCB(skb)->flags = 0;
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 191fc24a745a..8f024d41eefa 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	__be16 df = tiph->frag_off;
 	struct rtable *rt;     			/* Route to the other host */
 	struct net_device *tdev;		/* Device to other host */
-	const struct iphdr  *old_iph = ip_hdr(skb);
+	const struct iphdr  *old_iph;
 	struct iphdr  *iph;			/* Our new IP header */
 	unsigned int max_headroom;		/* The extra header space needed */
 	__be32 dst = tiph->daddr;
@@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	    skb_checksum_help(skb))
 		goto tx_error;
 
+	old_iph = ip_hdr(skb);
+
 	if (tos & 1)
 		tos = old_iph->tos;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 52271947a471..3ec1f69c5ceb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -896,6 +896,8 @@ new_segment:
 			skb_fill_page_desc(skb, i, page, offset, copy);
 		}
 
+		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
+
 		skb->len += copy;
 		skb->data_len += copy;
 		skb->truesize += copy;
@@ -3032,6 +3034,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 			       SKB_GSO_DODGY |
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
+			       SKB_GSO_SHARED_FRAG |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0905997e5873..492c7cfe1453 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1240,13 +1240,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 */
 	if (!skb_shinfo(prev)->gso_size) {
 		skb_shinfo(prev)->gso_size = mss;
-		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
+		skb_shinfo(prev)->gso_type |= sk->sk_gso_type;
 	}
 
 	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
 	if (skb_shinfo(skb)->gso_segs <= 1) {
 		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
+		skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
 	}
 
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 667a6adfccf8..367e2ec01da1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1133,6 +1133,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
+	skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
 	if (skb->len <= mss_now || !sk_can_gso(sk) ||
 	    skb->ip_summed == CHECKSUM_NONE) {
 		/* Avoid the costly divide in the normal
@@ -1140,11 +1141,10 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 		 */
 		skb_shinfo(skb)->gso_segs = 1;
 		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
 	} else {
 		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
 		skb_shinfo(skb)->gso_size = mss_now;
-		skb_shinfo(skb)->gso_type = sk->sk_gso_type;
+		skb_shinfo(skb)->gso_type |= sk->sk_gso_type;
 	}
 }
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index f26f0da7f095..d141fc32a2ea 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_TCPV6 |
+		       SKB_GSO_SHARED_FRAG |
 		       0)))
 		goto out;
 
-- 
cgit v1.2.3


From 5afba62cc8a16716508605e02c1b02ee5f969184 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joro@8bytes.org>
Date: Wed, 26 Sep 2012 12:44:38 +0200
Subject: x86, msi: Use IRQ remapping specific setup_msi_irqs routine

Use seperate routines to setup MSI IRQs for both
irq_remapping_enabled cases.

Signed-off-by: Joerg Roedel <joro@8bytes.org>
Acked-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/irq_remapping.h |  12 ----
 arch/x86/include/asm/pci.h           |   3 +
 arch/x86/kernel/apic/io_apic.c       | 104 ++++----------------------------
 drivers/iommu/irq_remapping.c        | 112 ++++++++++++++++++++++++++++++++++-
 include/linux/irq.h                  |   3 +
 5 files changed, 125 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5fb9bbbd2f14..0ee1e88bd17a 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -47,9 +47,6 @@ extern void free_remapped_irq(int irq);
 extern void compose_remapped_msi_msg(struct pci_dev *pdev,
 				     unsigned int irq, unsigned int dest,
 				     struct msi_msg *msg, u8 hpet_id);
-extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
-extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
-				  int index, int sub_handle);
 extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
 
 #else  /* CONFIG_IRQ_REMAP */
@@ -83,15 +80,6 @@ static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
 					    struct msi_msg *msg, u8 hpet_id)
 {
 }
-static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
-{
-	return -ENODEV;
-}
-static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
-					 int index, int sub_handle)
-{
-	return -ENODEV;
-}
 static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
 {
 	return -ENODEV;
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index dba7805176bf..c28fd02f4bf7 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
 #define arch_teardown_msi_irq x86_teardown_msi_irq
 #define arch_restore_msi_irqs x86_restore_msi_irqs
 /* implemented in arch/x86/kernel/apic/io_apic. */
+struct msi_desc;
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
 void native_restore_msi_irqs(struct pci_dev *dev, int irq);
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+		  unsigned int irq_base, unsigned int irq_offset);
 /* default to the implementation in drivers/lib/msi.c */
 #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
 #define HAVE_DEFAULT_MSI_RESTORE_IRQS
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e7b87630c13d..d4b045e018fb 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3066,7 +3066,7 @@ void destroy_irq(unsigned int irq)
 	free_irq_at(irq, cfg);
 }
 
-static inline void destroy_irqs(unsigned int irq, unsigned int count)
+void destroy_irqs(unsigned int irq, unsigned int count)
 {
 	unsigned int i;
 
@@ -3165,8 +3165,8 @@ static struct irq_chip msi_chip = {
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-			 unsigned int irq_base, unsigned int irq_offset)
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+		  unsigned int irq_base, unsigned int irq_offset)
 {
 	struct irq_chip *chip = &msi_chip;
 	struct msi_msg msg;
@@ -3198,44 +3198,28 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	return 0;
 }
 
-int setup_msix_irqs(struct pci_dev *dev, int nvec)
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-	int node, ret, sub_handle, index = 0;
 	unsigned int irq, irq_want;
 	struct msi_desc *msidesc;
+	int node, ret;
+
+	/* Multiple MSI vectors only supported with interrupt remapping */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
 
 	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
-	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
 		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
 			return -ENOSPC;
+
 		irq_want = irq + 1;
-		if (!irq_remapping_enabled)
-			goto no_ir;
 
-		if (!sub_handle) {
-			/*
-			 * allocate the consecutive block of IRTE's
-			 * for 'nvec'
-			 */
-			index = msi_alloc_remapped_irq(dev, irq, nvec);
-			if (index < 0) {
-				ret = index;
-				goto error;
-			}
-		} else {
-			ret = msi_setup_remapped_irq(dev, irq, index,
-						     sub_handle);
-			if (ret < 0)
-				goto error;
-		}
-no_ir:
 		ret = setup_msi_irq(dev, msidesc, irq, 0);
 		if (ret < 0)
 			goto error;
-		sub_handle++;
 	}
 	return 0;
 
@@ -3244,74 +3228,6 @@ error:
 	return ret;
 }
 
-int setup_msi_irqs(struct pci_dev *dev, int nvec)
-{
-	int node, ret, sub_handle, index = 0;
-	unsigned int irq;
-	struct msi_desc *msidesc;
-
-	if (nvec > 1 && !irq_remapping_enabled)
-		return 1;
-
-	nvec = __roundup_pow_of_two(nvec);
-
-	WARN_ON(!list_is_singular(&dev->msi_list));
-	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
-	WARN_ON(msidesc->irq);
-	WARN_ON(msidesc->msi_attrib.multiple);
-
-	node = dev_to_node(&dev->dev);
-	irq = __create_irqs(nr_irqs_gsi, nvec, node);
-	if (irq == 0)
-		return -ENOSPC;
-
-	if (!irq_remapping_enabled) {
-		ret = setup_msi_irq(dev, msidesc, irq, 0);
-		if (ret < 0)
-			goto error;
-		return 0;
-	}
-
-	msidesc->msi_attrib.multiple = ilog2(nvec);
-	for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
-		if (!sub_handle) {
-			index = msi_alloc_remapped_irq(dev, irq, nvec);
-			if (index < 0) {
-				ret = index;
-				goto error;
-			}
-		} else {
-			ret = msi_setup_remapped_irq(dev, irq + sub_handle,
-						     index, sub_handle);
-			if (ret < 0)
-				goto error;
-		}
-		ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
-		if (ret < 0)
-			goto error;
-	}
-	return 0;
-
-error:
-	destroy_irqs(irq, nvec);
-
-	/*
-	 * Restore altered MSI descriptor fields and prevent just destroyed
-	 * IRQs from tearing down again in default_teardown_msi_irqs()
-	 */
-	msidesc->irq = 0;
-	msidesc->msi_attrib.multiple = 0;
-
-	return ret;
-}
-
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	if (type == PCI_CAP_ID_MSI)
-		return setup_msi_irqs(dev, nvec);
-	return setup_msix_irqs(dev, nvec);
-}
-
 void native_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 0baad3b9ecba..20f04b67efd2 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -4,6 +4,8 @@
 #include <linux/cpumask.h>
 #include <linux/errno.h>
 #include <linux/msi.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
 
 #include <asm/hw_irq.h>
 #include <asm/irq_remapping.h>
@@ -21,6 +23,10 @@ int no_x2apic_optout;
 
 static struct irq_remap_ops *remap_ops;
 
+static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
+static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+				  int index, int sub_handle);
+
 static void irq_remapping_disable_io_apic(void)
 {
 	/*
@@ -34,9 +40,109 @@ static void irq_remapping_disable_io_apic(void)
 		disconnect_bsp_APIC(0);
 }
 
+static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
+{
+	int node, ret, sub_handle, index = 0;
+	unsigned int irq;
+	struct msi_desc *msidesc;
+
+	nvec = __roundup_pow_of_two(nvec);
+
+	WARN_ON(!list_is_singular(&dev->msi_list));
+	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+	WARN_ON(msidesc->irq);
+	WARN_ON(msidesc->msi_attrib.multiple);
+
+	node = dev_to_node(&dev->dev);
+	irq = __create_irqs(get_nr_irqs_gsi(), nvec, node);
+	if (irq == 0)
+		return -ENOSPC;
+
+	msidesc->msi_attrib.multiple = ilog2(nvec);
+	for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
+		if (!sub_handle) {
+			index = msi_alloc_remapped_irq(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			ret = msi_setup_remapped_irq(dev, irq + sub_handle,
+						     index, sub_handle);
+			if (ret < 0)
+				goto error;
+		}
+		ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
+		if (ret < 0)
+			goto error;
+	}
+	return 0;
+
+error:
+	destroy_irqs(irq, nvec);
+
+	/*
+	 * Restore altered MSI descriptor fields and prevent just destroyed
+	 * IRQs from tearing down again in default_teardown_msi_irqs()
+	 */
+	msidesc->irq = 0;
+	msidesc->msi_attrib.multiple = 0;
+
+	return ret;
+}
+
+static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)
+{
+	int node, ret, sub_handle, index = 0;
+	struct msi_desc *msidesc;
+	unsigned int irq;
+
+	node		= dev_to_node(&dev->dev);
+	irq		= get_nr_irqs_gsi();
+	sub_handle	= 0;
+
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+
+		irq = create_irq_nr(irq, node);
+		if (irq == 0)
+			return -1;
+
+		if (sub_handle == 0)
+			ret = index = msi_alloc_remapped_irq(dev, irq, nvec);
+		else
+			ret = msi_setup_remapped_irq(dev, irq, index, sub_handle);
+
+		if (ret < 0)
+			goto error;
+
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (ret < 0)
+			goto error;
+
+		sub_handle += 1;
+		irq        += 1;
+	}
+
+	return 0;
+
+error:
+	destroy_irq(irq);
+	return ret;
+}
+
+static int irq_remapping_setup_msi_irqs(struct pci_dev *dev,
+					int nvec, int type)
+{
+	if (type == PCI_CAP_ID_MSI)
+		return do_setup_msi_irqs(dev, nvec);
+	else
+		return do_setup_msix_irqs(dev, nvec);
+}
+
 static void __init irq_remapping_modify_x86_ops(void)
 {
 	x86_io_apic_ops.disable		= irq_remapping_disable_io_apic;
+	x86_msi.setup_msi_irqs		= irq_remapping_setup_msi_irqs;
 	x86_msi.setup_hpet_msi		= setup_hpet_msi_remapped;
 }
 
@@ -186,7 +292,7 @@ void compose_remapped_msi_msg(struct pci_dev *pdev,
 	remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id);
 }
 
-int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
+static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
 {
 	if (!remap_ops || !remap_ops->msi_alloc_irq)
 		return -ENODEV;
@@ -194,8 +300,8 @@ int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
 	return remap_ops->msi_alloc_irq(pdev, irq, nvec);
 }
 
-int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
-			   int index, int sub_handle)
+static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+				  int index, int sub_handle)
 {
 	if (!remap_ops || !remap_ops->msi_setup_irq)
 		return -ENODEV;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1eab99111e94..bc4e06611958 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -509,8 +509,11 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
 
 /* Handle dynamic irq creation and destruction */
 extern unsigned int create_irq_nr(unsigned int irq_want, int node);
+extern unsigned int __create_irqs(unsigned int from, unsigned int count,
+				  int node);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
+extern void destroy_irqs(unsigned int irq, unsigned int count);
 
 /*
  * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
-- 
cgit v1.2.3


From 2bf3440d7b8755f2627232e6a4c37efbbe053685 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 28 Jan 2013 08:33:33 +0000
Subject: can: rework skb reserved data handling

Added accessor and skb_reserve helpers for struct can_skb_priv.
Removed pointless skb_headroom() check.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
CC: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/dev.c   |  4 ++--
 drivers/net/can/slcan.c |  4 ++--
 include/linux/can/skb.h | 10 ++++++++++
 net/can/bcm.c           |  8 ++++----
 net/can/gw.c            |  4 +---
 net/can/raw.c           |  4 ++--
 6 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 59ada082a994..f9cba4123c66 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -512,8 +512,8 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
-	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = dev->ifindex;
 
 	*cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
 	memset(*cf, 0, sizeof(struct can_frame));
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index e79a8d10e0fc..06b7e097d36e 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -195,8 +195,8 @@ static void slc_bump(struct slcan *sl)
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
-	((struct can_skb_priv *)(skb->head))->ifindex = sl->dev->ifindex;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = sl->dev->ifindex;
 
 	memcpy(skb_put(skb, sizeof(struct can_frame)),
 	       &cf, sizeof(struct can_frame));
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index 4b0f24d3a878..2f0543f7510c 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -32,4 +32,14 @@ struct can_skb_priv {
 	struct can_frame cf[0];
 };
 
+static inline struct can_skb_priv *can_skb_prv(struct sk_buff *skb)
+{
+	return (struct can_skb_priv *)(skb->head);
+}
+
+static inline void can_skb_reserve(struct sk_buff *skb)
+{
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+}
+
 #endif /* CAN_SKB_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ccc27b9e8384..28e12d18f0f1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -261,8 +261,8 @@ static void bcm_can_tx(struct bcm_op *op)
 	if (!skb)
 		goto out;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
-	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = dev->ifindex;
 
 	memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
 
@@ -1207,7 +1207,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 	if (!skb)
 		return -ENOMEM;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
+	can_skb_reserve(skb);
 
 	err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ);
 	if (err < 0) {
@@ -1221,7 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 		return -ENODEV;
 	}
 
-	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+	can_skb_prv(skb)->ifindex = dev->ifindex;
 	skb->dev = dev;
 	skb->sk  = sk;
 	err = can_send(skb, 1); /* send with loopback */
diff --git a/net/can/gw.c b/net/can/gw.c
index acdd4656cc3b..c185fcd5e828 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -381,9 +381,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 
 	/* is sending the skb back to the incoming interface not allowed? */
 	if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) &&
-	    skb_headroom(skb) == sizeof(struct can_skb_priv) &&
-	    (((struct can_skb_priv *)(skb->head))->ifindex ==
-	     gwj->dst.dev->ifindex))
+	    can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex)
 		return;
 
 	/*
diff --git a/net/can/raw.c b/net/can/raw.c
index 5d860e8dcc52..c1764e41ddaf 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -705,8 +705,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (!skb)
 		goto put_dev;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
-	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = dev->ifindex;
 
 	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 	if (err < 0)
-- 
cgit v1.2.3


From 5fbee843c32e5de2d8af68ba0bdd113bb0af9d86 Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Tue, 22 Jan 2013 21:29:39 +0000
Subject: netpoll: add RCU annotation to npinfo field

dev->npinfo is protected by RCU.

This fixes the following sparse warnings:

net/core/netpoll.c:177:48: error: incompatible types in comparison expression (different address spaces)
net/core/netpoll.c:200:35: error: incompatible types in comparison expression (different address spaces)
net/core/netpoll.c:221:35: error: incompatible types in comparison expression (different address spaces)
net/core/netpoll.c:327:18: error: incompatible types in comparison expression (different address spaces)

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 549f5ad2055d..85b0949d9946 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1272,7 +1272,7 @@ struct net_device {
 	void (*destructor)(struct net_device *dev);
 
 #ifdef CONFIG_NETPOLL
-	struct netpoll_info	*npinfo;
+	struct netpoll_info __rcu	*npinfo;
 #endif
 
 #ifdef CONFIG_NET_NS
-- 
cgit v1.2.3


From 7ab59dc15e2f42a4321ed016bcd6044a4d8de6d1 Mon Sep 17 00:00:00 2001
From: "David J. Choi" <david.choi@micrel.com>
Date: Wed, 23 Jan 2013 14:05:15 +0000
Subject: drivers/net/phy/micrel_phy: Add support for new PHYs

Summary of changes:
.Newly added phys
	-KSZ8081/KSZ8091, which has some phy ids.
	-KSZ8061
	-KSZ9031, which is Gigabit phy.
	-KSZ886X, which has a switch function.
	-KSZ8031, which has a same phy ids with KSZ8021.

Signed-off-by: David J. Choi <david.choi@micrel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c   | 64 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/micrel_phy.h |  9 ++++++-
 2 files changed, 68 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index b983596abcbb..29934446436a 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -5,15 +5,20 @@
  *
  * Author: David J. Choi
  *
- * Copyright (c) 2010 Micrel, Inc.
+ * Copyright (c) 2010-2013 Micrel, Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
  * Free Software Foundation;  either version 2 of the  License, or (at your
  * option) any later version.
  *
- * Support : ksz9021 1000/100/10 phy from Micrel
- *		ks8001, ks8737, ks8721, ks8041, ks8051 100/10 phy
+ * Support : Micrel Phys:
+ *		Giga phys: ksz9021, ksz9031
+ *		100/10 Phys : ksz8001, ksz8721, ksz8737, ksz8041
+ *			   ksz8021, ksz8031, ksz8051,
+ *			   ksz8081, ksz8091,
+ *			   ksz8061,
+ *		Switch : ksz8873, ksz886x
  */
 
 #include <linux/kernel.h>
@@ -176,7 +181,7 @@ static struct phy_driver ksphy_driver[] = {
 }, {
 	.phy_id		= PHY_ID_KSZ8021,
 	.phy_id_mask	= 0x00ffffff,
-	.name		= "Micrel KSZ8021",
+	.name		= "Micrel KSZ8021 or KSZ8031",
 	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
 			   SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
@@ -224,6 +229,30 @@ static struct phy_driver ksphy_driver[] = {
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
 	.driver		= { .owner = THIS_MODULE,},
+}, {
+	.phy_id		= PHY_ID_KSZ8081,
+	.name		= "Micrel KSZ8081 or KSZ8091",
+	.phy_id_mask	= 0x00fffff0,
+	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause),
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= kszphy_ack_interrupt,
+	.config_intr	= kszphy_config_intr,
+	.driver		= { .owner = THIS_MODULE,},
+}, {
+	.phy_id		= PHY_ID_KSZ8061,
+	.name		= "Micrel KSZ8061",
+	.phy_id_mask	= 0x00fffff0,
+	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause),
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= kszphy_ack_interrupt,
+	.config_intr	= kszphy_config_intr,
+	.driver		= { .owner = THIS_MODULE,},
 }, {
 	.phy_id		= PHY_ID_KSZ9021,
 	.phy_id_mask	= 0x000ffffe,
@@ -237,6 +266,19 @@ static struct phy_driver ksphy_driver[] = {
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= ksz9021_config_intr,
 	.driver		= { .owner = THIS_MODULE, },
+}, {
+	.phy_id		= PHY_ID_KSZ9031,
+	.phy_id_mask	= 0x00fffff0,
+	.name		= "Micrel KSZ9031 Gigabit PHY",
+	.features	= (PHY_GBIT_FEATURES | SUPPORTED_Pause
+				| SUPPORTED_Asym_Pause),
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= kszphy_ack_interrupt,
+	.config_intr	= ksz9021_config_intr,
+	.driver		= { .owner = THIS_MODULE, },
 }, {
 	.phy_id		= PHY_ID_KSZ8873MLL,
 	.phy_id_mask	= 0x00fffff0,
@@ -247,6 +289,16 @@ static struct phy_driver ksphy_driver[] = {
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
 	.driver		= { .owner = THIS_MODULE, },
+}, {
+	.phy_id		= PHY_ID_KSZ886X,
+	.phy_id_mask	= 0x00fffff0,
+	.name		= "Micrel KSZ886X Switch",
+	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause),
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.driver		= { .owner = THIS_MODULE, },
 } };
 
 static int __init ksphy_init(void)
@@ -270,12 +322,16 @@ MODULE_LICENSE("GPL");
 
 static struct mdio_device_id __maybe_unused micrel_tbl[] = {
 	{ PHY_ID_KSZ9021, 0x000ffffe },
+	{ PHY_ID_KSZ9031, 0x00fffff0 },
 	{ PHY_ID_KSZ8001, 0x00ffffff },
 	{ PHY_ID_KS8737, 0x00fffff0 },
 	{ PHY_ID_KSZ8021, 0x00ffffff },
 	{ PHY_ID_KSZ8041, 0x00fffff0 },
 	{ PHY_ID_KSZ8051, 0x00fffff0 },
+	{ PHY_ID_KSZ8061, 0x00fffff0 },
+	{ PHY_ID_KSZ8081, 0x00fffff0 },
 	{ PHY_ID_KSZ8873MLL, 0x00fffff0 },
+	{ PHY_ID_KSZ886X, 0x00fffff0 },
 	{ }
 };
 
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index adfe8c058f29..9dbb41a4e250 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -21,8 +21,15 @@
 #define PHY_ID_KSZ8021		0x00221555
 #define PHY_ID_KSZ8041		0x00221510
 #define PHY_ID_KSZ8051		0x00221550
-/* both for ks8001 Rev. A/B, and for ks8721 Rev 3. */
+/* same id: ks8001 Rev. A/B, and ks8721 Rev 3. */
 #define PHY_ID_KSZ8001		0x0022161A
+/* same id: KS8081, KS8091 */
+#define PHY_ID_KSZ8081		0x00221560
+#define PHY_ID_KSZ8061		0x00221570
+#define PHY_ID_KSZ9031		0x00221620
+
+#define PHY_ID_KSZ886X		0x00221430
+#define PHY_ID_KSZ8863		0x00221435
 
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK	0x00000001
-- 
cgit v1.2.3


From d2a5884a64161b524cc6749ee11b95d252e497f3 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sun, 27 Jan 2013 10:49:05 -0800
Subject: regmap: Add "no-bus" option for regmap API

This commit adds provision for "no-bus" usage of the regmap API. In
this configuration user can provide API with two callbacks 'reg_read'
and 'reg_write' which are to be called when reads and writes to one of
device's registers is performed. This is useful for devices that
expose registers but whose register access sequence does not fit the 'bus'
abstraction.

Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/internal.h |  2 ++
 drivers/base/regmap/regmap.c   | 58 +++++++++++++++++++++++++++++++++---------
 include/linux/regmap.h         | 18 ++++++++++++-
 3 files changed, 65 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 51f057405647..b55fde5d216a 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -77,6 +77,8 @@ struct regmap {
 	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
 	int (*reg_write)(void *context, unsigned int reg, unsigned int val);
 
+	bool defer_caching;
+
 	u8 read_flag_mask;
 	u8 write_flag_mask;
 
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 6845a077bd84..9592030a0dc3 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -379,7 +379,7 @@ struct regmap *regmap_init(struct device *dev,
 	enum regmap_endian reg_endian, val_endian;
 	int i, j;
 
-	if (!bus || !config)
+	if (!config)
 		goto err;
 
 	map = kzalloc(sizeof(*map), GFP_KERNEL);
@@ -393,7 +393,8 @@ struct regmap *regmap_init(struct device *dev,
 		map->unlock = config->unlock;
 		map->lock_arg = config->lock_arg;
 	} else {
-		if (bus->fast_io) {
+		if ((bus && bus->fast_io) ||
+		    config->fast_io) {
 			spin_lock_init(&map->spinlock);
 			map->lock = regmap_lock_spinlock;
 			map->unlock = regmap_unlock_spinlock;
@@ -433,11 +434,19 @@ struct regmap *regmap_init(struct device *dev,
 	if (config->read_flag_mask || config->write_flag_mask) {
 		map->read_flag_mask = config->read_flag_mask;
 		map->write_flag_mask = config->write_flag_mask;
-	} else {
+	} else if (bus) {
 		map->read_flag_mask = bus->read_flag_mask;
 	}
 
-	map->reg_read = _regmap_bus_read;
+	if (!bus) {
+		map->reg_read  = config->reg_read;
+		map->reg_write = config->reg_write;
+
+		map->defer_caching = false;
+		goto skip_format_initialization;
+	} else {
+		map->reg_read  = _regmap_bus_read;
+	}
 
 	reg_endian = config->reg_format_endian;
 	if (reg_endian == REGMAP_ENDIAN_DEFAULT)
@@ -584,10 +593,15 @@ struct regmap *regmap_init(struct device *dev,
 		goto err_map;
 	}
 
-	if (map->format.format_write)
+	if (map->format.format_write) {
+		map->defer_caching = false;
 		map->reg_write = _regmap_bus_formatted_write;
-	else if (map->format.format_val)
+	} else if (map->format.format_val) {
+		map->defer_caching = true;
 		map->reg_write = _regmap_bus_raw_write;
+	}
+
+skip_format_initialization:
 
 	map->range_tree = RB_ROOT;
 	for (i = 0; i < config->num_ranges; i++) {
@@ -790,7 +804,7 @@ void regmap_exit(struct regmap *map)
 	regcache_exit(map);
 	regmap_debugfs_exit(map);
 	regmap_range_exit(map);
-	if (map->bus->free_context)
+	if (map->bus && map->bus->free_context)
 		map->bus->free_context(map->bus_context);
 	kfree(map->work_buf);
 	kfree(map);
@@ -893,6 +907,8 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg,
 	size_t len;
 	int i;
 
+	BUG_ON(!map->bus);
+
 	/* Check for unwritable registers before we start */
 	if (map->writeable_reg)
 		for (i = 0; i < val_len / map->format.val_bytes; i++)
@@ -1002,7 +1018,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 	struct regmap_range_node *range;
 	struct regmap *map = context;
 
-	BUG_ON(!map->format.format_write);
+	BUG_ON(!map->bus || !map->format.format_write);
 
 	range = _regmap_range_lookup(map, reg);
 	if (range) {
@@ -1028,7 +1044,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
 {
 	struct regmap *map = context;
 
-	BUG_ON(!map->format.format_val);
+	BUG_ON(!map->bus || !map->format.format_val);
 
 	map->format.format_val(map->work_buf + map->format.reg_bytes
 			       + map->format.pad_bytes, val, 0);
@@ -1039,12 +1055,18 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
 				 map->format.val_bytes);
 }
 
+static inline void *_regmap_map_get_context(struct regmap *map)
+{
+	return (map->bus) ? map : map->bus_context;
+}
+
 int _regmap_write(struct regmap *map, unsigned int reg,
 		  unsigned int val)
 {
 	int ret;
+	void *context = _regmap_map_get_context(map);
 
-	if (!map->cache_bypass && map->format.format_write) {
+	if (!map->cache_bypass && !map->defer_caching) {
 		ret = regcache_write(map, reg, val);
 		if (ret != 0)
 			return ret;
@@ -1061,7 +1083,7 @@ int _regmap_write(struct regmap *map, unsigned int reg,
 
 	trace_regmap_reg_write(map->dev, reg, val);
 
-	return map->reg_write(map, reg, val);
+	return map->reg_write(context, reg, val);
 }
 
 /**
@@ -1112,6 +1134,8 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 {
 	int ret;
 
+	if (!map->bus)
+		return -EINVAL;
 	if (val_len % map->format.val_bytes)
 		return -EINVAL;
 	if (reg % map->reg_stride)
@@ -1148,6 +1172,8 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 	size_t val_bytes = map->format.val_bytes;
 	void *wval;
 
+	if (!map->bus)
+		return -EINVAL;
 	if (!map->format.parse_val)
 		return -EINVAL;
 	if (reg % map->reg_stride)
@@ -1201,6 +1227,8 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 	u8 *u8 = map->work_buf;
 	int ret;
 
+	BUG_ON(!map->bus);
+
 	range = _regmap_range_lookup(map, reg);
 	if (range) {
 		ret = _regmap_select_page(map, &reg, range,
@@ -1252,6 +1280,8 @@ static int _regmap_read(struct regmap *map, unsigned int reg,
 			unsigned int *val)
 {
 	int ret;
+	void *context = _regmap_map_get_context(map);
+
 	BUG_ON(!map->reg_read);
 
 	if (!map->cache_bypass) {
@@ -1263,7 +1293,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg,
 	if (map->cache_only)
 		return -EBUSY;
 
-	ret = map->reg_read(map, reg, val);
+	ret = map->reg_read(context, reg, val);
 	if (ret == 0) {
 #ifdef LOG_DEVICE
 		if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0)
@@ -1325,6 +1355,8 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 	unsigned int v;
 	int ret, i;
 
+	if (!map->bus)
+		return -EINVAL;
 	if (val_len % map->format.val_bytes)
 		return -EINVAL;
 	if (reg % map->reg_stride)
@@ -1376,6 +1408,8 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 	size_t val_bytes = map->format.val_bytes;
 	bool vol = regmap_volatile_range(map, reg, val_count);
 
+	if (!map->bus)
+		return -EINVAL;
 	if (!map->format.parse_val)
 		return -EINVAL;
 	if (reg % map->reg_stride)
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..28dde941c783 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -127,7 +127,18 @@ typedef void (*regmap_unlock)(void *);
  * @lock_arg:	  this field is passed as the only argument of lock/unlock
  *		  functions (ignored in case regular lock/unlock functions
  *		  are not overridden).
- *
+ * @reg_read:	  Optional callback that if filled will be used to perform
+ *           	  all the reads from the registers. Should only be provided for
+ *		  devices whos read operation cannot be represented as a simple read
+ *		  operation on a bus such as SPI, I2C, etc. Most of the devices do
+ * 		  not need this.
+ * @reg_write:	  Same as above for writing.
+ * @fast_io:	  Register IO is fast. Use a spinlock instead of a mutex
+ *	     	  to perform locking. This field is ignored if custom lock/unlock
+ *	     	  functions are used (see fields lock/unlock of struct regmap_config).
+ *		  This field is a duplicate of a similar file in
+ *		  'struct regmap_bus' and serves exact same purpose.
+ *		   Use it only for "no-bus" cases.
  * @max_register: Optional, specifies the maximum valid register index.
  * @wr_table:     Optional, points to a struct regmap_access_table specifying
  *                valid ranges for write access.
@@ -177,6 +188,11 @@ struct regmap_config {
 	regmap_unlock unlock;
 	void *lock_arg;
 
+	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
+	int (*reg_write)(void *context, unsigned int reg, unsigned int val);
+
+	bool fast_io;
+
 	unsigned int max_register;
 	const struct regmap_access_table *wr_table;
 	const struct regmap_access_table *rd_table;
-- 
cgit v1.2.3


From 0d509f2b112b21411712f0bf789b372987967e49 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 27 Jan 2013 22:07:38 +0800
Subject: regmap: Add asynchronous I/O support

Some use cases like firmware download can transfer a lot of data in quick
succession. With high speed buses these use cases can benefit from having
multiple transfers scheduled at once since this allows the bus to minimise
the delay between transfers.

Support this by adding regmap_raw_write_async(), allowing raw transfers to
be scheduled, and regmap_async_complete() to wait for them to finish.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/internal.h |  15 ++++
 drivers/base/regmap/regmap.c   | 182 ++++++++++++++++++++++++++++++++++++++---
 include/linux/regmap.h         |  28 +++++++
 3 files changed, 215 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 51f057405647..202518641779 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -16,6 +16,7 @@
 #include <linux/regmap.h>
 #include <linux/fs.h>
 #include <linux/list.h>
+#include <linux/wait.h>
 
 struct regmap;
 struct regcache_ops;
@@ -39,6 +40,13 @@ struct regmap_format {
 	unsigned int (*parse_val)(void *buf);
 };
 
+struct regmap_async {
+	struct list_head list;
+	struct work_struct cleanup;
+	struct regmap *map;
+	void *work_buf;
+};
+
 struct regmap {
 	struct mutex mutex;
 	spinlock_t spinlock;
@@ -53,6 +61,11 @@ struct regmap {
 	void *bus_context;
 	const char *name;
 
+	spinlock_t async_lock;
+	wait_queue_head_t async_waitq;
+	struct list_head async_list;
+	int async_ret;
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs;
 	const char *debugfs_name;
@@ -178,6 +191,8 @@ bool regcache_set_val(void *base, unsigned int idx,
 		      unsigned int val, unsigned int word_size);
 int regcache_lookup_reg(struct regmap *map, unsigned int reg);
 
+void regmap_async_complete_cb(struct regmap_async *async, int ret);
+
 extern struct regcache_ops regcache_rbtree_ops;
 extern struct regcache_ops regcache_lzo_ops;
 
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 6845a077bd84..e57b7bc035fc 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -41,6 +41,15 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 static int _regmap_bus_raw_write(void *context, unsigned int reg,
 				 unsigned int val);
 
+static void async_cleanup(struct work_struct *work)
+{
+	struct regmap_async *async = container_of(work, struct regmap_async,
+						  cleanup);
+
+	kfree(async->work_buf);
+	kfree(async);
+}
+
 bool regmap_reg_in_ranges(unsigned int reg,
 			  const struct regmap_range *ranges,
 			  unsigned int nranges)
@@ -430,6 +439,10 @@ struct regmap *regmap_init(struct device *dev,
 	map->cache_type = config->cache_type;
 	map->name = config->name;
 
+	spin_lock_init(&map->async_lock);
+	INIT_LIST_HEAD(&map->async_list);
+	init_waitqueue_head(&map->async_waitq);
+
 	if (config->read_flag_mask || config->write_flag_mask) {
 		map->read_flag_mask = config->read_flag_mask;
 		map->write_flag_mask = config->write_flag_mask;
@@ -884,10 +897,13 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg,
 }
 
 static int _regmap_raw_write(struct regmap *map, unsigned int reg,
-			     const void *val, size_t val_len)
+			     const void *val, size_t val_len, bool async)
 {
 	struct regmap_range_node *range;
+	unsigned long flags;
 	u8 *u8 = map->work_buf;
+	void *work_val = map->work_buf + map->format.reg_bytes +
+		map->format.pad_bytes;
 	void *buf;
 	int ret = -ENOTSUPP;
 	size_t len;
@@ -932,7 +948,7 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg,
 			dev_dbg(map->dev, "Writing window %d/%zu\n",
 				win_residue, val_len / map->format.val_bytes);
 			ret = _regmap_raw_write(map, reg, val, win_residue *
-						map->format.val_bytes);
+						map->format.val_bytes, async);
 			if (ret != 0)
 				return ret;
 
@@ -955,6 +971,50 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg,
 
 	u8[0] |= map->write_flag_mask;
 
+	if (async && map->bus->async_write) {
+		struct regmap_async *async = map->bus->async_alloc();
+		if (!async)
+			return -ENOMEM;
+
+		async->work_buf = kzalloc(map->format.buf_size,
+					  GFP_KERNEL | GFP_DMA);
+		if (!async->work_buf) {
+			kfree(async);
+			return -ENOMEM;
+		}
+
+		INIT_WORK(&async->cleanup, async_cleanup);
+		async->map = map;
+
+		/* If the caller supplied the value we can use it safely. */
+		memcpy(async->work_buf, map->work_buf, map->format.pad_bytes +
+		       map->format.reg_bytes + map->format.val_bytes);
+		if (val == work_val)
+			val = async->work_buf + map->format.pad_bytes +
+				map->format.reg_bytes;
+
+		spin_lock_irqsave(&map->async_lock, flags);
+		list_add_tail(&async->list, &map->async_list);
+		spin_unlock_irqrestore(&map->async_lock, flags);
+
+		ret = map->bus->async_write(map->bus_context, async->work_buf,
+					    map->format.reg_bytes +
+					    map->format.pad_bytes,
+					    val, val_len, async);
+
+		if (ret != 0) {
+			dev_err(map->dev, "Failed to schedule write: %d\n",
+				ret);
+
+			spin_lock_irqsave(&map->async_lock, flags);
+			list_del(&async->list);
+			spin_unlock_irqrestore(&map->async_lock, flags);
+
+			kfree(async->work_buf);
+			kfree(async);
+		}
+	}
+
 	trace_regmap_hw_write_start(map->dev, reg,
 				    val_len / map->format.val_bytes);
 
@@ -962,8 +1022,7 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg,
 	 * send the work_buf directly, otherwise try to do a gather
 	 * write.
 	 */
-	if (val == (map->work_buf + map->format.pad_bytes +
-		    map->format.reg_bytes))
+	if (val == work_val)
 		ret = map->bus->write(map->bus_context, map->work_buf,
 				      map->format.reg_bytes +
 				      map->format.pad_bytes +
@@ -1036,7 +1095,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
 				 map->work_buf +
 				 map->format.reg_bytes +
 				 map->format.pad_bytes,
-				 map->format.val_bytes);
+				 map->format.val_bytes, false);
 }
 
 int _regmap_write(struct regmap *map, unsigned int reg,
@@ -1119,7 +1178,7 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 
 	map->lock(map->lock_arg);
 
-	ret = _regmap_raw_write(map, reg, val, val_len);
+	ret = _regmap_raw_write(map, reg, val, val_len, false);
 
 	map->unlock(map->lock_arg);
 
@@ -1175,14 +1234,15 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 	if (map->use_single_rw) {
 		for (i = 0; i < val_count; i++) {
 			ret = regmap_raw_write(map,
-						reg + (i * map->reg_stride),
-						val + (i * val_bytes),
-						val_bytes);
+					       reg + (i * map->reg_stride),
+					       val + (i * val_bytes),
+					       val_bytes);
 			if (ret != 0)
 				return ret;
 		}
 	} else {
-		ret = _regmap_raw_write(map, reg, wval, val_bytes * val_count);
+		ret = _regmap_raw_write(map, reg, wval, val_bytes * val_count,
+					false);
 	}
 
 	if (val_bytes != 1)
@@ -1194,6 +1254,48 @@ out:
 }
 EXPORT_SYMBOL_GPL(regmap_bulk_write);
 
+/**
+ * regmap_raw_write_async(): Write raw values to one or more registers
+ *                           asynchronously
+ *
+ * @map: Register map to write to
+ * @reg: Initial register to write to
+ * @val: Block of data to be written, laid out for direct transmission to the
+ *       device.  Must be valid until regmap_async_complete() is called.
+ * @val_len: Length of data pointed to by val.
+ *
+ * This function is intended to be used for things like firmware
+ * download where a large block of data needs to be transferred to the
+ * device.  No formatting will be done on the data provided.
+ *
+ * If supported by the underlying bus the write will be scheduled
+ * asynchronously, helping maximise I/O speed on higher speed buses
+ * like SPI.  regmap_async_complete() can be called to ensure that all
+ * asynchrnous writes have been completed.
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_raw_write_async(struct regmap *map, unsigned int reg,
+			   const void *val, size_t val_len)
+{
+	int ret;
+
+	if (val_len % map->format.val_bytes)
+		return -EINVAL;
+	if (reg % map->reg_stride)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	ret = _regmap_raw_write(map, reg, val, val_len, true);
+
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_raw_write_async);
+
 static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 			    unsigned int val_len)
 {
@@ -1492,6 +1594,66 @@ int regmap_update_bits_check(struct regmap *map, unsigned int reg,
 }
 EXPORT_SYMBOL_GPL(regmap_update_bits_check);
 
+void regmap_async_complete_cb(struct regmap_async *async, int ret)
+{
+	struct regmap *map = async->map;
+	bool wake;
+
+	spin_lock(&map->async_lock);
+
+	list_del(&async->list);
+	wake = list_empty(&map->async_list);
+
+	if (ret != 0)
+		map->async_ret = ret;
+
+	spin_unlock(&map->async_lock);
+
+	schedule_work(&async->cleanup);
+
+	if (wake)
+		wake_up(&map->async_waitq);
+}
+
+static int regmap_async_is_done(struct regmap *map)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&map->async_lock, flags);
+	ret = list_empty(&map->async_list);
+	spin_unlock_irqrestore(&map->async_lock, flags);
+
+	return ret;
+}
+
+/**
+ * regmap_async_complete: Ensure all asynchronous I/O has completed.
+ *
+ * @map: Map to operate on.
+ *
+ * Blocks until any pending asynchronous I/O has completed.  Returns
+ * an error code for any failed I/O operations.
+ */
+int regmap_async_complete(struct regmap *map)
+{
+	unsigned long flags;
+	int ret;
+
+	/* Nothing to do with no async support */
+	if (!map->bus->async_write)
+		return 0;
+
+	wait_event(map->async_waitq, regmap_async_is_done(map));
+
+	spin_lock_irqsave(&map->async_lock, flags);
+	ret = map->async_ret;
+	map->async_ret = 0;
+	spin_unlock_irqrestore(&map->async_lock, flags);
+
+	return ret;
+}
+
 /**
  * regmap_register_patch: Register and apply register updates to be applied
  *                        on device initialistion
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..f9b7fbe35ab1 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -235,14 +235,21 @@ struct regmap_range_cfg {
 	unsigned int window_len;
 };
 
+struct regmap_async;
+
 typedef int (*regmap_hw_write)(void *context, const void *data,
 			       size_t count);
 typedef int (*regmap_hw_gather_write)(void *context,
 				      const void *reg, size_t reg_len,
 				      const void *val, size_t val_len);
+typedef int (*regmap_hw_async_write)(void *context,
+				     const void *reg, size_t reg_len,
+				     const void *val, size_t val_len,
+				     struct regmap_async *async);
 typedef int (*regmap_hw_read)(void *context,
 			      const void *reg_buf, size_t reg_size,
 			      void *val_buf, size_t val_size);
+typedef struct regmap_async *(*regmap_hw_async_alloc)(void);
 typedef void (*regmap_hw_free_context)(void *context);
 
 /**
@@ -255,8 +262,11 @@ typedef void (*regmap_hw_free_context)(void *context);
  * @write: Write operation.
  * @gather_write: Write operation with split register/value, return -ENOTSUPP
  *                if not implemented  on a given device.
+ * @async_write: Write operation which completes asynchronously, optional and
+ *               must serialise with respect to non-async I/O.
  * @read: Read operation.  Data is returned in the buffer used to transmit
  *         data.
+ * @async_alloc: Allocate a regmap_async() structure.
  * @read_flag_mask: Mask to be set in the top byte of the register when doing
  *                  a read.
  * @reg_format_endian_default: Default endianness for formatted register
@@ -265,13 +275,16 @@ typedef void (*regmap_hw_free_context)(void *context);
  * @val_format_endian_default: Default endianness for formatted register
  *     values. Used when the regmap_config specifies DEFAULT. If this is
  *     DEFAULT, BIG is assumed.
+ * @async_size: Size of struct used for async work.
  */
 struct regmap_bus {
 	bool fast_io;
 	regmap_hw_write write;
 	regmap_hw_gather_write gather_write;
+	regmap_hw_async_write async_write;
 	regmap_hw_read read;
 	regmap_hw_free_context free_context;
+	regmap_hw_async_alloc async_alloc;
 	u8 read_flag_mask;
 	enum regmap_endian reg_format_endian_default;
 	enum regmap_endian val_format_endian_default;
@@ -310,6 +323,8 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 		     const void *val, size_t val_len);
 int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 			size_t val_count);
+int regmap_raw_write_async(struct regmap *map, unsigned int reg,
+			   const void *val, size_t val_len);
 int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val);
 int regmap_raw_read(struct regmap *map, unsigned int reg,
 		    void *val, size_t val_len);
@@ -321,6 +336,7 @@ int regmap_update_bits_check(struct regmap *map, unsigned int reg,
 			     unsigned int mask, unsigned int val,
 			     bool *change);
 int regmap_get_val_bytes(struct regmap *map);
+int regmap_async_complete(struct regmap *map);
 
 int regcache_sync(struct regmap *map);
 int regcache_sync_region(struct regmap *map, unsigned int min,
@@ -422,6 +438,13 @@ static inline int regmap_raw_write(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_raw_write_async(struct regmap *map, unsigned int reg,
+					 const void *val, size_t val_len)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_bulk_write(struct regmap *map, unsigned int reg,
 				    const void *val, size_t val_count)
 {
@@ -500,6 +523,11 @@ static inline void regcache_mark_dirty(struct regmap *map)
 	WARN_ONCE(1, "regmap API is disabled");
 }
 
+static inline void regmap_async_complete(struct regmap *map)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+}
+
 static inline int regmap_register_patch(struct regmap *map,
 					const struct reg_default *regs,
 					int num_regs)
-- 
cgit v1.2.3


From 5c766d642bcaffd0c2a5b354db2068515b3846cf Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 24 Jan 2013 09:41:41 +0000
Subject: ipv4: introduce address lifetime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are some usecase when lifetime of ipv4 addresses might be helpful.
For example:
1) initramfs networkmanager uses a DHCP daemon to learn network
configuration parameters
2) initramfs networkmanager addresses, routes and DNS configuration
3) initramfs networkmanager is requested to stop
4) initramfs networkmanager stops all daemons including dhclient
5) there are addresses and routes configured but no daemon running. If
the system doesn't start networkmanager for some reason, addresses and
routes will be used forever, which violates RFC 2131.

This patch is essentially a backport of ivp6 address lifetime mechanism
for ipv4 addresses.

Current "ip" tool supports this without any patch (since it does not
distinguish between ipv4 and ipv6 addresses in this perspective.

Also, this should be back-compatible with all current netlink users.

Reported-by: Pavel Šimerda <psimerda@redhat.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h |   6 ++
 include/net/addrconf.h     |   4 +
 net/ipv4/devinet.c         | 215 +++++++++++++++++++++++++++++++++++++++++++--
 net/ipv6/addrconf.c        |   4 -
 4 files changed, 220 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index a9d828976a77..ea1e3b863890 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -166,6 +166,12 @@ struct in_ifaddr {
 	unsigned char		ifa_flags;
 	unsigned char		ifa_prefixlen;
 	char			ifa_label[IFNAMSIZ];
+
+	/* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */
+	__u32			ifa_valid_lft;
+	__u32			ifa_preferred_lft;
+	unsigned long		ifa_cstamp; /* created timestamp */
+	unsigned long		ifa_tstamp; /* updated timestamp */
 };
 
 extern int register_inetaddr_notifier(struct notifier_block *nb);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6c58d507123f..40be2a0d8ae1 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -15,6 +15,10 @@
 
 #define IPV6_MAX_ADDRESSES		16
 
+#define ADDRCONF_TIMER_FUZZ_MINUS	(HZ > 50 ? HZ / 50 : 1)
+#define ADDRCONF_TIMER_FUZZ		(HZ / 4)
+#define ADDRCONF_TIMER_FUZZ_MAX		(HZ)
+
 #include <linux/in.h>
 #include <linux/in6.h>
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a8e4f2665d5e..5281314886c1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -63,6 +63,7 @@
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
+#include <net/addrconf.h>
 
 #include "fib_lookup.h"
 
@@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
 };
 
 #define IN4_ADDR_HSIZE_SHIFT	8
@@ -417,6 +419,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 }
 
+static void check_lifetime(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
+
 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 			     u32 portid)
 {
@@ -462,6 +468,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 
 	inet_hash_insert(dev_net(in_dev->dev), ifa);
 
+	cancel_delayed_work(&check_lifetime_work);
+	schedule_delayed_work(&check_lifetime_work, 0);
+
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
@@ -573,7 +582,107 @@ errout:
 	return err;
 }
 
-static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
+#define INFINITY_LIFE_TIME	0xFFFFFFFF
+
+static void check_lifetime(struct work_struct *work)
+{
+	unsigned long now, next, next_sec, next_sched;
+	struct in_ifaddr *ifa;
+	struct hlist_node *node;
+	int i;
+
+	now = jiffies;
+	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
+
+	rcu_read_lock();
+	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
+		hlist_for_each_entry_rcu(ifa, node,
+					 &inet_addr_lst[i], hash) {
+			unsigned long age;
+
+			if (ifa->ifa_flags & IFA_F_PERMANENT)
+				continue;
+
+			/* We try to batch several events at once. */
+			age = (now - ifa->ifa_tstamp +
+			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+
+			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
+			    age >= ifa->ifa_valid_lft) {
+				struct in_ifaddr **ifap ;
+
+				rtnl_lock();
+				for (ifap = &ifa->ifa_dev->ifa_list;
+				     *ifap != NULL; ifap = &ifa->ifa_next) {
+					if (*ifap == ifa)
+						inet_del_ifa(ifa->ifa_dev,
+							     ifap, 1);
+				}
+				rtnl_unlock();
+			} else if (ifa->ifa_preferred_lft ==
+				   INFINITY_LIFE_TIME) {
+				continue;
+			} else if (age >= ifa->ifa_preferred_lft) {
+				if (time_before(ifa->ifa_tstamp +
+						ifa->ifa_valid_lft * HZ, next))
+					next = ifa->ifa_tstamp +
+					       ifa->ifa_valid_lft * HZ;
+
+				if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
+					ifa->ifa_flags |= IFA_F_DEPRECATED;
+					rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
+				}
+			} else if (time_before(ifa->ifa_tstamp +
+					       ifa->ifa_preferred_lft * HZ,
+					       next)) {
+				next = ifa->ifa_tstamp +
+				       ifa->ifa_preferred_lft * HZ;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	next_sec = round_jiffies_up(next);
+	next_sched = next;
+
+	/* If rounded timeout is accurate enough, accept it. */
+	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
+		next_sched = next_sec;
+
+	now = jiffies;
+	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
+	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
+		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
+
+	schedule_delayed_work(&check_lifetime_work, next_sched - now);
+}
+
+static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
+			     __u32 prefered_lft)
+{
+	unsigned long timeout;
+
+	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
+
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout))
+		ifa->ifa_valid_lft = timeout;
+	else
+		ifa->ifa_flags |= IFA_F_PERMANENT;
+
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa->ifa_flags |= IFA_F_DEPRECATED;
+		ifa->ifa_preferred_lft = timeout;
+	}
+	ifa->ifa_tstamp = jiffies;
+	if (!ifa->ifa_cstamp)
+		ifa->ifa_cstamp = ifa->ifa_tstamp;
+}
+
+static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
+				       __u32 *pvalid_lft, __u32 *pprefered_lft)
 {
 	struct nlattr *tb[IFA_MAX+1];
 	struct in_ifaddr *ifa;
@@ -633,24 +742,73 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	else
 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 
+	if (tb[IFA_CACHEINFO]) {
+		struct ifa_cacheinfo *ci;
+
+		ci = nla_data(tb[IFA_CACHEINFO]);
+		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
+			err = -EINVAL;
+			goto errout;
+		}
+		*pvalid_lft = ci->ifa_valid;
+		*pprefered_lft = ci->ifa_prefered;
+	}
+
 	return ifa;
 
 errout:
 	return ERR_PTR(err);
 }
 
+static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
+{
+	struct in_device *in_dev = ifa->ifa_dev;
+	struct in_ifaddr *ifa1, **ifap;
+
+	if (!ifa->ifa_local)
+		return NULL;
+
+	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
+	     ifap = &ifa1->ifa_next) {
+		if (ifa1->ifa_mask == ifa->ifa_mask &&
+		    inet_ifa_match(ifa1->ifa_address, ifa) &&
+		    ifa1->ifa_local == ifa->ifa_local)
+			return ifa1;
+	}
+	return NULL;
+}
+
 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct in_ifaddr *ifa;
+	struct in_ifaddr *ifa_existing;
+	__u32 valid_lft = INFINITY_LIFE_TIME;
+	__u32 prefered_lft = INFINITY_LIFE_TIME;
 
 	ASSERT_RTNL();
 
-	ifa = rtm_to_ifaddr(net, nlh);
+	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
 
-	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+	ifa_existing = find_matching_ifa(ifa);
+	if (!ifa_existing) {
+		/* It would be best to check for !NLM_F_CREATE here but
+		 * userspace alreay relies on not having to provide this.
+		 */
+		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
+		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+	} else {
+		inet_free_ifa(ifa);
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL ||
+		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
+			return -EEXIST;
+
+		set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
+	}
+	return 0;
 }
 
 /*
@@ -852,6 +1010,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 			ifa->ifa_prefixlen = 32;
 			ifa->ifa_mask = inet_make_mask(32);
 		}
+		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 		ret = inet_set_ifa(dev, ifa);
 		break;
 
@@ -1190,6 +1349,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 				ifa->ifa_dev = in_dev;
 				ifa->ifa_scope = RT_SCOPE_HOST;
 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
+						 INFINITY_LIFE_TIME);
 				inet_insert_ifa(ifa);
 			}
 		}
@@ -1246,11 +1407,30 @@ static size_t inet_nlmsg_size(void)
 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
 }
 
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
+
+static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+			 unsigned long tstamp, u32 preferred, u32 valid)
+{
+	struct ifa_cacheinfo ci;
+
+	ci.cstamp = cstamp_delta(cstamp);
+	ci.tstamp = cstamp_delta(tstamp);
+	ci.ifa_prefered = preferred;
+	ci.ifa_valid = valid;
+
+	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+}
+
 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 			    u32 portid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
+	u32 preferred, valid;
 
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
 	if (nlh == NULL)
@@ -1259,10 +1439,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	ifm = nlmsg_data(nlh);
 	ifm->ifa_family = AF_INET;
 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
-	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
+	ifm->ifa_flags = ifa->ifa_flags;
 	ifm->ifa_scope = ifa->ifa_scope;
 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
 
+	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
+		preferred = ifa->ifa_preferred_lft;
+		valid = ifa->ifa_valid_lft;
+		if (preferred != INFINITY_LIFE_TIME) {
+			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
+
+			if (preferred > tval)
+				preferred -= tval;
+			else
+				preferred = 0;
+			if (valid != INFINITY_LIFE_TIME) {
+				if (valid > tval)
+					valid -= tval;
+				else
+					valid = 0;
+			}
+		}
+	} else {
+		preferred = INFINITY_LIFE_TIME;
+		valid = INFINITY_LIFE_TIME;
+	}
 	if ((ifa->ifa_address &&
 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
 	    (ifa->ifa_local &&
@@ -1270,7 +1471,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	    (ifa->ifa_broadcast &&
 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
 	    (ifa->ifa_label[0] &&
-	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
+	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
+	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
+			  preferred, valid))
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
@@ -1988,6 +2191,8 @@ void __init devinet_init(void)
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
 
+	schedule_delayed_work(&check_lifetime_work, 0);
+
 	rtnl_af_register(&inet_af_ops);
 
 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 80d59802d964..7f7332b44699 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -110,10 +110,6 @@ static inline u32 cstamp_delta(unsigned long cstamp)
 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
 }
 
-#define ADDRCONF_TIMER_FUZZ_MINUS	(HZ > 50 ? HZ/50 : 1)
-#define ADDRCONF_TIMER_FUZZ		(HZ / 4)
-#define ADDRCONF_TIMER_FUZZ_MAX		(HZ)
-
 #ifdef CONFIG_SYSCTL
 static void addrconf_sysctl_register(struct inet6_dev *idev);
 static void addrconf_sysctl_unregister(struct inet6_dev *idev);
-- 
cgit v1.2.3


From 6f16eebe1ff82176339a0439c98ebec9768b0ee2 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Fri, 25 Jan 2013 17:08:12 -0800
Subject: timekeeping: Switch HAS_PERSISTENT_CLOCK to
 ALWAYS_USE_PERSISTENT_CLOCK

Jason pointed out the HAS_PERSISTENT_CLOCK name isn't
quite accurate for the config, as some systems may have
the persistent_clock in some cases, but not always.

So change the config name to the more clear
ALWAYS_USE_PERSISTENT_CLOCK.

Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/x86/Kconfig     | 2 +-
 drivers/rtc/Kconfig  | 4 ++--
 include/linux/time.h | 2 +-
 kernel/time/Kconfig  | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a4135b5e562e..335da90560e4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,7 +108,7 @@ config X86
 	select GENERIC_STRNLEN_USER
 	select HAVE_RCU_USER_QS if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
-	select HAS_PERSISTENT_CLOCK
+	select ALWAYS_USE_PERSISTENT_CLOCK
 	select GENERIC_KERNEL_THREAD
 	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_REL if X86_32
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 05761de7929b..da60de01f732 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -20,7 +20,7 @@ if RTC_CLASS
 config RTC_HCTOSYS
 	bool "Set system time from RTC on startup and resume"
 	default y
-	depends on !HAS_PERSISTENT_CLOCK
+	depends on !ALWAYS_USE_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be set using
 	  the value read from a specified RTC device. This is useful to avoid
@@ -29,7 +29,7 @@ config RTC_HCTOSYS
 config RTC_SYSTOHC
 	bool "Set the RTC time based on NTP synchronization"
 	default y
-	depends on !HAS_PERSISTENT_CLOCK
+	depends on !ALWAYS_USE_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be stored
 	  in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11
diff --git a/include/linux/time.h b/include/linux/time.h
index 369b6e3b87d8..476e1d7b2c37 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -117,7 +117,7 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
 
 extern bool persistent_clock_exist;
 
-#ifdef CONFIG_HAS_PERSISTENT_CLOCK
+#ifdef ALWAYS_USE_PERSISTENT_CLOCK
 #define has_persistent_clock()	true
 #else
 static inline bool has_persistent_clock(void)
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index f7e45b9b142b..0dddb9d09d0b 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -13,7 +13,7 @@ config ARCH_CLOCKSOURCE_DATA
 	bool
 
 # Platforms has a persistent clock
-config HAS_PERSISTENT_CLOCK
+config ALWAYS_USE_PERSISTENT_CLOCK
 	bool
 	default n
 
-- 
cgit v1.2.3


From ad964704ba9326d027fc10fd0099b7c880e50172 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 29 Jan 2013 17:45:49 -0500
Subject: ring-buffer: Add stats field for amount read from trace ring buffer

Add a stat about the number of events read from the ring buffer:

 #  cat /debug/tracing/per_cpu/cpu0/stats
entries: 39869
overrun: 870512
commit overrun: 0
bytes: 1449912
oldest event ts:  6561.368690
now ts:  6565.246426
dropped events: 0
read events: 112    <-- Added

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 18 ++++++++++++++++++
 kernel/trace/trace.c        |  3 +++
 3 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 519777e3fa01..1342e69542f3 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 13950d9027cb..7244acde77b0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3102,6 +3102,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
 
+/**
+ * ring_buffer_read_events_cpu - get the number of events successfully read
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of events read
+ */
+unsigned long
+ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->read;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
+
 /**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d399592701aa..90a1c71fdbfc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4430,6 +4430,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
 
+	cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "read events: %ld\n", cnt);
+
 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
 	kfree(s);
-- 
cgit v1.2.3


From debdd57f5145f3c6a4b3f8d0126abd1a2def7fc6 Mon Sep 17 00:00:00 2001
From: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Date: Wed, 26 Dec 2012 11:53:00 +0900
Subject: tracing: Make a snapshot feature available from userspace

Ftrace has a snapshot feature available from kernel space and
latency tracers (e.g. irqsoff) are using it. This patch enables
user applictions to take a snapshot via debugfs.

Add "snapshot" debugfs file in "tracing" directory.

  snapshot:
    This is used to take a snapshot and to read the output of the
    snapshot.

     # echo 1 > snapshot

    This will allocate the spare buffer for snapshot (if it is
    not allocated), and take a snapshot.

     # cat snapshot

    This will show contents of the snapshot.

     # echo 0 > snapshot

    This will free the snapshot if it is allocated.

    Any other positive values will clear the snapshot contents if
    the snapshot is allocated, or return EINVAL if it is not allocated.

Link: http://lkml.kernel.org/r/20121226025300.3252.86850.stgit@liselsia

Cc: Jiri Olsa <jolsa@redhat.com>
Cc: David Sharp <dhsharp@google.com>
Signed-off-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
[
   Fixed irqsoff selftest and also a conflict with a change
   that fixes the update_max_tr.
]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |   3 +
 kernel/trace/Kconfig         |  10 +++
 kernel/trace/trace.c         | 166 ++++++++++++++++++++++++++++++++++++-------
 kernel/trace/trace.h         |   1 +
 4 files changed, 154 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 6f8d0b77006b..13a54d0bdfa8 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -83,6 +83,9 @@ struct trace_iterator {
 	long			idx;
 
 	cpumask_var_t		started;
+
+	/* it's true when current open file is snapshot */
+	bool			snapshot;
 };
 
 enum trace_iter_flags {
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index cdc9d284d24e..36567564e221 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -253,6 +253,16 @@ config FTRACE_SYSCALLS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
+config TRACER_SNAPSHOT
+	bool "Create a snapshot trace buffer"
+	select TRACER_MAX_TRACE
+	help
+	  Allow tracing users to take snapshot of the current buffer using the
+	  ftrace interface, e.g.:
+
+	      echo 1 > /sys/kernel/debug/tracing/snapshot
+	      cat snapshot
+
 config TRACE_BRANCH_PROFILING
 	bool
 	select GENERIC_TRACER
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2c724662a3e8..70dce64b9ecf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -710,12 +710,11 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	WARN_ON_ONCE(!irqs_disabled());
 
-	/* If we disabled the tracer, stop now */
-	if (current_trace == &nop_trace)
-		return;
-
-	if (WARN_ON_ONCE(!current_trace->use_max_tr))
+	if (!current_trace->allocated_snapshot) {
+		/* Only the nop tracer should hit this when disabling */
+		WARN_ON_ONCE(current_trace != &nop_trace);
 		return;
+	}
 
 	arch_spin_lock(&ftrace_max_lock);
 
@@ -743,10 +742,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (!current_trace->use_max_tr) {
-		WARN_ON_ONCE(1);
+	if (WARN_ON_ONCE(!current_trace->allocated_snapshot))
 		return;
-	}
 
 	arch_spin_lock(&ftrace_max_lock);
 
@@ -866,10 +863,13 @@ int register_tracer(struct tracer *type)
 
 		current_trace = type;
 
-		/* If we expanded the buffers, make sure the max is expanded too */
-		if (ring_buffer_expanded && type->use_max_tr)
-			ring_buffer_resize(max_tr.buffer, trace_buf_size,
-						RING_BUFFER_ALL_CPUS);
+		if (type->use_max_tr) {
+			/* If we expanded the buffers, make sure the max is expanded too */
+			if (ring_buffer_expanded)
+				ring_buffer_resize(max_tr.buffer, trace_buf_size,
+						   RING_BUFFER_ALL_CPUS);
+			type->allocated_snapshot = true;
+		}
 
 		/* the test is responsible for initializing and enabling */
 		pr_info("Testing tracer %s: ", type->name);
@@ -885,10 +885,14 @@ int register_tracer(struct tracer *type)
 		/* Only reset on passing, to avoid touching corrupted buffers */
 		tracing_reset_online_cpus(tr);
 
-		/* Shrink the max buffer again */
-		if (ring_buffer_expanded && type->use_max_tr)
-			ring_buffer_resize(max_tr.buffer, 1,
-						RING_BUFFER_ALL_CPUS);
+		if (type->use_max_tr) {
+			type->allocated_snapshot = false;
+
+			/* Shrink the max buffer again */
+			if (ring_buffer_expanded)
+				ring_buffer_resize(max_tr.buffer, 1,
+						   RING_BUFFER_ALL_CPUS);
+		}
 
 		printk(KERN_CONT "PASSED\n");
 	}
@@ -1964,7 +1968,11 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		*iter->trace = *current_trace;
 	mutex_unlock(&trace_types_lock);
 
-	atomic_inc(&trace_record_cmdline_disabled);
+	if (iter->snapshot && iter->trace->use_max_tr)
+		return ERR_PTR(-EBUSY);
+
+	if (!iter->snapshot)
+		atomic_inc(&trace_record_cmdline_disabled);
 
 	if (*pos != iter->pos) {
 		iter->ent = NULL;
@@ -2003,7 +2011,11 @@ static void s_stop(struct seq_file *m, void *p)
 {
 	struct trace_iterator *iter = m->private;
 
-	atomic_dec(&trace_record_cmdline_disabled);
+	if (iter->snapshot && iter->trace->use_max_tr)
+		return;
+
+	if (!iter->snapshot)
+		atomic_dec(&trace_record_cmdline_disabled);
 	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 }
@@ -2438,7 +2450,7 @@ static const struct seq_operations tracer_seq_ops = {
 };
 
 static struct trace_iterator *
-__tracing_open(struct inode *inode, struct file *file)
+__tracing_open(struct inode *inode, struct file *file, bool snapshot)
 {
 	long cpu_file = (long) inode->i_private;
 	struct trace_iterator *iter;
@@ -2471,10 +2483,11 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
 		goto fail;
 
-	if (current_trace && current_trace->print_max)
+	if ((current_trace && current_trace->print_max) || snapshot)
 		iter->tr = &max_tr;
 	else
 		iter->tr = &global_trace;
+	iter->snapshot = snapshot;
 	iter->pos = -1;
 	mutex_init(&iter->mutex);
 	iter->cpu_file = cpu_file;
@@ -2491,8 +2504,9 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (trace_clocks[trace_clock_id].in_ns)
 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
 
-	/* stop the trace while dumping */
-	tracing_stop();
+	/* stop the trace while dumping if we are not opening "snapshot" */
+	if (!iter->snapshot)
+		tracing_stop();
 
 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
 		for_each_tracing_cpu(cpu) {
@@ -2555,8 +2569,9 @@ static int tracing_release(struct inode *inode, struct file *file)
 	if (iter->trace && iter->trace->close)
 		iter->trace->close(iter);
 
-	/* reenable tracing if it was previously enabled */
-	tracing_start();
+	if (!iter->snapshot)
+		/* reenable tracing if it was previously enabled */
+		tracing_start();
 	mutex_unlock(&trace_types_lock);
 
 	mutex_destroy(&iter->mutex);
@@ -2584,7 +2599,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 	}
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(inode, file);
+		iter = __tracing_open(inode, file, false);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -3219,7 +3234,7 @@ static int tracing_set_tracer(const char *buf)
 	if (current_trace && current_trace->reset)
 		current_trace->reset(tr);
 
-	had_max_tr = current_trace && current_trace->use_max_tr;
+	had_max_tr = current_trace && current_trace->allocated_snapshot;
 	current_trace = &nop_trace;
 
 	if (had_max_tr && !t->use_max_tr) {
@@ -3238,6 +3253,8 @@ static int tracing_set_tracer(const char *buf)
 		 */
 		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
 		set_buffer_entries(&max_tr, 1);
+		tracing_reset_online_cpus(&max_tr);
+		current_trace->allocated_snapshot = false;
 	}
 	destroy_trace_option_files(topts);
 
@@ -3248,6 +3265,7 @@ static int tracing_set_tracer(const char *buf)
 						   RING_BUFFER_ALL_CPUS);
 		if (ret < 0)
 			goto out;
+		t->allocated_snapshot = true;
 	}
 
 	if (t->init) {
@@ -4066,6 +4084,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 	return single_open(file, tracing_clock_show, NULL);
 }
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static int tracing_snapshot_open(struct inode *inode, struct file *file)
+{
+	struct trace_iterator *iter;
+	int ret = 0;
+
+	if (file->f_mode & FMODE_READ) {
+		iter = __tracing_open(inode, file, true);
+		if (IS_ERR(iter))
+			ret = PTR_ERR(iter);
+	}
+	return ret;
+}
+
+static ssize_t
+tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		       loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&trace_types_lock);
+
+	if (current_trace && current_trace->use_max_tr) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	switch (val) {
+	case 0:
+		if (current_trace->allocated_snapshot) {
+			/* free spare buffer */
+			ring_buffer_resize(max_tr.buffer, 1,
+					   RING_BUFFER_ALL_CPUS);
+			set_buffer_entries(&max_tr, 1);
+			tracing_reset_online_cpus(&max_tr);
+			current_trace->allocated_snapshot = false;
+		}
+		break;
+	case 1:
+		if (!current_trace->allocated_snapshot) {
+			/* allocate spare buffer */
+			ret = resize_buffer_duplicate_size(&max_tr,
+					&global_trace, RING_BUFFER_ALL_CPUS);
+			if (ret < 0)
+				break;
+			current_trace->allocated_snapshot = true;
+		}
+
+		local_irq_disable();
+		/* Now, we're going to swap */
+		update_max_tr(&global_trace, current, smp_processor_id());
+		local_irq_enable();
+		break;
+	default:
+		if (current_trace->allocated_snapshot)
+			tracing_reset_online_cpus(&max_tr);
+		else
+			ret = -EINVAL;
+		break;
+	}
+
+	if (ret >= 0) {
+		*ppos += cnt;
+		ret = cnt;
+	}
+out:
+	mutex_unlock(&trace_types_lock);
+	return ret;
+}
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
+
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -4122,6 +4221,16 @@ static const struct file_operations trace_clock_fops = {
 	.write		= tracing_clock_write,
 };
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static const struct file_operations snapshot_fops = {
+	.open		= tracing_snapshot_open,
+	.read		= seq_read,
+	.write		= tracing_snapshot_write,
+	.llseek		= tracing_seek,
+	.release	= tracing_release,
+};
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
 struct ftrace_buffer_info {
 	struct trace_array	*tr;
 	void			*spare;
@@ -4921,6 +5030,11 @@ static __init int tracer_init_debugfs(void)
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+	trace_create_file("snapshot", 0644, d_tracer,
+			  (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
+#endif
+
 	create_trace_options_dir();
 
 	for_each_tracing_cpu(cpu)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 04a2c7ab1735..57d7e5397d56 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -287,6 +287,7 @@ struct tracer {
 	struct tracer_flags	*flags;
 	bool			print_max;
 	bool			use_max_tr;
+	bool			allocated_snapshot;
 };
 
 
-- 
cgit v1.2.3


From 56813f798e8cb3f79a25e0523e782d910f376083 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 4 Jan 2013 13:14:34 +0100
Subject: mfd: ab8500: update header file and version detection

This updates the AB8500 register map with defines for a few
new chip variants and adds version detection helpers to handle
the different variants.

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/mfd/abx500/ab8500.h | 277 ++++++++++++++++++++++++++++++--------
 1 file changed, 222 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 1cb5698b4d76..e640ea059be2 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -24,7 +24,7 @@ enum ab8500_version {
 	AB8500_VERSION_AB8500 = 0x0,
 	AB8500_VERSION_AB8505 = 0x1,
 	AB8500_VERSION_AB9540 = 0x2,
-	AB8500_VERSION_AB8540 = 0x3,
+	AB8500_VERSION_AB8540 = 0x4,
 	AB8500_VERSION_UNDEFINED,
 };
 
@@ -32,6 +32,7 @@ enum ab8500_version {
 #define AB8500_CUTEARLY	0x00
 #define AB8500_CUT1P0	0x10
 #define AB8500_CUT1P1	0x11
+#define AB8500_CUT1P2	0x12 /* Only valid for AB8540 */
 #define AB8500_CUT2P0	0x20
 #define AB8500_CUT3P0	0x30
 #define AB8500_CUT3P3	0x33
@@ -39,6 +40,7 @@ enum ab8500_version {
 /*
  * AB8500 bank addresses
  */
+#define AB8500_M_FSM_RANK	0x0
 #define AB8500_SYS_CTRL1_BLOCK	0x1
 #define AB8500_SYS_CTRL2_BLOCK	0x2
 #define AB8500_REGU_CTRL1	0x3
@@ -58,6 +60,7 @@ enum ab8500_version {
 #define AB8500_DEVELOPMENT	0x11
 #define AB8500_DEBUG		0x12
 #define AB8500_PROD_TEST	0x13
+#define AB8500_STE_TEST		0x14
 #define AB8500_OTP_EMUL		0x15
 
 /*
@@ -65,11 +68,11 @@ enum ab8500_version {
  * Values used to index into array ab8500_irq_regoffset[] defined in
  * drivers/mdf/ab8500-core.c
  */
-/* Definitions for AB8500 and AB9540 */
+/* Definitions for AB8500, AB9540 and AB8540 */
 /* ab8500_irq_regoffset[0] -> IT[Source|Latch|Mask]1 */
 #define AB8500_INT_MAIN_EXT_CH_NOT_OK	0 /* not 8505/9540 */
-#define AB8500_INT_UN_PLUG_TV_DET	1 /* not 8505/9540 */
-#define AB8500_INT_PLUG_TV_DET		2 /* not 8505/9540 */
+#define AB8500_INT_UN_PLUG_TV_DET	1 /* not 8505/9540/8540 */
+#define AB8500_INT_PLUG_TV_DET		2 /* not 8505/9540/8540 */
 #define AB8500_INT_TEMP_WARM		3
 #define AB8500_INT_PON_KEY2DB_F		4
 #define AB8500_INT_PON_KEY2DB_R		5
@@ -77,18 +80,19 @@ enum ab8500_version {
 #define AB8500_INT_PON_KEY1DB_R		7
 /* ab8500_irq_regoffset[1] -> IT[Source|Latch|Mask]2 */
 #define AB8500_INT_BATT_OVV		8
-#define AB8500_INT_MAIN_CH_UNPLUG_DET	10 /* not 8505 */
-#define AB8500_INT_MAIN_CH_PLUG_DET	11 /* not 8505 */
+#define AB8500_INT_MAIN_CH_UNPLUG_DET	10 /* not 8505/8540 */
+#define AB8500_INT_MAIN_CH_PLUG_DET	11 /* not 8505/8540 */
 #define AB8500_INT_VBUS_DET_F		14
 #define AB8500_INT_VBUS_DET_R		15
 /* ab8500_irq_regoffset[2] -> IT[Source|Latch|Mask]3 */
 #define AB8500_INT_VBUS_CH_DROP_END	16
 #define AB8500_INT_RTC_60S		17
 #define AB8500_INT_RTC_ALARM		18
+#define AB8540_INT_BIF_INT		19
 #define AB8500_INT_BAT_CTRL_INDB	20
 #define AB8500_INT_CH_WD_EXP		21
 #define AB8500_INT_VBUS_OVV		22
-#define AB8500_INT_MAIN_CH_DROP_END	23 /* not 8505/9540 */
+#define AB8500_INT_MAIN_CH_DROP_END	23 /* not 8505/9540/8540 */
 /* ab8500_irq_regoffset[3] -> IT[Source|Latch|Mask]4 */
 #define AB8500_INT_CCN_CONV_ACC		24
 #define AB8500_INT_INT_AUD		25
@@ -99,7 +103,7 @@ enum ab8500_version {
 #define AB8500_INT_BUP_CHG_NOT_OK	30
 #define AB8500_INT_BUP_CHG_OK		31
 /* ab8500_irq_regoffset[4] -> IT[Source|Latch|Mask]5 */
-#define AB8500_INT_GP_HW_ADC_CONV_END	32 /* not 8505 */
+#define AB8500_INT_GP_HW_ADC_CONV_END	32 /* not 8505/8540 */
 #define AB8500_INT_ACC_DETECT_1DB_F	33
 #define AB8500_INT_ACC_DETECT_1DB_R	34
 #define AB8500_INT_ACC_DETECT_22DB_F	35
@@ -108,23 +112,23 @@ enum ab8500_version {
 #define AB8500_INT_ACC_DETECT_21DB_R	38
 #define AB8500_INT_GP_SW_ADC_CONV_END	39
 /* ab8500_irq_regoffset[5] -> IT[Source|Latch|Mask]7 */
-#define AB8500_INT_GPIO6R		40 /* not 8505/9540 */
-#define AB8500_INT_GPIO7R		41 /* not 8505/9540 */
-#define AB8500_INT_GPIO8R		42 /* not 8505/9540 */
-#define AB8500_INT_GPIO9R		43 /* not 8505/9540 */
-#define AB8500_INT_GPIO10R		44
-#define AB8500_INT_GPIO11R		45
-#define AB8500_INT_GPIO12R		46 /* not 8505 */
-#define AB8500_INT_GPIO13R		47
+#define AB8500_INT_GPIO6R		40 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO7R		41 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO8R		42 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO9R		43 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO10R		44 /* not 8540 */
+#define AB8500_INT_GPIO11R		45 /* not 8540 */
+#define AB8500_INT_GPIO12R		46 /* not 8505/8540 */
+#define AB8500_INT_GPIO13R		47 /* not 8540 */
 /* ab8500_irq_regoffset[6] -> IT[Source|Latch|Mask]8 */
-#define AB8500_INT_GPIO24R		48 /* not 8505 */
-#define AB8500_INT_GPIO25R		49 /* not 8505 */
-#define AB8500_INT_GPIO36R		50 /* not 8505/9540 */
-#define AB8500_INT_GPIO37R		51 /* not 8505/9540 */
-#define AB8500_INT_GPIO38R		52 /* not 8505/9540 */
-#define AB8500_INT_GPIO39R		53 /* not 8505/9540 */
-#define AB8500_INT_GPIO40R		54
-#define AB8500_INT_GPIO41R		55
+#define AB8500_INT_GPIO24R		48 /* not 8505/8540 */
+#define AB8500_INT_GPIO25R		49 /* not 8505/8540 */
+#define AB8500_INT_GPIO36R		50 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO37R		51 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO38R		52 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO39R		53 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO40R		54 /* not 8540 */
+#define AB8500_INT_GPIO41R		55 /* not 8540 */
 /* ab8500_irq_regoffset[7] -> IT[Source|Latch|Mask]9 */
 #define AB8500_INT_GPIO6F		56 /* not 8505/9540 */
 #define AB8500_INT_GPIO7F		57 /* not 8505/9540 */
@@ -135,14 +139,14 @@ enum ab8500_version {
 #define AB8500_INT_GPIO12F		62 /* not 8505 */
 #define AB8500_INT_GPIO13F		63
 /* ab8500_irq_regoffset[8] -> IT[Source|Latch|Mask]10 */
-#define AB8500_INT_GPIO24F		64 /* not 8505 */
-#define AB8500_INT_GPIO25F		65 /* not 8505 */
-#define AB8500_INT_GPIO36F		66 /* not 8505/9540 */
-#define AB8500_INT_GPIO37F		67 /* not 8505/9540 */
-#define AB8500_INT_GPIO38F		68 /* not 8505/9540 */
-#define AB8500_INT_GPIO39F		69 /* not 8505/9540 */
-#define AB8500_INT_GPIO40F		70
-#define AB8500_INT_GPIO41F		71
+#define AB8500_INT_GPIO24F		64 /* not 8505/8540 */
+#define AB8500_INT_GPIO25F		65 /* not 8505/8540 */
+#define AB8500_INT_GPIO36F		66 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO37F		67 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO38F		68 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO39F		69 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO40F		70 /* not 8540 */
+#define AB8500_INT_GPIO41F		71 /* not 8540 */
 /* ab8500_irq_regoffset[9] -> IT[Source|Latch|Mask]12 */
 #define AB8500_INT_ADP_SOURCE_ERROR	72
 #define AB8500_INT_ADP_SINK_ERROR	73
@@ -160,42 +164,44 @@ enum ab8500_version {
 #define AB8500_INT_SRP_DETECT		88
 #define AB8500_INT_USB_CHARGER_NOT_OKR	89
 #define AB8500_INT_ID_WAKEUP_R		90
+#define AB8500_INT_ID_DET_PLUGR         91 /* 8505/9540 cut2.0 */
 #define AB8500_INT_ID_DET_R1R		92
 #define AB8500_INT_ID_DET_R2R		93
 #define AB8500_INT_ID_DET_R3R		94
 #define AB8500_INT_ID_DET_R4R		95
 /* ab8500_irq_regoffset[12] -> IT[Source|Latch|Mask]21 */
-#define AB8500_INT_ID_WAKEUP_F		96
-#define AB8500_INT_ID_DET_R1F		98
-#define AB8500_INT_ID_DET_R2F		99
-#define AB8500_INT_ID_DET_R3F		100
-#define AB8500_INT_ID_DET_R4F		101
-#define AB8500_INT_CHAUTORESTARTAFTSEC  102
+#define AB8500_INT_ID_WAKEUP_F		96 /* not 8505/9540 */
+#define AB8500_INT_ID_DET_PLUGF		97 /* 8505/9540 cut2.0 */
+#define AB8500_INT_ID_DET_R1F		98 /* not 8505/9540 */
+#define AB8500_INT_ID_DET_R2F		99 /* not 8505/9540 */
+#define AB8500_INT_ID_DET_R3F		100 /* not 8505/9540 */
+#define AB8500_INT_ID_DET_R4F		101 /* not 8505/9540 */
+#define AB8500_INT_CHAUTORESTARTAFTSEC	102 /* not 8505/9540 */
 #define AB8500_INT_CHSTOPBYSEC		103
 /* ab8500_irq_regoffset[13] -> IT[Source|Latch|Mask]22 */
 #define AB8500_INT_USB_CH_TH_PROT_F	104
-#define AB8500_INT_USB_CH_TH_PROT_R    105
+#define AB8500_INT_USB_CH_TH_PROT_R	105
 #define AB8500_INT_MAIN_CH_TH_PROT_F	106 /* not 8505/9540 */
 #define AB8500_INT_MAIN_CH_TH_PROT_R	107 /* not 8505/9540 */
 #define AB8500_INT_CHCURLIMNOHSCHIRP	109
 #define AB8500_INT_CHCURLIMHSCHIRP	110
 #define AB8500_INT_XTAL32K_KO		111
 
-/* Definitions for AB9540 */
+/* Definitions for AB9540 / AB8505 */
 /* ab8500_irq_regoffset[14] -> IT[Source|Latch|Mask]13 */
-#define AB9540_INT_GPIO50R		113
-#define AB9540_INT_GPIO51R		114 /* not 8505 */
-#define AB9540_INT_GPIO52R		115
-#define AB9540_INT_GPIO53R		116
-#define AB9540_INT_GPIO54R		117 /* not 8505 */
+#define AB9540_INT_GPIO50R		113 /* not 8540 */
+#define AB9540_INT_GPIO51R		114 /* not 8505/8540 */
+#define AB9540_INT_GPIO52R		115 /* not 8540 */
+#define AB9540_INT_GPIO53R		116 /* not 8540 */
+#define AB9540_INT_GPIO54R		117 /* not 8505/8540 */
 #define AB9540_INT_IEXT_CH_RF_BFN_R	118
-#define AB9540_INT_IEXT_CH_RF_BFN_F	119
 /* ab8500_irq_regoffset[15] -> IT[Source|Latch|Mask]14 */
-#define AB9540_INT_GPIO50F		121
-#define AB9540_INT_GPIO51F		122 /* not 8505 */
-#define AB9540_INT_GPIO52F		123
-#define AB9540_INT_GPIO53F		124
-#define AB9540_INT_GPIO54F		125 /* not 8505 */
+#define AB9540_INT_GPIO50F		121 /* not 8540 */
+#define AB9540_INT_GPIO51F		122 /* not 8505/8540 */
+#define AB9540_INT_GPIO52F		123 /* not 8540 */
+#define AB9540_INT_GPIO53F		124 /* not 8540 */
+#define AB9540_INT_GPIO54F		125 /* not 8505/8540 */
+#define AB9540_INT_IEXT_CH_RF_BFN_F	126
 /* ab8500_irq_regoffset[16] -> IT[Source|Latch|Mask]25 */
 #define AB8505_INT_KEYSTUCK		128
 #define AB8505_INT_IKR			129
@@ -204,6 +210,87 @@ enum ab8500_version {
 #define AB8505_INT_KEYDEGLITCH		132
 #define AB8505_INT_MODPWRSTATUSF	134
 #define AB8505_INT_MODPWRSTATUSR	135
+/* ab8500_irq_regoffset[17] -> IT[Source|Latch|Mask]6 */
+#define AB8500_INT_HOOK_DET_NEG_F	138
+#define AB8500_INT_HOOK_DET_NEG_R	139
+#define AB8500_INT_HOOK_DET_POS_F	140
+#define AB8500_INT_HOOK_DET_POS_R	141
+#define AB8500_INT_PLUG_DET_COMP_F	142
+#define AB8500_INT_PLUG_DET_COMP_R	143
+/* ab8500_irq_regoffset[18] -> IT[Source|Latch|Mask]23 */
+#define AB8505_INT_COLL			144
+#define AB8505_INT_RESERR		145
+#define AB8505_INT_FRAERR		146
+#define AB8505_INT_COMERR		147
+#define AB8505_INT_SPDSET		148
+#define AB8505_INT_DSENT		149
+#define AB8505_INT_DREC			150
+#define AB8505_INT_ACC_INT		151
+/* ab8500_irq_regoffset[19] -> IT[Source|Latch|Mask]24 */
+#define AB8505_INT_NOPINT		152
+/* ab8540_irq_regoffset[20] -> IT[Source|Latch|Mask]26 */
+#define AB8540_INT_IDPLUGDETCOMPF	160
+#define AB8540_INT_IDPLUGDETCOMPR	161
+#define AB8540_INT_FMDETCOMPLOF		162
+#define AB8540_INT_FMDETCOMPLOR		163
+#define AB8540_INT_FMDETCOMPHIF		164
+#define AB8540_INT_FMDETCOMPHIR		165
+#define AB8540_INT_ID5VDETCOMPF		166
+#define AB8540_INT_ID5VDETCOMPR		167
+/* ab8540_irq_regoffset[21] -> IT[Source|Latch|Mask]27 */
+#define AB8540_INT_GPIO43F		168
+#define AB8540_INT_GPIO43R		169
+#define AB8540_INT_GPIO44F		170
+#define AB8540_INT_GPIO44R		171
+#define AB8540_INT_KEYPOSDETCOMPF	172
+#define AB8540_INT_KEYPOSDETCOMPR	173
+#define AB8540_INT_KEYNEGDETCOMPF	174
+#define AB8540_INT_KEYNEGDETCOMPR	175
+/* ab8540_irq_regoffset[22] -> IT[Source|Latch|Mask]28 */
+#define AB8540_INT_GPIO1VBATF		176
+#define AB8540_INT_GPIO1VBATR		177
+#define AB8540_INT_GPIO2VBATF		178
+#define AB8540_INT_GPIO2VBATR		179
+#define AB8540_INT_GPIO3VBATF		180
+#define AB8540_INT_GPIO3VBATR		181
+#define AB8540_INT_GPIO4VBATF		182
+#define AB8540_INT_GPIO4VBATR		183
+/* ab8540_irq_regoffset[23] -> IT[Source|Latch|Mask]29 */
+#define AB8540_INT_SYSCLKREQ2F		184
+#define AB8540_INT_SYSCLKREQ2R		185
+#define AB8540_INT_SYSCLKREQ3F		186
+#define AB8540_INT_SYSCLKREQ3R		187
+#define AB8540_INT_SYSCLKREQ4F		188
+#define AB8540_INT_SYSCLKREQ4R		189
+#define AB8540_INT_SYSCLKREQ5F		190
+#define AB8540_INT_SYSCLKREQ5R		191
+/* ab8540_irq_regoffset[24] -> IT[Source|Latch|Mask]30 */
+#define AB8540_INT_PWMOUT1F		192
+#define AB8540_INT_PWMOUT1R		193
+#define AB8540_INT_PWMCTRL0F		194
+#define AB8540_INT_PWMCTRL0R		195
+#define AB8540_INT_PWMCTRL1F		196
+#define AB8540_INT_PWMCTRL1R		197
+#define AB8540_INT_SYSCLKREQ6F		198
+#define AB8540_INT_SYSCLKREQ6R		199
+/* ab8540_irq_regoffset[25] -> IT[Source|Latch|Mask]31 */
+#define AB8540_INT_PWMEXTVIBRA1F	200
+#define AB8540_INT_PWMEXTVIBRA1R	201
+#define AB8540_INT_PWMEXTVIBRA2F	202
+#define AB8540_INT_PWMEXTVIBRA2R	203
+#define AB8540_INT_PWMOUT2F		204
+#define AB8540_INT_PWMOUT2R		205
+#define AB8540_INT_PWMOUT3F		206
+#define AB8540_INT_PWMOUT3R		207
+/* ab8540_irq_regoffset[26] -> IT[Source|Latch|Mask]32 */
+#define AB8540_INT_ADDATA2F		208
+#define AB8540_INT_ADDATA2R		209
+#define AB8540_INT_DADATA2F		210
+#define AB8540_INT_DADATA2R		211
+#define AB8540_INT_FSYNC2F		212
+#define AB8540_INT_FSYNC2R		213
+#define AB8540_INT_BITCLK2F		214
+#define AB8540_INT_BITCLK2R		215
 
 /*
  * AB8500_AB9540_NR_IRQS is used when configuring the IRQ numbers for the
@@ -213,13 +300,24 @@ enum ab8500_version {
  * which is larger.
  */
 #define AB8500_NR_IRQS			112
-#define AB8505_NR_IRQS			136
-#define AB9540_NR_IRQS			136
+#define AB8505_NR_IRQS			153
+#define AB9540_NR_IRQS			153
+#define AB8540_NR_IRQS			216
 /* This is set to the roof of any AB8500 chip variant IRQ counts */
-#define AB8500_MAX_NR_IRQS		AB9540_NR_IRQS
+#define AB8500_MAX_NR_IRQS		AB8540_NR_IRQS
 
 #define AB8500_NUM_IRQ_REGS		14
-#define AB9540_NUM_IRQ_REGS		17
+#define AB9540_NUM_IRQ_REGS		20
+#define AB8540_NUM_IRQ_REGS		27
+
+/* Turn On Status Event */
+#define AB8500_POR_ON_VBAT		0x01
+#define AB8500_POW_KEY_1_ON		0x02
+#define AB8500_POW_KEY_2_ON		0x04
+#define AB8500_RTC_ALARM		0x08
+#define AB8500_MAIN_CH_DET		0x10
+#define AB8500_VBUS_DET			0x20
+#define AB8500_USB_ID_DET		0x40
 
 /**
  * struct ab8500 - ab8500 internal structure
@@ -335,10 +433,79 @@ static inline int is_ab8500_2p0_or_earlier(struct ab8500 *ab)
 	return (is_ab8500(ab) && (ab->chip_id <= AB8500_CUT2P0));
 }
 
+static inline int is_ab8500_3p3_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab8500(ab) && (ab->chip_id <= AB8500_CUT3P3));
+}
+
 /* exclude also ab8505, ab9540... */
 static inline int is_ab8500_2p0(struct ab8500 *ab)
 {
 	return (is_ab8500(ab) && (ab->chip_id == AB8500_CUT2P0));
 }
 
+static inline int is_ab8505_1p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id <= AB8500_CUT1P0));
+}
+
+static inline int is_ab8505_2p0(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id == AB8500_CUT2P0));
+}
+
+static inline int is_ab9540_1p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id <= AB8500_CUT1P0));
+}
+
+static inline int is_ab9540_2p0(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id == AB8500_CUT2P0));
+}
+
+/*
+ * Be careful, the marketing name for this chip is 2.1
+ * but the value read from the chip is 3.0 (0x30)
+ */
+static inline int is_ab9540_3p0(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id == AB8500_CUT3P0));
+}
+
+static inline int is_ab8540_1p0_or_earlier(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id <= AB8500_CUT1P0);
+}
+
+static inline int is_ab8540_1p1_or_earlier(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id <= AB8500_CUT1P1);
+}
+
+static inline int is_ab8540_1p2_or_earlier(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id <= AB8500_CUT1P2);
+}
+
+static inline int is_ab8540_2p0_or_earlier(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id <= AB8500_CUT2P0);
+}
+
+static inline int is_ab8540_2p0(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id == AB8500_CUT2P0);
+}
+
+static inline int is_ab8505_2p0_earlier(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id < AB8500_CUT2P0));
+}
+
+static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id < AB8500_CUT2P0));
+}
+
 #endif /* MFD_AB8500_H */
-- 
cgit v1.2.3


From 0493e6493031523f78680b4469f02fc1b2d440f3 Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@st.com>
Date: Tue, 8 Jan 2013 10:41:02 +0100
Subject: pinctrl: add abx500 pinctrl driver core

This adds the AB8500 core driver, which will be utilized by
the follow-on drivers for different ABx500 variants.
Sselect the driver from the DBX500_SOC, as this chip is
powering and clocking that SoC.

Cc: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Patrice Chotard <patrice.chotard@st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/Kconfig            |    1 +
 arch/arm/mach-ux500/board-mop500.c     |   19 +-
 drivers/pinctrl/Kconfig                |    7 +
 drivers/pinctrl/Makefile               |    1 +
 drivers/pinctrl/pinctrl-abx500.c       | 1233 ++++++++++++++++++++++++++++++++
 drivers/pinctrl/pinctrl-abx500.h       |  180 +++++
 include/linux/mfd/abx500/ab8500-gpio.h |   15 +-
 include/linux/mfd/abx500/ab8500.h      |    2 +-
 8 files changed, 1437 insertions(+), 21 deletions(-)
 create mode 100644 drivers/pinctrl/pinctrl-abx500.c
 create mode 100644 drivers/pinctrl/pinctrl-abx500.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index 1d7dbe61a958..018bf681659d 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig
@@ -11,6 +11,7 @@ config UX500_SOC_COMMON
 	select COMMON_CLK
 	select PINCTRL
 	select PINCTRL_NOMADIK
+	select PINCTRL_ABX500
 	select PL310_ERRATA_753970 if CACHE_PL310
 
 config UX500_SOC_DB8500
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index d453522edb0d..b6f14ee507ca 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -90,26 +90,9 @@ static struct platform_device snowball_gpio_en_3v3_regulator_dev = {
        },
 };
 
-static struct ab8500_gpio_platform_data ab8500_gpio_pdata = {
+static struct abx500_gpio_platform_data ab8500_gpio_pdata = {
 	.gpio_base		= MOP500_AB8500_PIN_GPIO(1),
 	.irq_base		= MOP500_AB8500_VIR_GPIO_IRQ_BASE,
-	/* config_reg is the initial configuration of ab8500 pins.
-	 * The pins can be configured as GPIO or alt functions based
-	 * on value present in GpioSel1 to GpioSel6 and AlternatFunction
-	 * register. This is the array of 7 configuration settings.
-	 * One has to compile time decide these settings. Below is the
-	 * explanation of these setting
-	 * GpioSel1 = 0x00 => Pins GPIO1 to GPIO8 are not used as GPIO
-	 * GpioSel2 = 0x1E => Pins GPIO10 to GPIO13 are configured as GPIO
-	 * GpioSel3 = 0x80 => Pin GPIO24 is configured as GPIO
-	 * GpioSel4 = 0x01 => Pin GPIo25 is configured as GPIO
-	 * GpioSel5 = 0x7A => Pins GPIO34, GPIO36 to GPIO39 are conf as GPIO
-	 * GpioSel6 = 0x00 => Pins GPIO41 & GPIo42 are not configured as GPIO
-	 * AlternaFunction = 0x00 => If Pins GPIO10 to 13 are not configured
-	 * as GPIO then this register selectes the alternate fucntions
-	 */
-	.config_reg		= {0x00, 0x1E, 0x80, 0x01,
-					0x7A, 0x00, 0x00},
 };
 
 /* ab8500-codec */
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index 881ddcd00236..ba3038c827c6 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -26,6 +26,13 @@ config DEBUG_PINCTRL
 	help
 	  Say Y here to add some extra checks and diagnostics to PINCTRL calls.
 
+config PINCTRL_ABX500
+	bool "ST-Ericsson ABx500 family Mixed Signal Circuit gpio functions"
+	depends on AB8500_CORE
+	select GENERIC_PINCONF
+	help
+	  Select this to enable the ABx500 family IC GPIO driver
+
 config PINCTRL_AT91
 	bool "AT91 pinctrl driver"
 	depends on OF
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index e9ad8c3851be..ead4fa7dfd1f 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -9,6 +9,7 @@ ifeq ($(CONFIG_OF),y)
 obj-$(CONFIG_PINCTRL)		+= devicetree.o
 endif
 obj-$(CONFIG_GENERIC_PINCONF)	+= pinconf-generic.o
+obj-$(CONFIG_PINCTRL_ABX500)	+= pinctrl-abx500.o
 obj-$(CONFIG_PINCTRL_AT91)	+= pinctrl-at91.o
 obj-$(CONFIG_PINCTRL_BCM2835)	+= pinctrl-bcm2835.o
 obj-$(CONFIG_PINCTRL_IMX)	+= pinctrl-imx.o
diff --git a/drivers/pinctrl/pinctrl-abx500.c b/drivers/pinctrl/pinctrl-abx500.c
new file mode 100644
index 000000000000..9eed9887da2a
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-abx500.c
@@ -0,0 +1,1233 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2013
+ *
+ * Author: Patrice Chotard <patrice.chotard@st.com>
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-gpio.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+
+#include "pinctrl-abx500.h"
+
+/*
+ * The AB9540 and AB8540 GPIO support are extended versions
+ * of the AB8500 GPIO support.
+ * The AB9540 supports an additional (7th) register so that
+ * more GPIO may be configured and used.
+ * The AB8540 supports 4 new gpios (GPIOx_VBAT) that have
+ * internal pull-up and pull-down capabilities.
+ */
+
+/*
+ * GPIO registers offset
+ * Bank: 0x10
+ */
+#define AB8500_GPIO_SEL1_REG	0x00
+#define AB8500_GPIO_SEL2_REG	0x01
+#define AB8500_GPIO_SEL3_REG	0x02
+#define AB8500_GPIO_SEL4_REG	0x03
+#define AB8500_GPIO_SEL5_REG	0x04
+#define AB8500_GPIO_SEL6_REG	0x05
+#define AB9540_GPIO_SEL7_REG	0x06
+
+#define AB8500_GPIO_DIR1_REG	0x10
+#define AB8500_GPIO_DIR2_REG	0x11
+#define AB8500_GPIO_DIR3_REG	0x12
+#define AB8500_GPIO_DIR4_REG	0x13
+#define AB8500_GPIO_DIR5_REG	0x14
+#define AB8500_GPIO_DIR6_REG	0x15
+#define AB9540_GPIO_DIR7_REG	0x16
+
+#define AB8500_GPIO_OUT1_REG	0x20
+#define AB8500_GPIO_OUT2_REG	0x21
+#define AB8500_GPIO_OUT3_REG	0x22
+#define AB8500_GPIO_OUT4_REG	0x23
+#define AB8500_GPIO_OUT5_REG	0x24
+#define AB8500_GPIO_OUT6_REG	0x25
+#define AB9540_GPIO_OUT7_REG	0x26
+
+#define AB8500_GPIO_PUD1_REG	0x30
+#define AB8500_GPIO_PUD2_REG	0x31
+#define AB8500_GPIO_PUD3_REG	0x32
+#define AB8500_GPIO_PUD4_REG	0x33
+#define AB8500_GPIO_PUD5_REG	0x34
+#define AB8500_GPIO_PUD6_REG	0x35
+#define AB9540_GPIO_PUD7_REG	0x36
+
+#define AB8500_GPIO_IN1_REG	0x40
+#define AB8500_GPIO_IN2_REG	0x41
+#define AB8500_GPIO_IN3_REG	0x42
+#define AB8500_GPIO_IN4_REG	0x43
+#define AB8500_GPIO_IN5_REG	0x44
+#define AB8500_GPIO_IN6_REG	0x45
+#define AB9540_GPIO_IN7_REG	0x46
+#define AB8540_GPIO_VINSEL_REG	0x47
+#define AB8540_GPIO_PULL_UPDOWN_REG	0x48
+#define AB8500_GPIO_ALTFUN_REG	0x50
+#define AB8500_NUM_VIR_GPIO_IRQ	16
+#define AB8540_GPIO_PULL_UPDOWN_MASK	0x03
+#define AB8540_GPIO_VINSEL_MASK	0x03
+#define AB8540_GPIOX_VBAT_START	51
+#define AB8540_GPIOX_VBAT_END	54
+
+enum abx500_gpio_action {
+	NONE,
+	STARTUP,
+	SHUTDOWN,
+	MASK,
+	UNMASK
+};
+
+struct abx500_pinctrl {
+	struct device *dev;
+	struct pinctrl_dev *pctldev;
+	struct abx500_pinctrl_soc_data *soc;
+	struct gpio_chip chip;
+	struct ab8500 *parent;
+	struct mutex lock;
+	u32 irq_base;
+	enum abx500_gpio_action irq_action;
+	u16 rising;
+	u16 falling;
+	struct abx500_gpio_irq_cluster *irq_cluster;
+	int irq_cluster_size;
+	int irq_gpio_rising_offset;
+	int irq_gpio_falling_offset;
+	int irq_gpio_factor;
+};
+
+/**
+ * to_abx500_pinctrl() - get the pointer to abx500_pinctrl
+ * @chip:	Member of the structure abx500_pinctrl
+ */
+static inline struct abx500_pinctrl *to_abx500_pinctrl(struct gpio_chip *chip)
+{
+	return container_of(chip, struct abx500_pinctrl, chip);
+}
+
+static int abx500_gpio_get_bit(struct gpio_chip *chip, u8 reg,
+					unsigned offset, bool *bit)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	u8 pos = offset % 8;
+	u8 val;
+	int ret;
+
+	reg += offset / 8;
+	ret = abx500_get_register_interruptible(pct->dev,
+						AB8500_MISC, reg, &val);
+
+	*bit = !!(val & BIT(pos));
+
+	if (ret < 0)
+		dev_err(pct->dev,
+			"%s read reg =%x, offset=%x failed\n",
+			__func__, reg, offset);
+
+	return ret;
+}
+
+static int abx500_gpio_set_bits(struct gpio_chip *chip, u8 reg,
+					unsigned offset, int val)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	u8 pos = offset % 8;
+	int ret;
+
+	reg += offset / 8;
+	ret = abx500_mask_and_set_register_interruptible(pct->dev,
+				AB8500_MISC, reg, 1 << pos, val << pos);
+	if (ret < 0)
+		dev_err(pct->dev, "%s write failed\n", __func__);
+	return ret;
+}
+/**
+ * abx500_gpio_get() - Get the particular GPIO value
+ * @chip: Gpio device
+ * @offset: GPIO number to read
+ */
+static int abx500_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	bool bit;
+	int ret;
+
+	ret = abx500_gpio_get_bit(chip, AB8500_GPIO_IN1_REG,
+				  offset, &bit);
+	if (ret < 0) {
+		dev_err(pct->dev, "%s failed\n", __func__);
+		return ret;
+	}
+	return bit;
+}
+
+static void abx500_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	int ret;
+
+	ret = abx500_gpio_set_bits(chip, AB8500_GPIO_OUT1_REG, offset, val);
+	if (ret < 0)
+		dev_err(pct->dev, "%s write failed\n", __func__);
+}
+
+static int abx500_config_pull_updown(struct abx500_pinctrl *pct,
+				int offset, enum abx500_gpio_pull_updown val)
+{
+	u8 pos;
+	int ret;
+	struct pullud *pullud;
+
+	if (!pct->soc->pullud) {
+		dev_err(pct->dev, "%s AB chip doesn't support pull up/down feature",
+				__func__);
+		ret = -EPERM;
+		goto out;
+	}
+
+	pullud = pct->soc->pullud;
+
+	if ((offset < pullud->first_pin)
+		|| (offset > pullud->last_pin)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pos = offset << 1;
+
+	ret = abx500_mask_and_set_register_interruptible(pct->dev,
+			AB8500_MISC, AB8540_GPIO_PULL_UPDOWN_REG,
+			AB8540_GPIO_PULL_UPDOWN_MASK << pos, val << pos);
+
+out:
+	if (ret < 0)
+		dev_err(pct->dev, "%s failed (%d)\n", __func__, ret);
+	return ret;
+}
+
+static int abx500_gpio_direction_output(struct gpio_chip *chip,
+					unsigned offset,
+					int val)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	struct pullud *pullud = pct->soc->pullud;
+	unsigned gpio;
+	int ret;
+	/* set direction as output */
+	ret = abx500_gpio_set_bits(chip, AB8500_GPIO_DIR1_REG, offset, 1);
+	if (ret < 0)
+		return ret;
+
+	/* disable pull down */
+	ret = abx500_gpio_set_bits(chip, AB8500_GPIO_PUD1_REG, offset, 1);
+	if (ret < 0)
+		return ret;
+
+	/* if supported, disable both pull down and pull up */
+	gpio = offset + 1;
+	if (pullud && gpio >= pullud->first_pin && gpio <= pullud->last_pin) {
+		ret = abx500_config_pull_updown(pct,
+				gpio,
+				ABX500_GPIO_PULL_NONE);
+		if (ret < 0)
+			return ret;
+	}
+	/* set the output as 1 or 0 */
+	return abx500_gpio_set_bits(chip, AB8500_GPIO_OUT1_REG, offset, val);
+}
+
+static int abx500_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	/* set the register as input */
+	return abx500_gpio_set_bits(chip, AB8500_GPIO_DIR1_REG, offset, 0);
+}
+
+static int abx500_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	int base = pct->irq_base;
+	int i;
+
+	for (i = 0; i < pct->irq_cluster_size; i++) {
+		struct abx500_gpio_irq_cluster *cluster =
+			&pct->irq_cluster[i];
+
+		if (offset >= cluster->start && offset <= cluster->end)
+			return base + offset - cluster->start;
+
+		/* Advance by the number of gpios in this cluster */
+		base += cluster->end + cluster->offset - cluster->start + 1;
+	}
+
+	return -EINVAL;
+}
+
+static int abx500_set_mode(struct pinctrl_dev *pctldev, struct gpio_chip *chip,
+		unsigned gpio, int alt_setting)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct alternate_functions af = pct->soc->alternate_functions[gpio];
+	int ret;
+	int val;
+	unsigned offset;
+	const char *modes[] = {
+		[ABX500_DEFAULT]	= "default",
+		[ABX500_ALT_A]		= "altA",
+		[ABX500_ALT_B]		= "altB",
+		[ABX500_ALT_C]		= "altC",
+	};
+
+	/* sanity check */
+	if (((alt_setting == ABX500_ALT_A) && (af.gpiosel_bit == UNUSED)) ||
+	    ((alt_setting == ABX500_ALT_B) && (af.alt_bit1 == UNUSED)) ||
+	    ((alt_setting == ABX500_ALT_C) && (af.alt_bit2 == UNUSED))) {
+		dev_dbg(pct->dev, "pin %d doesn't support %s mode\n", gpio,
+				modes[alt_setting]);
+		return -EINVAL;
+	}
+
+	/* on ABx5xx, there is no GPIO0, so adjust the offset */
+	offset = gpio - 1;
+	switch (alt_setting) {
+	case ABX500_DEFAULT:
+		/*
+		 * for ABx5xx family, default mode is always selected by
+		 * writing 0 to GPIOSELx register, except for pins which
+		 * support at least ALT_B mode, default mode is selected
+		 * by writing 1 to GPIOSELx register
+		 */
+		val = 0;
+		if (af.alt_bit1 != UNUSED)
+			val++;
+
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+					   offset, val);
+		break;
+	case ABX500_ALT_A:
+		/*
+		 * for ABx5xx family, alt_a mode is always selected by
+		 * writing 1 to GPIOSELx register, except for pins which
+		 * support at least ALT_B mode, alt_a mode is selected
+		 * by writing 0 to GPIOSELx register and 0 in ALTFUNC
+		 * register
+		 */
+		if (af.alt_bit1 != UNUSED) {
+			ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+					offset, 0);
+			ret = abx500_gpio_set_bits(chip,
+					AB8500_GPIO_ALTFUN_REG,
+					af.alt_bit1,
+					!!(af.alta_val && BIT(0)));
+			if (af.alt_bit2 != UNUSED)
+				ret = abx500_gpio_set_bits(chip,
+					AB8500_GPIO_ALTFUN_REG,
+					af.alt_bit2,
+					!!(af.alta_val && BIT(1)));
+		} else
+			ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+					offset, 1);
+		break;
+	case ABX500_ALT_B:
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+				offset, 0);
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_ALTFUN_REG,
+				af.alt_bit1, !!(af.altb_val && BIT(0)));
+		if (af.alt_bit2 != UNUSED)
+			ret = abx500_gpio_set_bits(chip,
+					AB8500_GPIO_ALTFUN_REG,
+					af.alt_bit2,
+					!!(af.altb_val && BIT(1)));
+		break;
+	case ABX500_ALT_C:
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+				offset, 0);
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_ALTFUN_REG,
+				af.alt_bit2, !!(af.altc_val && BIT(0)));
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_ALTFUN_REG,
+				af.alt_bit2, !!(af.altc_val && BIT(1)));
+		break;
+
+	default:
+		dev_dbg(pct->dev, "unknow alt_setting %d\n", alt_setting);
+		return -EINVAL;
+	}
+	return ret;
+}
+
+static u8 abx500_get_mode(struct pinctrl_dev *pctldev, struct gpio_chip *chip,
+		unsigned gpio)
+{
+	u8 mode;
+	bool bit_mode;
+	bool alt_bit1;
+	bool alt_bit2;
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct alternate_functions af = pct->soc->alternate_functions[gpio];
+
+	/*
+	 * if gpiosel_bit is set to unused,
+	 * it means no GPIO or special case
+	 */
+	if (af.gpiosel_bit == UNUSED)
+		return ABX500_DEFAULT;
+
+	/* read GpioSelx register */
+	abx500_gpio_get_bit(chip, AB8500_GPIO_SEL1_REG + (gpio / 8),
+			af.gpiosel_bit, &bit_mode);
+	mode = bit_mode;
+
+	/* sanity check */
+	if ((af.alt_bit1 < UNUSED) || (af.alt_bit1 > 7) ||
+	    (af.alt_bit2 < UNUSED) || (af.alt_bit2 > 7)) {
+		dev_err(pct->dev,
+			"alt_bitX value not in correct range (-1 to 7)\n");
+		return -EINVAL;
+	}
+	/* if alt_bit2 is used, alt_bit1 must be used too */
+	if ((af.alt_bit2 != UNUSED) && (af.alt_bit1 == UNUSED)) {
+		dev_err(pct->dev,
+			"if alt_bit2 is used, alt_bit1 can't be unused\n");
+		return -EINVAL;
+	}
+
+	/* check if pin use AlternateFunction register */
+	if ((af.alt_bit1 == UNUSED) && (af.alt_bit1 == UNUSED))
+		return mode;
+	/*
+	 * if pin GPIOSEL bit is set and pin supports alternate function,
+	 * it means DEFAULT mode
+	 */
+	if (mode)
+		return ABX500_DEFAULT;
+	/*
+	 * pin use the AlternatFunction register
+	 * read alt_bit1 value
+	 */
+	abx500_gpio_get_bit(chip, AB8500_GPIO_ALTFUN_REG,
+			    af.alt_bit1, &alt_bit1);
+
+	if (af.alt_bit2 != UNUSED)
+		/* read alt_bit2 value */
+		abx500_gpio_get_bit(chip, AB8500_GPIO_ALTFUN_REG, af.alt_bit2,
+				&alt_bit2);
+	else
+		alt_bit2 = 0;
+
+	mode = (alt_bit2 << 1) + alt_bit1;
+	if (mode == af.alta_val)
+		return ABX500_ALT_A;
+	else if (mode == af.altb_val)
+		return ABX500_ALT_B;
+	else
+		return ABX500_ALT_C;
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/seq_file.h>
+
+static void abx500_gpio_dbg_show_one(struct seq_file *s,
+	struct pinctrl_dev *pctldev, struct gpio_chip *chip,
+	unsigned offset, unsigned gpio)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	const char *label = gpiochip_is_requested(chip, offset - 1);
+	u8 gpio_offset = offset - 1;
+	int mode = -1;
+	bool is_out;
+	bool pull;
+	const char *modes[] = {
+		[ABX500_DEFAULT]	= "default",
+		[ABX500_ALT_A]		= "altA",
+		[ABX500_ALT_B]		= "altB",
+		[ABX500_ALT_C]		= "altC",
+	};
+
+	abx500_gpio_get_bit(chip, AB8500_GPIO_DIR1_REG, gpio_offset, &is_out);
+	abx500_gpio_get_bit(chip, AB8500_GPIO_PUD1_REG, gpio_offset, &pull);
+
+	if (pctldev)
+		mode = abx500_get_mode(pctldev, chip, offset);
+
+	seq_printf(s, " gpio-%-3d (%-20.20s) %-3s %-9s %s",
+		   gpio, label ?: "(none)",
+		   is_out ? "out" : "in ",
+		   is_out ?
+		   (chip->get
+		   ? (chip->get(chip, offset) ? "hi" : "lo")
+		   : "?  ")
+		   : (pull ? "pull up" : "pull down"),
+		   (mode < 0) ? "unknown" : modes[mode]);
+
+	if (label && !is_out) {
+		int irq = gpio_to_irq(gpio);
+		struct irq_desc	*desc = irq_to_desc(irq);
+
+		if (irq >= 0 && desc->action) {
+			char *trigger;
+			int irq_offset = irq - pct->irq_base;
+
+			if (pct->rising & BIT(irq_offset))
+				trigger = "edge-rising";
+			else if (pct->falling & BIT(irq_offset))
+				trigger = "edge-falling";
+			else
+				trigger = "edge-undefined";
+
+			seq_printf(s, " irq-%d %s", irq, trigger);
+		}
+	}
+}
+
+static void abx500_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
+{
+	unsigned i;
+	unsigned gpio = chip->base;
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	struct pinctrl_dev *pctldev = pct->pctldev;
+
+	for (i = 0; i < chip->ngpio; i++, gpio++) {
+		/* On AB8500, there is no GPIO0, the first is the GPIO 1 */
+		abx500_gpio_dbg_show_one(s, pctldev, chip, i + 1, gpio);
+		seq_printf(s, "\n");
+	}
+}
+
+#else
+static inline void abx500_gpio_dbg_show_one(struct seq_file *s,
+					 struct pinctrl_dev *pctldev,
+					 struct gpio_chip *chip,
+					 unsigned offset, unsigned gpio)
+{
+}
+#define abx500_gpio_dbg_show	NULL
+#endif
+
+int abx500_gpio_request(struct gpio_chip *chip, unsigned offset)
+{
+	int gpio = chip->base + offset;
+
+	return pinctrl_request_gpio(gpio);
+}
+
+void abx500_gpio_free(struct gpio_chip *chip, unsigned offset)
+{
+	int gpio = chip->base + offset;
+
+	pinctrl_free_gpio(gpio);
+}
+
+static struct gpio_chip abx500gpio_chip = {
+	.label			= "abx500-gpio",
+	.owner			= THIS_MODULE,
+	.request		= abx500_gpio_request,
+	.free			= abx500_gpio_free,
+	.direction_input	= abx500_gpio_direction_input,
+	.get			= abx500_gpio_get,
+	.direction_output	= abx500_gpio_direction_output,
+	.set			= abx500_gpio_set,
+	.to_irq			= abx500_gpio_to_irq,
+	.dbg_show		= abx500_gpio_dbg_show,
+};
+
+static unsigned int irq_to_rising(unsigned int irq)
+{
+	struct abx500_pinctrl *pct = irq_get_chip_data(irq);
+	int offset = irq - pct->irq_base;
+	int new_irq;
+
+	new_irq = offset * pct->irq_gpio_factor
+		+ pct->irq_gpio_rising_offset
+		+ pct->parent->irq_base;
+
+	return new_irq;
+}
+
+static unsigned int irq_to_falling(unsigned int irq)
+{
+	struct abx500_pinctrl *pct = irq_get_chip_data(irq);
+	int offset = irq - pct->irq_base;
+	int new_irq;
+
+	new_irq = offset * pct->irq_gpio_factor
+		+ pct->irq_gpio_falling_offset
+		+ pct->parent->irq_base;
+	return new_irq;
+
+}
+
+static unsigned int rising_to_irq(unsigned int irq, void *dev)
+{
+	struct abx500_pinctrl *pct = dev;
+	int offset, new_irq;
+
+	offset = irq - pct->irq_gpio_rising_offset
+		- pct->parent->irq_base;
+	new_irq = (offset / pct->irq_gpio_factor)
+		+ pct->irq_base;
+
+	return new_irq;
+}
+
+static unsigned int falling_to_irq(unsigned int irq, void *dev)
+{
+	struct abx500_pinctrl *pct = dev;
+	int offset, new_irq;
+
+	offset = irq - pct->irq_gpio_falling_offset
+		- pct->parent->irq_base;
+	new_irq = (offset / pct->irq_gpio_factor)
+		+ pct->irq_base;
+
+	return new_irq;
+}
+
+/*
+ * IRQ handler
+ */
+
+static irqreturn_t handle_rising(int irq, void *dev)
+{
+
+	handle_nested_irq(rising_to_irq(irq , dev));
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t handle_falling(int irq, void *dev)
+{
+
+	handle_nested_irq(falling_to_irq(irq, dev));
+	return IRQ_HANDLED;
+}
+
+static void abx500_gpio_irq_lock(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	mutex_lock(&pct->lock);
+}
+
+static void abx500_gpio_irq_sync_unlock(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	unsigned int irq = data->irq;
+	int offset = irq - pct->irq_base;
+	bool rising = pct->rising & BIT(offset);
+	bool falling = pct->falling & BIT(offset);
+	int ret;
+
+	switch (pct->irq_action)	{
+	case STARTUP:
+		if (rising)
+			ret = request_threaded_irq(irq_to_rising(irq),
+					NULL, handle_rising,
+					IRQF_TRIGGER_RISING | IRQF_NO_SUSPEND,
+					"abx500-gpio-r", pct);
+		if (falling)
+			ret = request_threaded_irq(irq_to_falling(irq),
+				       NULL, handle_falling,
+				       IRQF_TRIGGER_FALLING | IRQF_NO_SUSPEND,
+				       "abx500-gpio-f", pct);
+		break;
+	case SHUTDOWN:
+		if (rising)
+			free_irq(irq_to_rising(irq), pct);
+		if (falling)
+			free_irq(irq_to_falling(irq), pct);
+		break;
+	case MASK:
+		if (rising)
+			disable_irq(irq_to_rising(irq));
+		if (falling)
+			disable_irq(irq_to_falling(irq));
+		break;
+	case UNMASK:
+		if (rising)
+			enable_irq(irq_to_rising(irq));
+		if (falling)
+			enable_irq(irq_to_falling(irq));
+		break;
+	case NONE:
+		break;
+	}
+	pct->irq_action = NONE;
+	pct->rising &= ~(BIT(offset));
+	pct->falling &= ~(BIT(offset));
+	mutex_unlock(&pct->lock);
+}
+
+
+static void abx500_gpio_irq_mask(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	pct->irq_action = MASK;
+}
+
+static void abx500_gpio_irq_unmask(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct =  irq_data_get_irq_chip_data(data);
+	pct->irq_action = UNMASK;
+}
+
+static int abx500_gpio_irq_set_type(struct irq_data *data, unsigned int type)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	unsigned int irq = data->irq;
+	int offset = irq - pct->irq_base;
+
+	if (type == IRQ_TYPE_EDGE_BOTH) {
+		pct->rising =  BIT(offset);
+		pct->falling = BIT(offset);
+	} else if (type == IRQ_TYPE_EDGE_RISING) {
+		pct->rising =  BIT(offset);
+	} else  {
+		pct->falling = BIT(offset);
+	}
+	return 0;
+}
+
+static unsigned int abx500_gpio_irq_startup(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	pct->irq_action = STARTUP;
+	return 0;
+}
+
+static void abx500_gpio_irq_shutdown(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	pct->irq_action = SHUTDOWN;
+}
+
+static struct irq_chip abx500_gpio_irq_chip = {
+	.name			= "abx500-gpio",
+	.irq_startup		= abx500_gpio_irq_startup,
+	.irq_shutdown		= abx500_gpio_irq_shutdown,
+	.irq_bus_lock		= abx500_gpio_irq_lock,
+	.irq_bus_sync_unlock	= abx500_gpio_irq_sync_unlock,
+	.irq_mask		= abx500_gpio_irq_mask,
+	.irq_unmask		= abx500_gpio_irq_unmask,
+	.irq_set_type		= abx500_gpio_irq_set_type,
+};
+
+static int abx500_gpio_irq_init(struct abx500_pinctrl *pct)
+{
+	u32 base = pct->irq_base;
+	int irq;
+
+	for (irq = base; irq < base + AB8500_NUM_VIR_GPIO_IRQ ; irq++) {
+		irq_set_chip_data(irq, pct);
+		irq_set_chip_and_handler(irq, &abx500_gpio_irq_chip,
+				handle_simple_irq);
+		irq_set_nested_thread(irq, 1);
+#ifdef CONFIG_ARM
+		set_irq_flags(irq, IRQF_VALID);
+#else
+		irq_set_noprobe(irq);
+#endif
+	}
+
+	return 0;
+}
+
+static void abx500_gpio_irq_remove(struct abx500_pinctrl *pct)
+{
+	int base = pct->irq_base;
+	int irq;
+
+	for (irq = base; irq < base + AB8500_NUM_VIR_GPIO_IRQ; irq++) {
+#ifdef CONFIG_ARM
+		set_irq_flags(irq, 0);
+#endif
+		irq_set_chip_and_handler(irq, NULL, NULL);
+		irq_set_chip_data(irq, NULL);
+	}
+}
+
+static int abx500_pmx_get_funcs_cnt(struct pinctrl_dev *pctldev)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	return pct->soc->nfunctions;
+}
+
+static const char *abx500_pmx_get_func_name(struct pinctrl_dev *pctldev,
+					 unsigned function)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	return pct->soc->functions[function].name;
+}
+
+static int abx500_pmx_get_func_groups(struct pinctrl_dev *pctldev,
+				   unsigned function,
+				   const char * const **groups,
+				   unsigned * const num_groups)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	*groups = pct->soc->functions[function].groups;
+	*num_groups = pct->soc->functions[function].ngroups;
+
+	return 0;
+}
+
+static void abx500_disable_lazy_irq(struct gpio_chip *chip, unsigned gpio)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	int irq;
+	int offset;
+	bool rising;
+	bool falling;
+
+	/*
+	 * check if gpio has interrupt capability and convert
+	 * gpio number to irq
+	 * On ABx5xx, there is no GPIO0, GPIO1 is the
+	 * first one, so adjust gpio number
+	 */
+	gpio--;
+	irq =  gpio_to_irq(gpio + chip->base);
+	if (irq < 0)
+		return;
+
+	offset = irq - pct->irq_base;
+	rising = pct->rising & BIT(offset);
+	falling = pct->falling & BIT(offset);
+
+	/* nothing to do ?*/
+	if (!rising && !falling)
+		return;
+
+	if (rising) {
+		disable_irq(irq_to_rising(irq));
+		free_irq(irq_to_rising(irq), pct);
+	}
+	if (falling) {
+		disable_irq(irq_to_falling(irq));
+		free_irq(irq_to_falling(irq), pct);
+	}
+}
+
+static int abx500_pmx_enable(struct pinctrl_dev *pctldev, unsigned function,
+			  unsigned group)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct gpio_chip *chip = &pct->chip;
+	const struct abx500_pingroup *g;
+	int i;
+	int ret = 0;
+
+	g = &pct->soc->groups[group];
+	if (g->altsetting < 0)
+		return -EINVAL;
+
+	dev_dbg(pct->dev, "enable group %s, %u pins\n", g->name, g->npins);
+
+	for (i = 0; i < g->npins; i++) {
+		dev_dbg(pct->dev, "setting pin %d to altsetting %d\n",
+			g->pins[i], g->altsetting);
+
+		abx500_disable_lazy_irq(chip, g->pins[i]);
+		ret = abx500_set_mode(pctldev, chip, g->pins[i], g->altsetting);
+	}
+	return ret;
+}
+
+static void abx500_pmx_disable(struct pinctrl_dev *pctldev,
+			    unsigned function, unsigned group)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	const struct abx500_pingroup *g;
+
+	g = &pct->soc->groups[group];
+	if (g->altsetting < 0)
+		return;
+
+	/* FIXME: poke out the mux, set the pin to some default state? */
+	dev_dbg(pct->dev, "disable group %s, %u pins\n", g->name, g->npins);
+}
+
+int abx500_gpio_request_enable(struct pinctrl_dev *pctldev,
+			    struct pinctrl_gpio_range *range,
+			    unsigned offset)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	const struct abx500_pinrange *p;
+	int ret;
+	int i;
+
+	/*
+	 * Different ranges have different ways to enable GPIO function on a
+	 * pin, so refer back to our local range type, where we handily define
+	 * what altfunc enables GPIO for a certain pin.
+	 */
+	for (i = 0; i < pct->soc->gpio_num_ranges; i++) {
+		p = &pct->soc->gpio_ranges[i];
+		if ((offset >= p->offset) &&
+		    (offset < (p->offset + p->npins)))
+		  break;
+	}
+
+	if (i == pct->soc->gpio_num_ranges) {
+		dev_err(pct->dev, "%s failed to locate range\n", __func__);
+		return -ENODEV;
+	}
+
+	dev_dbg(pct->dev, "enable GPIO by altfunc %d at gpio %d\n",
+		p->altfunc, offset);
+
+	ret = abx500_set_mode(pct->pctldev, &pct->chip,
+			      offset, p->altfunc);
+	if (ret < 0) {
+		dev_err(pct->dev, "%s setting altfunc failed\n", __func__);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void abx500_gpio_disable_free(struct pinctrl_dev *pctldev,
+			   struct pinctrl_gpio_range *range,
+			   unsigned offset)
+{
+}
+
+static struct pinmux_ops abx500_pinmux_ops = {
+	.get_functions_count = abx500_pmx_get_funcs_cnt,
+	.get_function_name = abx500_pmx_get_func_name,
+	.get_function_groups = abx500_pmx_get_func_groups,
+	.enable = abx500_pmx_enable,
+	.disable = abx500_pmx_disable,
+	.gpio_request_enable = abx500_gpio_request_enable,
+	.gpio_disable_free = abx500_gpio_disable_free,
+};
+
+static int abx500_get_groups_cnt(struct pinctrl_dev *pctldev)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	return pct->soc->ngroups;
+}
+
+static const char *abx500_get_group_name(struct pinctrl_dev *pctldev,
+				       unsigned selector)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	return pct->soc->groups[selector].name;
+}
+
+static int abx500_get_group_pins(struct pinctrl_dev *pctldev,
+		unsigned selector,
+		const unsigned **pins,
+		unsigned *num_pins)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	*pins = pct->soc->groups[selector].pins;
+	*num_pins = pct->soc->groups[selector].npins;
+	return 0;
+}
+
+static void abx500_pin_dbg_show(struct pinctrl_dev *pctldev,
+		struct seq_file *s, unsigned offset)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct gpio_chip *chip = &pct->chip;
+
+	abx500_gpio_dbg_show_one(s, pctldev, chip, offset,
+				 chip->base + offset - 1);
+}
+
+static struct pinctrl_ops abx500_pinctrl_ops = {
+	.get_groups_count = abx500_get_groups_cnt,
+	.get_group_name = abx500_get_group_name,
+	.get_group_pins = abx500_get_group_pins,
+	.pin_dbg_show = abx500_pin_dbg_show,
+};
+
+int abx500_pin_config_get(struct pinctrl_dev *pctldev,
+		       unsigned pin,
+		       unsigned long *config)
+{
+	/* Not implemented */
+	return -EINVAL;
+}
+
+int abx500_pin_config_set(struct pinctrl_dev *pctldev,
+		       unsigned pin,
+		       unsigned long config)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct pullud *pullud = pct->soc->pullud;
+	struct gpio_chip *chip = &pct->chip;
+	unsigned offset;
+	int ret;
+	enum pin_config_param param = pinconf_to_config_param(config);
+	enum pin_config_param argument = pinconf_to_config_argument(config);
+
+	dev_dbg(chip->dev, "pin %d [%#lx]: %s %s\n",
+		pin, config, (param == PIN_CONFIG_OUTPUT) ? "output " : "input",
+		(param == PIN_CONFIG_OUTPUT) ? (argument ? "high" : "low") :
+		(argument ? "pull up" : "pull down"));
+	/* on ABx500, there is no GPIO0, so adjust the offset */
+	offset = pin - 1;
+
+	switch (param) {
+	case PIN_CONFIG_BIAS_PULL_DOWN:
+		/*
+		 * if argument = 1 set the pull down
+		 * else clear the pull down
+		 */
+		ret = abx500_gpio_direction_input(chip, offset);
+		/*
+		 * Some chips only support pull down, while some actually
+		 * support both pull up and pull down. Such chips have
+		 * a "pullud" range specified for the pins that support
+		 * both features. If the pin is not within that range, we
+		 * fall back to the old bit set that only support pull down.
+		 */
+		if (pullud &&
+		    pin >= pullud->first_pin &&
+		    pin <= pullud->last_pin)
+			ret = abx500_config_pull_updown(pct,
+				pin,
+				argument ? ABX500_GPIO_PULL_DOWN : ABX500_GPIO_PULL_NONE);
+		else
+			/* Chip only supports pull down */
+			ret = abx500_gpio_set_bits(chip, AB8500_GPIO_PUD1_REG,
+				offset, argument ? 0 : 1);
+		break;
+	case PIN_CONFIG_OUTPUT:
+		ret = abx500_gpio_direction_output(chip, offset, argument);
+		break;
+	default:
+		dev_err(chip->dev, "illegal configuration requested\n");
+		return -EINVAL;
+	}
+	return ret;
+}
+
+static struct pinconf_ops abx500_pinconf_ops = {
+	.pin_config_get = abx500_pin_config_get,
+	.pin_config_set = abx500_pin_config_set,
+};
+
+static struct pinctrl_desc abx500_pinctrl_desc = {
+	.name = "pinctrl-abx500",
+	.pctlops = &abx500_pinctrl_ops,
+	.pmxops = &abx500_pinmux_ops,
+	.confops = &abx500_pinconf_ops,
+	.owner = THIS_MODULE,
+};
+
+static int abx500_get_gpio_num(struct abx500_pinctrl_soc_data *soc)
+{
+	unsigned int lowest = 0;
+	unsigned int highest = 0;
+	unsigned int npins = 0;
+	int i;
+
+	/*
+	 * Compute number of GPIOs from the last SoC gpio range descriptors
+	 * These ranges may include "holes" but the GPIO number space shall
+	 * still be homogeneous, so we need to detect and account for any
+	 * such holes so that these are included in the number of GPIO pins.
+	 */
+	for (i = 0; i < soc->gpio_num_ranges; i++) {
+		unsigned gstart;
+		unsigned gend;
+		const struct abx500_pinrange *p;
+
+		p = &soc->gpio_ranges[i];
+		gstart = p->offset;
+		gend = p->offset + p->npins - 1;
+
+		if (i == 0) {
+			/* First iteration, set start values */
+			lowest = gstart;
+			highest = gend;
+		} else {
+			if (gstart < lowest)
+				lowest = gstart;
+			if (gend > highest)
+				highest = gend;
+		}
+	}
+	/* this gives the absolute number of pins */
+	npins = highest - lowest + 1;
+	return npins;
+}
+
+static int abx500_gpio_probe(struct platform_device *pdev)
+{
+	struct ab8500_platform_data *abx500_pdata =
+				dev_get_platdata(pdev->dev.parent);
+	struct abx500_gpio_platform_data *pdata;
+	struct abx500_pinctrl *pct;
+	const struct platform_device_id *platid = platform_get_device_id(pdev);
+	int ret;
+	int i;
+
+	pdata = abx500_pdata->gpio;
+	if (!pdata)	{
+		dev_err(&pdev->dev, "gpio platform data missing\n");
+		return -ENODEV;
+	}
+
+	pct = devm_kzalloc(&pdev->dev, sizeof(struct abx500_pinctrl),
+				   GFP_KERNEL);
+	if (pct == NULL) {
+		dev_err(&pdev->dev,
+			"failed to allocate memory for pct\n");
+		return -ENOMEM;
+	}
+
+	pct->dev = &pdev->dev;
+	pct->parent = dev_get_drvdata(pdev->dev.parent);
+	pct->chip = abx500gpio_chip;
+	pct->chip.dev = &pdev->dev;
+	pct->chip.base = pdata->gpio_base;
+	pct->irq_base = pdata->irq_base;
+
+	/* initialize the lock */
+	mutex_init(&pct->lock);
+
+	/* Poke in other ASIC variants here */
+	switch (platid->driver_data) {
+	default:
+		dev_err(&pdev->dev, "Unsupported pinctrl sub driver (%d)\n",
+				(int) platid->driver_data);
+		return -EINVAL;
+	}
+
+	if (!pct->soc) {
+		dev_err(&pdev->dev, "Invalid SOC data\n");
+		return -EINVAL;
+	}
+
+	pct->chip.ngpio = abx500_get_gpio_num(pct->soc);
+	pct->irq_cluster = pct->soc->gpio_irq_cluster;
+	pct->irq_cluster_size = pct->soc->ngpio_irq_cluster;
+	pct->irq_gpio_rising_offset = pct->soc->irq_gpio_rising_offset;
+	pct->irq_gpio_falling_offset = pct->soc->irq_gpio_falling_offset;
+	pct->irq_gpio_factor = pct->soc->irq_gpio_factor;
+
+	ret = abx500_gpio_irq_init(pct);
+	if (ret)
+		goto out_free;
+	ret = gpiochip_add(&pct->chip);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to add gpiochip: %d\n",
+				ret);
+		goto out_rem_irq;
+	}
+	dev_info(&pdev->dev, "added gpiochip\n");
+
+	abx500_pinctrl_desc.pins = pct->soc->pins;
+	abx500_pinctrl_desc.npins = pct->soc->npins;
+	pct->pctldev = pinctrl_register(&abx500_pinctrl_desc, &pdev->dev, pct);
+	if (!pct->pctldev) {
+		dev_err(&pdev->dev,
+			"could not register abx500 pinctrl driver\n");
+		goto out_rem_chip;
+	}
+	dev_info(&pdev->dev, "registered pin controller\n");
+
+	/* We will handle a range of GPIO pins */
+	for (i = 0; i < pct->soc->gpio_num_ranges; i++) {
+		const struct abx500_pinrange *p = &pct->soc->gpio_ranges[i];
+
+		ret = gpiochip_add_pin_range(&pct->chip,
+					dev_name(&pdev->dev),
+					p->offset - 1, p->offset, p->npins);
+		if (ret < 0)
+			return ret;
+	}
+
+	platform_set_drvdata(pdev, pct);
+	dev_info(&pdev->dev, "initialized abx500 pinctrl driver\n");
+
+	return 0;
+
+out_rem_chip:
+	ret = gpiochip_remove(&pct->chip);
+	if (ret)
+		dev_info(&pdev->dev, "failed to remove gpiochip\n");
+out_rem_irq:
+	abx500_gpio_irq_remove(pct);
+out_free:
+	mutex_destroy(&pct->lock);
+	return ret;
+}
+
+/*
+ * abx500_gpio_remove() - remove Ab8500-gpio driver
+ * @pdev :	Platform device registered
+ */
+static int abx500_gpio_remove(struct platform_device *pdev)
+{
+	struct abx500_pinctrl *pct = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = gpiochip_remove(&pct->chip);
+	if (ret < 0) {
+		dev_err(pct->dev, "unable to remove gpiochip: %d\n",
+			ret);
+		return ret;
+	}
+
+	mutex_destroy(&pct->lock);
+
+	return 0;
+}
+
+static const struct platform_device_id abx500_pinctrl_id[] = {
+	{ "pinctrl-ab8500", PINCTRL_AB8500 },
+	{ "pinctrl-ab8540", PINCTRL_AB8540 },
+	{ "pinctrl-ab9540", PINCTRL_AB9540 },
+	{ "pinctrl-ab8505", PINCTRL_AB8505 },
+	{ },
+};
+
+static struct platform_driver abx500_gpio_driver = {
+	.driver = {
+		.name = "abx500-gpio",
+		.owner = THIS_MODULE,
+	},
+	.probe = abx500_gpio_probe,
+	.remove = abx500_gpio_remove,
+	.id_table = abx500_pinctrl_id,
+};
+
+static int __init abx500_gpio_init(void)
+{
+	return platform_driver_register(&abx500_gpio_driver);
+}
+core_initcall(abx500_gpio_init);
+
+MODULE_AUTHOR("Patrice Chotard <patrice.chotard@st.com>");
+MODULE_DESCRIPTION("Driver allows to use AxB5xx unused pins to be used as GPIO");
+MODULE_ALIAS("platform:abx500-gpio");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pinctrl/pinctrl-abx500.h b/drivers/pinctrl/pinctrl-abx500.h
new file mode 100644
index 000000000000..436ace356175
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-abx500.h
@@ -0,0 +1,180 @@
+#ifndef PINCTRL_PINCTRL_ABx5O0_H
+#define PINCTRL_PINCTRL_ABx500_H
+
+/* Package definitions */
+#define PINCTRL_AB8500	0
+#define PINCTRL_AB8540	1
+#define PINCTRL_AB9540	2
+#define PINCTRL_AB8505	3
+
+/* pins alternate function */
+enum abx500_pin_func {
+	ABX500_DEFAULT,
+	ABX500_ALT_A,
+	ABX500_ALT_B,
+	ABX500_ALT_C,
+};
+
+/**
+ * struct abx500_function - ABx500 pinctrl mux function
+ * @name: The name of the function, exported to pinctrl core.
+ * @groups: An array of pin groups that may select this function.
+ * @ngroups: The number of entries in @groups.
+ */
+struct abx500_function {
+	const char *name;
+	const char * const *groups;
+	unsigned ngroups;
+};
+
+/**
+ * struct abx500_pingroup - describes a ABx500 pin group
+ * @name: the name of this specific pin group
+ * @pins: an array of discrete physical pins used in this group, taken
+ *	from the driver-local pin enumeration space
+ * @num_pins: the number of pins in this group array, i.e. the number of
+ *	elements in .pins so we can iterate over that array
+ * @altsetting: the altsetting to apply to all pins in this group to
+ *	configure them to be used by a function
+ */
+struct abx500_pingroup {
+	const char *name;
+	const unsigned int *pins;
+	const unsigned npins;
+	int altsetting;
+};
+
+#define ALTERNATE_FUNCTIONS(pin, sel_bit, alt1, alt2, alta, altb, altc)	\
+{									\
+	.pin_number = pin,						\
+	.gpiosel_bit = sel_bit,						\
+	.alt_bit1 = alt1,						\
+	.alt_bit2 = alt2,						\
+	.alta_val = alta,						\
+	.altb_val = altb,						\
+	.altc_val = altc,						\
+}
+
+#define UNUSED -1
+/**
+ * struct alternate_functions
+ * @pin_number:		The pin number
+ * @gpiosel_bit:	Control bit in GPIOSEL register,
+ * @alt_bit1:		First AlternateFunction bit used to select the
+ *			alternate function
+ * @alt_bit2:		Second AlternateFunction bit used to select the
+ *			alternate function
+ *
+ *			these 3 following fields are necessary due to none
+ *			coherency on how to select the altA, altB and altC
+ *			function between the ABx500 SOC family when using
+ *			alternatfunc register.
+ * @alta_val:		value to write in alternatfunc to select altA function
+ * @altb_val:		value to write in alternatfunc to select altB function
+ * @altc_val:		value to write in alternatfunc to select altC function
+ */
+struct alternate_functions {
+	unsigned pin_number;
+	s8 gpiosel_bit;
+	s8 alt_bit1;
+	s8 alt_bit2;
+	u8 alta_val;
+	u8 altb_val;
+	u8 altc_val;
+};
+
+/**
+ * struct pullud - specific pull up/down feature
+ * @first_pin:		The pin number of the first pins which support
+ *			specific pull up/down
+ * @last_pin:		The pin number of the last pins
+ */
+struct pullud {
+	unsigned first_pin;
+	unsigned last_pin;
+};
+
+#define GPIO_IRQ_CLUSTER(a, b, c)	\
+{					\
+	.start = a,			\
+	.end = b,			\
+	.offset = c,			\
+}
+
+/**
+ * struct abx500_gpio_irq_cluster - indicates GPIOs which are interrupt
+ *			capable
+ * @start:		The pin number of the first pin interrupt capable
+ * @end:		The pin number of the last pin interrupt capable
+ * @offset:		offset used to compute specific setting strategy of
+ *			the interrupt line
+ */
+
+struct abx500_gpio_irq_cluster {
+	int start;
+	int end;
+	int offset;
+};
+
+/**
+ * struct abx500_pinrange - map pin numbers to GPIO offsets
+ * @offset:		offset into the GPIO local numberspace, incidentally
+ *			identical to the offset into the local pin numberspace
+ * @npins:		number of pins to map from both offsets
+ * @altfunc:		altfunc setting to be used to enable GPIO on a pin in
+ *			this range (may vary)
+ */
+struct abx500_pinrange {
+	unsigned int offset;
+	unsigned int npins;
+	int altfunc;
+};
+
+#define ABX500_PINRANGE(a, b, c) { .offset = a, .npins = b, .altfunc = c }
+
+/**
+ * struct abx500_pinctrl_soc_data - ABx500 pin controller per-SoC configuration
+ * @gpio_ranges:	An array of GPIO ranges for this SoC
+ * @gpio_num_ranges:	The number of GPIO ranges for this SoC
+ * @pins:		An array describing all pins the pin controller affects.
+ *			All pins which are also GPIOs must be listed first within the
+ *			array, and be numbered identically to the GPIO controller's
+ *			numbering.
+ * @npins:		The number of entries in @pins.
+ * @functions:		The functions supported on this SoC.
+ * @nfunction:		The number of entries in @functions.
+ * @groups:		An array describing all pin groups the pin SoC supports.
+ * @ngroups:		The number of entries in @groups.
+ * @alternate_functions: array describing pins which supports alternate and
+ *			how to set it.
+ * @pullud:		array describing pins which supports pull up/down
+ *			specific registers.
+ * @gpio_irq_cluster:	An array of GPIO interrupt capable for this SoC
+ * @ngpio_irq_cluster:	The number of GPIO inetrrupt capable for this SoC
+ * @irq_gpio_rising_offset: Interrupt offset used as base to compute specific
+ *			setting strategy of the rising interrupt line
+ * @irq_gpio_falling_offset: Interrupt offset used as base to compute specific
+ *			setting strategy of the falling interrupt line
+ * @irq_gpio_factor:	Factor used to compute specific setting strategy of
+ *			the interrupt line
+ */
+
+struct abx500_pinctrl_soc_data {
+	const struct abx500_pinrange *gpio_ranges;
+	unsigned gpio_num_ranges;
+	const struct pinctrl_pin_desc *pins;
+	unsigned npins;
+	const struct abx500_function *functions;
+	unsigned nfunctions;
+	const struct abx500_pingroup *groups;
+	unsigned ngroups;
+	struct alternate_functions *alternate_functions;
+	struct pullud *pullud;
+	struct abx500_gpio_irq_cluster *gpio_irq_cluster;
+	unsigned ngpio_irq_cluster;
+	int irq_gpio_rising_offset;
+	int irq_gpio_falling_offset;
+	int irq_gpio_factor;
+};
+
+#endif /* PINCTRL_PINCTRL_ABx500_H */
diff --git a/include/linux/mfd/abx500/ab8500-gpio.h b/include/linux/mfd/abx500/ab8500-gpio.h
index 2387c207ea86..e8c8281e194a 100644
--- a/include/linux/mfd/abx500/ab8500-gpio.h
+++ b/include/linux/mfd/abx500/ab8500-gpio.h
@@ -14,10 +14,21 @@
  * registers.
  */
 
-struct ab8500_gpio_platform_data {
+struct abx500_gpio_platform_data {
 	int gpio_base;
 	u32 irq_base;
-	u8  config_reg[8];
+};
+
+enum abx500_gpio_pull_updown {
+	ABX500_GPIO_PULL_DOWN = 0x0,
+	ABX500_GPIO_PULL_NONE = 0x1,
+	ABX500_GPIO_PULL_UP = 0x3,
+};
+
+enum abx500_gpio_vinsel {
+	ABX500_GPIO_VINSEL_VBAT = 0x0,
+	ABX500_GPIO_VINSEL_VIN_1V8 = 0x1,
+	ABX500_GPIO_VINSEL_VDD_BIF = 0x2,
 };
 
 #endif /* _AB8500_GPIO_H */
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index e640ea059be2..fc0534483c72 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -385,7 +385,7 @@ struct ab8500_platform_data {
 	struct ab8500_regulator_reg_init *regulator_reg_init;
 	int num_regulator;
 	struct regulator_init_data *regulator;
-	struct ab8500_gpio_platform_data *gpio;
+	struct abx500_gpio_platform_data *gpio;
 	struct ab8500_codec_platform_data *codec;
 };
 
-- 
cgit v1.2.3


From a6ca2e10f795111a90a4efabb07717258669e03d Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sat, 26 Jan 2013 21:38:35 +0100
Subject: ssb: add gpio_to_irq

The old bcm47xx gpio code had support for gpio_to_irq, but the new
code did not provide this function, but returned -ENXIO all the time.
This patch adds the missing function.

arch/mips/bcm47xx/wgt634u.c calls gpio_to_irq() and got the correct irq
number with the old gpio handling code. With this patch the code in
wgt634u.c should work again. I do not have a wgt634u to test this.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/driver_gpio.c           | 22 ++++++++++++++++++++++
 include/linux/ssb/ssb_driver_mips.h |  5 +++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
index 97ac0a38e3d0..accabe39b320 100644
--- a/drivers/ssb/driver_gpio.c
+++ b/drivers/ssb/driver_gpio.c
@@ -74,6 +74,16 @@ static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned gpio)
 	ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 0);
 }
 
+static int ssb_gpio_chipco_to_irq(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		return ssb_mips_irq(bus->chipco.dev) + 2;
+	else
+		return -EINVAL;
+}
+
 static int ssb_gpio_chipco_init(struct ssb_bus *bus)
 {
 	struct gpio_chip *chip = &bus->gpio;
@@ -86,6 +96,7 @@ static int ssb_gpio_chipco_init(struct ssb_bus *bus)
 	chip->set		= ssb_gpio_chipco_set_value;
 	chip->direction_input	= ssb_gpio_chipco_direction_input;
 	chip->direction_output	= ssb_gpio_chipco_direction_output;
+	chip->to_irq		= ssb_gpio_chipco_to_irq;
 	chip->ngpio		= 16;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
@@ -134,6 +145,16 @@ static int ssb_gpio_extif_direction_output(struct gpio_chip *chip,
 	return 0;
 }
 
+static int ssb_gpio_extif_to_irq(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		return ssb_mips_irq(bus->extif.dev) + 2;
+	else
+		return -EINVAL;
+}
+
 static int ssb_gpio_extif_init(struct ssb_bus *bus)
 {
 	struct gpio_chip *chip = &bus->gpio;
@@ -144,6 +165,7 @@ static int ssb_gpio_extif_init(struct ssb_bus *bus)
 	chip->set		= ssb_gpio_extif_set_value;
 	chip->direction_input	= ssb_gpio_extif_direction_input;
 	chip->direction_output	= ssb_gpio_extif_direction_output;
+	chip->to_irq		= ssb_gpio_extif_to_irq;
 	chip->ngpio		= 5;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
diff --git a/include/linux/ssb/ssb_driver_mips.h b/include/linux/ssb/ssb_driver_mips.h
index 07a9c7a2e088..afe79d40a99e 100644
--- a/include/linux/ssb/ssb_driver_mips.h
+++ b/include/linux/ssb/ssb_driver_mips.h
@@ -45,6 +45,11 @@ void ssb_mipscore_init(struct ssb_mipscore *mcore)
 {
 }
 
+static inline unsigned int ssb_mips_irq(struct ssb_device *dev)
+{
+	return 0;
+}
+
 #endif /* CONFIG_SSB_DRIVER_MIPS */
 
 #endif /* LINUX_SSB_MIPSCORE_H_ */
-- 
cgit v1.2.3


From 8f1ca2683225afa21b827ff620a6225c390771a9 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sat, 26 Jan 2013 21:39:44 +0100
Subject: bcma: add gpio_to_irq

The old bcm47xx gpio code had support for gpio_to_irq, but the new
code did not provide this function, but returned -ENXIO all the time.
This patch adds the missing function.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_gpio.c            | 11 +++++++++++
 include/linux/bcma/bcma_driver_mips.h |  9 +++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c
index 9a6f585da2d9..0b5df538dfd9 100644
--- a/drivers/bcma/driver_gpio.c
+++ b/drivers/bcma/driver_gpio.c
@@ -73,6 +73,16 @@ static void bcma_gpio_free(struct gpio_chip *chip, unsigned gpio)
 	bcma_chipco_gpio_pullup(cc, 1 << gpio, 0);
 }
 
+static int bcma_gpio_to_irq(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	if (cc->core->bus->hosttype == BCMA_HOSTTYPE_SOC)
+		return bcma_core_irq(cc->core);
+	else
+		return -EINVAL;
+}
+
 int bcma_gpio_init(struct bcma_drv_cc *cc)
 {
 	struct gpio_chip *chip = &cc->gpio;
@@ -85,6 +95,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc)
 	chip->set		= bcma_gpio_set_value;
 	chip->direction_input	= bcma_gpio_direction_input;
 	chip->direction_output	= bcma_gpio_direction_output;
+	chip->to_irq		= bcma_gpio_to_irq;
 	chip->ngpio		= 16;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index 0d1ea297851a..fb61f3fb4ddb 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -42,13 +42,18 @@ struct bcma_drv_mips {
 #ifdef CONFIG_BCMA_DRIVER_MIPS
 extern void bcma_core_mips_init(struct bcma_drv_mips *mcore);
 extern void bcma_core_mips_early_init(struct bcma_drv_mips *mcore);
+
+extern unsigned int bcma_core_irq(struct bcma_device *core);
 #else
 static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) { }
 static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { }
+
+static inline unsigned int bcma_core_irq(struct bcma_device *core)
+{
+	return 0;
+}
 #endif
 
 extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore);
 
-extern unsigned int bcma_core_irq(struct bcma_device *core);
-
 #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */
-- 
cgit v1.2.3


From 18367681a10bd29c3f2305e6b7b984de5b33d548 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Wed, 30 Jan 2013 09:27:52 +0000
Subject: ipv6 flowlabel: Convert np->ipv6_fl_list to RCU.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h     |  2 +-
 include/net/ipv6.h       |  1 +
 net/ipv6/ip6_flowlabel.c | 72 +++++++++++++++++++++++++++---------------------
 3 files changed, 42 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e971e3742172..850e95bc766c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -214,7 +214,7 @@ struct ipv6_pinfo {
 
 	struct ipv6_mc_socklist	__rcu *ipv6_mc_list;
 	struct ipv6_ac_socklist	*ipv6_ac_list;
-	struct ipv6_fl_socklist *ipv6_fl_list;
+	struct ipv6_fl_socklist __rcu *ipv6_fl_list;
 
 	struct ipv6_txoptions	*opt;
 	struct sk_buff		*pktoptions;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 1d457161def2..851d5412a299 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -239,6 +239,7 @@ struct ip6_flowlabel {
 struct ipv6_fl_socklist {
 	struct ipv6_fl_socklist	*next;
 	struct ip6_flowlabel	*fl;
+	struct rcu_head		rcu;
 };
 
 extern struct ip6_flowlabel	*fl6_sock_lookup(struct sock *sk, __be32 label);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index da156015d827..22494afd981c 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -62,7 +62,7 @@ static DEFINE_SPINLOCK(ip6_fl_lock);
 
 /* Big socket sock */
 
-static DEFINE_RWLOCK(ip6_sk_fl_lock);
+static DEFINE_SPINLOCK(ip6_sk_fl_lock);
 
 #define for_each_fl_rcu(hash, fl)				\
 	for (fl = rcu_dereference(fl_ht[(hash)]);		\
@@ -73,6 +73,11 @@ static DEFINE_RWLOCK(ip6_sk_fl_lock);
 	     fl != NULL;					\
 	     fl = rcu_dereference(fl->next))
 
+#define for_each_sk_fl_rcu(np, sfl)				\
+	for (sfl = rcu_dereference_bh(np->ipv6_fl_list);	\
+	     sfl != NULL;					\
+	     sfl = rcu_dereference_bh(sfl->next))
+
 static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
 {
 	struct ip6_flowlabel *fl;
@@ -244,17 +249,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
 
 	label &= IPV6_FLOWLABEL_MASK;
 
-	read_lock_bh(&ip6_sk_fl_lock);
-	for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
+	rcu_read_lock_bh();
+	for_each_sk_fl_rcu(np, sfl) {
 		struct ip6_flowlabel *fl = sfl->fl;
 		if (fl->label == label) {
 			fl->lastuse = jiffies;
 			atomic_inc(&fl->users);
-			read_unlock_bh(&ip6_sk_fl_lock);
+			rcu_read_unlock_bh();
 			return fl;
 		}
 	}
-	read_unlock_bh(&ip6_sk_fl_lock);
+	rcu_read_unlock_bh();
 	return NULL;
 }
 
@@ -265,20 +270,21 @@ void fl6_free_socklist(struct sock *sk)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6_fl_socklist *sfl;
 
-	if (!np->ipv6_fl_list)
+	if (!rcu_access_pointer(np->ipv6_fl_list))
 		return;
 
-	write_lock_bh(&ipv6_sk_fl_lock);
-	sfl = np->ipv6_fl_list;
-	np->ipv6_fl_list = NULL;
-	write_unlock_bh(&ipv6_sk_fl_lock);
+	spin_lock_bh(&ip6_sk_fl_lock);
+	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
+						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
+		np->ipv6_fl_list = sfl->next;
+		spin_unlock_bh(&ip6_sk_fl_lock);
 
-	while (sfl) {
-		struct ipv6_fl_socklist *next = sfl->next;
 		fl_release(sfl->fl);
-		kfree(sfl);
-		sfl = next;
+		kfree_rcu(sfl, rcu);
+
+		spin_lock_bh(&ip6_sk_fl_lock);
 	}
+	spin_unlock_bh(&ip6_sk_fl_lock);
 }
 
 /* Service routines */
@@ -443,7 +449,7 @@ static int mem_check(struct sock *sk)
 	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
 		return 0;
 
-	for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
+	for_each_sk_fl_rcu(np, sfl)
 		count++;
 
 	if (room <= 0 ||
@@ -486,11 +492,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
 static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
 		struct ip6_flowlabel *fl)
 {
-	write_lock_bh(&ip6_sk_fl_lock);
+	spin_lock_bh(&ip6_sk_fl_lock);
 	sfl->fl = fl;
 	sfl->next = np->ipv6_fl_list;
-	np->ipv6_fl_list = sfl;
-	write_unlock_bh(&ip6_sk_fl_lock);
+	rcu_assign_pointer(np->ipv6_fl_list, sfl);
+	spin_unlock_bh(&ip6_sk_fl_lock);
 }
 
 int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
@@ -512,31 +518,33 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 
 	switch (freq.flr_action) {
 	case IPV6_FL_A_PUT:
-		write_lock_bh(&ip6_sk_fl_lock);
-		for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
+		spin_lock_bh(&ip6_sk_fl_lock);
+		for (sflp = &np->ipv6_fl_list;
+		     (sfl = rcu_dereference(*sflp))!=NULL;
+		     sflp = &sfl->next) {
 			if (sfl->fl->label == freq.flr_label) {
 				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
 					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
-				*sflp = sfl->next;
-				write_unlock_bh(&ip6_sk_fl_lock);
+				*sflp = rcu_dereference(sfl->next);
+				spin_unlock_bh(&ip6_sk_fl_lock);
 				fl_release(sfl->fl);
-				kfree(sfl);
+				kfree_rcu(sfl, rcu);
 				return 0;
 			}
 		}
-		write_unlock_bh(&ip6_sk_fl_lock);
+		spin_unlock_bh(&ip6_sk_fl_lock);
 		return -ESRCH;
 
 	case IPV6_FL_A_RENEW:
-		read_lock_bh(&ip6_sk_fl_lock);
-		for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+		rcu_read_lock_bh();
+		for_each_sk_fl_rcu(np, sfl) {
 			if (sfl->fl->label == freq.flr_label) {
 				err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
-				read_unlock_bh(&ip6_sk_fl_lock);
+				rcu_read_unlock_bh();
 				return err;
 			}
 		}
-		read_unlock_bh(&ip6_sk_fl_lock);
+		rcu_read_unlock_bh();
 
 		if (freq.flr_share == IPV6_FL_S_NONE &&
 		    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
@@ -560,11 +568,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 
 		if (freq.flr_label) {
 			err = -EEXIST;
-			read_lock_bh(&ip6_sk_fl_lock);
-			for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+			rcu_read_lock_bh();
+			for_each_sk_fl_rcu(np, sfl) {
 				if (sfl->fl->label == freq.flr_label) {
 					if (freq.flr_flags&IPV6_FL_F_EXCL) {
-						read_unlock_bh(&ip6_sk_fl_lock);
+						rcu_read_unlock_bh();
 						goto done;
 					}
 					fl1 = sfl->fl;
@@ -572,7 +580,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 					break;
 				}
 			}
-			read_unlock_bh(&ip6_sk_fl_lock);
+			rcu_read_unlock_bh();
 
 			if (fl1 == NULL)
 				fl1 = fl_lookup(net, freq.flr_label);
-- 
cgit v1.2.3


From d560040f7d6fbe0a2990b8f6edca1815e19e72f5 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 31 Jan 2013 11:12:56 +0100
Subject: spi: spi-gpio: fix compilation warning on 64 bits systems

SPI_GPIO_NO_MOSI and SPI_GPIO_NO_MISO flags are type casted to unsigned
long, yet, they are to be stored in an unsigned int field in the
spi_gpio_platform_data structure.

This leads to the following warning during compilation on 64 bits systems:
warning: large integer implicitly truncated to unsigned type [-Woverflow]

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/spi/spi_gpio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h
index 369b3d7d5b95..1634ce31c06d 100644
--- a/include/linux/spi/spi_gpio.h
+++ b/include/linux/spi/spi_gpio.h
@@ -62,8 +62,8 @@
  */
 struct spi_gpio_platform_data {
 	unsigned	sck;
-	unsigned	mosi;
-	unsigned	miso;
+	unsigned long	mosi;
+	unsigned long	miso;
 
 	u16		num_chipselect;
 };
-- 
cgit v1.2.3


From 2663960c159f23cbfb8e196c96e9fc9f3b5f1a8d Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Tue, 22 Jan 2013 22:24:23 -0800
Subject: perf: Make EVENT_ATTR global

Rename EVENT_ATTR() to PMU_EVENT_ATTR() and make it global so it is
available to all architectures.

Further to allow architectures flexibility, have PMU_EVENT_ATTR() pass
in the variable name as a parameter.

Changelog[v2]
	- [Jiri Olsa] No need to define PMU_EVENT_PTR()

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anton Blanchard <anton@au1.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: linuxppc-dev@ozlabs.org
Link: http://lkml.kernel.org/r/20130123062422.GC13720@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/kernel/cpu/perf_event.c | 13 +++----------
 include/linux/perf_event.h       | 11 +++++++++++
 2 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6774c17a5576..c0df5ed2e048 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = {
 	.attrs = NULL,
 };
 
-struct perf_pmu_events_attr {
-	struct device_attribute attr;
-	u64 id;
-};
-
 /*
  * Remove all undefined events (x86_pmu.event_map(id) == 0)
  * out of events_attr attributes.
@@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
 
-#define EVENT_ATTR(_name, _id)					\
-static struct perf_pmu_events_attr EVENT_VAR(_id) = {		\
-	.attr = __ATTR(_name, 0444, events_sysfs_show, NULL),	\
-	.id   =  PERF_COUNT_HW_##_id,				\
-};
+#define EVENT_ATTR(_name, _id)						\
+	PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id,	\
+			events_sysfs_show)
 
 EVENT_ATTR(cpu-cycles,			CPU_CYCLES		);
 EVENT_ATTR(instructions,		INSTRUCTIONS		);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6bfb2faa0b19..42adf012145d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -817,6 +817,17 @@ do {									\
 } while (0)
 
 
+struct perf_pmu_events_attr {
+	struct device_attribute attr;
+	u64 id;
+};
+
+#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
+static struct perf_pmu_events_attr _var = {				\
+	.attr = __ATTR(_name, 0444, _show, NULL),			\
+	.id   =  _id,							\
+};
+
 #define PMU_FORMAT_ATTR(_name, _format)					\
 static ssize_t								\
 _name##_show(struct device *dev,					\
-- 
cgit v1.2.3


From 955154fa33df2b74f0fea8e7c84df6dfd954dab2 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Wed, 30 Jan 2013 23:07:10 +0000
Subject: net/mlx4_en: Don't reassign port mac address on firmware that
 supports it

Mac reassignments should only be done when not supported by the firmware. To
accomplish that, checking firmware capability bit to know whether we should
reassign macs in the driver.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++-
 drivers/net/ethernet/mellanox/mlx4/fw.c        | 7 ++++++-
 include/linux/mlx4/device.h                    | 3 ++-
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 333a7a0b833c..7b513e9aea85 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1285,7 +1285,8 @@ void mlx4_en_stop_port(struct net_device *dev)
 
 	/* Unregister Mac address for the port */
 	mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn);
-	mdev->mac_removed[priv->port] = 1;
+	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN))
+		mdev->mac_removed[priv->port] = 1;
 
 	/* Remove flow steering rules for the port*/
 	if (mdev->dev->caps.steering_mode ==
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 91acf71aca97..38b62c78d5da 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -127,7 +127,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[0] = "RSS support",
 		[1] = "RSS Toeplitz Hash Function support",
 		[2] = "RSS XOR Hash Function support",
-		[3] = "Device manage flow steering support"
+		[3] = "Device manage flow steering support",
+		[4] = "Automatic mac reassignment support"
 	};
 	int i;
 
@@ -478,6 +479,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET		0x94
 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET		0x98
 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET		0xa0
+#define QUERY_DEV_CAP_FW_REASSIGN_MAC		0x9d
 
 	dev_cap->flags2 = 0;
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -637,6 +639,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
 	MLX4_GET(dev_cap->reserved_lkey, outbox,
 		 QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
+	if (field & 1<<6)
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN;
 	MLX4_GET(dev_cap->max_icm_sz, outbox,
 		 QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
 	if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 20ea939c22a6..1883e8e84718 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -150,7 +150,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_RSS			= 1LL <<  0,
 	MLX4_DEV_CAP_FLAG2_RSS_TOP		= 1LL <<  1,
 	MLX4_DEV_CAP_FLAG2_RSS_XOR		= 1LL <<  2,
-	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3
+	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3,
+	MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN	= 1LL <<  4
 };
 
 enum {
-- 
cgit v1.2.3


From 12572dbb53638c6e454ef831c8fee7de3df24389 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 14 Jan 2013 17:05:21 +0000
Subject: clockevents: Add generic timer broadcast receiver

Currently the broadcast mechanism used for timers is abstracted by a
function pointer on struct clock_event_device. As the fundamental
mechanism for broadcast is architecture-specific, this ties each
clock_event_device driver to a single architecture, even where the
driver is otherwise generic.

This patch adds a standard path for the receipt of timer broadcasts, so
drivers and/or architecture backends need not manage redundant lists of
timers for the purpose of routing broadcast timer ticks.

[tglx: Made the implementation depend on the config switch as well ]

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: nico@linaro.org
Cc: Will.Deacon@arm.com
Cc: Marc.Zyngier@arm.com
Cc: john.stultz@linaro.org
Link: http://lkml.kernel.org/r/1358183124-28461-2-git-send-email-mark.rutland@arm.com
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h   |  4 ++++
 kernel/time/tick-broadcast.c | 17 +++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 8a7096fcb01e..e1089aa7816d 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -161,6 +161,10 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
 extern void clockevents_suspend(void);
 extern void clockevents_resume(void);
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern int tick_receive_broadcast(void);
+#endif
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void clockevents_notify(unsigned long reason, void *arg);
 #else
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f113755695e2..7cc81c57eb31 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -125,6 +125,23 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	return ret;
 }
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+int tick_receive_broadcast(void)
+{
+	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+	struct clock_event_device *evt = td->evtdev;
+
+	if (!evt)
+		return -ENODEV;
+
+	if (!evt->event_handler)
+		return -EINVAL;
+
+	evt->event_handler(evt);
+	return 0;
+}
+#endif
+
 /*
  * Broadcast the event to the cpus, which are set in the mask (mangled).
  */
-- 
cgit v1.2.3


From 12ad10004645d38356b14d1fbba379c523a61916 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 14 Jan 2013 17:05:22 +0000
Subject: clockevents: Add generic timer broadcast function

Currently, the timer broadcast mechanism is defined by a function
pointer on struct clock_event_device. As the fundamental mechanism for
broadcast is architecture-specific, this means that clock_event_device
drivers cannot be shared across multiple architectures.

This patch adds an (optional) architecture-specific function for timer
tick broadcast, allowing drivers which may require broadcast
functionality to be shared across multiple architectures.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: nico@linaro.org
Cc: Will.Deacon@arm.com
Cc: Marc.Zyngier@arm.com
Cc: john.stultz@linaro.org
Link: http://lkml.kernel.org/r/1358183124-28461-3-git-send-email-mark.rutland@arm.com
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h   |  5 +++++
 kernel/time/Kconfig          |  4 ++++
 kernel/time/tick-broadcast.c | 13 +++++++++++++
 3 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index e1089aa7816d..66346521cb65 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -162,6 +162,11 @@ extern void clockevents_suspend(void);
 extern void clockevents_resume(void);
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
+extern void tick_broadcast(const struct cpumask *mask);
+#else
+#define tick_broadcast	NULL
+#endif
 extern int tick_receive_broadcast(void);
 #endif
 
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 8601f0db1261..b69692250af4 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -38,6 +38,10 @@ config GENERIC_CLOCKEVENTS_BUILD
 	default y
 	depends on GENERIC_CLOCKEVENTS
 
+# Architecture can handle broadcast in a driver-agnostic way
+config ARCH_HAS_TICK_BROADCAST
+	bool
+
 # Clockevents broadcasting infrastructure
 config GENERIC_CLOCKEVENTS_BROADCAST
 	bool
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 7cc81c57eb31..f726537d24eb 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -18,6 +18,7 @@
 #include <linux/percpu.h>
 #include <linux/profile.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 
 #include "tick-internal.h"
 
@@ -86,6 +87,11 @@ int tick_is_broadcast_device(struct clock_event_device *dev)
 	return (dev && tick_broadcast_device.evtdev == dev);
 }
 
+static void err_broadcast(const struct cpumask *mask)
+{
+	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
+}
+
 /*
  * Check, if the device is disfunctional and a place holder, which
  * needs to be handled by the broadcast device.
@@ -105,6 +111,13 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	 */
 	if (!tick_device_is_functional(dev)) {
 		dev->event_handler = tick_handle_periodic;
+		if (!dev->broadcast)
+			dev->broadcast = tick_broadcast;
+		if (!dev->broadcast) {
+			pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
+				     dev->name);
+			dev->broadcast = err_broadcast;
+		}
 		cpumask_set_cpu(cpu, tick_get_broadcast_mask());
 		tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
 		ret = 1;
-- 
cgit v1.2.3


From 6fcdf4facb85e7d54ff6195378dd2ba8e0baccc4 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 30 Jan 2013 21:50:08 -0500
Subject: wanrouter: delete now orphaned header content, files/drivers

The wanrouter support was identified earlier as unused for years,
and so the previous commit totally decoupled it from the kernel,
leaving the related wanrouter files present, but totally inert.

Here we take the final step in that cleanup, by doing a wholesale
removal of these files.  The two step process is used so that the
large deletion is decoupled from the git history of files that we
still care about.

The drivers deleted here all were dependent on the Kconfig setting
CONFIG_WAN_ROUTER_DRIVERS.

A stub wanrouter.h header (kernel & uapi) are left behind so that
drivers/isdn/i4l/isdn_x25iface.c continues to compile, and so that
we don't accidentally break userspace that expected these defines.

Cc: Joe Perches <joe@perches.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 drivers/net/wan/cycx_drv.c     |  569 --------------
 drivers/net/wan/cycx_main.c    |  346 ---------
 drivers/net/wan/cycx_x25.c     | 1602 ----------------------------------------
 include/linux/cyclomx.h        |   77 --
 include/linux/cycx_drv.h       |   64 --
 include/linux/wanrouter.h      |  127 +---
 include/uapi/linux/wanrouter.h |  443 +----------
 net/wanrouter/Kconfig          |   27 -
 net/wanrouter/Makefile         |    7 -
 net/wanrouter/patchlevel       |    1 -
 net/wanrouter/wanmain.c        |  782 --------------------
 net/wanrouter/wanproc.c        |  380 ----------
 12 files changed, 8 insertions(+), 4417 deletions(-)
 delete mode 100644 drivers/net/wan/cycx_drv.c
 delete mode 100644 drivers/net/wan/cycx_main.c
 delete mode 100644 drivers/net/wan/cycx_x25.c
 delete mode 100644 include/linux/cyclomx.h
 delete mode 100644 include/linux/cycx_drv.h
 delete mode 100644 net/wanrouter/Kconfig
 delete mode 100644 net/wanrouter/Makefile
 delete mode 100644 net/wanrouter/patchlevel
 delete mode 100644 net/wanrouter/wanmain.c
 delete mode 100644 net/wanrouter/wanproc.c

(limited to 'include/linux')

diff --git a/drivers/net/wan/cycx_drv.c b/drivers/net/wan/cycx_drv.c
deleted file mode 100644
index 2a3ecae67a90..000000000000
--- a/drivers/net/wan/cycx_drv.c
+++ /dev/null
@@ -1,569 +0,0 @@
-/*
-* cycx_drv.c	Cyclom 2X Support Module.
-*
-*		This module is a library of common hardware specific
-*		functions used by the Cyclades Cyclom 2X sync card.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdladrv.c by Gene Kozin <genek@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 1999/11/11	acme		set_current_state(TASK_INTERRUPTIBLE), code
-*				cleanup
-* 1999/11/08	acme		init_cyc2x deleted, doing nothing
-* 1999/11/06	acme		back to read[bw], write[bw] and memcpy_to and
-*				fromio to use dpmbase ioremaped
-* 1999/10/26	acme		use isa_read[bw], isa_write[bw] & isa_memcpy_to
-*				& fromio
-* 1999/10/23	acme		cleanup to only supports cyclom2x: all the other
-*				boards are no longer manufactured by cyclades,
-*				if someone wants to support them... be my guest!
-* 1999/05/28    acme		cycx_intack & cycx_intde gone for good
-* 1999/05/18	acme		lots of unlogged work, submitting to Linus...
-* 1999/01/03	acme		more judicious use of data types
-* 1999/01/03	acme		judicious use of data types :>
-*				cycx_inten trying to reset pending interrupts
-*				from cyclom 2x - I think this isn't the way to
-*				go, but for now...
-* 1999/01/02	acme		cycx_intack ok, I think there's nothing to do
-*				to ack an int in cycx_drv.c, only handle it in
-*				cyx_isr (or in the other protocols: cyp_isr,
-*				cyf_isr, when they get implemented.
-* Dec 31, 1998	acme		cycx_data_boot & cycx_code_boot fixed, crossing
-*				fingers to see x25_configure in cycx_x25.c
-*				work... :)
-* Dec 26, 1998	acme		load implementation fixed, seems to work! :)
-*				cycx_2x_dpmbase_options with all the possible
-*				DPM addresses (20).
-*				cycx_intr implemented (test this!)
-*				general code cleanup
-* Dec  8, 1998	Ivan Passos	Cyclom-2X firmware load implementation.
-* Aug  8, 1998	acme		Initial version.
-*/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/init.h>		/* __init */
-#include <linux/module.h>
-#include <linux/kernel.h>	/* printk(), and other useful stuff */
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/errno.h>	/* return codes */
-#include <linux/cycx_drv.h>	/* API definitions */
-#include <linux/cycx_cfm.h>	/* CYCX firmware module definitions */
-#include <linux/delay.h>	/* udelay, msleep_interruptible */
-#include <asm/io.h>		/* read[wl], write[wl], ioremap, iounmap */
-
-#define	MOD_VERSION	0
-#define	MOD_RELEASE	6
-
-MODULE_AUTHOR("Arnaldo Carvalho de Melo");
-MODULE_DESCRIPTION("Cyclom 2x Sync Card Driver");
-MODULE_LICENSE("GPL");
-
-/* Hardware-specific functions */
-static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len);
-static void cycx_bootcfg(struct cycx_hw *hw);
-
-static int reset_cyc2x(void __iomem *addr);
-static int detect_cyc2x(void __iomem *addr);
-
-/* Miscellaneous functions */
-static int get_option_index(const long *optlist, long optval);
-static u16 checksum(u8 *buf, u32 len);
-
-#define wait_cyc(addr) cycx_exec(addr + CMD_OFFSET)
-
-/* Global Data */
-
-/* private data */
-static const char fullname[] = "Cyclom 2X Support Module";
-static const char copyright[] =
-	"(c) 1998-2003 Arnaldo Carvalho de Melo <acme@conectiva.com.br>";
-
-/* Hardware configuration options.
- * These are arrays of configuration options used by verification routines.
- * The first element of each array is its size (i.e. number of options).
- */
-static const long cyc2x_dpmbase_options[] = {
-	20,
-	0xA0000, 0xA4000, 0xA8000, 0xAC000, 0xB0000, 0xB4000, 0xB8000,
-	0xBC000, 0xC0000, 0xC4000, 0xC8000, 0xCC000, 0xD0000, 0xD4000,
-	0xD8000, 0xDC000, 0xE0000, 0xE4000, 0xE8000, 0xEC000
-};
-
-static const long cycx_2x_irq_options[]  = { 7, 3, 5, 9, 10, 11, 12, 15 };
-
-/* Kernel Loadable Module Entry Points */
-/* Module 'insert' entry point.
- * o print announcement
- * o initialize static data
- *
- * Return:	0	Ok
- *		< 0	error.
- * Context:	process */
-
-static int __init cycx_drv_init(void)
-{
-	pr_info("%s v%u.%u %s\n",
-		fullname, MOD_VERSION, MOD_RELEASE, copyright);
-
-	return 0;
-}
-
-/* Module 'remove' entry point.
- * o release all remaining system resources */
-static void cycx_drv_cleanup(void)
-{
-}
-
-/* Kernel APIs */
-/* Set up adapter.
- * o detect adapter type
- * o verify hardware configuration options
- * o check for hardware conflicts
- * o set up adapter shared memory
- * o test adapter memory
- * o load firmware
- * Return:	0	ok.
- *		< 0	error */
-EXPORT_SYMBOL(cycx_setup);
-int cycx_setup(struct cycx_hw *hw, void *cfm, u32 len, unsigned long dpmbase)
-{
-	int err;
-
-	/* Verify IRQ configuration options */
-	if (!get_option_index(cycx_2x_irq_options, hw->irq)) {
-		pr_err("IRQ %d is invalid!\n", hw->irq);
-		return -EINVAL;
-	}
-
-	/* Setup adapter dual-port memory window and test memory */
-	if (!dpmbase) {
-		pr_err("you must specify the dpm address!\n");
- 		return -EINVAL;
-	} else if (!get_option_index(cyc2x_dpmbase_options, dpmbase)) {
-		pr_err("memory address 0x%lX is invalid!\n", dpmbase);
-		return -EINVAL;
-	}
-
-	hw->dpmbase = ioremap(dpmbase, CYCX_WINDOWSIZE);
-	hw->dpmsize = CYCX_WINDOWSIZE;
-
-	if (!detect_cyc2x(hw->dpmbase)) {
-		pr_err("adapter Cyclom 2X not found at address 0x%lX!\n",
-		       dpmbase);
-		return -EINVAL;
-	}
-
-	pr_info("found Cyclom 2X card at address 0x%lX\n", dpmbase);
-
-	/* Load firmware. If loader fails then shut down adapter */
-	err = load_cyc2x(hw, cfm, len);
-
-	if (err)
-		cycx_down(hw);         /* shutdown adapter */
-
-	return err;
-}
-
-EXPORT_SYMBOL(cycx_down);
-int cycx_down(struct cycx_hw *hw)
-{
-	iounmap(hw->dpmbase);
-	return 0;
-}
-
-/* Enable interrupt generation.  */
-static void cycx_inten(struct cycx_hw *hw)
-{
-	writeb(0, hw->dpmbase);
-}
-
-/* Generate an interrupt to adapter's CPU. */
-EXPORT_SYMBOL(cycx_intr);
-void cycx_intr(struct cycx_hw *hw)
-{
-	writew(0, hw->dpmbase + GEN_CYCX_INTR);
-}
-
-/* Execute Adapter Command.
- * o Set exec flag.
- * o Busy-wait until flag is reset. */
-EXPORT_SYMBOL(cycx_exec);
-int cycx_exec(void __iomem *addr)
-{
-	u16 i = 0;
-	/* wait till addr content is zeroed */
-
-	while (readw(addr)) {
-		udelay(1000);
-
-		if (++i > 50)
-			return -1;
-	}
-
-	return 0;
-}
-
-/* Read absolute adapter memory.
- * Transfer data from adapter's memory to data buffer. */
-EXPORT_SYMBOL(cycx_peek);
-int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len)
-{
-	if (len == 1)
-		*(u8*)buf = readb(hw->dpmbase + addr);
-	else
-		memcpy_fromio(buf, hw->dpmbase + addr, len);
-
-	return 0;
-}
-
-/* Write Absolute Adapter Memory.
- * Transfer data from data buffer to adapter's memory. */
-EXPORT_SYMBOL(cycx_poke);
-int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len)
-{
-	if (len == 1)
-		writeb(*(u8*)buf, hw->dpmbase + addr);
-	else
-		memcpy_toio(hw->dpmbase + addr, buf, len);
-
-	return 0;
-}
-
-/* Hardware-Specific Functions */
-
-/* Load Aux Routines */
-/* Reset board hardware.
-   return 1 if memory exists at addr and 0 if not. */
-static int memory_exists(void __iomem *addr)
-{
-	int tries = 0;
-
-	for (; tries < 3 ; tries++) {
-		writew(TEST_PATTERN, addr + 0x10);
-
-		if (readw(addr + 0x10) == TEST_PATTERN)
-			if (readw(addr + 0x10) == TEST_PATTERN)
-				return 1;
-
-		msleep_interruptible(1 * 1000);
-	}
-
-	return 0;
-}
-
-/* Load reset code. */
-static void reset_load(void __iomem *addr, u8 *buffer, u32 cnt)
-{
-	void __iomem *pt_code = addr + RESET_OFFSET;
-	u16 i; /*, j; */
-
-	for (i = 0 ; i < cnt ; i++) {
-/*		for (j = 0 ; j < 50 ; j++); Delay - FIXME busy waiting... */
-		writeb(*buffer++, pt_code++);
-	}
-}
-
-/* Load buffer using boot interface.
- * o copy data from buffer to Cyclom-X memory
- * o wait for reset code to copy it to right portion of memory */
-static int buffer_load(void __iomem *addr, u8 *buffer, u32 cnt)
-{
-	memcpy_toio(addr + DATA_OFFSET, buffer, cnt);
-	writew(GEN_BOOT_DAT, addr + CMD_OFFSET);
-
-	return wait_cyc(addr);
-}
-
-/* Set up entry point and kick start Cyclom-X CPU. */
-static void cycx_start(void __iomem *addr)
-{
-	/* put in 0x30 offset the jump instruction to the code entry point */
-	writeb(0xea, addr + 0x30);
-	writeb(0x00, addr + 0x31);
-	writeb(0xc4, addr + 0x32);
-	writeb(0x00, addr + 0x33);
-	writeb(0x00, addr + 0x34);
-
-	/* cmd to start executing code */
-	writew(GEN_START, addr + CMD_OFFSET);
-}
-
-/* Load and boot reset code. */
-static void cycx_reset_boot(void __iomem *addr, u8 *code, u32 len)
-{
-	void __iomem *pt_start = addr + START_OFFSET;
-
-	writeb(0xea, pt_start++); /* jmp to f000:3f00 */
-	writeb(0x00, pt_start++);
-	writeb(0xfc, pt_start++);
-	writeb(0x00, pt_start++);
-	writeb(0xf0, pt_start);
-	reset_load(addr, code, len);
-
-	/* 80186 was in hold, go */
-	writeb(0, addr + START_CPU);
-	msleep_interruptible(1 * 1000);
-}
-
-/* Load data.bin file through boot (reset) interface. */
-static int cycx_data_boot(void __iomem *addr, u8 *code, u32 len)
-{
-	void __iomem *pt_boot_cmd = addr + CMD_OFFSET;
-	u32 i;
-
-	/* boot buffer length */
-	writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16));
-	writew(GEN_DEFPAR, pt_boot_cmd);
-
-	if (wait_cyc(addr) < 0)
-		return -1;
-
-	writew(0, pt_boot_cmd + sizeof(u16));
-	writew(0x4000, pt_boot_cmd + 2 * sizeof(u16));
-	writew(GEN_SET_SEG, pt_boot_cmd);
-
-	if (wait_cyc(addr) < 0)
-		return -1;
-
-	for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ)
-		if (buffer_load(addr, code + i,
-				min_t(u32, CFM_LOAD_BUFSZ, (len - i))) < 0) {
-			pr_err("Error !!\n");
-			return -1;
-		}
-
-	return 0;
-}
-
-
-/* Load code.bin file through boot (reset) interface. */
-static int cycx_code_boot(void __iomem *addr, u8 *code, u32 len)
-{
-	void __iomem *pt_boot_cmd = addr + CMD_OFFSET;
-	u32 i;
-
-	/* boot buffer length */
-	writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16));
-	writew(GEN_DEFPAR, pt_boot_cmd);
-
-	if (wait_cyc(addr) < 0)
-		return -1;
-
-	writew(0x0000, pt_boot_cmd + sizeof(u16));
-	writew(0xc400, pt_boot_cmd + 2 * sizeof(u16));
-	writew(GEN_SET_SEG, pt_boot_cmd);
-
-	if (wait_cyc(addr) < 0)
-		return -1;
-
-	for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ)
-		if (buffer_load(addr, code + i,
-				min_t(u32, CFM_LOAD_BUFSZ, (len - i)))) {
-			pr_err("Error !!\n");
-			return -1;
-		}
-
-	return 0;
-}
-
-/* Load adapter from the memory image of the CYCX firmware module.
- * o verify firmware integrity and compatibility
- * o start adapter up */
-static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len)
-{
-	int i, j;
-	struct cycx_fw_header *img_hdr;
-	u8 *reset_image,
-	   *data_image,
-	   *code_image;
-	void __iomem *pt_cycld = hw->dpmbase + 0x400;
-	u16 cksum;
-
-	/* Announce */
-	pr_info("firmware signature=\"%s\"\n", cfm->signature);
-
-	/* Verify firmware signature */
-	if (strcmp(cfm->signature, CFM_SIGNATURE)) {
-		pr_err("load_cyc2x: not Cyclom-2X firmware!\n");
-		return -EINVAL;
-	}
-
-	pr_info("firmware version=%u\n", cfm->version);
-
-	/* Verify firmware module format version */
-	if (cfm->version != CFM_VERSION) {
-		pr_err("%s: firmware format %u rejected! Expecting %u.\n",
-		       __func__, cfm->version, CFM_VERSION);
-		return -EINVAL;
-	}
-
-	/* Verify firmware module length and checksum */
-	cksum = checksum((u8*)&cfm->info, sizeof(struct cycx_fw_info) +
-					  cfm->info.codesize);
-/*
-	FIXME cfm->info.codesize is off by 2
-	if (((len - sizeof(struct cycx_firmware) - 1) != cfm->info.codesize) ||
-*/
-	if (cksum != cfm->checksum) {
-		pr_err("%s: firmware corrupted!\n", __func__);
-		pr_err(" cdsize = 0x%x (expected 0x%lx)\n",
-		       len - (int)sizeof(struct cycx_firmware) - 1,
-		       cfm->info.codesize);
-		pr_err(" chksum = 0x%x (expected 0x%x)\n",
-		       cksum, cfm->checksum);
-		return -EINVAL;
-	}
-
-	/* If everything is ok, set reset, data and code pointers */
-	img_hdr = (struct cycx_fw_header *)&cfm->image;
-#ifdef FIRMWARE_DEBUG
-	pr_info("%s: image sizes\n", __func__);
-	pr_info(" reset=%lu\n", img_hdr->reset_size);
-	pr_info("  data=%lu\n", img_hdr->data_size);
-	pr_info("  code=%lu\n", img_hdr->code_size);
-#endif
-	reset_image = ((u8 *)img_hdr) + sizeof(struct cycx_fw_header);
-	data_image = reset_image + img_hdr->reset_size;
-	code_image = data_image + img_hdr->data_size;
-
-	/*---- Start load ----*/
-	/* Announce */
-	pr_info("loading firmware %s (ID=%u)...\n",
-		cfm->descr[0] ? cfm->descr : "unknown firmware",
-		cfm->info.codeid);
-
-	for (i = 0 ; i < 5 ; i++) {
-		/* Reset Cyclom hardware */
-		if (!reset_cyc2x(hw->dpmbase)) {
-			pr_err("dpm problem or board not found\n");
-			return -EINVAL;
-		}
-
-		/* Load reset.bin */
-		cycx_reset_boot(hw->dpmbase, reset_image, img_hdr->reset_size);
-		/* reset is waiting for boot */
-		writew(GEN_POWER_ON, pt_cycld);
-		msleep_interruptible(1 * 1000);
-
-		for (j = 0 ; j < 3 ; j++)
-			if (!readw(pt_cycld))
-				goto reset_loaded;
-			else
-				msleep_interruptible(1 * 1000);
-	}
-
-	pr_err("reset not started\n");
-	return -EINVAL;
-
-reset_loaded:
-	/* Load data.bin */
-	if (cycx_data_boot(hw->dpmbase, data_image, img_hdr->data_size)) {
-		pr_err("cannot load data file\n");
-		return -EINVAL;
-	}
-
-	/* Load code.bin */
-	if (cycx_code_boot(hw->dpmbase, code_image, img_hdr->code_size)) {
-		pr_err("cannot load code file\n");
-		return -EINVAL;
-	}
-
-	/* Prepare boot-time configuration data */
-	cycx_bootcfg(hw);
-
-	/* kick-off CPU */
-	cycx_start(hw->dpmbase);
-
-	/* Arthur Ganzert's tip: wait a while after the firmware loading...
-	   seg abr 26 17:17:12 EST 1999 - acme */
-	msleep_interruptible(7 * 1000);
-	pr_info("firmware loaded!\n");
-
-	/* enable interrupts */
-	cycx_inten(hw);
-
-	return 0;
-}
-
-/* Prepare boot-time firmware configuration data.
- * o initialize configuration data area
-   From async.doc - V_3.4.0 - 07/18/1994
-   - As of now, only static buffers are available to the user.
-     So, the bit VD_RXDIRC must be set in 'valid'. That means that user
-     wants to use the static transmission and reception buffers. */
-static void cycx_bootcfg(struct cycx_hw *hw)
-{
-	/* use fixed buffers */
-	writeb(FIXED_BUFFERS, hw->dpmbase + CONF_OFFSET);
-}
-
-/* Detect Cyclom 2x adapter.
- *	Following tests are used to detect Cyclom 2x adapter:
- *       to be completed based on the tests done below
- *	Return 1 if detected o.k. or 0 if failed.
- *	Note:	This test is destructive! Adapter will be left in shutdown
- *		state after the test. */
-static int detect_cyc2x(void __iomem *addr)
-{
-	reset_cyc2x(addr);
-
-	return memory_exists(addr);
-}
-
-/* Miscellaneous */
-/* Get option's index into the options list.
- *	Return option's index (1 .. N) or zero if option is invalid. */
-static int get_option_index(const long *optlist, long optval)
-{
-	int i = 1;
-
-	for (; i <= optlist[0]; ++i)
-		if (optlist[i] == optval)
-			return i;
-
-	return 0;
-}
-
-/* Reset adapter's CPU. */
-static int reset_cyc2x(void __iomem *addr)
-{
-	writeb(0, addr + RST_ENABLE);
-	msleep_interruptible(2 * 1000);
-	writeb(0, addr + RST_DISABLE);
-	msleep_interruptible(2 * 1000);
-
-	return memory_exists(addr);
-}
-
-/* Calculate 16-bit CRC using CCITT polynomial. */
-static u16 checksum(u8 *buf, u32 len)
-{
-	u16 crc = 0;
-	u16 mask, flag;
-
-	for (; len; --len, ++buf)
-		for (mask = 0x80; mask; mask >>= 1) {
-			flag = (crc & 0x8000);
-			crc <<= 1;
-			crc |= ((*buf & mask) ? 1 : 0);
-
-			if (flag)
-				crc ^= 0x1021;
-		}
-
-	return crc;
-}
-
-module_init(cycx_drv_init);
-module_exit(cycx_drv_cleanup);
-
-/* End */
diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c
deleted file mode 100644
index 81fbbad406be..000000000000
--- a/drivers/net/wan/cycx_main.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
-* cycx_main.c	Cyclades Cyclom 2X WAN Link Driver. Main module.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdlamain.c by Gene Kozin <genek@compuserve.com> &
-*			 Jaspreet Singh	<jaspreet@sangoma.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Please look at the bitkeeper changelog (or any other scm tool that ends up
-* importing bitkeeper changelog or that replaces bitkeeper in the future as
-* main tool for linux development).
-* 
-* 2001/05/09	acme		Fix MODULE_DESC for debug, .bss nitpicks,
-* 				some cleanups
-* 2000/07/13	acme		remove useless #ifdef MODULE and crap
-*				#if KERNEL_VERSION > blah
-* 2000/07/06	acme		__exit at cyclomx_cleanup
-* 2000/04/02	acme		dprintk and cycx_debug
-* 				module_init/module_exit
-* 2000/01/21	acme		rename cyclomx_open to cyclomx_mod_inc_use_count
-*				and cyclomx_close to cyclomx_mod_dec_use_count
-* 2000/01/08	acme		cleanup
-* 1999/11/06	acme		cycx_down back to life (it needs to be
-*				called to iounmap the dpmbase)
-* 1999/08/09	acme		removed references to enable_tx_int
-*				use spinlocks instead of cli/sti in
-*				cyclomx_set_state
-* 1999/05/19	acme		works directly linked into the kernel
-*				init_waitqueue_head for 2.3.* kernel
-* 1999/05/18	acme		major cleanup (polling not needed), etc
-* 1998/08/28	acme		minor cleanup (ioctls for firmware deleted)
-*				queue_task activated
-* 1998/08/08	acme		Initial version.
-*/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/errno.h>	/* return codes */
-#include <linux/string.h>	/* inline memset(), etc. */
-#include <linux/slab.h>		/* kmalloc(), kfree() */
-#include <linux/kernel.h>	/* printk(), and other useful stuff */
-#include <linux/module.h>	/* support for loadable modules */
-#include <linux/ioport.h>	/* request_region(), release_region() */
-#include <linux/wanrouter.h>	/* WAN router definitions */
-#include <linux/cyclomx.h>	/* cyclomx common user API definitions */
-#include <linux/init.h>         /* __init (when not using as a module) */
-#include <linux/interrupt.h>
-
-unsigned int cycx_debug;
-
-MODULE_AUTHOR("Arnaldo Carvalho de Melo");
-MODULE_DESCRIPTION("Cyclom 2X Sync Card Driver.");
-MODULE_LICENSE("GPL");
-module_param(cycx_debug, int, 0);
-MODULE_PARM_DESC(cycx_debug, "cyclomx debug level");
-
-/* Defines & Macros */
-
-#define	CYCX_DRV_VERSION	0	/* version number */
-#define	CYCX_DRV_RELEASE	11	/* release (minor version) number */
-#define	CYCX_MAX_CARDS		1	/* max number of adapters */
-
-#define	CONFIG_CYCX_CARDS 1
-
-/* Function Prototypes */
-
-/* WAN link driver entry points */
-static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf);
-static int cycx_wan_shutdown(struct wan_device *wandev);
-
-/* Miscellaneous functions */
-static irqreturn_t cycx_isr(int irq, void *dev_id);
-
-/* Global Data
- * Note: All data must be explicitly initialized!!!
- */
-
-/* private data */
-static const char cycx_drvname[] = "cyclomx";
-static const char cycx_fullname[] = "CYCLOM 2X(tm) Sync Card Driver";
-static const char cycx_copyright[] = "(c) 1998-2003 Arnaldo Carvalho de Melo "
-			  "<acme@conectiva.com.br>";
-static int cycx_ncards = CONFIG_CYCX_CARDS;
-static struct cycx_device *cycx_card_array;	/* adapter data space */
-
-/* Kernel Loadable Module Entry Points */
-
-/*
- * Module 'insert' entry point.
- * o print announcement
- * o allocate adapter data space
- * o initialize static data
- * o register all cards with WAN router
- * o calibrate Cyclom 2X shared memory access delay.
- *
- * Return:	0	Ok
- *		< 0	error.
- * Context:	process
- */
-static int __init cycx_init(void)
-{
-	int cnt, err = -ENOMEM;
-
-	pr_info("%s v%u.%u %s\n",
-		cycx_fullname, CYCX_DRV_VERSION, CYCX_DRV_RELEASE,
-		cycx_copyright);
-
-	/* Verify number of cards and allocate adapter data space */
-	cycx_ncards = min_t(int, cycx_ncards, CYCX_MAX_CARDS);
-	cycx_ncards = max_t(int, cycx_ncards, 1);
-	cycx_card_array = kcalloc(cycx_ncards, sizeof(struct cycx_device), GFP_KERNEL);
-	if (!cycx_card_array)
-		goto out;
-
-
-	/* Register adapters with WAN router */
-	for (cnt = 0; cnt < cycx_ncards; ++cnt) {
-		struct cycx_device *card = &cycx_card_array[cnt];
-		struct wan_device *wandev = &card->wandev;
-
-		sprintf(card->devname, "%s%d", cycx_drvname, cnt + 1);
-		wandev->magic    = ROUTER_MAGIC;
-		wandev->name     = card->devname;
-		wandev->private  = card;
-		wandev->setup    = cycx_wan_setup;
-		wandev->shutdown = cycx_wan_shutdown;
-		err = register_wan_device(wandev);
-
-		if (err) {
-			pr_err("%s registration failed with error %d!\n",
-			       card->devname, err);
-			break;
-		}
-	}
-
-	err = -ENODEV;
-	if (!cnt) {
-		kfree(cycx_card_array);
-		goto out;
-	}
-	err = 0;
-	cycx_ncards = cnt;	/* adjust actual number of cards */
-out:	return err;
-}
-
-/*
- * Module 'remove' entry point.
- * o unregister all adapters from the WAN router
- * o release all remaining system resources
- */
-static void __exit cycx_exit(void)
-{
-	int i = 0;
-
-	for (; i < cycx_ncards; ++i) {
-		struct cycx_device *card = &cycx_card_array[i];
-		unregister_wan_device(card->devname);
-	}
-
-	kfree(cycx_card_array);
-}
-
-/* WAN Device Driver Entry Points */
-/*
- * Setup/configure WAN link driver.
- * o check adapter state
- * o make sure firmware is present in configuration
- * o allocate interrupt vector
- * o setup Cyclom 2X hardware
- * o call appropriate routine to perform protocol-specific initialization
- *
- * This function is called when router handles ROUTER_SETUP IOCTL. The
- * configuration structure is in kernel memory (including extended data, if
- * any).
- */
-static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf)
-{
-	int rc = -EFAULT;
-	struct cycx_device *card;
-	int irq;
-
-	/* Sanity checks */
-
-	if (!wandev || !wandev->private || !conf)
-		goto out;
-
-	card = wandev->private;
-	rc = -EBUSY;
-	if (wandev->state != WAN_UNCONFIGURED)
-		goto out;
-
-	rc = -EINVAL;
-	if (!conf->data_size || !conf->data) {
-		pr_err("%s: firmware not found in configuration data!\n",
-		       wandev->name);
-		goto out;
-	}
-
-	if (conf->irq <= 0) {
-		pr_err("%s: can't configure without IRQ!\n", wandev->name);
-		goto out;
-	}
-
-	/* Allocate IRQ */
-	irq = conf->irq == 2 ? 9 : conf->irq;	/* IRQ2 -> IRQ9 */
-
-	if (request_irq(irq, cycx_isr, 0, wandev->name, card)) {
-		pr_err("%s: can't reserve IRQ %d!\n", wandev->name, irq);
-		goto out;
-	}
-
-	/* Configure hardware, load firmware, etc. */
-	memset(&card->hw, 0, sizeof(card->hw));
-	card->hw.irq	 = irq;
-	card->hw.dpmsize = CYCX_WINDOWSIZE;
-	card->hw.fwid	 = CFID_X25_2X;
-	spin_lock_init(&card->lock);
-	init_waitqueue_head(&card->wait_stats);
-
-	rc = cycx_setup(&card->hw, conf->data, conf->data_size, conf->maddr);
-	if (rc)
-		goto out_irq;
-
-	/* Initialize WAN device data space */
-	wandev->irq       = irq;
-	wandev->dma       = wandev->ioport = 0;
-	wandev->maddr     = (unsigned long)card->hw.dpmbase;
-	wandev->msize     = card->hw.dpmsize;
-	wandev->hw_opt[2] = 0;
-	wandev->hw_opt[3] = card->hw.fwid;
-
-	/* Protocol-specific initialization */
-	switch (card->hw.fwid) {
-#ifdef CONFIG_CYCLOMX_X25
-	case CFID_X25_2X:
-		rc = cycx_x25_wan_init(card, conf);
-		break;
-#endif
-	default:
-		pr_err("%s: this firmware is not supported!\n", wandev->name);
-		rc = -EINVAL;
-	}
-
-	if (rc) {
-		cycx_down(&card->hw);
-		goto out_irq;
-	}
-
-	rc = 0;
-out:
-	return rc;
-out_irq:
-	free_irq(irq, card);
-	goto out;
-}
-
-/*
- * Shut down WAN link driver.
- * o shut down adapter hardware
- * o release system resources.
- *
- * This function is called by the router when device is being unregistered or
- * when it handles ROUTER_DOWN IOCTL.
- */
-static int cycx_wan_shutdown(struct wan_device *wandev)
-{
-	int ret = -EFAULT;
-	struct cycx_device *card;
-
-	/* sanity checks */
-	if (!wandev || !wandev->private)
-		goto out;
-
-	ret = 0;
-	if (wandev->state == WAN_UNCONFIGURED)
-		goto out;
-
-	card = wandev->private;
-	wandev->state = WAN_UNCONFIGURED;
-	cycx_down(&card->hw);
-	pr_info("%s: irq %d being freed!\n", wandev->name, wandev->irq);
-	free_irq(wandev->irq, card);
-out:	return ret;
-}
-
-/* Miscellaneous */
-/*
- * Cyclom 2X Interrupt Service Routine.
- * o acknowledge Cyclom 2X hardware interrupt.
- * o call protocol-specific interrupt service routine, if any.
- */
-static irqreturn_t cycx_isr(int irq, void *dev_id)
-{
-	struct cycx_device *card = dev_id;
-
-	if (card->wandev.state == WAN_UNCONFIGURED)
-		goto out;
-
-	if (card->in_isr) {
-		pr_warn("%s: interrupt re-entrancy on IRQ %d!\n",
-			card->devname, card->wandev.irq);
-		goto out;
-	}
-
-	if (card->isr)
-		card->isr(card);
-	return IRQ_HANDLED;
-out:
-	return IRQ_NONE;
-}
-
-/* Set WAN device state.  */
-void cycx_set_state(struct cycx_device *card, int state)
-{
-	unsigned long flags;
-	char *string_state = NULL;
-
-	spin_lock_irqsave(&card->lock, flags);
-
-	if (card->wandev.state != state) {
-		switch (state) {
-		case WAN_CONNECTED:
-			string_state = "connected!";
-			break;
-		case WAN_DISCONNECTED:
-			string_state = "disconnected!";
-			break;
-		}
-		pr_info("%s: link %s\n", card->devname, string_state);
-		card->wandev.state = state;
-	}
-
-	card->state_tick = jiffies;
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-module_init(cycx_init);
-module_exit(cycx_exit);
diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c
deleted file mode 100644
index 06f3f6309e4b..000000000000
--- a/drivers/net/wan/cycx_x25.c
+++ /dev/null
@@ -1,1602 +0,0 @@
-/*
-* cycx_x25.c	Cyclom 2X WAN Link Driver.  X.25 module.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdla_x25.c by Gene Kozin <genek@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 2001/01/12	acme		use dev_kfree_skb_irq on interrupt context
-* 2000/04/02	acme		dprintk, cycx_debug
-* 				fixed the bug introduced in get_dev_by_lcn and
-* 				get_dev_by_dte_addr by the anonymous hacker
-* 				that converted this driver to softnet
-* 2000/01/08	acme		cleanup
-* 1999/10/27	acme		use ARPHRD_HWX25 so that the X.25 stack know
-*				that we have a X.25 stack implemented in
-*				firmware onboard
-* 1999/10/18	acme		support for X.25 sockets in if_send,
-*				beware: socket(AF_X25...) IS WORK IN PROGRESS,
-*				TCP/IP over X.25 via wanrouter not affected,
-*				working.
-* 1999/10/09	acme		chan_disc renamed to chan_disconnect,
-* 				began adding support for X.25 sockets:
-* 				conf->protocol in new_if
-* 1999/10/05	acme		fixed return E... to return -E...
-* 1999/08/10	acme		serialized access to the card thru a spinlock
-*				in x25_exec
-* 1999/08/09	acme		removed per channel spinlocks
-*				removed references to enable_tx_int
-* 1999/05/28	acme		fixed nibble_to_byte, ackvc now properly treated
-*				if_send simplified
-* 1999/05/25	acme		fixed t1, t2, t21 & t23 configuration
-*				use spinlocks instead of cli/sti in some points
-* 1999/05/24	acme		finished the x25_get_stat function
-* 1999/05/23	acme		dev->type = ARPHRD_X25 (tcpdump only works,
-*				AFAIT, with ARPHRD_ETHER). This seems to be
-*				needed to use socket(AF_X25)...
-*				Now the config file must specify a peer media
-*				address for svc channels over a crossover cable.
-*				Removed hold_timeout from x25_channel_t,
-*				not used.
-*				A little enhancement in the DEBUG processing
-* 1999/05/22	acme		go to DISCONNECTED in disconnect_confirm_intr,
-*				instead of chan_disc.
-* 1999/05/16	marcelo		fixed timer initialization in SVCs
-* 1999/01/05	acme		x25_configure now get (most of) all
-*				parameters...
-* 1999/01/05	acme		pktlen now (correctly) uses log2 (value
-*				configured)
-* 1999/01/03	acme		judicious use of data types (u8, u16, u32, etc)
-* 1999/01/03	acme		cyx_isr: reset dpmbase to acknowledge
-*				indication (interrupt from cyclom 2x)
-* 1999/01/02	acme		cyx_isr: first hackings...
-* 1999/01/0203  acme 		when initializing an array don't give less
-*				elements than declared...
-* 				example: char send_cmd[6] = "?\xFF\x10";
-*          			you'll gonna lose a couple hours, 'cause your
-*				brain won't admit that there's an error in the
-*				above declaration...  the side effect is that
-*				memset is put into the unresolved symbols
-*				instead of using the inline memset functions...
-* 1999/01/02    acme 		began chan_connect, chan_send, x25_send
-* 1998/12/31	acme		x25_configure
-*				this code can be compiled as non module
-* 1998/12/27	acme		code cleanup
-*				IPX code wiped out! let's decrease code
-*				complexity for now, remember: I'm learning! :)
-*                               bps_to_speed_code OK
-* 1998/12/26	acme		Minimal debug code cleanup
-* 1998/08/08	acme		Initial version.
-*/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#define CYCLOMX_X25_DEBUG 1
-
-#include <linux/ctype.h>	/* isdigit() */
-#include <linux/errno.h>	/* return codes */
-#include <linux/if_arp.h>       /* ARPHRD_HWX25 */
-#include <linux/kernel.h>	/* printk(), and other useful stuff */
-#include <linux/module.h>
-#include <linux/string.h>	/* inline memset(), etc. */
-#include <linux/sched.h>
-#include <linux/slab.h>		/* kmalloc(), kfree() */
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/wanrouter.h>	/* WAN router definitions */
-
-#include <asm/byteorder.h>	/* htons(), etc. */
-
-#include <linux/cyclomx.h>	/* Cyclom 2X common user API definitions */
-#include <linux/cycx_x25.h>	/* X.25 firmware API definitions */
-
-#include <net/x25device.h>
-
-/* Defines & Macros */
-#define CYCX_X25_MAX_CMD_RETRY 5
-#define CYCX_X25_CHAN_MTU 2048	/* unfragmented logical channel MTU */
-
-/* Data Structures */
-/* This is an extension of the 'struct net_device' we create for each network
-   interface to keep the rest of X.25 channel-specific data. */
-struct cycx_x25_channel {
-	/* This member must be first. */
-	struct net_device *slave;	/* WAN slave */
-
-	char name[WAN_IFNAME_SZ+1];	/* interface name, ASCIIZ */
-	char addr[WAN_ADDRESS_SZ+1];	/* media address, ASCIIZ */
-	char *local_addr;		/* local media address, ASCIIZ -
-					   svc thru crossover cable */
-	s16 lcn;			/* logical channel number/conn.req.key*/
-	u8 link;
-	struct timer_list timer;	/* timer used for svc channel disc. */
-	u16 protocol;			/* ethertype, 0 - multiplexed */
-	u8 svc;				/* 0 - permanent, 1 - switched */
-	u8 state;			/* channel state */
-	u8 drop_sequence;		/* mark sequence for dropping */
-	u32 idle_tmout;			/* sec, before disconnecting */
-	struct sk_buff *rx_skb;		/* receive socket buffer */
-	struct cycx_device *card;	/* -> owner */
-	struct net_device_stats ifstats;/* interface statistics */
-};
-
-/* Function Prototypes */
-/* WAN link driver entry points. These are called by the WAN router module. */
-static int cycx_wan_update(struct wan_device *wandev),
-	   cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev,
-			   wanif_conf_t *conf),
-	   cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev);
-
-/* Network device interface */
-static int cycx_netdevice_init(struct net_device *dev);
-static int cycx_netdevice_open(struct net_device *dev);
-static int cycx_netdevice_stop(struct net_device *dev);
-static int cycx_netdevice_hard_header(struct sk_buff *skb,
-				      struct net_device *dev, u16 type,
-				      const void *daddr, const void *saddr,
-				      unsigned len);
-static int cycx_netdevice_rebuild_header(struct sk_buff *skb);
-static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb,
-							struct net_device *dev);
-
-static struct net_device_stats *
-			cycx_netdevice_get_stats(struct net_device *dev);
-
-/* Interrupt handlers */
-static void cycx_x25_irq_handler(struct cycx_device *card),
-	    cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_log(struct cycx_device *card,
-			     struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_stat(struct cycx_device *card,
-			      struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_connect_confirm(struct cycx_device *card,
-					 struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_disconnect_confirm(struct cycx_device *card,
-					    struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_connect(struct cycx_device *card,
-				 struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_disconnect(struct cycx_device *card,
-				    struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_spurious(struct cycx_device *card,
-				  struct cycx_x25_cmd *cmd);
-
-/* X.25 firmware interface functions */
-static int cycx_x25_configure(struct cycx_device *card,
-			      struct cycx_x25_config *conf),
-	   cycx_x25_get_stats(struct cycx_device *card),
-	   cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm,
-			 int len, void *buf),
-	   cycx_x25_connect_response(struct cycx_device *card,
-				struct cycx_x25_channel *chan),
-	   cycx_x25_disconnect_response(struct cycx_device *card, u8 link,
-			   		u8 lcn);
-
-/* channel functions */
-static int cycx_x25_chan_connect(struct net_device *dev),
-	   cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb);
-
-static void cycx_x25_chan_disconnect(struct net_device *dev),
-	    cycx_x25_chan_send_event(struct net_device *dev, u8 event);
-
-/* Miscellaneous functions */
-static void cycx_x25_set_chan_state(struct net_device *dev, u8 state),
-	    cycx_x25_chan_timer(unsigned long d);
-
-static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble),
-	    reset_timer(struct net_device *dev);
-
-static u8 bps_to_speed_code(u32 bps);
-static u8 cycx_log2(u32 n);
-
-static unsigned dec_to_uint(u8 *str, int len);
-
-static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev,
-						  s16 lcn);
-static struct net_device *
-	cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte);
-
-static void cycx_x25_chan_setup(struct net_device *dev);
-
-#ifdef CYCLOMX_X25_DEBUG
-static void hex_dump(char *msg, unsigned char *p, int len);
-static void cycx_x25_dump_config(struct cycx_x25_config *conf);
-static void cycx_x25_dump_stats(struct cycx_x25_stats *stats);
-static void cycx_x25_dump_devs(struct wan_device *wandev);
-#else
-#define hex_dump(msg, p, len)
-#define cycx_x25_dump_config(conf)
-#define cycx_x25_dump_stats(stats)
-#define cycx_x25_dump_devs(wandev)
-#endif
-/* Public Functions */
-
-/* X.25 Protocol Initialization routine.
- *
- * This routine is called by the main Cyclom 2X module during setup.  At this
- * point adapter is completely initialized and X.25 firmware is running.
- *  o configure adapter
- *  o initialize protocol-specific fields of the adapter data space.
- *
- * Return:	0	o.k.
- *		< 0	failure.  */
-int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf)
-{
-	struct cycx_x25_config cfg;
-
-	/* Verify configuration ID */
-	if (conf->config_id != WANCONFIG_X25) {
-		pr_info("%s: invalid configuration ID %u!\n",
-			card->devname, conf->config_id);
-		return -EINVAL;
-	}
-
-	/* Initialize protocol-specific fields */
-	card->mbox  = card->hw.dpmbase + X25_MBOX_OFFS;
-	card->u.x.connection_keys = 0;
-	spin_lock_init(&card->u.x.lock);
-
-	/* Configure adapter. Here we set reasonable defaults, then parse
-	 * device configuration structure and set configuration options.
-	 * Most configuration options are verified and corrected (if
-	 * necessary) since we can't rely on the adapter to do so and don't
-	 * want it to fail either. */
-	memset(&cfg, 0, sizeof(cfg));
-	cfg.link = 0;
-	cfg.clock = conf->clocking == WANOPT_EXTERNAL ? 8 : 55;
-	cfg.speed = bps_to_speed_code(conf->bps);
-	cfg.n3win = 7;
-	cfg.n2win = 2;
-	cfg.n2 = 5;
-	cfg.nvc = 1;
-	cfg.npvc = 1;
-	cfg.flags = 0x02; /* default = V35 */
-	cfg.t1 = 10;   /* line carrier timeout */
-	cfg.t2 = 29;   /* tx timeout */
-	cfg.t21 = 180; /* CALL timeout */
-	cfg.t23 = 180; /* CLEAR timeout */
-
-	/* adjust MTU */
-	if (!conf->mtu || conf->mtu >= 512)
-		card->wandev.mtu = 512;
-	else if (conf->mtu >= 256)
-		card->wandev.mtu = 256;
-	else if (conf->mtu >= 128)
-		card->wandev.mtu = 128;
-	else
-		card->wandev.mtu = 64;
-
-	cfg.pktlen = cycx_log2(card->wandev.mtu);
-
-	if (conf->station == WANOPT_DTE) {
-		cfg.locaddr = 3; /* DTE */
-		cfg.remaddr = 1; /* DCE */
-	} else {
-		cfg.locaddr = 1; /* DCE */
-		cfg.remaddr = 3; /* DTE */
-	}
-
-	if (conf->interface == WANOPT_RS232)
-	        cfg.flags = 0;      /* FIXME just reset the 2nd bit */
-
-	if (conf->u.x25.hi_pvc) {
-		card->u.x.hi_pvc = min_t(unsigned int, conf->u.x25.hi_pvc, 4095);
-		card->u.x.lo_pvc = min_t(unsigned int, conf->u.x25.lo_pvc, card->u.x.hi_pvc);
-	}
-
-	if (conf->u.x25.hi_svc) {
-		card->u.x.hi_svc = min_t(unsigned int, conf->u.x25.hi_svc, 4095);
-		card->u.x.lo_svc = min_t(unsigned int, conf->u.x25.lo_svc, card->u.x.hi_svc);
-	}
-
-	if (card->u.x.lo_pvc == 255)
-		cfg.npvc = 0;
-	else
-		cfg.npvc = card->u.x.hi_pvc - card->u.x.lo_pvc + 1;
-
-	cfg.nvc = card->u.x.hi_svc - card->u.x.lo_svc + 1 + cfg.npvc;
-
-	if (conf->u.x25.hdlc_window)
-		cfg.n2win = min_t(unsigned int, conf->u.x25.hdlc_window, 7);
-
-	if (conf->u.x25.pkt_window)
-		cfg.n3win = min_t(unsigned int, conf->u.x25.pkt_window, 7);
-
-	if (conf->u.x25.t1)
-		cfg.t1 = min_t(unsigned int, conf->u.x25.t1, 30);
-
-	if (conf->u.x25.t2)
-		cfg.t2 = min_t(unsigned int, conf->u.x25.t2, 30);
-
-	if (conf->u.x25.t11_t21)
-		cfg.t21 = min_t(unsigned int, conf->u.x25.t11_t21, 30);
-
-	if (conf->u.x25.t13_t23)
-		cfg.t23 = min_t(unsigned int, conf->u.x25.t13_t23, 30);
-
-	if (conf->u.x25.n2)
-		cfg.n2 = min_t(unsigned int, conf->u.x25.n2, 30);
-
-	/* initialize adapter */
-	if (cycx_x25_configure(card, &cfg))
-		return -EIO;
-
-	/* Initialize protocol-specific fields of adapter data space */
-	card->wandev.bps	= conf->bps;
-	card->wandev.interface	= conf->interface;
-	card->wandev.clocking	= conf->clocking;
-	card->wandev.station	= conf->station;
-	card->isr		= cycx_x25_irq_handler;
-	card->exec		= NULL;
-	card->wandev.update	= cycx_wan_update;
-	card->wandev.new_if	= cycx_wan_new_if;
-	card->wandev.del_if	= cycx_wan_del_if;
-	card->wandev.state	= WAN_DISCONNECTED;
-
-	return 0;
-}
-
-/* WAN Device Driver Entry Points */
-/* Update device status & statistics. */
-static int cycx_wan_update(struct wan_device *wandev)
-{
-	/* sanity checks */
-	if (!wandev || !wandev->private)
-		return -EFAULT;
-
-	if (wandev->state == WAN_UNCONFIGURED)
-		return -ENODEV;
-
-	cycx_x25_get_stats(wandev->private);
-
-	return 0;
-}
-
-/* Create new logical channel.
- * This routine is called by the router when ROUTER_IFNEW IOCTL is being
- * handled.
- * o parse media- and hardware-specific configuration
- * o make sure that a new channel can be created
- * o allocate resources, if necessary
- * o prepare network device structure for registration.
- *
- * Return:	0	o.k.
- *		< 0	failure (channel will not be created) */
-static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev,
-			   wanif_conf_t *conf)
-{
-	struct cycx_device *card = wandev->private;
-	struct cycx_x25_channel *chan;
-	int err = 0;
-
-	if (!conf->name[0] || strlen(conf->name) > WAN_IFNAME_SZ) {
-		pr_info("%s: invalid interface name!\n", card->devname);
-		return -EINVAL;
-	}
-
-	dev = alloc_netdev(sizeof(struct cycx_x25_channel), conf->name,
-			     cycx_x25_chan_setup);
-	if (!dev)
-		return -ENOMEM;
-
-	chan = netdev_priv(dev);
-	strcpy(chan->name, conf->name);
-	chan->card = card;
-	chan->link = conf->port;
-	chan->protocol = conf->protocol ? ETH_P_X25 : ETH_P_IP;
-	chan->rx_skb = NULL;
-	/* only used in svc connected thru crossover cable */
-	chan->local_addr = NULL;
-
-	if (conf->addr[0] == '@') {	/* SVC */
-		int len = strlen(conf->local_addr);
-
-		if (len) {
-			if (len > WAN_ADDRESS_SZ) {
-				pr_err("%s: %s local addr too long!\n",
-				       wandev->name, chan->name);
-				err = -EINVAL;
-				goto error;
-			} else {
-				chan->local_addr = kmalloc(len + 1, GFP_KERNEL);
-
-				if (!chan->local_addr) {
-					err = -ENOMEM;
-					goto error;
-				}
-			}
-
-			strncpy(chan->local_addr, conf->local_addr,
-				WAN_ADDRESS_SZ);
-		}
-
-		chan->svc = 1;
-		strncpy(chan->addr, &conf->addr[1], WAN_ADDRESS_SZ);
-		init_timer(&chan->timer);
-		chan->timer.function	= cycx_x25_chan_timer;
-		chan->timer.data	= (unsigned long)dev;
-
-		/* Set channel timeouts (default if not specified) */
-		chan->idle_tmout = conf->idle_timeout ? conf->idle_timeout : 90;
-	} else if (isdigit(conf->addr[0])) {	/* PVC */
-		s16 lcn = dec_to_uint(conf->addr, 0);
-
-		if (lcn >= card->u.x.lo_pvc && lcn <= card->u.x.hi_pvc)
-			chan->lcn = lcn;
-		else {
-			pr_err("%s: PVC %u is out of range on interface %s!\n",
-			       wandev->name, lcn, chan->name);
-			err = -EINVAL;
-			goto error;
-		}
-	} else {
-		pr_err("%s: invalid media address on interface %s!\n",
-		       wandev->name, chan->name);
-		err = -EINVAL;
-		goto error;
-	}
-
-	return 0;
-
-error:
-	free_netdev(dev);
-	return err;
-}
-
-/* Delete logical channel. */
-static int cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	if (chan->svc) {
-		kfree(chan->local_addr);
-		if (chan->state == WAN_CONNECTED)
-			del_timer(&chan->timer);
-	}
-
-	return 0;
-}
-
-
-/* Network Device Interface */
-
-static const struct header_ops cycx_header_ops = {
-	.create = cycx_netdevice_hard_header,
-	.rebuild = cycx_netdevice_rebuild_header,
-};
-
-static const struct net_device_ops cycx_netdev_ops = {
-	.ndo_init	= cycx_netdevice_init,
-	.ndo_open	= cycx_netdevice_open,
-	.ndo_stop	= cycx_netdevice_stop,
-	.ndo_start_xmit	= cycx_netdevice_hard_start_xmit,
-	.ndo_get_stats	= cycx_netdevice_get_stats,
-};
-
-static void cycx_x25_chan_setup(struct net_device *dev)
-{
-	/* Initialize device driver entry points */
-	dev->netdev_ops		= &cycx_netdev_ops;
-	dev->header_ops		= &cycx_header_ops;
-
-	/* Initialize media-specific parameters */
-	dev->mtu		= CYCX_X25_CHAN_MTU;
-	dev->type		= ARPHRD_HWX25;	/* ARP h/w type */
-	dev->hard_header_len	= 0;		/* media header length */
-	dev->addr_len		= 0;		/* hardware address length */
-}
-
-/* Initialize Linux network interface.
- *
- * This routine is called only once for each interface, during Linux network
- * interface registration.  Returning anything but zero will fail interface
- * registration. */
-static int cycx_netdevice_init(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-	struct wan_device *wandev = &card->wandev;
-
-	if (!chan->svc)
-		*(__be16*)dev->dev_addr = htons(chan->lcn);
-
-	/* Initialize hardware parameters (just for reference) */
-	dev->irq		= wandev->irq;
-	dev->dma		= wandev->dma;
-	dev->base_addr		= wandev->ioport;
-	dev->mem_start		= (unsigned long)wandev->maddr;
-	dev->mem_end		= (unsigned long)(wandev->maddr +
-						  wandev->msize - 1);
-	dev->flags		|= IFF_NOARP;
-
-	/* Set transmit buffer queue length */
-	dev->tx_queue_len	= 10;
-
-	/* Initialize socket buffers */
-	cycx_x25_set_chan_state(dev, WAN_DISCONNECTED);
-
-	return 0;
-}
-
-/* Open network interface.
- * o prevent module from unloading by incrementing use count
- * o if link is disconnected then initiate connection
- *
- * Return 0 if O.k. or errno.  */
-static int cycx_netdevice_open(struct net_device *dev)
-{
-	if (netif_running(dev))
-		return -EBUSY; /* only one open is allowed */
-
-	netif_start_queue(dev);
-	return 0;
-}
-
-/* Close network interface.
- * o reset flags.
- * o if there's no more open channels then disconnect physical link. */
-static int cycx_netdevice_stop(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	netif_stop_queue(dev);
-
-	if (chan->state == WAN_CONNECTED || chan->state == WAN_CONNECTING)
-		cycx_x25_chan_disconnect(dev);
-
-	return 0;
-}
-
-/* Build media header.
- * o encapsulate packet according to encapsulation type.
- *
- * The trick here is to put packet type (Ethertype) into 'protocol' field of
- * the socket buffer, so that we don't forget it.  If encapsulation fails,
- * set skb->protocol to 0 and discard packet later.
- *
- * Return:	media header length. */
-static int cycx_netdevice_hard_header(struct sk_buff *skb,
-				      struct net_device *dev, u16 type,
-				      const void *daddr, const void *saddr,
-				      unsigned len)
-{
-	skb->protocol = htons(type);
-
-	return dev->hard_header_len;
-}
-
-/* * Re-build media header.
- * Return:	1	physical address resolved.
- *		0	physical address not resolved */
-static int cycx_netdevice_rebuild_header(struct sk_buff *skb)
-{
-	return 1;
-}
-
-/* Send a packet on a network interface.
- * o set busy flag (marks start of the transmission).
- * o check link state. If link is not up, then drop the packet.
- * o check channel status. If it's down then initiate a call.
- * o pass a packet to corresponding WAN device.
- * o free socket buffer
- *
- * Return:	0	complete (socket buffer must be freed)
- *		non-0	packet may be re-transmitted (tbusy must be set)
- *
- * Notes:
- * 1. This routine is called either by the protocol stack or by the "net
- *    bottom half" (with interrupts enabled).
- * 2. Setting tbusy flag will inhibit further transmit requests from the
- *    protocol stack and can be used for flow control with protocol layer. */
-static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb,
-							struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-
-	if (!chan->svc)
-		chan->protocol = ntohs(skb->protocol);
-
-	if (card->wandev.state != WAN_CONNECTED)
-		++chan->ifstats.tx_dropped;
-	else if (chan->svc && chan->protocol &&
-		 chan->protocol != ntohs(skb->protocol)) {
-		pr_info("%s: unsupported Ethertype 0x%04X on interface %s!\n",
-			card->devname, ntohs(skb->protocol), dev->name);
-		++chan->ifstats.tx_errors;
-	} else if (chan->protocol == ETH_P_IP) {
-		switch (chan->state) {
-		case WAN_DISCONNECTED:
-			if (cycx_x25_chan_connect(dev)) {
-				netif_stop_queue(dev);
-				return NETDEV_TX_BUSY;
-			}
-			/* fall thru */
-		case WAN_CONNECTED:
-			reset_timer(dev);
-			dev->trans_start = jiffies;
-			netif_stop_queue(dev);
-
-			if (cycx_x25_chan_send(dev, skb))
-				return NETDEV_TX_BUSY;
-
-			break;
-		default:
-			++chan->ifstats.tx_dropped;
-			++card->wandev.stats.tx_dropped;
-	}
-	} else { /* chan->protocol == ETH_P_X25 */
-		switch (skb->data[0]) {
-		case X25_IFACE_DATA:
-			break;
-		case X25_IFACE_CONNECT:
-			cycx_x25_chan_connect(dev);
-			goto free_packet;
-		case X25_IFACE_DISCONNECT:
-			cycx_x25_chan_disconnect(dev);
-			goto free_packet;
-	        default:
-			pr_info("%s: unknown %d x25-iface request on %s!\n",
-				card->devname, skb->data[0], dev->name);
-			++chan->ifstats.tx_errors;
-			goto free_packet;
-		}
-
-		skb_pull(skb, 1); /* Remove control byte */
-		reset_timer(dev);
-		dev->trans_start = jiffies;
-		netif_stop_queue(dev);
-
-		if (cycx_x25_chan_send(dev, skb)) {
-			/* prepare for future retransmissions */
-			skb_push(skb, 1);
-			return NETDEV_TX_BUSY;
-		}
-	}
-
-free_packet:
-	dev_kfree_skb(skb);
-
-	return NETDEV_TX_OK;
-}
-
-/* Get Ethernet-style interface statistics.
- * Return a pointer to struct net_device_stats */
-static struct net_device_stats *cycx_netdevice_get_stats(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	return chan ? &chan->ifstats : NULL;
-}
-
-/* Interrupt Handlers */
-/* X.25 Interrupt Service Routine. */
-static void cycx_x25_irq_handler(struct cycx_device *card)
-{
-	struct cycx_x25_cmd cmd;
-	u16 z = 0;
-
-	card->in_isr = 1;
-	card->buff_int_mode_unbusy = 0;
-	cycx_peek(&card->hw, X25_RXMBOX_OFFS, &cmd, sizeof(cmd));
-
-	switch (cmd.command) {
-	case X25_DATA_INDICATION:
-		cycx_x25_irq_rx(card, &cmd);
-		break;
-	case X25_ACK_FROM_VC:
-		cycx_x25_irq_tx(card, &cmd);
-		break;
-	case X25_LOG:
-		cycx_x25_irq_log(card, &cmd);
-		break;
-	case X25_STATISTIC:
-		cycx_x25_irq_stat(card, &cmd);
-		break;
-	case X25_CONNECT_CONFIRM:
-		cycx_x25_irq_connect_confirm(card, &cmd);
-		break;
-	case X25_CONNECT_INDICATION:
-		cycx_x25_irq_connect(card, &cmd);
-		break;
-	case X25_DISCONNECT_INDICATION:
-		cycx_x25_irq_disconnect(card, &cmd);
-		break;
-	case X25_DISCONNECT_CONFIRM:
-		cycx_x25_irq_disconnect_confirm(card, &cmd);
-		break;
-	case X25_LINE_ON:
-		cycx_set_state(card, WAN_CONNECTED);
-		break;
-	case X25_LINE_OFF:
-		cycx_set_state(card, WAN_DISCONNECTED);
-		break;
-	default:
-		cycx_x25_irq_spurious(card, &cmd);
-		break;
-	}
-
-	cycx_poke(&card->hw, 0, &z, sizeof(z));
-	cycx_poke(&card->hw, X25_RXMBOX_OFFS, &z, sizeof(z));
-	card->in_isr = 0;
-}
-
-/* Transmit interrupt handler.
- *	o Release socket buffer
- *	o Clear 'tbusy' flag */
-static void cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd)
-{
-	struct net_device *dev;
-	struct wan_device *wandev = &card->wandev;
-	u8 lcn;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-
-	/* unbusy device and then dev_tint(); */
-	dev = cycx_x25_get_dev_by_lcn(wandev, lcn);
-	if (dev) {
-		card->buff_int_mode_unbusy = 1;
-		netif_wake_queue(dev);
-	} else
-		pr_err("%s:ackvc for inexistent lcn %d\n", card->devname, lcn);
-}
-
-/* Receive interrupt handler.
- * This routine handles fragmented IP packets using M-bit according to the
- * RFC1356.
- * o map logical channel number to network interface.
- * o allocate socket buffer or append received packet to the existing one.
- * o if M-bit is reset (i.e. it's the last packet in a sequence) then
- *   decapsulate packet and pass socket buffer to the protocol stack.
- *
- * Notes:
- * 1. When allocating a socket buffer, if M-bit is set then more data is
- *    coming and we have to allocate buffer for the maximum IP packet size
- *    expected on this channel.
- * 2. If something goes wrong and X.25 packet has to be dropped (e.g. no
- *    socket buffers available) the whole packet sequence must be discarded. */
-static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev;
-	struct cycx_x25_channel *chan;
-	struct sk_buff *skb;
-	u8 bitm, lcn;
-	int pktlen = cmd->len - 5;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	cycx_peek(&card->hw, cmd->buf + 4, &bitm, sizeof(bitm));
-	bitm &= 0x10;
-
-	dev = cycx_x25_get_dev_by_lcn(wandev, lcn);
-	if (!dev) {
-		/* Invalid channel, discard packet */
-		pr_info("%s: receiving on orphaned LCN %d!\n",
-			card->devname, lcn);
-		return;
-	}
-
-	chan = netdev_priv(dev);
-	reset_timer(dev);
-
-	if (chan->drop_sequence) {
-		if (!bitm)
-			chan->drop_sequence = 0;
-		else
-			return;
-	}
-
-	if ((skb = chan->rx_skb) == NULL) {
-		/* Allocate new socket buffer */
-		int bufsize = bitm ? dev->mtu : pktlen;
-
-		if ((skb = dev_alloc_skb((chan->protocol == ETH_P_X25 ? 1 : 0) +
-					 bufsize +
-					 dev->hard_header_len)) == NULL) {
-			pr_info("%s: no socket buffers available!\n",
-				card->devname);
-			chan->drop_sequence = 1;
-			++chan->ifstats.rx_dropped;
-			return;
-		}
-
-		if (chan->protocol == ETH_P_X25) /* X.25 socket layer control */
-			/* 0 = data packet (dev_alloc_skb zeroed skb->data) */
-			skb_put(skb, 1);
-
-		skb->dev = dev;
-		skb->protocol = htons(chan->protocol);
-		chan->rx_skb = skb;
-	}
-
-	if (skb_tailroom(skb) < pktlen) {
-		/* No room for the packet. Call off the whole thing! */
-		dev_kfree_skb_irq(skb);
-		chan->rx_skb = NULL;
-
-		if (bitm)
-			chan->drop_sequence = 1;
-
-		pr_info("%s: unexpectedly long packet sequence on interface %s!\n",
-			card->devname, dev->name);
-		++chan->ifstats.rx_length_errors;
-		return;
-	}
-
-	/* Append packet to the socket buffer  */
-	cycx_peek(&card->hw, cmd->buf + 5, skb_put(skb, pktlen), pktlen);
-
-	if (bitm)
-		return; /* more data is coming */
-
-	chan->rx_skb = NULL;		/* dequeue packet */
-
-	++chan->ifstats.rx_packets;
-	chan->ifstats.rx_bytes += pktlen;
-
-	skb_reset_mac_header(skb);
-	netif_rx(skb);
-}
-
-/* Connect interrupt handler. */
-static void cycx_x25_irq_connect(struct cycx_device *card,
-				 struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev = NULL;
-	struct cycx_x25_channel *chan;
-	u8 d[32],
-	   loc[24],
-	   rem[24];
-	u8 lcn, sizeloc, sizerem;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	cycx_peek(&card->hw, cmd->buf + 5, &sizeloc, sizeof(sizeloc));
-	cycx_peek(&card->hw, cmd->buf + 6, d, cmd->len - 6);
-
-	sizerem = sizeloc >> 4;
-	sizeloc &= 0x0F;
-
-	loc[0] = rem[0] = '\0';
-
-	if (sizeloc)
-		nibble_to_byte(d, loc, sizeloc, 0);
-
-	if (sizerem)
-		nibble_to_byte(d + (sizeloc >> 1), rem, sizerem, sizeloc & 1);
-
-	dprintk(1, KERN_INFO "%s:lcn=%d, local=%s, remote=%s\n",
-			  __func__, lcn, loc, rem);
-
-	dev = cycx_x25_get_dev_by_dte_addr(wandev, rem);
-	if (!dev) {
-		/* Invalid channel, discard packet */
-		pr_info("%s: connect not expected: remote %s!\n",
-			card->devname, rem);
-		return;
-	}
-
-	chan = netdev_priv(dev);
-	chan->lcn = lcn;
-	cycx_x25_connect_response(card, chan);
-	cycx_x25_set_chan_state(dev, WAN_CONNECTED);
-}
-
-/* Connect confirm interrupt handler. */
-static void cycx_x25_irq_connect_confirm(struct cycx_device *card,
-					 struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev;
-	struct cycx_x25_channel *chan;
-	u8 lcn, key;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	cycx_peek(&card->hw, cmd->buf + 1, &key, sizeof(key));
-	dprintk(1, KERN_INFO "%s: %s:lcn=%d, key=%d\n",
-			  card->devname, __func__, lcn, key);
-
-	dev = cycx_x25_get_dev_by_lcn(wandev, -key);
-	if (!dev) {
-		/* Invalid channel, discard packet */
-		clear_bit(--key, (void*)&card->u.x.connection_keys);
-		pr_info("%s: connect confirm not expected: lcn %d, key=%d!\n",
-			card->devname, lcn, key);
-		return;
-	}
-
-	clear_bit(--key, (void*)&card->u.x.connection_keys);
-	chan = netdev_priv(dev);
-	chan->lcn = lcn;
-	cycx_x25_set_chan_state(dev, WAN_CONNECTED);
-}
-
-/* Disconnect confirm interrupt handler. */
-static void cycx_x25_irq_disconnect_confirm(struct cycx_device *card,
-					    struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev;
-	u8 lcn;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	dprintk(1, KERN_INFO "%s: %s:lcn=%d\n",
-			  card->devname, __func__, lcn);
-	dev = cycx_x25_get_dev_by_lcn(wandev, lcn);
-	if (!dev) {
-		/* Invalid channel, discard packet */
-		pr_info("%s:disconnect confirm not expected!:lcn %d\n",
-			card->devname, lcn);
-		return;
-	}
-
-	cycx_x25_set_chan_state(dev, WAN_DISCONNECTED);
-}
-
-/* disconnect interrupt handler. */
-static void cycx_x25_irq_disconnect(struct cycx_device *card,
-				    struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev;
-	u8 lcn;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	dprintk(1, KERN_INFO "%s:lcn=%d\n", __func__, lcn);
-
-	dev = cycx_x25_get_dev_by_lcn(wandev, lcn);
-	if (dev) {
-		struct cycx_x25_channel *chan = netdev_priv(dev);
-
-		cycx_x25_disconnect_response(card, chan->link, lcn);
-		cycx_x25_set_chan_state(dev, WAN_DISCONNECTED);
-	} else
-		cycx_x25_disconnect_response(card, 0, lcn);
-}
-
-/* LOG interrupt handler. */
-static void cycx_x25_irq_log(struct cycx_device *card, struct cycx_x25_cmd *cmd)
-{
-#if CYCLOMX_X25_DEBUG
-	char bf[20];
-	u16 size, toread, link, msg_code;
-	u8 code, routine;
-
-	cycx_peek(&card->hw, cmd->buf, &msg_code, sizeof(msg_code));
-	cycx_peek(&card->hw, cmd->buf + 2, &link, sizeof(link));
-	cycx_peek(&card->hw, cmd->buf + 4, &size, sizeof(size));
-	/* at most 20 bytes are available... thanks to Daniela :) */
-	toread = size < 20 ? size : 20;
-	cycx_peek(&card->hw, cmd->buf + 10, &bf, toread);
-	cycx_peek(&card->hw, cmd->buf + 10 + toread, &code, 1);
-	cycx_peek(&card->hw, cmd->buf + 10 + toread + 1, &routine, 1);
-
-	pr_info("cycx_x25_irq_handler: X25_LOG (0x4500) indic.:\n");
-	pr_info("cmd->buf=0x%X\n", cmd->buf);
-	pr_info("Log message code=0x%X\n", msg_code);
-	pr_info("Link=%d\n", link);
-	pr_info("log code=0x%X\n", code);
-	pr_info("log routine=0x%X\n", routine);
-	pr_info("Message size=%d\n", size);
-	hex_dump("Message", bf, toread);
-#endif
-}
-
-/* STATISTIC interrupt handler. */
-static void cycx_x25_irq_stat(struct cycx_device *card,
-			      struct cycx_x25_cmd *cmd)
-{
-	cycx_peek(&card->hw, cmd->buf, &card->u.x.stats,
-		  sizeof(card->u.x.stats));
-	hex_dump("cycx_x25_irq_stat", (unsigned char*)&card->u.x.stats,
-		 sizeof(card->u.x.stats));
-	cycx_x25_dump_stats(&card->u.x.stats);
-	wake_up_interruptible(&card->wait_stats);
-}
-
-/* Spurious interrupt handler.
- * o print a warning
- * If number of spurious interrupts exceeded some limit, then ??? */
-static void cycx_x25_irq_spurious(struct cycx_device *card,
-				  struct cycx_x25_cmd *cmd)
-{
-	pr_info("%s: spurious interrupt (0x%X)!\n",
-		card->devname, cmd->command);
-}
-#ifdef CYCLOMX_X25_DEBUG
-static void hex_dump(char *msg, unsigned char *p, int len)
-{
-	print_hex_dump(KERN_INFO, msg, DUMP_PREFIX_OFFSET, 16, 1,
-		       p, len, true);
-}
-#endif
-
-/* Cyclom 2X Firmware-Specific Functions */
-/* Exec X.25 command. */
-static int x25_exec(struct cycx_device *card, int command, int link,
-		    void *d1, int len1, void *d2, int len2)
-{
-	struct cycx_x25_cmd c;
-	unsigned long flags;
-	u32 addr = 0x1200 + 0x2E0 * link + 0x1E2;
-	u8 retry = CYCX_X25_MAX_CMD_RETRY;
-	int err = 0;
-
-	c.command = command;
-	c.link = link;
-	c.len = len1 + len2;
-
-	spin_lock_irqsave(&card->u.x.lock, flags);
-
-	/* write command */
-	cycx_poke(&card->hw, X25_MBOX_OFFS, &c, sizeof(c) - sizeof(c.buf));
-
-	/* write X.25 data */
-	if (d1) {
-		cycx_poke(&card->hw, addr, d1, len1);
-
-		if (d2) {
-			if (len2 > 254) {
-				u32 addr1 = 0xA00 + 0x400 * link;
-
-				cycx_poke(&card->hw, addr + len1, d2, 249);
-				cycx_poke(&card->hw, addr1, ((u8*)d2) + 249,
-					  len2 - 249);
-			} else
-				cycx_poke(&card->hw, addr + len1, d2, len2);
-		}
-	}
-
-	/* generate interruption, executing command */
-	cycx_intr(&card->hw);
-
-	/* wait till card->mbox == 0 */
-	do {
-		err = cycx_exec(card->mbox);
-	} while (retry-- && err);
-
-	spin_unlock_irqrestore(&card->u.x.lock, flags);
-
-	return err;
-}
-
-/* Configure adapter. */
-static int cycx_x25_configure(struct cycx_device *card,
-			      struct cycx_x25_config *conf)
-{
-	struct {
-		u16 nlinks;
-		struct cycx_x25_config conf[2];
-	} x25_cmd_conf;
-
-	memset(&x25_cmd_conf, 0, sizeof(x25_cmd_conf));
-	x25_cmd_conf.nlinks = 2;
-	x25_cmd_conf.conf[0] = *conf;
-	/* FIXME: we need to find a way in the wanrouter framework
-		  to configure the second link, for now lets use it
-		  with the same config from the first link, fixing
-		  the interface type to RS232, the speed in 38400 and
-		  the clock to external */
-	x25_cmd_conf.conf[1] = *conf;
-	x25_cmd_conf.conf[1].link = 1;
-	x25_cmd_conf.conf[1].speed = 5; /* 38400 */
-	x25_cmd_conf.conf[1].clock = 8;
-	x25_cmd_conf.conf[1].flags = 0; /* default = RS232 */
-
-	cycx_x25_dump_config(&x25_cmd_conf.conf[0]);
-	cycx_x25_dump_config(&x25_cmd_conf.conf[1]);
-
-	return x25_exec(card, X25_CONFIG, 0,
-			&x25_cmd_conf, sizeof(x25_cmd_conf), NULL, 0);
-}
-
-/* Get protocol statistics. */
-static int cycx_x25_get_stats(struct cycx_device *card)
-{
-	/* the firmware expects 20 in the size field!!!
-	   thanks to Daniela */
-	int err = x25_exec(card, X25_STATISTIC, 0, NULL, 20, NULL, 0);
-
-	if (err)
-		return err;
-
-	interruptible_sleep_on(&card->wait_stats);
-
-	if (signal_pending(current))
-		return -EINTR;
-
-	card->wandev.stats.rx_packets = card->u.x.stats.n2_rx_frames;
-	card->wandev.stats.rx_over_errors = card->u.x.stats.rx_over_errors;
-	card->wandev.stats.rx_crc_errors = card->u.x.stats.rx_crc_errors;
-	card->wandev.stats.rx_length_errors = 0; /* not available from fw */
-	card->wandev.stats.rx_frame_errors = 0; /* not available from fw */
-	card->wandev.stats.rx_missed_errors = card->u.x.stats.rx_aborts;
-	card->wandev.stats.rx_dropped = 0; /* not available from fw */
-	card->wandev.stats.rx_errors = 0; /* not available from fw */
-	card->wandev.stats.tx_packets = card->u.x.stats.n2_tx_frames;
-	card->wandev.stats.tx_aborted_errors = card->u.x.stats.tx_aborts;
-	card->wandev.stats.tx_dropped = 0; /* not available from fw */
-	card->wandev.stats.collisions = 0; /* not available from fw */
-	card->wandev.stats.tx_errors = 0; /* not available from fw */
-
-	cycx_x25_dump_devs(&card->wandev);
-
-	return 0;
-}
-
-/* return the number of nibbles */
-static int byte_to_nibble(u8 *s, u8 *d, char *nibble)
-{
-	int i = 0;
-
-	if (*nibble && *s) {
-		d[i] |= *s++ - '0';
-		*nibble = 0;
-		++i;
-	}
-
-	while (*s) {
-		d[i] = (*s - '0') << 4;
-		if (*(s + 1))
-			d[i] |= *(s + 1) - '0';
-		else {
-			*nibble = 1;
-			break;
-		}
-		++i;
-		s += 2;
-	}
-
-	return i;
-}
-
-static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble)
-{
-	if (nibble) {
-		*d++ = '0' + (*s++ & 0x0F);
-		--len;
-	}
-
-	while (len) {
-		*d++ = '0' + (*s >> 4);
-
-		if (--len) {
-			*d++ = '0' + (*s & 0x0F);
-			--len;
-		} else break;
-
-		++s;
-	}
-
-	*d = '\0';
-}
-
-/* Place X.25 call. */
-static int x25_place_call(struct cycx_device *card,
-			  struct cycx_x25_channel *chan)
-{
-	int err = 0,
-	    len;
-	char d[64],
-	     nibble = 0,
-	     mylen = chan->local_addr ? strlen(chan->local_addr) : 0,
-	     remotelen = strlen(chan->addr);
-	u8 key;
-
-	if (card->u.x.connection_keys == ~0U) {
-		pr_info("%s: too many simultaneous connection requests!\n",
-			card->devname);
-		return -EAGAIN;
-	}
-
-	key = ffz(card->u.x.connection_keys);
-	set_bit(key, (void*)&card->u.x.connection_keys);
-	++key;
-	dprintk(1, KERN_INFO "%s:x25_place_call:key=%d\n", card->devname, key);
-	memset(d, 0, sizeof(d));
-	d[1] = key; /* user key */
-	d[2] = 0x10;
-	d[4] = 0x0B;
-
-	len = byte_to_nibble(chan->addr, d + 6, &nibble);
-
-	if (chan->local_addr)
-		len += byte_to_nibble(chan->local_addr, d + 6 + len, &nibble);
-
-	if (nibble)
-		++len;
-
-	d[5] = mylen << 4 | remotelen;
-	d[6 + len + 1] = 0xCC; /* TCP/IP over X.25, thanks to Daniela :) */
-
-	if ((err = x25_exec(card, X25_CONNECT_REQUEST, chan->link,
-			    &d, 7 + len + 1, NULL, 0)) != 0)
-		clear_bit(--key, (void*)&card->u.x.connection_keys);
-	else
-		chan->lcn = -key;
-
-	return err;
-}
-
-/* Place X.25 CONNECT RESPONSE. */
-static int cycx_x25_connect_response(struct cycx_device *card,
-				     struct cycx_x25_channel *chan)
-{
-	u8 d[8];
-
-	memset(d, 0, sizeof(d));
-	d[0] = d[3] = chan->lcn;
-	d[2] = 0x10;
-	d[4] = 0x0F;
-	d[7] = 0xCC; /* TCP/IP over X.25, thanks Daniela */
-
-	return x25_exec(card, X25_CONNECT_RESPONSE, chan->link, &d, 8, NULL, 0);
-}
-
-/* Place X.25 DISCONNECT RESPONSE.  */
-static int cycx_x25_disconnect_response(struct cycx_device *card, u8 link,
-					u8 lcn)
-{
-	char d[5];
-
-	memset(d, 0, sizeof(d));
-	d[0] = d[3] = lcn;
-	d[2] = 0x10;
-	d[4] = 0x17;
-
-	return x25_exec(card, X25_DISCONNECT_RESPONSE, link, &d, 5, NULL, 0);
-}
-
-/* Clear X.25 call.  */
-static int x25_clear_call(struct cycx_device *card, u8 link, u8 lcn, u8 cause,
-			  u8 diagn)
-{
-	u8 d[7];
-
-	memset(d, 0, sizeof(d));
-	d[0] = d[3] = lcn;
-	d[2] = 0x10;
-	d[4] = 0x13;
-	d[5] = cause;
-	d[6] = diagn;
-
-	return x25_exec(card, X25_DISCONNECT_REQUEST, link, d, 7, NULL, 0);
-}
-
-/* Send X.25 data packet. */
-static int cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm,
-			 int len, void *buf)
-{
-	u8 d[] = "?\xFF\x10??";
-
-	d[0] = d[3] = lcn;
-	d[4] = bitm;
-
-	return x25_exec(card, X25_DATA_REQUEST, link, &d, 5, buf, len);
-}
-
-/* Miscellaneous */
-/* Find network device by its channel number.  */
-static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev,
-						  s16 lcn)
-{
-	struct net_device *dev = wandev->dev;
-	struct cycx_x25_channel *chan;
-
-	while (dev) {
-		chan = netdev_priv(dev);
-
-		if (chan->lcn == lcn)
-			break;
-		dev = chan->slave;
-	}
-	return dev;
-}
-
-/* Find network device by its remote dte address. */
-static struct net_device *
-	cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte)
-{
-	struct net_device *dev = wandev->dev;
-	struct cycx_x25_channel *chan;
-
-	while (dev) {
-		chan = netdev_priv(dev);
-
-		if (!strcmp(chan->addr, dte))
-			break;
-		dev = chan->slave;
-	}
-	return dev;
-}
-
-/* Initiate connection on the logical channel.
- * o for PVC we just get channel configuration
- * o for SVCs place an X.25 call
- *
- * Return:	0	connected
- *		>0	connection in progress
- *		<0	failure */
-static int cycx_x25_chan_connect(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-
-	if (chan->svc) {
-		if (!chan->addr[0])
-			return -EINVAL; /* no destination address */
-
-		dprintk(1, KERN_INFO "%s: placing X.25 call to %s...\n",
-				  card->devname, chan->addr);
-
-		if (x25_place_call(card, chan))
-			return -EIO;
-
-		cycx_x25_set_chan_state(dev, WAN_CONNECTING);
-		return 1;
-	} else
-		cycx_x25_set_chan_state(dev, WAN_CONNECTED);
-
-	return 0;
-}
-
-/* Disconnect logical channel.
- * o if SVC then clear X.25 call */
-static void cycx_x25_chan_disconnect(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	if (chan->svc) {
-		x25_clear_call(chan->card, chan->link, chan->lcn, 0, 0);
-		cycx_x25_set_chan_state(dev, WAN_DISCONNECTING);
-	} else
-		cycx_x25_set_chan_state(dev, WAN_DISCONNECTED);
-}
-
-/* Called by kernel timer */
-static void cycx_x25_chan_timer(unsigned long d)
-{
-	struct net_device *dev = (struct net_device *)d;
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	if (chan->state == WAN_CONNECTED)
-		cycx_x25_chan_disconnect(dev);
-	else
-		pr_err("%s: %s for svc (%s) not connected!\n",
-		       chan->card->devname, __func__, dev->name);
-}
-
-/* Set logical channel state. */
-static void cycx_x25_set_chan_state(struct net_device *dev, u8 state)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-	unsigned long flags;
-	char *string_state = NULL;
-
-	spin_lock_irqsave(&card->lock, flags);
-
-	if (chan->state != state) {
-		if (chan->svc && chan->state == WAN_CONNECTED)
-			del_timer(&chan->timer);
-
-		switch (state) {
-		case WAN_CONNECTED:
-			string_state = "connected!";
-			*(__be16*)dev->dev_addr = htons(chan->lcn);
-			netif_wake_queue(dev);
-			reset_timer(dev);
-
-			if (chan->protocol == ETH_P_X25)
-				cycx_x25_chan_send_event(dev,
-					X25_IFACE_CONNECT);
-
-			break;
-		case WAN_CONNECTING:
-			string_state = "connecting...";
-			break;
-		case WAN_DISCONNECTING:
-			string_state = "disconnecting...";
-			break;
-		case WAN_DISCONNECTED:
-			string_state = "disconnected!";
-
-			if (chan->svc) {
-				*(unsigned short*)dev->dev_addr = 0;
-				chan->lcn = 0;
-			}
-
-			if (chan->protocol == ETH_P_X25)
-				cycx_x25_chan_send_event(dev,
-					X25_IFACE_DISCONNECT);
-
-			netif_wake_queue(dev);
-			break;
-		}
-
-		pr_info("%s: interface %s %s\n",
-			card->devname, dev->name, string_state);
-		chan->state = state;
-	}
-
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-/* Send packet on a logical channel.
- *	When this function is called, tx_skb field of the channel data space
- *	points to the transmit socket buffer.  When transmission is complete,
- *	release socket buffer and reset 'tbusy' flag.
- *
- * Return:	0	- transmission complete
- *		1	- busy
- *
- * Notes:
- * 1. If packet length is greater than MTU for this channel, we'll fragment
- *    the packet into 'complete sequence' using M-bit.
- * 2. When transmission is complete, an event notification should be issued
- *    to the router.  */
-static int cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-	int bitm = 0;		/* final packet */
-	unsigned len = skb->len;
-
-	if (skb->len > card->wandev.mtu) {
-		len = card->wandev.mtu;
-		bitm = 0x10;		/* set M-bit (more data) */
-	}
-
-	if (cycx_x25_send(card, chan->link, chan->lcn, bitm, len, skb->data))
-		return 1;
-
-	if (bitm) {
-		skb_pull(skb, len);
-		return 1;
-	}
-
-	++chan->ifstats.tx_packets;
-	chan->ifstats.tx_bytes += len;
-
-	return 0;
-}
-
-/* Send event (connection, disconnection, etc) to X.25 socket layer */
-
-static void cycx_x25_chan_send_event(struct net_device *dev, u8 event)
-{
-	struct sk_buff *skb;
-	unsigned char *ptr;
-
-	if ((skb = dev_alloc_skb(1)) == NULL) {
-		pr_err("%s: out of memory\n", __func__);
-		return;
-	}
-
-	ptr  = skb_put(skb, 1);
-	*ptr = event;
-
-	skb->protocol = x25_type_trans(skb, dev);
-	netif_rx(skb);
-}
-
-/* Convert line speed in bps to a number used by cyclom 2x code. */
-static u8 bps_to_speed_code(u32 bps)
-{
-	u8 number = 0; /* defaults to the lowest (1200) speed ;> */
-
-	     if (bps >= 512000) number = 8;
-	else if (bps >= 256000) number = 7;
-	else if (bps >= 64000)  number = 6;
-	else if (bps >= 38400)  number = 5;
-	else if (bps >= 19200)  number = 4;
-	else if (bps >= 9600)   number = 3;
-	else if (bps >= 4800)   number = 2;
-	else if (bps >= 2400)   number = 1;
-
-	return number;
-}
-
-/* log base 2 */
-static u8 cycx_log2(u32 n)
-{
-	u8 log = 0;
-
-	if (!n)
-		return 0;
-
-	while (n > 1) {
-		n >>= 1;
-		++log;
-	}
-
-	return log;
-}
-
-/* Convert decimal string to unsigned integer.
- * If len != 0 then only 'len' characters of the string are converted. */
-static unsigned dec_to_uint(u8 *str, int len)
-{
-	unsigned val = 0;
-
-	if (!len)
-		len = strlen(str);
-
-	for (; len && isdigit(*str); ++str, --len)
-		val = (val * 10) + (*str - (unsigned) '0');
-
-	return val;
-}
-
-static void reset_timer(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	if (chan->svc)
-		mod_timer(&chan->timer, jiffies+chan->idle_tmout*HZ);
-}
-#ifdef CYCLOMX_X25_DEBUG
-static void cycx_x25_dump_config(struct cycx_x25_config *conf)
-{
-	pr_info("X.25 configuration\n");
-	pr_info("-----------------\n");
-	pr_info("link number=%d\n", conf->link);
-	pr_info("line speed=%d\n", conf->speed);
-	pr_info("clock=%sternal\n", conf->clock == 8 ? "Ex" : "In");
-	pr_info("# level 2 retransm.=%d\n", conf->n2);
-	pr_info("level 2 window=%d\n", conf->n2win);
-	pr_info("level 3 window=%d\n", conf->n3win);
-	pr_info("# logical channels=%d\n", conf->nvc);
-	pr_info("level 3 pkt len=%d\n", conf->pktlen);
-	pr_info("my address=%d\n", conf->locaddr);
-	pr_info("remote address=%d\n", conf->remaddr);
-	pr_info("t1=%d seconds\n", conf->t1);
-	pr_info("t2=%d seconds\n", conf->t2);
-	pr_info("t21=%d seconds\n", conf->t21);
-	pr_info("# PVCs=%d\n", conf->npvc);
-	pr_info("t23=%d seconds\n", conf->t23);
-	pr_info("flags=0x%x\n", conf->flags);
-}
-
-static void cycx_x25_dump_stats(struct cycx_x25_stats *stats)
-{
-	pr_info("X.25 statistics\n");
-	pr_info("--------------\n");
-	pr_info("rx_crc_errors=%d\n", stats->rx_crc_errors);
-	pr_info("rx_over_errors=%d\n", stats->rx_over_errors);
-	pr_info("n2_tx_frames=%d\n", stats->n2_tx_frames);
-	pr_info("n2_rx_frames=%d\n", stats->n2_rx_frames);
-	pr_info("tx_timeouts=%d\n", stats->tx_timeouts);
-	pr_info("rx_timeouts=%d\n", stats->rx_timeouts);
-	pr_info("n3_tx_packets=%d\n", stats->n3_tx_packets);
-	pr_info("n3_rx_packets=%d\n", stats->n3_rx_packets);
-	pr_info("tx_aborts=%d\n", stats->tx_aborts);
-	pr_info("rx_aborts=%d\n", stats->rx_aborts);
-}
-
-static void cycx_x25_dump_devs(struct wan_device *wandev)
-{
-	struct net_device *dev = wandev->dev;
-
-	pr_info("X.25 dev states\n");
-	pr_info("name: addr:           txoff:  protocol:\n");
-	pr_info("---------------------------------------\n");
-
-	while(dev) {
-		struct cycx_x25_channel *chan = netdev_priv(dev);
-
-		pr_info("%-5.5s %-15.15s   %d     ETH_P_%s\n",
-			chan->name, chan->addr, netif_queue_stopped(dev),
-			chan->protocol == ETH_P_IP ? "IP" : "X25");
-		dev = chan->slave;
-	}
-}
-
-#endif /* CYCLOMX_X25_DEBUG */
-/* End */
diff --git a/include/linux/cyclomx.h b/include/linux/cyclomx.h
deleted file mode 100644
index b88f7f428e58..000000000000
--- a/include/linux/cyclomx.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef	_CYCLOMX_H
-#define	_CYCLOMX_H
-/*
-* cyclomx.h	Cyclom 2X WAN Link Driver.
-*		User-level API definitions.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on wanpipe.h by Gene Kozin <genek@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 2000/07/13    acme		remove crap #if KERNEL_VERSION > blah
-* 2000/01/21    acme            rename cyclomx_open to cyclomx_mod_inc_use_count
-*                               and cyclomx_close to cyclomx_mod_dec_use_count
-* 1999/05/19	acme		wait_queue_head_t wait_stats(support for 2.3.*)
-* 1999/01/03	acme		judicious use of data types
-* 1998/12/27	acme		cleanup: PACKED not needed
-* 1998/08/08	acme		Version 0.0.1
-*/
-
-#include <linux/wanrouter.h>
-#include <linux/spinlock.h>
-
-#ifdef	__KERNEL__
-/* Kernel Interface */
-
-#include <linux/cycx_drv.h>	/* Cyclom 2X support module API definitions */
-#include <linux/cycx_cfm.h>	/* Cyclom 2X firmware module definitions */
-#ifdef CONFIG_CYCLOMX_X25
-#include <linux/cycx_x25.h>
-#endif
-
-/* Adapter Data Space.
- * This structure is needed because we handle multiple cards, otherwise
- * static data would do it.
- */
-struct cycx_device {
-	char devname[WAN_DRVNAME_SZ + 1];/* card name */
-	struct cycx_hw hw;		/* hardware configuration */
-	struct wan_device wandev;	/* WAN device data space */
-	u32 state_tick;			/* link state timestamp */
-	spinlock_t lock;
-	char in_isr;			/* interrupt-in-service flag */
-	char buff_int_mode_unbusy;      /* flag for carrying out dev_tint */
-	wait_queue_head_t wait_stats;  /* to wait for the STATS indication */
-	void __iomem *mbox;			/* -> mailbox */
-	void (*isr)(struct cycx_device* card);	/* interrupt service routine */
-	int (*exec)(struct cycx_device* card, void* u_cmd, void* u_data);
-	union {
-#ifdef CONFIG_CYCLOMX_X25
-		struct { /* X.25 specific data */
-			u32 lo_pvc;
-			u32 hi_pvc;
-			u32 lo_svc;
-			u32 hi_svc;
-			struct cycx_x25_stats stats;
-			spinlock_t lock;
-			u32 connection_keys;
-		} x;
-#endif
-	} u;
-};
-
-/* Public Functions */
-void cycx_set_state(struct cycx_device *card, int state);
-
-#ifdef CONFIG_CYCLOMX_X25
-int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf);
-#endif
-#endif	/* __KERNEL__ */
-#endif	/* _CYCLOMX_H */
diff --git a/include/linux/cycx_drv.h b/include/linux/cycx_drv.h
deleted file mode 100644
index 12fe6b0bfcff..000000000000
--- a/include/linux/cycx_drv.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
-* cycx_drv.h	CYCX Support Module.  Kernel API Definitions.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdladrv.h by Gene Kozin <genek@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 1999/10/23	acme		cycxhw_t cleanup
-* 1999/01/03	acme		more judicious use of data types...
-*				uclong, ucchar, etc deleted, the u8, u16, u32
-*				types are the portable way to go.
-* 1999/01/03	acme		judicious use of data types... u16, u32, etc
-* 1998/12/26	acme	 	FIXED_BUFFERS, CONF_OFFSET,
-*                               removal of cy_read{bwl}
-* 1998/08/08	acme	 	Initial version.
-*/
-#ifndef	_CYCX_DRV_H
-#define	_CYCX_DRV_H
-
-#define	CYCX_WINDOWSIZE	0x4000	/* default dual-port memory window size */
-#define	GEN_CYCX_INTR	0x02
-#define	RST_ENABLE	0x04
-#define	START_CPU	0x06
-#define	RST_DISABLE	0x08
-#define	FIXED_BUFFERS	0x08
-#define	TEST_PATTERN	0xaa55
-#define	CMD_OFFSET	0x20
-#define CONF_OFFSET     0x0380
-#define	RESET_OFFSET	0x3c00	/* For reset file load */
-#define	DATA_OFFSET	0x0100	/* For code and data files load */
-#define	START_OFFSET	0x3ff0	/* 80186 starts here */
-
-/**
- *	struct cycx_hw - Adapter hardware configuration
- *	@fwid - firmware ID
- *	@irq - interrupt request level
- *	@dpmbase - dual-port memory base
- *	@dpmsize - dual-port memory size
- *	@reserved - reserved for future use
- */
-struct cycx_hw {
-	u32 fwid;
-	int irq;
-	void __iomem *dpmbase;
-	u32 dpmsize;
-	u32 reserved[5];
-};
-
-/* Function Prototypes */
-extern int cycx_setup(struct cycx_hw *hw, void *sfm, u32 len, unsigned long base);
-extern int cycx_down(struct cycx_hw *hw);
-extern int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len);
-extern int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len);
-extern int cycx_exec(void __iomem *addr);
-
-extern void cycx_intr(struct cycx_hw *hw);
-#endif	/* _CYCX_DRV_H */
diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h
index cec4b4159767..8198a63cf459 100644
--- a/include/linux/wanrouter.h
+++ b/include/linux/wanrouter.h
@@ -1,129 +1,10 @@
-/*****************************************************************************
-* wanrouter.h	Definitions for the WAN Multiprotocol Router Module.
-*		This module provides API and common services for WAN Link
-*		Drivers and is completely hardware-independent.
-*
-* Author: 	Nenad Corbic <ncorbic@sangoma.com>
-*		Gideon Hack 	
-* Additions:	Arnaldo Melo
-*
-* Copyright:	(c) 1995-2000 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Jul 21, 2000  Nenad Corbic	Added WAN_FT1_READY State
-* Feb 24, 2000  Nenad Corbic    Added support for socket based x25api
-* Jan 28, 2000  Nenad Corbic    Added support for the ASYNC protocol.
-* Oct 04, 1999  Nenad Corbic 	Updated for 2.1.0 release
-* Jun 02, 1999  Gideon Hack	Added support for the S514 adapter.
-* May 23, 1999	Arnaldo Melo	Added local_addr to wanif_conf_t
-*				WAN_DISCONNECTING state added
-* Jul 20, 1998	David Fong	Added Inverse ARP options to 'wanif_conf_t'
-* Jun 12, 1998	David Fong	Added Cisco HDLC support.
-* Dec 16, 1997	Jaspreet Singh	Moved 'enable_IPX' and 'network_number' to
-*				'wanif_conf_t'
-* Dec 05, 1997	Jaspreet Singh	Added 'pap', 'chap' to 'wanif_conf_t'
-*				Added 'authenticator' to 'wan_ppp_conf_t'
-* Nov 06, 1997	Jaspreet Singh	Changed Router Driver version to 1.1 from 1.0
-* Oct 20, 1997	Jaspreet Singh	Added 'cir','bc','be' and 'mc' to 'wanif_conf_t'
-*				Added 'enable_IPX' and 'network_number' to 
-*				'wan_device_t'.  Also added defines for
-*				UDP PACKET TYPE, Interrupt test, critical values
-*				for RACE conditions.
-* Oct 05, 1997	Jaspreet Singh	Added 'dlci_num' and 'dlci[100]' to 
-*				'wan_fr_conf_t' to configure a list of dlci(s)
-*				for a NODE 
-* Jul 07, 1997	Jaspreet Singh	Added 'ttl' to 'wandev_conf_t' & 'wan_device_t'
-* May 29, 1997 	Jaspreet Singh	Added 'tx_int_enabled' to 'wan_device_t'
-* May 21, 1997	Jaspreet Singh	Added 'udp_port' to 'wan_device_t'
-* Apr 25, 1997  Farhan Thawar   Added 'udp_port' to 'wandev_conf_t'
-* Jan 16, 1997	Gene Kozin	router_devlist made public
-* Jan 02, 1997	Gene Kozin	Initial version (based on wanpipe.h).
-*****************************************************************************/
+/*
+ * wanrouter.h	Legacy declarations kept around until X25 is removed
+ */
+
 #ifndef	_ROUTER_H
 #define	_ROUTER_H
 
 #include <uapi/linux/wanrouter.h>
 
-/****** Kernel Interface ****************************************************/
-
-#include <linux/fs.h>		/* support for device drivers */
-#include <linux/proc_fs.h>	/* proc filesystem pragmatics */
-#include <linux/netdevice.h>	/* support for network drivers */
-#include <linux/spinlock.h>     /* Support for SMP Locking */
-
-/*----------------------------------------------------------------------------
- * WAN device data space.
- */
-struct wan_device {
-	unsigned magic;			/* magic number */
-	char* name;			/* -> WAN device name (ASCIIZ) */
-	void* private;			/* -> driver private data */
-	unsigned config_id;		/* Configuration ID */
-					/****** hardware configuration ******/
-	unsigned ioport;		/* adapter I/O port base #1 */
-	char S514_cpu_no[1];		/* PCI CPU Number */
-	unsigned char S514_slot_no;	/* PCI Slot Number */
-	unsigned long maddr;		/* dual-port memory address */
-	unsigned msize;			/* dual-port memory size */
-	int irq;			/* interrupt request level */
-	int dma;			/* DMA request level */
-	unsigned bps;			/* data transfer rate */
-	unsigned mtu;			/* max physical transmit unit size */
-	unsigned udp_port;              /* UDP port for management */
-        unsigned char ttl;		/* Time To Live for UDP security */
-	unsigned enable_tx_int; 	/* Transmit Interrupt enabled or not */
-	char interface;			/* RS-232/V.35, etc. */
-	char clocking;			/* external/internal */
-	char line_coding;		/* NRZ/NRZI/FM0/FM1, etc. */
-	char station;			/* DTE/DCE, primary/secondary, etc. */
-	char connection;		/* permanent/switched/on-demand */
-	char signalling;		/* Signalling RS232 or V35 */
-	char read_mode;			/* read mode: Polling or interrupt */
-	char new_if_cnt;                /* Number of interfaces per wanpipe */ 
-	char del_if_cnt;		/* Number of times del_if() gets called */
-	unsigned char piggyback;        /* Piggibacking a port */
-	unsigned hw_opt[4];		/* other hardware options */
-					/****** status and statistics *******/
-	char state;			/* device state */
-	char api_status;		/* device api status */
-	struct net_device_stats stats; 	/* interface statistics */
-	unsigned reserved[16];		/* reserved for future use */
-	unsigned long critical;		/* critical section flag */
-	spinlock_t lock;                /* Support for SMP Locking */
-
-					/****** device management methods ***/
-	int (*setup) (struct wan_device *wandev, wandev_conf_t *conf);
-	int (*shutdown) (struct wan_device *wandev);
-	int (*update) (struct wan_device *wandev);
-	int (*ioctl) (struct wan_device *wandev, unsigned cmd,
-		unsigned long arg);
-	int (*new_if)(struct wan_device *wandev, struct net_device *dev,
-		      wanif_conf_t *conf);
-	int (*del_if)(struct wan_device *wandev, struct net_device *dev);
-					/****** maintained by the router ****/
-	struct wan_device* next;	/* -> next device */
-	struct net_device* dev;		/* list of network interfaces */
-	unsigned ndev;			/* number of interfaces */
-	struct proc_dir_entry *dent;	/* proc filesystem entry */
-};
-
-/* Public functions available for device drivers */
-extern int register_wan_device(struct wan_device *wandev);
-extern int unregister_wan_device(char *name);
-
-/* Proc interface functions. These must not be called by the drivers! */
-extern int wanrouter_proc_init(void);
-extern void wanrouter_proc_cleanup(void);
-extern int wanrouter_proc_add(struct wan_device *wandev);
-extern int wanrouter_proc_delete(struct wan_device *wandev);
-extern long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-
-/* Public Data */
-/* list of registered devices */
-extern struct wan_device *wanrouter_router_devlist;
-
 #endif	/* _ROUTER_H */
diff --git a/include/uapi/linux/wanrouter.h b/include/uapi/linux/wanrouter.h
index 7617df2833d5..498d6c12c666 100644
--- a/include/uapi/linux/wanrouter.h
+++ b/include/uapi/linux/wanrouter.h
@@ -1,363 +1,9 @@
-/*****************************************************************************
-* wanrouter.h	Definitions for the WAN Multiprotocol Router Module.
-*		This module provides API and common services for WAN Link
-*		Drivers and is completely hardware-independent.
-*
-* Author: 	Nenad Corbic <ncorbic@sangoma.com>
-*		Gideon Hack 	
-* Additions:	Arnaldo Melo
-*
-* Copyright:	(c) 1995-2000 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Jul 21, 2000  Nenad Corbic	Added WAN_FT1_READY State
-* Feb 24, 2000  Nenad Corbic    Added support for socket based x25api
-* Jan 28, 2000  Nenad Corbic    Added support for the ASYNC protocol.
-* Oct 04, 1999  Nenad Corbic 	Updated for 2.1.0 release
-* Jun 02, 1999  Gideon Hack	Added support for the S514 adapter.
-* May 23, 1999	Arnaldo Melo	Added local_addr to wanif_conf_t
-*				WAN_DISCONNECTING state added
-* Jul 20, 1998	David Fong	Added Inverse ARP options to 'wanif_conf_t'
-* Jun 12, 1998	David Fong	Added Cisco HDLC support.
-* Dec 16, 1997	Jaspreet Singh	Moved 'enable_IPX' and 'network_number' to
-*				'wanif_conf_t'
-* Dec 05, 1997	Jaspreet Singh	Added 'pap', 'chap' to 'wanif_conf_t'
-*				Added 'authenticator' to 'wan_ppp_conf_t'
-* Nov 06, 1997	Jaspreet Singh	Changed Router Driver version to 1.1 from 1.0
-* Oct 20, 1997	Jaspreet Singh	Added 'cir','bc','be' and 'mc' to 'wanif_conf_t'
-*				Added 'enable_IPX' and 'network_number' to 
-*				'wan_device_t'.  Also added defines for
-*				UDP PACKET TYPE, Interrupt test, critical values
-*				for RACE conditions.
-* Oct 05, 1997	Jaspreet Singh	Added 'dlci_num' and 'dlci[100]' to 
-*				'wan_fr_conf_t' to configure a list of dlci(s)
-*				for a NODE 
-* Jul 07, 1997	Jaspreet Singh	Added 'ttl' to 'wandev_conf_t' & 'wan_device_t'
-* May 29, 1997 	Jaspreet Singh	Added 'tx_int_enabled' to 'wan_device_t'
-* May 21, 1997	Jaspreet Singh	Added 'udp_port' to 'wan_device_t'
-* Apr 25, 1997  Farhan Thawar   Added 'udp_port' to 'wandev_conf_t'
-* Jan 16, 1997	Gene Kozin	router_devlist made public
-* Jan 02, 1997	Gene Kozin	Initial version (based on wanpipe.h).
-*****************************************************************************/
-
-#ifndef _UAPI_ROUTER_H
-#define _UAPI_ROUTER_H
-
-#define	ROUTER_NAME	"wanrouter"	/* in case we ever change it */
-#define	ROUTER_VERSION	1		/* version number */
-#define	ROUTER_RELEASE	1		/* release (minor version) number */
-#define	ROUTER_IOCTL	'W'		/* for IOCTL calls */
-#define	ROUTER_MAGIC	0x524D4157L	/* signature: 'WANR' reversed */
-
-/* IOCTL codes for /proc/router/<device> entries (up to 255) */
-enum router_ioctls
-{
-	ROUTER_SETUP	= ROUTER_IOCTL<<8,	/* configure device */
-	ROUTER_DOWN,				/* shut down device */
-	ROUTER_STAT,				/* get device status */
-	ROUTER_IFNEW,				/* add interface */
-	ROUTER_IFDEL,				/* delete interface */
-	ROUTER_IFSTAT,				/* get interface status */
-	ROUTER_USER	= (ROUTER_IOCTL<<8)+16,	/* driver-specific calls */
-	ROUTER_USER_MAX	= (ROUTER_IOCTL<<8)+31
-};
-
-/* identifiers for displaying proc file data for dual port adapters */
-#define PROC_DATA_PORT_0 0x8000	/* the data is for port 0 */
-#define PROC_DATA_PORT_1 0x8001	/* the data is for port 1 */
-
-/* NLPID for packet encapsulation (ISO/IEC TR 9577) */
-#define	NLPID_IP	0xCC	/* Internet Protocol Datagram */
-#define	NLPID_SNAP	0x80	/* IEEE Subnetwork Access Protocol */
-#define	NLPID_CLNP	0x81	/* ISO/IEC 8473 */
-#define	NLPID_ESIS	0x82	/* ISO/IEC 9542 */
-#define	NLPID_ISIS	0x83	/* ISO/IEC ISIS */
-#define	NLPID_Q933	0x08	/* CCITT Q.933 */
-
-/* Miscellaneous */
-#define	WAN_IFNAME_SZ	15	/* max length of the interface name */
-#define	WAN_DRVNAME_SZ	15	/* max length of the link driver name */
-#define	WAN_ADDRESS_SZ	31	/* max length of the WAN media address */
-#define USED_BY_FIELD	8	/* max length of the used by field */
-
-/* Defines for UDP PACKET TYPE */
-#define UDP_PTPIPE_TYPE 	0x01
-#define UDP_FPIPE_TYPE		0x02
-#define UDP_CPIPE_TYPE		0x03
-#define UDP_DRVSTATS_TYPE 	0x04
-#define UDP_INVALID_TYPE  	0x05
-
-/* Command return code */
-#define CMD_OK		0		/* normal firmware return code */
-#define CMD_TIMEOUT	0xFF		/* firmware command timed out */
-
-/* UDP Packet Management */
-#define UDP_PKT_FRM_STACK	0x00
-#define UDP_PKT_FRM_NETWORK	0x01
-
-/* Maximum interrupt test counter */
-#define MAX_INTR_TEST_COUNTER	100
-
-/* Critical Values for RACE conditions*/
-#define CRITICAL_IN_ISR		0xA1
-#define CRITICAL_INTR_HANDLED	0xB1
-
-/****** Data Types **********************************************************/
-
-/*----------------------------------------------------------------------------
- * X.25-specific link-level configuration.
- */
-typedef struct wan_x25_conf
-{
-	unsigned lo_pvc;	/* lowest permanent circuit number */
-	unsigned hi_pvc;	/* highest permanent circuit number */
-	unsigned lo_svc;	/* lowest switched circuit number */
-	unsigned hi_svc;	/* highest switched circuit number */
-	unsigned hdlc_window;	/* HDLC window size (1..7) */
-	unsigned pkt_window;	/* X.25 packet window size (1..7) */
-	unsigned t1;		/* HDLC timer T1, sec (1..30) */
-	unsigned t2;		/* HDLC timer T2, sec (0..29) */
-	unsigned t4;		/* HDLC supervisory frame timer = T4 * T1 */
-	unsigned n2;		/* HDLC retransmission limit (1..30) */
-	unsigned t10_t20;	/* X.25 RESTART timeout, sec (1..255) */
-	unsigned t11_t21;	/* X.25 CALL timeout, sec (1..255) */
-	unsigned t12_t22;	/* X.25 RESET timeout, sec (1..255) */
-	unsigned t13_t23;	/* X.25 CLEAR timeout, sec (1..255) */
-	unsigned t16_t26;	/* X.25 INTERRUPT timeout, sec (1..255) */
-	unsigned t28;		/* X.25 REGISTRATION timeout, sec (1..255) */
-	unsigned r10_r20;	/* RESTART retransmission limit (0..250) */
-	unsigned r12_r22;	/* RESET retransmission limit (0..250) */
-	unsigned r13_r23;	/* CLEAR retransmission limit (0..250) */
-	unsigned ccitt_compat;	/* compatibility mode: 1988/1984/1980 */
-	unsigned x25_conf_opt;   /* User defined x25 config optoins */
-	unsigned char LAPB_hdlc_only; /* Run in HDLC only mode */
-	unsigned char logging;   /* Control connection logging */  
-	unsigned char oob_on_modem; /* Whether to send modem status to the user app */
-} wan_x25_conf_t;
-
-/*----------------------------------------------------------------------------
- * Frame relay specific link-level configuration.
- */
-typedef struct wan_fr_conf
-{
-	unsigned signalling;	/* local in-channel signalling type */
-	unsigned t391;		/* link integrity verification timer */
-	unsigned t392;		/* polling verification timer */
-	unsigned n391;		/* full status polling cycle counter */
-	unsigned n392;		/* error threshold counter */
-	unsigned n393;		/* monitored events counter */
-	unsigned dlci_num;	/* number of DLCs (access node) */
-	unsigned  dlci[100];    /* List of all DLCIs */
-} wan_fr_conf_t;
-
-/*----------------------------------------------------------------------------
- * PPP-specific link-level configuration.
- */
-typedef struct wan_ppp_conf
-{
-	unsigned restart_tmr;	/* restart timer */
-	unsigned auth_rsrt_tmr;	/* authentication timer */
-	unsigned auth_wait_tmr;	/* authentication timer */
-	unsigned mdm_fail_tmr;	/* modem failure timer */
-	unsigned dtr_drop_tmr;	/* DTR drop timer */
-	unsigned connect_tmout;	/* connection timeout */
-	unsigned conf_retry;	/* max. retry */
-	unsigned term_retry;	/* max. retry */
-	unsigned fail_retry;	/* max. retry */
-	unsigned auth_retry;	/* max. retry */
-	unsigned auth_options;	/* authentication opt. */
-	unsigned ip_options;	/* IP options */
-	char	authenticator;	/* AUTHENTICATOR or not */
-	char	ip_mode;	/* Static/Host/Peer */
-} wan_ppp_conf_t;
-
-/*----------------------------------------------------------------------------
- * CHDLC-specific link-level configuration.
- */
-typedef struct wan_chdlc_conf
-{
-	unsigned char ignore_dcd;	/* Protocol options:		*/
-	unsigned char ignore_cts;	/*  Ignore these to determine	*/
-	unsigned char ignore_keepalive;	/*  link status (Yes or No)	*/
-	unsigned char hdlc_streaming;	/*  hdlc_streaming mode (Y/N) */
-	unsigned char receive_only;	/*  no transmit buffering (Y/N) */
-	unsigned keepalive_tx_tmr;	/* transmit keepalive timer */
-	unsigned keepalive_rx_tmr;	/* receive  keepalive timer */
-	unsigned keepalive_err_margin;	/* keepalive_error_tolerance */
-	unsigned slarp_timer;		/* SLARP request timer */
-} wan_chdlc_conf_t;
-
-
-/*----------------------------------------------------------------------------
- * WAN device configuration. Passed to ROUTER_SETUP IOCTL.
- */
-typedef struct wandev_conf
-{
-	unsigned magic;		/* magic number (for verification) */
-	unsigned config_id;	/* configuration structure identifier */
-				/****** hardware configuration ******/
-	unsigned ioport;	/* adapter I/O port base */
-	unsigned long maddr;	/* dual-port memory address */
-	unsigned msize;		/* dual-port memory size */
-	int irq;		/* interrupt request level */
-	int dma;		/* DMA request level */
-        char S514_CPU_no[1];	/* S514 PCI adapter CPU number ('A' or 'B') */
-        unsigned PCI_slot_no;	/* S514 PCI adapter slot number */
-	char auto_pci_cfg;	/* S515 PCI automatic slot detection */
-	char comm_port;		/* Communication Port (PRI=0, SEC=1) */ 
-	unsigned bps;		/* data transfer rate */
-	unsigned mtu;		/* maximum transmit unit size */
-        unsigned udp_port;      /* UDP port for management */
-	unsigned char ttl;	/* Time To Live for UDP security */
-	unsigned char ft1;	/* FT1 Configurator Option */
-        char interface;		/* RS-232/V.35, etc. */
-	char clocking;		/* external/internal */
-	char line_coding;	/* NRZ/NRZI/FM0/FM1, etc. */
-	char station;		/* DTE/DCE, primary/secondary, etc. */
-	char connection;	/* permanent/switched/on-demand */
-	char read_mode;		/* read mode: Polling or interrupt */
-	char receive_only;	/* disable tx buffers */
-	char tty;		/* Create a fake tty device */
-	unsigned tty_major;	/* Major number for wanpipe tty device */
-	unsigned tty_minor; 	/* Minor number for wanpipe tty device */
-	unsigned tty_mode;	/* TTY operation mode SYNC or ASYNC */
-	char backup;		/* Backup Mode */
-	unsigned hw_opt[4];	/* other hardware options */
-	unsigned reserved[4];
-				/****** arbitrary data ***************/
-	unsigned data_size;	/* data buffer size */
-	void* data;		/* data buffer, e.g. firmware */
-	union			/****** protocol-specific ************/
-	{
-		wan_x25_conf_t x25;	/* X.25 configuration */
-		wan_ppp_conf_t ppp;	/* PPP configuration */
-		wan_fr_conf_t fr;	/* frame relay configuration */
-		wan_chdlc_conf_t chdlc;	/* Cisco HDLC configuration */
-	} u;
-} wandev_conf_t;
-
-/* 'config_id' definitions */
-#define	WANCONFIG_X25	101	/* X.25 link */
-#define	WANCONFIG_FR	102	/* frame relay link */
-#define	WANCONFIG_PPP	103	/* synchronous PPP link */
-#define WANCONFIG_CHDLC	104	/* Cisco HDLC Link */
-#define WANCONFIG_BSC	105	/* BiSync Streaming */
-#define WANCONFIG_HDLC	106	/* HDLC Support */
-#define WANCONFIG_MPPP  107	/* Multi Port PPP over RAW CHDLC */
-
 /*
- * Configuration options defines.
+ * wanrouter.h	Legacy declarations kept around until X25 is removed
  */
-/* general options */
-#define	WANOPT_OFF	0
-#define	WANOPT_ON	1
-#define	WANOPT_NO	0
-#define	WANOPT_YES	1
-
-/* intercace options */
-#define	WANOPT_RS232	0
-#define	WANOPT_V35	1
-
-/* data encoding options */
-#define	WANOPT_NRZ	0
-#define	WANOPT_NRZI	1
-#define	WANOPT_FM0	2
-#define	WANOPT_FM1	3
-
-/* link type options */
-#define	WANOPT_POINTTOPOINT	0	/* RTS always active */
-#define	WANOPT_MULTIDROP	1	/* RTS is active when transmitting */
-
-/* clocking options */
-#define	WANOPT_EXTERNAL	0
-#define	WANOPT_INTERNAL	1
-
-/* station options */
-#define	WANOPT_DTE		0
-#define	WANOPT_DCE		1
-#define	WANOPT_CPE		0
-#define	WANOPT_NODE		1
-#define	WANOPT_SECONDARY	0
-#define	WANOPT_PRIMARY		1
-
-/* connection options */
-#define	WANOPT_PERMANENT	0	/* DTR always active */
-#define	WANOPT_SWITCHED		1	/* use DTR to setup link (dial-up) */
-#define	WANOPT_ONDEMAND		2	/* activate DTR only before sending */
-
-/* frame relay in-channel signalling */
-#define	WANOPT_FR_ANSI		1	/* ANSI T1.617 Annex D */
-#define	WANOPT_FR_Q933		2	/* ITU Q.933A */
-#define	WANOPT_FR_LMI		3	/* LMI */
-
-/* PPP IP Mode Options */
-#define	WANOPT_PPP_STATIC	0
-#define	WANOPT_PPP_HOST		1
-#define	WANOPT_PPP_PEER		2
-
-/* ASY Mode Options */
-#define WANOPT_ONE 		1
-#define WANOPT_TWO		2
-#define WANOPT_ONE_AND_HALF	3
-
-#define WANOPT_NONE	0
-#define WANOPT_ODD      1
-#define WANOPT_EVEN	2
-
-/* CHDLC Protocol Options */
-/* DF Commented out for now.
-
-#define WANOPT_CHDLC_NO_DCD		IGNORE_DCD_FOR_LINK_STAT
-#define WANOPT_CHDLC_NO_CTS		IGNORE_CTS_FOR_LINK_STAT
-#define WANOPT_CHDLC_NO_KEEPALIVE	IGNORE_KPALV_FOR_LINK_STAT
-*/
-
-/* Port options */
-#define WANOPT_PRI 0
-#define WANOPT_SEC 1
-/* read mode */
-#define	WANOPT_INTR	0
-#define WANOPT_POLL	1
 
-
-#define WANOPT_TTY_SYNC  0
-#define WANOPT_TTY_ASYNC 1
-/*----------------------------------------------------------------------------
- * WAN Link Status Info (for ROUTER_STAT IOCTL).
- */
-typedef struct wandev_stat
-{
-	unsigned state;		/* link state */
-	unsigned ndev;		/* number of configured interfaces */
-
-	/* link/interface configuration */
-	unsigned connection;	/* permanent/switched/on-demand */
-	unsigned media_type;	/* Frame relay/PPP/X.25/SDLC, etc. */
-	unsigned mtu;		/* max. transmit unit for this device */
-
-	/* physical level statistics */
-	unsigned modem_status;	/* modem status */
-	unsigned rx_frames;	/* received frames count */
-	unsigned rx_overruns;	/* receiver overrun error count */
-	unsigned rx_crc_err;	/* receive CRC error count */
-	unsigned rx_aborts;	/* received aborted frames count */
-	unsigned rx_bad_length;	/* unexpetedly long/short frames count */
-	unsigned rx_dropped;	/* frames discarded at device level */
-	unsigned tx_frames;	/* transmitted frames count */
-	unsigned tx_underruns;	/* aborted transmissions (underruns) count */
-	unsigned tx_timeouts;	/* transmission timeouts */
-	unsigned tx_rejects;	/* other transmit errors */
-
-	/* media level statistics */
-	unsigned rx_bad_format;	/* frames with invalid format */
-	unsigned rx_bad_addr;	/* frames with invalid media address */
-	unsigned tx_retries;	/* frames re-transmitted */
-	unsigned reserved[16];	/* reserved for future use */
-} wandev_stat_t;
+#ifndef _UAPI_ROUTER_H
+#define _UAPI_ROUTER_H
 
 /* 'state' defines */
 enum wan_states
@@ -365,88 +11,7 @@ enum wan_states
 	WAN_UNCONFIGURED,	/* link/channel is not configured */
 	WAN_DISCONNECTED,	/* link/channel is disconnected */
 	WAN_CONNECTING,		/* connection is in progress */
-	WAN_CONNECTED,		/* link/channel is operational */
-	WAN_LIMIT,		/* for verification only */
-	WAN_DUALPORT,		/* for Dual Port cards */
-	WAN_DISCONNECTING,
-	WAN_FT1_READY		/* FT1 Configurator Ready */
+	WAN_CONNECTED		/* link/channel is operational */
 };
 
-enum {
-	WAN_LOCAL_IP,
-	WAN_POINTOPOINT_IP,
-	WAN_NETMASK_IP,
-	WAN_BROADCAST_IP
-};
-
-/* 'modem_status' masks */
-#define	WAN_MODEM_CTS	0x0001	/* CTS line active */
-#define	WAN_MODEM_DCD	0x0002	/* DCD line active */
-#define	WAN_MODEM_DTR	0x0010	/* DTR line active */
-#define	WAN_MODEM_RTS	0x0020	/* RTS line active */
-
-/*----------------------------------------------------------------------------
- * WAN interface (logical channel) configuration (for ROUTER_IFNEW IOCTL).
- */
-typedef struct wanif_conf
-{
-	unsigned magic;			/* magic number */
-	unsigned config_id;		/* configuration identifier */
-	char name[WAN_IFNAME_SZ+1];	/* interface name, ASCIIZ */
-	char addr[WAN_ADDRESS_SZ+1];	/* media address, ASCIIZ */
-	char usedby[USED_BY_FIELD];	/* used by API or WANPIPE */
-	unsigned idle_timeout;		/* sec, before disconnecting */
-	unsigned hold_timeout;		/* sec, before re-connecting */
-	unsigned cir;			/* Committed Information Rate fwd,bwd*/
-	unsigned bc;			/* Committed Burst Size fwd, bwd */
-	unsigned be;			/* Excess Burst Size fwd, bwd */ 
-	unsigned char enable_IPX;	/* Enable or Disable IPX */
-	unsigned char inarp;		/* Send Inverse ARP requests Y/N */
-	unsigned inarp_interval;	/* sec, between InARP requests */
-	unsigned long network_number;	/* Network Number for IPX */
-	char mc;			/* Multicast on or off */
-	char local_addr[WAN_ADDRESS_SZ+1];/* local media address, ASCIIZ */
-	unsigned char port;		/* board port */
-	unsigned char protocol;		/* prococol used in this channel (TCPOX25 or X25) */
-	char pap;			/* PAP enabled or disabled */
-	char chap;			/* CHAP enabled or disabled */
-	unsigned char userid[511];	/* List of User Id */
-	unsigned char passwd[511];	/* List of passwords */
-	unsigned char sysname[31];	/* Name of the system */
-	unsigned char ignore_dcd;	/* Protocol options: */
-	unsigned char ignore_cts;	/*  Ignore these to determine */
-	unsigned char ignore_keepalive;	/*  link status (Yes or No) */
-	unsigned char hdlc_streaming;	/*  Hdlc streaming mode (Y/N) */
-	unsigned keepalive_tx_tmr;	/* transmit keepalive timer */
-	unsigned keepalive_rx_tmr;	/* receive  keepalive timer */
-	unsigned keepalive_err_margin;	/* keepalive_error_tolerance */
-	unsigned slarp_timer;		/* SLARP request timer */
-	unsigned char ttl;		/* Time To Live for UDP security */
-	char interface;			/* RS-232/V.35, etc. */
-	char clocking;			/* external/internal */
-	unsigned bps;			/* data transfer rate */
-	unsigned mtu;			/* maximum transmit unit size */
-	unsigned char if_down;		/* brind down interface when disconnected */
-	unsigned char gateway;		/* Is this interface a gateway */
-	unsigned char true_if_encoding;	/* Set the dev->type to true board protocol */
-
-	unsigned char asy_data_trans;     /* async API options */
-        unsigned char rts_hs_for_receive; /* async Protocol options */
-        unsigned char xon_xoff_hs_for_receive;
-	unsigned char xon_xoff_hs_for_transmit;
-	unsigned char dcd_hs_for_transmit;
-	unsigned char cts_hs_for_transmit;
-	unsigned char async_mode;
-	unsigned tx_bits_per_char;
-	unsigned rx_bits_per_char;
-	unsigned stop_bits;  
-	unsigned char parity;
- 	unsigned break_timer;
-        unsigned inter_char_timer;
-	unsigned rx_complete_length;
-	unsigned xon_char;
-	unsigned xoff_char;
-	unsigned char receive_only;	/*  no transmit buffering (Y/N) */
-} wanif_conf_t;
-
 #endif /* _UAPI_ROUTER_H */
diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig
deleted file mode 100644
index a157a2e64e18..000000000000
--- a/net/wanrouter/Kconfig
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Configuration for WAN router
-#
-
-config WAN_ROUTER
-	tristate "WAN router (DEPRECATED)"
-	depends on EXPERIMENTAL
-	---help---
-	  Wide Area Networks (WANs), such as X.25, frame relay and leased
-	  lines, are used to interconnect Local Area Networks (LANs) over vast
-	  distances with data transfer rates significantly higher than those
-	  achievable with commonly used asynchronous modem connections.
-	  Usually, a quite expensive external device called a `WAN router' is
-	  needed to connect to a WAN.
-
-	  As an alternative, WAN routing can be built into the Linux kernel.
-	  With relatively inexpensive WAN interface cards available on the
-	  market, a perfectly usable router can be built for less than half
-	  the price of an external router.  If you have one of those cards and
-	  wish to use your Linux box as a WAN router, say Y here and also to
-	  the WAN driver for your card, below.  You will then need the
-	  wan-tools package which is available from <ftp://ftp.sangoma.com/>.
-
-	  To compile WAN routing support as a module, choose M here: the
-	  module will be called wanrouter.
-
-	  If unsure, say N.
diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile
deleted file mode 100644
index 4da14bc48078..000000000000
--- a/net/wanrouter/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the Linux WAN router layer.
-#
-
-obj-$(CONFIG_WAN_ROUTER) += wanrouter.o
-
-wanrouter-y :=  wanproc.o wanmain.o
diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel
deleted file mode 100644
index c043eea7767e..000000000000
--- a/net/wanrouter/patchlevel
+++ /dev/null
@@ -1 +0,0 @@
-2.2.1
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
deleted file mode 100644
index 2ab785064b7e..000000000000
--- a/net/wanrouter/wanmain.c
+++ /dev/null
@@ -1,782 +0,0 @@
-/*****************************************************************************
-* wanmain.c	WAN Multiprotocol Router Module. Main code.
-*
-*		This module is completely hardware-independent and provides
-*		the following common services for the WAN Link Drivers:
-*		 o WAN device management (registering, unregistering)
-*		 o Network interface management
-*		 o Physical connection management (dial-up, incoming calls)
-*		 o Logical connection management (switched virtual circuits)
-*		 o Protocol encapsulation/decapsulation
-*
-* Author:	Gideon Hack
-*
-* Copyright:	(c) 1995-1999 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Nov 24, 2000  Nenad Corbic	Updated for 2.4.X kernels
-* Nov 07, 2000  Nenad Corbic	Fixed the Mulit-Port PPP for kernels 2.2.16 and
-*  				greater.
-* Aug 2,  2000  Nenad Corbic	Block the Multi-Port PPP from running on
-*  			        kernels 2.2.16 or greater.  The SyncPPP
-*  			        has changed.
-* Jul 13, 2000  Nenad Corbic	Added SyncPPP support
-* 				Added extra debugging in device_setup().
-* Oct 01, 1999  Gideon Hack     Update for s514 PCI card
-* Dec 27, 1996	Gene Kozin	Initial version (based on Sangoma's WANPIPE)
-* Jan 16, 1997	Gene Kozin	router_devlist made public
-* Jan 31, 1997  Alan Cox	Hacked it about a bit for 2.1
-* Jun 27, 1997  Alan Cox	realigned with vendor code
-* Oct 15, 1997  Farhan Thawar   changed wan_encapsulate to add a pad byte of 0
-* Apr 20, 1998	Alan Cox	Fixed 2.1 symbols
-* May 17, 1998  K. Baranowski	Fixed SNAP encapsulation in wan_encapsulate
-* Dec 15, 1998  Arnaldo Melo    support for firmwares of up to 128000 bytes
-*                               check wandev->setup return value
-* Dec 22, 1998  Arnaldo Melo    vmalloc/vfree used in device_setup to allocate
-*                               kernel memory and copy configuration data to
-*                               kernel space (for big firmwares)
-* Jun 02, 1999  Gideon Hack	Updates for Linux 2.0.X and 2.2.X kernels.
-*****************************************************************************/
-
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/capability.h>
-#include <linux/errno.h>	/* return codes */
-#include <linux/kernel.h>
-#include <linux/module.h>	/* support for loadable modules */
-#include <linux/slab.h>		/* kmalloc(), kfree() */
-#include <linux/mutex.h>
-#include <linux/mm.h>
-#include <linux/string.h>	/* inline mem*, str* functions */
-
-#include <asm/byteorder.h>	/* htons(), etc. */
-#include <linux/wanrouter.h>	/* WAN router API definitions */
-
-#include <linux/vmalloc.h>	/* vmalloc, vfree */
-#include <asm/uaccess.h>        /* copy_to/from_user */
-#include <linux/init.h>         /* __initfunc et al. */
-
-#define DEV_TO_SLAVE(dev)	(*((struct net_device **)netdev_priv(dev)))
-
-/*
- * 	Function Prototypes
- */
-
-/*
- *	WAN device IOCTL handlers
- */
-
-static DEFINE_MUTEX(wanrouter_mutex);
-static int wanrouter_device_setup(struct wan_device *wandev,
-				  wandev_conf_t __user *u_conf);
-static int wanrouter_device_stat(struct wan_device *wandev,
-				 wandev_stat_t __user *u_stat);
-static int wanrouter_device_shutdown(struct wan_device *wandev);
-static int wanrouter_device_new_if(struct wan_device *wandev,
-				   wanif_conf_t __user *u_conf);
-static int wanrouter_device_del_if(struct wan_device *wandev,
-				   char __user *u_name);
-
-/*
- *	Miscellaneous
- */
-
-static struct wan_device *wanrouter_find_device(char *name);
-static int wanrouter_delete_interface(struct wan_device *wandev, char *name);
-static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__acquires(lock);
-static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__releases(lock);
-
-
-
-/*
- *	Global Data
- */
-
-static char wanrouter_fullname[]  = "Sangoma WANPIPE Router";
-static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc.";
-static char wanrouter_modname[] = ROUTER_NAME; /* short module name */
-struct wan_device* wanrouter_router_devlist; /* list of registered devices */
-
-/*
- *	Organize Unique Identifiers for encapsulation/decapsulation
- */
-
-#if 0
-static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 };
-static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 };
-#endif
-
-static int __init wanrouter_init(void)
-{
-	int err;
-
-	printk(KERN_INFO "%s v%u.%u %s\n",
-	       wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE,
-	       wanrouter_copyright);
-
-	err = wanrouter_proc_init();
-	if (err)
-		printk(KERN_INFO "%s: can't create entry in proc filesystem!\n",
-		       wanrouter_modname);
-
-	return err;
-}
-
-static void __exit wanrouter_cleanup (void)
-{
-	wanrouter_proc_cleanup();
-}
-
-/*
- * This is just plain dumb.  We should move the bugger to drivers/net/wan,
- * slap it first in directory and make it module_init().  The only reason
- * for subsys_initcall() here is that net goes after drivers (why, BTW?)
- */
-subsys_initcall(wanrouter_init);
-module_exit(wanrouter_cleanup);
-
-/*
- * 	Kernel APIs
- */
-
-/*
- * 	Register WAN device.
- * 	o verify device credentials
- * 	o create an entry for the device in the /proc/net/router directory
- * 	o initialize internally maintained fields of the wan_device structure
- * 	o link device data space to a singly-linked list
- * 	o if it's the first device, then start kernel 'thread'
- * 	o increment module use count
- *
- * 	Return:
- *	0	Ok
- *	< 0	error.
- *
- * 	Context:	process
- */
-
-
-int register_wan_device(struct wan_device *wandev)
-{
-	int err, namelen;
-
-	if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) ||
-	    (wandev->name == NULL))
-		return -EINVAL;
-
-	namelen = strlen(wandev->name);
-	if (!namelen || (namelen > WAN_DRVNAME_SZ))
-		return -EINVAL;
-
-	if (wanrouter_find_device(wandev->name))
-		return -EEXIST;
-
-#ifdef WANDEBUG
-	printk(KERN_INFO "%s: registering WAN device %s\n",
-	       wanrouter_modname, wandev->name);
-#endif
-
-	/*
-	 *	Register /proc directory entry
-	 */
-	err = wanrouter_proc_add(wandev);
-	if (err) {
-		printk(KERN_INFO
-			"%s: can't create /proc/net/router/%s entry!\n",
-			wanrouter_modname, wandev->name);
-		return err;
-	}
-
-	/*
-	 *	Initialize fields of the wan_device structure maintained by the
-	 *	router and update local data.
-	 */
-
-	wandev->ndev = 0;
-	wandev->dev  = NULL;
-	wandev->next = wanrouter_router_devlist;
-	wanrouter_router_devlist = wandev;
-	return 0;
-}
-
-/*
- *	Unregister WAN device.
- *	o shut down device
- *	o unlink device data space from the linked list
- *	o delete device entry in the /proc/net/router directory
- *	o decrement module use count
- *
- *	Return:		0	Ok
- *			<0	error.
- *	Context:	process
- */
-
-
-int unregister_wan_device(char *name)
-{
-	struct wan_device *wandev, *prev;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	for (wandev = wanrouter_router_devlist, prev = NULL;
-		wandev && strcmp(wandev->name, name);
-		prev = wandev, wandev = wandev->next)
-		;
-	if (wandev == NULL)
-		return -ENODEV;
-
-#ifdef WANDEBUG
-	printk(KERN_INFO "%s: unregistering WAN device %s\n",
-	       wanrouter_modname, name);
-#endif
-
-	if (wandev->state != WAN_UNCONFIGURED)
-		wanrouter_device_shutdown(wandev);
-
-	if (prev)
-		prev->next = wandev->next;
-	else
-		wanrouter_router_devlist = wandev->next;
-
-	wanrouter_proc_delete(wandev);
-	return 0;
-}
-
-#if 0
-
-/*
- *	Encapsulate packet.
- *
- *	Return:	encapsulation header size
- *		< 0	- unsupported Ethertype
- *
- *	Notes:
- *	1. This function may be called on interrupt context.
- */
-
-
-int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
-			  unsigned short type)
-{
-	int hdr_len = 0;
-
-	switch (type) {
-	case ETH_P_IP:		/* IP datagram encapsulation */
-		hdr_len += 1;
-		skb_push(skb, 1);
-		skb->data[0] = NLPID_IP;
-		break;
-
-	case ETH_P_IPX:		/* SNAP encapsulation */
-	case ETH_P_ARP:
-		hdr_len += 7;
-		skb_push(skb, 7);
-		skb->data[0] = 0;
-		skb->data[1] = NLPID_SNAP;
-		skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether,
-					       sizeof(wanrouter_oui_ether));
-		*((unsigned short*)&skb->data[5]) = htons(type);
-		break;
-
-	default:		/* Unknown packet type */
-		printk(KERN_INFO
-			"%s: unsupported Ethertype 0x%04X on interface %s!\n",
-			wanrouter_modname, type, dev->name);
-		hdr_len = -EINVAL;
-	}
-	return hdr_len;
-}
-
-
-/*
- *	Decapsulate packet.
- *
- *	Return:	Ethertype (in network order)
- *			0	unknown encapsulation
- *
- *	Notes:
- *	1. This function may be called on interrupt context.
- */
-
-
-__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
-{
-	int cnt = skb->data[0] ? 0 : 1;	/* there may be a pad present */
-	__be16 ethertype;
-
-	switch (skb->data[cnt]) {
-	case NLPID_IP:		/* IP datagramm */
-		ethertype = htons(ETH_P_IP);
-		cnt += 1;
-		break;
-
-	case NLPID_SNAP:	/* SNAP encapsulation */
-		if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether,
-			   sizeof(wanrouter_oui_ether))){
-			printk(KERN_INFO
-				"%s: unsupported SNAP OUI %02X-%02X-%02X "
-				"on interface %s!\n", wanrouter_modname,
-				skb->data[cnt+1], skb->data[cnt+2],
-				skb->data[cnt+3], dev->name);
-			return 0;
-		}
-		ethertype = *((__be16*)&skb->data[cnt+4]);
-		cnt += 6;
-		break;
-
-	/* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */
-
-	default:
-		printk(KERN_INFO
-			"%s: unsupported NLPID 0x%02X on interface %s!\n",
-			wanrouter_modname, skb->data[cnt], dev->name);
-		return 0;
-	}
-	skb->protocol = ethertype;
-	skb->pkt_type = PACKET_HOST;	/*	Physically point to point */
-	skb_pull(skb, cnt);
-	skb_reset_mac_header(skb);
-	return ethertype;
-}
-
-#endif  /*  0  */
-
-/*
- *	WAN device IOCTL.
- *	o find WAN device associated with this node
- *	o execute requested action or pass command to the device driver
- */
-
-long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int err = 0;
-	struct proc_dir_entry *dent;
-	struct wan_device *wandev;
-	void __user *data = (void __user *)arg;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if ((cmd >> 8) != ROUTER_IOCTL)
-		return -EINVAL;
-
-	dent = PDE(inode);
-	if ((dent == NULL) || (dent->data == NULL))
-		return -EINVAL;
-
-	wandev = dent->data;
-	if (wandev->magic != ROUTER_MAGIC)
-		return -EINVAL;
-
-	mutex_lock(&wanrouter_mutex);
-	switch (cmd) {
-	case ROUTER_SETUP:
-		err = wanrouter_device_setup(wandev, data);
-		break;
-
-	case ROUTER_DOWN:
-		err = wanrouter_device_shutdown(wandev);
-		break;
-
-	case ROUTER_STAT:
-		err = wanrouter_device_stat(wandev, data);
-		break;
-
-	case ROUTER_IFNEW:
-		err = wanrouter_device_new_if(wandev, data);
-		break;
-
-	case ROUTER_IFDEL:
-		err = wanrouter_device_del_if(wandev, data);
-		break;
-
-	case ROUTER_IFSTAT:
-		break;
-
-	default:
-		if ((cmd >= ROUTER_USER) &&
-		    (cmd <= ROUTER_USER_MAX) &&
-		    wandev->ioctl)
-			err = wandev->ioctl(wandev, cmd, arg);
-		else err = -EINVAL;
-	}
-	mutex_unlock(&wanrouter_mutex);
-	return err;
-}
-
-/*
- *	WAN Driver IOCTL Handlers
- */
-
-/*
- *	Setup WAN link device.
- *	o verify user address space
- *	o allocate kernel memory and copy configuration data to kernel space
- *	o if configuration data includes extension, copy it to kernel space too
- *	o call driver's setup() entry point
- */
-
-static int wanrouter_device_setup(struct wan_device *wandev,
-				  wandev_conf_t __user *u_conf)
-{
-	void *data = NULL;
-	wandev_conf_t *conf;
-	int err = -EINVAL;
-
-	if (wandev->setup == NULL) {	/* Nothing to do ? */
-		printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n",
-				wandev->name);
-		return 0;
-	}
-
-	conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL);
-	if (conf == NULL){
-		printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n",
-				wandev->name);
-		return -ENOBUFS;
-	}
-
-	if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) {
-		printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n",
-				wandev->name);
-		kfree(conf);
-		return -EFAULT;
-	}
-
-	if (conf->magic != ROUTER_MAGIC) {
-		kfree(conf);
-		printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n",
-				wandev->name);
-		return -EINVAL;
-	}
-
-	if (conf->data_size && conf->data) {
-		if (conf->data_size > 128000) {
-			printk(KERN_INFO
-			    "%s: ERROR, Invalid firmware data size %i !\n",
-					wandev->name, conf->data_size);
-			kfree(conf);
-			return -EINVAL;
-		}
-
-		data = vmalloc(conf->data_size);
-		if (!data) {
-			printk(KERN_INFO
-				"%s: ERROR, Failed allocate kernel memory !\n",
-				wandev->name);
-			kfree(conf);
-			return -ENOBUFS;
-		}
-		if (!copy_from_user(data, conf->data, conf->data_size)) {
-			conf->data = data;
-			err = wandev->setup(wandev, conf);
-		} else {
-			printk(KERN_INFO
-			     "%s: ERROR, Failed to copy from user data !\n",
-			       wandev->name);
-			err = -EFAULT;
-		}
-		vfree(data);
-	} else {
-		printk(KERN_INFO
-		    "%s: ERROR, No firmware found ! Firmware size = %i !\n",
-				wandev->name, conf->data_size);
-	}
-
-	kfree(conf);
-	return err;
-}
-
-/*
- *	Shutdown WAN device.
- *	o delete all not opened logical channels for this device
- *	o call driver's shutdown() entry point
- */
-
-static int wanrouter_device_shutdown(struct wan_device *wandev)
-{
-	struct net_device *dev;
-	int err=0;
-
-	if (wandev->state == WAN_UNCONFIGURED)
-		return 0;
-
-	printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name);
-
-	for (dev = wandev->dev; dev;) {
-		err = wanrouter_delete_interface(wandev, dev->name);
-		if (err)
-			return err;
-		/* The above function deallocates the current dev
-		 * structure. Therefore, we cannot use netdev_priv(dev)
-		 * as the next element: wandev->dev points to the
-		 * next element */
-		dev = wandev->dev;
-	}
-
-	if (wandev->ndev)
-		return -EBUSY;	/* there are opened interfaces  */
-
-	if (wandev->shutdown)
-		err=wandev->shutdown(wandev);
-
-	return err;
-}
-
-/*
- *	Get WAN device status & statistics.
- */
-
-static int wanrouter_device_stat(struct wan_device *wandev,
-				 wandev_stat_t __user *u_stat)
-{
-	wandev_stat_t stat;
-
-	memset(&stat, 0, sizeof(stat));
-
-	/* Ask device driver to update device statistics */
-	if ((wandev->state != WAN_UNCONFIGURED) && wandev->update)
-		wandev->update(wandev);
-
-	/* Fill out structure */
-	stat.ndev  = wandev->ndev;
-	stat.state = wandev->state;
-
-	if (copy_to_user(u_stat, &stat, sizeof(stat)))
-		return -EFAULT;
-
-	return 0;
-}
-
-/*
- *	Create new WAN interface.
- *	o verify user address space
- *	o copy configuration data to kernel address space
- *	o allocate network interface data space
- *	o call driver's new_if() entry point
- *	o make sure there is no interface name conflict
- *	o register network interface
- */
-
-static int wanrouter_device_new_if(struct wan_device *wandev,
-				   wanif_conf_t __user *u_conf)
-{
-	wanif_conf_t *cnf;
-	struct net_device *dev = NULL;
-	int err;
-
-	if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL))
-		return -ENODEV;
-
-	cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL);
-	if (!cnf)
-		return -ENOBUFS;
-
-	err = -EFAULT;
-	if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t)))
-		goto out;
-
-	err = -EINVAL;
-	if (cnf->magic != ROUTER_MAGIC)
-		goto out;
-
-	if (cnf->config_id == WANCONFIG_MPPP) {
-		printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n",
-				wandev->name);
-		err = -EPROTONOSUPPORT;
-		goto out;
-	} else {
-		err = wandev->new_if(wandev, dev, cnf);
-	}
-
-	if (!err) {
-		/* Register network interface. This will invoke init()
-		 * function supplied by the driver.  If device registered
-		 * successfully, add it to the interface list.
-		 */
-
-#ifdef WANDEBUG
-		printk(KERN_INFO "%s: registering interface %s...\n",
-		       wanrouter_modname, dev->name);
-#endif
-
-		err = register_netdev(dev);
-		if (!err) {
-			struct net_device *slave = NULL;
-			unsigned long smp_flags=0;
-
-			lock_adapter_irq(&wandev->lock, &smp_flags);
-
-			if (wandev->dev == NULL) {
-				wandev->dev = dev;
-			} else {
-				for (slave=wandev->dev;
-				     DEV_TO_SLAVE(slave);
-				     slave = DEV_TO_SLAVE(slave))
-					DEV_TO_SLAVE(slave) = dev;
-			}
-			++wandev->ndev;
-
-			unlock_adapter_irq(&wandev->lock, &smp_flags);
-			err = 0;	/* done !!! */
-			goto out;
-		}
-		if (wandev->del_if)
-			wandev->del_if(wandev, dev);
-		free_netdev(dev);
-	}
-
-out:
-	kfree(cnf);
-	return err;
-}
-
-
-/*
- *	Delete WAN logical channel.
- *	 o verify user address space
- *	 o copy configuration data to kernel address space
- */
-
-static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name)
-{
-	char name[WAN_IFNAME_SZ + 1];
-	int err = 0;
-
-	if (wandev->state == WAN_UNCONFIGURED)
-		return -ENODEV;
-
-	memset(name, 0, sizeof(name));
-
-	if (copy_from_user(name, u_name, WAN_IFNAME_SZ))
-		return -EFAULT;
-
-	err = wanrouter_delete_interface(wandev, name);
-	if (err)
-		return err;
-
-	/* If last interface being deleted, shutdown card
-	 * This helps with administration at leaf nodes
-	 * (You can tell if the person at the other end of the phone
-	 * has an interface configured) and avoids DoS vulnerabilities
-	 * in binary driver files - this fixes a problem with the current
-	 * Sangoma driver going into strange states when all the network
-	 * interfaces are deleted and the link irrecoverably disconnected.
-	 */
-
-	if (!wandev->ndev && wandev->shutdown)
-		err = wandev->shutdown(wandev);
-
-	return err;
-}
-
-/*
- *	Miscellaneous Functions
- */
-
-/*
- *	Find WAN device by name.
- *	Return pointer to the WAN device data space or NULL if device not found.
- */
-
-static struct wan_device *wanrouter_find_device(char *name)
-{
-	struct wan_device *wandev;
-
-	for (wandev = wanrouter_router_devlist;
-	     wandev && strcmp(wandev->name, name);
-		wandev = wandev->next);
-	return wandev;
-}
-
-/*
- *	Delete WAN logical channel identified by its name.
- *	o find logical channel by its name
- *	o call driver's del_if() entry point
- *	o unregister network interface
- *	o unlink channel data space from linked list of channels
- *	o release channel data space
- *
- *	Return:	0		success
- *		-ENODEV		channel not found.
- *		-EBUSY		interface is open
- *
- *	Note: If (force != 0), then device will be destroyed even if interface
- *	associated with it is open. It's caller's responsibility to make
- *	sure that opened interfaces are not removed!
- */
-
-static int wanrouter_delete_interface(struct wan_device *wandev, char *name)
-{
-	struct net_device *dev = NULL, *prev = NULL;
-	unsigned long smp_flags=0;
-
-	lock_adapter_irq(&wandev->lock, &smp_flags);
-	dev = wandev->dev;
-	prev = NULL;
-	while (dev && strcmp(name, dev->name)) {
-		struct net_device **slave = netdev_priv(dev);
-		prev = dev;
-		dev = *slave;
-	}
-	unlock_adapter_irq(&wandev->lock, &smp_flags);
-
-	if (dev == NULL)
-		return -ENODEV;	/* interface not found */
-
-	if (netif_running(dev))
-		return -EBUSY;	/* interface in use */
-
-	if (wandev->del_if)
-		wandev->del_if(wandev, dev);
-
-	lock_adapter_irq(&wandev->lock, &smp_flags);
-	if (prev) {
-		struct net_device **prev_slave = netdev_priv(prev);
-		struct net_device **slave = netdev_priv(dev);
-
-		*prev_slave = *slave;
-	} else {
-		struct net_device **slave = netdev_priv(dev);
-		wandev->dev = *slave;
-	}
-	--wandev->ndev;
-	unlock_adapter_irq(&wandev->lock, &smp_flags);
-
-	printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name);
-
-	unregister_netdev(dev);
-
-	free_netdev(dev);
-
-	return 0;
-}
-
-static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__acquires(lock)
-{
-	spin_lock_irqsave(lock, *smp_flags);
-}
-
-
-static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__releases(lock)
-{
-	spin_unlock_irqrestore(lock, *smp_flags);
-}
-
-EXPORT_SYMBOL(register_wan_device);
-EXPORT_SYMBOL(unregister_wan_device);
-
-MODULE_LICENSE("GPL");
-
-/*
- *	End
- */
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
deleted file mode 100644
index c43612ee96bb..000000000000
--- a/net/wanrouter/wanproc.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*****************************************************************************
-* wanproc.c	WAN Router Module. /proc filesystem interface.
-*
-*		This module is completely hardware-independent and provides
-*		access to the router using Linux /proc filesystem.
-*
-* Author: 	Gideon Hack
-*
-* Copyright:	(c) 1995-1999 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Jun 02, 1999  Gideon Hack	Updates for Linux 2.2.X kernels.
-* Jun 29, 1997	Alan Cox	Merged with 1.0.3 vendor code
-* Jan 29, 1997	Gene Kozin	v1.0.1. Implemented /proc read routines
-* Jan 30, 1997	Alan Cox	Hacked around for 2.1
-* Dec 13, 1996	Gene Kozin	Initial version (based on Sangoma's WANPIPE)
-*****************************************************************************/
-
-#include <linux/init.h>		/* __initfunc et al. */
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/errno.h>	/* return codes */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/wanrouter.h>	/* WAN router API definitions */
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-
-#include <net/net_namespace.h>
-#include <asm/io.h>
-
-#define PROC_STATS_FORMAT "%30s: %12lu\n"
-
-/****** Defines and Macros **************************************************/
-
-#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\
-			      (prot == WANCONFIG_X25) ? " X25" : \
-				 (prot == WANCONFIG_PPP) ? " PPP" : \
-				    (prot == WANCONFIG_CHDLC) ? " CHDLC": \
-				       (prot == WANCONFIG_MPPP) ? " MPPP" : \
-					   " Unknown" )
-
-/****** Function Prototypes *************************************************/
-
-#ifdef CONFIG_PROC_FS
-
-/* Miscellaneous */
-
-/*
- *	Structures for interfacing with the /proc filesystem.
- *	Router creates its own directory /proc/net/router with the following
- *	entries:
- *	config		device configuration
- *	status		global device statistics
- *	<device>	entry for each WAN device
- */
-
-/*
- *	Generic /proc/net/router/<file> file and inode operations
- */
-
-/*
- *	/proc/net/router
- */
-
-static DEFINE_MUTEX(config_mutex);
-static struct proc_dir_entry *proc_router;
-
-/* Strings */
-
-/*
- *	Interface functions
- */
-
-/****** Proc filesystem entry points ****************************************/
-
-/*
- *	Iterator
- */
-static void *r_start(struct seq_file *m, loff_t *pos)
-{
-	struct wan_device *wandev;
-	loff_t l = *pos;
-
-	mutex_lock(&config_mutex);
-	if (!l--)
-		return SEQ_START_TOKEN;
-	for (wandev = wanrouter_router_devlist; l-- && wandev;
-	     wandev = wandev->next)
-		;
-	return wandev;
-}
-
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	struct wan_device *wandev = v;
-	(*pos)++;
-	return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next;
-}
-
-static void r_stop(struct seq_file *m, void *v)
-{
-	mutex_unlock(&config_mutex);
-}
-
-static int config_show(struct seq_file *m, void *v)
-{
-	struct wan_device *p = v;
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "Device name    | port |IRQ|DMA|  mem.addr  |"
-			    "mem.size|option1|option2|option3|option4\n");
-		return 0;
-	}
-	if (!p->state)
-		return 0;
-	seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n",
-			p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize,
-			p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]);
-	return 0;
-}
-
-static int status_show(struct seq_file *m, void *v)
-{
-	struct wan_device *p = v;
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "Device name    |protocol|station|interface|"
-			    "clocking|baud rate| MTU |ndev|link state\n");
-		return 0;
-	}
-	if (!p->state)
-		return 0;
-	seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |",
-		p->name,
-		PROT_DECODE(p->config_id),
-		p->config_id == WANCONFIG_FR ?
-			(p->station ? "Node" : "CPE") :
-			(p->config_id == WANCONFIG_X25 ?
-			(p->station ? "DCE" : "DTE") :
-			("N/A")),
-		p->interface ? "V.35" : "RS-232",
-		p->clocking ? "internal" : "external",
-		p->bps,
-		p->mtu,
-		p->ndev);
-
-	switch (p->state) {
-	case WAN_UNCONFIGURED:
-		seq_printf(m, "%-12s\n", "unconfigured");
-		break;
-	case WAN_DISCONNECTED:
-		seq_printf(m, "%-12s\n", "disconnected");
-		break;
-	case WAN_CONNECTING:
-		seq_printf(m, "%-12s\n", "connecting");
-		break;
-	case WAN_CONNECTED:
-		seq_printf(m, "%-12s\n", "connected");
-		break;
-	default:
-		seq_printf(m, "%-12s\n", "invalid");
-		break;
-	}
-	return 0;
-}
-
-static const struct seq_operations config_op = {
-	.start	= r_start,
-	.next	= r_next,
-	.stop	= r_stop,
-	.show	= config_show,
-};
-
-static const struct seq_operations status_op = {
-	.start	= r_start,
-	.next	= r_next,
-	.stop	= r_stop,
-	.show	= status_show,
-};
-
-static int config_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &config_op);
-}
-
-static int status_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &status_op);
-}
-
-static const struct file_operations config_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = config_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-
-static const struct file_operations status_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = status_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-
-static int wandev_show(struct seq_file *m, void *v)
-{
-	struct wan_device *wandev = m->private;
-
-	if (wandev->magic != ROUTER_MAGIC)
-		return 0;
-
-	if (!wandev->state) {
-		seq_puts(m, "device is not configured!\n");
-		return 0;
-	}
-
-	/* Update device statistics */
-	if (wandev->update) {
-		int err = wandev->update(wandev);
-		if (err == -EAGAIN) {
-			seq_puts(m, "Device is busy!\n");
-			return 0;
-		}
-		if (err) {
-			seq_puts(m, "Device is not configured!\n");
-			return 0;
-		}
-	}
-
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total packets received", wandev->stats.rx_packets);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total packets transmitted", wandev->stats.tx_packets);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total bytes received", wandev->stats.rx_bytes);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total bytes transmitted", wandev->stats.tx_bytes);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"bad packets received", wandev->stats.rx_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"packet transmit problems", wandev->stats.tx_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"received frames dropped", wandev->stats.rx_dropped);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"transmit frames dropped", wandev->stats.tx_dropped);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"multicast packets received", wandev->stats.multicast);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"transmit collisions", wandev->stats.collisions);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receive length errors", wandev->stats.rx_length_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receiver overrun errors", wandev->stats.rx_over_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"CRC errors", wandev->stats.rx_crc_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"frame format errors (aborts)", wandev->stats.rx_frame_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receiver fifo overrun", wandev->stats.rx_fifo_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receiver missed packet", wandev->stats.rx_missed_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"aborted frames transmitted", wandev->stats.tx_aborted_errors);
-	return 0;
-}
-
-static int wandev_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, wandev_show, PDE(inode)->data);
-}
-
-static const struct file_operations wandev_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = wandev_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release,
-	.unlocked_ioctl  = wanrouter_ioctl,
-};
-
-/*
- *	Initialize router proc interface.
- */
-
-int __init wanrouter_proc_init(void)
-{
-	struct proc_dir_entry *p;
-	proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net);
-	if (!proc_router)
-		goto fail;
-
-	p = proc_create("config", S_IRUGO, proc_router, &config_fops);
-	if (!p)
-		goto fail_config;
-	p = proc_create("status", S_IRUGO, proc_router, &status_fops);
-	if (!p)
-		goto fail_stat;
-	return 0;
-fail_stat:
-	remove_proc_entry("config", proc_router);
-fail_config:
-	remove_proc_entry(ROUTER_NAME, init_net.proc_net);
-fail:
-	return -ENOMEM;
-}
-
-/*
- *	Clean up router proc interface.
- */
-
-void wanrouter_proc_cleanup(void)
-{
-	remove_proc_entry("config", proc_router);
-	remove_proc_entry("status", proc_router);
-	remove_proc_entry(ROUTER_NAME, init_net.proc_net);
-}
-
-/*
- *	Add directory entry for WAN device.
- */
-
-int wanrouter_proc_add(struct wan_device* wandev)
-{
-	if (wandev->magic != ROUTER_MAGIC)
-		return -EINVAL;
-
-	wandev->dent = proc_create(wandev->name, S_IRUGO,
-				   proc_router, &wandev_fops);
-	if (!wandev->dent)
-		return -ENOMEM;
-	wandev->dent->data	= wandev;
-	return 0;
-}
-
-/*
- *	Delete directory entry for WAN device.
- */
-int wanrouter_proc_delete(struct wan_device* wandev)
-{
-	if (wandev->magic != ROUTER_MAGIC)
-		return -EINVAL;
-	remove_proc_entry(wandev->name, proc_router);
-	return 0;
-}
-
-#else
-
-/*
- *	No /proc - output stubs
- */
-
-int __init wanrouter_proc_init(void)
-{
-	return 0;
-}
-
-void wanrouter_proc_cleanup(void)
-{
-}
-
-int wanrouter_proc_add(struct wan_device *wandev)
-{
-	return 0;
-}
-
-int wanrouter_proc_delete(struct wan_device *wandev)
-{
-	return 0;
-}
-
-#endif
-
-/*
- *	End
- */
-
-- 
cgit v1.2.3


From 97cc019ee56d52005ea4544af17bef268c464862 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Fri, 1 Feb 2013 08:46:56 +0100
Subject: bcma: cc: fix (and rename) define of NAND flash type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma_driver_chipcommon.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 9a0e3fa3ca95..6a299f416288 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -27,7 +27,7 @@
 #define   BCMA_CC_FLASHT_NONE		0x00000000	/* No flash */
 #define   BCMA_CC_FLASHT_STSER		0x00000100	/* ST serial flash */
 #define   BCMA_CC_FLASHT_ATSER		0x00000200	/* Atmel serial flash */
-#define   BCMA_CC_FLASHT_NFLASH		0x00000200	/* NAND flash */
+#define   BCMA_CC_FLASHT_NAND		0x00000300	/* NAND flash */
 #define	  BCMA_CC_FLASHT_PARA		0x00000700	/* Parallel flash */
 #define  BCMA_CC_CAP_PLLT		0x00038000	/* PLL Type */
 #define   BCMA_PLLTYPE_NONE		0x00000000
-- 
cgit v1.2.3


From b8eed8af94f9203e0cc39245ea335f4b8dc1ed31 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 14 Jan 2013 13:23:03 +0000
Subject: cpufreq: Simplify __cpufreq_remove_dev()

__cpufreq_remove_dev() is called on multiple occasions: cpufreq_driver
unregister and cpu removals.

Current implementation of this routine is overly complex without much need. If
the cpu to be removed is the policy->cpu, we remove the policy first and add all
other cpus again from policy->cpus and then finally call __cpufreq_remove_dev()
again to remove the cpu to be deleted. Haahhhh..

There exist a simple solution to removal of a cpu:
- Simply use the old policy structure
- update its fields like: policy->cpu, etc.
- notify any users of cpufreq, which depend on changing policy->cpu

Hence this patch, which tries to implement the above theory. It is tested well
by myself on ARM big.LITTLE TC2 SoC, which has 5 cores (2 A15 and 3 A7). Both
A15's share same struct policy and all A7's share same policy structure.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c       | 161 +++++++++++++++++-----------------------
 drivers/cpufreq/cpufreq_stats.c |  27 ++++++-
 drivers/cpufreq/freq_table.c    |   9 +++
 include/linux/cpufreq.h         |  14 ++--
 4 files changed, 113 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 034d1836884b..9af14a8bbcdb 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1036,6 +1036,25 @@ module_out:
 	return ret;
 }
 
+static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
+{
+	int j;
+
+	policy->last_cpu = policy->cpu;
+	policy->cpu = cpu;
+
+	for_each_cpu(j, policy->cpus) {
+		if (!cpu_online(j))
+			continue;
+		per_cpu(cpufreq_policy_cpu, j) = cpu;
+	}
+
+#ifdef CONFIG_CPU_FREQ_TABLE
+	cpufreq_frequency_table_update_policy_cpu(policy);
+#endif
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_UPDATE_POLICY_CPU, policy);
+}
 
 /**
  * __cpufreq_remove_dev - remove a CPU device
@@ -1046,132 +1065,92 @@ module_out:
  */
 static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
-	unsigned int cpu = dev->id;
+	unsigned int cpu = dev->id, ret, cpus;
 	unsigned long flags;
 	struct cpufreq_policy *data;
 	struct kobject *kobj;
 	struct completion *cmp;
-#ifdef CONFIG_SMP
 	struct device *cpu_dev;
-	unsigned int j;
-#endif
 
-	pr_debug("unregistering CPU %u\n", cpu);
+	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
 	data = per_cpu(cpufreq_cpu_data, cpu);
 
 	if (!data) {
+		pr_debug("%s: No cpu_data found\n", __func__);
 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 		unlock_policy_rwsem_write(cpu);
 		return -EINVAL;
 	}
-	per_cpu(cpufreq_cpu_data, cpu) = NULL;
 
-#ifdef CONFIG_SMP
-	/* if this isn't the CPU which is the parent of the kobj, we
-	 * only need to unlink, put and exit
-	 */
-	if (unlikely(cpu != data->cpu)) {
-		pr_debug("removing link\n");
+	if (cpufreq_driver->target)
 		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
-		cpumask_clear_cpu(cpu, data->cpus);
-		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-		__cpufreq_governor(data, CPUFREQ_GOV_START);
-		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
-
-		kobj = &dev->kobj;
-		cpufreq_cpu_put(data);
-		unlock_policy_rwsem_write(cpu);
-		sysfs_remove_link(kobj, "cpufreq");
-		return 0;
-	}
-#endif
-
-#ifdef CONFIG_SMP
 
 #ifdef CONFIG_HOTPLUG_CPU
 	strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
 			CPUFREQ_NAME_LEN);
 #endif
 
-	/* if we have other CPUs still registered, we need to unlink them,
-	 * or else wait_for_completion below will lock up. Clean the
-	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
-	 * the sysfs links afterwards.
-	 */
-	if (unlikely(cpumask_weight(data->cpus) > 1)) {
-		for_each_cpu(j, data->cpus) {
-			if (j == cpu)
-				continue;
-			per_cpu(cpufreq_cpu_data, j) = NULL;
-		}
-	}
-
-	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	per_cpu(cpufreq_cpu_data, cpu) = NULL;
+	cpus = cpumask_weight(data->cpus);
+	cpumask_clear_cpu(cpu, data->cpus);
 
-	if (unlikely(cpumask_weight(data->cpus) > 1)) {
-		for_each_cpu(j, data->cpus) {
-			if (j == cpu)
-				continue;
-			pr_debug("removing link for cpu %u\n", j);
-#ifdef CONFIG_HOTPLUG_CPU
-			strncpy(per_cpu(cpufreq_cpu_governor, j),
-				data->governor->name, CPUFREQ_NAME_LEN);
-#endif
-			cpu_dev = get_cpu_device(j);
-			kobj = &cpu_dev->kobj;
+	if (unlikely((cpu == data->cpu) && (cpus > 1))) {
+		/* first sibling now owns the new sysfs dir */
+		cpu_dev = get_cpu_device(cpumask_first(data->cpus));
+		sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
+		ret = kobject_move(&data->kobj, &cpu_dev->kobj);
+		if (ret) {
+			pr_err("%s: Failed to move kobj: %d", __func__, ret);
+			cpumask_set_cpu(cpu, data->cpus);
+			ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj,
+					"cpufreq");
+			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 			unlock_policy_rwsem_write(cpu);
-			sysfs_remove_link(kobj, "cpufreq");
-			lock_policy_rwsem_write(cpu);
-			cpufreq_cpu_put(data);
+			return -EINVAL;
 		}
+
+		update_policy_cpu(data, cpu_dev->id);
+		pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
+				__func__, cpu_dev->id, cpu);
 	}
-#else
-	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-#endif
 
-	if (cpufreq_driver->target)
-		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
+	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	kobj = &data->kobj;
-	cmp = &data->kobj_unregister;
+	pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
+	cpufreq_cpu_put(data);
 	unlock_policy_rwsem_write(cpu);
-	kobject_put(kobj);
+	sysfs_remove_link(&dev->kobj, "cpufreq");
 
-	/* we need to make sure that the underlying kobj is actually
-	 * not referenced anymore by anybody before we proceed with
-	 * unloading.
-	 */
-	pr_debug("waiting for dropping of refcount\n");
-	wait_for_completion(cmp);
-	pr_debug("wait complete\n");
-
-	lock_policy_rwsem_write(cpu);
-	if (cpufreq_driver->exit)
-		cpufreq_driver->exit(data);
-	unlock_policy_rwsem_write(cpu);
+	/* If cpu is last user of policy, free policy */
+	if (cpus == 1) {
+		lock_policy_rwsem_write(cpu);
+		kobj = &data->kobj;
+		cmp = &data->kobj_unregister;
+		unlock_policy_rwsem_write(cpu);
+		kobject_put(kobj);
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* when the CPU which is the parent of the kobj is hotplugged
-	 * offline, check for siblings, and create cpufreq sysfs interface
-	 * and symlinks
-	 */
-	if (unlikely(cpumask_weight(data->cpus) > 1)) {
-		/* first sibling now owns the new sysfs dir */
-		cpumask_clear_cpu(cpu, data->cpus);
-		cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);
+		/* we need to make sure that the underlying kobj is actually
+		 * not referenced anymore by anybody before we proceed with
+		 * unloading.
+		 */
+		pr_debug("waiting for dropping of refcount\n");
+		wait_for_completion(cmp);
+		pr_debug("wait complete\n");
 
-		/* finally remove our own symlink */
 		lock_policy_rwsem_write(cpu);
-		__cpufreq_remove_dev(dev, sif);
-	}
-#endif
+		if (cpufreq_driver->exit)
+			cpufreq_driver->exit(data);
+		unlock_policy_rwsem_write(cpu);
 
-	free_cpumask_var(data->related_cpus);
-	free_cpumask_var(data->cpus);
-	kfree(data);
+		free_cpumask_var(data->related_cpus);
+		free_cpumask_var(data->cpus);
+		kfree(data);
+	} else if (cpufreq_driver->target) {
+		__cpufreq_governor(data, CPUFREQ_GOV_START);
+		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+	}
 
 	return 0;
 }
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 9d7732b81044..beef6b54382b 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -170,11 +170,13 @@ static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq)
 static void cpufreq_stats_free_table(unsigned int cpu)
 {
 	struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, cpu);
+
 	if (stat) {
+		pr_debug("%s: Free stat table\n", __func__);
 		kfree(stat->time_in_state);
 		kfree(stat);
+		per_cpu(cpufreq_stats_table, cpu) = NULL;
 	}
-	per_cpu(cpufreq_stats_table, cpu) = NULL;
 }
 
 /* must be called early in the CPU removal sequence (before
@@ -183,8 +185,10 @@ static void cpufreq_stats_free_table(unsigned int cpu)
 static void cpufreq_stats_free_sysfs(unsigned int cpu)
 {
 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-	if (policy && policy->cpu == cpu)
+	if (policy && (cpumask_weight(policy->cpus) == 1)) {
+		pr_debug("%s: Free sysfs stat\n", __func__);
 		sysfs_remove_group(&policy->kobj, &stats_attr_group);
+	}
 	if (policy)
 		cpufreq_cpu_put(policy);
 }
@@ -262,6 +266,19 @@ error_get_fail:
 	return ret;
 }
 
+static void cpufreq_stats_update_policy_cpu(struct cpufreq_policy *policy)
+{
+	struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table,
+			policy->last_cpu);
+
+	pr_debug("Updating stats_table for new_cpu %u from last_cpu %u\n",
+			policy->cpu, policy->last_cpu);
+	per_cpu(cpufreq_stats_table, policy->cpu) = per_cpu(cpufreq_stats_table,
+			policy->last_cpu);
+	per_cpu(cpufreq_stats_table, policy->last_cpu) = NULL;
+	stat->cpu = policy->cpu;
+}
+
 static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
 		unsigned long val, void *data)
 {
@@ -269,6 +286,12 @@ static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
 	struct cpufreq_policy *policy = data;
 	struct cpufreq_frequency_table *table;
 	unsigned int cpu = policy->cpu;
+
+	if (val == CPUFREQ_UPDATE_POLICY_CPU) {
+		cpufreq_stats_update_policy_cpu(policy);
+		return 0;
+	}
+
 	if (val != CPUFREQ_NOTIFY)
 		return 0;
 	table = cpufreq_frequency_get_table(cpu);
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 49cda256efb2..aa5bd39d129e 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -227,6 +227,15 @@ void cpufreq_frequency_table_put_attr(unsigned int cpu)
 }
 EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr);
 
+void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy)
+{
+	pr_debug("Updating show_table for new_cpu %u from last_cpu %u\n",
+			policy->cpu, policy->last_cpu);
+	per_cpu(cpufreq_show_table, policy->cpu) = per_cpu(cpufreq_show_table,
+			policy->last_cpu);
+	per_cpu(cpufreq_show_table, policy->last_cpu) = NULL;
+}
+
 struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu)
 {
 	return per_cpu(cpufreq_show_table, cpu);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index a55b88eaf96a..52be2d0c994a 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -93,7 +93,9 @@ struct cpufreq_policy {
 	cpumask_var_t		related_cpus; /* CPUs with any coordination */
 	unsigned int		shared_type; /* ANY or ALL affected CPUs
 						should set cpufreq */
-	unsigned int		cpu;    /* cpu nr of registered CPU */
+	unsigned int		cpu;    /* cpu nr of CPU managing this policy */
+	unsigned int		last_cpu; /* cpu nr of previous CPU that managed
+					   * this policy */
 	struct cpufreq_cpuinfo	cpuinfo;/* see above */
 
 	unsigned int		min;    /* in kHz */
@@ -112,10 +114,11 @@ struct cpufreq_policy {
 	struct completion	kobj_unregister;
 };
 
-#define CPUFREQ_ADJUST		(0)
-#define CPUFREQ_INCOMPATIBLE	(1)
-#define CPUFREQ_NOTIFY		(2)
-#define CPUFREQ_START		(3)
+#define CPUFREQ_ADJUST			(0)
+#define CPUFREQ_INCOMPATIBLE		(1)
+#define CPUFREQ_NOTIFY			(2)
+#define CPUFREQ_START			(3)
+#define CPUFREQ_UPDATE_POLICY_CPU	(4)
 
 #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
 #define CPUFREQ_SHARED_TYPE_HW	 (1) /* HW does needed coordination */
@@ -405,6 +408,7 @@ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 
 void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 				      unsigned int cpu);
+void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3


From 9d95046e5d6afd6d7ae86fb71ab59c6faf0db8de Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sun, 20 Jan 2013 10:24:28 +0000
Subject: cpufreq: Add a get_current_driver helper

Add a helper function to return cpufreq_driver->name.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 14 ++++++++++++++
 include/linux/cpufreq.h   |  1 +
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 2417576393a6..216c104d51ad 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1395,6 +1395,20 @@ static struct syscore_ops cpufreq_syscore_ops = {
 	.resume		= cpufreq_bp_resume,
 };
 
+/**
+ *	cpufreq_get_current_driver - return current driver's name
+ *
+ *	Return the name string of the currently loaded cpufreq driver
+ *	or NULL, if none.
+ */
+const char *cpufreq_get_current_driver(void)
+{
+	if (cpufreq_driver)
+		return cpufreq_driver->name;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);
 
 /*********************************************************************
  *                     NOTIFIER LISTS INTERFACE                      *
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 52be2d0c994a..1f3a726640e8 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -411,4 +411,5 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
+const char *cpufreq_get_current_driver(void);
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3


From ab45bd9bed36ad6c471b090bb8846ab7228fdf11 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 28 Jan 2013 14:50:39 +0100
Subject: cpufreq: Sort function prototypes properly

Move function prototypes to a place where they logically fit better.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1f3a726640e8..5fdc6c6e3f8a 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -311,6 +311,9 @@ __ATTR(_name, 0444, show_##_name, NULL)
 static struct global_attr _name =		\
 __ATTR(_name, 0644, show_##_name, store_##_name)
 
+struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu);
+void cpufreq_cpu_put(struct cpufreq_policy *data);
+const char *cpufreq_get_current_driver(void);
 
 /*********************************************************************
  *                        CPUFREQ 2.6. INTERFACE                     *
@@ -400,8 +403,6 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 
 /* the following 3 funtions are for cpufreq core use only */
 struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu);
-struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu);
-void   cpufreq_cpu_put(struct cpufreq_policy *data);
 
 /* the following are really really optional */
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
@@ -411,5 +412,4 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
-const char *cpufreq_get_current_driver(void);
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3


From 300586778d405f0a4d1f6dc51fcfb4fed567d020 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 28 Jan 2013 16:13:14 +0000
Subject: ARM / highbank: add support for pl320 IPC

The pl320 IPC allows for interprocessor communication between the
highbank A9 and the EnergyCore Management Engine. The pl320 implements
a straightforward mailbox protocol.

Signed-off-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm/mach-highbank/Kconfig |   2 +
 drivers/Kconfig                |   2 +
 drivers/Makefile               |   1 +
 drivers/mailbox/Kconfig        |  19 ++++
 drivers/mailbox/Makefile       |   1 +
 drivers/mailbox/pl320-ipc.c    | 199 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mailbox.h        |  17 ++++
 7 files changed, 241 insertions(+)
 create mode 100644 drivers/mailbox/Kconfig
 create mode 100644 drivers/mailbox/Makefile
 create mode 100644 drivers/mailbox/pl320-ipc.c
 create mode 100644 include/linux/mailbox.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig
index 551c97e87a78..2388085beaa2 100644
--- a/arch/arm/mach-highbank/Kconfig
+++ b/arch/arm/mach-highbank/Kconfig
@@ -11,5 +11,7 @@ config ARCH_HIGHBANK
 	select GENERIC_CLOCKEVENTS
 	select HAVE_ARM_SCU
 	select HAVE_SMP
+	select MAILBOX
+	select PL320_MBOX
 	select SPARSE_IRQ
 	select USE_OF
diff --git a/drivers/Kconfig b/drivers/Kconfig
index f5fb0722a63a..2b4e89ba15ad 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -134,6 +134,8 @@ source "drivers/hwspinlock/Kconfig"
 
 source "drivers/clocksource/Kconfig"
 
+source "drivers/mailbox/Kconfig"
+
 source "drivers/iommu/Kconfig"
 
 source "drivers/remoteproc/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 7863b9fee50b..a8d32f1094b4 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -130,6 +130,7 @@ obj-y				+= platform/
 #common clk code
 obj-y				+= clk/
 
+obj-$(CONFIG_MAILBOX)		+= mailbox/
 obj-$(CONFIG_HWSPINLOCK)	+= hwspinlock/
 obj-$(CONFIG_NFC)		+= nfc/
 obj-$(CONFIG_IOMMU_SUPPORT)	+= iommu/
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
new file mode 100644
index 000000000000..9545c9f03809
--- /dev/null
+++ b/drivers/mailbox/Kconfig
@@ -0,0 +1,19 @@
+menuconfig MAILBOX
+	bool "Mailbox Hardware Support"
+	help
+	  Mailbox is a framework to control hardware communication between
+	  on-chip processors through queued messages and interrupt driven
+	  signals. Say Y if your platform supports hardware mailboxes.
+
+if MAILBOX
+config PL320_MBOX
+	bool "ARM PL320 Mailbox"
+	depends on ARM_AMBA
+	help
+	  An implementation of the ARM PL320 Interprocessor Communication
+	  Mailbox (IPCM), tailored for the Calxeda Highbank. It is used to
+	  send short messages between Highbank's A9 cores and the EnergyCore
+	  Management Engine, primarily for cpufreq. Say Y here if you want
+	  to use the PL320 IPCM support.
+
+endif
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
new file mode 100644
index 000000000000..543ad6a79505
--- /dev/null
+++ b/drivers/mailbox/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_PL320_MBOX)	+= pl320-ipc.o
diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c
new file mode 100644
index 000000000000..c45b3aedafba
--- /dev/null
+++ b/drivers/mailbox/pl320-ipc.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2012 Calxeda, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/amba/bus.h>
+
+#include <linux/mailbox.h>
+
+#define IPCMxSOURCE(m)		((m) * 0x40)
+#define IPCMxDSET(m)		(((m) * 0x40) + 0x004)
+#define IPCMxDCLEAR(m)		(((m) * 0x40) + 0x008)
+#define IPCMxDSTATUS(m)		(((m) * 0x40) + 0x00C)
+#define IPCMxMODE(m)		(((m) * 0x40) + 0x010)
+#define IPCMxMSET(m)		(((m) * 0x40) + 0x014)
+#define IPCMxMCLEAR(m)		(((m) * 0x40) + 0x018)
+#define IPCMxMSTATUS(m)		(((m) * 0x40) + 0x01C)
+#define IPCMxSEND(m)		(((m) * 0x40) + 0x020)
+#define IPCMxDR(m, dr)		(((m) * 0x40) + ((dr) * 4) + 0x024)
+
+#define IPCMMIS(irq)		(((irq) * 8) + 0x800)
+#define IPCMRIS(irq)		(((irq) * 8) + 0x804)
+
+#define MBOX_MASK(n)		(1 << (n))
+#define IPC_TX_MBOX		1
+#define IPC_RX_MBOX		2
+
+#define CHAN_MASK(n)		(1 << (n))
+#define A9_SOURCE		1
+#define M3_SOURCE		0
+
+static void __iomem *ipc_base;
+static int ipc_irq;
+static DEFINE_MUTEX(ipc_m1_lock);
+static DECLARE_COMPLETION(ipc_completion);
+static ATOMIC_NOTIFIER_HEAD(ipc_notifier);
+
+static inline void set_destination(int source, int mbox)
+{
+	__raw_writel(CHAN_MASK(source), ipc_base + IPCMxDSET(mbox));
+	__raw_writel(CHAN_MASK(source), ipc_base + IPCMxMSET(mbox));
+}
+
+static inline void clear_destination(int source, int mbox)
+{
+	__raw_writel(CHAN_MASK(source), ipc_base + IPCMxDCLEAR(mbox));
+	__raw_writel(CHAN_MASK(source), ipc_base + IPCMxMCLEAR(mbox));
+}
+
+static void __ipc_send(int mbox, u32 *data)
+{
+	int i;
+	for (i = 0; i < 7; i++)
+		__raw_writel(data[i], ipc_base + IPCMxDR(mbox, i));
+	__raw_writel(0x1, ipc_base + IPCMxSEND(mbox));
+}
+
+static u32 __ipc_rcv(int mbox, u32 *data)
+{
+	int i;
+	for (i = 0; i < 7; i++)
+		data[i] = __raw_readl(ipc_base + IPCMxDR(mbox, i));
+	return data[1];
+}
+
+/* blocking implmentation from the A9 side, not usuable in interrupts! */
+int pl320_ipc_transmit(u32 *data)
+{
+	int ret;
+
+	mutex_lock(&ipc_m1_lock);
+
+	init_completion(&ipc_completion);
+	__ipc_send(IPC_TX_MBOX, data);
+	ret = wait_for_completion_timeout(&ipc_completion,
+					  msecs_to_jiffies(1000));
+	if (ret == 0) {
+		ret = -ETIMEDOUT;
+		goto out;
+	}
+
+	ret = __ipc_rcv(IPC_TX_MBOX, data);
+out:
+	mutex_unlock(&ipc_m1_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pl320_ipc_transmit);
+
+static irqreturn_t ipc_handler(int irq, void *dev)
+{
+	u32 irq_stat;
+	u32 data[7];
+
+	irq_stat = __raw_readl(ipc_base + IPCMMIS(1));
+	if (irq_stat & MBOX_MASK(IPC_TX_MBOX)) {
+		__raw_writel(0, ipc_base + IPCMxSEND(IPC_TX_MBOX));
+		complete(&ipc_completion);
+	}
+	if (irq_stat & MBOX_MASK(IPC_RX_MBOX)) {
+		__ipc_rcv(IPC_RX_MBOX, data);
+		atomic_notifier_call_chain(&ipc_notifier, data[0], data + 1);
+		__raw_writel(2, ipc_base + IPCMxSEND(IPC_RX_MBOX));
+	}
+
+	return IRQ_HANDLED;
+}
+
+int pl320_ipc_register_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&ipc_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(pl320_ipc_register_notifier);
+
+int pl320_ipc_unregister_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&ipc_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(pl320_ipc_unregister_notifier);
+
+static int __init pl320_probe(struct amba_device *adev,
+				const struct amba_id *id)
+{
+	int ret;
+
+	ipc_base = ioremap(adev->res.start, resource_size(&adev->res));
+	if (ipc_base == NULL)
+		return -ENOMEM;
+
+	__raw_writel(0, ipc_base + IPCMxSEND(IPC_TX_MBOX));
+
+	ipc_irq = adev->irq[0];
+	ret = request_irq(ipc_irq, ipc_handler, 0, dev_name(&adev->dev), NULL);
+	if (ret < 0)
+		goto err;
+
+	/* Init slow mailbox */
+	__raw_writel(CHAN_MASK(A9_SOURCE),
+			ipc_base + IPCMxSOURCE(IPC_TX_MBOX));
+	__raw_writel(CHAN_MASK(M3_SOURCE),
+			ipc_base + IPCMxDSET(IPC_TX_MBOX));
+	__raw_writel(CHAN_MASK(M3_SOURCE) | CHAN_MASK(A9_SOURCE),
+		     ipc_base + IPCMxMSET(IPC_TX_MBOX));
+
+	/* Init receive mailbox */
+	__raw_writel(CHAN_MASK(M3_SOURCE),
+			ipc_base + IPCMxSOURCE(IPC_RX_MBOX));
+	__raw_writel(CHAN_MASK(A9_SOURCE),
+			ipc_base + IPCMxDSET(IPC_RX_MBOX));
+	__raw_writel(CHAN_MASK(M3_SOURCE) | CHAN_MASK(A9_SOURCE),
+		     ipc_base + IPCMxMSET(IPC_RX_MBOX));
+
+	return 0;
+err:
+	iounmap(ipc_base);
+	return ret;
+}
+
+static struct amba_id pl320_ids[] = {
+	{
+		.id	= 0x00041320,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver pl320_driver = {
+	.drv = {
+		.name	= "pl320",
+	},
+	.id_table	= pl320_ids,
+	.probe		= pl320_probe,
+};
+
+static int __init ipc_init(void)
+{
+	return amba_driver_register(&pl320_driver);
+}
+module_init(ipc_init);
diff --git a/include/linux/mailbox.h b/include/linux/mailbox.h
new file mode 100644
index 000000000000..5161f63ec1c8
--- /dev/null
+++ b/include/linux/mailbox.h
@@ -0,0 +1,17 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+int pl320_ipc_transmit(u32 *data);
+int pl320_ipc_register_notifier(struct notifier_block *nb);
+int pl320_ipc_unregister_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From 951fc5f45836988c7df1d05c7f4658f331e7a920 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 31 Jan 2013 02:03:53 +0000
Subject: cpufreq: Update Documentation for cpus and related_cpus

Documentation related to cpus and related_cpus is confusing and not very clear.
Over that CPUFreq core has seen much changes recently. Lets update documentation
and comments for cpus and related_cpus.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/cpu-drivers.txt | 6 ++++++
 Documentation/cpu-freq/user-guide.txt  | 8 ++++----
 include/linux/cpufreq.h                | 6 ++++--
 3 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index c436096351f8..72f70b16d299 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -111,6 +111,12 @@ policy->governor		must contain the "default policy" for
 For setting some of these values, the frequency table helpers might be
 helpful. See the section 2 for more information on them.
 
+SMP systems normally have same clock source for a group of cpus. For these the
+.init() would be called only once for the first online cpu. Here the .init()
+routine must initialize policy->cpus with mask of all possible cpus (Online +
+Offline) that share the clock. Then the core would copy this mask onto
+policy->related_cpus and will reset policy->cpus to carry only online cpus.
+
 
 1.3 verify
 ------------
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
index 04f6b32993e6..ff2f28332cc4 100644
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -190,11 +190,11 @@ scaling_max_freq		show the current "policy limits" (in
 				first set scaling_max_freq, then
 				scaling_min_freq.
 
-affected_cpus :			List of CPUs that require software coordination
-				of frequency.
+affected_cpus :			List of Online CPUs that require software
+				coordination of frequency.
 
-related_cpus :			List of CPUs that need some sort of frequency
-				coordination, whether software or hardware.
+related_cpus :			List of Online + Offline CPUs that need software
+				coordination of frequency.
 
 scaling_driver :		Hardware driver for cpufreq.
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 5fdc6c6e3f8a..753b198750cf 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -89,8 +89,10 @@ struct cpufreq_real_policy {
 };
 
 struct cpufreq_policy {
-	cpumask_var_t		cpus;	/* CPUs requiring sw coordination */
-	cpumask_var_t		related_cpus; /* CPUs with any coordination */
+	/* CPUs sharing clock, require sw coordination */
+	cpumask_var_t		cpus;	/* Online CPUs only */
+	cpumask_var_t		related_cpus; /* Online + Offline CPUs */
+
 	unsigned int		shared_type; /* ANY or ALL affected CPUs
 						should set cpufreq */
 	unsigned int		cpu;    /* cpu nr of CPU managing this policy */
-- 
cgit v1.2.3


From 2624f90c16413990ecb0414400174a066319a9f5 Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@linaro.org>
Date: Thu, 31 Jan 2013 09:44:40 +0000
Subject: cpufreq: governors: implement generic policy_is_shared

Implement a generic helper function policy_is_shared() to replace the
current dbs_sw_coordinated_cpus() at cpufreq level, so that it can be
used by code other than cpufreq governors.

Suggested-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/acpi-cpufreq.c         | 2 +-
 drivers/cpufreq/cpufreq_conservative.c | 2 +-
 drivers/cpufreq/cpufreq_governor.c     | 8 --------
 drivers/cpufreq/cpufreq_governor.h     | 1 -
 drivers/cpufreq/cpufreq_ondemand.c     | 2 +-
 drivers/cpufreq/cpufreq_stats.c        | 2 +-
 include/linux/cpufreq.h                | 5 +++++
 7 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 22f9b47c5c74..937bc286591f 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -734,7 +734,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 #ifdef CONFIG_SMP
 	dmi_check_system(sw_any_bug_dmi_table);
-	if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
+	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
 		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
 		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
 	}
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 5d8e8942ec97..653fb0652412 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -154,7 +154,7 @@ static void cs_dbs_timer(struct work_struct *work)
 	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
 			struct cs_cpu_dbs_info_s, cdbs.work.work);
 
-	if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) {
+	if (policy_is_shared(dbs_info->cdbs.cur_policy)) {
 		cs_timer_coordinated(dbs_info, dw);
 	} else {
 		mutex_lock(&dbs_info->cdbs.timer_mutex);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 46f96a4cebf9..67e235acf43b 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -161,14 +161,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 }
 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 
-bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs)
-{
-	struct cpufreq_policy *policy = cdbs->cur_policy;
-
-	return cpumask_weight(policy->cpus) > 1;
-}
-EXPORT_SYMBOL_GPL(dbs_sw_coordinated_cpus);
-
 static inline void dbs_timer_init(struct dbs_data *dbs_data, int cpu,
 				  unsigned int sampling_rate)
 {
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index aaf073daa9fb..b72e628e7ed6 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -172,7 +172,6 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate)
 
 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall);
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
-bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs);
 int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 		struct cpufreq_policy *policy, unsigned int event);
 #endif /* _CPUFREQ_GOVERNER_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 1017b90b902e..5ae84ffeafb3 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -277,7 +277,7 @@ static void od_dbs_timer(struct work_struct *work)
 	struct od_cpu_dbs_info_s *dbs_info =
 		container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
 
-	if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) {
+	if (policy_is_shared(dbs_info->cdbs.cur_policy)) {
 		od_timer_coordinated(dbs_info, dw);
 	} else {
 		mutex_lock(&dbs_info->cdbs.timer_mutex);
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index beef6b54382b..572124c6e36b 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -185,7 +185,7 @@ static void cpufreq_stats_free_table(unsigned int cpu)
 static void cpufreq_stats_free_sysfs(unsigned int cpu)
 {
 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-	if (policy && (cpumask_weight(policy->cpus) == 1)) {
+	if (policy && !policy_is_shared(policy)) {
 		pr_debug("%s: Free sysfs stat\n", __func__);
 		sysfs_remove_group(&policy->kobj, &stats_attr_group);
 	}
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 753b198750cf..feb360c8aa88 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -127,6 +127,11 @@ struct cpufreq_policy {
 #define CPUFREQ_SHARED_TYPE_ALL	 (2) /* All dependent CPUs should set freq */
 #define CPUFREQ_SHARED_TYPE_ANY	 (3) /* Freq can be set from any dependent CPU*/
 
+static inline bool policy_is_shared(struct cpufreq_policy *policy)
+{
+	return cpumask_weight(policy->cpus) > 1;
+}
+
 /******************** cpufreq transition notifiers *******************/
 
 #define CPUFREQ_PRECHANGE	(0)
-- 
cgit v1.2.3


From b394058f064848deac7a7cd6942b6521d7b3fe1d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 1 Feb 2013 05:42:58 +0000
Subject: cpufreq: governors: Reset tunables only for
 cpufreq_unregister_governor()

Currently, whenever governor->governor() is called for CPUFRREQ_GOV_START event
we reset few tunables of governor. Which isn't correct, as this routine is
called for every cpu hot-[un]plugging event. We should actually be resetting
these only when the governor module is removed and re-installed.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c          |  4 ++++
 drivers/cpufreq/cpufreq_governor.c | 24 ++++++++++++++++--------
 include/linux/cpufreq.h            |  1 +
 3 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1cea7a1eac13..0b4be4481433 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1562,6 +1562,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 						policy->cpu, event);
 	ret = policy->governor->governor(policy, event);
 
+	if (!policy->governor->initialized && (event == CPUFREQ_GOV_START))
+		policy->governor->initialized = 1;
+
 	/* we keep one module reference alive for
 			each CPU governed by this CPU */
 	if ((event != CPUFREQ_GOV_START) || ret)
@@ -1585,6 +1588,7 @@ int cpufreq_register_governor(struct cpufreq_governor *governor)
 
 	mutex_lock(&cpufreq_governor_mutex);
 
+	governor->initialized = 0;
 	err = -EBUSY;
 	if (__find_governor(governor->name) == NULL) {
 		err = 0;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 7aaa9b151940..79795c4bf611 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -254,11 +254,6 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 			return rc;
 		}
 
-		/* policy latency is in nS. Convert it to uS first */
-		latency = policy->cpuinfo.transition_latency / 1000;
-		if (latency == 0)
-			latency = 1;
-
 		/*
 		 * conservative does not implement micro like ondemand
 		 * governor, thus we are bound to jiffes/HZ
@@ -270,20 +265,33 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 			cpufreq_register_notifier(cs_ops->notifier_block,
 					CPUFREQ_TRANSITION_NOTIFIER);
 
-			dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
-				jiffies_to_usecs(10);
+			if (!policy->governor->initialized)
+				dbs_data->min_sampling_rate =
+					MIN_SAMPLING_RATE_RATIO *
+					jiffies_to_usecs(10);
 		} else {
 			od_dbs_info->rate_mult = 1;
 			od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
 			od_ops->powersave_bias_init_cpu(cpu);
-			od_tuners->io_is_busy = od_ops->io_busy();
+
+			if (!policy->governor->initialized)
+				od_tuners->io_is_busy = od_ops->io_busy();
 		}
 
+		if (policy->governor->initialized)
+			goto unlock;
+
+		/* policy latency is in nS. Convert it to uS first */
+		latency = policy->cpuinfo.transition_latency / 1000;
+		if (latency == 0)
+			latency = 1;
+
 		/* Bring kernel and HW constraints together */
 		dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
 				MIN_LATENCY_MULTIPLIER * latency);
 		*sampling_rate = max(dbs_data->min_sampling_rate, latency *
 				LATENCY_MULTIPLIER);
+unlock:
 		mutex_unlock(&dbs_data->mutex);
 
 		/* Initiate timer time stamp */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index feb360c8aa88..6bf3f2d12c90 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -183,6 +183,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu
 
 struct cpufreq_governor {
 	char	name[CPUFREQ_NAME_LEN];
+	int	initialized;
 	int	(*governor)	(struct cpufreq_policy *policy,
 				 unsigned int event);
 	ssize_t	(*show_setspeed)	(struct cpufreq_policy *policy,
-- 
cgit v1.2.3


From 62b36cc1c83aca1cd252772e82cbc5d9ef8ff25b Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 1 Feb 2013 06:40:02 +0000
Subject: cpufreq: Remove unnecessary use of policy->shared_type

policy->shared_type field was added only for SoCs with ACPI support:

commit 3b2d99429e3386b6e2ac949fc72486509c8bbe36
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date:   Wed Dec 14 15:05:00 2005 -0500

    P-state software coordination for ACPI core

    http://bugzilla.kernel.org/show_bug.cgi?id=5737

Many non-ACPI systems are filling this field by mistake, which makes its usage
confusing. Lets clean it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm/mach-tegra/cpu-tegra.c  | 1 -
 drivers/cpufreq/cpufreq-cpu0.c   | 1 -
 drivers/cpufreq/db8500-cpufreq.c | 2 --
 drivers/cpufreq/omap-cpufreq.c   | 4 +---
 include/linux/cpufreq.h          | 3 ++-
 5 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c
index e7ddcb2b5261..a36a03d3c9a0 100644
--- a/arch/arm/mach-tegra/cpu-tegra.c
+++ b/arch/arm/mach-tegra/cpu-tegra.c
@@ -243,7 +243,6 @@ static int tegra_cpu_init(struct cpufreq_policy *policy)
 	/* FIXME: what's the actual transition time? */
 	policy->cpuinfo.transition_latency = 300 * 1000;
 
-	policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
 	cpumask_copy(policy->cpus, cpu_possible_mask);
 
 	if (policy->cpu == 0)
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index a108f66271ee..4e5b7fb8927c 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -146,7 +146,6 @@ static int cpu0_cpufreq_init(struct cpufreq_policy *policy)
 	 * share the clock and voltage and clock.  Use cpufreq affected_cpus
 	 * interface to have all CPUs scaled together.
 	 */
-	policy->shared_type = CPUFREQ_SHARED_TYPE_ANY;
 	cpumask_setall(policy->cpus);
 
 	cpufreq_frequency_table_get_attr(freq_table, policy->cpu);
diff --git a/drivers/cpufreq/db8500-cpufreq.c b/drivers/cpufreq/db8500-cpufreq.c
index e12dff601b8a..79a84860ea56 100644
--- a/drivers/cpufreq/db8500-cpufreq.c
+++ b/drivers/cpufreq/db8500-cpufreq.c
@@ -130,8 +130,6 @@ static int __cpuinit db8500_cpufreq_init(struct cpufreq_policy *policy)
 	/* policy sharing between dual CPUs */
 	cpumask_setall(policy->cpus);
 
-	policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-
 	return 0;
 }
 
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 97102b05843f..9128c07bafba 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -214,10 +214,8 @@ static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy)
 	 * interface to handle this scenario. Additional is_smp() check
 	 * is to keep SMP_ON_UP build working.
 	 */
-	if (is_smp()) {
-		policy->shared_type = CPUFREQ_SHARED_TYPE_ANY;
+	if (is_smp())
 		cpumask_setall(policy->cpus);
-	}
 
 	/* FIXME: what's the actual transition time? */
 	policy->cpuinfo.transition_latency = 300 * 1000;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 6bf3f2d12c90..a22944ca0526 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -93,7 +93,7 @@ struct cpufreq_policy {
 	cpumask_var_t		cpus;	/* Online CPUs only */
 	cpumask_var_t		related_cpus; /* Online + Offline CPUs */
 
-	unsigned int		shared_type; /* ANY or ALL affected CPUs
+	unsigned int		shared_type; /* ACPI: ANY or ALL affected CPUs
 						should set cpufreq */
 	unsigned int		cpu;    /* cpu nr of CPU managing this policy */
 	unsigned int		last_cpu; /* cpu nr of previous CPU that managed
@@ -122,6 +122,7 @@ struct cpufreq_policy {
 #define CPUFREQ_START			(3)
 #define CPUFREQ_UPDATE_POLICY_CPU	(4)
 
+/* Only for ACPI */
 #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
 #define CPUFREQ_SHARED_TYPE_HW	 (1) /* HW does needed coordination */
 #define CPUFREQ_SHARED_TYPE_ALL	 (2) /* All dependent CPUs should set freq */
-- 
cgit v1.2.3


From cfad1ba87150e198be9ea32367a24e500e59de2c Mon Sep 17 00:00:00 2001
From: Eric Lapuyade <eric.lapuyade@linux.intel.com>
Date: Tue, 18 Dec 2012 14:53:53 +0100
Subject: NFC: Initial support for Inside Secure microread

Inside Secure microread is an HCI based NFC chipset.
This initial support includes reader and p2p (Target and initiator) modes.

Signed-off-by: Eric Lapuyade <eric.lapuyade@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/nfc/Kconfig                     |   1 +
 drivers/nfc/Makefile                    |   1 +
 drivers/nfc/microread/Kconfig           |  13 +
 drivers/nfc/microread/Makefile          |   5 +
 drivers/nfc/microread/microread.c       | 728 ++++++++++++++++++++++++++++++++
 drivers/nfc/microread/microread.h       |  33 ++
 include/linux/platform_data/microread.h |  35 ++
 7 files changed, 816 insertions(+)
 create mode 100644 drivers/nfc/microread/Kconfig
 create mode 100644 drivers/nfc/microread/Makefile
 create mode 100644 drivers/nfc/microread/microread.c
 create mode 100644 drivers/nfc/microread/microread.h
 create mode 100644 include/linux/platform_data/microread.h

(limited to 'include/linux')

diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index 80c728b28828..e57034971ccc 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -27,5 +27,6 @@ config NFC_WILINK
 	  into the kernel or say M to compile it as module.
 
 source "drivers/nfc/pn544/Kconfig"
+source "drivers/nfc/microread/Kconfig"
 
 endmenu
diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile
index 574bbc04d97a..a189ada0926a 100644
--- a/drivers/nfc/Makefile
+++ b/drivers/nfc/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-$(CONFIG_NFC_PN544)		+= pn544/
+obj-$(CONFIG_NFC_MICROREAD)	+= microread/
 obj-$(CONFIG_NFC_PN533)		+= pn533.o
 obj-$(CONFIG_NFC_WILINK)	+= nfcwilink.o
 
diff --git a/drivers/nfc/microread/Kconfig b/drivers/nfc/microread/Kconfig
new file mode 100644
index 000000000000..5b89d011d098
--- /dev/null
+++ b/drivers/nfc/microread/Kconfig
@@ -0,0 +1,13 @@
+config NFC_MICROREAD
+	tristate "Inside Secure microread NFC driver"
+	depends on NFC_HCI
+	select CRC_CCITT
+	default n
+	---help---
+	  This module contains the main code for Inside Secure microread
+	  NFC chipsets. It implements the chipset HCI logic and hooks into
+	  the NFC kernel APIs. Physical layers will register against it.
+
+	  To compile this driver as a module, choose m here. The module will
+	  be called microread.
+	  Say N if unsure.
diff --git a/drivers/nfc/microread/Makefile b/drivers/nfc/microread/Makefile
new file mode 100644
index 000000000000..9ce2c53f49a7
--- /dev/null
+++ b/drivers/nfc/microread/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for Microread HCI based NFC driver
+#
+
+obj-$(CONFIG_NFC_MICROREAD)     += microread.o
diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c
new file mode 100644
index 000000000000..3420d833db17
--- /dev/null
+++ b/drivers/nfc/microread/microread.c
@@ -0,0 +1,728 @@
+/*
+ * HCI based Driver for Inside Secure microread NFC Chip
+ *
+ * Copyright (C) 2013  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/crc-ccitt.h>
+
+#include <linux/nfc.h>
+#include <net/nfc/nfc.h>
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+
+#include "microread.h"
+
+/* Proprietary gates, events, commands and registers */
+/* Admin */
+#define MICROREAD_GATE_ID_ADM NFC_HCI_ADMIN_GATE
+#define MICROREAD_GATE_ID_MGT 0x01
+#define MICROREAD_GATE_ID_OS 0x02
+#define MICROREAD_GATE_ID_TESTRF 0x03
+#define MICROREAD_GATE_ID_LOOPBACK NFC_HCI_LOOPBACK_GATE
+#define MICROREAD_GATE_ID_IDT NFC_HCI_ID_MGMT_GATE
+#define MICROREAD_GATE_ID_LMS NFC_HCI_LINK_MGMT_GATE
+
+/* Reader */
+#define MICROREAD_GATE_ID_MREAD_GEN 0x10
+#define MICROREAD_GATE_ID_MREAD_ISO_B NFC_HCI_RF_READER_B_GATE
+#define MICROREAD_GATE_ID_MREAD_NFC_T1 0x12
+#define MICROREAD_GATE_ID_MREAD_ISO_A NFC_HCI_RF_READER_A_GATE
+#define MICROREAD_GATE_ID_MREAD_NFC_T3 0x14
+#define MICROREAD_GATE_ID_MREAD_ISO_15_3 0x15
+#define MICROREAD_GATE_ID_MREAD_ISO_15_2 0x16
+#define MICROREAD_GATE_ID_MREAD_ISO_B_3 0x17
+#define MICROREAD_GATE_ID_MREAD_BPRIME 0x18
+#define MICROREAD_GATE_ID_MREAD_ISO_A_3 0x19
+
+/* Card */
+#define MICROREAD_GATE_ID_MCARD_GEN 0x20
+#define MICROREAD_GATE_ID_MCARD_ISO_B 0x21
+#define MICROREAD_GATE_ID_MCARD_BPRIME 0x22
+#define MICROREAD_GATE_ID_MCARD_ISO_A 0x23
+#define MICROREAD_GATE_ID_MCARD_NFC_T3 0x24
+#define MICROREAD_GATE_ID_MCARD_ISO_15_3 0x25
+#define MICROREAD_GATE_ID_MCARD_ISO_15_2 0x26
+#define MICROREAD_GATE_ID_MCARD_ISO_B_2 0x27
+#define MICROREAD_GATE_ID_MCARD_ISO_CUSTOM 0x28
+#define MICROREAD_GATE_ID_SECURE_ELEMENT 0x2F
+
+/* P2P */
+#define MICROREAD_GATE_ID_P2P_GEN 0x30
+#define MICROREAD_GATE_ID_P2P_TARGET 0x31
+#define MICROREAD_PAR_P2P_TARGET_MODE 0x01
+#define MICROREAD_PAR_P2P_TARGET_GT 0x04
+#define MICROREAD_GATE_ID_P2P_INITIATOR 0x32
+#define MICROREAD_PAR_P2P_INITIATOR_GI 0x01
+#define MICROREAD_PAR_P2P_INITIATOR_GT 0x03
+
+/* Those pipes are created/opened by default in the chip */
+#define MICROREAD_PIPE_ID_LMS 0x00
+#define MICROREAD_PIPE_ID_ADMIN 0x01
+#define MICROREAD_PIPE_ID_MGT 0x02
+#define MICROREAD_PIPE_ID_OS 0x03
+#define MICROREAD_PIPE_ID_HDS_LOOPBACK 0x04
+#define MICROREAD_PIPE_ID_HDS_IDT 0x05
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_B 0x08
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_BPRIME 0x09
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_A 0x0A
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_15_3 0x0B
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_15_2 0x0C
+#define MICROREAD_PIPE_ID_HDS_MCARD_NFC_T3 0x0D
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_B_2 0x0E
+#define MICROREAD_PIPE_ID_HDS_MCARD_CUSTOM 0x0F
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_B 0x10
+#define MICROREAD_PIPE_ID_HDS_MREAD_NFC_T1 0x11
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_A 0x12
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_15_3 0x13
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_15_2 0x14
+#define MICROREAD_PIPE_ID_HDS_MREAD_NFC_T3 0x15
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_B_3 0x16
+#define MICROREAD_PIPE_ID_HDS_MREAD_BPRIME 0x17
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_A_3 0x18
+#define MICROREAD_PIPE_ID_HDS_MREAD_GEN 0x1B
+#define MICROREAD_PIPE_ID_HDS_STACKED_ELEMENT 0x1C
+#define MICROREAD_PIPE_ID_HDS_INSTANCES 0x1D
+#define MICROREAD_PIPE_ID_HDS_TESTRF 0x1E
+#define MICROREAD_PIPE_ID_HDS_P2P_TARGET 0x1F
+#define MICROREAD_PIPE_ID_HDS_P2P_INITIATOR 0x20
+
+/* Events */
+#define MICROREAD_EVT_MREAD_DISCOVERY_OCCURED NFC_HCI_EVT_TARGET_DISCOVERED
+#define MICROREAD_EVT_MREAD_CARD_FOUND 0x3D
+#define MICROREAD_EMCF_A_ATQA 0
+#define MICROREAD_EMCF_A_SAK 2
+#define MICROREAD_EMCF_A_LEN 3
+#define MICROREAD_EMCF_A_UID 4
+#define MICROREAD_EMCF_A3_ATQA 0
+#define MICROREAD_EMCF_A3_SAK 2
+#define MICROREAD_EMCF_A3_LEN 3
+#define MICROREAD_EMCF_A3_UID 4
+#define MICROREAD_EMCF_B_UID 0
+#define MICROREAD_EMCF_T1_ATQA 0
+#define MICROREAD_EMCF_T1_UID 4
+#define MICROREAD_EMCF_T3_UID 0
+#define MICROREAD_EVT_MREAD_DISCOVERY_START NFC_HCI_EVT_READER_REQUESTED
+#define MICROREAD_EVT_MREAD_DISCOVERY_START_SOME 0x3E
+#define MICROREAD_EVT_MREAD_DISCOVERY_STOP NFC_HCI_EVT_END_OPERATION
+#define MICROREAD_EVT_MREAD_SIM_REQUESTS 0x3F
+#define MICROREAD_EVT_MCARD_EXCHANGE NFC_HCI_EVT_TARGET_DISCOVERED
+#define MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_TO_RF 0x20
+#define MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_FROM_RF 0x21
+#define MICROREAD_EVT_MCARD_FIELD_ON 0x11
+#define MICROREAD_EVT_P2P_TARGET_ACTIVATED 0x13
+#define MICROREAD_EVT_P2P_TARGET_DEACTIVATED 0x12
+#define MICROREAD_EVT_MCARD_FIELD_OFF 0x14
+
+/* Commands */
+#define MICROREAD_CMD_MREAD_EXCHANGE 0x10
+#define MICROREAD_CMD_MREAD_SUBSCRIBE 0x3F
+
+/* Hosts IDs */
+#define MICROREAD_ELT_ID_HDS NFC_HCI_TERMINAL_HOST_ID
+#define MICROREAD_ELT_ID_SIM NFC_HCI_UICC_HOST_ID
+#define MICROREAD_ELT_ID_SE1 0x03
+#define MICROREAD_ELT_ID_SE2 0x04
+#define MICROREAD_ELT_ID_SE3 0x05
+
+static struct nfc_hci_gate microread_gates[] = {
+	{MICROREAD_GATE_ID_ADM, MICROREAD_PIPE_ID_ADMIN},
+	{MICROREAD_GATE_ID_LOOPBACK, MICROREAD_PIPE_ID_HDS_LOOPBACK},
+	{MICROREAD_GATE_ID_IDT, MICROREAD_PIPE_ID_HDS_IDT},
+	{MICROREAD_GATE_ID_LMS, MICROREAD_PIPE_ID_LMS},
+	{MICROREAD_GATE_ID_MREAD_ISO_B, MICROREAD_PIPE_ID_HDS_MREAD_ISO_B},
+	{MICROREAD_GATE_ID_MREAD_ISO_A, MICROREAD_PIPE_ID_HDS_MREAD_ISO_A},
+	{MICROREAD_GATE_ID_MREAD_ISO_A_3, MICROREAD_PIPE_ID_HDS_MREAD_ISO_A_3},
+	{MICROREAD_GATE_ID_MGT, MICROREAD_PIPE_ID_MGT},
+	{MICROREAD_GATE_ID_OS, MICROREAD_PIPE_ID_OS},
+	{MICROREAD_GATE_ID_MREAD_NFC_T1, MICROREAD_PIPE_ID_HDS_MREAD_NFC_T1},
+	{MICROREAD_GATE_ID_MREAD_NFC_T3, MICROREAD_PIPE_ID_HDS_MREAD_NFC_T3},
+	{MICROREAD_GATE_ID_P2P_TARGET, MICROREAD_PIPE_ID_HDS_P2P_TARGET},
+	{MICROREAD_GATE_ID_P2P_INITIATOR, MICROREAD_PIPE_ID_HDS_P2P_INITIATOR}
+};
+
+/* Largest headroom needed for outgoing custom commands */
+#define MICROREAD_CMDS_HEADROOM	2
+#define MICROREAD_CMD_TAILROOM	2
+
+struct microread_info {
+	struct nfc_phy_ops *phy_ops;
+	void *phy_id;
+
+	struct nfc_hci_dev *hdev;
+
+	int async_cb_type;
+	data_exchange_cb_t async_cb;
+	void *async_cb_context;
+};
+
+static int microread_open(struct nfc_hci_dev *hdev)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+
+	return info->phy_ops->enable(info->phy_id);
+}
+
+static void microread_close(struct nfc_hci_dev *hdev)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+
+	info->phy_ops->disable(info->phy_id);
+}
+
+static int microread_hci_ready(struct nfc_hci_dev *hdev)
+{
+	int r;
+	u8 param[4];
+
+	param[0] = 0x03;
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_ISO_A,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, param, 1, NULL);
+	if (r)
+		return r;
+
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_ISO_A_3,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, NULL, 0, NULL);
+	if (r)
+		return r;
+
+	param[0] = 0x00;
+	param[1] = 0x03;
+	param[2] = 0x00;
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_ISO_B,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, param, 3, NULL);
+	if (r)
+		return r;
+
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_NFC_T1,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, NULL, 0, NULL);
+	if (r)
+		return r;
+
+	param[0] = 0xFF;
+	param[1] = 0xFF;
+	param[2] = 0x00;
+	param[3] = 0x00;
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_NFC_T3,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, param, 4, NULL);
+
+	return r;
+}
+
+static int microread_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+
+	return info->phy_ops->write(info->phy_id, skb);
+}
+
+static int microread_start_poll(struct nfc_hci_dev *hdev,
+				u32 im_protocols, u32 tm_protocols)
+{
+	int r;
+
+	u8 param[2];
+	u8 mode;
+
+	param[0] = 0x00;
+	param[1] = 0x00;
+
+	if (im_protocols & NFC_PROTO_ISO14443_MASK)
+		param[0] |= (1 << 2);
+
+	if (im_protocols & NFC_PROTO_ISO14443_B_MASK)
+		param[0] |= 1;
+
+	if (im_protocols & NFC_PROTO_MIFARE_MASK)
+		param[1] |= 1;
+
+	if (im_protocols & NFC_PROTO_JEWEL_MASK)
+		param[0] |= (1 << 1);
+
+	if (im_protocols & NFC_PROTO_FELICA_MASK)
+		param[0] |= (1 << 5);
+
+	if (im_protocols & NFC_PROTO_NFC_DEP_MASK)
+		param[1] |= (1 << 1);
+
+	if ((im_protocols | tm_protocols) & NFC_PROTO_NFC_DEP_MASK) {
+		hdev->gb = nfc_get_local_general_bytes(hdev->ndev,
+						       &hdev->gb_len);
+		if (hdev->gb == NULL || hdev->gb_len == 0) {
+			im_protocols &= ~NFC_PROTO_NFC_DEP_MASK;
+			tm_protocols &= ~NFC_PROTO_NFC_DEP_MASK;
+		}
+	}
+
+	r = nfc_hci_send_event(hdev, MICROREAD_GATE_ID_MREAD_ISO_A,
+			       MICROREAD_EVT_MREAD_DISCOVERY_STOP, NULL, 0);
+	if (r)
+		return r;
+
+	mode = 0xff;
+	r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+			      MICROREAD_PAR_P2P_TARGET_MODE, &mode, 1);
+	if (r)
+		return r;
+
+	if (im_protocols & NFC_PROTO_NFC_DEP_MASK) {
+		r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_INITIATOR,
+				      MICROREAD_PAR_P2P_INITIATOR_GI,
+				      hdev->gb, hdev->gb_len);
+		if (r)
+			return r;
+	}
+
+	if (tm_protocols & NFC_PROTO_NFC_DEP_MASK) {
+		r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+				      MICROREAD_PAR_P2P_TARGET_GT,
+				      hdev->gb, hdev->gb_len);
+		if (r)
+			return r;
+
+		mode = 0x02;
+		r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+				      MICROREAD_PAR_P2P_TARGET_MODE, &mode, 1);
+		if (r)
+			return r;
+	}
+
+	return nfc_hci_send_event(hdev, MICROREAD_GATE_ID_MREAD_ISO_A,
+				  MICROREAD_EVT_MREAD_DISCOVERY_START_SOME,
+				  param, 2);
+}
+
+static int microread_dep_link_up(struct nfc_hci_dev *hdev,
+				struct nfc_target *target, u8 comm_mode,
+				u8 *gb, size_t gb_len)
+{
+	struct sk_buff *rgb_skb = NULL;
+	int r;
+
+	r = nfc_hci_get_param(hdev, target->hci_reader_gate,
+			      MICROREAD_PAR_P2P_INITIATOR_GT, &rgb_skb);
+	if (r < 0)
+		return r;
+
+	if (rgb_skb->len == 0 || rgb_skb->len > NFC_GB_MAXSIZE) {
+		r = -EPROTO;
+		goto exit;
+	}
+
+	r = nfc_set_remote_general_bytes(hdev->ndev, rgb_skb->data,
+					 rgb_skb->len);
+	if (r == 0)
+		r = nfc_dep_link_is_up(hdev->ndev, target->idx, comm_mode,
+				       NFC_RF_INITIATOR);
+exit:
+	kfree_skb(rgb_skb);
+
+	return r;
+}
+
+static int microread_dep_link_down(struct nfc_hci_dev *hdev)
+{
+	return nfc_hci_send_event(hdev, MICROREAD_GATE_ID_P2P_INITIATOR,
+				  MICROREAD_EVT_MREAD_DISCOVERY_STOP, NULL, 0);
+}
+
+static int microread_target_from_gate(struct nfc_hci_dev *hdev, u8 gate,
+				      struct nfc_target *target)
+{
+	switch (gate) {
+	case MICROREAD_GATE_ID_P2P_INITIATOR:
+		target->supported_protocols = NFC_PROTO_NFC_DEP_MASK;
+		break;
+	default:
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+static int microread_complete_target_discovered(struct nfc_hci_dev *hdev,
+						u8 gate,
+						struct nfc_target *target)
+{
+	return 0;
+}
+
+#define MICROREAD_CB_TYPE_READER_ALL 1
+
+static void microread_im_transceive_cb(void *context, struct sk_buff *skb,
+				       int err)
+{
+	struct microread_info *info = context;
+
+	switch (info->async_cb_type) {
+	case MICROREAD_CB_TYPE_READER_ALL:
+		if (err == 0) {
+			if (skb->len == 0) {
+				err = -EPROTO;
+				kfree_skb(skb);
+				info->async_cb(info->async_cb_context, NULL,
+					       -EPROTO);
+				return;
+			}
+
+			if (skb->data[skb->len - 1] != 0) {
+				err = nfc_hci_result_to_errno(
+						       skb->data[skb->len - 1]);
+				kfree_skb(skb);
+				info->async_cb(info->async_cb_context, NULL,
+					       err);
+				return;
+			}
+
+			skb_trim(skb, skb->len - 1);	/* RF Error ind. */
+		}
+		info->async_cb(info->async_cb_context, skb, err);
+		break;
+	default:
+		if (err == 0)
+			kfree_skb(skb);
+		break;
+	}
+}
+
+/*
+ * Returns:
+ * <= 0: driver handled the data exchange
+ *    1: driver doesn't especially handle, please do standard processing
+ */
+static int microread_im_transceive(struct nfc_hci_dev *hdev,
+				   struct nfc_target *target,
+				   struct sk_buff *skb, data_exchange_cb_t cb,
+				   void *cb_context)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+	u8 control_bits;
+	u16 crc;
+
+	pr_info("data exchange to gate 0x%x\n", target->hci_reader_gate);
+
+	if (target->hci_reader_gate == MICROREAD_GATE_ID_P2P_INITIATOR) {
+		*skb_push(skb, 1) = 0;
+
+		return nfc_hci_send_event(hdev, target->hci_reader_gate,
+				     MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_TO_RF,
+				     skb->data, skb->len);
+	}
+
+	switch (target->hci_reader_gate) {
+	case MICROREAD_GATE_ID_MREAD_ISO_A:
+		control_bits = 0xCB;
+		break;
+	case MICROREAD_GATE_ID_MREAD_ISO_A_3:
+		control_bits = 0xCB;
+		break;
+	case MICROREAD_GATE_ID_MREAD_ISO_B:
+		control_bits = 0xCB;
+		break;
+	case MICROREAD_GATE_ID_MREAD_NFC_T1:
+		control_bits = 0x1B;
+
+		crc = crc_ccitt(0xffff, skb->data, skb->len);
+		crc = ~crc;
+		*skb_put(skb, 1) = crc & 0xff;
+		*skb_put(skb, 1) = crc >> 8;
+		break;
+	case MICROREAD_GATE_ID_MREAD_NFC_T3:
+		control_bits = 0xDB;
+		break;
+	default:
+		pr_info("Abort im_transceive to invalid gate 0x%x\n",
+			target->hci_reader_gate);
+		return 1;
+	}
+
+	*skb_push(skb, 1) = control_bits;
+
+	info->async_cb_type = MICROREAD_CB_TYPE_READER_ALL;
+	info->async_cb = cb;
+	info->async_cb_context = cb_context;
+
+	return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+				      MICROREAD_CMD_MREAD_EXCHANGE,
+				      skb->data, skb->len,
+				      microread_im_transceive_cb, info);
+}
+
+static int microread_tm_send(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+{
+	int r;
+
+	r = nfc_hci_send_event(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+			       MICROREAD_EVT_MCARD_EXCHANGE,
+			       skb->data, skb->len);
+
+	kfree_skb(skb);
+
+	return r;
+}
+
+static void microread_target_discovered(struct nfc_hci_dev *hdev, u8 gate,
+					struct sk_buff *skb)
+{
+	struct nfc_target *targets;
+	int r = 0;
+
+	pr_info("target discovered to gate 0x%x\n", gate);
+
+	targets = kzalloc(sizeof(struct nfc_target), GFP_KERNEL);
+	if (targets == NULL) {
+		r = -ENOMEM;
+		goto exit;
+	}
+
+	targets->hci_reader_gate = gate;
+
+	switch (gate) {
+	case MICROREAD_GATE_ID_MREAD_ISO_A:
+		targets->supported_protocols =
+		      nfc_hci_sak_to_protocol(skb->data[MICROREAD_EMCF_A_SAK]);
+		targets->sens_res =
+			 be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A_ATQA]);
+		targets->sel_res = skb->data[MICROREAD_EMCF_A_SAK];
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A_UID],
+		       skb->data[MICROREAD_EMCF_A_LEN]);
+		targets->nfcid1_len = skb->data[MICROREAD_EMCF_A_LEN];
+		break;
+	case MICROREAD_GATE_ID_MREAD_ISO_A_3:
+		targets->supported_protocols =
+		      nfc_hci_sak_to_protocol(skb->data[MICROREAD_EMCF_A3_SAK]);
+		targets->sens_res =
+			 be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A3_ATQA]);
+		targets->sel_res = skb->data[MICROREAD_EMCF_A3_SAK];
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A3_UID],
+		       skb->data[MICROREAD_EMCF_A3_LEN]);
+		targets->nfcid1_len = skb->data[MICROREAD_EMCF_A3_LEN];
+		break;
+	case MICROREAD_GATE_ID_MREAD_ISO_B:
+		targets->supported_protocols = NFC_PROTO_ISO14443_B_MASK;
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_B_UID], 4);
+		targets->nfcid1_len = 4;
+		break;
+	case MICROREAD_GATE_ID_MREAD_NFC_T1:
+		targets->supported_protocols = NFC_PROTO_JEWEL_MASK;
+		targets->sens_res =
+			le16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_T1_ATQA]);
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_T1_UID], 4);
+		targets->nfcid1_len = 4;
+		break;
+	case MICROREAD_GATE_ID_MREAD_NFC_T3:
+		targets->supported_protocols = NFC_PROTO_FELICA_MASK;
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_T3_UID], 8);
+		targets->nfcid1_len = 8;
+		break;
+	default:
+		pr_info("discard target discovered to gate 0x%x\n", gate);
+		goto exit_free;
+	}
+
+	r = nfc_targets_found(hdev->ndev, targets, 1);
+
+exit_free:
+	kfree(targets);
+
+exit:
+	kfree_skb(skb);
+
+	if (r)
+		pr_err("Failed to handle discovered target err=%d", r);
+}
+
+static int microread_event_received(struct nfc_hci_dev *hdev, u8 gate,
+				     u8 event, struct sk_buff *skb)
+{
+	int r;
+	u8 mode;
+
+	pr_info("Microread received event 0x%x to gate 0x%x\n", event, gate);
+
+	switch (event) {
+	case MICROREAD_EVT_MREAD_CARD_FOUND:
+		microread_target_discovered(hdev, gate, skb);
+		return 0;
+
+	case MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_FROM_RF:
+		if (skb->len < 1) {
+			kfree_skb(skb);
+			return -EPROTO;
+		}
+
+		if (skb->data[skb->len - 1]) {
+			kfree_skb(skb);
+			return -EIO;
+		}
+
+		skb_trim(skb, skb->len - 1);
+
+		r = nfc_tm_data_received(hdev->ndev, skb);
+		break;
+
+	case MICROREAD_EVT_MCARD_FIELD_ON:
+	case MICROREAD_EVT_MCARD_FIELD_OFF:
+		kfree_skb(skb);
+		return 0;
+
+	case MICROREAD_EVT_P2P_TARGET_ACTIVATED:
+		r = nfc_tm_activated(hdev->ndev, NFC_PROTO_NFC_DEP_MASK,
+				     NFC_COMM_PASSIVE, skb->data,
+				     skb->len);
+
+		kfree_skb(skb);
+		break;
+
+	case MICROREAD_EVT_MCARD_EXCHANGE:
+		if (skb->len < 1) {
+			kfree_skb(skb);
+			return -EPROTO;
+		}
+
+		if (skb->data[skb->len-1]) {
+			kfree_skb(skb);
+			return -EIO;
+		}
+
+		skb_trim(skb, skb->len - 1);
+
+		r = nfc_tm_data_received(hdev->ndev, skb);
+		break;
+
+	case MICROREAD_EVT_P2P_TARGET_DEACTIVATED:
+		kfree_skb(skb);
+
+		mode = 0xff;
+		r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+				      MICROREAD_PAR_P2P_TARGET_MODE, &mode, 1);
+		if (r)
+			break;
+
+		r = nfc_hci_send_event(hdev, gate,
+				       MICROREAD_EVT_MREAD_DISCOVERY_STOP, NULL,
+				       0);
+		break;
+
+	default:
+		return 1;
+	}
+
+	return r;
+}
+
+static struct nfc_hci_ops microread_hci_ops = {
+	.open = microread_open,
+	.close = microread_close,
+	.hci_ready = microread_hci_ready,
+	.xmit = microread_xmit,
+	.start_poll = microread_start_poll,
+	.dep_link_up = microread_dep_link_up,
+	.dep_link_down = microread_dep_link_down,
+	.target_from_gate = microread_target_from_gate,
+	.complete_target_discovered = microread_complete_target_discovered,
+	.im_transceive = microread_im_transceive,
+	.tm_send = microread_tm_send,
+	.check_presence = NULL,
+	.event_received = microread_event_received,
+};
+
+int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
+		    int phy_headroom, int phy_tailroom, int phy_payload,
+		    struct nfc_hci_dev **hdev)
+{
+	struct microread_info *info;
+	unsigned long quirks = 0;
+	u32 protocols, se;
+	struct nfc_hci_init_data init_data;
+	int r;
+
+	info = kzalloc(sizeof(struct microread_info), GFP_KERNEL);
+	if (!info) {
+		pr_err("Cannot allocate memory for microread_info.\n");
+		r = -ENOMEM;
+		goto err_info_alloc;
+	}
+
+	info->phy_ops = phy_ops;
+	info->phy_id = phy_id;
+
+	init_data.gate_count = ARRAY_SIZE(microread_gates);
+	memcpy(init_data.gates, microread_gates, sizeof(microread_gates));
+
+	strcpy(init_data.session_id, "MICROREA");
+
+	set_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &quirks);
+
+	protocols = NFC_PROTO_JEWEL_MASK |
+		    NFC_PROTO_MIFARE_MASK |
+		    NFC_PROTO_FELICA_MASK |
+		    NFC_PROTO_ISO14443_MASK |
+		    NFC_PROTO_ISO14443_B_MASK |
+		    NFC_PROTO_NFC_DEP_MASK;
+
+	se = NFC_SE_UICC | NFC_SE_EMBEDDED;
+
+	info->hdev = nfc_hci_allocate_device(&microread_hci_ops, &init_data,
+					     quirks, protocols, se, llc_name,
+					     phy_headroom +
+					     MICROREAD_CMDS_HEADROOM,
+					     phy_tailroom +
+					     MICROREAD_CMD_TAILROOM,
+					     phy_payload);
+	if (!info->hdev) {
+		pr_err("Cannot allocate nfc hdev.\n");
+		r = -ENOMEM;
+		goto err_alloc_hdev;
+	}
+
+	nfc_hci_set_clientdata(info->hdev, info);
+
+	r = nfc_hci_register_device(info->hdev);
+	if (r)
+		goto err_regdev;
+
+	*hdev = info->hdev;
+
+	return 0;
+
+err_regdev:
+	nfc_hci_free_device(info->hdev);
+
+err_alloc_hdev:
+	kfree(info);
+
+err_info_alloc:
+	return r;
+}
+EXPORT_SYMBOL(microread_probe);
+
+void microread_remove(struct nfc_hci_dev *hdev)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+
+	nfc_hci_unregister_device(hdev);
+	nfc_hci_free_device(hdev);
+	kfree(info);
+}
+EXPORT_SYMBOL(microread_remove);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/nfc/microread/microread.h b/drivers/nfc/microread/microread.h
new file mode 100644
index 000000000000..64b447a1c5bf
--- /dev/null
+++ b/drivers/nfc/microread/microread.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2011 - 2012  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LOCAL_MICROREAD_H_
+#define __LOCAL_MICROREAD_H_
+
+#include <net/nfc/hci.h>
+
+#define DRIVER_DESC "NFC driver for microread"
+
+int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
+		    int phy_headroom, int phy_tailroom, int phy_payload,
+		    struct nfc_hci_dev **hdev);
+
+void microread_remove(struct nfc_hci_dev *hdev);
+
+#endif /* __LOCAL_MICROREAD_H_ */
diff --git a/include/linux/platform_data/microread.h b/include/linux/platform_data/microread.h
new file mode 100644
index 000000000000..cfda59b226ee
--- /dev/null
+++ b/include/linux/platform_data/microread.h
@@ -0,0 +1,35 @@
+/*
+ * Driver include for the PN544 NFC chip.
+ *
+ * Copyright (C) 2011 Tieto Poland
+ * Copyright (C) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _MICROREAD_H
+#define _MICROREAD_H
+
+#include <linux/i2c.h>
+
+#define MICROREAD_DRIVER_NAME	"microread"
+
+/* board config platform data for microread */
+struct microread_nfc_platform_data {
+	unsigned int rst_gpio;
+	unsigned int irq_gpio;
+	unsigned int ioh_gpio;
+};
+
+#endif /* _MICROREAD_H */
-- 
cgit v1.2.3


From 26aec009f6b61c077c6de1a96cca7a5132851dbe Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.daniel@samsung.com>
Date: Sun, 3 Feb 2013 15:49:47 -0800
Subject: regulator: add device tree support for s5m8767

This device tree support is added for PMIC block of S5m8767 multi
function driver. The usage detail is added in the device tree
documentation section. This change is tested on exynos5250 based
arndale platform by regulator voltage set/get API's.

Reviewed-by: Thomas Abraham <thomas.abraham@linaro.org>
Signed-off-by: Amit Daniel Kachhap <amit.daniel@samsung.com>
Tested-by: Sachin Kamat <sachin.kamat@linaro.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 .../bindings/regulator/s5m8767-regulator.txt       | 152 +++++++++++++++++
 drivers/mfd/sec-core.c                             |  75 ++++++++-
 drivers/regulator/s5m8767.c                        | 186 ++++++++++++++++++++-
 include/linux/mfd/samsung/core.h                   |  11 +-
 4 files changed, 417 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt b/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt
new file mode 100644
index 000000000000..a35ff99003a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt
@@ -0,0 +1,152 @@
+* Samsung S5M8767 Voltage and Current Regulator
+
+The Samsung S5M8767 is a multi-function device which includes volatage and
+current regulators, rtc, charger controller and other sub-blocks. It is
+interfaced to the host controller using a i2c interface. Each sub-block is
+addressed by the host system using different i2c slave address. This document
+describes the bindings for 'pmic' sub-block of s5m8767.
+
+Required properties:
+- compatible: Should be "samsung,s5m8767-pmic".
+- reg: Specifies the i2c slave address of the pmic block. It should be 0x66.
+
+- s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+  units for buck2 when changing voltage using gpio dvs. Refer to [1] below
+  for additional information.
+
+- s5m8767,pmic-buck3-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+  units for buck3 when changing voltage using gpio dvs. Refer to [1] below
+  for additional information.
+
+- s5m8767,pmic-buck4-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+  units for buck4 when changing voltage using gpio dvs. Refer to [1] below
+  for additional information.
+
+- s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used
+  for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines.
+
+[1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
+    property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage'
+    property should specify atleast one voltage level (which would be a
+    safe operating voltage).
+
+    If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
+    property is specified, then all the eight voltage values for the
+    's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified.
+
+Optional properties:
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+  the interrupts from s5m8767 are delivered to.
+- interrupts: Interrupt specifiers for two interrupt sources.
+  - First interrupt specifier is for 'irq1' interrupt.
+  - Second interrupt specifier is for 'alert' interrupt.
+- s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs.
+- s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs.
+- s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs.
+
+Additional properties required if either of the optional properties are used:
+
+- s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from
+  the possible 8 options selectable by the dvs gpios. The value of this
+  property should be between 0 and 7. If not specified or if out of range, the
+  default value of this property is set to 0.
+
+- s5m8767,pmic-buck-dvs-gpios: GPIO specifiers for three host gpio's used
+  for dvs. The format of the gpio specifier depends in the gpio controller.
+
+Regulators: The regulators of s5m8767 that have to be instantiated should be
+included in a sub-node named 'regulators'. Regulator nodes included in this
+sub-node should be of the format as listed below.
+
+	regulator_name {
+		ldo1_reg: LDO1 {
+			regulator-name = "VDD_ALIVE_1.0V";
+			regulator-min-microvolt = <1100000>;
+			regulator-max-microvolt = <1100000>;
+			regulator-always-on;
+			regulator-boot-on;
+			op_mode = <1>; /* Normal Mode */
+		};
+	};
+The above regulator entries are defined in regulator bindings documentation
+except op_mode description.
+	- op_mode: describes the different operating modes of the LDO's with
+		power mode change in SOC. The different possible values are,
+		0 - always off mode
+		1 - on in normal mode
+		2 - low power mode
+		3 - suspend mode
+
+The following are the names of the regulators that the s5m8767 pmic block
+supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number
+as per the datasheet of s5m8767.
+
+	- LDOn
+		  - valid values for n are 1 to 28
+		  - Example: LDO0, LD01, LDO28
+	- BUCKn
+		  - valid values for n are 1 to 9.
+		  - Example: BUCK1, BUCK2, BUCK9
+
+The bindings inside the regulator nodes use the standard regulator bindings
+which are documented elsewhere.
+
+Example:
+
+	s5m8767_pmic@66 {
+		compatible = "samsung,s5m8767-pmic";
+		reg = <0x66>;
+
+		s5m8767,pmic-buck2-uses-gpio-dvs;
+		s5m8767,pmic-buck3-uses-gpio-dvs;
+		s5m8767,pmic-buck4-uses-gpio-dvs;
+
+		s5m8767,pmic-buck-default-dvs-idx = <0>;
+
+		s5m8767,pmic-buck-dvs-gpios = <&gpx0 0 1 0 0>, /* DVS1 */
+						 <&gpx0 1 1 0 0>, /* DVS2 */
+						 <&gpx0 2 1 0 0>; /* DVS3 */
+
+		s5m8767,pmic-buck-ds-gpios = <&gpx2 3 1 0 0>, /* SET1 */
+						<&gpx2 4 1 0 0>, /* SET2 */
+						<&gpx2 5 1 0 0>; /* SET3 */
+
+		s5m8767,pmic-buck2-dvs-voltage = <1350000>, <1300000>,
+						 <1250000>, <1200000>,
+						 <1150000>, <1100000>,
+						 <1000000>, <950000>;
+
+		s5m8767,pmic-buck3-dvs-voltage = <1100000>, <1100000>,
+						 <1100000>, <1100000>,
+						 <1000000>, <1000000>,
+						 <1000000>, <1000000>;
+
+		s5m8767,pmic-buck4-dvs-voltage = <1200000>, <1200000>,
+						 <1200000>, <1200000>,
+						 <1200000>, <1200000>,
+						 <1200000>, <1200000>;
+
+		regulators {
+			ldo1_reg: LDO1 {
+				regulator-name = "VDD_ABB_3.3V";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				op_mode = <1>; /* Normal Mode */
+			};
+
+			ldo2_reg: LDO2 {
+				regulator-name = "VDD_ALIVE_1.1V";
+				regulator-min-microvolt = <1100000>;
+				regulator-max-microvolt = <1100000>;
+				regulator-always-on;
+			};
+
+			buck1_reg: BUCK1 {
+				regulator-name = "VDD_MIF_1.2V";
+				regulator-min-microvolt = <950000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+		};
+	};
diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c
index 49d361a618d0..ae029834d0a0 100644
--- a/drivers/mfd/sec-core.c
+++ b/drivers/mfd/sec-core.c
@@ -17,6 +17,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/of_irq.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/mutex.h>
@@ -60,6 +61,15 @@ static struct mfd_cell s2mps11_devs[] = {
 	},
 };
 
+#ifdef CONFIG_OF
+static struct of_device_id sec_dt_match[] = {
+	{	.compatible = "samsung,s5m8767-pmic",
+		.data = (void *)S5M8767X,
+	},
+	{},
+};
+#endif
+
 int sec_reg_read(struct sec_pmic_dev *sec_pmic, u8 reg, void *dest)
 {
 	return regmap_read(sec_pmic->regmap, reg, dest);
@@ -95,6 +105,57 @@ static struct regmap_config sec_regmap_config = {
 	.val_bits = 8,
 };
 
+
+#ifdef CONFIG_OF
+/*
+ * Only the common platform data elements for s5m8767 are parsed here from the
+ * device tree. Other sub-modules of s5m8767 such as pmic, rtc , charger and
+ * others have to parse their own platform data elements from device tree.
+ *
+ * The s5m8767 platform data structure is instantiated here and the drivers for
+ * the sub-modules need not instantiate another instance while parsing their
+ * platform data.
+ */
+static struct sec_platform_data *sec_pmic_i2c_parse_dt_pdata(
+					struct device *dev)
+{
+	struct sec_platform_data *pd;
+
+	pd = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL);
+	if (!pd) {
+		dev_err(dev, "could not allocate memory for pdata\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * ToDo: the 'wakeup' member in the platform data is more of a linux
+	 * specfic information. Hence, there is no binding for that yet and
+	 * not parsed here.
+	 */
+
+	return pd;
+}
+#else
+static struct sec_platform_data *sec_i2c_parse_dt_pdata(
+					struct device *dev)
+{
+	return 0;
+}
+#endif
+
+static inline int sec_i2c_get_driver_data(struct i2c_client *i2c,
+						const struct i2c_device_id *id)
+{
+#ifdef CONFIG_OF
+	if (i2c->dev.of_node) {
+		const struct of_device_id *match;
+		match = of_match_node(sec_dt_match, i2c->dev.of_node);
+		return (int)match->data;
+	}
+#endif
+	return (int)id->driver_data;
+}
+
 static int sec_pmic_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
@@ -111,13 +172,22 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 	sec_pmic->dev = &i2c->dev;
 	sec_pmic->i2c = i2c;
 	sec_pmic->irq = i2c->irq;
-	sec_pmic->type = id->driver_data;
-
+	sec_pmic->type = sec_i2c_get_driver_data(i2c, id);
+
+	if (sec_pmic->dev->of_node) {
+		pdata = sec_pmic_i2c_parse_dt_pdata(sec_pmic->dev);
+		if (IS_ERR(pdata)) {
+			ret = PTR_ERR(pdata);
+			return ret;
+		}
+		pdata->device_type = sec_pmic->type;
+	}
 	if (pdata) {
 		sec_pmic->device_type = pdata->device_type;
 		sec_pmic->ono = pdata->ono;
 		sec_pmic->irq_base = pdata->irq_base;
 		sec_pmic->wakeup = pdata->wakeup;
+		sec_pmic->pdata = pdata;
 	}
 
 	sec_pmic->regmap = devm_regmap_init_i2c(i2c, &sec_regmap_config);
@@ -192,6 +262,7 @@ static struct i2c_driver sec_pmic_driver = {
 	.driver = {
 		   .name = "sec_pmic",
 		   .owner = THIS_MODULE,
+		   .of_match_table = of_match_ptr(sec_dt_match),
 	},
 	.probe = sec_pmic_probe,
 	.remove = sec_pmic_remove,
diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index aa0ccef5c31c..1250cef43d7b 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -14,6 +14,7 @@
 #include <linux/bug.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
+#include <linux/of_gpio.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -21,6 +22,9 @@
 #include <linux/regulator/machine.h>
 #include <linux/mfd/samsung/core.h>
 #include <linux/mfd/samsung/s5m8767.h>
+#include <linux/regulator/of_regulator.h>
+
+#define S5M8767_OPMODE_NORMAL_MODE 0x1
 
 struct s5m8767_info {
 	struct device *dev;
@@ -474,15 +478,194 @@ static struct regulator_desc regulators[] = {
 	s5m8767_regulator_desc(BUCK9),
 };
 
+#ifdef CONFIG_OF
+static int s5m8767_pmic_dt_parse_dvs_gpio(struct sec_pmic_dev *iodev,
+			struct sec_platform_data *pdata,
+			struct device_node *pmic_np)
+{
+	int i, gpio;
+
+	for (i = 0; i < 3; i++) {
+		gpio = of_get_named_gpio(pmic_np,
+					"s5m8767,pmic-buck-dvs-gpios", i);
+		if (!gpio_is_valid(gpio)) {
+			dev_err(iodev->dev, "invalid gpio[%d]: %d\n", i, gpio);
+			return -EINVAL;
+		}
+		pdata->buck_gpios[i] = gpio;
+	}
+	return 0;
+}
+
+static int s5m8767_pmic_dt_parse_ds_gpio(struct sec_pmic_dev *iodev,
+			struct sec_platform_data *pdata,
+			struct device_node *pmic_np)
+{
+	int i, gpio;
+
+	for (i = 0; i < 3; i++) {
+		gpio = of_get_named_gpio(pmic_np,
+					"s5m8767,pmic-buck-ds-gpios", i);
+		if (!gpio_is_valid(gpio)) {
+			dev_err(iodev->dev, "invalid gpio[%d]: %d\n", i, gpio);
+			return -EINVAL;
+		}
+		pdata->buck_ds[i] = gpio;
+	}
+	return 0;
+}
+
+static int s5m8767_pmic_dt_parse_pdata(struct sec_pmic_dev *iodev,
+					struct sec_platform_data *pdata)
+{
+	struct device_node *pmic_np, *regulators_np, *reg_np;
+	struct sec_regulator_data *rdata;
+	struct sec_opmode_data *rmode;
+	unsigned int i, dvs_voltage_nr = 1, ret;
+
+	pmic_np = iodev->dev->of_node;
+	if (!pmic_np) {
+		dev_err(iodev->dev, "could not find pmic sub-node\n");
+		return -ENODEV;
+	}
+
+	regulators_np = of_find_node_by_name(pmic_np, "regulators");
+	if (!regulators_np) {
+		dev_err(iodev->dev, "could not find regulators sub-node\n");
+		return -EINVAL;
+	}
+
+	/* count the number of regulators to be supported in pmic */
+	pdata->num_regulators = 0;
+	for_each_child_of_node(regulators_np, reg_np)
+		pdata->num_regulators++;
+
+	rdata = devm_kzalloc(iodev->dev, sizeof(*rdata) *
+				pdata->num_regulators, GFP_KERNEL);
+	if (!rdata) {
+		dev_err(iodev->dev,
+			"could not allocate memory for regulator data\n");
+		return -ENOMEM;
+	}
+
+	rmode = devm_kzalloc(iodev->dev, sizeof(*rmode) *
+				pdata->num_regulators, GFP_KERNEL);
+	if (!rdata) {
+		dev_err(iodev->dev,
+			"could not allocate memory for regulator mode\n");
+		return -ENOMEM;
+	}
+
+	pdata->regulators = rdata;
+	pdata->opmode = rmode;
+	for_each_child_of_node(regulators_np, reg_np) {
+		for (i = 0; i < ARRAY_SIZE(regulators); i++)
+			if (!of_node_cmp(reg_np->name, regulators[i].name))
+				break;
+
+		if (i == ARRAY_SIZE(regulators)) {
+			dev_warn(iodev->dev,
+			"don't know how to configure regulator %s\n",
+			reg_np->name);
+			continue;
+		}
+
+		rdata->id = i;
+		rdata->initdata = of_get_regulator_init_data(
+						iodev->dev, reg_np);
+		rdata->reg_node = reg_np;
+		rdata++;
+		rmode->id = i;
+		if (of_property_read_u32(reg_np, "op_mode",
+				&rmode->mode)) {
+			dev_warn(iodev->dev,
+				"no op_mode property property at %s\n",
+				reg_np->full_name);
+
+			rmode->mode = S5M8767_OPMODE_NORMAL_MODE;
+		}
+		rmode++;
+	}
+
+	if (of_get_property(pmic_np, "s5m8767,pmic-buck2-uses-gpio-dvs", NULL))
+		pdata->buck2_gpiodvs = true;
+
+	if (of_get_property(pmic_np, "s5m8767,pmic-buck3-uses-gpio-dvs", NULL))
+		pdata->buck3_gpiodvs = true;
+
+	if (of_get_property(pmic_np, "s5m8767,pmic-buck4-uses-gpio-dvs", NULL))
+		pdata->buck4_gpiodvs = true;
+
+	if (pdata->buck2_gpiodvs || pdata->buck3_gpiodvs ||
+						pdata->buck4_gpiodvs) {
+		ret = s5m8767_pmic_dt_parse_dvs_gpio(iodev, pdata, pmic_np);
+		if (ret)
+			return -EINVAL;
+
+		if (of_property_read_u32(pmic_np,
+				"s5m8767,pmic-buck-default-dvs-idx",
+				&pdata->buck_default_idx)) {
+			pdata->buck_default_idx = 0;
+		} else {
+			if (pdata->buck_default_idx >= 8) {
+				pdata->buck_default_idx = 0;
+				dev_info(iodev->dev,
+				"invalid value for default dvs index, use 0\n");
+			}
+		}
+		dvs_voltage_nr = 8;
+	}
+
+	ret = s5m8767_pmic_dt_parse_ds_gpio(iodev, pdata, pmic_np);
+	if (ret)
+		return -EINVAL;
+
+	if (of_property_read_u32_array(pmic_np,
+				"s5m8767,pmic-buck2-dvs-voltage",
+				pdata->buck2_voltage, dvs_voltage_nr)) {
+		dev_err(iodev->dev, "buck2 voltages not specified\n");
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32_array(pmic_np,
+				"s5m8767,pmic-buck3-dvs-voltage",
+				pdata->buck3_voltage, dvs_voltage_nr)) {
+		dev_err(iodev->dev, "buck3 voltages not specified\n");
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32_array(pmic_np,
+				"s5m8767,pmic-buck4-dvs-voltage",
+				pdata->buck4_voltage, dvs_voltage_nr)) {
+		dev_err(iodev->dev, "buck4 voltages not specified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int s5m8767_pmic_dt_parse_pdata(struct sec_pmic_dev *iodev,
+					struct sec_platform_data *pdata)
+{
+	return 0;
+}
+#endif /* CONFIG_OF */
+
 static int s5m8767_pmic_probe(struct platform_device *pdev)
 {
 	struct sec_pmic_dev *iodev = dev_get_drvdata(pdev->dev.parent);
-	struct sec_platform_data *pdata = dev_get_platdata(iodev->dev);
+	struct sec_platform_data *pdata = iodev->pdata;
 	struct regulator_config config = { };
 	struct regulator_dev **rdev;
 	struct s5m8767_info *s5m8767;
 	int i, ret, size, buck_init;
 
+	if (iodev->dev->of_node) {
+		ret = s5m8767_pmic_dt_parse_pdata(iodev, pdata);
+		if (ret)
+			return ret;
+	}
+
 	if (!pdata) {
 		dev_err(pdev->dev.parent, "Platform data not supplied\n");
 		return -ENODEV;
@@ -726,6 +909,7 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
 		config.init_data = pdata->regulators[i].initdata;
 		config.driver_data = s5m8767;
 		config.regmap = iodev->regmap;
+		config.of_node = pdata->regulators[i].reg_node;
 
 		rdev[i] = regulator_register(&regulators[id], &config);
 		if (IS_ERR(rdev[i])) {
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index b50c38f8bc48..f0f4de3b4ccc 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -26,6 +26,7 @@ enum sec_device_type {
 /**
  * struct sec_pmic_dev - s5m87xx master device for sub-drivers
  * @dev: master device of the chip (can be used to access platform data)
+ * @pdata: pointer to private data used to pass platform data to child
  * @i2c: i2c client private data for regulator
  * @rtc: i2c client private data for rtc
  * @iolock: mutex for serializing io access
@@ -39,6 +40,7 @@ enum sec_device_type {
  */
 struct sec_pmic_dev {
 	struct device *dev;
+	struct sec_platform_data *pdata;
 	struct regmap *regmap;
 	struct i2c_client *i2c;
 	struct i2c_client *rtc;
@@ -82,11 +84,11 @@ struct sec_platform_data {
 
 	int				buck_gpios[3];
 	int				buck_ds[3];
-	int				buck2_voltage[8];
+	unsigned int			buck2_voltage[8];
 	bool				buck2_gpiodvs;
-	int				buck3_voltage[8];
+	unsigned int			buck3_voltage[8];
 	bool				buck3_gpiodvs;
-	int				buck4_voltage[8];
+	unsigned int			buck4_voltage[8];
 	bool				buck4_gpiodvs;
 
 	int				buck_set1;
@@ -127,6 +129,7 @@ struct sec_platform_data {
 struct sec_regulator_data {
 	int				id;
 	struct regulator_init_data	*initdata;
+	struct device_node *reg_node;
 };
 
 /*
@@ -136,7 +139,7 @@ struct sec_regulator_data {
  */
 struct sec_opmode_data {
 	int id;
-	int mode;
+	unsigned int mode;
 };
 
 /*
-- 
cgit v1.2.3


From c02cf5f8ed6137e2b3b2f10e0fca336e06e09ba4 Mon Sep 17 00:00:00 2001
From: anish kumar <anish198519851985@gmail.com>
Date: Sun, 3 Feb 2013 22:08:23 +0100
Subject: irq_work: Remove return value from the irq_work_queue() function

As no one is using the return value of irq_work_queue(),
so it is better to just make it void.

Signed-off-by: anish kumar <anish198519851985@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
[ Fix stale comments, remove now unnecessary __irq_work_queue() intermediate function ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/1359925703-24304-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/irq_work.h |  2 +-
 kernel/irq_work.c        | 31 ++++++++++---------------------
 2 files changed, 11 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 6a9e8f5399e2..ce60c084635b 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -16,7 +16,7 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 	work->func = func;
 }
 
-bool irq_work_queue(struct irq_work *work);
+void irq_work_queue(struct irq_work *work);
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 64eddd59ed83..c9d7478e4889 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -63,12 +63,20 @@ void __weak arch_irq_work_raise(void)
 }
 
 /*
- * Queue the entry and raise the IPI if needed.
+ * Enqueue the irq_work @entry unless it's already pending
+ * somewhere.
+ *
+ * Can be re-enqueued while the callback is still in progress.
  */
-static void __irq_work_queue(struct irq_work *work)
+void irq_work_queue(struct irq_work *work)
 {
 	bool empty;
 
+	/* Only queue if not already pending */
+	if (!irq_work_claim(work))
+		return;
+
+	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 
 	empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
@@ -78,25 +86,6 @@ static void __irq_work_queue(struct irq_work *work)
 
 	preempt_enable();
 }
-
-/*
- * Enqueue the irq_work @entry, returns true on success, failure when the
- * @entry was already enqueued by someone else.
- *
- * Can be re-enqueued while the callback is still in progress.
- */
-bool irq_work_queue(struct irq_work *work)
-{
-	if (!irq_work_claim(work)) {
-		/*
-		 * Already enqueued, can't do!
-		 */
-		return false;
-	}
-
-	__irq_work_queue(work);
-	return true;
-}
 EXPORT_SYMBOL_GPL(irq_work_queue);
 
 /*
-- 
cgit v1.2.3


From 3f52b7e328c526fa7a592af9bf5772c591ed38a4 Mon Sep 17 00:00:00 2001
From: Marco Porsch <marco@cozybit.com>
Date: Wed, 30 Jan 2013 18:14:08 +0100
Subject: mac80211: mesh power save basics

Add routines to
- maintain a PS mode for each peer and a non-peer PS mode
- indicate own PS mode in transmitted frames
- track neighbor STAs power modes
- buffer frames when neighbors are in PS mode
- add TIM and Awake Window IE to beacons
- release frames in Mesh Peer Service Periods

Add local_pm to sta_info to represent the link-specific power
mode at this station towards the remote station. When a peer
link is established, use the default power mode stored in mesh
config. Update the PS status if the peering status of a neighbor
changes.
Maintain a mesh power mode for non-peer mesh STAs. Set the
non-peer power mode to active mode during peering. Authenticated
mesh peering is currently not working when either node is
configured to be in power save mode.

Indicate the current power mode in transmitted frames. Use QoS
Nulls to indicate mesh power mode transitions.
For performance reasons, calls to the function setting the frame
flags are placed in HWMP routing routines, as there the STA
pointer is already available.

Add peer_pm to sta_info to represent the peer's link-specific
power mode towards the local station. Add nonpeer_pm to
represent the peer's power mode towards all non-peer stations.
Track power modes based on received frames.

Add the ps_data structure to ieee80211_if_mesh (for TIM map, PS
neighbor counter and group-addressed frame buffer).

Set WLAN_STA_PS flag for STA in PS mode to use the unicast frame
buffering routines in the tx path. Update num_sta_ps to buffer
and release group-addressed frames after DTIM beacons.

Announce the awake window duration in beacons if in light or
deep sleep mode towards any peer or non-peer. Create a TIM IE
similarly to AP mode and add it to mesh beacons. Parse received
Awake Window IEs and check TIM IEs for buffered frames.

Release frames towards peers in mesh Peer Service Periods. Use
the corresponding trigger frames and monitor the MPSP status.
Append a QoS Null as trigger frame if neccessary to properly end
the MPSP. Currently, in HT channels MPSPs behave imperfectly and
show large delay spikes and frame losses.

Signed-off-by: Marco Porsch <marco@cozybit.com>
Signed-off-by: Ivan Bezyazychnyy <ivan.bezyazychnyy@gmail.com>
Signed-off-by: Mike Krinkin <krinkin.m.u@gmail.com>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h     |   8 +
 net/mac80211/Kconfig          |  11 +
 net/mac80211/Makefile         |   3 +-
 net/mac80211/cfg.c            |  27 +-
 net/mac80211/debug.h          |  10 +
 net/mac80211/debugfs_netdev.c |   5 +
 net/mac80211/debugfs_sta.c    |   5 +-
 net/mac80211/ieee80211_i.h    |   6 +
 net/mac80211/mesh.c           |  33 +++
 net/mac80211/mesh.h           |  17 ++
 net/mac80211/mesh_hwmp.c      |   7 +
 net/mac80211/mesh_pathtbl.c   |   1 +
 net/mac80211/mesh_plink.c     |  17 ++
 net/mac80211/mesh_ps.c        | 585 ++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/rx.c             |   7 +
 net/mac80211/sta_info.c       |  20 +-
 net/mac80211/sta_info.h       |  11 +
 net/mac80211/status.c         |   7 +
 net/mac80211/tx.c             |  31 ++-
 net/mac80211/util.c           |   4 +
 net/mac80211/wme.c            |  13 +-
 21 files changed, 811 insertions(+), 17 deletions(-)
 create mode 100644 net/mac80211/mesh_ps.c

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 11c8bc87fdcb..7e8a498efe6d 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -151,6 +151,11 @@
 /* Mesh Control 802.11s */
 #define IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT  0x0100
 
+/* Mesh Power Save Level */
+#define IEEE80211_QOS_CTL_MESH_PS_LEVEL		0x0200
+/* Mesh Receiver Service Period Initiated */
+#define IEEE80211_QOS_CTL_RSPI			0x0400
+
 /* U-APSD queue for WMM IEs sent by AP */
 #define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD	(1<<7)
 #define IEEE80211_WMM_IE_AP_QOSINFO_PARAM_SET_CNT_MASK	0x0f
@@ -675,11 +680,14 @@ struct ieee80211_meshconf_ie {
  * @IEEE80211_MESHCONF_CAPAB_FORWARDING: the STA forwards MSDUs
  * @IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING: TBTT adjustment procedure
  *	is ongoing
+ * @IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL: STA is in deep sleep mode or has
+ *	neighbors in deep sleep mode
  */
 enum mesh_config_capab_flags {
 	IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS		= 0x01,
 	IEEE80211_MESHCONF_CAPAB_FORWARDING		= 0x08,
 	IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING		= 0x20,
+	IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL	= 0x40,
 };
 
 /**
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index b4ecf267a34b..0ecf947ad378 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -258,6 +258,17 @@ config MAC80211_MESH_SYNC_DEBUG
 
 	  Do not select this option.
 
+config MAC80211_MESH_PS_DEBUG
+	bool "Verbose mesh powersave debugging"
+	depends on MAC80211_DEBUG_MENU
+	depends on MAC80211_MESH
+	---help---
+	  Selecting this option causes mac80211 to print out very verbose mesh
+	  powersave debugging messages (when mac80211 is taking part in a
+	  mesh network).
+
+	  Do not select this option.
+
 config MAC80211_TDLS_DEBUG
 	bool "Verbose TDLS debugging"
 	depends on MAC80211_DEBUG_MENU
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 4911202334d9..9d7d840aac6d 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -39,7 +39,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \
 	mesh_pathtbl.o \
 	mesh_plink.o \
 	mesh_hwmp.o \
-	mesh_sync.o
+	mesh_sync.o \
+	mesh_ps.o
 
 mac80211-$(CONFIG_PM) += pm.o
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 661b878bd19c..f4f7e7691077 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -492,7 +492,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 #ifdef CONFIG_MAC80211_MESH
 		sinfo->filled |= STATION_INFO_LLID |
 				 STATION_INFO_PLID |
-				 STATION_INFO_PLINK_STATE;
+				 STATION_INFO_PLINK_STATE |
+				 STATION_INFO_LOCAL_PM |
+				 STATION_INFO_PEER_PM |
+				 STATION_INFO_NONPEER_PM;
 
 		sinfo->llid = le16_to_cpu(sta->llid);
 		sinfo->plid = le16_to_cpu(sta->plid);
@@ -501,6 +504,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 			sinfo->filled |= STATION_INFO_T_OFFSET;
 			sinfo->t_offset = sta->t_offset;
 		}
+		sinfo->local_pm = sta->local_pm;
+		sinfo->peer_pm = sta->peer_pm;
+		sinfo->nonpeer_pm = sta->nonpeer_pm;
 #endif
 	}
 
@@ -1262,6 +1268,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 					changed = mesh_plink_inc_estab_count(
 							sdata);
 				sta->plink_state = params->plink_state;
+
+				ieee80211_mps_sta_status_update(sta);
+				ieee80211_mps_set_sta_local_pm(sta,
+					sdata->u.mesh.mshcfg.power_mode);
 				break;
 			case NL80211_PLINK_LISTEN:
 			case NL80211_PLINK_BLOCKED:
@@ -1273,6 +1283,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 					changed = mesh_plink_dec_estab_count(
 							sdata);
 				sta->plink_state = params->plink_state;
+
+				ieee80211_mps_sta_status_update(sta);
+				ieee80211_mps_local_status_update(sdata);
 				break;
 			default:
 				/*  nothing  */
@@ -1289,6 +1302,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 				break;
 			}
 		}
+
+		if (params->local_pm)
+			ieee80211_mps_set_sta_local_pm(sta, params->local_pm);
 #endif
 	}
 
@@ -1777,6 +1793,15 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
 	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask))
 		conf->dot11MeshHWMPconfirmationInterval =
 			nconf->dot11MeshHWMPconfirmationInterval;
+	if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) {
+		conf->power_mode = nconf->power_mode;
+		ieee80211_mps_local_status_update(sdata);
+	}
+	if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) {
+		conf->dot11MeshAwakeWindowDuration =
+			nconf->dot11MeshAwakeWindowDuration;
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+	}
 	return 0;
 }
 
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 8f383a576016..4ccc5ed6237d 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -44,6 +44,12 @@
 #define MAC80211_MESH_SYNC_DEBUG 0
 #endif
 
+#ifdef CONFIG_MAC80211_MESH_PS_DEBUG
+#define MAC80211_MESH_PS_DEBUG 1
+#else
+#define MAC80211_MESH_PS_DEBUG 0
+#endif
+
 #ifdef CONFIG_MAC80211_TDLS_DEBUG
 #define MAC80211_TDLS_DEBUG 1
 #else
@@ -151,6 +157,10 @@ do {									\
 	_sdata_dbg(MAC80211_MESH_SYNC_DEBUG,				\
 		   sdata, fmt, ##__VA_ARGS__)
 
+#define mps_dbg(sdata, fmt, ...)					\
+	_sdata_dbg(MAC80211_MESH_PS_DEBUG,				\
+		   sdata, fmt, ##__VA_ARGS__)
+
 #define tdls_dbg(sdata, fmt, ...)					\
 	_sdata_dbg(MAC80211_TDLS_DEBUG,					\
 		   sdata, fmt, ##__VA_ARGS__)
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index cbde5cc49a40..059bbb82e84f 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -515,6 +515,9 @@ IEEE80211_IF_FILE(dot11MeshHWMProotInterval,
 		  u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC);
 IEEE80211_IF_FILE(dot11MeshHWMPconfirmationInterval,
 		  u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval, DEC);
+IEEE80211_IF_FILE(power_mode, u.mesh.mshcfg.power_mode, DEC);
+IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,
+		  u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC);
 #endif
 
 #define DEBUGFS_ADD_MODE(name, mode) \
@@ -620,6 +623,8 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 	MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout);
 	MESHPARAMS_ADD(dot11MeshHWMProotInterval);
 	MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval);
+	MESHPARAMS_ADD(power_mode);
+	MESHPARAMS_ADD(dot11MeshAwakeWindowDuration);
 #undef MESHPARAMS_ADD
 }
 #endif
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 6fb1168b9f16..c7591f73dbc3 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -65,7 +65,7 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
 	test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : ""
 
 	int res = scnprintf(buf, sizeof(buf),
-			    "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+			    "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
 			    TEST(AUTH), TEST(ASSOC), TEST(PS_STA),
 			    TEST(PS_DRIVER), TEST(AUTHORIZED),
 			    TEST(SHORT_PREAMBLE),
@@ -74,7 +74,8 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
 			    TEST(UAPSD), TEST(SP), TEST(TDLS_PEER),
 			    TEST(TDLS_PEER_AUTH), TEST(4ADDR_EVENT),
 			    TEST(INSERTED), TEST(RATE_CONTROL),
-			    TEST(TOFFSET_KNOWN));
+			    TEST(TOFFSET_KNOWN), TEST(MPSP_OWNER),
+			    TEST(MPSP_RECIPIENT));
 #undef TEST
 	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8faf360e0b4c..5fe9db707880 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -590,6 +590,11 @@ struct ieee80211_if_mesh {
 	s64 sync_offset_clockdrift_max;
 	spinlock_t sync_offset_lock;
 	bool adjusting_tbtt;
+	/* mesh power save */
+	enum nl80211_mesh_power_mode nonpeer_pm;
+	int ps_peers_light_sleep;
+	int ps_peers_deep_sleep;
+	struct ps_data ps;
 };
 
 #ifdef CONFIG_MAC80211_MESH
@@ -1185,6 +1190,7 @@ struct ieee802_11_elems {
 	struct ieee80211_meshconf_ie *mesh_config;
 	u8 *mesh_id;
 	u8 *peering;
+	__le16 *awake_window;
 	u8 *preq;
 	u8 *prep;
 	u8 *perr;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index f920da1201ab..35ac38871420 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -261,6 +261,9 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	*pos = IEEE80211_MESHCONF_CAPAB_FORWARDING;
 	*pos |= ifmsh->accepting_plinks ?
 	    IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
+	/* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
+	*pos |= ifmsh->ps_peers_deep_sleep ?
+	    IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00;
 	*pos++ |= ifmsh->adjusting_tbtt ?
 	    IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00;
 	*pos++ = 0x00;
@@ -286,6 +289,29 @@ mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	return 0;
 }
 
+int mesh_add_awake_window_ie(struct sk_buff *skb,
+			     struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	u8 *pos;
+
+	/* see IEEE802.11-2012 13.14.6 */
+	if (ifmsh->ps_peers_light_sleep == 0 &&
+	    ifmsh->ps_peers_deep_sleep == 0 &&
+	    ifmsh->nonpeer_pm == NL80211_MESH_POWER_ACTIVE)
+		return 0;
+
+	if (skb_tailroom(skb) < 4)
+		return -ENOMEM;
+
+	pos = skb_put(skb, 2 + 2);
+	*pos++ = WLAN_EID_MESH_AWAKE_WINDOW;
+	*pos++ = 2;
+	put_unaligned_le16(ifmsh->mshcfg.dot11MeshAwakeWindowDuration, pos);
+
+	return 0;
+}
+
 int
 mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 {
@@ -629,6 +655,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 	sdata->vif.bss_conf.basic_rates =
 		ieee80211_mandatory_rates(local, band);
 
+	ieee80211_mps_local_status_update(sdata);
+
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	netif_carrier_on(sdata->dev);
@@ -651,6 +679,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 	sta_info_flush(sdata);
 	mesh_path_flush_by_iface(sdata);
 
+	/* free all potentially still buffered group-addressed frames */
+	local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf);
+	skb_queue_purge(&ifmsh->ps.bc_buf);
+
 	del_timer_sync(&sdata->u.mesh.housekeeping_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_root_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_timer);
@@ -828,6 +860,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 		    ieee80211_mesh_path_root_timer,
 		    (unsigned long) sdata);
 	INIT_LIST_HEAD(&ifmsh->preq_queue.list);
+	skb_queue_head_init(&ifmsh->ps.bc_buf);
 	spin_lock_init(&ifmsh->mesh_preq_queue_lock);
 	spin_lock_init(&ifmsh->sync_offset_lock);
 
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index aff301544c7f..eb336253b6b3 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -222,6 +222,8 @@ int mesh_add_meshid_ie(struct sk_buff *skb,
 		       struct ieee80211_sub_if_data *sdata);
 int mesh_add_rsn_ie(struct sk_buff *skb,
 		    struct ieee80211_sub_if_data *sdata);
+int mesh_add_awake_window_ie(struct sk_buff *skb,
+			     struct ieee80211_sub_if_data *sdata);
 int mesh_add_vendor_ies(struct sk_buff *skb,
 			struct ieee80211_sub_if_data *sdata);
 int mesh_add_ds_params_ie(struct sk_buff *skb,
@@ -242,6 +244,21 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata);
 void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh);
 const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method);
 
+/* mesh power save */
+void ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata);
+void ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
+				    enum nl80211_mesh_power_mode pm);
+void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata,
+				   struct sta_info *sta,
+				   struct ieee80211_hdr *hdr);
+void ieee80211_mps_sta_status_update(struct sta_info *sta);
+void ieee80211_mps_rx_h_sta_process(struct sta_info *sta,
+				    struct ieee80211_hdr *hdr);
+void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta,
+				    bool tx, bool acked);
+void ieee80211_mps_frame_release(struct sta_info *sta,
+				 struct ieee802_11_elems *elems);
+
 /* Mesh paths */
 int mesh_nexthop_lookup(struct sk_buff *skb,
 		struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 6b4603a90031..f0dd8742ed42 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -205,6 +205,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
 		struct sk_buff *skb)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 
 	skb_set_mac_header(skb, 0);
 	skb_set_network_header(skb, 0);
@@ -217,6 +218,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
 	info->control.vif = &sdata->vif;
 	info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
 	ieee80211_set_qos_hdr(sdata, skb);
+	ieee80211_mps_set_frame_flags(sdata, NULL, hdr);
 }
 
 /**
@@ -1080,6 +1082,10 @@ int mesh_nexthop_resolve(struct sk_buff *skb,
 	u8 *target_addr = hdr->addr3;
 	int err = 0;
 
+	/* Nulls are only sent to peers for PS and should be pre-addressed */
+	if (ieee80211_is_qos_nullfunc(hdr->frame_control))
+		return 0;
+
 	rcu_read_lock();
 	err = mesh_nexthop_lookup(skb, sdata);
 	if (!err)
@@ -1151,6 +1157,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
 	if (next_hop) {
 		memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN);
 		memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+		ieee80211_mps_set_frame_flags(sdata, next_hop, hdr);
 		err = 0;
 	}
 
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index aa749818860e..d5786c3eaee2 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -212,6 +212,7 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta)
 		hdr = (struct ieee80211_hdr *) skb->data;
 		memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN);
 		memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN);
+		ieee80211_mps_set_frame_flags(sta->sdata, sta, hdr);
 	}
 
 	spin_unlock_irqrestore(&mpath->frame_queue.lock, flags);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 6787d696d94c..fe7c3334d6fe 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -201,6 +201,9 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta)
 	sta->plink_state = NL80211_PLINK_BLOCKED;
 	mesh_path_flush_by_nexthop(sta);
 
+	ieee80211_mps_sta_status_update(sta);
+	ieee80211_mps_local_status_update(sdata);
+
 	return changed;
 }
 
@@ -503,6 +506,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
 	    rssi_threshold_check(sta, sdata))
 		mesh_plink_open(sta);
 
+	ieee80211_mps_frame_release(sta, elems);
 out:
 	rcu_read_unlock();
 }
@@ -633,6 +637,9 @@ int mesh_plink_open(struct sta_info *sta)
 		"Mesh plink: starting establishment with %pM\n",
 		sta->sta.addr);
 
+	/* set the non-peer mode to active during peering */
+	ieee80211_mps_local_status_update(sdata);
+
 	return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN,
 				   sta->sta.addr, llid, 0, 0);
 }
@@ -866,6 +873,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			sta->llid = llid;
 			mesh_plink_timer_set(sta,
 					     mshcfg->dot11MeshRetryTimeout);
+
+			/* set the non-peer mode to active during peering */
+			ieee80211_mps_local_status_update(sdata);
+
 			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(sdata,
 					    WLAN_SP_MESH_PEERING_OPEN,
@@ -959,6 +970,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			changed |= mesh_set_short_slot_time(sdata);
 			mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n",
 				sta->sta.addr);
+			ieee80211_mps_sta_status_update(sta);
+			ieee80211_mps_set_sta_local_pm(sta,
+						       mshcfg->power_mode);
 			break;
 		default:
 			spin_unlock_bh(&sta->lock);
@@ -998,6 +1012,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			mesh_plink_frame_tx(sdata,
 					    WLAN_SP_MESH_PEERING_CONFIRM,
 					    sta->sta.addr, llid, plid, 0);
+			ieee80211_mps_sta_status_update(sta);
+			ieee80211_mps_set_sta_local_pm(sta,
+						       mshcfg->power_mode);
 			break;
 		default:
 			spin_unlock_bh(&sta->lock);
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
new file mode 100644
index 000000000000..b677962525ed
--- /dev/null
+++ b/net/mac80211/mesh_ps.c
@@ -0,0 +1,585 @@
+/*
+ * Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
+ * Copyright 2012-2013, cozybit Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "mesh.h"
+#include "wme.h"
+
+
+/* mesh PS management */
+
+/**
+ * mps_qos_null_get - create pre-addressed QoS Null frame for mesh powersave
+ */
+static struct sk_buff *mps_qos_null_get(struct sta_info *sta)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_hdr *nullfunc; /* use 4addr header */
+	struct sk_buff *skb;
+	int size = sizeof(*nullfunc);
+	__le16 fc;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + size + 2);
+	if (!skb)
+		return NULL;
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	nullfunc = (struct ieee80211_hdr *) skb_put(skb, size);
+	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC);
+	ieee80211_fill_mesh_addresses(nullfunc, &fc, sta->sta.addr,
+				      sdata->vif.addr);
+	nullfunc->frame_control = fc;
+	nullfunc->duration_id = 0;
+	/* no address resolution for this frame -> set addr 1 immediately */
+	memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
+	memset(skb_put(skb, 2), 0, 2); /* append QoS control field */
+	ieee80211_mps_set_frame_flags(sdata, sta, nullfunc);
+
+	return skb;
+}
+
+/**
+ * mps_qos_null_tx - send a QoS Null to indicate link-specific power mode
+ */
+static void mps_qos_null_tx(struct sta_info *sta)
+{
+	struct sk_buff *skb;
+
+	skb = mps_qos_null_get(sta);
+	if (!skb)
+		return;
+
+	mps_dbg(sta->sdata, "announcing peer-specific power mode to %pM\n",
+		sta->sta.addr);
+
+	/* don't unintentionally start a MPSP */
+	if (!test_sta_flag(sta, WLAN_STA_PS_STA)) {
+		u8 *qc = ieee80211_get_qos_ctl((void *) skb->data);
+
+		qc[0] |= IEEE80211_QOS_CTL_EOSP;
+	}
+
+	ieee80211_tx_skb(sta->sdata, skb);
+}
+
+/**
+ * ieee80211_mps_local_status_update - track status of local link-specific PMs
+ *
+ * @sdata: local mesh subif
+ *
+ * sets the non-peer power mode and triggers the driver PS (re-)configuration
+ */
+void ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	struct sta_info *sta;
+	bool peering = false;
+	int light_sleep_cnt = 0;
+	int deep_sleep_cnt = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+		if (sdata != sta->sdata)
+			continue;
+
+		switch (sta->plink_state) {
+		case NL80211_PLINK_OPN_SNT:
+		case NL80211_PLINK_OPN_RCVD:
+		case NL80211_PLINK_CNF_RCVD:
+			peering = true;
+			break;
+		case NL80211_PLINK_ESTAB:
+			if (sta->local_pm == NL80211_MESH_POWER_LIGHT_SLEEP)
+				light_sleep_cnt++;
+			else if (sta->local_pm == NL80211_MESH_POWER_DEEP_SLEEP)
+				deep_sleep_cnt++;
+			break;
+		default:
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Set non-peer mode to active during peering/scanning/authentication
+	 * (see IEEE802.11-2012 13.14.8.3). The non-peer mesh power mode is
+	 * deep sleep if the local STA is in light or deep sleep towards at
+	 * least one mesh peer (see 13.14.3.1). Otherwise, set it to the
+	 * user-configured default value.
+	 */
+	if (peering) {
+		mps_dbg(sdata, "setting non-peer PM to active for peering\n");
+		ifmsh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
+	} else if (light_sleep_cnt || deep_sleep_cnt) {
+		mps_dbg(sdata, "setting non-peer PM to deep sleep\n");
+		ifmsh->nonpeer_pm = NL80211_MESH_POWER_DEEP_SLEEP;
+	} else {
+		mps_dbg(sdata, "setting non-peer PM to user value\n");
+		ifmsh->nonpeer_pm = ifmsh->mshcfg.power_mode;
+	}
+
+	ifmsh->ps_peers_light_sleep = light_sleep_cnt;
+	ifmsh->ps_peers_deep_sleep = deep_sleep_cnt;
+}
+
+/**
+ * ieee80211_mps_set_sta_local_pm - set local PM towards a mesh STA
+ *
+ * @sta: mesh STA
+ * @pm: the power mode to set
+ */
+void ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
+				    enum nl80211_mesh_power_mode pm)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+
+	mps_dbg(sdata, "local STA operates in mode %d with %pM\n",
+		pm, sta->sta.addr);
+
+	sta->local_pm = pm;
+
+	/*
+	 * announce peer-specific power mode transition
+	 * (see IEEE802.11-2012 13.14.3.2 and 13.14.3.3)
+	 */
+	if (sta->plink_state == NL80211_PLINK_ESTAB)
+		mps_qos_null_tx(sta);
+
+	ieee80211_mps_local_status_update(sdata);
+}
+
+/**
+ * ieee80211_mps_set_frame_flags - set mesh PS flags in FC (and QoS Control)
+ *
+ * @sdata: local mesh subif
+ * @sta: mesh STA
+ * @hdr: 802.11 frame header
+ *
+ * see IEEE802.11-2012 8.2.4.1.7 and 8.2.4.5.11
+ *
+ * NOTE: sta must be given when an individually-addressed QoS frame header
+ * is handled, for group-addressed and management frames it is not used
+ */
+void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata,
+				   struct sta_info *sta,
+				   struct ieee80211_hdr *hdr)
+{
+	enum nl80211_mesh_power_mode pm;
+	u8 *qc;
+
+	if (WARN_ON(is_unicast_ether_addr(hdr->addr1) &&
+		    ieee80211_is_data_qos(hdr->frame_control) &&
+		    !sta))
+		return;
+
+	if (is_unicast_ether_addr(hdr->addr1) &&
+	    ieee80211_is_data_qos(hdr->frame_control) &&
+	    sta->plink_state == NL80211_PLINK_ESTAB)
+		pm = sta->local_pm;
+	else
+		pm = sdata->u.mesh.nonpeer_pm;
+
+	if (pm == NL80211_MESH_POWER_ACTIVE)
+		hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_PM);
+	else
+		hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM);
+
+	if (!ieee80211_is_data_qos(hdr->frame_control))
+		return;
+
+	qc = ieee80211_get_qos_ctl(hdr);
+
+	if ((is_unicast_ether_addr(hdr->addr1) &&
+	     pm == NL80211_MESH_POWER_DEEP_SLEEP) ||
+	    (is_multicast_ether_addr(hdr->addr1) &&
+	     sdata->u.mesh.ps_peers_deep_sleep > 0))
+		qc[1] |= (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8);
+	else
+		qc[1] &= ~(IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8);
+}
+
+/**
+ * ieee80211_mps_sta_status_update - update buffering status of neighbor STA
+ *
+ * @sta: mesh STA
+ *
+ * called after change of peering status or non-peer/peer-specific power mode
+ */
+void ieee80211_mps_sta_status_update(struct sta_info *sta)
+{
+	enum nl80211_mesh_power_mode pm;
+	bool do_buffer;
+
+	/*
+	 * use peer-specific power mode if peering is established and the
+	 * peer's power mode is known
+	 */
+	if (sta->plink_state == NL80211_PLINK_ESTAB &&
+	    sta->peer_pm != NL80211_MESH_POWER_UNKNOWN)
+		pm = sta->peer_pm;
+	else
+		pm = sta->nonpeer_pm;
+
+	do_buffer = (pm != NL80211_MESH_POWER_ACTIVE);
+
+	/* Don't let the same PS state be set twice */
+	if (test_sta_flag(sta, WLAN_STA_PS_STA) == do_buffer)
+		return;
+
+	if (do_buffer) {
+		set_sta_flag(sta, WLAN_STA_PS_STA);
+		atomic_inc(&sta->sdata->u.mesh.ps.num_sta_ps);
+		mps_dbg(sta->sdata, "start PS buffering frames towards %pM\n",
+			sta->sta.addr);
+	} else {
+		ieee80211_sta_ps_deliver_wakeup(sta);
+	}
+
+	/* clear the MPSP flags for non-peers or active STA */
+	if (sta->plink_state != NL80211_PLINK_ESTAB) {
+		clear_sta_flag(sta, WLAN_STA_MPSP_OWNER);
+		clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
+	} else if (!do_buffer) {
+		clear_sta_flag(sta, WLAN_STA_MPSP_OWNER);
+	}
+}
+
+static void mps_set_sta_peer_pm(struct sta_info *sta,
+				struct ieee80211_hdr *hdr)
+{
+	enum nl80211_mesh_power_mode pm;
+	u8 *qc = ieee80211_get_qos_ctl(hdr);
+
+	/*
+	 * Test Power Management field of frame control (PW) and
+	 * mesh power save level subfield of QoS control field (PSL)
+	 *
+	 * | PM | PSL| Mesh PM |
+	 * +----+----+---------+
+	 * | 0  |Rsrv|  Active |
+	 * | 1  | 0  |  Light  |
+	 * | 1  | 1  |  Deep   |
+	 */
+	if (ieee80211_has_pm(hdr->frame_control)) {
+		if (qc[1] & (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8))
+			pm = NL80211_MESH_POWER_DEEP_SLEEP;
+		else
+			pm = NL80211_MESH_POWER_LIGHT_SLEEP;
+	} else {
+		pm = NL80211_MESH_POWER_ACTIVE;
+	}
+
+	if (sta->peer_pm == pm)
+		return;
+
+	mps_dbg(sta->sdata, "STA %pM enters mode %d\n",
+		sta->sta.addr, pm);
+
+	sta->peer_pm = pm;
+
+	ieee80211_mps_sta_status_update(sta);
+}
+
+static void mps_set_sta_nonpeer_pm(struct sta_info *sta,
+				   struct ieee80211_hdr *hdr)
+{
+	enum nl80211_mesh_power_mode pm;
+
+	if (ieee80211_has_pm(hdr->frame_control))
+		pm = NL80211_MESH_POWER_DEEP_SLEEP;
+	else
+		pm = NL80211_MESH_POWER_ACTIVE;
+
+	if (sta->nonpeer_pm == pm)
+		return;
+
+	mps_dbg(sta->sdata, "STA %pM sets non-peer mode to %d\n",
+		sta->sta.addr, pm);
+
+	sta->nonpeer_pm = pm;
+
+	ieee80211_mps_sta_status_update(sta);
+}
+
+/**
+ * ieee80211_mps_rx_h_sta_process - frame receive handler for mesh powersave
+ *
+ * @sta: STA info that transmitted the frame
+ * @hdr: IEEE 802.11 (QoS) Header
+ */
+void ieee80211_mps_rx_h_sta_process(struct sta_info *sta,
+				    struct ieee80211_hdr *hdr)
+{
+	if (is_unicast_ether_addr(hdr->addr1) &&
+	    ieee80211_is_data_qos(hdr->frame_control)) {
+		/*
+		 * individually addressed QoS Data/Null frames contain
+		 * peer link-specific PS mode towards the local STA
+		 */
+		mps_set_sta_peer_pm(sta, hdr);
+
+		/* check for mesh Peer Service Period trigger frames */
+		ieee80211_mpsp_trigger_process(ieee80211_get_qos_ctl(hdr),
+					       sta, false, false);
+	} else {
+		/*
+		 * can only determine non-peer PS mode
+		 * (see IEEE802.11-2012 8.2.4.1.7)
+		 */
+		mps_set_sta_nonpeer_pm(sta, hdr);
+	}
+}
+
+
+/* mesh PS frame release */
+
+static void mpsp_trigger_send(struct sta_info *sta, bool rspi, bool eosp)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct sk_buff *skb;
+	struct ieee80211_hdr *nullfunc;
+	struct ieee80211_tx_info *info;
+	u8 *qc;
+
+	skb = mps_qos_null_get(sta);
+	if (!skb)
+		return;
+
+	nullfunc = (struct ieee80211_hdr *) skb->data;
+	if (!eosp)
+		nullfunc->frame_control |=
+				cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+	/*
+	 * | RSPI | EOSP |  MPSP triggering   |
+	 * +------+------+--------------------+
+	 * |  0   |  0   | local STA is owner |
+	 * |  0   |  1   | no MPSP (MPSP end) |
+	 * |  1   |  0   | both STA are owner |
+	 * |  1   |  1   | peer STA is owner  | see IEEE802.11-2012 13.14.9.2
+	 */
+	qc = ieee80211_get_qos_ctl(nullfunc);
+	if (rspi)
+		qc[1] |= (IEEE80211_QOS_CTL_RSPI >> 8);
+	if (eosp)
+		qc[0] |= IEEE80211_QOS_CTL_EOSP;
+
+	info = IEEE80211_SKB_CB(skb);
+
+	info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER |
+		       IEEE80211_TX_CTL_REQ_TX_STATUS;
+
+	mps_dbg(sdata, "sending MPSP trigger%s%s to %pM\n",
+		rspi ? " RSPI" : "", eosp ? " EOSP" : "", sta->sta.addr);
+
+	ieee80211_tx_skb(sdata, skb);
+}
+
+/**
+ * mpsp_qos_null_append - append QoS Null frame to MPSP skb queue if needed
+ *
+ * To properly end a mesh MPSP the last transmitted frame has to set the EOSP
+ * flag in the QoS Control field. In case the current tailing frame is not a
+ * QoS Data frame, append a QoS Null to carry the flag.
+ */
+static void mpsp_qos_null_append(struct sta_info *sta,
+				 struct sk_buff_head *frames)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct sk_buff *new_skb, *skb = skb_peek_tail(frames);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_tx_info *info;
+
+	if (ieee80211_is_data_qos(hdr->frame_control))
+		return;
+
+	new_skb = mps_qos_null_get(sta);
+	if (!new_skb)
+		return;
+
+	mps_dbg(sdata, "appending QoS Null in MPSP towards %pM\n",
+		sta->sta.addr);
+	/*
+	 * This frame has to be transmitted last. Assign lowest priority to
+	 * make sure it cannot pass other frames when releasing multiple ACs.
+	 */
+	new_skb->priority = 1;
+	skb_set_queue_mapping(new_skb, IEEE80211_AC_BK);
+	ieee80211_set_qos_hdr(sdata, new_skb);
+
+	info = IEEE80211_SKB_CB(new_skb);
+	info->control.vif = &sdata->vif;
+	info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
+
+	__skb_queue_tail(frames, new_skb);
+}
+
+/**
+ * mps_frame_deliver - transmit frames during mesh powersave
+ *
+ * @sta: STA info to transmit to
+ * @n_frames: number of frames to transmit. -1 for all
+ */
+static void mps_frame_deliver(struct sta_info *sta, int n_frames)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sdata->local;
+	int ac;
+	struct sk_buff_head frames;
+	struct sk_buff *skb;
+	bool more_data = false;
+
+	skb_queue_head_init(&frames);
+
+	/* collect frame(s) from buffers */
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		while (n_frames != 0) {
+			skb = skb_dequeue(&sta->tx_filtered[ac]);
+			if (!skb) {
+				skb = skb_dequeue(
+					&sta->ps_tx_buf[ac]);
+				if (skb)
+					local->total_ps_buffered--;
+			}
+			if (!skb)
+				break;
+			n_frames--;
+			__skb_queue_tail(&frames, skb);
+		}
+
+		if (!skb_queue_empty(&sta->tx_filtered[ac]) ||
+		    !skb_queue_empty(&sta->ps_tx_buf[ac]))
+			more_data = true;
+	}
+
+	/* nothing to send? -> EOSP */
+	if (skb_queue_empty(&frames)) {
+		mpsp_trigger_send(sta, false, true);
+		return;
+	}
+
+	/* in a MPSP make sure the last skb is a QoS Data frame */
+	if (test_sta_flag(sta, WLAN_STA_MPSP_OWNER))
+		mpsp_qos_null_append(sta, &frames);
+
+	mps_dbg(sta->sdata, "sending %d frames to PS STA %pM\n",
+		skb_queue_len(&frames), sta->sta.addr);
+
+	/* prepare collected frames for transmission */
+	skb_queue_walk(&frames, skb) {
+		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+		struct ieee80211_hdr *hdr = (void *) skb->data;
+
+		/*
+		 * Tell TX path to send this frame even though the
+		 * STA may still remain is PS mode after this frame
+		 * exchange.
+		 */
+		info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
+
+		if (more_data || !skb_queue_is_last(&frames, skb))
+			hdr->frame_control |=
+				cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+		else
+			hdr->frame_control &=
+				cpu_to_le16(~IEEE80211_FCTL_MOREDATA);
+
+		if (skb_queue_is_last(&frames, skb) &&
+		    ieee80211_is_data_qos(hdr->frame_control)) {
+			u8 *qoshdr = ieee80211_get_qos_ctl(hdr);
+
+			/* MPSP trigger frame ends service period */
+			*qoshdr |= IEEE80211_QOS_CTL_EOSP;
+			info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+		}
+	}
+
+	ieee80211_add_pending_skbs(local, &frames);
+	sta_info_recalc_tim(sta);
+}
+
+/**
+ * ieee80211_mpsp_trigger_process - track status of mesh Peer Service Periods
+ *
+ * @qc: QoS Control field
+ * @sta: peer to start a MPSP with
+ * @tx: frame was transmitted by the local STA
+ * @acked: frame has been transmitted successfully
+ *
+ * NOTE: active mode STA may only serve as MPSP owner
+ */
+void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta,
+				    bool tx, bool acked)
+{
+	u8 rspi = qc[1] & (IEEE80211_QOS_CTL_RSPI >> 8);
+	u8 eosp = qc[0] & IEEE80211_QOS_CTL_EOSP;
+
+	if (tx) {
+		if (rspi && acked)
+			set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
+
+		if (eosp)
+			clear_sta_flag(sta, WLAN_STA_MPSP_OWNER);
+		else if (acked &&
+			 test_sta_flag(sta, WLAN_STA_PS_STA) &&
+			 !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER))
+			mps_frame_deliver(sta, -1);
+	} else {
+		if (eosp)
+			clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
+		else if (sta->local_pm != NL80211_MESH_POWER_ACTIVE)
+			set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
+
+		if (rspi && !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER))
+			mps_frame_deliver(sta, -1);
+	}
+}
+
+/**
+ * ieee80211_mps_frame_release - release buffered frames in response to beacon
+ *
+ * @sta: mesh STA
+ * @elems: beacon IEs
+ *
+ * For peers if we have individually-addressed frames buffered or the peer
+ * indicates buffered frames, send a corresponding MPSP trigger frame. Since
+ * we do not evaluate the awake window duration, QoS Nulls are used as MPSP
+ * trigger frames. If the neighbour STA is not a peer, only send single frames.
+ */
+void ieee80211_mps_frame_release(struct sta_info *sta,
+				 struct ieee802_11_elems *elems)
+{
+	int ac, buffer_local = 0;
+	bool has_buffered = false;
+
+	/* TIM map only for LLID <= IEEE80211_MAX_AID */
+	if (sta->plink_state == NL80211_PLINK_ESTAB)
+		has_buffered = ieee80211_check_tim(elems->tim, elems->tim_len,
+				le16_to_cpu(sta->llid) % IEEE80211_MAX_AID);
+
+	if (has_buffered)
+		mps_dbg(sta->sdata, "%pM indicates buffered frames\n",
+			sta->sta.addr);
+
+	/* only transmit to PS STA with announced, non-zero awake window */
+	if (test_sta_flag(sta, WLAN_STA_PS_STA) &&
+	    (!elems->awake_window || !le16_to_cpu(*elems->awake_window)))
+		return;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+		buffer_local += skb_queue_len(&sta->ps_tx_buf[ac]) +
+				skb_queue_len(&sta->tx_filtered[ac]);
+
+	if (!has_buffered && !buffer_local)
+		return;
+
+	if (sta->plink_state == NL80211_PLINK_ESTAB)
+		mpsp_trigger_send(sta, has_buffered, !buffer_local);
+	else
+		mps_frame_deliver(sta, 1);
+}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a19089565c4b..c98be0593756 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1452,6 +1452,10 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 		}
 	}
 
+	/* mesh power save support */
+	if (ieee80211_vif_is_mesh(&rx->sdata->vif))
+		ieee80211_mps_rx_h_sta_process(sta, hdr);
+
 	/*
 	 * Drop (qos-)data::nullfunc frames silently, since they
 	 * are used only to control station power saving mode.
@@ -2090,7 +2094,10 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	if (is_multicast_ether_addr(fwd_hdr->addr1)) {
 		IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast);
 		memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
+		/* update power mode indication when forwarding */
+		ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr);
 	} else if (!mesh_nexthop_lookup(fwd_skb, sdata)) {
+		/* mesh power mode flags updated in mesh_nexthop_lookup */
 		IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast);
 	} else {
 		/* unable to resolve next hop */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 227233c3ff7f..47a0f0601768 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -120,6 +120,8 @@ static void cleanup_single_sta(struct sta_info *sta)
 		if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
 		    sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 			ps = &sdata->bss->ps;
+		else if (ieee80211_vif_is_mesh(&sdata->vif))
+			ps = &sdata->u.mesh.ps;
 		else
 			return;
 
@@ -587,6 +589,12 @@ void sta_info_recalc_tim(struct sta_info *sta)
 
 		ps = &sta->sdata->bss->ps;
 		id = sta->sta.aid;
+#ifdef CONFIG_MAC80211_MESH
+	} else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) {
+		ps = &sta->sdata->u.mesh.ps;
+		/* TIM map only for PLID <= IEEE80211_MAX_AID */
+		id = le16_to_cpu(sta->plid) % IEEE80211_MAX_AID;
+#endif
 	} else {
 		return;
 	}
@@ -745,8 +753,9 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
 	bool have_buffered = false;
 	int ac;
 
-	/* This is only necessary for stations on BSS interfaces */
-	if (!sta->sdata->bss)
+	/* This is only necessary for stations on BSS/MBSS interfaces */
+	if (!sta->sdata->bss &&
+	    !ieee80211_vif_is_mesh(&sta->sdata->vif))
 		return false;
 
 	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
@@ -934,6 +943,11 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
 		if (time_after(jiffies, sta->last_rx + exp_time)) {
 			sta_dbg(sta->sdata, "expiring inactive STA %pM\n",
 				sta->sta.addr);
+
+			if (ieee80211_vif_is_mesh(&sdata->vif) &&
+			    test_sta_flag(sta, WLAN_STA_PS_STA))
+				atomic_dec(&sdata->u.mesh.ps.num_sta_ps);
+
 			WARN_ON(__sta_info_destroy(sta));
 		}
 	}
@@ -992,6 +1006,8 @@ static void clear_sta_ps_flags(void *_sta)
 	if (sdata->vif.type == NL80211_IFTYPE_AP ||
 	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		ps = &sdata->bss->ps;
+	else if (ieee80211_vif_is_mesh(&sdata->vif))
+		ps = &sdata->u.mesh.ps;
 	else
 		return;
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index af7d78aa5523..5a1deba2c645 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -56,6 +56,8 @@
  * @WLAN_STA_INSERTED: This station is inserted into the hash table.
  * @WLAN_STA_RATE_CONTROL: rate control was initialized for this station.
  * @WLAN_STA_TOFFSET_KNOWN: toffset calculated for this station is valid.
+ * @WLAN_STA_MPSP_OWNER: local STA is owner of a mesh Peer Service Period.
+ * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP.
  */
 enum ieee80211_sta_info_flags {
 	WLAN_STA_AUTH,
@@ -78,6 +80,8 @@ enum ieee80211_sta_info_flags {
 	WLAN_STA_INSERTED,
 	WLAN_STA_RATE_CONTROL,
 	WLAN_STA_TOFFSET_KNOWN,
+	WLAN_STA_MPSP_OWNER,
+	WLAN_STA_MPSP_RECIPIENT,
 };
 
 #define ADDBA_RESP_INTERVAL HZ
@@ -282,6 +286,9 @@ struct sta_ampdu_mlme {
  * @t_offset_setpoint: reference timing offset of this sta to be used when
  * 	calculating clockdrift
  * @ch_width: peer's channel width
+ * @local_pm: local link-specific power save mode
+ * @peer_pm: peer-specific power save mode towards local STA
+ * @nonpeer_pm: STA power save mode towards non-peer neighbors
  * @debugfs: debug filesystem info
  * @dead: set to true when sta is unlinked
  * @uploaded: set to true when sta is uploaded to the driver
@@ -379,6 +386,10 @@ struct sta_info {
 	s64 t_offset;
 	s64 t_offset_setpoint;
 	enum nl80211_chan_width ch_width;
+	/* mesh power save */
+	enum nl80211_mesh_power_mode local_pm;
+	enum nl80211_mesh_power_mode peer_pm;
+	enum nl80211_mesh_power_mode nonpeer_pm;
 #endif
 
 #ifdef CONFIG_MAC80211_DEBUGFS
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index d041de056b7f..43439203f4e4 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -472,6 +472,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			return;
 		}
 
+		/* mesh Peer Service Period support */
+		if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
+		    ieee80211_is_data_qos(fc))
+			ieee80211_mpsp_trigger_process(
+					ieee80211_get_qos_ctl(hdr),
+					sta, true, acked);
+
 		if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) &&
 		    (rates_idx != -1))
 			sta->last_tx_rate = info->status.rates[rates_idx];
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7892b0a8873e..2ef0e19b06bb 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -329,6 +329,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 
 		if (sdata->vif.type == NL80211_IFTYPE_AP)
 			ps = &sdata->u.ap.ps;
+		else if (ieee80211_vif_is_mesh(&sdata->vif))
+			ps = &sdata->u.mesh.ps;
 		else
 			continue;
 
@@ -372,18 +374,20 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 	/*
 	 * broadcast/multicast frame
 	 *
-	 * If any of the associated stations is in power save mode,
+	 * If any of the associated/peer stations is in power save mode,
 	 * the frame is buffered to be sent after DTIM beacon frame.
 	 * This is done either by the hardware or us.
 	 */
 
-	/* powersaving STAs currently only in AP/VLAN mode */
+	/* powersaving STAs currently only in AP/VLAN/mesh mode */
 	if (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
 	    tx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
 		if (!tx->sdata->bss)
 			return TX_CONTINUE;
 
 		ps = &tx->sdata->bss->ps;
+	} else if (ieee80211_vif_is_mesh(&tx->sdata->vif)) {
+		ps = &tx->sdata->u.mesh.ps;
 	} else {
 		return TX_CONTINUE;
 	}
@@ -1473,12 +1477,14 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 	hdr = (struct ieee80211_hdr *) skb->data;
 	info->control.vif = &sdata->vif;
 
-	if (ieee80211_vif_is_mesh(&sdata->vif) &&
-	    ieee80211_is_data(hdr->frame_control) &&
-	    !is_multicast_ether_addr(hdr->addr1) &&
-	    mesh_nexthop_resolve(skb, sdata)) {
-		/* skb queued: don't free */
-		return;
+	if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		if (ieee80211_is_data(hdr->frame_control) &&
+		    is_unicast_ether_addr(hdr->addr1)) {
+			if (mesh_nexthop_resolve(skb, sdata))
+				return; /* skb queued: don't free */
+		} else {
+			ieee80211_mps_set_frame_flags(sdata, NULL, hdr);
+		}
 	}
 
 	ieee80211_set_qos_hdr(sdata, skb);
@@ -2445,12 +2451,14 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 				    2 + /* NULL SSID */
 				    2 + 8 + /* supported rates */
 				    2 + 3 + /* DS params */
+				    256 + /* TIM IE */
 				    2 + (IEEE80211_MAX_SUPP_RATES - 8) +
 				    2 + sizeof(struct ieee80211_ht_cap) +
 				    2 + sizeof(struct ieee80211_ht_operation) +
 				    2 + sdata->u.mesh.mesh_id_len +
 				    2 + sizeof(struct ieee80211_meshconf_ie) +
-				    sdata->u.mesh.ie_len);
+				    sdata->u.mesh.ie_len +
+				    2 + sizeof(__le16)); /* awake window */
 		if (!skb)
 			goto out;
 
@@ -2462,6 +2470,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 		eth_broadcast_addr(mgmt->da);
 		memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 		memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
+		ieee80211_mps_set_frame_flags(sdata, NULL, (void *) mgmt);
 		mgmt->u.beacon.beacon_int =
 			cpu_to_le16(sdata->vif.bss_conf.beacon_int);
 		mgmt->u.beacon.capab_info |= cpu_to_le16(
@@ -2475,12 +2484,14 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 
 		if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
 		    mesh_add_ds_params_ie(skb, sdata) ||
+		    ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb) ||
 		    ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
 		    mesh_add_rsn_ie(skb, sdata) ||
 		    mesh_add_ht_cap_ie(skb, sdata) ||
 		    mesh_add_ht_oper_ie(skb, sdata) ||
 		    mesh_add_meshid_ie(skb, sdata) ||
 		    mesh_add_meshconf_ie(skb, sdata) ||
+		    mesh_add_awake_window_ie(skb, sdata) ||
 		    mesh_add_vendor_ies(skb, sdata)) {
 			pr_err("o11s: couldn't add ies!\n");
 			goto out;
@@ -2734,6 +2745,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 			goto out;
 
 		ps = &sdata->u.ap.ps;
+	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		ps = &sdata->u.mesh.ps;
 	} else {
 		goto out;
 	}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 139ad9b66c39..6cb71a350edd 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -805,6 +805,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 			elems->peering = pos;
 			elems->peering_len = elen;
 			break;
+		case WLAN_EID_MESH_AWAKE_WINDOW:
+			if (elen >= 2)
+				elems->awake_window = (void *)pos;
+			break;
 		case WLAN_EID_PREQ:
 			elems->preq = pos;
 			elems->preq_len = elen;
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 906f00cd6d2f..afba19cb6f87 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -191,6 +191,15 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata,
 
 	/* qos header is 2 bytes */
 	*p++ = ack_policy | tid;
-	*p = ieee80211_vif_is_mesh(&sdata->vif) ?
-		(IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8) : 0;
+	if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		/* preserve RSPI and Mesh PS Level bit */
+		*p &= ((IEEE80211_QOS_CTL_RSPI |
+			IEEE80211_QOS_CTL_MESH_PS_LEVEL) >> 8);
+
+		/* Nulls don't have a mesh header (frame body) */
+		if (!ieee80211_is_qos_nullfunc(hdr->frame_control))
+			*p |= (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8);
+	} else {
+		*p = 0;
+	}
 }
-- 
cgit v1.2.3


From c14b78e7decd0d1d5add6a4604feb8609fe920a9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 5 Feb 2013 01:50:26 +0100
Subject: netfilter: nfnetlink: add mutex per subsystem

This patch replaces the global lock to one lock per subsystem.
The per-subsystem lock avoids that processes operating
with different subsystems are synchronized.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h  |  4 +--
 net/netfilter/ipset/ip_set_core.c    | 26 +++++++++---------
 net/netfilter/nf_conntrack_netlink.c | 12 ++++-----
 net/netfilter/nfnetlink.c            | 52 ++++++++++++++++++++++--------------
 4 files changed, 53 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 4966ddec039b..ecbb8e495912 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -34,8 +34,8 @@ extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigne
 extern int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error);
 extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags);
 
-extern void nfnl_lock(void);
-extern void nfnl_unlock(void);
+extern void nfnl_lock(__u8 subsys_id);
+extern void nfnl_unlock(__u8 subsys_id);
 
 #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
 	MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 6d6d8f2b033e..f82b2e606cfd 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -88,14 +88,14 @@ find_set_type(const char *name, u8 family, u8 revision)
 static bool
 load_settype(const char *name)
 {
-	nfnl_unlock();
+	nfnl_unlock(NFNL_SUBSYS_IPSET);
 	pr_debug("try to load ip_set_%s\n", name);
 	if (request_module("ip_set_%s", name) < 0) {
 		pr_warning("Can't find ip_set type %s\n", name);
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_IPSET);
 		return false;
 	}
-	nfnl_lock();
+	nfnl_lock(NFNL_SUBSYS_IPSET);
 	return true;
 }
 
@@ -532,7 +532,7 @@ ip_set_nfnl_get(const char *name)
 	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;
 
-	nfnl_lock();
+	nfnl_lock(NFNL_SUBSYS_IPSET);
 	for (i = 0; i < ip_set_max; i++) {
 		s = nfnl_set(i);
 		if (s != NULL && STREQ(s->name, name)) {
@@ -541,7 +541,7 @@ ip_set_nfnl_get(const char *name)
 			break;
 		}
 	}
-	nfnl_unlock();
+	nfnl_unlock(NFNL_SUBSYS_IPSET);
 
 	return index;
 }
@@ -561,13 +561,13 @@ ip_set_nfnl_get_byindex(ip_set_id_t index)
 	if (index > ip_set_max)
 		return IPSET_INVALID_ID;
 
-	nfnl_lock();
+	nfnl_lock(NFNL_SUBSYS_IPSET);
 	set = nfnl_set(index);
 	if (set)
 		__ip_set_get(set);
 	else
 		index = IPSET_INVALID_ID;
-	nfnl_unlock();
+	nfnl_unlock(NFNL_SUBSYS_IPSET);
 
 	return index;
 }
@@ -584,11 +584,11 @@ void
 ip_set_nfnl_put(ip_set_id_t index)
 {
 	struct ip_set *set;
-	nfnl_lock();
+	nfnl_lock(NFNL_SUBSYS_IPSET);
 	set = nfnl_set(index);
 	if (set != NULL)
 		__ip_set_put(set);
-	nfnl_unlock();
+	nfnl_unlock(NFNL_SUBSYS_IPSET);
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
 
@@ -1763,10 +1763,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 			goto done;
 		}
 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_IPSET);
 		find_set_and_id(req_get->set.name, &id);
 		req_get->set.index = id;
-		nfnl_unlock();
+		nfnl_unlock(NFNL_SUBSYS_IPSET);
 		goto copy;
 	}
 	case IP_SET_OP_GET_BYINDEX: {
@@ -1778,11 +1778,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 			ret = -EINVAL;
 			goto done;
 		}
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_IPSET);
 		set = nfnl_set(req_get->set.index);
 		strncpy(req_get->set.name, set ? set->name : "",
 			IPSET_MAXNAMELEN);
-		nfnl_unlock();
+		nfnl_unlock(NFNL_SUBSYS_IPSET);
 		goto copy;
 	}
 	default:
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2334cc5d2b16..d490a300ce2b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1256,13 +1256,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 	if (!parse_nat_setup) {
 #ifdef CONFIG_MODULES
 		rcu_read_unlock();
-		nfnl_unlock();
+		nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
 		if (request_module("nf-nat") < 0) {
-			nfnl_lock();
+			nfnl_lock(NFNL_SUBSYS_CTNETLINK);
 			rcu_read_lock();
 			return -EOPNOTSUPP;
 		}
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_CTNETLINK);
 		rcu_read_lock();
 		if (nfnetlink_parse_nat_setup_hook)
 			return -EAGAIN;
@@ -1274,13 +1274,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 	if (err == -EAGAIN) {
 #ifdef CONFIG_MODULES
 		rcu_read_unlock();
-		nfnl_unlock();
+		nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
 		if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) {
-			nfnl_lock();
+			nfnl_lock(NFNL_SUBSYS_CTNETLINK);
 			rcu_read_lock();
 			return -EOPNOTSUPP;
 		}
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_CTNETLINK);
 		rcu_read_lock();
 #else
 		err = -EOPNOTSUPP;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 58a09b7c3f6d..d578ec251712 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -36,8 +36,10 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
 
 static char __initdata nfversion[] = "0.30";
 
-static const struct nfnetlink_subsystem __rcu *subsys_table[NFNL_SUBSYS_COUNT];
-static DEFINE_MUTEX(nfnl_mutex);
+static struct {
+	struct mutex				mutex;
+	const struct nfnetlink_subsystem __rcu	*subsys;
+} table[NFNL_SUBSYS_COUNT];
 
 static const int nfnl_group2type[NFNLGRP_MAX+1] = {
 	[NFNLGRP_CONNTRACK_NEW]		= NFNL_SUBSYS_CTNETLINK,
@@ -48,27 +50,32 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
 	[NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP,
 };
 
-void nfnl_lock(void)
+void nfnl_lock(__u8 subsys_id)
 {
-	mutex_lock(&nfnl_mutex);
+	mutex_lock(&table[subsys_id].mutex);
 }
 EXPORT_SYMBOL_GPL(nfnl_lock);
 
-void nfnl_unlock(void)
+void nfnl_unlock(__u8 subsys_id)
 {
-	mutex_unlock(&nfnl_mutex);
+	mutex_unlock(&table[subsys_id].mutex);
 }
 EXPORT_SYMBOL_GPL(nfnl_unlock);
 
+static struct mutex *nfnl_get_lock(__u8 subsys_id)
+{
+	return &table[subsys_id].mutex;
+}
+
 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
 {
-	nfnl_lock();
-	if (subsys_table[n->subsys_id]) {
-		nfnl_unlock();
+	nfnl_lock(n->subsys_id);
+	if (table[n->subsys_id].subsys) {
+		nfnl_unlock(n->subsys_id);
 		return -EBUSY;
 	}
-	rcu_assign_pointer(subsys_table[n->subsys_id], n);
-	nfnl_unlock();
+	rcu_assign_pointer(table[n->subsys_id].subsys, n);
+	nfnl_unlock(n->subsys_id);
 
 	return 0;
 }
@@ -76,9 +83,9 @@ EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
 
 int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n)
 {
-	nfnl_lock();
-	subsys_table[n->subsys_id] = NULL;
-	nfnl_unlock();
+	nfnl_lock(n->subsys_id);
+	table[n->subsys_id].subsys = NULL;
+	nfnl_unlock(n->subsys_id);
 	synchronize_rcu();
 	return 0;
 }
@@ -91,7 +98,7 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t
 	if (subsys_id >= NFNL_SUBSYS_COUNT)
 		return NULL;
 
-	return rcu_dereference(subsys_table[subsys_id]);
+	return rcu_dereference(table[subsys_id].subsys);
 }
 
 static inline const struct nfnl_callback *
@@ -175,6 +182,7 @@ replay:
 		struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
 		struct nlattr *attr = (void *)nlh + min_len;
 		int attrlen = nlh->nlmsg_len - min_len;
+		__u8 subsys_id = NFNL_SUBSYS_ID(type);
 
 		err = nla_parse(cda, ss->cb[cb_id].attr_count,
 				attr, attrlen, ss->cb[cb_id].policy);
@@ -189,10 +197,9 @@ replay:
 			rcu_read_unlock();
 		} else {
 			rcu_read_unlock();
-			nfnl_lock();
-			if (rcu_dereference_protected(
-					subsys_table[NFNL_SUBSYS_ID(type)],
-					lockdep_is_held(&nfnl_mutex)) != ss ||
+			nfnl_lock(subsys_id);
+			if (rcu_dereference_protected(table[subsys_id].subsys,
+				lockdep_is_held(nfnl_get_lock(subsys_id))) != ss ||
 			    nfnetlink_find_client(type, ss) != nc)
 				err = -EAGAIN;
 			else if (nc->call)
@@ -200,7 +207,7 @@ replay:
 						   (const struct nlattr **)cda);
 			else
 				err = -EINVAL;
-			nfnl_unlock();
+			nfnl_unlock(subsys_id);
 		}
 		if (err == -EAGAIN)
 			goto replay;
@@ -267,6 +274,11 @@ static struct pernet_operations nfnetlink_net_ops = {
 
 static int __init nfnetlink_init(void)
 {
+	int i;
+
+	for (i=0; i<NFNL_SUBSYS_COUNT; i++)
+		mutex_init(&table[i].mutex);
+
 	pr_info("Netfilter messages via NETLINK v%s.\n", nfversion);
 	return register_pernet_subsys(&nfnetlink_net_ops);
 }
-- 
cgit v1.2.3


From ca2eb5679f8ddffff60156af42595df44a315ef0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 5 Feb 2013 07:25:17 +0000
Subject: tcp: remove Appropriate Byte Count support

TCP Appropriate Byte Count was added by me, but later disabled.
There is no point in maintaining it since it is a potential source
of bugs and Linux already implements other better window protection
heuristics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 11 -----------
 include/linux/tcp.h                    |  1 -
 include/net/tcp.h                      |  1 -
 kernel/sysctl_binary.c                 |  1 -
 net/ipv4/sysctl_net_ipv4.c             |  7 -------
 net/ipv4/tcp.c                         |  1 -
 net/ipv4/tcp_cong.c                    | 30 +-----------------------------
 net/ipv4/tcp_input.c                   | 15 ---------------
 net/ipv4/tcp_minisocks.c               |  1 -
 9 files changed, 1 insertion(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 19ac1802bfd4..dc2dc87d2557 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -130,17 +130,6 @@ somaxconn - INTEGER
 	Defaults to 128.  See also tcp_max_syn_backlog for additional tuning
 	for TCP sockets.
 
-tcp_abc - INTEGER
-	Controls Appropriate Byte Count (ABC) defined in RFC3465.
-	ABC is a way of increasing congestion window (cwnd) more slowly
-	in response to partial acknowledgments.
-	Possible values are:
-		0 increase cwnd once per acknowledgment (no ABC)
-		1 increase cwnd once per acknowledgment of full sized segment
-		2 allow increase cwnd by two if acknowledgment is
-		  of two segments to compensate for delayed acknowledgments.
-	Default: 0 (off)
-
 tcp_abort_on_overflow - BOOLEAN
 	If listening service is too slow to accept new connections,
 	reset them. Default state is FALSE. It means that if overflow
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4e1d2283e3cc..6d0d46138ae8 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -246,7 +246,6 @@ struct tcp_sock {
 	u32	sacked_out;	/* SACK'd packets			*/
 	u32	fackets_out;	/* FACK'd packets			*/
 	u32	tso_deferred;
-	u32	bytes_acked;	/* Appropriate Byte Counting - RFC3465 */
 
 	/* from STCP, retrans queue hinting */
 	struct sk_buff* lost_skb_hint;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 614af8b7758e..23f2e98d4b65 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -279,7 +279,6 @@ extern int sysctl_tcp_dma_copybreak;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
-extern int sysctl_tcp_abc;
 extern int sysctl_tcp_mtu_probing;
 extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 5a6384450501..b669ca1fa103 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -387,7 +387,6 @@ static const struct bin_table bin_net_ipv4_table[] = {
 	{ CTL_INT,	NET_TCP_MODERATE_RCVBUF,		"tcp_moderate_rcvbuf" },
 	{ CTL_INT,	NET_TCP_TSO_WIN_DIVISOR,		"tcp_tso_win_divisor" },
 	{ CTL_STR,	NET_TCP_CONG_CONTROL,			"tcp_congestion_control" },
-	{ CTL_INT,	NET_TCP_ABC,				"tcp_abc" },
 	{ CTL_INT,	NET_TCP_MTU_PROBING,			"tcp_mtu_probing" },
 	{ CTL_INT,	NET_TCP_BASE_MSS,			"tcp_base_mss" },
 	{ CTL_INT,	NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS,	"tcp_workaround_signed_windows" },
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2622707602d1..960fd29d9b8e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -632,13 +632,6 @@ static struct ctl_table ipv4_table[] = {
 		.maxlen		= TCP_CA_NAME_MAX,
 		.proc_handler	= proc_tcp_congestion_control,
 	},
-	{
-		.procname	= "tcp_abc",
-		.data		= &sysctl_tcp_abc,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "tcp_mtu_probing",
 		.data		= &sysctl_tcp_mtu_probing,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3ec1f69c5ceb..2c7e5963c2ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2289,7 +2289,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->packets_out = 0;
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_cnt = 0;
-	tp->bytes_acked = 0;
 	tp->window_clamp = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index cdf2e707bb10..019c2389a341 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -317,28 +317,11 @@ void tcp_slow_start(struct tcp_sock *tp)
 		snd_cwnd = 1U;
 	}
 
-	/* RFC3465: ABC Slow start
-	 * Increase only after a full MSS of bytes is acked
-	 *
-	 * TCP sender SHOULD increase cwnd by the number of
-	 * previously unacknowledged bytes ACKed by each incoming
-	 * acknowledgment, provided the increase is not more than L
-	 */
-	if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
-		return;
-
 	if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
 		cnt = sysctl_tcp_max_ssthresh >> 1;	/* limited slow start */
 	else
 		cnt = snd_cwnd;				/* exponential increase */
 
-	/* RFC3465: ABC
-	 * We MAY increase by 2 if discovered delayed ack
-	 */
-	if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
-		cnt <<= 1;
-	tp->bytes_acked = 0;
-
 	tp->snd_cwnd_cnt += cnt;
 	while (tp->snd_cwnd_cnt >= snd_cwnd) {
 		tp->snd_cwnd_cnt -= snd_cwnd;
@@ -378,20 +361,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 	/* In "safe" area, increase. */
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
 		tcp_slow_start(tp);
-
 	/* In dangerous area, increase slowly. */
-	else if (sysctl_tcp_abc) {
-		/* RFC3465: Appropriate Byte Count
-		 * increase once for each full cwnd acked
-		 */
-		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
-			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-		}
-	} else {
+	else
 		tcp_cong_avoid_ai(tp, tp->snd_cwnd);
-	}
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e376aa9591bc..f56bd1082f54 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -98,7 +98,6 @@ int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_thin_dupack __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_abc __read_mostly;
 int sysctl_tcp_early_retrans __read_mostly = 2;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
@@ -2007,7 +2006,6 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 	tp->frto_counter = 0;
-	tp->bytes_acked = 0;
 
 	tp->reordering = min_t(unsigned int, tp->reordering,
 			       sysctl_tcp_reordering);
@@ -2056,7 +2054,6 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tp->snd_cwnd_cnt   = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 
-	tp->bytes_acked = 0;
 	tcp_clear_retrans_partial(tp);
 
 	if (tcp_is_reno(tp))
@@ -2684,7 +2681,6 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->high_seq = tp->snd_nxt;
-	tp->bytes_acked = 0;
 	tp->snd_cwnd_cnt = 0;
 	tp->prior_cwnd = tp->snd_cwnd;
 	tp->prr_delivered = 0;
@@ -2735,7 +2731,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->prior_ssthresh = 0;
-	tp->bytes_acked = 0;
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
 		tcp_init_cwnd_reduction(sk, set_ssthresh);
@@ -3417,7 +3412,6 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 {
 	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
 	tp->snd_cwnd_cnt = 0;
-	tp->bytes_acked = 0;
 	TCP_ECN_queue_cwr(tp);
 	tcp_moderate_cwnd(tp);
 }
@@ -3609,15 +3603,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (after(ack, prior_snd_una))
 		flag |= FLAG_SND_UNA_ADVANCED;
 
-	if (sysctl_tcp_abc) {
-		if (icsk->icsk_ca_state < TCP_CA_CWR)
-			tp->bytes_acked += ack - prior_snd_una;
-		else if (icsk->icsk_ca_state == TCP_CA_Loss)
-			/* we assume just one segment left network */
-			tp->bytes_acked += min(ack - prior_snd_una,
-					       tp->mss_cache);
-	}
-
 	prior_fackets = tp->fackets_out;
 	prior_in_flight = tcp_packets_in_flight(tp);
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f35f2dfb6401..f0409287b5f4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -446,7 +446,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		 */
 		newtp->snd_cwnd = TCP_INIT_CWND;
 		newtp->snd_cwnd_cnt = 0;
-		newtp->bytes_acked = 0;
 
 		newtp->frto_counter = 0;
 		newtp->frto_highmark = 0;
-- 
cgit v1.2.3


From c0a05bf0182efdf7cd1fd8aa327e7a602360b67e Mon Sep 17 00:00:00 2001
From: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Date: Tue, 18 Dec 2012 11:32:03 +0100
Subject: of: add 'const' to of_node_full_name parameter

As the function just returns the np->full_name or the string "<no-node>", the
passed device_node pointer is not changed in any way.

The passed parameter can therefore be a const pointer.

Also, fix the following error from checkpatch.pl:

ERROR: "foo* bar" should be "foo *bar"
+static inline const char* of_node_full_name(const struct device_node *np)

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 5ebcc5c8e423..1e0d0c1bfb5e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -160,7 +160,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 
 #define OF_BAD_ADDR	((u64)-1)
 
-static inline const char* of_node_full_name(struct device_node *np)
+static inline const char *of_node_full_name(const struct device_node *np)
 {
 	return np ? np->full_name : "<no-node>";
 }
-- 
cgit v1.2.3


From 5372d2d71c46e5649e5d2edd4514adcd6fe7a085 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 12 Oct 2012 23:01:58 -0700
Subject: hwmon: Driver for Maxim MAX6697 and compatibles

Add support for MAX6581, MAX6602, MAX6622, MAX6636, MAX6689, MAX6693,
MAX6694, MAX6697, MAX6698, and MAX6699 temperature sensors

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/devicetree/bindings/i2c/max6697.txt |  64 ++
 Documentation/hwmon/max6697                       |  58 ++
 drivers/hwmon/Kconfig                             |  11 +
 drivers/hwmon/Makefile                            |   1 +
 drivers/hwmon/max6697.c                           | 726 ++++++++++++++++++++++
 include/linux/platform_data/max6697.h             |  36 ++
 6 files changed, 896 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/max6697.txt
 create mode 100644 Documentation/hwmon/max6697
 create mode 100644 drivers/hwmon/max6697.c
 create mode 100644 include/linux/platform_data/max6697.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/i2c/max6697.txt b/Documentation/devicetree/bindings/i2c/max6697.txt
new file mode 100644
index 000000000000..5f793998e4a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/max6697.txt
@@ -0,0 +1,64 @@
+max6697 properties
+
+Required properties:
+- compatible:
+	Should be one of
+		maxim,max6581
+		maxim,max6602
+		maxim,max6622
+		maxim,max6636
+		maxim,max6689
+		maxim,max6693
+		maxim,max6694
+		maxim,max6697
+		maxim,max6698
+		maxim,max6699
+- reg: I2C address
+
+Optional properties:
+
+- smbus-timeout-disable
+	Set to disable SMBus timeout. If not specified, SMBus timeout will be
+	enabled.
+- extended-range-enable
+	Only valid for MAX6581. Set to enable extended temperature range.
+	Extended temperature will be disabled if not specified.
+- beta-compensation-enable
+	Only valid for MAX6693 and MX6694. Set to enable beta compensation on
+	remote temperature channel 1.
+	Beta compensation will be disabled if not specified.
+- alert-mask
+	Alert bit mask. Alert disabled for bits set.
+	Select bit 0 for local temperature, bit 1..7 for remote temperatures.
+	If not specified, alert will be enabled for all channels.
+- over-temperature-mask
+	Over-temperature bit mask. Over-temperature reporting disabled for
+	bits set.
+	Select bit 0 for local temperature, bit 1..7 for remote temperatures.
+	If not specified, over-temperature reporting will be enabled for all
+	channels.
+- resistance-cancellation
+	Boolean for all chips other than MAX6581. Set to enable resistance
+	cancellation on remote temperature channel 1.
+	For MAX6581, resistance cancellation enabled for all channels if
+	specified as boolean, otherwise as per bit mask specified.
+	Only supported for remote temperatures (bit 1..7).
+	If not specified, resistance cancellation will be disabled for all
+	channels.
+- transistor-ideality
+	For MAX6581 only. Two values; first is bit mask, second is ideality
+	select value as per MAX6581 data sheet. Select bit 1..7 for remote
+	channels.
+	Transistor ideality will be initialized to default (1.008) if not
+	specified.
+
+Example:
+
+temp-sensor@1a {
+	compatible = "maxim,max6697";
+	reg = <0x1a>;
+	smbus-timeout-disable;
+	resistance-cancellation;
+	alert-mask = <0x72>;
+	over-temperature-mask = <0x7f>;
+};
diff --git a/Documentation/hwmon/max6697 b/Documentation/hwmon/max6697
new file mode 100644
index 000000000000..6594177ededa
--- /dev/null
+++ b/Documentation/hwmon/max6697
@@ -0,0 +1,58 @@
+Kernel driver max6697
+=====================
+
+Supported chips:
+  * Maxim MAX6581
+    Prefix: 'max6581'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6581.pdf
+  * Maxim MAX6602
+    Prefix: 'max6602'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6602.pdf
+  * Maxim MAX6622
+    Prefix: 'max6622'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6622.pdf
+  * Maxim MAX6636
+    Prefix: 'max6636'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6636.pdf
+  * Maxim MAX6689
+    Prefix: 'max6689'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6689.pdf
+  * Maxim MAX6693
+    Prefix: 'max6693'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6693.pdf
+  * Maxim MAX6694
+    Prefix: 'max6694'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6694.pdf
+  * Maxim MAX6697
+    Prefix: 'max6697'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6697.pdf
+  * Maxim MAX6698
+    Prefix: 'max6698'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6698.pdf
+  * Maxim MAX6699
+    Prefix: 'max6699'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6699.pdf
+
+Author:
+    Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This driver implements support for several MAX6697 compatible temperature sensor
+chips. The chips support one local temperature sensor plus four, six, or seven
+remote temperature sensors. Remote temperature sensors are diode-connected
+thermal transitors, except for MAX6698 which supports three diode-connected
+thermal transistors plus three thermistors in addition to the local temperature
+sensor.
+
+The driver provides the following sysfs attributes. temp1 is the local (chip)
+temperature, temp[2..n] are remote temperatures. The actually supported
+per-channel attributes are chip type and channel dependent.
+
+tempX_input      RO temperature
+tempX_max        RW temperature maximum threshold
+tempX_max_alarm  RO temperature maximum threshold alarm
+tempX_crit       RW temperature critical threshold
+tempX_crit_alarm RO temperature critical threshold alarm
+tempX_fault      RO temperature diode fault (remote sensors only)
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 32f238f3caea..ef5757265f78 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -854,6 +854,17 @@ config SENSORS_MAX6650
 	  This driver can also be built as a module.  If so, the module
 	  will be called max6650.
 
+config SENSORS_MAX6697
+	tristate "Maxim MAX6697 and compatibles"
+	depends on I2C
+	help
+	  If you say yes here you get support for MAX6581, MAX6602, MAX6622,
+	  MAX6636, MAX6689, MAX6693, MAX6694, MAX6697, MAX6698, and MAX6699
+	  temperature sensor chips.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called max6697.
+
 config SENSORS_MCP3021
 	tristate "Microchip MCP3021 and compatibles"
 	depends on I2C
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 5da287443f6c..a37a82c64da2 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_SENSORS_MAX197)	+= max197.o
 obj-$(CONFIG_SENSORS_MAX6639)	+= max6639.o
 obj-$(CONFIG_SENSORS_MAX6642)	+= max6642.o
 obj-$(CONFIG_SENSORS_MAX6650)	+= max6650.o
+obj-$(CONFIG_SENSORS_MAX6697)	+= max6697.o
 obj-$(CONFIG_SENSORS_MC13783_ADC)+= mc13783-adc.o
 obj-$(CONFIG_SENSORS_MCP3021)	+= mcp3021.o
 obj-$(CONFIG_SENSORS_NTC_THERMISTOR)	+= ntc_thermistor.o
diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c
new file mode 100644
index 000000000000..bf4aa3777fc1
--- /dev/null
+++ b/drivers/hwmon/max6697.c
@@ -0,0 +1,726 @@
+/*
+ * Copyright (c) 2012 Guenter Roeck <linux@roeck-us.net>
+ *
+ * based on max1668.c
+ * Copyright (c) 2011 David George <david.george@ska.ac.za>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+
+#include <linux/platform_data/max6697.h>
+
+enum chips { max6581, max6602, max6622, max6636, max6689, max6693, max6694,
+	     max6697, max6698, max6699 };
+
+/* Report local sensor as temp1 */
+
+static const u8 MAX6697_REG_TEMP[] = {
+			0x07, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08 };
+static const u8 MAX6697_REG_TEMP_EXT[] = {
+			0x57, 0x09, 0x52, 0x53, 0x54, 0x55, 0x56, 0 };
+static const u8 MAX6697_REG_MAX[] = {
+			0x17, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x18 };
+static const u8 MAX6697_REG_CRIT[] = {
+			0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 };
+
+/*
+ * Map device tree / platform data register bit map to chip bit map.
+ * Applies to alert register and over-temperature register.
+ */
+#define MAX6697_MAP_BITS(reg)	((((reg) & 0x7e) >> 1) | \
+				 (((reg) & 0x01) << 6) | ((reg) & 0x80))
+
+#define MAX6697_REG_STAT(n)		(0x44 + (n))
+
+#define MAX6697_REG_CONFIG		0x41
+#define MAX6581_CONF_EXTENDED		(1 << 1)
+#define MAX6693_CONF_BETA		(1 << 2)
+#define MAX6697_CONF_RESISTANCE		(1 << 3)
+#define MAX6697_CONF_TIMEOUT		(1 << 5)
+#define MAX6697_REG_ALERT_MASK		0x42
+#define MAX6697_REG_OVERT_MASK		0x43
+
+#define MAX6581_REG_RESISTANCE		0x4a
+#define MAX6581_REG_IDEALITY		0x4b
+#define MAX6581_REG_IDEALITY_SELECT	0x4c
+#define MAX6581_REG_OFFSET		0x4d
+#define MAX6581_REG_OFFSET_SELECT	0x4e
+
+#define MAX6697_CONV_TIME		156	/* ms per channel, worst case */
+
+struct max6697_chip_data {
+	int channels;
+	u32 have_ext;
+	u32 have_crit;
+	u32 have_fault;
+	u8 valid_conf;
+	const u8 *alarm_map;
+};
+
+struct max6697_data {
+	struct device *hwmon_dev;
+
+	enum chips type;
+	const struct max6697_chip_data *chip;
+
+	int update_interval;	/* in milli-seconds */
+	int temp_offset;	/* in degrees C */
+
+	struct mutex update_lock;
+	unsigned long last_updated;	/* In jiffies */
+	bool valid;		/* true if following fields are valid */
+
+	/* 1x local and up to 7x remote */
+	u8 temp[8][4];		/* [nr][0]=temp [1]=ext [2]=max [3]=crit */
+#define MAX6697_TEMP_INPUT	0
+#define MAX6697_TEMP_EXT	1
+#define MAX6697_TEMP_MAX	2
+#define MAX6697_TEMP_CRIT	3
+	u32 alarms;
+};
+
+/* Diode fault status bits on MAX6581 are right shifted by one bit */
+static const u8 max6581_alarm_map[] = {
+	 0, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15,
+	 16, 17, 18, 19, 20, 21, 22, 23 };
+
+static const struct max6697_chip_data max6697_chip_data[] = {
+	[max6581] = {
+		.channels = 8,
+		.have_crit = 0xff,
+		.have_ext = 0x7f,
+		.have_fault = 0xfe,
+		.valid_conf = MAX6581_CONF_EXTENDED | MAX6697_CONF_TIMEOUT,
+		.alarm_map = max6581_alarm_map,
+	},
+	[max6602] = {
+		.channels = 5,
+		.have_crit = 0x12,
+		.have_ext = 0x02,
+		.have_fault = 0x1e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6622] = {
+		.channels = 5,
+		.have_crit = 0x12,
+		.have_ext = 0x02,
+		.have_fault = 0x1e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6636] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x7e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6689] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x7e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6693] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x7e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6693_CONF_BETA |
+		  MAX6697_CONF_TIMEOUT,
+	},
+	[max6694] = {
+		.channels = 5,
+		.have_crit = 0x12,
+		.have_ext = 0x02,
+		.have_fault = 0x1e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6693_CONF_BETA |
+		  MAX6697_CONF_TIMEOUT,
+	},
+	[max6697] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x7e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6698] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x0e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6699] = {
+		.channels = 5,
+		.have_crit = 0x12,
+		.have_ext = 0x02,
+		.have_fault = 0x1e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+};
+
+static struct max6697_data *max6697_update_device(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct max6697_data *data = i2c_get_clientdata(client);
+	struct max6697_data *ret = data;
+	int val;
+	int i;
+	u32 alarms;
+
+	mutex_lock(&data->update_lock);
+
+	if (data->valid &&
+	    !time_after(jiffies, data->last_updated
+			+ msecs_to_jiffies(data->update_interval)))
+		goto abort;
+
+	for (i = 0; i < data->chip->channels; i++) {
+		if (data->chip->have_ext & (1 << i)) {
+			val = i2c_smbus_read_byte_data(client,
+						       MAX6697_REG_TEMP_EXT[i]);
+			if (unlikely(val < 0)) {
+				ret = ERR_PTR(val);
+				goto abort;
+			}
+			data->temp[i][MAX6697_TEMP_EXT] = val;
+		}
+
+		val = i2c_smbus_read_byte_data(client, MAX6697_REG_TEMP[i]);
+		if (unlikely(val < 0)) {
+			ret = ERR_PTR(val);
+			goto abort;
+		}
+		data->temp[i][MAX6697_TEMP_INPUT] = val;
+
+		val = i2c_smbus_read_byte_data(client, MAX6697_REG_MAX[i]);
+		if (unlikely(val < 0)) {
+			ret = ERR_PTR(val);
+			goto abort;
+		}
+		data->temp[i][MAX6697_TEMP_MAX] = val;
+
+		if (data->chip->have_crit & (1 << i)) {
+			val = i2c_smbus_read_byte_data(client,
+						       MAX6697_REG_CRIT[i]);
+			if (unlikely(val < 0)) {
+				ret = ERR_PTR(val);
+				goto abort;
+			}
+			data->temp[i][MAX6697_TEMP_CRIT] = val;
+		}
+	}
+
+	alarms = 0;
+	for (i = 0; i < 3; i++) {
+		val = i2c_smbus_read_byte_data(client, MAX6697_REG_STAT(i));
+		if (unlikely(val < 0)) {
+			ret = ERR_PTR(val);
+			goto abort;
+		}
+		alarms = (alarms << 8) | val;
+	}
+	data->alarms = alarms;
+	data->last_updated = jiffies;
+	data->valid = true;
+abort:
+	mutex_unlock(&data->update_lock);
+
+	return ret;
+}
+
+static ssize_t show_temp_input(struct device *dev,
+			       struct device_attribute *devattr, char *buf)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	struct max6697_data *data = max6697_update_device(dev);
+	int temp;
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	temp = (data->temp[index][MAX6697_TEMP_INPUT] - data->temp_offset) << 3;
+	temp |= data->temp[index][MAX6697_TEMP_EXT] >> 5;
+
+	return sprintf(buf, "%d\n", temp * 125);
+}
+
+static ssize_t show_temp(struct device *dev,
+			 struct device_attribute *devattr, char *buf)
+{
+	int nr = to_sensor_dev_attr_2(devattr)->nr;
+	int index = to_sensor_dev_attr_2(devattr)->index;
+	struct max6697_data *data = max6697_update_device(dev);
+	int temp;
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	temp = data->temp[nr][index];
+	temp -= data->temp_offset;
+
+	return sprintf(buf, "%d\n", temp * 1000);
+}
+
+static ssize_t show_alarm(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	int index = to_sensor_dev_attr(attr)->index;
+	struct max6697_data *data = max6697_update_device(dev);
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	if (data->chip->alarm_map)
+		index = data->chip->alarm_map[index];
+
+	return sprintf(buf, "%u\n", (data->alarms >> index) & 0x1);
+}
+
+static ssize_t set_temp(struct device *dev,
+			struct device_attribute *devattr,
+			const char *buf, size_t count)
+{
+	int nr = to_sensor_dev_attr_2(devattr)->nr;
+	int index = to_sensor_dev_attr_2(devattr)->index;
+	struct i2c_client *client = to_i2c_client(dev);
+	struct max6697_data *data = i2c_get_clientdata(client);
+	long temp;
+	int ret;
+
+	ret = kstrtol(buf, 10, &temp);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&data->update_lock);
+	temp = DIV_ROUND_CLOSEST(temp, 1000) + data->temp_offset;
+	temp = clamp_val(temp, 0, data->type == max6581 ? 255 : 127);
+	data->temp[nr][index] = temp;
+	ret = i2c_smbus_write_byte_data(client,
+					index == 2 ? MAX6697_REG_MAX[nr]
+						   : MAX6697_REG_CRIT[nr],
+					temp);
+	mutex_unlock(&data->update_lock);
+
+	return ret < 0 ? ret : count;
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp_input, NULL, 0);
+static SENSOR_DEVICE_ATTR_2(temp1_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    0, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp1_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    0, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, show_temp_input, NULL, 1);
+static SENSOR_DEVICE_ATTR_2(temp2_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    1, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp2_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    1, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, show_temp_input, NULL, 2);
+static SENSOR_DEVICE_ATTR_2(temp3_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    2, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp3_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    2, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, show_temp_input, NULL, 3);
+static SENSOR_DEVICE_ATTR_2(temp4_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    3, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp4_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    3, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp5_input, S_IRUGO, show_temp_input, NULL, 4);
+static SENSOR_DEVICE_ATTR_2(temp5_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    4, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp5_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    4, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp6_input, S_IRUGO, show_temp_input, NULL, 5);
+static SENSOR_DEVICE_ATTR_2(temp6_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    5, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp6_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    5, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp7_input, S_IRUGO, show_temp_input, NULL, 6);
+static SENSOR_DEVICE_ATTR_2(temp7_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    6, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp7_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    6, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp8_input, S_IRUGO, show_temp_input, NULL, 7);
+static SENSOR_DEVICE_ATTR_2(temp8_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    7, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp8_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    7, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL, 22);
+static SENSOR_DEVICE_ATTR(temp2_max_alarm, S_IRUGO, show_alarm, NULL, 16);
+static SENSOR_DEVICE_ATTR(temp3_max_alarm, S_IRUGO, show_alarm, NULL, 17);
+static SENSOR_DEVICE_ATTR(temp4_max_alarm, S_IRUGO, show_alarm, NULL, 18);
+static SENSOR_DEVICE_ATTR(temp5_max_alarm, S_IRUGO, show_alarm, NULL, 19);
+static SENSOR_DEVICE_ATTR(temp6_max_alarm, S_IRUGO, show_alarm, NULL, 20);
+static SENSOR_DEVICE_ATTR(temp7_max_alarm, S_IRUGO, show_alarm, NULL, 21);
+static SENSOR_DEVICE_ATTR(temp8_max_alarm, S_IRUGO, show_alarm, NULL, 23);
+
+static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL, 14);
+static SENSOR_DEVICE_ATTR(temp2_crit_alarm, S_IRUGO, show_alarm, NULL, 8);
+static SENSOR_DEVICE_ATTR(temp3_crit_alarm, S_IRUGO, show_alarm, NULL, 9);
+static SENSOR_DEVICE_ATTR(temp4_crit_alarm, S_IRUGO, show_alarm, NULL, 10);
+static SENSOR_DEVICE_ATTR(temp5_crit_alarm, S_IRUGO, show_alarm, NULL, 11);
+static SENSOR_DEVICE_ATTR(temp6_crit_alarm, S_IRUGO, show_alarm, NULL, 12);
+static SENSOR_DEVICE_ATTR(temp7_crit_alarm, S_IRUGO, show_alarm, NULL, 13);
+static SENSOR_DEVICE_ATTR(temp8_crit_alarm, S_IRUGO, show_alarm, NULL, 15);
+
+static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_fault, S_IRUGO, show_alarm, NULL, 2);
+static SENSOR_DEVICE_ATTR(temp4_fault, S_IRUGO, show_alarm, NULL, 3);
+static SENSOR_DEVICE_ATTR(temp5_fault, S_IRUGO, show_alarm, NULL, 4);
+static SENSOR_DEVICE_ATTR(temp6_fault, S_IRUGO, show_alarm, NULL, 5);
+static SENSOR_DEVICE_ATTR(temp7_fault, S_IRUGO, show_alarm, NULL, 6);
+static SENSOR_DEVICE_ATTR(temp8_fault, S_IRUGO, show_alarm, NULL, 7);
+
+static struct attribute *max6697_attributes[8][7] = {
+	{
+		&sensor_dev_attr_temp1_input.dev_attr.attr,
+		&sensor_dev_attr_temp1_max.dev_attr.attr,
+		&sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp1_crit.dev_attr.attr,
+		&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp2_input.dev_attr.attr,
+		&sensor_dev_attr_temp2_max.dev_attr.attr,
+		&sensor_dev_attr_temp2_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp2_crit.dev_attr.attr,
+		&sensor_dev_attr_temp2_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp2_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp3_input.dev_attr.attr,
+		&sensor_dev_attr_temp3_max.dev_attr.attr,
+		&sensor_dev_attr_temp3_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp3_crit.dev_attr.attr,
+		&sensor_dev_attr_temp3_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp3_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp4_input.dev_attr.attr,
+		&sensor_dev_attr_temp4_max.dev_attr.attr,
+		&sensor_dev_attr_temp4_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp4_crit.dev_attr.attr,
+		&sensor_dev_attr_temp4_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp4_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp5_input.dev_attr.attr,
+		&sensor_dev_attr_temp5_max.dev_attr.attr,
+		&sensor_dev_attr_temp5_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp5_crit.dev_attr.attr,
+		&sensor_dev_attr_temp5_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp5_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp6_input.dev_attr.attr,
+		&sensor_dev_attr_temp6_max.dev_attr.attr,
+		&sensor_dev_attr_temp6_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp6_crit.dev_attr.attr,
+		&sensor_dev_attr_temp6_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp6_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp7_input.dev_attr.attr,
+		&sensor_dev_attr_temp7_max.dev_attr.attr,
+		&sensor_dev_attr_temp7_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp7_crit.dev_attr.attr,
+		&sensor_dev_attr_temp7_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp7_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp8_input.dev_attr.attr,
+		&sensor_dev_attr_temp8_max.dev_attr.attr,
+		&sensor_dev_attr_temp8_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp8_crit.dev_attr.attr,
+		&sensor_dev_attr_temp8_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp8_fault.dev_attr.attr,
+		NULL
+	}
+};
+
+static const struct attribute_group max6697_group[8] = {
+	{ .attrs = max6697_attributes[0] },
+	{ .attrs = max6697_attributes[1] },
+	{ .attrs = max6697_attributes[2] },
+	{ .attrs = max6697_attributes[3] },
+	{ .attrs = max6697_attributes[4] },
+	{ .attrs = max6697_attributes[5] },
+	{ .attrs = max6697_attributes[6] },
+	{ .attrs = max6697_attributes[7] },
+};
+
+static void max6697_get_config_of(struct device_node *node,
+				  struct max6697_platform_data *pdata)
+{
+	int len;
+	const __be32 *prop;
+
+	prop = of_get_property(node, "smbus-timeout-disable", &len);
+	if (prop)
+		pdata->smbus_timeout_disable = true;
+	prop = of_get_property(node, "extended-range-enable", &len);
+	if (prop)
+		pdata->extended_range_enable = true;
+	prop = of_get_property(node, "beta-compensation-enable", &len);
+	if (prop)
+		pdata->beta_compensation = true;
+	prop = of_get_property(node, "alert-mask", &len);
+	if (prop && len == sizeof(u32))
+		pdata->alert_mask = be32_to_cpu(prop[0]);
+	prop = of_get_property(node, "over-temperature-mask", &len);
+	if (prop && len == sizeof(u32))
+		pdata->over_temperature_mask = be32_to_cpu(prop[0]);
+	prop = of_get_property(node, "resistance-cancellation", &len);
+	if (prop) {
+		if (len == sizeof(u32))
+			pdata->resistance_cancellation = be32_to_cpu(prop[0]);
+		else
+			pdata->resistance_cancellation = 0xfe;
+	}
+	prop = of_get_property(node, "transistor-ideality", &len);
+	if (prop && len == 2 * sizeof(u32)) {
+			pdata->ideality_mask = be32_to_cpu(prop[0]);
+			pdata->ideality_value = be32_to_cpu(prop[1]);
+	}
+}
+
+static int max6697_init_chip(struct i2c_client *client)
+{
+	struct max6697_data *data = i2c_get_clientdata(client);
+	struct max6697_platform_data *pdata = dev_get_platdata(&client->dev);
+	struct max6697_platform_data p;
+	const struct max6697_chip_data *chip = data->chip;
+	int factor = chip->channels;
+	int ret, reg;
+
+	/*
+	 * Don't touch configuration if neither platform data nor OF
+	 * configuration was specified. If that is the case, use the
+	 * current chip configuration.
+	 */
+	if (!pdata && !client->dev.of_node) {
+		reg = i2c_smbus_read_byte_data(client, MAX6697_REG_CONFIG);
+		if (reg < 0)
+			return reg;
+		if (data->type == max6581) {
+			if (reg & MAX6581_CONF_EXTENDED)
+				data->temp_offset = 64;
+			reg = i2c_smbus_read_byte_data(client,
+						       MAX6581_REG_RESISTANCE);
+			if (reg < 0)
+				return reg;
+			factor += hweight8(reg);
+		} else {
+			if (reg & MAX6697_CONF_RESISTANCE)
+				factor++;
+		}
+		goto done;
+	}
+
+	if (client->dev.of_node) {
+		memset(&p, 0, sizeof(p));
+		max6697_get_config_of(client->dev.of_node, &p);
+		pdata = &p;
+	}
+
+	reg = 0;
+	if (pdata->smbus_timeout_disable &&
+	    (chip->valid_conf & MAX6697_CONF_TIMEOUT)) {
+		reg |= MAX6697_CONF_TIMEOUT;
+	}
+	if (pdata->extended_range_enable &&
+	    (chip->valid_conf & MAX6581_CONF_EXTENDED)) {
+		reg |= MAX6581_CONF_EXTENDED;
+		data->temp_offset = 64;
+	}
+	if (pdata->resistance_cancellation &&
+	    (chip->valid_conf & MAX6697_CONF_RESISTANCE)) {
+		reg |= MAX6697_CONF_RESISTANCE;
+		factor++;
+	}
+	if (pdata->beta_compensation &&
+	    (chip->valid_conf & MAX6693_CONF_BETA)) {
+		reg |= MAX6693_CONF_BETA;
+	}
+
+	ret = i2c_smbus_write_byte_data(client, MAX6697_REG_CONFIG, reg);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, MAX6697_REG_ALERT_MASK,
+					MAX6697_MAP_BITS(pdata->alert_mask));
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, MAX6697_REG_OVERT_MASK,
+				MAX6697_MAP_BITS(pdata->over_temperature_mask));
+	if (ret < 0)
+		return ret;
+
+	if (data->type == max6581) {
+		factor += hweight8(pdata->resistance_cancellation >> 1);
+		ret = i2c_smbus_write_byte_data(client, MAX6581_REG_RESISTANCE,
+					pdata->resistance_cancellation >> 1);
+		if (ret < 0)
+			return ret;
+		ret = i2c_smbus_write_byte_data(client, MAX6581_REG_IDEALITY,
+						pdata->ideality_mask >> 1);
+		if (ret < 0)
+			return ret;
+		ret = i2c_smbus_write_byte_data(client,
+						MAX6581_REG_IDEALITY_SELECT,
+						pdata->ideality_value);
+		if (ret < 0)
+			return ret;
+	}
+done:
+	data->update_interval = factor * MAX6697_CONV_TIME;
+	return 0;
+}
+
+static void max6697_remove_files(struct i2c_client *client)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(max6697_group); i++)
+		sysfs_remove_group(&client->dev.kobj, &max6697_group[i]);
+}
+
+static int max6697_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = client->adapter;
+	struct device *dev = &client->dev;
+	struct max6697_data *data;
+	int i, err;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -ENODEV;
+
+	data = devm_kzalloc(dev, sizeof(struct max6697_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->type = id->driver_data;
+	data->chip = &max6697_chip_data[data->type];
+
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->update_lock);
+
+	err = max6697_init_chip(client);
+	if (err)
+		return err;
+
+	for (i = 0; i < data->chip->channels; i++) {
+		err = sysfs_create_file(&dev->kobj,
+					max6697_attributes[i][0]);
+		if (err)
+			goto error;
+		err = sysfs_create_file(&dev->kobj,
+					max6697_attributes[i][1]);
+		if (err)
+			goto error;
+		err = sysfs_create_file(&dev->kobj,
+					max6697_attributes[i][2]);
+		if (err)
+			goto error;
+
+		if (data->chip->have_crit & (1 << i)) {
+			err = sysfs_create_file(&dev->kobj,
+						max6697_attributes[i][3]);
+			if (err)
+				goto error;
+			err = sysfs_create_file(&dev->kobj,
+						max6697_attributes[i][4]);
+			if (err)
+				goto error;
+		}
+		if (data->chip->have_fault & (1 << i)) {
+			err = sysfs_create_file(&dev->kobj,
+						max6697_attributes[i][5]);
+			if (err)
+				goto error;
+		}
+	}
+
+	data->hwmon_dev = hwmon_device_register(dev);
+	if (IS_ERR(data->hwmon_dev)) {
+		err = PTR_ERR(data->hwmon_dev);
+		goto error;
+	}
+
+	return 0;
+
+error:
+	max6697_remove_files(client);
+	return err;
+}
+
+static int max6697_remove(struct i2c_client *client)
+{
+	struct max6697_data *data = i2c_get_clientdata(client);
+
+	hwmon_device_unregister(data->hwmon_dev);
+	max6697_remove_files(client);
+
+	return 0;
+}
+
+static const struct i2c_device_id max6697_id[] = {
+	{ "max6581", max6581 },
+	{ "max6602", max6602 },
+	{ "max6622", max6622 },
+	{ "max6636", max6636 },
+	{ "max6689", max6689 },
+	{ "max6693", max6693 },
+	{ "max6694", max6694 },
+	{ "max6697", max6697 },
+	{ "max6698", max6698 },
+	{ "max6699", max6699 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max6697_id);
+
+static struct i2c_driver max6697_driver = {
+	.class = I2C_CLASS_HWMON,
+	.driver = {
+		.name	= "max6697",
+	},
+	.probe = max6697_probe,
+	.remove	= max6697_remove,
+	.id_table = max6697_id,
+};
+
+module_i2c_driver(max6697_driver);
+
+MODULE_AUTHOR("Guenter Roeck <linux@roeck-us.net>");
+MODULE_DESCRIPTION("MAX6697 temperature sensor driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/max6697.h b/include/linux/platform_data/max6697.h
new file mode 100644
index 000000000000..ed9d3b3daf02
--- /dev/null
+++ b/include/linux/platform_data/max6697.h
@@ -0,0 +1,36 @@
+/*
+ * max6697.h
+ *     Copyright (c) 2012 Guenter Roeck <linux@roeck-us.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MAX6697_H
+#define MAX6697_H
+
+#include <linux/types.h>
+
+/*
+ * For all bit masks:
+ * bit 0:    local temperature
+ * bit 1..7: remote temperatures
+ */
+struct max6697_platform_data {
+	bool smbus_timeout_disable;	/* set to disable SMBus timeouts */
+	bool extended_range_enable;	/* set to enable extended temp range */
+	bool beta_compensation;		/* set to enable beta compensation */
+	u8 alert_mask;			/* set bit to 1 to disable alert */
+	u8 over_temperature_mask;	/* set bit to 1 to disable */
+	u8 resistance_cancellation;	/* set bit to 0 to disable
+					 * bit mask for MAX6581,
+					 * boolean for other chips
+					 */
+	u8 ideality_mask;		/* set bit to 0 to disable */
+	u8 ideality_value;		/* transistor ideality as per
+					 * MAX6581 datasheet
+					 */
+};
+
+#endif /* MAX6697_H */
-- 
cgit v1.2.3


From ca99ca14c95ae49fb4c9cd3abf5f84d11a7e8a61 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 5 Feb 2013 08:05:43 +0000
Subject: netpoll: protect napi_poll and poll_controller during
 dev_[open|close]

Ivan Vercera was recently backporting commit
9c13cb8bb477a83b9a3c9e5a5478a4e21294a760 to a RHEL kernel, and I noticed that,
while this patch protects the tg3 driver from having its ndo_poll_controller
routine called during device initalization, it does nothing for the driver
during shutdown. I.e. it would be entirely possible to have the
ndo_poll_controller method (or subsequently the ndo_poll) routine called for a
driver in the netpoll path on CPU A while in parallel on CPU B, the ndo_close or
ndo_open routine could be called.  Given that the two latter routines tend to
initizlize and free many data structures that the former two rely on, the result
can easily be data corruption or various other crashes.  Furthermore, it seems
that this is potentially a problem with all net drivers that support netpoll,
and so this should ideally be fixed in a common path.

As Ben H Pointed out to me, we can't preform dev_open/dev_close in atomic
context, so I've come up with this solution.  We can use a mutex to sleep in
open/close paths and just do a mutex_trylock in the napi poll path and abandon
the poll attempt if we're locked, as we'll just retry the poll on the next send
anyway.

I've tested this here by flooding netconsole with messages on a system whos nic
driver I modfied to periodically return NETDEV_TX_BUSY, so that the netpoll tx
workqueue would be forced to send frames and poll the device.  While this was
going on I rapidly ifdown/up'ed the interface and watched for any problems.
I've not found any.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Ivan Vecera <ivecera@redhat.com>
CC: "David S. Miller" <davem@davemloft.net>
CC: Ben Hutchings <bhutchings@solarflare.com>
CC: Francois Romieu <romieu@fr.zoreil.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 11 ++++++++++-
 net/core/dev.c          | 27 ++++++++++++++++++++++++++-
 net/core/netpoll.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index f54c3bb6a22b..ab856d507b7e 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -38,8 +38,9 @@ struct netpoll {
 struct netpoll_info {
 	atomic_t refcnt;
 
-	int rx_flags;
+	unsigned long rx_flags;
 	spinlock_t rx_lock;
+	struct mutex dev_lock;
 	struct list_head rx_np; /* netpolls that registered an rx_hook */
 
 	struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */
@@ -51,6 +52,14 @@ struct netpoll_info {
 	struct rcu_head rcu;
 };
 
+#ifdef CONFIG_NETPOLL
+extern int netpoll_rx_disable(struct net_device *dev);
+extern void netpoll_rx_enable(struct net_device *dev);
+#else
+static inline int netpoll_rx_disable(struct net_device *dev) { return 0; }
+static inline void netpoll_rx_enable(struct net_device *dev) { return; }
+#endif
+
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
 int netpoll_parse_options(struct netpoll *np, char *opt);
diff --git a/net/core/dev.c b/net/core/dev.c
index e04bfdc9e3e4..2b275a7b8677 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1266,6 +1266,14 @@ static int __dev_open(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
+	/* Block netpoll from trying to do any rx path servicing.
+	 * If we don't do this there is a chance ndo_poll_controller
+	 * or ndo_poll may be running while we open the device
+	 */
+	ret = netpoll_rx_disable(dev);
+	if (ret)
+		return ret;
+
 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -1279,6 +1287,8 @@ static int __dev_open(struct net_device *dev)
 	if (!ret && ops->ndo_open)
 		ret = ops->ndo_open(dev);
 
+	netpoll_rx_enable(dev);
+
 	if (ret)
 		clear_bit(__LINK_STATE_START, &dev->state);
 	else {
@@ -1370,9 +1380,16 @@ static int __dev_close(struct net_device *dev)
 	int retval;
 	LIST_HEAD(single);
 
+	/* Temporarily disable netpoll until the interface is down */
+	retval = netpoll_rx_disable(dev);
+	if (retval)
+		return retval;
+
 	list_add(&dev->unreg_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
+
+	netpoll_rx_enable(dev);
 	return retval;
 }
 
@@ -1408,14 +1425,22 @@ static int dev_close_many(struct list_head *head)
  */
 int dev_close(struct net_device *dev)
 {
+	int ret = 0;
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
 
+		/* Block netpoll rx while the interface is going down */
+		ret = netpoll_rx_disable(dev);
+		if (ret)
+			return ret;
+
 		list_add(&dev->unreg_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
+
+		netpoll_rx_enable(dev);
 	}
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(dev_close);
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 331ccb90f915..edcd9ad95304 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -47,6 +47,8 @@ static struct sk_buff_head skb_pool;
 
 static atomic_t trapped;
 
+static struct srcu_struct netpoll_srcu;
+
 #define USEC_PER_POLL	50
 #define NETPOLL_RX_ENABLED  1
 #define NETPOLL_RX_DROP     2
@@ -199,6 +201,13 @@ static void netpoll_poll_dev(struct net_device *dev)
 	const struct net_device_ops *ops;
 	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
 
+	/* Don't do any rx activity if the dev_lock mutex is held
+	 * the dev_open/close paths use this to block netpoll activity
+	 * while changing device state
+	 */
+	if (!mutex_trylock(&dev->npinfo->dev_lock))
+		return;
+
 	if (!dev || !netif_running(dev))
 		return;
 
@@ -211,6 +220,8 @@ static void netpoll_poll_dev(struct net_device *dev)
 
 	poll_napi(dev);
 
+	mutex_unlock(&dev->npinfo->dev_lock);
+
 	if (dev->flags & IFF_SLAVE) {
 		if (ni) {
 			struct net_device *bond_dev;
@@ -231,6 +242,31 @@ static void netpoll_poll_dev(struct net_device *dev)
 	zap_completion_queue();
 }
 
+int netpoll_rx_disable(struct net_device *dev)
+{
+	struct netpoll_info *ni;
+	int idx;
+	might_sleep();
+	idx = srcu_read_lock(&netpoll_srcu);
+	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
+	if (ni)
+		mutex_lock(&ni->dev_lock);
+	srcu_read_unlock(&netpoll_srcu, idx);
+	return 0;
+}
+EXPORT_SYMBOL(netpoll_rx_disable);
+
+void netpoll_rx_enable(struct net_device *dev)
+{
+	struct netpoll_info *ni;
+	rcu_read_lock();
+	ni = rcu_dereference(dev->npinfo);
+	if (ni)
+		mutex_unlock(&ni->dev_lock);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(netpoll_rx_enable);
+
 static void refill_skbs(void)
 {
 	struct sk_buff *skb;
@@ -1004,6 +1040,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 		INIT_LIST_HEAD(&npinfo->rx_np);
 
 		spin_lock_init(&npinfo->rx_lock);
+		mutex_init(&npinfo->dev_lock);
 		skb_queue_head_init(&npinfo->neigh_tx);
 		skb_queue_head_init(&npinfo->txq);
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
@@ -1169,6 +1206,7 @@ EXPORT_SYMBOL(netpoll_setup);
 static int __init netpoll_init(void)
 {
 	skb_queue_head_init(&skb_pool);
+	init_srcu_struct(&netpoll_srcu);
 	return 0;
 }
 core_initcall(netpoll_init);
@@ -1208,6 +1246,8 @@ void __netpoll_cleanup(struct netpoll *np)
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 	}
 
+	synchronize_srcu(&netpoll_srcu);
+
 	if (atomic_dec_and_test(&npinfo->refcnt)) {
 		const struct net_device_ops *ops;
 
-- 
cgit v1.2.3


From 3b72c2fe0c6bbec42ed7f899931daef227b80322 Mon Sep 17 00:00:00 2001
From: Mugunthan V N <mugunthanvnm@ti.com>
Date: Tue, 5 Feb 2013 08:26:48 +0000
Subject: drivers: net:ethernet: cpsw: add support for VLAN

adding support for VLAN interface for cpsw.

CPSW VLAN Capability
* Can filter VLAN packets in Hardware

Signed-off-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c     | 106 ++++++++++++++++++++++++++++++++++++-
 drivers/net/ethernet/ti/cpsw_ale.h |   4 ++
 include/linux/platform_data/cpsw.h |   1 +
 3 files changed, 109 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 534bf7bc34db..888708ceb13c 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -32,6 +32,7 @@
 #include <linux/of.h>
 #include <linux/of_net.h>
 #include <linux/of_device.h>
+#include <linux/if_vlan.h>
 
 #include <linux/platform_data/cpsw.h>
 
@@ -118,6 +119,9 @@ do {								\
 #define TX_PRIORITY_MAPPING	0x33221100
 #define CPDMA_TX_PRIORITY_MAP	0x76543210
 
+#define CPSW_VLAN_AWARE		BIT(1)
+#define CPSW_ALE_VLAN_AWARE	1
+
 #define cpsw_enable_irq(priv)	\
 	do {			\
 		u32 i;		\
@@ -607,14 +611,40 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 	}
 }
 
+static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
+{
+	const int vlan = priv->data.default_vlan;
+	const int port = priv->host_port;
+	u32 reg;
+	int i;
+
+	reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN :
+	       CPSW2_PORT_VLAN;
+
+	writel(vlan, &priv->host_port_regs->port_vlan);
+
+	for (i = 0; i < 2; i++)
+		slave_write(priv->slaves + i, vlan, reg);
+
+	cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS << port,
+			  ALE_ALL_PORTS << port, ALE_ALL_PORTS << port,
+			  (ALE_PORT_1 | ALE_PORT_2) << port);
+}
+
 static void cpsw_init_host_port(struct cpsw_priv *priv)
 {
+	u32 control_reg;
+
 	/* soft reset the controller and initialize ale */
 	soft_reset("cpsw", &priv->regs->soft_reset);
 	cpsw_ale_start(priv->ale);
 
 	/* switch to vlan unaware mode */
-	cpsw_ale_control_set(priv->ale, 0, ALE_VLAN_AWARE, 0);
+	cpsw_ale_control_set(priv->ale, priv->host_port, ALE_VLAN_AWARE,
+			     CPSW_ALE_VLAN_AWARE);
+	control_reg = readl(&priv->regs->control);
+	control_reg |= CPSW_VLAN_AWARE;
+	writel(control_reg, &priv->regs->control);
 
 	/* setup host port priority mapping */
 	__raw_writel(CPDMA_TX_PRIORITY_MAP,
@@ -650,6 +680,9 @@ static int cpsw_ndo_open(struct net_device *ndev)
 	cpsw_init_host_port(priv);
 	for_each_slave(priv, cpsw_slave_open, priv);
 
+	/* Add default VLAN */
+	cpsw_add_default_vlan(priv);
+
 	/* setup tx dma to fixed prio and zero offset */
 	cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
 	cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
@@ -933,6 +966,73 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev)
 }
 #endif
 
+static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv,
+				unsigned short vid)
+{
+	int ret;
+
+	ret = cpsw_ale_add_vlan(priv->ale, vid,
+				ALE_ALL_PORTS << priv->host_port,
+				0, ALE_ALL_PORTS << priv->host_port,
+				(ALE_PORT_1 | ALE_PORT_2) << priv->host_port);
+	if (ret != 0)
+		return ret;
+
+	ret = cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
+				 priv->host_port, ALE_VLAN, vid);
+	if (ret != 0)
+		goto clean_vid;
+
+	ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+				 ALE_ALL_PORTS << priv->host_port,
+				 ALE_VLAN, vid, 0);
+	if (ret != 0)
+		goto clean_vlan_ucast;
+	return 0;
+
+clean_vlan_ucast:
+	cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+			    priv->host_port, ALE_VLAN, vid);
+clean_vid:
+	cpsw_ale_del_vlan(priv->ale, vid, 0);
+	return ret;
+}
+
+static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
+		unsigned short vid)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+
+	if (vid == priv->data.default_vlan)
+		return 0;
+
+	dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid);
+	return cpsw_add_vlan_ale_entry(priv, vid);
+}
+
+static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
+		unsigned short vid)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	int ret;
+
+	if (vid == priv->data.default_vlan)
+		return 0;
+
+	dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid);
+	ret = cpsw_ale_del_vlan(priv->ale, vid, 0);
+	if (ret != 0)
+		return ret;
+
+	ret = cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+				 priv->host_port, ALE_VLAN, vid);
+	if (ret != 0)
+		return ret;
+
+	return cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast,
+				  0, ALE_VLAN, vid);
+}
+
 static const struct net_device_ops cpsw_netdev_ops = {
 	.ndo_open		= cpsw_ndo_open,
 	.ndo_stop		= cpsw_ndo_stop,
@@ -947,6 +1047,8 @@ static const struct net_device_ops cpsw_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= cpsw_ndo_poll_controller,
 #endif
+	.ndo_vlan_rx_add_vid	= cpsw_ndo_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= cpsw_ndo_vlan_rx_kill_vid,
 };
 
 static void cpsw_get_drvinfo(struct net_device *ndev,
@@ -1354,7 +1456,7 @@ static int cpsw_probe(struct platform_device *pdev)
 		k++;
 	}
 
-	ndev->flags |= IFF_ALLMULTI;	/* see cpsw_ndo_change_rx_flags() */
+	ndev->features |= NETIF_F_HW_VLAN_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
 	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h
index a002417f952b..30daa1265f0c 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.h
+++ b/drivers/net/ethernet/ti/cpsw_ale.h
@@ -69,6 +69,10 @@ enum cpsw_ale_port_state {
 #define ALE_SUPER			BIT(2)
 #define ALE_VLAN			BIT(3)
 
+#define ALE_PORT_HOST			BIT(0)
+#define ALE_PORT_1			BIT(1)
+#define ALE_PORT_2			BIT(2)
+
 #define ALE_MCAST_FWD			0
 #define ALE_MCAST_BLOCK_LEARN_FWD	1
 #define ALE_MCAST_FWD_LEARN		2
diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h
index 24368a2e8b87..e962cfd552e3 100644
--- a/include/linux/platform_data/cpsw.h
+++ b/include/linux/platform_data/cpsw.h
@@ -35,6 +35,7 @@ struct cpsw_platform_data {
 	u32	bd_ram_size;  /*buffer descriptor ram size */
 	u32	rx_descs;	/* Number of Rx Descriptios */
 	u32	mac_control;	/* Mac control register */
+	u16	default_vlan;	/* Def VLAN for ALE lookup in VLAN aware mode*/
 };
 
 #endif /* __CPSW_H__ */
-- 
cgit v1.2.3


From e185483e6b84c127d0b1c890b6b703701ae52d35 Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fbl@redhat.com>
Date: Tue, 5 Feb 2013 09:30:55 +0000
Subject: team: allow userspace to take control over carrier

Some modes don't require any special carrier handling so
in these cases, the kernel can control the carrier as for
any other interface.  However, some other modes, e.g. lacp,
requires more than just that, so userspace needs to control
the carrier itself.

The daemon today is ready to control it, but the kernel
still can change it based on events.

This fix so that either kernel or userspace is controlling
the carrier.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 8 ++++++++
 include/linux/if_team.h | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 694ccf6d71a3..05c5efe84591 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -508,6 +508,7 @@ static bool team_is_mode_set(struct team *team)
 
 static void team_set_no_mode(struct team *team)
 {
+	team->user_carrier_enabled = false;
 	team->mode = &__team_no_mode;
 }
 
@@ -1710,6 +1711,10 @@ static netdev_features_t team_fix_features(struct net_device *dev,
 
 static int team_change_carrier(struct net_device *dev, bool new_carrier)
 {
+	struct team *team = netdev_priv(dev);
+
+	team->user_carrier_enabled = true;
+
 	if (new_carrier)
 		netif_carrier_on(dev);
 	else
@@ -2573,6 +2578,9 @@ static void __team_carrier_check(struct team *team)
 	struct team_port *port;
 	bool team_linkup;
 
+	if (team->user_carrier_enabled)
+		return;
+
 	team_linkup = false;
 	list_for_each_entry(port, &team->port_list, list) {
 		if (port->linkup) {
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 0245def2aa93..4648d8021244 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -186,6 +186,7 @@ struct team {
 
 	const struct team_mode *mode;
 	struct team_mode_ops ops;
+	bool user_carrier_enabled;
 	bool queue_override_enabled;
 	struct list_head *qom_lists; /* array of queue override mapping lists */
 	long mode_priv[TEAM_MODE_PRIV_LONGS];
-- 
cgit v1.2.3


From 12b0004d1d1e2a9aa667412d479041e403bcafae Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Tue, 5 Feb 2013 16:36:38 +0000
Subject: net: adjust skb_gso_segment() for calling in rx path

skb_gso_segment() is almost always called in tx path,
except for openvswitch. It calls this function when
it receives the packet and tries to queue it to user-space.
In this special case, the ->ip_summed check inside
skb_gso_segment() is no longer true, as ->ip_summed value
has different meanings on rx path.

This patch adjusts skb_gso_segment() so that we can at least
avoid such warnings on checksum.

Cc: Jesse Gross <jesse@nicira.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  | 11 +++++++++--
 net/core/dev.c             | 21 ++++++++++++++++-----
 net/openvswitch/datapath.c |  2 +-
 3 files changed, 26 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 85b0949d9946..ab2774eb49e8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2662,8 +2662,15 @@ extern int netdev_master_upper_dev_link(struct net_device *dev,
 extern void netdev_upper_dev_unlink(struct net_device *dev,
 				    struct net_device *upper_dev);
 extern int skb_checksum_help(struct sk_buff *skb);
-extern struct sk_buff *skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features);
+extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+	netdev_features_t features, bool tx_path);
+
+static inline
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
+{
+	return __skb_gso_segment(skb, features, true);
+}
+
 #ifdef CONFIG_BUG
 extern void netdev_rx_csum_fault(struct net_device *dev);
 #else
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b275a7b8677..65da698c500b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2327,18 +2327,29 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
+/* openvswitch calls this on rx path, so we need a different check.
+ */
+static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+{
+	if (tx_path)
+		return skb->ip_summed != CHECKSUM_PARTIAL;
+	else
+		return skb->ip_summed == CHECKSUM_NONE;
+}
+
 /**
- *	skb_gso_segment - Perform segmentation on skb.
+ *	__skb_gso_segment - Perform segmentation on skb.
  *	@skb: buffer to segment
  *	@features: features for the output path (see dev->features)
+ *	@tx_path: whether it is called in TX path
  *
  *	This function segments the given skb and returns a list of segments.
  *
  *	It may return NULL if the skb requires no segmentation.  This is
  *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features)
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
@@ -2361,7 +2372,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+	if (unlikely(skb_needs_check(skb, tx_path))) {
 		skb_warn_bad_offload(skb);
 
 		if (skb_header_cloned(skb) &&
@@ -2390,7 +2401,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 
 	return segs;
 }
-EXPORT_SYMBOL(skb_gso_segment);
+EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d8c13a965459..9dc537df46c4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -301,7 +301,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
 	struct sk_buff *segs, *nskb;
 	int err;
 
-	segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+	segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
 	if (IS_ERR(segs))
 		return PTR_ERR(segs);
 
-- 
cgit v1.2.3


From cd431e738509e74726055390c9e5e81e8e7e03ec Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Feb 2013 20:22:50 +0000
Subject: macvlan: add multicast filter

Setting up IPv6 addresses on configurations with many macvlans
is not really working, as many multicast messages are dropped.

Add a multicast filter to macvlan to reduce the amount of cloned
skbs and overhead.

Successfully tested with 1024 macvlans on one ethernet device.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Greear <greearb@candelatech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      | 23 +++++++++++++++++++++++
 include/linux/if_macvlan.h |  6 ++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 7b44ebd7770e..f494da82c33f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -29,6 +29,7 @@
 #include <linux/if_vlan.h>
 #include <linux/if_link.h>
 #include <linux/if_macvlan.h>
+#include <linux/hash.h>
 #include <net/rtnetlink.h>
 #include <net/xfrm.h>
 
@@ -126,6 +127,13 @@ static int macvlan_broadcast_one(struct sk_buff *skb,
 	return vlan->receive(skb);
 }
 
+static unsigned int mc_hash(const unsigned char *addr)
+{
+	u32 val = __get_unaligned_cpu32(addr + 2);
+
+	return hash_32(val, MACVLAN_MC_FILTER_BITS);
+}
+
 static void macvlan_broadcast(struct sk_buff *skb,
 			      const struct macvlan_port *port,
 			      struct net_device *src,
@@ -137,6 +145,7 @@ static void macvlan_broadcast(struct sk_buff *skb,
 	struct sk_buff *nskb;
 	unsigned int i;
 	int err;
+	unsigned int hash = mc_hash(eth->h_dest);
 
 	if (skb->protocol == htons(ETH_P_PAUSE))
 		return;
@@ -146,6 +155,8 @@ static void macvlan_broadcast(struct sk_buff *skb,
 			if (vlan->dev == src || !(vlan->mode & mode))
 				continue;
 
+			if (!test_bit(hash, vlan->mc_filter))
+				continue;
 			nskb = skb_clone(skb, GFP_ATOMIC);
 			err = macvlan_broadcast_one(nskb, vlan, eth,
 					 mode == MACVLAN_MODE_BRIDGE);
@@ -405,6 +416,18 @@ static void macvlan_set_mac_lists(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
+	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+		bitmap_fill(vlan->mc_filter, MACVLAN_MC_FILTER_SZ);
+	} else {
+		struct netdev_hw_addr *ha;
+		DECLARE_BITMAP(filter, MACVLAN_MC_FILTER_SZ);
+
+		bitmap_zero(filter, MACVLAN_MC_FILTER_SZ);
+		netdev_for_each_mc_addr(ha, dev) {
+			__set_bit(mc_hash(ha->addr), filter);
+		}
+		bitmap_copy(vlan->mc_filter, filter, MACVLAN_MC_FILTER_SZ);
+	}
 	dev_uc_sync(vlan->lowerdev, dev);
 	dev_mc_sync(vlan->lowerdev, dev);
 }
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index f65e8d250f7e..84dde1dd1da4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -52,6 +52,9 @@ struct macvlan_pcpu_stats {
  */
 #define MAX_MACVTAP_QUEUES	(NR_CPUS < 16 ? NR_CPUS : 16)
 
+#define MACVLAN_MC_FILTER_BITS	8
+#define MACVLAN_MC_FILTER_SZ	(1 << MACVLAN_MC_FILTER_BITS)
+
 struct macvlan_dev {
 	struct net_device	*dev;
 	struct list_head	list;
@@ -59,6 +62,9 @@ struct macvlan_dev {
 	struct macvlan_port	*port;
 	struct net_device	*lowerdev;
 	struct macvlan_pcpu_stats __percpu *pcpu_stats;
+
+	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+
 	enum macvlan_mode	mode;
 	u16			flags;
 	int (*receive)(struct sk_buff *skb);
-- 
cgit v1.2.3


From 6be195886ac26abe0194ed1bc7a9224f8a97c310 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 6 Feb 2013 18:04:53 -0800
Subject: workqueue: replace WORK_CPU_NONE/LAST with WORK_CPU_END

Now that workqueue has moved away from gcwqs, workqueue no longer has
the need to have a CPU identifier indicating "no cpu associated" - we
now use WORK_OFFQ_POOL_NONE instead - and most uses of WORK_CPU_NONE
are gone.

The only left usage is as the end marker for for_each_*wq*()
iterators, where the name WORK_CPU_NONE is confusing w/o actual
WORK_CPU_NONE usages.  Similarly, WORK_CPU_LAST which equals
WORK_CPU_NONE no longer makes sense.

Replace both WORK_CPU_NONE and LAST with WORK_CPU_END.  This patch
doesn't introduce any functional difference.

tj: s/WORK_CPU_LAST/WORK_CPU_END/ and rewrote the description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  3 +--
 kernel/workqueue.c        | 10 +++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a94e4e84e7b1..426c39c2aaa4 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -57,8 +57,7 @@ enum {
 
 	/* special cpu IDs */
 	WORK_CPU_UNBOUND	= NR_CPUS,
-	WORK_CPU_NONE		= NR_CPUS + 1,
-	WORK_CPU_LAST		= WORK_CPU_NONE,
+	WORK_CPU_END		= NR_CPUS + 1,
 
 	/*
 	 * Reserve 7 bits off of cwq pointer w/ debugobjects turned
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 577de1073f24..7e11334a119f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -258,7 +258,7 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
 		if (sw & 2)
 			return WORK_CPU_UNBOUND;
 	}
-	return WORK_CPU_NONE;
+	return WORK_CPU_END;
 }
 
 static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
@@ -282,17 +282,17 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
  */
 #define for_each_wq_cpu(cpu)						\
 	for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3);		\
-	     (cpu) < WORK_CPU_NONE;					\
+	     (cpu) < WORK_CPU_END;					\
 	     (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3))
 
 #define for_each_online_wq_cpu(cpu)					\
 	for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3);		\
-	     (cpu) < WORK_CPU_NONE;					\
+	     (cpu) < WORK_CPU_END;					\
 	     (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
 
 #define for_each_cwq_cpu(cpu, wq)					\
 	for ((cpu) = __next_cwq_cpu(-1, cpu_possible_mask, (wq));	\
-	     (cpu) < WORK_CPU_NONE;					\
+	     (cpu) < WORK_CPU_END;					\
 	     (cpu) = __next_cwq_cpu((cpu), cpu_possible_mask, (wq)))
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -3796,7 +3796,7 @@ static int __init init_workqueues(void)
 
 	/* make sure we have enough bits for OFFQ pool ID */
 	BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
-		     WORK_CPU_LAST * NR_STD_WORKER_POOLS);
+		     WORK_CPU_END * NR_STD_WORKER_POOLS);
 
 	cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
 	hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
-- 
cgit v1.2.3


From 60c057bca22285efefbba033624763a778f243bf Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 6 Feb 2013 18:04:53 -0800
Subject: workqueue: add delayed_work->wq to simplify reentrancy handling

To avoid executing the same work item from multiple CPUs concurrently,
a work_struct records the last pool it was on in its ->data so that,
on the next queueing, the pool can be queried to determine whether the
work item is still executing or not.

A delayed_work goes through timer before actually being queued on the
target workqueue and the timer needs to know the target workqueue and
CPU.  This is currently achieved by modifying delayed_work->work.data
such that it points to the cwq which points to the target workqueue
and the last CPU the work item was on.  __queue_delayed_work()
extracts the last CPU from delayed_work->work.data and then combines
it with the target workqueue to create new work.data.

The only thing this rather ugly hack achieves is encoding the target
workqueue into delayed_work->work.data without using a separate field,
which could be a trade off one can make; unfortunately, this entangles
work->data management between regular workqueue and delayed_work code
by setting cwq pointer before the work item is actually queued and
becomes a hindrance for further improvements of work->data handling.

This can be easily made sane by adding a target workqueue field to
delayed_work.  While delayed_work is used widely in the kernel and
this does make it a bit larger (<5%), I think this is the right
trade-off especially given the prospect of much saner handling of
work->data which currently involves quite tricky memory barrier
dancing, and don't expect to see any measureable effect.

Add delayed_work->wq and drop the delayed_work->work.data overloading.

tj: Rewrote the description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  3 +++
 kernel/workqueue.c        | 32 +++-----------------------------
 2 files changed, 6 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 426c39c2aaa4..a3d7556510c3 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -109,6 +109,9 @@ struct work_struct {
 struct delayed_work {
 	struct work_struct work;
 	struct timer_list timer;
+
+	/* target workqueue and CPU ->timer uses to queue ->work */
+	struct workqueue_struct *wq;
 	int cpu;
 };
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a229a56f3a32..41a502ce3802 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1339,10 +1339,9 @@ EXPORT_SYMBOL_GPL(queue_work);
 void delayed_work_timer_fn(unsigned long __data)
 {
 	struct delayed_work *dwork = (struct delayed_work *)__data;
-	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
 	/* should have been called from irqsafe timer with irq already off */
-	__queue_work(dwork->cpu, cwq->wq, &dwork->work);
+	__queue_work(dwork->cpu, dwork->wq, &dwork->work);
 }
 EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
 
@@ -1351,7 +1350,6 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 {
 	struct timer_list *timer = &dwork->timer;
 	struct work_struct *work = &dwork->work;
-	unsigned int lcpu;
 
 	WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
 		     timer->data != (unsigned long)dwork);
@@ -1371,30 +1369,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 
 	timer_stats_timer_set_start_info(&dwork->timer);
 
-	/*
-	 * This stores cwq for the moment, for the timer_fn.  Note that the
-	 * work's pool is preserved to allow reentrance detection for
-	 * delayed works.
-	 */
-	if (!(wq->flags & WQ_UNBOUND)) {
-		struct worker_pool *pool = get_work_pool(work);
-
-		/*
-		 * If we cannot get the last pool from @work directly,
-		 * select the last CPU such that it avoids unnecessarily
-		 * triggering non-reentrancy check in __queue_work().
-		 */
-		lcpu = cpu;
-		if (pool)
-			lcpu = pool->cpu;
-		if (lcpu == WORK_CPU_UNBOUND)
-			lcpu = raw_smp_processor_id();
-	} else {
-		lcpu = WORK_CPU_UNBOUND;
-	}
-
-	set_work_cwq(work, get_cwq(lcpu, wq), 0);
-
+	dwork->wq = wq;
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
@@ -2944,8 +2919,7 @@ bool flush_delayed_work(struct delayed_work *dwork)
 {
 	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
-		__queue_work(dwork->cpu,
-			     get_work_cwq(&dwork->work)->wq, &dwork->work);
+		__queue_work(dwork->cpu, dwork->wq, &dwork->work);
 	local_irq_enable();
 	return flush_work(&dwork->work);
 }
-- 
cgit v1.2.3


From cf4aebc292fac7f34f8345664320e9d4a42ca76c Mon Sep 17 00:00:00 2001
From: Clark Williams <williams@redhat.com>
Date: Thu, 7 Feb 2013 09:46:59 -0600
Subject: sched: Move sched.h sysctl bits into separate header

Move the sysctl-related bits from include/linux/sched.h into
a new file: include/linux/sched/sysctl.h. Then update source
files requiring access to those bits by including the new
header file.

Signed-off-by: Clark Williams <williams@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20130207094659.06dced96@riff.lan
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 block/blk-exec.c             |  1 +
 include/linux/sched.h        | 91 -----------------------------------------
 include/linux/sched/sysctl.h | 97 ++++++++++++++++++++++++++++++++++++++++++++
 init/init_task.c             |  1 +
 kernel/hrtimer.c             |  1 +
 kernel/sched/sched.h         |  1 +
 kernel/sysctl.c              |  1 +
 kernel/timer.c               |  1 +
 mm/mmap.c                    |  1 +
 mm/mremap.c                  |  1 +
 mm/nommu.c                   |  1 +
 11 files changed, 106 insertions(+), 91 deletions(-)
 create mode 100644 include/linux/sched/sysctl.h

(limited to 'include/linux')

diff --git a/block/blk-exec.c b/block/blk-exec.c
index 74638ec234c8..c88202f973d9 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/sched/sysctl.h>
 
 #include "blk.h"
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 719ee0815e3a..8fc9b2710a80 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -304,19 +304,6 @@ static inline void lockup_detector_init(void)
 }
 #endif
 
-#ifdef CONFIG_DETECT_HUNG_TASK
-extern unsigned int  sysctl_hung_task_panic;
-extern unsigned long sysctl_hung_task_check_count;
-extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-					 void __user *buffer,
-					 size_t *lenp, loff_t *ppos);
-#else
-/* Avoid need for ifdefs elsewhere in the code */
-enum { sysctl_hung_task_timeout_secs = 0 };
-#endif
-
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched		__attribute__((__section__(".sched.text")))
 
@@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 struct nsproxy;
 struct user_namespace;
 
-/*
- * Default maximum number of active map areas, this limits the number of vmas
- * per mm struct. Users can overwrite this number by sysctl but there is a
- * problem.
- *
- * When a program's coredump is generated as ELF format, a section is created
- * per a vma. In ELF, the number of sections is represented in unsigned short.
- * This means the number of sections should be smaller than 65535 at coredump.
- * Because the kernel adds some informative sections to a image of program at
- * generating coredump, we need some margin. The number of extra sections is
- * 1-3 now and depends on arch. We use "5" as safe margin, here.
- */
-#define MAPCOUNT_ELF_CORE_MARGIN	(5)
-#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
-
-extern int sysctl_max_map_count;
-
 #include <linux/aio.h>
 
 #ifdef CONFIG_MMU
@@ -1221,12 +1191,6 @@ struct sched_rt_entity {
 #endif
 };
 
-/*
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define RR_TIMESLICE		(100 * HZ / 1000)
-
 struct rcu_node;
 
 enum perf_event_task_context {
@@ -2074,58 +2038,7 @@ extern void wake_up_idle_cpu(int cpu);
 static inline void wake_up_idle_cpu(int cpu) { }
 #endif
 
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_child_runs_first;
-
-enum sched_tunable_scaling {
-	SCHED_TUNABLESCALING_NONE,
-	SCHED_TUNABLESCALING_LOG,
-	SCHED_TUNABLESCALING_LINEAR,
-	SCHED_TUNABLESCALING_END,
-};
-extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
-
-extern unsigned int sysctl_numa_balancing_scan_delay;
-extern unsigned int sysctl_numa_balancing_scan_period_min;
-extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
-extern unsigned int sysctl_numa_balancing_scan_size;
-extern unsigned int sysctl_numa_balancing_settle_count;
-
-#ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_migration_cost;
-extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_timer_migration;
-extern unsigned int sysctl_sched_shares_window;
-
-int sched_proc_update_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *length,
-		loff_t *ppos);
-#endif
-#ifdef CONFIG_SCHED_DEBUG
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return sysctl_timer_migration;
-}
-#else
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return 1;
-}
-#endif
-extern unsigned int sysctl_sched_rt_period;
-extern int sysctl_sched_rt_runtime;
-
-int sched_rt_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-
 #ifdef CONFIG_SCHED_AUTOGROUP
-extern unsigned int sysctl_sched_autogroup_enabled;
-
 extern void sched_autogroup_create_attach(struct task_struct *p);
 extern void sched_autogroup_detach(struct task_struct *p);
 extern void sched_autogroup_fork(struct signal_struct *sig);
@@ -2141,10 +2054,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
-#ifdef CONFIG_CFS_BANDWIDTH
-extern unsigned int sysctl_sched_cfs_bandwidth_slice;
-#endif
-
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
new file mode 100644
index 000000000000..bac914e458ca
--- /dev/null
+++ b/include/linux/sched/sysctl.h
@@ -0,0 +1,97 @@
+#ifndef _SCHED_SYSCTL_H
+#define _SCHED_SYSCTL_H
+
+#ifdef CONFIG_DETECT_HUNG_TASK
+extern unsigned int  sysctl_hung_task_panic;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
+extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+					 void __user *buffer,
+					 size_t *lenp, loff_t *ppos);
+#else
+/* Avoid need for ifdefs elsewhere in the code */
+enum { sysctl_hung_task_timeout_secs = 0 };
+#endif
+
+/*
+ * Default maximum number of active map areas, this limits the number of vmas
+ * per mm struct. Users can overwrite this number by sysctl but there is a
+ * problem.
+ *
+ * When a program's coredump is generated as ELF format, a section is created
+ * per a vma. In ELF, the number of sections is represented in unsigned short.
+ * This means the number of sections should be smaller than 65535 at coredump.
+ * Because the kernel adds some informative sections to a image of program at
+ * generating coredump, we need some margin. The number of extra sections is
+ * 1-3 now and depends on arch. We use "5" as safe margin, here.
+ */
+#define MAPCOUNT_ELF_CORE_MARGIN	(5)
+#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+
+extern int sysctl_max_map_count;
+
+extern unsigned int sysctl_sched_latency;
+extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_wakeup_granularity;
+extern unsigned int sysctl_sched_child_runs_first;
+
+enum sched_tunable_scaling {
+	SCHED_TUNABLESCALING_NONE,
+	SCHED_TUNABLESCALING_LOG,
+	SCHED_TUNABLESCALING_LINEAR,
+	SCHED_TUNABLESCALING_END,
+};
+extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
+
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
+extern unsigned int sysctl_numa_balancing_scan_size;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
+#ifdef CONFIG_SCHED_DEBUG
+extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
+extern unsigned int sysctl_timer_migration;
+extern unsigned int sysctl_sched_shares_window;
+
+int sched_proc_update_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length,
+		loff_t *ppos);
+#endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return 1;
+}
+#endif
+extern unsigned int sysctl_sched_rt_period;
+extern int sysctl_sched_rt_runtime;
+
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+#endif
+
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE		(100 * HZ / 1000)
+
+int sched_rt_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+
+#endif /* _SCHED_SYSCTL_H */
diff --git a/init/init_task.c b/init/init_task.c
index 8b2f3996b035..a031ad14c950 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -2,6 +2,7 @@
 #include <linux/export.h>
 #include <linux/mqueue.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6db7a5ed52b5..8a9aa59d0d61 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -44,6 +44,7 @@
 #include <linux/err.h>
 #include <linux/debugobjects.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/timer.h>
 
 #include <asm/uaccess.h>
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fc886441436a..ed8de30a040e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,5 +1,6 @@
 
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c88878db491e..7357e23aaf68 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -61,6 +61,7 @@
 #include <linux/kmod.h>
 #include <linux/capability.h>
 #include <linux/binfmts.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
diff --git a/kernel/timer.c b/kernel/timer.c
index 367d00858482..3e13baf3f0ea 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -39,6 +39,7 @@
 #include <linux/kallsyms.h>
 #include <linux/irq_work.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/slab.h>
 
 #include <asm/uaccess.h>
diff --git a/mm/mmap.c b/mm/mmap.c
index 35730ee9d515..5dee4a0bb49f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -32,6 +32,7 @@
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
 #include <linux/rbtree_augmented.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
diff --git a/mm/mremap.c b/mm/mremap.c
index e1031e1f6a61..f9766f460299 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -19,6 +19,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/mmu_notifier.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
diff --git a/mm/nommu.c b/mm/nommu.c
index 79c3cac87afa..b20db4e22263 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -29,6 +29,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/audit.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
-- 
cgit v1.2.3


From ce0dbbbb30aee6a835511d5be446462388ba9eee Mon Sep 17 00:00:00 2001
From: Clark Williams <williams@redhat.com>
Date: Thu, 7 Feb 2013 09:47:04 -0600
Subject: sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice

Add a /proc/sys/kernel scheduler knob named
sched_rr_timeslice_ms that allows global changing of the
SCHED_RR timeslice value. User visable value is in milliseconds
but is stored as jiffies.  Setting to 0 (zero) resets to the
default (currently 100ms).

Signed-off-by: Clark Williams <williams@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20130207094704.13751796@riff.lan
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/sysctl.h | 15 ++++++++++++++-
 kernel/sched/core.c          | 19 +++++++++++++++++++
 kernel/sched/rt.c            |  6 ++++--
 kernel/sysctl.c              |  7 +++++++
 4 files changed, 44 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index bac914e458ca..d2bb0ae979d0 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -73,6 +73,13 @@ static inline unsigned int get_sysctl_timer_migration(void)
 	return 1;
 }
 #endif
+
+/*
+ *  control realtime throttling:
+ *
+ *  /proc/sys/kernel/sched_rt_period_us
+ *  /proc/sys/kernel/sched_rt_runtime_us
+ */
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
@@ -90,7 +97,13 @@ extern unsigned int sysctl_sched_autogroup_enabled;
  */
 #define RR_TIMESLICE		(100 * HZ / 1000)
 
-int sched_rt_handler(struct ctl_table *table, int write,
+extern int sched_rr_timeslice;
+
+extern int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+
+extern int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1dff78a9e2ab..4a88f1d51563 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7509,6 +7509,25 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/* make sure that internally we keep jiffies */
+	/* also, writing zero resets timeslice to default */
+	if (!ret && write) {
+		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c25de141c576..fb0f77e402b6 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,8 @@
 
 #include <linux/slab.h>
 
+int sched_rr_timeslice = RR_TIMESLICE;
+
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
 struct rt_bandwidth def_rt_bandwidth;
@@ -2016,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	if (--p->rt.time_slice)
 		return;
 
-	p->rt.time_slice = RR_TIMESLICE;
+	p->rt.time_slice = sched_rr_timeslice;
 
 	/*
 	 * Requeue to the end of queue if we (and all of our ancestors) are the
@@ -2047,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 	 * Time slice is 0 for SCHED_FIFO tasks
 	 */
 	if (task->policy == SCHED_RR)
-		return RR_TIMESLICE;
+		return sched_rr_timeslice;
 	else
 		return 0;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7357e23aaf68..4fc9be955c71 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -404,6 +404,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rt_handler,
 	},
+	{
+		.procname	= "sched_rr_timeslice_ms",
+		.data		= &sched_rr_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sched_rr_handler,
+	},
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",
-- 
cgit v1.2.3


From 8bd75c77b7c6a3954140dd2e20346aef3efe4a35 Mon Sep 17 00:00:00 2001
From: Clark Williams <williams@redhat.com>
Date: Thu, 7 Feb 2013 09:47:07 -0600
Subject: sched/rt: Move rt specific bits into new header file

Move rt scheduler definitions out of include/linux/sched.h into
new file include/linux/sched/rt.h

Signed-off-by: Clark Williams <williams@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20130207094707.7b9f825f@riff.lan
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/spi/spi.c                 |  2 +-
 drivers/staging/csr/bh.c          |  2 +-
 drivers/staging/csr/unifi_sme.c   |  2 +-
 drivers/tty/sysrq.c               |  1 +
 fs/select.c                       |  1 +
 include/linux/sched.h             | 55 ++-----------------------------------
 include/linux/sched/rt.h          | 58 +++++++++++++++++++++++++++++++++++++++
 init/init_task.c                  |  1 +
 kernel/futex.c                    |  1 +
 kernel/hrtimer.c                  |  1 +
 kernel/irq/manage.c               |  1 +
 kernel/mutex.c                    |  1 +
 kernel/rtmutex-debug.c            |  1 +
 kernel/rtmutex-tester.c           |  1 +
 kernel/rtmutex.c                  |  1 +
 kernel/sched/cpupri.c             |  2 ++
 kernel/sched/sched.h              |  1 +
 kernel/trace/trace.c              |  1 +
 kernel/trace/trace_sched_wakeup.c |  2 +-
 kernel/watchdog.c                 |  1 +
 mm/page-writeback.c               |  1 +
 mm/page_alloc.c                   |  1 +
 22 files changed, 81 insertions(+), 57 deletions(-)
 create mode 100644 include/linux/sched/rt.h

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 19ee901577da..3a6083b386a1 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -33,7 +33,7 @@
 #include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
 #include <linux/export.h>
-#include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/ioport.h>
diff --git a/drivers/staging/csr/bh.c b/drivers/staging/csr/bh.c
index 1a1f5c79822a..7b133597e923 100644
--- a/drivers/staging/csr/bh.c
+++ b/drivers/staging/csr/bh.c
@@ -15,7 +15,7 @@
  */
 #include "csr_wifi_hip_unifi.h"
 #include "unifi_priv.h"
-
+#include <linux/sched/rt.h>
 
 /*
  * ---------------------------------------------------------------------------
diff --git a/drivers/staging/csr/unifi_sme.c b/drivers/staging/csr/unifi_sme.c
index 7c6c4138fc76..49395da34b7f 100644
--- a/drivers/staging/csr/unifi_sme.c
+++ b/drivers/staging/csr/unifi_sme.c
@@ -15,7 +15,7 @@
 #include "unifi_priv.h"
 #include "csr_wifi_hip_unifi.h"
 #include "csr_wifi_hip_conversions.h"
-
+#include <linux/sched/rt.h>
 
 
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index b3c4a250ff86..40e5b3919e27 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -15,6 +15,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
diff --git a/fs/select.c b/fs/select.c
index 2ef72d965036..8c1c96c27062 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
 #include <linux/hrtimer.h>
+#include <linux/sched/rt.h>
 
 #include <asm/uaccess.h>
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8fc9b2710a80..33cc42130371 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1164,6 +1164,7 @@ struct sched_entity {
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq		*my_q;
 #endif
+
 /*
  * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
  * removed when useful for applications beyond shares distribution (e.g.
@@ -1191,6 +1192,7 @@ struct sched_rt_entity {
 #endif
 };
 
+
 struct rcu_node;
 
 enum perf_event_task_context {
@@ -1596,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled)
 }
 #endif
 
-/*
- * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
- * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
- * values are inverted: lower p->prio value means higher priority.
- *
- * The MAX_USER_RT_PRIO value allows the actual maximum
- * RT priority to be separate from the value exported to
- * user-space.  This allows kernel threads to set their
- * priority to a value higher than any user task. Note:
- * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
- */
-
-#define MAX_USER_RT_PRIO	100
-#define MAX_RT_PRIO		MAX_USER_RT_PRIO
-
-#define MAX_PRIO		(MAX_RT_PRIO + 40)
-#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
-
-static inline int rt_prio(int prio)
-{
-	if (unlikely(prio < MAX_RT_PRIO))
-		return 1;
-	return 0;
-}
-
-static inline int rt_task(struct task_struct *p)
-{
-	return rt_prio(p->prio);
-}
-
 static inline struct pid *task_pid(struct task_struct *task)
 {
 	return task->pids[PIDTYPE_PID].pid;
@@ -2054,26 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
-#ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern void rt_mutex_adjust_pi(struct task_struct *p);
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-	return tsk->pi_blocked_on != NULL;
-}
-#else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
-	return p->normal_prio;
-}
-# define rt_mutex_adjust_pi(p)		do { } while (0)
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-	return false;
-}
-#endif
-
 extern bool yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
@@ -2703,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
-extern void normalize_rt_tasks(void);
-
 #ifdef CONFIG_CGROUP_SCHED
 
 extern struct task_group root_task_group;
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
new file mode 100644
index 000000000000..94e19ea28fc3
--- /dev/null
+++ b/include/linux/sched/rt.h
@@ -0,0 +1,58 @@
+#ifndef _SCHED_RT_H
+#define _SCHED_RT_H
+
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_USER_RT_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space.  This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+
+#define MAX_USER_RT_PRIO	100
+#define MAX_RT_PRIO		MAX_USER_RT_PRIO
+
+#define MAX_PRIO		(MAX_RT_PRIO + 40)
+#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
+
+static inline int rt_prio(int prio)
+{
+	if (unlikely(prio < MAX_RT_PRIO))
+		return 1;
+	return 0;
+}
+
+static inline int rt_task(struct task_struct *p)
+{
+	return rt_prio(p->prio);
+}
+
+#ifdef CONFIG_RT_MUTEXES
+extern int rt_mutex_getprio(struct task_struct *p);
+extern void rt_mutex_setprio(struct task_struct *p, int prio);
+extern void rt_mutex_adjust_pi(struct task_struct *p);
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return tsk->pi_blocked_on != NULL;
+}
+#else
+static inline int rt_mutex_getprio(struct task_struct *p)
+{
+	return p->normal_prio;
+}
+# define rt_mutex_adjust_pi(p)		do { } while (0)
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return false;
+}
+#endif
+
+extern void normalize_rt_tasks(void);
+
+
+#endif /* _SCHED_RT_H */
diff --git a/init/init_task.c b/init/init_task.c
index a031ad14c950..ba0a7f362d9e 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -3,6 +3,7 @@
 #include <linux/mqueue.h>
 #include <linux/sched.h>
 #include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
diff --git a/kernel/futex.c b/kernel/futex.c
index 19eb089ca003..9618b6e9fb36 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -60,6 +60,7 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/ptrace.h>
+#include <linux/sched/rt.h>
 
 #include <asm/futex.h>
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 8a9aa59d0d61..c5dde988c0ce 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -45,6 +45,7 @@
 #include <linux/debugobjects.h>
 #include <linux/sched.h>
 #include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/timer.h>
 
 #include <asm/uaccess.h>
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e49a288fa479..02115d9592ec 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/task_work.h>
 
 #include "internals.h"
diff --git a/kernel/mutex.c b/kernel/mutex.c
index a307cc9c9526..52f23011b6e0 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -19,6 +19,7 @@
  */
 #include <linux/mutex.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 16502d3a71c8..13b243a323fa 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -17,6 +17,7 @@
  * See rt.c in preempt-rt for proper credits and further information
  */
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 98ec49475460..7890b10084a7 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -10,6 +10,7 @@
 #include <linux/kthread.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a242e691c993..1e09308bf2a1 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/timer.h>
 
 #include "rtmutex_common.h"
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 23aa789c53ee..1095e878a46f 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -28,6 +28,8 @@
  */
 
 #include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include "cpupri.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ed8de30a040e..cc03cfdf469f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,6 +1,7 @@
 
 #include <linux/sched.h>
 #include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3c13e46d7d24..4d2e4afd956e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/fs.h>
+#include <linux/sched/rt.h>
 
 #include "trace.h"
 #include "trace_output.h"
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 9fe45fcefca0..75aa97fbe1a1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,8 +15,8 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
+#include <linux/sched/rt.h>
 #include <trace/events/sched.h>
-
 #include "trace.h"
 
 static struct trace_array	*wakeup_trace;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 75a2ab3d0b02..27689422aa92 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/smpboot.h>
+#include <linux/sched/rt.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0713bfbf0954..66a0024becd9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -35,6 +35,7 @@
 #include <linux/buffer_head.h> /* __set_page_dirty_buffers */
 #include <linux/pagevec.h>
 #include <linux/timer.h>
+#include <linux/sched/rt.h>
 #include <trace/events/writeback.h>
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index df2022ff0c8a..42d18e46f286 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -58,6 +58,7 @@
 #include <linux/prefetch.h>
 #include <linux/migrate.h>
 #include <linux/page-debug-flags.h>
+#include <linux/sched/rt.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
-- 
cgit v1.2.3


From 3bc97a782cc8c112f64a25143452b06206364cc8 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 29 Nov 2012 16:46:05 +0800
Subject: srcu: Remove checks preventing offline CPUs from calling
 srcu_read_lock()

SRCU has its own statemachine and no longer relies on normal RCU.
Its read-side critical section can now be used by an offline CPU, so this
commit removes the check and the comments, reverting the SRCU portion
of c0d6d01b (rcu: Check for illegal use of RCU from offlined CPUs).

It also makes the code match the comments in whatisRCU.txt:

g.	Do you need read-side critical sections that are respected
	even though they are in the middle of the idle loop, during
	user-mode execution, or on an offlined CPU?  If so, SRCU is the
	only choice that will work for you.

[ paulmck: There is at least one remaining issue, namely use of lockdep
	   with tracing enabled. ]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 6eb691b08358..ae3ee39eb699 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -163,9 +163,6 @@ void srcu_barrier(struct srcu_struct *sp);
  * power mode. This way we can notice an extended quiescent state to
  * other CPUs that started a grace period. Otherwise we would delay any
  * grace period as long as we run in the idle task.
- *
- * Similarly, we avoid claiming an SRCU read lock held if the current
- * CPU is offline.
  */
 static inline int srcu_read_lock_held(struct srcu_struct *sp)
 {
@@ -173,8 +170,6 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
 		return 1;
 	if (rcu_is_cpu_idle())
 		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
 	return lock_is_held(&sp->dep_map);
 }
 
-- 
cgit v1.2.3


From 511a0868bed6694512348fc177cdfaf3fd97d0bb Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 29 Nov 2012 16:46:06 +0800
Subject: srcu: Remove checks preventing idle CPUs from calling
 srcu_read_lock()

SRCU has its own statemachine and no longer relies on normal RCU.
Its read-side critical section can now be used by an offline CPU, so this
commit removes the check and the comments, reverting the SRCU portion
of ff195cb6 (rcu: Warn when srcu_read_lock() is used in an extended
quiescent state).

It also makes the codes match the comments in whatisRCU.txt:

g.	Do you need read-side critical sections that are respected
	even though they are in the middle of the idle loop, during
	user-mode execution, or on an offlined CPU?  If so, SRCU is the
	only choice that will work for you.

[ paulmck: There is at least one remaining issue, namely use of lockdep
	   with tracing enabled. ]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index ae3ee39eb699..04f4121a23ae 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -151,25 +151,14 @@ void srcu_barrier(struct srcu_struct *sp);
  * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
  * and while lockdep is disabled.
  *
- * Note that if the CPU is in the idle loop from an RCU point of view
- * (ie: that we are in the section between rcu_idle_enter() and
- * rcu_idle_exit()) then srcu_read_lock_held() returns false even if
- * the CPU did an srcu_read_lock().  The reason for this is that RCU
- * ignores CPUs that are in such a section, considering these as in
- * extended quiescent state, so such a CPU is effectively never in an
- * RCU read-side critical section regardless of what RCU primitives it
- * invokes.  This state of affairs is required --- we need to keep an
- * RCU-free window in idle where the CPU may possibly enter into low
- * power mode. This way we can notice an extended quiescent state to
- * other CPUs that started a grace period. Otherwise we would delay any
- * grace period as long as we run in the idle task.
+ * Note that SRCU is based on its own statemachine and it doesn't
+ * relies on normal RCU, it can be called from the CPU which
+ * is in the idle loop from an RCU point of view or offline.
  */
 static inline int srcu_read_lock_held(struct srcu_struct *sp)
 {
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
-	if (rcu_is_cpu_idle())
-		return 0;
 	return lock_is_held(&sp->dep_map);
 }
 
@@ -231,8 +220,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 	int retval = __srcu_read_lock(sp);
 
 	rcu_lock_acquire(&(sp)->dep_map);
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
-			   "srcu_read_lock() used illegally while idle");
 	return retval;
 }
 
@@ -246,8 +233,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__releases(sp)
 {
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
-			   "srcu_read_unlock() used illegally while idle");
 	rcu_lock_release(&(sp)->dep_map);
 	__srcu_read_unlock(sp, idx);
 }
-- 
cgit v1.2.3


From 16a10ffd20a13215243bdba64c8e57ef277a55b9 Mon Sep 17 00:00:00 2001
From: Yan Burman <yanb@mellanox.com>
Date: Thu, 7 Feb 2013 02:25:22 +0000
Subject: net/mlx4: Move Ethernet related functionality from mlx4_core to
 mlx4_en

Move low level code that deals with management of Ethernet MACs and QPs from mlx4_core to mlx4_en.
Also convert the new functions to deal with MACs in form of char array instead of u64.

Actual functions moved:
mlx4_replace_mac
mlx4_get_eth_qp
mlx4_put_eth_qp

To conduct this change, some functionality had to be exported from the core,
the following functions were added:
mlx4_get_base_qp
__mlx4_replace_mac (low level function for CX1/A0 compatibility)

Signed-off-by: Yan Burman <yanb@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 216 +++++++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx4/main.c      |   5 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h      |   7 -
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |   6 +
 drivers/net/ethernet/mellanox/mlx4/port.c      | 193 +---------------------
 include/linux/mlx4/device.h                    |   5 +-
 6 files changed, 224 insertions(+), 208 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0843dd793aa7..63a1ef348387 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -420,6 +420,206 @@ static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac)
 	memset(&dst_mac[ETH_ALEN], 0, 2);
 }
 
+static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv,
+				unsigned char *mac, int *qpn, u64 *reg_id)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+	int err;
+
+	switch (dev->caps.steering_mode) {
+	case MLX4_STEERING_MODE_B0: {
+		struct mlx4_qp qp;
+		u8 gid[16] = {0};
+
+		qp.qpn = *qpn;
+		memcpy(&gid[10], mac, ETH_ALEN);
+		gid[5] = priv->port;
+
+		err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
+		break;
+	}
+	case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+		struct mlx4_spec_list spec_eth = { {NULL} };
+		__be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+		struct mlx4_net_trans_rule rule = {
+			.queue_mode = MLX4_NET_TRANS_Q_FIFO,
+			.exclusive = 0,
+			.allow_loopback = 1,
+			.promisc_mode = MLX4_FS_PROMISC_NONE,
+			.priority = MLX4_DOMAIN_NIC,
+		};
+
+		rule.port = priv->port;
+		rule.qpn = *qpn;
+		INIT_LIST_HEAD(&rule.list);
+
+		spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH;
+		memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN);
+		memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+		list_add_tail(&spec_eth.list, &rule.list);
+
+		err = mlx4_flow_attach(dev, &rule, reg_id);
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+	if (err)
+		en_warn(priv, "Failed Attaching Unicast\n");
+
+	return err;
+}
+
+static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv,
+				     unsigned char *mac, int qpn, u64 reg_id)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+
+	switch (dev->caps.steering_mode) {
+	case MLX4_STEERING_MODE_B0: {
+		struct mlx4_qp qp;
+		u8 gid[16] = {0};
+
+		qp.qpn = qpn;
+		memcpy(&gid[10], mac, ETH_ALEN);
+		gid[5] = priv->port;
+
+		mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
+		break;
+	}
+	case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+		mlx4_flow_detach(dev, reg_id);
+		break;
+	}
+	default:
+		en_err(priv, "Invalid steering mode.\n");
+	}
+}
+
+static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+	struct mlx4_mac_entry *entry;
+	int index = 0;
+	int err = 0;
+	u64 reg_id;
+	int *qpn = &priv->base_qpn;
+	u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr);
+
+	en_dbg(DRV, priv, "Registering MAC: %pM for adding\n",
+	       priv->dev->dev_addr);
+	index = mlx4_register_mac(dev, priv->port, mac);
+	if (index < 0) {
+		err = index;
+		en_err(priv, "Failed adding MAC: %pM\n",
+		       priv->dev->dev_addr);
+		return err;
+	}
+
+	if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
+		int base_qpn = mlx4_get_base_qpn(dev, priv->port);
+		*qpn = base_qpn + index;
+		return 0;
+	}
+
+	err = mlx4_qp_reserve_range(dev, 1, 1, qpn);
+	en_dbg(DRV, priv, "Reserved qp %d\n", *qpn);
+	if (err) {
+		en_err(priv, "Failed to reserve qp for mac registration\n");
+		goto qp_err;
+	}
+
+	err = mlx4_en_uc_steer_add(priv, priv->dev->dev_addr, qpn, &reg_id);
+	if (err)
+		goto steer_err;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		err = -ENOMEM;
+		goto alloc_err;
+	}
+	memcpy(entry->mac, priv->dev->dev_addr, sizeof(entry->mac));
+	entry->reg_id = reg_id;
+
+	err = radix_tree_insert(&priv->mac_tree, *qpn, entry);
+	if (err)
+		goto insert_err;
+	return 0;
+
+insert_err:
+	kfree(entry);
+
+alloc_err:
+	mlx4_en_uc_steer_release(priv, priv->dev->dev_addr, *qpn, reg_id);
+
+steer_err:
+	mlx4_qp_release_range(dev, *qpn, 1);
+
+qp_err:
+	mlx4_unregister_mac(dev, priv->port, mac);
+	return err;
+}
+
+static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+	struct mlx4_mac_entry *entry;
+	int qpn = priv->base_qpn;
+	u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr);
+
+	en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n",
+	       priv->dev->dev_addr);
+	mlx4_unregister_mac(dev, priv->port, mac);
+
+	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
+		entry = radix_tree_lookup(&priv->mac_tree, qpn);
+		if (entry) {
+			en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n",
+			       priv->port, entry->mac, qpn);
+			mlx4_en_uc_steer_release(priv, entry->mac,
+						 qpn, entry->reg_id);
+			mlx4_qp_release_range(dev, qpn, 1);
+			radix_tree_delete(&priv->mac_tree, qpn);
+			kfree(entry);
+		}
+	}
+}
+
+static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn,
+			       unsigned char *new_mac)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+	struct mlx4_mac_entry *entry;
+	int err = 0;
+	u64 new_mac_u64 = mlx4_en_mac_to_u64(new_mac);
+
+	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
+		u64 prev_mac_u64;
+
+		entry = radix_tree_lookup(&priv->mac_tree, qpn);
+		if (!entry)
+			return -EINVAL;
+		prev_mac_u64 = mlx4_en_mac_to_u64(entry->mac);
+		mlx4_en_uc_steer_release(priv, entry->mac,
+					 qpn, entry->reg_id);
+		mlx4_unregister_mac(dev, priv->port, prev_mac_u64);
+		memcpy(entry->mac, new_mac, ETH_ALEN);
+		entry->reg_id = 0;
+		mlx4_register_mac(dev, priv->port, new_mac_u64);
+		err = mlx4_en_uc_steer_add(priv, new_mac,
+					   &qpn, &entry->reg_id);
+		return err;
+	}
+
+	return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64);
+}
+
 u64 mlx4_en_mac_to_u64(u8 *addr)
 {
 	u64 mac = 0;
@@ -456,9 +656,8 @@ static void mlx4_en_do_set_mac(struct work_struct *work)
 	mutex_lock(&mdev->state_lock);
 	if (priv->port_up) {
 		/* Remove old MAC and insert the new one */
-		u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr);
-		err = mlx4_replace_mac(mdev->dev, priv->port,
-				       priv->base_qpn, mac);
+		err = mlx4_en_replace_mac(priv, priv->base_qpn,
+					  priv->dev->dev_addr);
 		if (err)
 			en_err(priv, "Failed changing HW MAC address\n");
 		memcpy(priv->prev_mac, priv->dev->dev_addr,
@@ -1035,7 +1234,6 @@ int mlx4_en_start_port(struct net_device *dev)
 	int i;
 	int j;
 	u8 mc_list[16] = {0};
-	u64 mac = mlx4_en_mac_to_u64(dev->dev_addr);
 
 	if (priv->port_up) {
 		en_dbg(DRV, priv, "start port called while port already up\n");
@@ -1082,8 +1280,7 @@ int mlx4_en_start_port(struct net_device *dev)
 
 	/* Set qp number */
 	en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port);
-	err = mlx4_get_eth_qp(mdev->dev, priv->port,
-			      mac, &priv->base_qpn);
+	err = mlx4_en_get_qp(priv);
 	if (err) {
 		en_err(priv, "Failed getting eth qp\n");
 		goto cq_err;
@@ -1196,7 +1393,7 @@ tx_err:
 rss_err:
 	mlx4_en_release_rss_steer(priv);
 mac_err:
-	mlx4_put_eth_qp(mdev->dev, priv->port, mac, priv->base_qpn);
+	mlx4_en_put_qp(priv);
 cq_err:
 	while (rx_index--)
 		mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]);
@@ -1215,7 +1412,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 	struct ethtool_flow_id *flow, *tmp_flow;
 	int i;
 	u8 mc_list[16] = {0};
-	u64 mac = mlx4_en_mac_to_u64(dev->dev_addr);
 
 	if (!priv->port_up) {
 		en_dbg(DRV, priv, "stop port called while port already down\n");
@@ -1296,7 +1492,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 	mlx4_en_release_rss_steer(priv);
 
 	/* Unregister Mac address for the port */
-	mlx4_put_eth_qp(mdev->dev, priv->port, mac, priv->base_qpn);
+	mlx4_en_put_qp(priv);
 	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN))
 		mdev->mac_removed[priv->port] = 1;
 
@@ -1661,6 +1857,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		dev->dcbnl_ops = &mlx4_en_dcbnl_ops;
 #endif
 
+	INIT_RADIX_TREE(&priv->mac_tree, GFP_KERNEL);
+
 	/* Query for default mac and max mtu */
 	priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port];
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 12ddae6efce3..2d7b9377883a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1833,12 +1833,9 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 	info->dev = dev;
 	info->port = port;
 	if (!mlx4_is_slave(dev)) {
-		INIT_RADIX_TREE(&info->mac_tree, GFP_KERNEL);
 		mlx4_init_mac_table(dev, &info->mac_table);
 		mlx4_init_vlan_table(dev, &info->vlan_table);
-		info->base_qpn =
-			dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
-			(port - 1) * (1 << log_num_mac);
+		info->base_qpn = mlx4_get_base_qpn(dev, port);
 	}
 
 	sprintf(info->dev_name, "mlx4_port%d", port);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 172daaa29a9e..ed4a6959e828 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -653,11 +653,6 @@ struct mlx4_set_port_rqp_calc_context {
 	__be32 mcast;
 };
 
-struct mlx4_mac_entry {
-	u64 mac;
-	u64 reg_id;
-};
-
 struct mlx4_port_info {
 	struct mlx4_dev	       *dev;
 	int			port;
@@ -667,7 +662,6 @@ struct mlx4_port_info {
 	char			dev_mtu_name[16];
 	struct device_attribute port_mtu_attr;
 	struct mlx4_mac_table	mac_table;
-	struct radix_tree_root	mac_tree;
 	struct mlx4_vlan_table	vlan_table;
 	int			base_qpn;
 };
@@ -916,7 +910,6 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
 void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
-int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
 int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		     int start_index, int npages, u64 *page_list);
 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 9d0105d3eaf3..84ed328582ac 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -521,6 +521,7 @@ struct mlx4_en_priv {
 	bool wol;
 	struct device *ddev;
 	int base_tx_qpn;
+	struct radix_tree_root mac_tree;
 
 #ifdef CONFIG_MLX4_EN_DCB
 	struct ieee_ets ets;
@@ -540,6 +541,11 @@ enum mlx4_en_wol {
 	MLX4_EN_WOL_ENABLED = (1ULL << 62),
 };
 
+struct mlx4_mac_entry {
+	unsigned char mac[ETH_ALEN + 2];
+	u64 reg_id;
+};
+
 #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63)
 
 void mlx4_en_update_loopback_state(struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 4c51b05efa28..719ead15e491 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -74,87 +74,6 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
 	table->total = 0;
 }
 
-static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port,
-			     u64 mac, int *qpn, u64 *reg_id)
-{
-	__be64 be_mac;
-	int err;
-
-	mac &= MLX4_MAC_MASK;
-	be_mac = cpu_to_be64(mac << 16);
-
-	switch (dev->caps.steering_mode) {
-	case MLX4_STEERING_MODE_B0: {
-		struct mlx4_qp qp;
-		u8 gid[16] = {0};
-
-		qp.qpn = *qpn;
-		memcpy(&gid[10], &be_mac, ETH_ALEN);
-		gid[5] = port;
-
-		err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
-		break;
-	}
-	case MLX4_STEERING_MODE_DEVICE_MANAGED: {
-		struct mlx4_spec_list spec_eth = { {NULL} };
-		__be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
-
-		struct mlx4_net_trans_rule rule = {
-			.queue_mode = MLX4_NET_TRANS_Q_FIFO,
-			.exclusive = 0,
-			.allow_loopback = 1,
-			.promisc_mode = MLX4_FS_PROMISC_NONE,
-			.priority = MLX4_DOMAIN_NIC,
-		};
-
-		rule.port = port;
-		rule.qpn = *qpn;
-		INIT_LIST_HEAD(&rule.list);
-
-		spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH;
-		memcpy(spec_eth.eth.dst_mac, &be_mac, ETH_ALEN);
-		memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
-		list_add_tail(&spec_eth.list, &rule.list);
-
-		err = mlx4_flow_attach(dev, &rule, reg_id);
-		break;
-	}
-	default:
-		return -EINVAL;
-	}
-	if (err)
-		mlx4_warn(dev, "Failed Attaching Unicast\n");
-
-	return err;
-}
-
-static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port,
-				  u64 mac, int qpn, u64 reg_id)
-{
-	switch (dev->caps.steering_mode) {
-	case MLX4_STEERING_MODE_B0: {
-		struct mlx4_qp qp;
-		u8 gid[16] = {0};
-		__be64 be_mac;
-
-		qp.qpn = qpn;
-		mac &= MLX4_MAC_MASK;
-		be_mac = cpu_to_be64(mac << 16);
-		memcpy(&gid[10], &be_mac, ETH_ALEN);
-		gid[5] = port;
-
-		mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
-		break;
-	}
-	case MLX4_STEERING_MODE_DEVICE_MANAGED: {
-		mlx4_flow_detach(dev, reg_id);
-		break;
-	}
-	default:
-		mlx4_err(dev, "Invalid steering mode.\n");
-	}
-}
-
 static int validate_index(struct mlx4_dev *dev,
 			  struct mlx4_mac_table *table, int index)
 {
@@ -181,92 +100,6 @@ static int find_index(struct mlx4_dev *dev,
 	return -EINVAL;
 }
 
-int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
-{
-	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
-	struct mlx4_mac_entry *entry;
-	int index = 0;
-	int err = 0;
-	u64 reg_id;
-
-	mlx4_dbg(dev, "Registering MAC: 0x%llx for adding\n",
-			(unsigned long long) mac);
-	index = mlx4_register_mac(dev, port, mac);
-	if (index < 0) {
-		err = index;
-		mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
-			 (unsigned long long) mac);
-		return err;
-	}
-
-	if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
-		*qpn = info->base_qpn + index;
-		return 0;
-	}
-
-	err = mlx4_qp_reserve_range(dev, 1, 1, qpn);
-	mlx4_dbg(dev, "Reserved qp %d\n", *qpn);
-	if (err) {
-		mlx4_err(dev, "Failed to reserve qp for mac registration\n");
-		goto qp_err;
-	}
-
-	err = mlx4_uc_steer_add(dev, port, mac, qpn, &reg_id);
-	if (err)
-		goto steer_err;
-
-	entry = kmalloc(sizeof *entry, GFP_KERNEL);
-	if (!entry) {
-		err = -ENOMEM;
-		goto alloc_err;
-	}
-	entry->mac = mac;
-	entry->reg_id = reg_id;
-	err = radix_tree_insert(&info->mac_tree, *qpn, entry);
-	if (err)
-		goto insert_err;
-	return 0;
-
-insert_err:
-	kfree(entry);
-
-alloc_err:
-	mlx4_uc_steer_release(dev, port, mac, *qpn, reg_id);
-
-steer_err:
-	mlx4_qp_release_range(dev, *qpn, 1);
-
-qp_err:
-	mlx4_unregister_mac(dev, port, mac);
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx4_get_eth_qp);
-
-void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn)
-{
-	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
-	struct mlx4_mac_entry *entry;
-
-	mlx4_dbg(dev, "Registering MAC: 0x%llx for deleting\n",
-		 (unsigned long long) mac);
-	mlx4_unregister_mac(dev, port, mac);
-
-	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
-		entry = radix_tree_lookup(&info->mac_tree, qpn);
-		if (entry) {
-			mlx4_dbg(dev, "Releasing qp: port %d, mac 0x%llx,"
-				 " qpn %d\n", port,
-				 (unsigned long long) mac, qpn);
-			mlx4_uc_steer_release(dev, port, entry->mac,
-					      qpn, entry->reg_id);
-			mlx4_qp_release_range(dev, qpn, 1);
-			radix_tree_delete(&info->mac_tree, qpn);
-			kfree(entry);
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(mlx4_put_eth_qp);
-
 static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
 				   __be64 *entries)
 {
@@ -359,6 +192,12 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 }
 EXPORT_SYMBOL_GPL(mlx4_register_mac);
 
+int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port)
+{
+	return dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
+			(port - 1) * (1 << dev->caps.log_num_macs);
+}
+EXPORT_SYMBOL_GPL(mlx4_get_base_qpn);
 
 void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
@@ -397,29 +236,13 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 }
 EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
 
-int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
+int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
 	struct mlx4_mac_table *table = &info->mac_table;
-	struct mlx4_mac_entry *entry;
 	int index = qpn - info->base_qpn;
 	int err = 0;
 
-	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
-		entry = radix_tree_lookup(&info->mac_tree, qpn);
-		if (!entry)
-			return -EINVAL;
-		mlx4_uc_steer_release(dev, port, entry->mac,
-				      qpn, entry->reg_id);
-		mlx4_unregister_mac(dev, port, entry->mac);
-		entry->mac = new_mac;
-		entry->reg_id = 0;
-		mlx4_register_mac(dev, port, new_mac);
-		err = mlx4_uc_steer_add(dev, port, entry->mac,
-					&qpn, &entry->reg_id);
-		return err;
-	}
-
 	/* CX1 doesn't support multi-functions */
 	mutex_lock(&table->mutex);
 
@@ -439,7 +262,7 @@ out:
 	mutex_unlock(&table->mutex);
 	return err;
 }
-EXPORT_SYMBOL_GPL(mlx4_replace_mac);
+EXPORT_SYMBOL_GPL(__mlx4_replace_mac);
 
 static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
 				    __be32 *entries)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1883e8e84718..6d48fce06b4a 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -956,9 +956,8 @@ int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mo
 
 int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
-int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
-int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn);
-void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn);
+int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port);
+int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
 void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap);
 int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
-- 
cgit v1.2.3


From 180996c30517b5374f63df3c9765716c5b477155 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Thu, 7 Feb 2013 05:37:37 +0000
Subject: ssb: get mac address from sprom struct for gige driver

The mac address is already stored in the sprom structure by the
platform code of the SoC this Ethernet core is found on, it just has to
be fetched from this structure instead of accessing the nvram here.
This patch also adds a return value to indicate if a mac address could
be fetched from the sprom structure.
When CONFIG_SSB_DRIVER_GIGE is not set the header file now also declares
ssb_gige_get_macaddr().

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: Michael Buesch <m@bues.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ssb/ssb_driver_gige.h | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h
index 6b05dcd927ff..86a12b0cb239 100644
--- a/include/linux/ssb/ssb_driver_gige.h
+++ b/include/linux/ssb/ssb_driver_gige.h
@@ -97,21 +97,16 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_BCM47XX
-#include <asm/mach-bcm47xx/nvram.h>
 /* Get the device MAC address */
-static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
-{
-	char buf[20];
-	if (nvram_getenv("et0macaddr", buf, sizeof(buf)) < 0)
-		return;
-	nvram_parse_macaddr(buf, macaddr);
-}
-#else
-static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
+static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
 {
+	struct ssb_gige *dev = pdev_to_ssb_gige(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	memcpy(macaddr, dev->dev->bus->sprom.et0mac, 6);
+	return 0;
 }
-#endif
 
 extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev,
 					  struct pci_dev *pdev);
@@ -175,6 +170,10 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev)
 {
 	return 0;
 }
+static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
+{
+	return -ENODEV;
+}
 
 #endif /* CONFIG_SSB_DRIVER_GIGE */
 #endif /* LINUX_SSB_DRIVER_GIGE_H_ */
-- 
cgit v1.2.3


From 7e6c63f03d94278135753fef7ffcc5b03e34282e Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Thu, 7 Feb 2013 05:37:39 +0000
Subject: tg3: add support for Ethernet core in bcm4785

The BCM4785 or sometimes named BMC4705 is a Broadcom SoC which a
Gigabit 5750 Ethernet core. The core is connected via PCI with the rest
of the SoC, but it uses some extension.

This core does not use a firmware or an eeprom.

Some devices only have a switch which supports 100MBit/s, this
currently does not work with this driver.

This patch was original written by Michael Buesch <m@bues.ch> and is in
OpenWrt for some years now.

This was tested on a Linksys WRT610N V1 and older versions of this patch
were tested by other people on different devices.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/tg3.c | 117 ++++++++++++++++++++++++++++++++++--
 drivers/net/ethernet/broadcom/tg3.h |   5 ++
 include/linux/pci_ids.h             |   1 +
 3 files changed, 117 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index d9e81d6be7b9..b1b3bc01cbc2 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -44,6 +44,7 @@
 #include <linux/prefetch.h>
 #include <linux/dma-mapping.h>
 #include <linux/firmware.h>
+#include <linux/ssb/ssb_driver_gige.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 
@@ -263,6 +264,7 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = {
 			TG3_DRV_DATA_FLAG_5705_10_100},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5722)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F),
@@ -573,7 +575,9 @@ static void _tw32_flush(struct tg3 *tp, u32 off, u32 val, u32 usec_wait)
 static inline void tw32_mailbox_flush(struct tg3 *tp, u32 off, u32 val)
 {
 	tp->write32_mbox(tp, off, val);
-	if (!tg3_flag(tp, MBOX_WRITE_REORDER) && !tg3_flag(tp, ICH_WORKAROUND))
+	if (tg3_flag(tp, FLUSH_POSTED_WRITES) ||
+	    (!tg3_flag(tp, MBOX_WRITE_REORDER) &&
+	     !tg3_flag(tp, ICH_WORKAROUND)))
 		tp->read32_mbox(tp, off);
 }
 
@@ -583,7 +587,8 @@ static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val)
 	writel(val, mbox);
 	if (tg3_flag(tp, TXD_MBOX_HWBUG))
 		writel(val, mbox);
-	if (tg3_flag(tp, MBOX_WRITE_REORDER))
+	if (tg3_flag(tp, MBOX_WRITE_REORDER) ||
+	    tg3_flag(tp, FLUSH_POSTED_WRITES))
 		readl(mbox);
 }
 
@@ -1793,6 +1798,11 @@ static int tg3_poll_fw(struct tg3 *tp)
 	int i;
 	u32 val;
 
+	if (tg3_flag(tp, IS_SSB_CORE)) {
+		/* We don't use firmware. */
+		return 0;
+	}
+
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
 		/* Wait up to 20ms for init done. */
 		for (i = 0; i < 200; i++) {
@@ -3465,6 +3475,13 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset)
 		tw32_f(offset + CPU_MODE,  CPU_MODE_HALT);
 		udelay(10);
 	} else {
+		/*
+		 * There is only an Rx CPU for the 5750 derivative in the
+		 * BCM4785.
+		 */
+		if (tg3_flag(tp, IS_SSB_CORE))
+			return 0;
+
 		for (i = 0; i < 10000; i++) {
 			tw32(offset + CPU_STATE, 0xffffffff);
 			tw32(offset + CPU_MODE,  CPU_MODE_HALT);
@@ -3932,8 +3949,9 @@ static int tg3_power_down_prepare(struct tg3 *tp)
 	tg3_frob_aux_power(tp, true);
 
 	/* Workaround for unstable PLL clock */
-	if ((GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_AX) ||
-	    (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_BX)) {
+	if ((!tg3_flag(tp, IS_SSB_CORE)) &&
+	    ((GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_AX) ||
+	     (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_BX))) {
 		u32 val = tr32(0x7d00);
 
 		val &= ~((1 << 16) | (1 << 4) | (1 << 2) | (1 << 1) | 1);
@@ -4454,6 +4472,15 @@ relink:
 	if (current_link_up == 0 || (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)) {
 		tg3_phy_copper_begin(tp);
 
+		if (tg3_flag(tp, ROBOSWITCH)) {
+			current_link_up = 1;
+			/* FIXME: when BCM5325 switch is used use 100 MBit/s */
+			current_speed = SPEED_1000;
+			current_duplex = DUPLEX_FULL;
+			tp->link_config.active_speed = current_speed;
+			tp->link_config.active_duplex = current_duplex;
+		}
+
 		tg3_readphy(tp, MII_BMSR, &bmsr);
 		if ((!tg3_readphy(tp, MII_BMSR, &bmsr) && (bmsr & BMSR_LSTATUS)) ||
 		    (tp->mac_mode & MAC_MODE_PORT_INT_LPBACK))
@@ -4472,6 +4499,26 @@ relink:
 	else
 		tp->mac_mode |= MAC_MODE_PORT_MODE_GMII;
 
+	/* In order for the 5750 core in BCM4785 chip to work properly
+	 * in RGMII mode, the Led Control Register must be set up.
+	 */
+	if (tg3_flag(tp, RGMII_MODE)) {
+		u32 led_ctrl = tr32(MAC_LED_CTRL);
+		led_ctrl &= ~(LED_CTRL_1000MBPS_ON | LED_CTRL_100MBPS_ON);
+
+		if (tp->link_config.active_speed == SPEED_10)
+			led_ctrl |= LED_CTRL_LNKLED_OVERRIDE;
+		else if (tp->link_config.active_speed == SPEED_100)
+			led_ctrl |= (LED_CTRL_LNKLED_OVERRIDE |
+				     LED_CTRL_100MBPS_ON);
+		else if (tp->link_config.active_speed == SPEED_1000)
+			led_ctrl |= (LED_CTRL_LNKLED_OVERRIDE |
+				     LED_CTRL_1000MBPS_ON);
+
+		tw32(MAC_LED_CTRL, led_ctrl);
+		udelay(40);
+	}
+
 	tp->mac_mode &= ~MAC_MODE_HALF_DUPLEX;
 	if (tp->link_config.active_duplex == DUPLEX_HALF)
 		tp->mac_mode |= MAC_MODE_HALF_DUPLEX;
@@ -8449,6 +8496,16 @@ static int tg3_chip_reset(struct tg3 *tp)
 		tw32(0x5000, 0x400);
 	}
 
+	if (tg3_flag(tp, IS_SSB_CORE)) {
+		/*
+		 * BCM4785: In order to avoid repercussions from using
+		 * potentially defective internal ROM, stop the Rx RISC CPU,
+		 * which is not required.
+		 */
+		tg3_stop_fw(tp);
+		tg3_halt_cpu(tp, RX_CPU_BASE);
+	}
+
 	tw32(GRC_MODE, tp->grc_mode);
 
 	if (tp->pci_chip_rev_id == CHIPREV_ID_5705_A0) {
@@ -10107,6 +10164,11 @@ static void tg3_timer(unsigned long __opaque)
 	    tg3_flag(tp, 57765_CLASS))
 		tg3_chk_missed_msi(tp);
 
+	if (tg3_flag(tp, FLUSH_POSTED_WRITES)) {
+		/* BCM4785: Flush posted writes from GbE to host memory. */
+		tr32(HOSTCC_MODE);
+	}
+
 	if (!tg3_flag(tp, TAGGED_STATUS)) {
 		/* All of this garbage is because when using non-tagged
 		 * IRQ status the mailbox/status_block protocol the chip
@@ -13879,6 +13941,14 @@ static void tg3_get_5720_nvram_info(struct tg3 *tp)
 /* Chips other than 5700/5701 use the NVRAM for fetching info. */
 static void tg3_nvram_init(struct tg3 *tp)
 {
+	if (tg3_flag(tp, IS_SSB_CORE)) {
+		/* No NVRAM and EEPROM on the SSB Broadcom GigE core. */
+		tg3_flag_clear(tp, NVRAM);
+		tg3_flag_clear(tp, NVRAM_BUFFERED);
+		tg3_flag_set(tp, NO_NVRAM);
+		return;
+	}
+
 	tw32_f(GRC_EEPROM_ADDR,
 	     (EEPROM_ADDR_FSM_RESET |
 	      (EEPROM_DEFAULT_CLOCK_PERIOD <<
@@ -14405,10 +14475,19 @@ static int tg3_phy_probe(struct tg3 *tp)
 			 * subsys device table.
 			 */
 			p = tg3_lookup_by_subsys(tp);
-			if (!p)
+			if (p) {
+				tp->phy_id = p->phy_id;
+			} else if (!tg3_flag(tp, IS_SSB_CORE)) {
+				/* For now we saw the IDs 0xbc050cd0,
+				 * 0xbc050f80 and 0xbc050c30 on devices
+				 * connected to an BCM4785 and there are
+				 * probably more. Just assume that the phy is
+				 * supported when it is connected to a SSB core
+				 * for now.
+				 */
 				return -ENODEV;
+			}
 
-			tp->phy_id = p->phy_id;
 			if (!tp->phy_id ||
 			    tp->phy_id == TG3_PHY_ID_BCM8002)
 				tp->phy_flags |= TG3_PHYFLG_PHY_SERDES;
@@ -15484,6 +15563,11 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent)
 				     TG3_CPMU_STATUS_FSHFT_5719;
 	}
 
+	if (tg3_flag(tp, FLUSH_POSTED_WRITES)) {
+		tp->write32_tx_mbox = tg3_write_flush_reg32;
+		tp->write32_rx_mbox = tg3_write_flush_reg32;
+	}
+
 	/* Get eeprom hw config before calling tg3_set_power_state().
 	 * In particular, the TG3_FLAG_IS_NIC flag must be
 	 * determined before calling tg3_set_power_state() so that
@@ -15820,12 +15904,19 @@ static int tg3_get_device_address(struct tg3 *tp)
 	struct net_device *dev = tp->dev;
 	u32 hi, lo, mac_offset;
 	int addr_ok = 0;
+	int err;
 
 #ifdef CONFIG_SPARC
 	if (!tg3_get_macaddr_sparc(tp))
 		return 0;
 #endif
 
+	if (tg3_flag(tp, IS_SSB_CORE)) {
+		err = ssb_gige_get_macaddr(tp->pdev, &dev->dev_addr[0]);
+		if (!err && is_valid_ether_addr(&dev->dev_addr[0]))
+			return 0;
+	}
+
 	mac_offset = 0x7c;
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 ||
 	    tg3_flag(tp, 5780_CLASS)) {
@@ -16185,6 +16276,8 @@ static int tg3_test_dma(struct tg3 *tp)
 			tp->dma_rwctrl |= 0x001b000f;
 		}
 	}
+	if (tg3_flag(tp, ONE_DMA_AT_ONCE))
+		tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA;
 
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 ||
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704)
@@ -16530,6 +16623,18 @@ static int tg3_init_one(struct pci_dev *pdev,
 	else
 		tp->msg_enable = TG3_DEF_MSG_ENABLE;
 
+	if (pdev_is_ssb_gige_core(pdev)) {
+		tg3_flag_set(tp, IS_SSB_CORE);
+		if (ssb_gige_must_flush_posted_writes(pdev))
+			tg3_flag_set(tp, FLUSH_POSTED_WRITES);
+		if (ssb_gige_one_dma_at_once(pdev))
+			tg3_flag_set(tp, ONE_DMA_AT_ONCE);
+		if (ssb_gige_have_roboswitch(pdev))
+			tg3_flag_set(tp, ROBOSWITCH);
+		if (ssb_gige_is_rgmii(pdev))
+			tg3_flag_set(tp, RGMII_MODE);
+	}
+
 	/* The word/byte swap controls here control register access byte
 	 * swapping.  DMA data byte swapping is controlled in the GRC_MODE
 	 * setting below.
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 9cd88a4b9a5f..ef6ced2bf9c3 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -3056,6 +3056,11 @@ enum TG3_FLAGS {
 	TG3_FLAG_57765_PLUS,
 	TG3_FLAG_57765_CLASS,
 	TG3_FLAG_5717_PLUS,
+	TG3_FLAG_IS_SSB_CORE,
+	TG3_FLAG_FLUSH_POSTED_WRITES,
+	TG3_FLAG_ROBOSWITCH,
+	TG3_FLAG_ONE_DMA_AT_ONCE,
+	TG3_FLAG_RGMII_MODE,
 
 	/* Add new flags before this comment and TG3_FLAG_NUMBER_OF_FLAGS */
 	TG3_FLAG_NUMBER_OF_FLAGS,	/* Last entry in enum TG3_FLAGS */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0eb65796bcb9..907e7e56fa4b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2127,6 +2127,7 @@
 #define PCI_DEVICE_ID_TIGON3_5754M	0x1672
 #define PCI_DEVICE_ID_TIGON3_5755M	0x1673
 #define PCI_DEVICE_ID_TIGON3_5756	0x1674
+#define PCI_DEVICE_ID_TIGON3_5750	0x1676
 #define PCI_DEVICE_ID_TIGON3_5751	0x1677
 #define PCI_DEVICE_ID_TIGON3_5715	0x1678
 #define PCI_DEVICE_ID_TIGON3_5715S	0x1679
-- 
cgit v1.2.3


From afb43e6d88e587441c960a5d214d2c698d076c9c Mon Sep 17 00:00:00 2001
From: Luciano Coelho <coelho@ti.com>
Date: Fri, 25 Jan 2013 11:57:48 +0200
Subject: wlcore: remove if_ops from platform_data

We can't pass pointers from the platform data to the modules, because
with DT it cannot be done.  Those pointers are not set by the board
files anyway.  It's the bus modules that set them, so they can be
safely removed from the platform data without changing any board
files.

Create a new structure that the bus modules pass to wlcore.  This
structure contains the if_ops pointers and a pointer to the actual
platform data.

Signed-off-by: Luciano Coelho <coelho@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
---
 drivers/net/wireless/ti/wl12xx/main.c     |  3 ++-
 drivers/net/wireless/ti/wlcore/main.c     |  5 +++--
 drivers/net/wireless/ti/wlcore/sdio.c     | 22 +++++++++++++++++-----
 drivers/net/wireless/ti/wlcore/spi.c      | 20 +++++++++++++++++---
 drivers/net/wireless/ti/wlcore/wlcore_i.h |  5 +++++
 include/linux/wl12xx.h                    |  2 --
 6 files changed, 44 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c
index 3254bfc81a2a..09694e39bb14 100644
--- a/drivers/net/wireless/ti/wl12xx/main.c
+++ b/drivers/net/wireless/ti/wl12xx/main.c
@@ -1703,7 +1703,8 @@ static struct ieee80211_sta_ht_cap wl12xx_ht_cap = {
 static int wl12xx_setup(struct wl1271 *wl)
 {
 	struct wl12xx_priv *priv = wl->priv;
-	struct wl12xx_platform_data *pdata = wl->pdev->dev.platform_data;
+	struct wlcore_platdev_data *pdev_data = wl->pdev->dev.platform_data;
+	struct wl12xx_platform_data *pdata = pdev_data->pdata;
 
 	wl->rtable = wl12xx_rtable;
 	wl->num_tx_desc = WL12XX_NUM_TX_DESCRIPTORS;
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 9a66acf1205f..cd70335cd5e8 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5966,7 +5966,8 @@ static void wlcore_nvs_cb(const struct firmware *fw, void *context)
 {
 	struct wl1271 *wl = context;
 	struct platform_device *pdev = wl->pdev;
-	struct wl12xx_platform_data *pdata = pdev->dev.platform_data;
+	struct wlcore_platdev_data *pdev_data = pdev->dev.platform_data;
+	struct wl12xx_platform_data *pdata = pdev_data->pdata;
 	unsigned long irqflags;
 	int ret;
 
@@ -5995,7 +5996,7 @@ static void wlcore_nvs_cb(const struct firmware *fw, void *context)
 
 	wl->irq = platform_get_irq(pdev, 0);
 	wl->platform_quirks = pdata->platform_quirks;
-	wl->if_ops = pdata->ops;
+	wl->if_ops = pdev_data->if_ops;
 
 	if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ)
 		irqflags = IRQF_TRIGGER_RISING;
diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index d4f184e2efed..1f6f6e30daca 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -218,6 +218,7 @@ static int wl1271_probe(struct sdio_func *func,
 				  const struct sdio_device_id *id)
 {
 	struct wl12xx_platform_data *wlan_data;
+	struct wlcore_platdev_data *pdev_data;
 	struct wl12xx_sdio_glue *glue;
 	struct resource res[1];
 	mmc_pm_flag_t mmcflags;
@@ -228,10 +229,18 @@ static int wl1271_probe(struct sdio_func *func,
 	if (func->num != 0x02)
 		return -ENODEV;
 
+	pdev_data = kzalloc(sizeof(*pdev_data), GFP_KERNEL);
+	if (!pdev_data) {
+		dev_err(&func->dev, "can't allocate platdev_data\n");
+		goto out;
+	}
+
+	pdev_data->if_ops = &sdio_ops;
+
 	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
 	if (!glue) {
 		dev_err(&func->dev, "can't allocate glue\n");
-		goto out;
+		goto out_free_pdev_data;
 	}
 
 	glue->dev = &func->dev;
@@ -256,8 +265,6 @@ static int wl1271_probe(struct sdio_func *func,
 	if (mmcflags & MMC_PM_KEEP_POWER)
 		wlan_data->pwr_in_suspend = true;
 
-	wlan_data->ops = &sdio_ops;
-
 	sdio_set_drvdata(func, glue);
 
 	/* Tell PM core that we don't need the card to be powered now */
@@ -295,8 +302,10 @@ static int wl1271_probe(struct sdio_func *func,
 		goto out_dev_put;
 	}
 
-	ret = platform_device_add_data(glue->core, wlan_data,
-				       sizeof(*wlan_data));
+	pdev_data->pdata = wlan_data;
+
+	ret = platform_device_add_data(glue->core, pdev_data,
+				       sizeof(*pdev_data));
 	if (ret) {
 		dev_err(glue->dev, "can't add platform data\n");
 		goto out_dev_put;
@@ -315,6 +324,9 @@ out_dev_put:
 out_free_glue:
 	kfree(glue);
 
+out_free_pdev_data:
+	kfree(pdev_data);
+
 out:
 	return ret;
 }
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index 2d700b7ae14c..d437f4d28bd0 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -328,6 +328,7 @@ static int wl1271_probe(struct spi_device *spi)
 {
 	struct wl12xx_spi_glue *glue;
 	struct wl12xx_platform_data *pdata;
+	struct wlcore_platdev_data *pdev_data;
 	struct resource res[1];
 	int ret = -ENOMEM;
 
@@ -337,12 +338,18 @@ static int wl1271_probe(struct spi_device *spi)
 		return -ENODEV;
 	}
 
-	pdata->ops = &spi_ops;
+	pdev_data = kzalloc(sizeof(*pdev_data), GFP_KERNEL);
+	if (!pdev_data) {
+		dev_err(&spi->dev, "can't allocate platdev_data\n");
+		goto out;
+	}
+
+	pdev_data->if_ops = &spi_ops;
 
 	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
 	if (!glue) {
 		dev_err(&spi->dev, "can't allocate glue\n");
-		goto out;
+		goto out_free_pdev_data;
 	}
 
 	glue->dev = &spi->dev;
@@ -380,7 +387,10 @@ static int wl1271_probe(struct spi_device *spi)
 		goto out_dev_put;
 	}
 
-	ret = platform_device_add_data(glue->core, pdata, sizeof(*pdata));
+	pdev_data->pdata = pdata;
+
+	ret = platform_device_add_data(glue->core, pdev_data,
+				       sizeof(*pdev_data));
 	if (ret) {
 		dev_err(glue->dev, "can't add platform data\n");
 		goto out_dev_put;
@@ -399,6 +409,10 @@ out_dev_put:
 
 out_free_glue:
 	kfree(glue);
+
+out_free_pdev_data:
+	kfree(pdev_data);
+
 out:
 	return ret;
 }
diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h
index 20316ac328a2..c845b0ef7f4b 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore_i.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h
@@ -206,6 +206,11 @@ struct wl1271_if_operations {
 	void (*set_block_size) (struct device *child, unsigned int blksz);
 };
 
+struct wlcore_platdev_data {
+	struct wl12xx_platform_data *pdata;
+	struct wl1271_if_operations *if_ops;
+};
+
 #define MAX_NUM_KEYS 14
 #define MAX_KEY_SIZE 32
 
diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h
index 0d6373195d32..360c9bce665c 100644
--- a/include/linux/wl12xx.h
+++ b/include/linux/wl12xx.h
@@ -55,8 +55,6 @@ struct wl12xx_platform_data {
 	int board_tcxo_clock;
 	unsigned long platform_quirks;
 	bool pwr_in_suspend;
-
-	struct wl1271_if_operations *ops;
 };
 
 /* Platform does not support level trigger interrupts */
-- 
cgit v1.2.3


From 6cc9efed707c575a9e5880ea68f8b9d36b235f1f Mon Sep 17 00:00:00 2001
From: Luciano Coelho <coelho@ti.com>
Date: Fri, 25 Jan 2013 12:05:34 +0200
Subject: wlcore: move wl12xx_platform_data up and make it truly optional

The platform data is used not only by wlcore-based drivers, but also
by wl1251.  Move it up in the directory hierarchy to reflect this.

Additionally, make it truly optional.  At the moment, disabling
platform data while wl1251_sdio or wlcore_sdio are enabled doesn't
work, but it will be necessary when device tree support is
implemented.

Signed-off-by: Luciano Coelho <coelho@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
---
 arch/arm/mach-omap2/board-omap3evm.c               | 10 ++---
 drivers/net/wireless/ti/Kconfig                    |  9 ++++
 drivers/net/wireless/ti/Makefile                   |  4 +-
 drivers/net/wireless/ti/wilink_platform_data.c     | 49 ++++++++++++++++++++++
 drivers/net/wireless/ti/wlcore/Kconfig             |  5 ---
 drivers/net/wireless/ti/wlcore/Makefile            |  3 --
 .../net/wireless/ti/wlcore/wl12xx_platform_data.c  | 49 ----------------------
 include/linux/wl12xx.h                             | 14 +++++--
 8 files changed, 77 insertions(+), 66 deletions(-)
 create mode 100644 drivers/net/wireless/ti/wilink_platform_data.c
 delete mode 100644 drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c
index 3985f35aee06..a4ca63ba7faa 100644
--- a/arch/arm/mach-omap2/board-omap3evm.c
+++ b/arch/arm/mach-omap2/board-omap3evm.c
@@ -309,7 +309,7 @@ static struct omap2_hsmmc_info mmc[] = {
 		.gpio_wp	= 63,
 		.deferred	= true,
 	},
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 	{
 		.name		= "wl1271",
 		.mmc		= 2,
@@ -450,7 +450,7 @@ static struct regulator_init_data omap3evm_vio = {
 	.consumer_supplies	= omap3evm_vio_supply,
 };
 
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 
 #define OMAP3EVM_WLAN_PMENA_GPIO	(150)
 #define OMAP3EVM_WLAN_IRQ_GPIO		(149)
@@ -563,7 +563,7 @@ static struct omap_board_mux omap35x_board_mux[] __initdata = {
 				OMAP_PIN_OFF_NONE),
 	OMAP3_MUX(GPMC_WAIT2, OMAP_MUX_MODE4 | OMAP_PIN_INPUT_PULLUP |
 				OMAP_PIN_OFF_NONE),
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 	/* WLAN IRQ - GPIO 149 */
 	OMAP3_MUX(UART1_RTS, OMAP_MUX_MODE4 | OMAP_PIN_INPUT),
 
@@ -601,7 +601,7 @@ static struct omap_board_mux omap36x_board_mux[] __initdata = {
 	OMAP3_MUX(SYS_BOOT4, OMAP_MUX_MODE3 | OMAP_PIN_OFF_NONE),
 	OMAP3_MUX(SYS_BOOT5, OMAP_MUX_MODE3 | OMAP_PIN_OFF_NONE),
 	OMAP3_MUX(SYS_BOOT6, OMAP_MUX_MODE3 | OMAP_PIN_OFF_NONE),
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 	/* WLAN IRQ - GPIO 149 */
 	OMAP3_MUX(UART1_RTS, OMAP_MUX_MODE4 | OMAP_PIN_INPUT),
 
@@ -637,7 +637,7 @@ static struct gpio omap3_evm_ehci_gpios[] __initdata = {
 
 static void __init omap3_evm_wl12xx_init(void)
 {
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 	int ret;
 
 	/* WL12xx WLAN Init */
diff --git a/drivers/net/wireless/ti/Kconfig b/drivers/net/wireless/ti/Kconfig
index be800119d0a3..cbe1e7fef61b 100644
--- a/drivers/net/wireless/ti/Kconfig
+++ b/drivers/net/wireless/ti/Kconfig
@@ -12,4 +12,13 @@ source "drivers/net/wireless/ti/wl18xx/Kconfig"
 
 # keep last for automatic dependencies
 source "drivers/net/wireless/ti/wlcore/Kconfig"
+
+config WILINK_PLATFORM_DATA
+	bool "TI WiLink platform data"
+	depends on WLCORE_SDIO || WL1251_SDIO
+	default y
+	---help---
+	Small platform data bit needed to pass data to the sdio modules.
+
+
 endif # WL_TI
diff --git a/drivers/net/wireless/ti/Makefile b/drivers/net/wireless/ti/Makefile
index 4d6823983c04..af14231aeede 100644
--- a/drivers/net/wireless/ti/Makefile
+++ b/drivers/net/wireless/ti/Makefile
@@ -1,5 +1,7 @@
 obj-$(CONFIG_WLCORE)			+= wlcore/
 obj-$(CONFIG_WL12XX)			+= wl12xx/
-obj-$(CONFIG_WL12XX_PLATFORM_DATA)	+= wlcore/
 obj-$(CONFIG_WL1251)			+= wl1251/
 obj-$(CONFIG_WL18XX)			+= wl18xx/
+
+# small builtin driver bit
+obj-$(CONFIG_WILINK_PLATFORM_DATA)	+= wilink_platform_data.o
diff --git a/drivers/net/wireless/ti/wilink_platform_data.c b/drivers/net/wireless/ti/wilink_platform_data.c
new file mode 100644
index 000000000000..998e95895f9d
--- /dev/null
+++ b/drivers/net/wireless/ti/wilink_platform_data.c
@@ -0,0 +1,49 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2010-2011 Texas Instruments, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/wl12xx.h>
+
+static struct wl12xx_platform_data *platform_data;
+
+int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
+{
+	if (platform_data)
+		return -EBUSY;
+	if (!data)
+		return -EINVAL;
+
+	platform_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
+	if (!platform_data)
+		return -ENOMEM;
+
+	return 0;
+}
+
+struct wl12xx_platform_data *wl12xx_get_platform_data(void)
+{
+	if (!platform_data)
+		return ERR_PTR(-ENODEV);
+
+	return platform_data;
+}
+EXPORT_SYMBOL(wl12xx_get_platform_data);
diff --git a/drivers/net/wireless/ti/wlcore/Kconfig b/drivers/net/wireless/ti/wlcore/Kconfig
index d7b907e67170..2b832825c3d4 100644
--- a/drivers/net/wireless/ti/wlcore/Kconfig
+++ b/drivers/net/wireless/ti/wlcore/Kconfig
@@ -33,8 +33,3 @@ config WLCORE_SDIO
 
 	  If you choose to build a module, it'll be called wlcore_sdio.
 	  Say N if unsure.
-
-config WL12XX_PLATFORM_DATA
-	bool
-	depends on WLCORE_SDIO != n || WL1251_SDIO != n
-	default y
diff --git a/drivers/net/wireless/ti/wlcore/Makefile b/drivers/net/wireless/ti/wlcore/Makefile
index d9fba9e32130..b21398f6c3ec 100644
--- a/drivers/net/wireless/ti/wlcore/Makefile
+++ b/drivers/net/wireless/ti/wlcore/Makefile
@@ -9,7 +9,4 @@ obj-$(CONFIG_WLCORE)			+= wlcore.o
 obj-$(CONFIG_WLCORE_SPI)		+= wlcore_spi.o
 obj-$(CONFIG_WLCORE_SDIO)		+= wlcore_sdio.o
 
-# small builtin driver bit
-obj-$(CONFIG_WL12XX_PLATFORM_DATA)	+= wl12xx_platform_data.o
-
 ccflags-y += -D__CHECK_ENDIAN__
diff --git a/drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c b/drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c
deleted file mode 100644
index 998e95895f9d..000000000000
--- a/drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * This file is part of wl12xx
- *
- * Copyright (C) 2010-2011 Texas Instruments, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/wl12xx.h>
-
-static struct wl12xx_platform_data *platform_data;
-
-int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
-{
-	if (platform_data)
-		return -EBUSY;
-	if (!data)
-		return -EINVAL;
-
-	platform_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
-	if (!platform_data)
-		return -ENOMEM;
-
-	return 0;
-}
-
-struct wl12xx_platform_data *wl12xx_get_platform_data(void)
-{
-	if (!platform_data)
-		return ERR_PTR(-ENODEV);
-
-	return platform_data;
-}
-EXPORT_SYMBOL(wl12xx_get_platform_data);
diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h
index 360c9bce665c..a54fe82e704b 100644
--- a/include/linux/wl12xx.h
+++ b/include/linux/wl12xx.h
@@ -24,6 +24,8 @@
 #ifndef _LINUX_WL12XX_H
 #define _LINUX_WL12XX_H
 
+#include <linux/err.h>
+
 /* Reference clock values */
 enum {
 	WL12XX_REFCLOCK_19	= 0, /* 19.2 MHz */
@@ -60,10 +62,12 @@ struct wl12xx_platform_data {
 /* Platform does not support level trigger interrupts */
 #define WL12XX_PLATFORM_QUIRK_EDGE_IRQ	BIT(0)
 
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 
 int wl12xx_set_platform_data(const struct wl12xx_platform_data *data);
 
+struct wl12xx_platform_data *wl12xx_get_platform_data(void);
+
 #else
 
 static inline
@@ -72,8 +76,12 @@ int wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
 	return -ENOSYS;
 }
 
-#endif
+static inline
+struct wl12xx_platform_data *wl12xx_get_platform_data(void)
+{
+	return ERR_PTR(-ENODATA);
+}
 
-struct wl12xx_platform_data *wl12xx_get_platform_data(void);
+#endif
 
 #endif
-- 
cgit v1.2.3


From cd7bed00340475ee72a013a070e200e065085ef3 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 22 Jan 2013 12:26:28 +0200
Subject: spi/pxa2xx: break out the private DMA API usage into a separate file

The PXA SPI driver uses PXA platform specific private DMA implementation
which does not work on non-PXA platforms. In order to use this driver on
other platforms we break out the private DMA implementation into a separate
file that gets compiled only when CONFIG_SPI_PXA2XX_PXADMA is set. The DMA
functions are stubbed out if there is no DMA implementation selected (i.e
we are building on non-PXA platform).

While we are there we can kill the dummy DMA bits in pxa2xx_spi.h as they
are not needed anymore for CE4100.

Once this is done we can add the generic DMA engine support to the driver
that allows usage of any DMA controller that implements DMA engine API.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Lu Cao <lucao@marvell.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/Kconfig             |   6 +
 drivers/spi/Makefile            |   4 +-
 drivers/spi/spi-pxa2xx-pxadma.c | 490 +++++++++++++++++++++++++++++++++
 drivers/spi/spi-pxa2xx.c        | 595 ++--------------------------------------
 drivers/spi/spi-pxa2xx.h        | 185 +++++++++++++
 include/linux/spi/pxa2xx_spi.h  |  80 ------
 6 files changed, 712 insertions(+), 648 deletions(-)
 create mode 100644 drivers/spi/spi-pxa2xx-pxadma.c
 create mode 100644 drivers/spi/spi-pxa2xx.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index a90393d7f106..f1878666e917 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -297,6 +297,12 @@ config SPI_PPC4xx
 	help
 	  This selects a driver for the PPC4xx SPI Controller.
 
+config SPI_PXA2XX_PXADMA
+	bool "PXA2xx SSP legacy PXA DMA API support"
+	depends on SPI_PXA2XX && ARCH_PXA
+	help
+	  Enable PXA private legacy DMA API support.
+
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
 	depends on ARCH_PXA || PCI
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 64e970ba261c..2e3cdd3caba9 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -47,7 +47,9 @@ obj-$(CONFIG_SPI_OMAP24XX)		+= spi-omap2-mcspi.o
 obj-$(CONFIG_SPI_ORION)			+= spi-orion.o
 obj-$(CONFIG_SPI_PL022)			+= spi-pl022.o
 obj-$(CONFIG_SPI_PPC4xx)		+= spi-ppc4xx.o
-obj-$(CONFIG_SPI_PXA2XX)		+= spi-pxa2xx.o
+spi-pxa2xx-platform-objs		:= spi-pxa2xx.o
+spi-pxa2xx-platform-$(CONFIG_SPI_PXA2XX_PXADMA)	+= spi-pxa2xx-pxadma.o
+obj-$(CONFIG_SPI_PXA2XX)		+= spi-pxa2xx-platform.o
 obj-$(CONFIG_SPI_PXA2XX_PCI)		+= spi-pxa2xx-pci.o
 obj-$(CONFIG_SPI_RSPI)			+= spi-rspi.o
 obj-$(CONFIG_SPI_S3C24XX)		+= spi-s3c24xx-hw.o
diff --git a/drivers/spi/spi-pxa2xx-pxadma.c b/drivers/spi/spi-pxa2xx-pxadma.c
new file mode 100644
index 000000000000..2916efc7cfe5
--- /dev/null
+++ b/drivers/spi/spi-pxa2xx-pxadma.c
@@ -0,0 +1,490 @@
+/*
+ * PXA2xx SPI private DMA support.
+ *
+ * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/pxa2xx_ssp.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
+
+#include "spi-pxa2xx.h"
+
+#define DMA_INT_MASK		(DCSR_ENDINTR | DCSR_STARTINTR | DCSR_BUSERR)
+#define RESET_DMA_CHANNEL	(DCSR_NODESC | DMA_INT_MASK)
+
+bool pxa2xx_spi_dma_is_possible(size_t len)
+{
+	/* Try to map dma buffer and do a dma transfer if successful, but
+	 * only if the length is non-zero and less than MAX_DMA_LEN.
+	 *
+	 * Zero-length non-descriptor DMA is illegal on PXA2xx; force use
+	 * of PIO instead.  Care is needed above because the transfer may
+	 * have have been passed with buffers that are already dma mapped.
+	 * A zero-length transfer in PIO mode will not try to write/read
+	 * to/from the buffers
+	 *
+	 * REVISIT large transfers are exactly where we most want to be
+	 * using DMA.  If this happens much, split those transfers into
+	 * multiple DMA segments rather than forcing PIO.
+	 */
+	return len > 0 && len <= MAX_DMA_LEN;
+}
+
+int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data)
+{
+	struct spi_message *msg = drv_data->cur_msg;
+	struct device *dev = &msg->spi->dev;
+
+	if (!drv_data->cur_chip->enable_dma)
+		return 0;
+
+	if (msg->is_dma_mapped)
+		return  drv_data->rx_dma && drv_data->tx_dma;
+
+	if (!IS_DMA_ALIGNED(drv_data->rx) || !IS_DMA_ALIGNED(drv_data->tx))
+		return 0;
+
+	/* Modify setup if rx buffer is null */
+	if (drv_data->rx == NULL) {
+		*drv_data->null_dma_buf = 0;
+		drv_data->rx = drv_data->null_dma_buf;
+		drv_data->rx_map_len = 4;
+	} else
+		drv_data->rx_map_len = drv_data->len;
+
+
+	/* Modify setup if tx buffer is null */
+	if (drv_data->tx == NULL) {
+		*drv_data->null_dma_buf = 0;
+		drv_data->tx = drv_data->null_dma_buf;
+		drv_data->tx_map_len = 4;
+	} else
+		drv_data->tx_map_len = drv_data->len;
+
+	/* Stream map the tx buffer. Always do DMA_TO_DEVICE first
+	 * so we flush the cache *before* invalidating it, in case
+	 * the tx and rx buffers overlap.
+	 */
+	drv_data->tx_dma = dma_map_single(dev, drv_data->tx,
+					drv_data->tx_map_len, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, drv_data->tx_dma))
+		return 0;
+
+	/* Stream map the rx buffer */
+	drv_data->rx_dma = dma_map_single(dev, drv_data->rx,
+					drv_data->rx_map_len, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, drv_data->rx_dma)) {
+		dma_unmap_single(dev, drv_data->tx_dma,
+					drv_data->tx_map_len, DMA_TO_DEVICE);
+		return 0;
+	}
+
+	return 1;
+}
+
+static void pxa2xx_spi_unmap_dma_buffers(struct driver_data *drv_data)
+{
+	struct device *dev;
+
+	if (!drv_data->dma_mapped)
+		return;
+
+	if (!drv_data->cur_msg->is_dma_mapped) {
+		dev = &drv_data->cur_msg->spi->dev;
+		dma_unmap_single(dev, drv_data->rx_dma,
+					drv_data->rx_map_len, DMA_FROM_DEVICE);
+		dma_unmap_single(dev, drv_data->tx_dma,
+					drv_data->tx_map_len, DMA_TO_DEVICE);
+	}
+
+	drv_data->dma_mapped = 0;
+}
+
+static int wait_ssp_rx_stall(void const __iomem *ioaddr)
+{
+	unsigned long limit = loops_per_jiffy << 1;
+
+	while ((read_SSSR(ioaddr) & SSSR_BSY) && --limit)
+		cpu_relax();
+
+	return limit;
+}
+
+static int wait_dma_channel_stop(int channel)
+{
+	unsigned long limit = loops_per_jiffy << 1;
+
+	while (!(DCSR(channel) & DCSR_STOPSTATE) && --limit)
+		cpu_relax();
+
+	return limit;
+}
+
+static void pxa2xx_spi_dma_error_stop(struct driver_data *drv_data,
+				      const char *msg)
+{
+	void __iomem *reg = drv_data->ioaddr;
+
+	/* Stop and reset */
+	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
+	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
+	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
+	if (!pxa25x_ssp_comp(drv_data))
+		write_SSTO(0, reg);
+	pxa2xx_spi_flush(drv_data);
+	write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
+
+	pxa2xx_spi_unmap_dma_buffers(drv_data);
+
+	dev_err(&drv_data->pdev->dev, "%s\n", msg);
+
+	drv_data->cur_msg->state = ERROR_STATE;
+	tasklet_schedule(&drv_data->pump_transfers);
+}
+
+static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data)
+{
+	void __iomem *reg = drv_data->ioaddr;
+	struct spi_message *msg = drv_data->cur_msg;
+
+	/* Clear and disable interrupts on SSP and DMA channels*/
+	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
+	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
+	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
+
+	if (wait_dma_channel_stop(drv_data->rx_channel) == 0)
+		dev_err(&drv_data->pdev->dev,
+			"dma_handler: dma rx channel stop failed\n");
+
+	if (wait_ssp_rx_stall(drv_data->ioaddr) == 0)
+		dev_err(&drv_data->pdev->dev,
+			"dma_transfer: ssp rx stall failed\n");
+
+	pxa2xx_spi_unmap_dma_buffers(drv_data);
+
+	/* update the buffer pointer for the amount completed in dma */
+	drv_data->rx += drv_data->len -
+			(DCMD(drv_data->rx_channel) & DCMD_LENGTH);
+
+	/* read trailing data from fifo, it does not matter how many
+	 * bytes are in the fifo just read until buffer is full
+	 * or fifo is empty, which ever occurs first */
+	drv_data->read(drv_data);
+
+	/* return count of what was actually read */
+	msg->actual_length += drv_data->len -
+				(drv_data->rx_end - drv_data->rx);
+
+	/* Transfer delays and chip select release are
+	 * handled in pump_transfers or giveback
+	 */
+
+	/* Move to next transfer */
+	msg->state = pxa2xx_spi_next_transfer(drv_data);
+
+	/* Schedule transfer tasklet */
+	tasklet_schedule(&drv_data->pump_transfers);
+}
+
+void pxa2xx_spi_dma_handler(int channel, void *data)
+{
+	struct driver_data *drv_data = data;
+	u32 irq_status = DCSR(channel) & DMA_INT_MASK;
+
+	if (irq_status & DCSR_BUSERR) {
+
+		if (channel == drv_data->tx_channel)
+			pxa2xx_spi_dma_error_stop(drv_data,
+				"dma_handler: bad bus address on tx channel");
+		else
+			pxa2xx_spi_dma_error_stop(drv_data,
+				"dma_handler: bad bus address on rx channel");
+		return;
+	}
+
+	/* PXA255x_SSP has no timeout interrupt, wait for tailing bytes */
+	if ((channel == drv_data->tx_channel)
+		&& (irq_status & DCSR_ENDINTR)
+		&& (drv_data->ssp_type == PXA25x_SSP)) {
+
+		/* Wait for rx to stall */
+		if (wait_ssp_rx_stall(drv_data->ioaddr) == 0)
+			dev_err(&drv_data->pdev->dev,
+				"dma_handler: ssp rx stall failed\n");
+
+		/* finish this transfer, start the next */
+		pxa2xx_spi_dma_transfer_complete(drv_data);
+	}
+}
+
+irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data)
+{
+	u32 irq_status;
+	void __iomem *reg = drv_data->ioaddr;
+
+	irq_status = read_SSSR(reg) & drv_data->mask_sr;
+	if (irq_status & SSSR_ROR) {
+		pxa2xx_spi_dma_error_stop(drv_data,
+					  "dma_transfer: fifo overrun");
+		return IRQ_HANDLED;
+	}
+
+	/* Check for false positive timeout */
+	if ((irq_status & SSSR_TINT)
+		&& (DCSR(drv_data->tx_channel) & DCSR_RUN)) {
+		write_SSSR(SSSR_TINT, reg);
+		return IRQ_HANDLED;
+	}
+
+	if (irq_status & SSSR_TINT || drv_data->rx == drv_data->rx_end) {
+
+		/* Clear and disable timeout interrupt, do the rest in
+		 * dma_transfer_complete */
+		if (!pxa25x_ssp_comp(drv_data))
+			write_SSTO(0, reg);
+
+		/* finish this transfer, start the next */
+		pxa2xx_spi_dma_transfer_complete(drv_data);
+
+		return IRQ_HANDLED;
+	}
+
+	/* Opps problem detected */
+	return IRQ_NONE;
+}
+
+int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst)
+{
+	u32 dma_width;
+
+	switch (drv_data->n_bytes) {
+	case 1:
+		dma_width = DCMD_WIDTH1;
+		break;
+	case 2:
+		dma_width = DCMD_WIDTH2;
+		break;
+	default:
+		dma_width = DCMD_WIDTH4;
+		break;
+	}
+
+	/* Setup rx DMA Channel */
+	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
+	DSADR(drv_data->rx_channel) = drv_data->ssdr_physical;
+	DTADR(drv_data->rx_channel) = drv_data->rx_dma;
+	if (drv_data->rx == drv_data->null_dma_buf)
+		/* No target address increment */
+		DCMD(drv_data->rx_channel) = DCMD_FLOWSRC
+						| dma_width
+						| dma_burst
+						| drv_data->len;
+	else
+		DCMD(drv_data->rx_channel) = DCMD_INCTRGADDR
+						| DCMD_FLOWSRC
+						| dma_width
+						| dma_burst
+						| drv_data->len;
+
+	/* Setup tx DMA Channel */
+	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
+	DSADR(drv_data->tx_channel) = drv_data->tx_dma;
+	DTADR(drv_data->tx_channel) = drv_data->ssdr_physical;
+	if (drv_data->tx == drv_data->null_dma_buf)
+		/* No source address increment */
+		DCMD(drv_data->tx_channel) = DCMD_FLOWTRG
+						| dma_width
+						| dma_burst
+						| drv_data->len;
+	else
+		DCMD(drv_data->tx_channel) = DCMD_INCSRCADDR
+						| DCMD_FLOWTRG
+						| dma_width
+						| dma_burst
+						| drv_data->len;
+
+	/* Enable dma end irqs on SSP to detect end of transfer */
+	if (drv_data->ssp_type == PXA25x_SSP)
+		DCMD(drv_data->tx_channel) |= DCMD_ENDIRQEN;
+
+	return 0;
+}
+
+void pxa2xx_spi_dma_start(struct driver_data *drv_data)
+{
+	DCSR(drv_data->rx_channel) |= DCSR_RUN;
+	DCSR(drv_data->tx_channel) |= DCSR_RUN;
+}
+
+int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
+{
+	struct device *dev = &drv_data->pdev->dev;
+	struct ssp_device *ssp = drv_data->ssp;
+
+	/* Get two DMA channels	(rx and tx) */
+	drv_data->rx_channel = pxa_request_dma("pxa2xx_spi_ssp_rx",
+						DMA_PRIO_HIGH,
+						pxa2xx_spi_dma_handler,
+						drv_data);
+	if (drv_data->rx_channel < 0) {
+		dev_err(dev, "problem (%d) requesting rx channel\n",
+			drv_data->rx_channel);
+		return -ENODEV;
+	}
+	drv_data->tx_channel = pxa_request_dma("pxa2xx_spi_ssp_tx",
+						DMA_PRIO_MEDIUM,
+						pxa2xx_spi_dma_handler,
+						drv_data);
+	if (drv_data->tx_channel < 0) {
+		dev_err(dev, "problem (%d) requesting tx channel\n",
+			drv_data->tx_channel);
+		pxa_free_dma(drv_data->rx_channel);
+		return -ENODEV;
+	}
+
+	DRCMR(ssp->drcmr_rx) = DRCMR_MAPVLD | drv_data->rx_channel;
+	DRCMR(ssp->drcmr_tx) = DRCMR_MAPVLD | drv_data->tx_channel;
+
+	return 0;
+}
+
+void pxa2xx_spi_dma_release(struct driver_data *drv_data)
+{
+	struct ssp_device *ssp = drv_data->ssp;
+
+	DRCMR(ssp->drcmr_rx) = 0;
+	DRCMR(ssp->drcmr_tx) = 0;
+
+	if (drv_data->tx_channel != 0)
+		pxa_free_dma(drv_data->tx_channel);
+	if (drv_data->rx_channel != 0)
+		pxa_free_dma(drv_data->rx_channel);
+}
+
+void pxa2xx_spi_dma_resume(struct driver_data *drv_data)
+{
+	if (drv_data->rx_channel != -1)
+		DRCMR(drv_data->ssp->drcmr_rx) =
+			DRCMR_MAPVLD | drv_data->rx_channel;
+	if (drv_data->tx_channel != -1)
+		DRCMR(drv_data->ssp->drcmr_tx) =
+			DRCMR_MAPVLD | drv_data->tx_channel;
+}
+
+int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
+					   struct spi_device *spi,
+					   u8 bits_per_word, u32 *burst_code,
+					   u32 *threshold)
+{
+	struct pxa2xx_spi_chip *chip_info =
+			(struct pxa2xx_spi_chip *)spi->controller_data;
+	int bytes_per_word;
+	int burst_bytes;
+	int thresh_words;
+	int req_burst_size;
+	int retval = 0;
+
+	/* Set the threshold (in registers) to equal the same amount of data
+	 * as represented by burst size (in bytes).  The computation below
+	 * is (burst_size rounded up to nearest 8 byte, word or long word)
+	 * divided by (bytes/register); the tx threshold is the inverse of
+	 * the rx, so that there will always be enough data in the rx fifo
+	 * to satisfy a burst, and there will always be enough space in the
+	 * tx fifo to accept a burst (a tx burst will overwrite the fifo if
+	 * there is not enough space), there must always remain enough empty
+	 * space in the rx fifo for any data loaded to the tx fifo.
+	 * Whenever burst_size (in bytes) equals bits/word, the fifo threshold
+	 * will be 8, or half the fifo;
+	 * The threshold can only be set to 2, 4 or 8, but not 16, because
+	 * to burst 16 to the tx fifo, the fifo would have to be empty;
+	 * however, the minimum fifo trigger level is 1, and the tx will
+	 * request service when the fifo is at this level, with only 15 spaces.
+	 */
+
+	/* find bytes/word */
+	if (bits_per_word <= 8)
+		bytes_per_word = 1;
+	else if (bits_per_word <= 16)
+		bytes_per_word = 2;
+	else
+		bytes_per_word = 4;
+
+	/* use struct pxa2xx_spi_chip->dma_burst_size if available */
+	if (chip_info)
+		req_burst_size = chip_info->dma_burst_size;
+	else {
+		switch (chip->dma_burst_size) {
+		default:
+			/* if the default burst size is not set,
+			 * do it now */
+			chip->dma_burst_size = DCMD_BURST8;
+		case DCMD_BURST8:
+			req_burst_size = 8;
+			break;
+		case DCMD_BURST16:
+			req_burst_size = 16;
+			break;
+		case DCMD_BURST32:
+			req_burst_size = 32;
+			break;
+		}
+	}
+	if (req_burst_size <= 8) {
+		*burst_code = DCMD_BURST8;
+		burst_bytes = 8;
+	} else if (req_burst_size <= 16) {
+		if (bytes_per_word == 1) {
+			/* don't burst more than 1/2 the fifo */
+			*burst_code = DCMD_BURST8;
+			burst_bytes = 8;
+			retval = 1;
+		} else {
+			*burst_code = DCMD_BURST16;
+			burst_bytes = 16;
+		}
+	} else {
+		if (bytes_per_word == 1) {
+			/* don't burst more than 1/2 the fifo */
+			*burst_code = DCMD_BURST8;
+			burst_bytes = 8;
+			retval = 1;
+		} else if (bytes_per_word == 2) {
+			/* don't burst more than 1/2 the fifo */
+			*burst_code = DCMD_BURST16;
+			burst_bytes = 16;
+			retval = 1;
+		} else {
+			*burst_code = DCMD_BURST32;
+			burst_bytes = 32;
+		}
+	}
+
+	thresh_words = burst_bytes / bytes_per_word;
+
+	/* thresh_words will be between 2 and 8 */
+	*threshold = (SSCR1_RxTresh(thresh_words) & SSCR1_RFT)
+			| (SSCR1_TxTresh(16-thresh_words) & SSCR1_TFT);
+
+	return retval;
+}
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 304cf6eb50e6..5b7c2a4ba828 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -24,7 +24,6 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/spi/pxa2xx_spi.h>
-#include <linux/dma-mapping.h>
 #include <linux/spi/spi.h>
 #include <linux/workqueue.h>
 #include <linux/delay.h>
@@ -36,6 +35,7 @@
 #include <asm/irq.h>
 #include <asm/delay.h>
 
+#include "spi-pxa2xx.h"
 
 MODULE_AUTHOR("Stephen Street");
 MODULE_DESCRIPTION("PXA2xx SSP SPI Controller");
@@ -46,12 +46,6 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 
 #define TIMOUT_DFLT		1000
 
-#define DMA_INT_MASK		(DCSR_ENDINTR | DCSR_STARTINTR | DCSR_BUSERR)
-#define RESET_DMA_CHANNEL	(DCSR_NODESC | DMA_INT_MASK)
-#define IS_DMA_ALIGNED(x)	IS_ALIGNED((unsigned long)(x), DMA_ALIGNMENT)
-#define MAX_DMA_LEN		8191
-#define DMA_ALIGNMENT		8
-
 /*
  * for testing SSCR1 changes that require SSP restart, basically
  * everything except the service and interrupt enables, the pxa270 developer
@@ -66,106 +60,6 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 				| SSCR1_RFT | SSCR1_TFT | SSCR1_MWDS \
 				| SSCR1_SPH | SSCR1_SPO | SSCR1_LBM)
 
-#define DEFINE_SSP_REG(reg, off) \
-static inline u32 read_##reg(void const __iomem *p) \
-{ return __raw_readl(p + (off)); } \
-\
-static inline void write_##reg(u32 v, void __iomem *p) \
-{ __raw_writel(v, p + (off)); }
-
-DEFINE_SSP_REG(SSCR0, 0x00)
-DEFINE_SSP_REG(SSCR1, 0x04)
-DEFINE_SSP_REG(SSSR, 0x08)
-DEFINE_SSP_REG(SSITR, 0x0c)
-DEFINE_SSP_REG(SSDR, 0x10)
-DEFINE_SSP_REG(SSTO, 0x28)
-DEFINE_SSP_REG(SSPSP, 0x2c)
-
-#define START_STATE ((void*)0)
-#define RUNNING_STATE ((void*)1)
-#define DONE_STATE ((void*)2)
-#define ERROR_STATE ((void*)-1)
-
-struct driver_data {
-	/* Driver model hookup */
-	struct platform_device *pdev;
-
-	/* SSP Info */
-	struct ssp_device *ssp;
-
-	/* SPI framework hookup */
-	enum pxa_ssp_type ssp_type;
-	struct spi_master *master;
-
-	/* PXA hookup */
-	struct pxa2xx_spi_master *master_info;
-
-	/* DMA setup stuff */
-	int rx_channel;
-	int tx_channel;
-	u32 *null_dma_buf;
-
-	/* SSP register addresses */
-	void __iomem *ioaddr;
-	u32 ssdr_physical;
-
-	/* SSP masks*/
-	u32 dma_cr1;
-	u32 int_cr1;
-	u32 clear_sr;
-	u32 mask_sr;
-
-	/* Maximun clock rate */
-	unsigned long max_clk_rate;
-
-	/* Message Transfer pump */
-	struct tasklet_struct pump_transfers;
-
-	/* Current message transfer state info */
-	struct spi_message* cur_msg;
-	struct spi_transfer* cur_transfer;
-	struct chip_data *cur_chip;
-	size_t len;
-	void *tx;
-	void *tx_end;
-	void *rx;
-	void *rx_end;
-	int dma_mapped;
-	dma_addr_t rx_dma;
-	dma_addr_t tx_dma;
-	size_t rx_map_len;
-	size_t tx_map_len;
-	u8 n_bytes;
-	u32 dma_width;
-	int (*write)(struct driver_data *drv_data);
-	int (*read)(struct driver_data *drv_data);
-	irqreturn_t (*transfer_handler)(struct driver_data *drv_data);
-	void (*cs_control)(u32 command);
-};
-
-struct chip_data {
-	u32 cr0;
-	u32 cr1;
-	u32 psp;
-	u32 timeout;
-	u8 n_bytes;
-	u32 dma_width;
-	u32 dma_burst_size;
-	u32 threshold;
-	u32 dma_threshold;
-	u8 enable_dma;
-	u8 bits_per_word;
-	u32 speed_hz;
-	union {
-		int gpio_cs;
-		unsigned int frm;
-	};
-	int gpio_cs_inverted;
-	int (*write)(struct driver_data *drv_data);
-	int (*read)(struct driver_data *drv_data);
-	void (*cs_control)(u32 command);
-};
-
 static void cs_assert(struct driver_data *drv_data)
 {
 	struct chip_data *chip = drv_data->cur_chip;
@@ -200,26 +94,7 @@ static void cs_deassert(struct driver_data *drv_data)
 		gpio_set_value(chip->gpio_cs, !chip->gpio_cs_inverted);
 }
 
-static void write_SSSR_CS(struct driver_data *drv_data, u32 val)
-{
-	void __iomem *reg = drv_data->ioaddr;
-
-	if (drv_data->ssp_type == CE4100_SSP)
-		val |= read_SSSR(reg) & SSSR_ALT_FRM_MASK;
-
-	write_SSSR(val, reg);
-}
-
-static int pxa25x_ssp_comp(struct driver_data *drv_data)
-{
-	if (drv_data->ssp_type == PXA25x_SSP)
-		return 1;
-	if (drv_data->ssp_type == CE4100_SSP)
-		return 1;
-	return 0;
-}
-
-static int flush(struct driver_data *drv_data)
+int pxa2xx_spi_flush(struct driver_data *drv_data)
 {
 	unsigned long limit = loops_per_jiffy << 1;
 
@@ -345,7 +220,7 @@ static int u32_reader(struct driver_data *drv_data)
 	return drv_data->rx == drv_data->rx_end;
 }
 
-static void *next_transfer(struct driver_data *drv_data)
+void *pxa2xx_spi_next_transfer(struct driver_data *drv_data)
 {
 	struct spi_message *msg = drv_data->cur_msg;
 	struct spi_transfer *trans = drv_data->cur_transfer;
@@ -361,76 +236,6 @@ static void *next_transfer(struct driver_data *drv_data)
 		return DONE_STATE;
 }
 
-static int map_dma_buffers(struct driver_data *drv_data)
-{
-	struct spi_message *msg = drv_data->cur_msg;
-	struct device *dev = &msg->spi->dev;
-
-	if (!drv_data->cur_chip->enable_dma)
-		return 0;
-
-	if (msg->is_dma_mapped)
-		return  drv_data->rx_dma && drv_data->tx_dma;
-
-	if (!IS_DMA_ALIGNED(drv_data->rx) || !IS_DMA_ALIGNED(drv_data->tx))
-		return 0;
-
-	/* Modify setup if rx buffer is null */
-	if (drv_data->rx == NULL) {
-		*drv_data->null_dma_buf = 0;
-		drv_data->rx = drv_data->null_dma_buf;
-		drv_data->rx_map_len = 4;
-	} else
-		drv_data->rx_map_len = drv_data->len;
-
-
-	/* Modify setup if tx buffer is null */
-	if (drv_data->tx == NULL) {
-		*drv_data->null_dma_buf = 0;
-		drv_data->tx = drv_data->null_dma_buf;
-		drv_data->tx_map_len = 4;
-	} else
-		drv_data->tx_map_len = drv_data->len;
-
-	/* Stream map the tx buffer. Always do DMA_TO_DEVICE first
-	 * so we flush the cache *before* invalidating it, in case
-	 * the tx and rx buffers overlap.
-	 */
-	drv_data->tx_dma = dma_map_single(dev, drv_data->tx,
-					drv_data->tx_map_len, DMA_TO_DEVICE);
-	if (dma_mapping_error(dev, drv_data->tx_dma))
-		return 0;
-
-	/* Stream map the rx buffer */
-	drv_data->rx_dma = dma_map_single(dev, drv_data->rx,
-					drv_data->rx_map_len, DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, drv_data->rx_dma)) {
-		dma_unmap_single(dev, drv_data->tx_dma,
-					drv_data->tx_map_len, DMA_TO_DEVICE);
-		return 0;
-	}
-
-	return 1;
-}
-
-static void unmap_dma_buffers(struct driver_data *drv_data)
-{
-	struct device *dev;
-
-	if (!drv_data->dma_mapped)
-		return;
-
-	if (!drv_data->cur_msg->is_dma_mapped) {
-		dev = &drv_data->cur_msg->spi->dev;
-		dma_unmap_single(dev, drv_data->rx_dma,
-					drv_data->rx_map_len, DMA_FROM_DEVICE);
-		dma_unmap_single(dev, drv_data->tx_dma,
-					drv_data->tx_map_len, DMA_TO_DEVICE);
-	}
-
-	drv_data->dma_mapped = 0;
-}
-
 /* caller already set message->status; dma and pio irqs are blocked */
 static void giveback(struct driver_data *drv_data)
 {
@@ -483,161 +288,6 @@ static void giveback(struct driver_data *drv_data)
 	drv_data->cur_chip = NULL;
 }
 
-static int wait_ssp_rx_stall(void const __iomem *ioaddr)
-{
-	unsigned long limit = loops_per_jiffy << 1;
-
-	while ((read_SSSR(ioaddr) & SSSR_BSY) && --limit)
-		cpu_relax();
-
-	return limit;
-}
-
-static int wait_dma_channel_stop(int channel)
-{
-	unsigned long limit = loops_per_jiffy << 1;
-
-	while (!(DCSR(channel) & DCSR_STOPSTATE) && --limit)
-		cpu_relax();
-
-	return limit;
-}
-
-static void dma_error_stop(struct driver_data *drv_data, const char *msg)
-{
-	void __iomem *reg = drv_data->ioaddr;
-
-	/* Stop and reset */
-	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
-	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
-	write_SSSR_CS(drv_data, drv_data->clear_sr);
-	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
-	if (!pxa25x_ssp_comp(drv_data))
-		write_SSTO(0, reg);
-	flush(drv_data);
-	write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
-
-	unmap_dma_buffers(drv_data);
-
-	dev_err(&drv_data->pdev->dev, "%s\n", msg);
-
-	drv_data->cur_msg->state = ERROR_STATE;
-	tasklet_schedule(&drv_data->pump_transfers);
-}
-
-static void dma_transfer_complete(struct driver_data *drv_data)
-{
-	void __iomem *reg = drv_data->ioaddr;
-	struct spi_message *msg = drv_data->cur_msg;
-
-	/* Clear and disable interrupts on SSP and DMA channels*/
-	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
-	write_SSSR_CS(drv_data, drv_data->clear_sr);
-	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
-	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
-
-	if (wait_dma_channel_stop(drv_data->rx_channel) == 0)
-		dev_err(&drv_data->pdev->dev,
-			"dma_handler: dma rx channel stop failed\n");
-
-	if (wait_ssp_rx_stall(drv_data->ioaddr) == 0)
-		dev_err(&drv_data->pdev->dev,
-			"dma_transfer: ssp rx stall failed\n");
-
-	unmap_dma_buffers(drv_data);
-
-	/* update the buffer pointer for the amount completed in dma */
-	drv_data->rx += drv_data->len -
-			(DCMD(drv_data->rx_channel) & DCMD_LENGTH);
-
-	/* read trailing data from fifo, it does not matter how many
-	 * bytes are in the fifo just read until buffer is full
-	 * or fifo is empty, which ever occurs first */
-	drv_data->read(drv_data);
-
-	/* return count of what was actually read */
-	msg->actual_length += drv_data->len -
-				(drv_data->rx_end - drv_data->rx);
-
-	/* Transfer delays and chip select release are
-	 * handled in pump_transfers or giveback
-	 */
-
-	/* Move to next transfer */
-	msg->state = next_transfer(drv_data);
-
-	/* Schedule transfer tasklet */
-	tasklet_schedule(&drv_data->pump_transfers);
-}
-
-static void dma_handler(int channel, void *data)
-{
-	struct driver_data *drv_data = data;
-	u32 irq_status = DCSR(channel) & DMA_INT_MASK;
-
-	if (irq_status & DCSR_BUSERR) {
-
-		if (channel == drv_data->tx_channel)
-			dma_error_stop(drv_data,
-					"dma_handler: "
-					"bad bus address on tx channel");
-		else
-			dma_error_stop(drv_data,
-					"dma_handler: "
-					"bad bus address on rx channel");
-		return;
-	}
-
-	/* PXA255x_SSP has no timeout interrupt, wait for tailing bytes */
-	if ((channel == drv_data->tx_channel)
-		&& (irq_status & DCSR_ENDINTR)
-		&& (drv_data->ssp_type == PXA25x_SSP)) {
-
-		/* Wait for rx to stall */
-		if (wait_ssp_rx_stall(drv_data->ioaddr) == 0)
-			dev_err(&drv_data->pdev->dev,
-				"dma_handler: ssp rx stall failed\n");
-
-		/* finish this transfer, start the next */
-		dma_transfer_complete(drv_data);
-	}
-}
-
-static irqreturn_t dma_transfer(struct driver_data *drv_data)
-{
-	u32 irq_status;
-	void __iomem *reg = drv_data->ioaddr;
-
-	irq_status = read_SSSR(reg) & drv_data->mask_sr;
-	if (irq_status & SSSR_ROR) {
-		dma_error_stop(drv_data, "dma_transfer: fifo overrun");
-		return IRQ_HANDLED;
-	}
-
-	/* Check for false positive timeout */
-	if ((irq_status & SSSR_TINT)
-		&& (DCSR(drv_data->tx_channel) & DCSR_RUN)) {
-		write_SSSR(SSSR_TINT, reg);
-		return IRQ_HANDLED;
-	}
-
-	if (irq_status & SSSR_TINT || drv_data->rx == drv_data->rx_end) {
-
-		/* Clear and disable timeout interrupt, do the rest in
-		 * dma_transfer_complete */
-		if (!pxa25x_ssp_comp(drv_data))
-			write_SSTO(0, reg);
-
-		/* finish this transfer, start the next */
-		dma_transfer_complete(drv_data);
-
-		return IRQ_HANDLED;
-	}
-
-	/* Opps problem detected */
-	return IRQ_NONE;
-}
-
 static void reset_sccr1(struct driver_data *drv_data)
 {
 	void __iomem *reg = drv_data->ioaddr;
@@ -659,7 +309,7 @@ static void int_error_stop(struct driver_data *drv_data, const char* msg)
 	reset_sccr1(drv_data);
 	if (!pxa25x_ssp_comp(drv_data))
 		write_SSTO(0, reg);
-	flush(drv_data);
+	pxa2xx_spi_flush(drv_data);
 	write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
 
 	dev_err(&drv_data->pdev->dev, "%s\n", msg);
@@ -687,7 +337,7 @@ static void int_transfer_complete(struct driver_data *drv_data)
 	 */
 
 	/* Move to next transfer */
-	drv_data->cur_msg->state = next_transfer(drv_data);
+	drv_data->cur_msg->state = pxa2xx_spi_next_transfer(drv_data);
 
 	/* Schedule transfer tasklet */
 	tasklet_schedule(&drv_data->pump_transfers);
@@ -798,103 +448,6 @@ static irqreturn_t ssp_int(int irq, void *dev_id)
 	return drv_data->transfer_handler(drv_data);
 }
 
-static int set_dma_burst_and_threshold(struct chip_data *chip,
-				struct spi_device *spi,
-				u8 bits_per_word, u32 *burst_code,
-				u32 *threshold)
-{
-	struct pxa2xx_spi_chip *chip_info =
-			(struct pxa2xx_spi_chip *)spi->controller_data;
-	int bytes_per_word;
-	int burst_bytes;
-	int thresh_words;
-	int req_burst_size;
-	int retval = 0;
-
-	/* Set the threshold (in registers) to equal the same amount of data
-	 * as represented by burst size (in bytes).  The computation below
-	 * is (burst_size rounded up to nearest 8 byte, word or long word)
-	 * divided by (bytes/register); the tx threshold is the inverse of
-	 * the rx, so that there will always be enough data in the rx fifo
-	 * to satisfy a burst, and there will always be enough space in the
-	 * tx fifo to accept a burst (a tx burst will overwrite the fifo if
-	 * there is not enough space), there must always remain enough empty
-	 * space in the rx fifo for any data loaded to the tx fifo.
-	 * Whenever burst_size (in bytes) equals bits/word, the fifo threshold
-	 * will be 8, or half the fifo;
-	 * The threshold can only be set to 2, 4 or 8, but not 16, because
-	 * to burst 16 to the tx fifo, the fifo would have to be empty;
-	 * however, the minimum fifo trigger level is 1, and the tx will
-	 * request service when the fifo is at this level, with only 15 spaces.
-	 */
-
-	/* find bytes/word */
-	if (bits_per_word <= 8)
-		bytes_per_word = 1;
-	else if (bits_per_word <= 16)
-		bytes_per_word = 2;
-	else
-		bytes_per_word = 4;
-
-	/* use struct pxa2xx_spi_chip->dma_burst_size if available */
-	if (chip_info)
-		req_burst_size = chip_info->dma_burst_size;
-	else {
-		switch (chip->dma_burst_size) {
-		default:
-			/* if the default burst size is not set,
-			 * do it now */
-			chip->dma_burst_size = DCMD_BURST8;
-		case DCMD_BURST8:
-			req_burst_size = 8;
-			break;
-		case DCMD_BURST16:
-			req_burst_size = 16;
-			break;
-		case DCMD_BURST32:
-			req_burst_size = 32;
-			break;
-		}
-	}
-	if (req_burst_size <= 8) {
-		*burst_code = DCMD_BURST8;
-		burst_bytes = 8;
-	} else if (req_burst_size <= 16) {
-		if (bytes_per_word == 1) {
-			/* don't burst more than 1/2 the fifo */
-			*burst_code = DCMD_BURST8;
-			burst_bytes = 8;
-			retval = 1;
-		} else {
-			*burst_code = DCMD_BURST16;
-			burst_bytes = 16;
-		}
-	} else {
-		if (bytes_per_word == 1) {
-			/* don't burst more than 1/2 the fifo */
-			*burst_code = DCMD_BURST8;
-			burst_bytes = 8;
-			retval = 1;
-		} else if (bytes_per_word == 2) {
-			/* don't burst more than 1/2 the fifo */
-			*burst_code = DCMD_BURST16;
-			burst_bytes = 16;
-			retval = 1;
-		} else {
-			*burst_code = DCMD_BURST32;
-			burst_bytes = 32;
-		}
-	}
-
-	thresh_words = burst_bytes / bytes_per_word;
-
-	/* thresh_words will be between 2 and 8 */
-	*threshold = (SSCR1_RxTresh(thresh_words) & SSCR1_RFT)
-			| (SSCR1_TxTresh(16-thresh_words) & SSCR1_TFT);
-
-	return retval;
-}
-
 static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
 {
 	unsigned long ssp_clk = drv_data->max_clk_rate;
@@ -956,8 +509,8 @@ static void pump_transfers(unsigned long data)
 			cs_deassert(drv_data);
 	}
 
-	/* Check for transfers that need multiple DMA segments */
-	if (transfer->len > MAX_DMA_LEN && chip->enable_dma) {
+	/* Check if we can DMA this transfer */
+	if (!pxa2xx_spi_dma_is_possible(transfer->len) && chip->enable_dma) {
 
 		/* reject already-mapped transfers; PIO won't always work */
 		if (message->is_dma_mapped
@@ -980,21 +533,20 @@ static void pump_transfers(unsigned long data)
 	}
 
 	/* Setup the transfer state based on the type of transfer */
-	if (flush(drv_data) == 0) {
+	if (pxa2xx_spi_flush(drv_data) == 0) {
 		dev_err(&drv_data->pdev->dev, "pump_transfers: flush failed\n");
 		message->status = -EIO;
 		giveback(drv_data);
 		return;
 	}
 	drv_data->n_bytes = chip->n_bytes;
-	drv_data->dma_width = chip->dma_width;
 	drv_data->tx = (void *)transfer->tx_buf;
 	drv_data->tx_end = drv_data->tx + transfer->len;
 	drv_data->rx = transfer->rx_buf;
 	drv_data->rx_end = drv_data->rx + transfer->len;
 	drv_data->rx_dma = transfer->rx_dma;
 	drv_data->tx_dma = transfer->tx_dma;
-	drv_data->len = transfer->len & DCMD_LENGTH;
+	drv_data->len = transfer->len;
 	drv_data->write = drv_data->tx ? chip->write : null_writer;
 	drv_data->read = drv_data->rx ? chip->read : null_reader;
 
@@ -1015,21 +567,18 @@ static void pump_transfers(unsigned long data)
 
 		if (bits <= 8) {
 			drv_data->n_bytes = 1;
-			drv_data->dma_width = DCMD_WIDTH1;
 			drv_data->read = drv_data->read != null_reader ?
 						u8_reader : null_reader;
 			drv_data->write = drv_data->write != null_writer ?
 						u8_writer : null_writer;
 		} else if (bits <= 16) {
 			drv_data->n_bytes = 2;
-			drv_data->dma_width = DCMD_WIDTH2;
 			drv_data->read = drv_data->read != null_reader ?
 						u16_reader : null_reader;
 			drv_data->write = drv_data->write != null_writer ?
 						u16_writer : null_writer;
 		} else if (bits <= 32) {
 			drv_data->n_bytes = 4;
-			drv_data->dma_width = DCMD_WIDTH4;
 			drv_data->read = drv_data->read != null_reader ?
 						u32_reader : null_reader;
 			drv_data->write = drv_data->write != null_writer ?
@@ -1038,7 +587,8 @@ static void pump_transfers(unsigned long data)
 		/* if bits/word is changed in dma mode, then must check the
 		 * thresholds and burst also */
 		if (chip->enable_dma) {
-			if (set_dma_burst_and_threshold(chip, message->spi,
+			if (pxa2xx_spi_set_dma_burst_and_threshold(chip,
+							message->spi,
 							bits, &dma_burst,
 							&dma_thresh))
 				if (printk_ratelimit())
@@ -1057,70 +607,21 @@ static void pump_transfers(unsigned long data)
 
 	message->state = RUNNING_STATE;
 
-	/* Try to map dma buffer and do a dma transfer if successful, but
-	 * only if the length is non-zero and less than MAX_DMA_LEN.
-	 *
-	 * Zero-length non-descriptor DMA is illegal on PXA2xx; force use
-	 * of PIO instead.  Care is needed above because the transfer may
-	 * have have been passed with buffers that are already dma mapped.
-	 * A zero-length transfer in PIO mode will not try to write/read
-	 * to/from the buffers
-	 *
-	 * REVISIT large transfers are exactly where we most want to be
-	 * using DMA.  If this happens much, split those transfers into
-	 * multiple DMA segments rather than forcing PIO.
-	 */
 	drv_data->dma_mapped = 0;
-	if (drv_data->len > 0 && drv_data->len <= MAX_DMA_LEN)
-		drv_data->dma_mapped = map_dma_buffers(drv_data);
+	if (pxa2xx_spi_dma_is_possible(drv_data->len))
+		drv_data->dma_mapped = pxa2xx_spi_map_dma_buffers(drv_data);
 	if (drv_data->dma_mapped) {
 
 		/* Ensure we have the correct interrupt handler */
-		drv_data->transfer_handler = dma_transfer;
-
-		/* Setup rx DMA Channel */
-		DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
-		DSADR(drv_data->rx_channel) = drv_data->ssdr_physical;
-		DTADR(drv_data->rx_channel) = drv_data->rx_dma;
-		if (drv_data->rx == drv_data->null_dma_buf)
-			/* No target address increment */
-			DCMD(drv_data->rx_channel) = DCMD_FLOWSRC
-							| drv_data->dma_width
-							| dma_burst
-							| drv_data->len;
-		else
-			DCMD(drv_data->rx_channel) = DCMD_INCTRGADDR
-							| DCMD_FLOWSRC
-							| drv_data->dma_width
-							| dma_burst
-							| drv_data->len;
-
-		/* Setup tx DMA Channel */
-		DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
-		DSADR(drv_data->tx_channel) = drv_data->tx_dma;
-		DTADR(drv_data->tx_channel) = drv_data->ssdr_physical;
-		if (drv_data->tx == drv_data->null_dma_buf)
-			/* No source address increment */
-			DCMD(drv_data->tx_channel) = DCMD_FLOWTRG
-							| drv_data->dma_width
-							| dma_burst
-							| drv_data->len;
-		else
-			DCMD(drv_data->tx_channel) = DCMD_INCSRCADDR
-							| DCMD_FLOWTRG
-							| drv_data->dma_width
-							| dma_burst
-							| drv_data->len;
-
-		/* Enable dma end irqs on SSP to detect end of transfer */
-		if (drv_data->ssp_type == PXA25x_SSP)
-			DCMD(drv_data->tx_channel) |= DCMD_ENDIRQEN;
+		drv_data->transfer_handler = pxa2xx_spi_dma_transfer;
+
+		pxa2xx_spi_dma_prepare(drv_data, dma_burst);
 
 		/* Clear status and start DMA engine */
 		cr1 = chip->cr1 | dma_thresh | drv_data->dma_cr1;
 		write_SSSR(drv_data->clear_sr, reg);
-		DCSR(drv_data->rx_channel) |= DCSR_RUN;
-		DCSR(drv_data->tx_channel) |= DCSR_RUN;
+
+		pxa2xx_spi_dma_start(drv_data);
 	} else {
 		/* Ensure we have the correct interrupt handler	*/
 		drv_data->transfer_handler = interrupt_transfer;
@@ -1262,8 +763,6 @@ static int setup(struct spi_device *spi)
 			chip->gpio_cs = -1;
 		chip->enable_dma = 0;
 		chip->timeout = TIMOUT_DFLT;
-		chip->dma_burst_size = drv_data->master_info->enable_dma ?
-					DCMD_BURST8 : 0;
 	}
 
 	/* protocol drivers may change the chip settings, so...
@@ -1293,7 +792,8 @@ static int setup(struct spi_device *spi)
 	 * burst and threshold can still respond to changes in bits_per_word */
 	if (chip->enable_dma) {
 		/* set up legal burst and threshold for dma */
-		if (set_dma_burst_and_threshold(chip, spi, spi->bits_per_word,
+		if (pxa2xx_spi_set_dma_burst_and_threshold(chip, spi,
+						spi->bits_per_word,
 						&chip->dma_burst_size,
 						&chip->dma_threshold)) {
 			dev_warn(&spi->dev, "in setup: DMA burst size reduced "
@@ -1328,18 +828,15 @@ static int setup(struct spi_device *spi)
 
 	if (spi->bits_per_word <= 8) {
 		chip->n_bytes = 1;
-		chip->dma_width = DCMD_WIDTH1;
 		chip->read = u8_reader;
 		chip->write = u8_writer;
 	} else if (spi->bits_per_word <= 16) {
 		chip->n_bytes = 2;
-		chip->dma_width = DCMD_WIDTH2;
 		chip->read = u16_reader;
 		chip->write = u16_writer;
 	} else if (spi->bits_per_word <= 32) {
 		chip->cr0 |= SSCR0_EDSS;
 		chip->n_bytes = 4;
-		chip->dma_width = DCMD_WIDTH4;
 		chip->read = u32_reader;
 		chip->write = u32_writer;
 	} else {
@@ -1447,31 +944,11 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	drv_data->tx_channel = -1;
 	drv_data->rx_channel = -1;
 	if (platform_info->enable_dma) {
-
-		/* Get two DMA channels	(rx and tx) */
-		drv_data->rx_channel = pxa_request_dma("pxa2xx_spi_ssp_rx",
-							DMA_PRIO_HIGH,
-							dma_handler,
-							drv_data);
-		if (drv_data->rx_channel < 0) {
-			dev_err(dev, "problem (%d) requesting rx channel\n",
-				drv_data->rx_channel);
-			status = -ENODEV;
-			goto out_error_irq_alloc;
+		status = pxa2xx_spi_dma_setup(drv_data);
+		if (status) {
+			dev_warn(dev, "failed to setup DMA, using PIO\n");
+			platform_info->enable_dma = false;
 		}
-		drv_data->tx_channel = pxa_request_dma("pxa2xx_spi_ssp_tx",
-							DMA_PRIO_MEDIUM,
-							dma_handler,
-							drv_data);
-		if (drv_data->tx_channel < 0) {
-			dev_err(dev, "problem (%d) requesting tx channel\n",
-				drv_data->tx_channel);
-			status = -ENODEV;
-			goto out_error_dma_alloc;
-		}
-
-		DRCMR(ssp->drcmr_rx) = DRCMR_MAPVLD | drv_data->rx_channel;
-		DRCMR(ssp->drcmr_tx) = DRCMR_MAPVLD | drv_data->tx_channel;
 	}
 
 	/* Enable SOC clock */
@@ -1507,14 +984,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 
 out_error_clock_enabled:
 	clk_disable_unprepare(ssp->clk);
-
-out_error_dma_alloc:
-	if (drv_data->tx_channel != -1)
-		pxa_free_dma(drv_data->tx_channel);
-	if (drv_data->rx_channel != -1)
-		pxa_free_dma(drv_data->rx_channel);
-
-out_error_irq_alloc:
+	pxa2xx_spi_dma_release(drv_data);
 	free_irq(ssp->irq, drv_data);
 
 out_error_master_alloc:
@@ -1537,12 +1007,8 @@ static int pxa2xx_spi_remove(struct platform_device *pdev)
 	clk_disable_unprepare(ssp->clk);
 
 	/* Release DMA */
-	if (drv_data->master_info->enable_dma) {
-		DRCMR(ssp->drcmr_rx) = 0;
-		DRCMR(ssp->drcmr_tx) = 0;
-		pxa_free_dma(drv_data->tx_channel);
-		pxa_free_dma(drv_data->rx_channel);
-	}
+	if (drv_data->master_info->enable_dma)
+		pxa2xx_spi_dma_release(drv_data);
 
 	/* Release IRQ */
 	free_irq(ssp->irq, drv_data);
@@ -1589,12 +1055,7 @@ static int pxa2xx_spi_resume(struct device *dev)
 	struct ssp_device *ssp = drv_data->ssp;
 	int status = 0;
 
-	if (drv_data->rx_channel != -1)
-		DRCMR(drv_data->ssp->drcmr_rx) =
-			DRCMR_MAPVLD | drv_data->rx_channel;
-	if (drv_data->tx_channel != -1)
-		DRCMR(drv_data->ssp->drcmr_tx) =
-			DRCMR_MAPVLD | drv_data->tx_channel;
+	pxa2xx_spi_dma_resume(drv_data);
 
 	/* Enable the SSP clock */
 	clk_prepare_enable(ssp->clk);
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
new file mode 100644
index 000000000000..0a98905c916e
--- /dev/null
+++ b/drivers/spi/spi-pxa2xx.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
+ * Copyright (C) 2013, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef SPI_PXA2XX_H
+#define SPI_PXA2XX_H
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/pxa2xx_ssp.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
+
+struct driver_data {
+	/* Driver model hookup */
+	struct platform_device *pdev;
+
+	/* SSP Info */
+	struct ssp_device *ssp;
+
+	/* SPI framework hookup */
+	enum pxa_ssp_type ssp_type;
+	struct spi_master *master;
+
+	/* PXA hookup */
+	struct pxa2xx_spi_master *master_info;
+
+	/* PXA private DMA setup stuff */
+	int rx_channel;
+	int tx_channel;
+	u32 *null_dma_buf;
+
+	/* SSP register addresses */
+	void __iomem *ioaddr;
+	u32 ssdr_physical;
+
+	/* SSP masks*/
+	u32 dma_cr1;
+	u32 int_cr1;
+	u32 clear_sr;
+	u32 mask_sr;
+
+	/* Maximun clock rate */
+	unsigned long max_clk_rate;
+
+	/* Message Transfer pump */
+	struct tasklet_struct pump_transfers;
+
+	/* Current message transfer state info */
+	struct spi_message *cur_msg;
+	struct spi_transfer *cur_transfer;
+	struct chip_data *cur_chip;
+	size_t len;
+	void *tx;
+	void *tx_end;
+	void *rx;
+	void *rx_end;
+	int dma_mapped;
+	dma_addr_t rx_dma;
+	dma_addr_t tx_dma;
+	size_t rx_map_len;
+	size_t tx_map_len;
+	u8 n_bytes;
+	int (*write)(struct driver_data *drv_data);
+	int (*read)(struct driver_data *drv_data);
+	irqreturn_t (*transfer_handler)(struct driver_data *drv_data);
+	void (*cs_control)(u32 command);
+};
+
+struct chip_data {
+	u32 cr0;
+	u32 cr1;
+	u32 psp;
+	u32 timeout;
+	u8 n_bytes;
+	u32 dma_burst_size;
+	u32 threshold;
+	u32 dma_threshold;
+	u8 enable_dma;
+	u8 bits_per_word;
+	u32 speed_hz;
+	union {
+		int gpio_cs;
+		unsigned int frm;
+	};
+	int gpio_cs_inverted;
+	int (*write)(struct driver_data *drv_data);
+	int (*read)(struct driver_data *drv_data);
+	void (*cs_control)(u32 command);
+};
+
+#define DEFINE_SSP_REG(reg, off) \
+static inline u32 read_##reg(void const __iomem *p) \
+{ return __raw_readl(p + (off)); } \
+\
+static inline void write_##reg(u32 v, void __iomem *p) \
+{ __raw_writel(v, p + (off)); }
+
+DEFINE_SSP_REG(SSCR0, 0x00)
+DEFINE_SSP_REG(SSCR1, 0x04)
+DEFINE_SSP_REG(SSSR, 0x08)
+DEFINE_SSP_REG(SSITR, 0x0c)
+DEFINE_SSP_REG(SSDR, 0x10)
+DEFINE_SSP_REG(SSTO, 0x28)
+DEFINE_SSP_REG(SSPSP, 0x2c)
+
+#define START_STATE ((void *)0)
+#define RUNNING_STATE ((void *)1)
+#define DONE_STATE ((void *)2)
+#define ERROR_STATE ((void *)-1)
+
+#define MAX_DMA_LEN		8191
+#define IS_DMA_ALIGNED(x)	IS_ALIGNED((unsigned long)(x), DMA_ALIGNMENT)
+#define DMA_ALIGNMENT		8
+
+static inline int pxa25x_ssp_comp(struct driver_data *drv_data)
+{
+	if (drv_data->ssp_type == PXA25x_SSP)
+		return 1;
+	if (drv_data->ssp_type == CE4100_SSP)
+		return 1;
+	return 0;
+}
+
+static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val)
+{
+	void __iomem *reg = drv_data->ioaddr;
+
+	if (drv_data->ssp_type == CE4100_SSP)
+		val |= read_SSSR(reg) & SSSR_ALT_FRM_MASK;
+
+	write_SSSR(val, reg);
+}
+
+extern int pxa2xx_spi_flush(struct driver_data *drv_data);
+extern void *pxa2xx_spi_next_transfer(struct driver_data *drv_data);
+
+#if defined(CONFIG_SPI_PXA2XX_PXADMA)
+extern bool pxa2xx_spi_dma_is_possible(size_t len);
+extern int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data);
+extern irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data);
+extern int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst);
+extern void pxa2xx_spi_dma_start(struct driver_data *drv_data);
+extern int pxa2xx_spi_dma_setup(struct driver_data *drv_data);
+extern void pxa2xx_spi_dma_release(struct driver_data *drv_data);
+extern void pxa2xx_spi_dma_resume(struct driver_data *drv_data);
+extern int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
+						  struct spi_device *spi,
+						  u8 bits_per_word,
+						  u32 *burst_code,
+						  u32 *threshold);
+#else
+static inline bool pxa2xx_spi_dma_is_possible(size_t len) { return false; }
+static inline int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data)
+{
+	return 0;
+}
+#define pxa2xx_spi_dma_transfer NULL
+static inline void pxa2xx_spi_dma_prepare(struct driver_data *drv_data,
+					  u32 dma_burst) {}
+static inline void pxa2xx_spi_dma_start(struct driver_data *drv_data) {}
+static inline int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
+{
+	return 0;
+}
+static inline void pxa2xx_spi_dma_release(struct driver_data *drv_data) {}
+static inline void pxa2xx_spi_dma_resume(struct driver_data *drv_data) {}
+static inline int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
+							 struct spi_device *spi,
+							 u8 bits_per_word,
+							 u32 *burst_code,
+							 u32 *threshold)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif /* SPI_PXA2XX_H */
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 053b5ba51b25..d6d2b4d557f8 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -53,85 +53,5 @@ struct pxa2xx_spi_chip {
 
 extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info);
 
-#else
-/*
- * This is the implemtation for CE4100 on x86. ARM defines them in mach/ or
- * plat/ include path.
- * The CE4100 does not provide DMA support. This bits are here to let the driver
- * compile and will never be used. Maybe we get DMA support at a later point in
- * time.
- */
-
-#define DCSR(n)         (n)
-#define DSADR(n)        (n)
-#define DTADR(n)        (n)
-#define DCMD(n)         (n)
-#define DRCMR(n)        (n)
-
-#define DCSR_RUN	(1 << 31)	/* Run Bit */
-#define DCSR_NODESC	(1 << 30)	/* No-Descriptor Fetch */
-#define DCSR_STOPIRQEN	(1 << 29)	/* Stop Interrupt Enable */
-#define DCSR_REQPEND	(1 << 8)	/* Request Pending (read-only) */
-#define DCSR_STOPSTATE	(1 << 3)	/* Stop State (read-only) */
-#define DCSR_ENDINTR	(1 << 2)	/* End Interrupt */
-#define DCSR_STARTINTR	(1 << 1)	/* Start Interrupt */
-#define DCSR_BUSERR	(1 << 0)	/* Bus Error Interrupt */
-
-#define DCSR_EORIRQEN	(1 << 28)	/* End of Receive Interrupt Enable */
-#define DCSR_EORJMPEN	(1 << 27)	/* Jump to next descriptor on EOR */
-#define DCSR_EORSTOPEN	(1 << 26)	/* STOP on an EOR */
-#define DCSR_SETCMPST	(1 << 25)	/* Set Descriptor Compare Status */
-#define DCSR_CLRCMPST	(1 << 24)	/* Clear Descriptor Compare Status */
-#define DCSR_CMPST	(1 << 10)	/* The Descriptor Compare Status */
-#define DCSR_EORINTR	(1 << 9)	/* The end of Receive */
-
-#define DRCMR_MAPVLD	(1 << 7)	/* Map Valid */
-#define DRCMR_CHLNUM	0x1f		/* mask for Channel Number */
-
-#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor */
-#define DDADR_STOP	(1 << 0)	/* Stop */
-
-#define DCMD_INCSRCADDR	(1 << 31)	/* Source Address Increment Setting. */
-#define DCMD_INCTRGADDR	(1 << 30)	/* Target Address Increment Setting. */
-#define DCMD_FLOWSRC	(1 << 29)	/* Flow Control by the source. */
-#define DCMD_FLOWTRG	(1 << 28)	/* Flow Control by the target. */
-#define DCMD_STARTIRQEN	(1 << 22)	/* Start Interrupt Enable */
-#define DCMD_ENDIRQEN	(1 << 21)	/* End Interrupt Enable */
-#define DCMD_ENDIAN	(1 << 18)	/* Device Endian-ness. */
-#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
-#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
-#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
-#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
-#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
-#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
-#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
-
-/*
- * Descriptor structure for PXA's DMA engine
- * Note: this structure must always be aligned to a 16-byte boundary.
- */
-
-typedef enum {
-	DMA_PRIO_HIGH = 0,
-	DMA_PRIO_MEDIUM = 1,
-	DMA_PRIO_LOW = 2
-} pxa_dma_prio;
-
-/*
- * DMA registration
- */
-
-static inline int pxa_request_dma(char *name,
-		pxa_dma_prio prio,
-		void (*irq_handler)(int, void *),
-		void *data)
-{
-	return -ENODEV;
-}
-
-static inline void pxa_free_dma(int dma_ch)
-{
-}
-
 #endif
 #endif
-- 
cgit v1.2.3


From 5928808ef623347e0d4aa22327b992e9e125b6ad Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 22 Jan 2013 12:26:29 +0200
Subject: spi/pxa2xx: add support for DMA engine

To be able to use DMA with this driver on non-PXA platforms we implement
support for the generic DMA engine API. This lets user to use different DMA
engines with little or no modification to the driver.

Request lines and channel numbers can be passed to the driver from the
platform specific data.

The DMA engine implementation will be selected by default even on PXA
platform. User can select the legacy DMA API by enabling Kconfig option
CONFIG_SPI_PXA2XX_PXADMA.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Lu Cao <lucao@marvell.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/Kconfig            |   7 +-
 drivers/spi/Makefile           |   1 +
 drivers/spi/spi-pxa2xx-dma.c   | 392 +++++++++++++++++++++++++++++++++++++++++
 drivers/spi/spi-pxa2xx.c       |   2 +-
 drivers/spi/spi-pxa2xx.h       |  32 +++-
 include/linux/spi/pxa2xx_spi.h |   6 +
 6 files changed, 437 insertions(+), 3 deletions(-)
 create mode 100644 drivers/spi/spi-pxa2xx-dma.c

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index f1878666e917..8913d3dd5724 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -301,7 +301,12 @@ config SPI_PXA2XX_PXADMA
 	bool "PXA2xx SSP legacy PXA DMA API support"
 	depends on SPI_PXA2XX && ARCH_PXA
 	help
-	  Enable PXA private legacy DMA API support.
+	  Enable PXA private legacy DMA API support. Note that this is
+	  deprecated in favor of generic DMA engine API.
+
+config SPI_PXA2XX_DMA
+	def_bool y
+	depends on SPI_PXA2XX && !SPI_PXA2XX_PXADMA
 
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 2e3cdd3caba9..e53c30941340 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_SPI_PL022)			+= spi-pl022.o
 obj-$(CONFIG_SPI_PPC4xx)		+= spi-ppc4xx.o
 spi-pxa2xx-platform-objs		:= spi-pxa2xx.o
 spi-pxa2xx-platform-$(CONFIG_SPI_PXA2XX_PXADMA)	+= spi-pxa2xx-pxadma.o
+spi-pxa2xx-platform-$(CONFIG_SPI_PXA2XX_DMA)	+= spi-pxa2xx-dma.o
 obj-$(CONFIG_SPI_PXA2XX)		+= spi-pxa2xx-platform.o
 obj-$(CONFIG_SPI_PXA2XX_PCI)		+= spi-pxa2xx-pci.o
 obj-$(CONFIG_SPI_RSPI)			+= spi-rspi.o
diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
new file mode 100644
index 000000000000..c735c5a008a2
--- /dev/null
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -0,0 +1,392 @@
+/*
+ * PXA2xx SPI DMA engine support.
+ *
+ * Copyright (C) 2013, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/pxa2xx_ssp.h>
+#include <linux/scatterlist.h>
+#include <linux/sizes.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
+
+#include "spi-pxa2xx.h"
+
+static int pxa2xx_spi_map_dma_buffer(struct driver_data *drv_data,
+				     enum dma_data_direction dir)
+{
+	int i, nents, len = drv_data->len;
+	struct scatterlist *sg;
+	struct device *dmadev;
+	struct sg_table *sgt;
+	void *buf, *pbuf;
+
+	/*
+	 * Some DMA controllers have problems transferring buffers that are
+	 * not multiple of 4 bytes. So we truncate the transfer so that it
+	 * is suitable for such controllers, and handle the trailing bytes
+	 * manually after the DMA completes.
+	 *
+	 * REVISIT: It would be better if this information could be
+	 * retrieved directly from the DMA device in a similar way than
+	 * ->copy_align etc. is done.
+	 */
+	len = ALIGN(drv_data->len, 4);
+
+	if (dir == DMA_TO_DEVICE) {
+		dmadev = drv_data->tx_chan->device->dev;
+		sgt = &drv_data->tx_sgt;
+		buf = drv_data->tx;
+		drv_data->tx_map_len = len;
+	} else {
+		dmadev = drv_data->rx_chan->device->dev;
+		sgt = &drv_data->rx_sgt;
+		buf = drv_data->rx;
+		drv_data->rx_map_len = len;
+	}
+
+	nents = DIV_ROUND_UP(len, SZ_2K);
+	if (nents != sgt->nents) {
+		int ret;
+
+		sg_free_table(sgt);
+		ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
+		if (ret)
+			return ret;
+	}
+
+	pbuf = buf;
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		size_t bytes = min_t(size_t, len, SZ_2K);
+
+		if (buf)
+			sg_set_buf(sg, pbuf, bytes);
+		else
+			sg_set_buf(sg, drv_data->dummy, bytes);
+
+		pbuf += bytes;
+		len -= bytes;
+	}
+
+	nents = dma_map_sg(dmadev, sgt->sgl, sgt->nents, dir);
+	if (!nents)
+		return -ENOMEM;
+
+	return nents;
+}
+
+static void pxa2xx_spi_unmap_dma_buffer(struct driver_data *drv_data,
+					enum dma_data_direction dir)
+{
+	struct device *dmadev;
+	struct sg_table *sgt;
+
+	if (dir == DMA_TO_DEVICE) {
+		dmadev = drv_data->tx_chan->device->dev;
+		sgt = &drv_data->tx_sgt;
+	} else {
+		dmadev = drv_data->rx_chan->device->dev;
+		sgt = &drv_data->rx_sgt;
+	}
+
+	dma_unmap_sg(dmadev, sgt->sgl, sgt->nents, dir);
+}
+
+static void pxa2xx_spi_unmap_dma_buffers(struct driver_data *drv_data)
+{
+	if (!drv_data->dma_mapped)
+		return;
+
+	pxa2xx_spi_unmap_dma_buffer(drv_data, DMA_FROM_DEVICE);
+	pxa2xx_spi_unmap_dma_buffer(drv_data, DMA_TO_DEVICE);
+
+	drv_data->dma_mapped = 0;
+}
+
+static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
+					     bool error)
+{
+	struct spi_message *msg = drv_data->cur_msg;
+
+	/*
+	 * It is possible that one CPU is handling ROR interrupt and other
+	 * just gets DMA completion. Calling pump_transfers() twice for the
+	 * same transfer leads to problems thus we prevent concurrent calls
+	 * by using ->dma_running.
+	 */
+	if (atomic_dec_and_test(&drv_data->dma_running)) {
+		void __iomem *reg = drv_data->ioaddr;
+
+		/*
+		 * If the other CPU is still handling the ROR interrupt we
+		 * might not know about the error yet. So we re-check the
+		 * ROR bit here before we clear the status register.
+		 */
+		if (!error) {
+			u32 status = read_SSSR(reg) & drv_data->mask_sr;
+			error = status & SSSR_ROR;
+		}
+
+		/* Clear status & disable interrupts */
+		write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
+		write_SSSR_CS(drv_data, drv_data->clear_sr);
+		if (!pxa25x_ssp_comp(drv_data))
+			write_SSTO(0, reg);
+
+		if (!error) {
+			pxa2xx_spi_unmap_dma_buffers(drv_data);
+
+			/* Handle the last bytes of unaligned transfer */
+			drv_data->tx += drv_data->tx_map_len;
+			drv_data->write(drv_data);
+
+			drv_data->rx += drv_data->rx_map_len;
+			drv_data->read(drv_data);
+
+			msg->actual_length += drv_data->len;
+			msg->state = pxa2xx_spi_next_transfer(drv_data);
+		} else {
+			/* In case we got an error we disable the SSP now */
+			write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
+
+			msg->state = ERROR_STATE;
+		}
+
+		tasklet_schedule(&drv_data->pump_transfers);
+	}
+}
+
+static void pxa2xx_spi_dma_callback(void *data)
+{
+	pxa2xx_spi_dma_transfer_complete(data, false);
+}
+
+static struct dma_async_tx_descriptor *
+pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
+			   enum dma_transfer_direction dir)
+{
+	struct pxa2xx_spi_master *pdata = drv_data->master_info;
+	struct chip_data *chip = drv_data->cur_chip;
+	enum dma_slave_buswidth width;
+	struct dma_slave_config cfg;
+	struct dma_chan *chan;
+	struct sg_table *sgt;
+	int nents, ret;
+
+	switch (drv_data->n_bytes) {
+	case 1:
+		width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		break;
+	case 2:
+		width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+		break;
+	default:
+		width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		break;
+	}
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.direction = dir;
+
+	if (dir == DMA_MEM_TO_DEV) {
+		cfg.dst_addr = drv_data->ssdr_physical;
+		cfg.dst_addr_width = width;
+		cfg.dst_maxburst = chip->dma_burst_size;
+		cfg.slave_id = pdata->tx_slave_id;
+
+		sgt = &drv_data->tx_sgt;
+		nents = drv_data->tx_nents;
+		chan = drv_data->tx_chan;
+	} else {
+		cfg.src_addr = drv_data->ssdr_physical;
+		cfg.src_addr_width = width;
+		cfg.src_maxburst = chip->dma_burst_size;
+		cfg.slave_id = pdata->rx_slave_id;
+
+		sgt = &drv_data->rx_sgt;
+		nents = drv_data->rx_nents;
+		chan = drv_data->rx_chan;
+	}
+
+	ret = dmaengine_slave_config(chan, &cfg);
+	if (ret) {
+		dev_warn(&drv_data->pdev->dev, "DMA slave config failed\n");
+		return NULL;
+	}
+
+	return dmaengine_prep_slave_sg(chan, sgt->sgl, nents, dir,
+				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+}
+
+static bool pxa2xx_spi_dma_filter(struct dma_chan *chan, void *param)
+{
+	const struct pxa2xx_spi_master *pdata = param;
+
+	return chan->chan_id == pdata->tx_chan_id ||
+	       chan->chan_id == pdata->rx_chan_id;
+}
+
+bool pxa2xx_spi_dma_is_possible(size_t len)
+{
+	return len <= MAX_DMA_LEN;
+}
+
+int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data)
+{
+	const struct chip_data *chip = drv_data->cur_chip;
+	int ret;
+
+	if (!chip->enable_dma)
+		return 0;
+
+	/* Don't bother with DMA if we can't do even a single burst */
+	if (drv_data->len < chip->dma_burst_size)
+		return 0;
+
+	ret = pxa2xx_spi_map_dma_buffer(drv_data, DMA_TO_DEVICE);
+	if (ret <= 0) {
+		dev_warn(&drv_data->pdev->dev, "failed to DMA map TX\n");
+		return 0;
+	}
+
+	drv_data->tx_nents = ret;
+
+	ret = pxa2xx_spi_map_dma_buffer(drv_data, DMA_FROM_DEVICE);
+	if (ret <= 0) {
+		pxa2xx_spi_unmap_dma_buffer(drv_data, DMA_TO_DEVICE);
+		dev_warn(&drv_data->pdev->dev, "failed to DMA map RX\n");
+		return 0;
+	}
+
+	drv_data->rx_nents = ret;
+	return 1;
+}
+
+irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data)
+{
+	u32 status;
+
+	status = read_SSSR(drv_data->ioaddr) & drv_data->mask_sr;
+	if (status & SSSR_ROR) {
+		dev_err(&drv_data->pdev->dev, "FIFO overrun\n");
+
+		dmaengine_terminate_all(drv_data->rx_chan);
+		dmaengine_terminate_all(drv_data->tx_chan);
+
+		pxa2xx_spi_dma_transfer_complete(drv_data, true);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst)
+{
+	struct dma_async_tx_descriptor *tx_desc, *rx_desc;
+
+	tx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_MEM_TO_DEV);
+	if (!tx_desc) {
+		dev_err(&drv_data->pdev->dev,
+			"failed to get DMA TX descriptor\n");
+		return -EBUSY;
+	}
+
+	rx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_DEV_TO_MEM);
+	if (!rx_desc) {
+		dev_err(&drv_data->pdev->dev,
+			"failed to get DMA RX descriptor\n");
+		return -EBUSY;
+	}
+
+	/* We are ready when RX completes */
+	rx_desc->callback = pxa2xx_spi_dma_callback;
+	rx_desc->callback_param = drv_data;
+
+	dmaengine_submit(rx_desc);
+	dmaengine_submit(tx_desc);
+	return 0;
+}
+
+void pxa2xx_spi_dma_start(struct driver_data *drv_data)
+{
+	dma_async_issue_pending(drv_data->rx_chan);
+	dma_async_issue_pending(drv_data->tx_chan);
+
+	atomic_set(&drv_data->dma_running, 1);
+}
+
+int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
+{
+	struct pxa2xx_spi_master *pdata = drv_data->master_info;
+	dma_cap_mask_t mask;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	drv_data->dummy = devm_kzalloc(&drv_data->pdev->dev, SZ_2K, GFP_KERNEL);
+	if (!drv_data->dummy)
+		return -ENOMEM;
+
+	drv_data->tx_chan = dma_request_channel(mask, pxa2xx_spi_dma_filter,
+						pdata);
+	if (!drv_data->tx_chan)
+		return -ENODEV;
+
+	drv_data->rx_chan = dma_request_channel(mask, pxa2xx_spi_dma_filter,
+						pdata);
+	if (!drv_data->rx_chan) {
+		dma_release_channel(drv_data->tx_chan);
+		drv_data->tx_chan = NULL;
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+void pxa2xx_spi_dma_release(struct driver_data *drv_data)
+{
+	if (drv_data->rx_chan) {
+		dmaengine_terminate_all(drv_data->rx_chan);
+		dma_release_channel(drv_data->rx_chan);
+		sg_free_table(&drv_data->rx_sgt);
+		drv_data->rx_chan = NULL;
+	}
+	if (drv_data->tx_chan) {
+		dmaengine_terminate_all(drv_data->tx_chan);
+		dma_release_channel(drv_data->tx_chan);
+		sg_free_table(&drv_data->tx_sgt);
+		drv_data->tx_chan = NULL;
+	}
+}
+
+void pxa2xx_spi_dma_resume(struct driver_data *drv_data)
+{
+}
+
+int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
+					   struct spi_device *spi,
+					   u8 bits_per_word, u32 *burst_code,
+					   u32 *threshold)
+{
+	struct pxa2xx_spi_chip *chip_info = spi->controller_data;
+
+	/*
+	 * If the DMA burst size is given in chip_info we use that,
+	 * otherwise we use the default. Also we use the default FIFO
+	 * thresholds for now.
+	 */
+	*burst_code = chip_info ? chip_info->dma_burst_size : 16;
+	*threshold = SSCR1_RxTresh(RX_THRESH_DFLT)
+		   | SSCR1_TxTresh(TX_THRESH_DFLT);
+
+	return 0;
+}
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 5b7c2a4ba828..c3d78073d1ad 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -928,7 +928,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		drv_data->mask_sr = SSSR_RFS | SSSR_TFS | SSSR_ROR;
 	} else {
 		drv_data->int_cr1 = SSCR1_TIE | SSCR1_RIE | SSCR1_TINTE;
-		drv_data->dma_cr1 = SSCR1_TSRE | SSCR1_RSRE | SSCR1_TINTE;
+		drv_data->dma_cr1 = DEFAULT_DMA_CR1;
 		drv_data->clear_sr = SSSR_ROR | SSSR_TINT;
 		drv_data->mask_sr = SSSR_TINT | SSSR_RFS | SSSR_TFS | SSSR_ROR;
 	}
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 0a98905c916e..97ff4717e6ac 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -10,11 +10,15 @@
 #ifndef SPI_PXA2XX_H
 #define SPI_PXA2XX_H
 
+#include <linux/atomic.h>
+#include <linux/dmaengine.h>
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/pxa2xx_ssp.h>
+#include <linux/scatterlist.h>
+#include <linux/sizes.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/pxa2xx_spi.h>
 
@@ -53,6 +57,16 @@ struct driver_data {
 	/* Message Transfer pump */
 	struct tasklet_struct pump_transfers;
 
+	/* DMA engine support */
+	struct dma_chan *rx_chan;
+	struct dma_chan *tx_chan;
+	struct sg_table rx_sgt;
+	struct sg_table tx_sgt;
+	int rx_nents;
+	int tx_nents;
+	void *dummy;
+	atomic_t dma_running;
+
 	/* Current message transfer state info */
 	struct spi_message *cur_msg;
 	struct spi_transfer *cur_transfer;
@@ -116,7 +130,6 @@ DEFINE_SSP_REG(SSPSP, 0x2c)
 #define DONE_STATE ((void *)2)
 #define ERROR_STATE ((void *)-1)
 
-#define MAX_DMA_LEN		8191
 #define IS_DMA_ALIGNED(x)	IS_ALIGNED((unsigned long)(x), DMA_ALIGNMENT)
 #define DMA_ALIGNMENT		8
 
@@ -142,7 +155,24 @@ static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val)
 extern int pxa2xx_spi_flush(struct driver_data *drv_data);
 extern void *pxa2xx_spi_next_transfer(struct driver_data *drv_data);
 
+/*
+ * Select the right DMA implementation.
+ */
 #if defined(CONFIG_SPI_PXA2XX_PXADMA)
+#define SPI_PXA2XX_USE_DMA	1
+#define MAX_DMA_LEN		8191
+#define DEFAULT_DMA_CR1		(SSCR1_TSRE | SSCR1_RSRE | SSCR1_TINTE)
+#elif defined(CONFIG_SPI_PXA2XX_DMA)
+#define SPI_PXA2XX_USE_DMA	1
+#define MAX_DMA_LEN		SZ_64K
+#define DEFAULT_DMA_CR1		(SSCR1_TSRE | SSCR1_RSRE | SSCR1_TRAIL)
+#else
+#undef SPI_PXA2XX_USE_DMA
+#define MAX_DMA_LEN		0
+#define DEFAULT_DMA_CR1		0
+#endif
+
+#ifdef SPI_PXA2XX_USE_DMA
 extern bool pxa2xx_spi_dma_is_possible(size_t len);
 extern int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data);
 extern irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data);
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index d6d2b4d557f8..e5cbbc4c57f7 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -29,6 +29,12 @@ struct pxa2xx_spi_master {
 	u16 num_chipselect;
 	u8 enable_dma;
 
+	/* DMA engine specific config */
+	int rx_chan_id;
+	int tx_chan_id;
+	int rx_slave_id;
+	int tx_slave_id;
+
 	/* For non-PXA arches */
 	struct ssp_device ssp;
 };
-- 
cgit v1.2.3


From a0d2642e9296882cda3ad03ff3d9a6649cd70439 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 22 Jan 2013 12:26:32 +0200
Subject: spi/pxa2xx: add support for Intel Low Power Subsystem SPI

Intel LPSS SPI is pretty much the same as the PXA27xx SPI except that it
has few additional features over the original:

	o FIFO depth is 256 entries
	o RX FIFO has one watermark
	o TX FIFO has two watermarks, low and high
	o chip select can be controlled by writing to a register

The new FIFO registers follow immediately the PXA27xx registers but then there
are some additional LPSS private registers at offset 1k or 2k from the base
address. For these private registers we add new accessors that take advantage
of drv_data->lpss_base once it is resolved.

We add a new type LPSS_SSP that can be used to distinguish the LPSS devices
from others.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Lu Cao <lucao@marvell.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/spi-pxa2xx.c       | 133 +++++++++++++++++++++++++++++++++++++++--
 drivers/spi/spi-pxa2xx.h       |   6 ++
 include/linux/pxa2xx_ssp.h     |   9 +++
 include/linux/spi/pxa2xx_spi.h |   1 +
 4 files changed, 145 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 8a5ba0aa095a..4bd6b729f710 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
+ * Copyright (C) 2013, Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -61,6 +62,98 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 				| SSCR1_RFT | SSCR1_TFT | SSCR1_MWDS \
 				| SSCR1_SPH | SSCR1_SPO | SSCR1_LBM)
 
+#define LPSS_RX_THRESH_DFLT	64
+#define LPSS_TX_LOTHRESH_DFLT	160
+#define LPSS_TX_HITHRESH_DFLT	224
+
+/* Offset from drv_data->lpss_base */
+#define SPI_CS_CONTROL		0x18
+#define SPI_CS_CONTROL_SW_MODE	BIT(0)
+#define SPI_CS_CONTROL_CS_HIGH	BIT(1)
+
+static bool is_lpss_ssp(const struct driver_data *drv_data)
+{
+	return drv_data->ssp_type == LPSS_SSP;
+}
+
+/*
+ * Read and write LPSS SSP private registers. Caller must first check that
+ * is_lpss_ssp() returns true before these can be called.
+ */
+static u32 __lpss_ssp_read_priv(struct driver_data *drv_data, unsigned offset)
+{
+	WARN_ON(!drv_data->lpss_base);
+	return readl(drv_data->lpss_base + offset);
+}
+
+static void __lpss_ssp_write_priv(struct driver_data *drv_data,
+				  unsigned offset, u32 value)
+{
+	WARN_ON(!drv_data->lpss_base);
+	writel(value, drv_data->lpss_base + offset);
+}
+
+/*
+ * lpss_ssp_setup - perform LPSS SSP specific setup
+ * @drv_data: pointer to the driver private data
+ *
+ * Perform LPSS SSP specific setup. This function must be called first if
+ * one is going to use LPSS SSP private registers.
+ */
+static void lpss_ssp_setup(struct driver_data *drv_data)
+{
+	unsigned offset = 0x400;
+	u32 value, orig;
+
+	if (!is_lpss_ssp(drv_data))
+		return;
+
+	/*
+	 * Perform auto-detection of the LPSS SSP private registers. They
+	 * can be either at 1k or 2k offset from the base address.
+	 */
+	orig = readl(drv_data->ioaddr + offset + SPI_CS_CONTROL);
+
+	value = orig | SPI_CS_CONTROL_SW_MODE;
+	writel(value, drv_data->ioaddr + offset + SPI_CS_CONTROL);
+	value = readl(drv_data->ioaddr + offset + SPI_CS_CONTROL);
+	if (value != (orig | SPI_CS_CONTROL_SW_MODE)) {
+		offset = 0x800;
+		goto detection_done;
+	}
+
+	value &= ~SPI_CS_CONTROL_SW_MODE;
+	writel(value, drv_data->ioaddr + offset + SPI_CS_CONTROL);
+	value = readl(drv_data->ioaddr + offset + SPI_CS_CONTROL);
+	if (value != orig) {
+		offset = 0x800;
+		goto detection_done;
+	}
+
+detection_done:
+	/* Now set the LPSS base */
+	drv_data->lpss_base = drv_data->ioaddr + offset;
+
+	/* Enable software chip select control */
+	value = SPI_CS_CONTROL_SW_MODE | SPI_CS_CONTROL_CS_HIGH;
+	__lpss_ssp_write_priv(drv_data, SPI_CS_CONTROL, value);
+}
+
+static void lpss_ssp_cs_control(struct driver_data *drv_data, bool enable)
+{
+	u32 value;
+
+	if (!is_lpss_ssp(drv_data))
+		return;
+
+	value = __lpss_ssp_read_priv(drv_data, SPI_CS_CONTROL);
+	if (enable)
+		value &= ~SPI_CS_CONTROL_CS_HIGH;
+	else
+		value |= SPI_CS_CONTROL_CS_HIGH;
+	__lpss_ssp_write_priv(drv_data, SPI_CS_CONTROL, value);
+}
+
 static void cs_assert(struct driver_data *drv_data)
 {
 	struct chip_data *chip = drv_data->cur_chip;
@@ -75,8 +168,12 @@ static void cs_assert(struct driver_data *drv_data)
 		return;
 	}
 
-	if (gpio_is_valid(chip->gpio_cs))
+	if (gpio_is_valid(chip->gpio_cs)) {
 		gpio_set_value(chip->gpio_cs, chip->gpio_cs_inverted);
+		return;
+	}
+
+	lpss_ssp_cs_control(drv_data, true);
 }
 
 static void cs_deassert(struct driver_data *drv_data)
@@ -91,8 +188,12 @@ static void cs_deassert(struct driver_data *drv_data)
 		return;
 	}
 
-	if (gpio_is_valid(chip->gpio_cs))
+	if (gpio_is_valid(chip->gpio_cs)) {
 		gpio_set_value(chip->gpio_cs, !chip->gpio_cs_inverted);
+		return;
+	}
+
+	lpss_ssp_cs_control(drv_data, false);
 }
 
 int pxa2xx_spi_flush(struct driver_data *drv_data)
@@ -642,6 +743,13 @@ static void pump_transfers(unsigned long data)
 		write_SSSR_CS(drv_data, drv_data->clear_sr);
 	}
 
+	if (is_lpss_ssp(drv_data)) {
+		if ((read_SSIRF(reg) & 0xff) != chip->lpss_rx_threshold)
+			write_SSIRF(chip->lpss_rx_threshold, reg);
+		if ((read_SSITF(reg) & 0xffff) != chip->lpss_tx_threshold)
+			write_SSITF(chip->lpss_tx_threshold, reg);
+	}
+
 	/* see if we need to reload the config registers */
 	if ((read_SSCR0(reg) != cr0)
 		|| (read_SSCR1(reg) & SSCR1_CHANGE_MASK) !=
@@ -754,8 +862,17 @@ static int setup(struct spi_device *spi)
 	struct chip_data *chip;
 	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
 	unsigned int clk_div;
-	uint tx_thres = TX_THRESH_DFLT;
-	uint rx_thres = RX_THRESH_DFLT;
+	uint tx_thres, tx_hi_thres, rx_thres;
+
+	if (is_lpss_ssp(drv_data)) {
+		tx_thres = LPSS_TX_LOTHRESH_DFLT;
+		tx_hi_thres = LPSS_TX_HITHRESH_DFLT;
+		rx_thres = LPSS_RX_THRESH_DFLT;
+	} else {
+		tx_thres = TX_THRESH_DFLT;
+		tx_hi_thres = 0;
+		rx_thres = RX_THRESH_DFLT;
+	}
 
 	if (!pxa25x_ssp_comp(drv_data)
 		&& (spi->bits_per_word < 4 || spi->bits_per_word > 32)) {
@@ -808,6 +925,8 @@ static int setup(struct spi_device *spi)
 			chip->timeout = chip_info->timeout;
 		if (chip_info->tx_threshold)
 			tx_thres = chip_info->tx_threshold;
+		if (chip_info->tx_hi_threshold)
+			tx_hi_thres = chip_info->tx_hi_threshold;
 		if (chip_info->rx_threshold)
 			rx_thres = chip_info->rx_threshold;
 		chip->enable_dma = drv_data->master_info->enable_dma;
@@ -819,6 +938,10 @@ static int setup(struct spi_device *spi)
 	chip->threshold = (SSCR1_RxTresh(rx_thres) & SSCR1_RFT) |
 			(SSCR1_TxTresh(tx_thres) & SSCR1_TFT);
 
+	chip->lpss_rx_threshold = SSIRF_RxThresh(rx_thres);
+	chip->lpss_tx_threshold = SSITF_TxLoThresh(tx_thres)
+				| SSITF_TxHiThresh(tx_hi_thres);
+
 	/* set dma burst and threshold outside of chip_info path so that if
 	 * chip_info goes away after setting chip->enable_dma, the
 	 * burst and threshold can still respond to changes in bits_per_word */
@@ -1006,6 +1129,8 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		write_SSTO(0, drv_data->ioaddr);
 	write_SSPSP(0, drv_data->ioaddr);
 
+	lpss_ssp_setup(drv_data);
+
 	tasklet_init(&drv_data->pump_transfers, pump_transfers,
 		     (unsigned long)drv_data);
 
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 97ff4717e6ac..5adc2a11c7bc 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -86,6 +86,8 @@ struct driver_data {
 	int (*read)(struct driver_data *drv_data);
 	irqreturn_t (*transfer_handler)(struct driver_data *drv_data);
 	void (*cs_control)(u32 command);
+
+	void __iomem *lpss_base;
 };
 
 struct chip_data {
@@ -97,6 +99,8 @@ struct chip_data {
 	u32 dma_burst_size;
 	u32 threshold;
 	u32 dma_threshold;
+	u16 lpss_rx_threshold;
+	u16 lpss_tx_threshold;
 	u8 enable_dma;
 	u8 bits_per_word;
 	u32 speed_hz;
@@ -124,6 +128,8 @@ DEFINE_SSP_REG(SSITR, 0x0c)
 DEFINE_SSP_REG(SSDR, 0x10)
 DEFINE_SSP_REG(SSTO, 0x28)
 DEFINE_SSP_REG(SSPSP, 0x2c)
+DEFINE_SSP_REG(SSITF, SSITF)
+DEFINE_SSP_REG(SSIRF, SSIRF)
 
 #define START_STATE ((void *)0)
 #define RUNNING_STATE ((void *)1)
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 065e7f6c3ad7..467cc6307b62 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -155,6 +155,14 @@
 #define SSACD_ACDS(x)		((x) << 0)	/* Audio clock divider select */
 #define SSACD_SCDX8		(1 << 7)	/* SYSCLK division ratio select */
 
+/* LPSS SSP */
+#define SSITF			0x44		/* TX FIFO trigger level */
+#define SSITF_TxLoThresh(x)	(((x) - 1) << 8)
+#define SSITF_TxHiThresh(x)	((x) - 1)
+
+#define SSIRF			0x48		/* RX FIFO trigger level */
+#define SSIRF_RxThresh(x)	((x) - 1)
+
 enum pxa_ssp_type {
 	SSP_UNDEFINED = 0,
 	PXA25x_SSP,  /* pxa 210, 250, 255, 26x */
@@ -164,6 +172,7 @@ enum pxa_ssp_type {
 	PXA168_SSP,
 	PXA910_SSP,
 	CE4100_SSP,
+	LPSS_SSP,
 };
 
 struct ssp_device {
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index e5cbbc4c57f7..82d5111cd0c2 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -44,6 +44,7 @@ struct pxa2xx_spi_master {
  */
 struct pxa2xx_spi_chip {
 	u8 tx_threshold;
+	u8 tx_hi_threshold;
 	u8 rx_threshold;
 	u8 dma_burst_size;
 	u32 timeout;
-- 
cgit v1.2.3


From fe20d71f25400cccc8bffef865f79250be7dbc81 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 21 Nov 2012 17:32:30 +0100
Subject: uprobes: Kill uprobe_consumer->filter()

uprobe_consumer->filter() is pointless in its current form, kill it.

We will add it back, but with the different signature/semantics. Perhaps
we will even re-introduce the callsite in handler_chain(), but not to
just skip uc->handler().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 include/linux/uprobes.h     | 5 -----
 kernel/events/uprobes.c     | 6 ++----
 kernel/trace/trace_uprobe.c | 1 -
 3 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4f628a6fc5b4..83742b91ff73 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -37,11 +37,6 @@ struct inode;
 
 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
-	/*
-	 * filter is optional; If a filter exists, handler is run
-	 * if and only if filter returns true.
-	 */
-	bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
 
 	struct uprobe_consumer *next;
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a39d8163b713..5cbebac27c01 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -477,10 +477,8 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 		return;
 
 	down_read(&uprobe->consumer_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		if (!uc->filter || uc->filter(uc, current))
-			uc->handler(uc, regs);
-	}
+	for (uc = uprobe->consumers; uc; uc = uc->next)
+		uc->handler(uc, regs);
 	up_read(&uprobe->consumer_rwsem);
 }
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 87b6db4ccbc5..e668024773d4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -550,7 +550,6 @@ static int probe_event_enable(struct trace_uprobe *tu, int flag)
 		return -EINTR;
 
 	utc->cons.handler = uprobe_dispatcher;
-	utc->cons.filter = NULL;
 	ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
 	if (ret) {
 		kfree(utc);
-- 
cgit v1.2.3


From 8a7f2fa0dea3b019500961b86d765e6fdd4bffb2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 28 Dec 2012 17:58:38 +0100
Subject: uprobes: Reintroduce uprobe_consumer->filter()

Finally add uprobe_consumer->filter() and change consumer_filter()
to actually call this method.

Note that ->filter() accepts mm_struct, not task_struct. Because:

	1. We do not have for_each_mm_user(mm, task).

	2. Even if we implement for_each_mm_user(), ->filter() can
	   use it itself.

	3. It is not clear who will actually need this interface to
	   do the "nontrivial" filtering.

Another argument is "enum uprobe_filter_ctx", consumer->filter() can
use it to figure out why/where it was called. For example, perhaps
we can add UPROBE_FILTER_PRE_REGISTER used by build_map_info() to
quickly "nack" the unwanted mm's. In this case consumer should know
that it is called under ->i_mmap_mutex.

See the previous discussion at http://marc.info/?t=135214229700002
Perhaps we should pass more arguments, vma/vaddr?

Note: this patch obviously can't help to filter out the child created
by fork(), this will be addressed later.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 include/linux/uprobes.h |  9 +++++++++
 kernel/events/uprobes.c | 18 +++++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 83742b91ff73..c2df6934fdc6 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,8 +35,17 @@ struct inode;
 # include <asm/uprobes.h>
 #endif
 
+enum uprobe_filter_ctx {
+	UPROBE_FILTER_REGISTER,
+	UPROBE_FILTER_UNREGISTER,
+	UPROBE_FILTER_MMAP,
+};
+
 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
+	bool (*filter)(struct uprobe_consumer *self,
+				enum uprobe_filter_ctx ctx,
+				struct mm_struct *mm);
 
 	struct uprobe_consumer *next;
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 33912086d54e..c2737be3c4b8 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -579,19 +579,21 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	return ret;
 }
 
-static inline bool consumer_filter(struct uprobe_consumer *uc)
+static inline bool consumer_filter(struct uprobe_consumer *uc,
+				   enum uprobe_filter_ctx ctx, struct mm_struct *mm)
 {
-	return true; /* TODO: !uc->filter || uc->filter(...) */
+	return !uc->filter || uc->filter(uc, ctx, mm);
 }
 
-static bool filter_chain(struct uprobe *uprobe)
+static bool filter_chain(struct uprobe *uprobe,
+			 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
 {
 	struct uprobe_consumer *uc;
 	bool ret = false;
 
 	down_read(&uprobe->consumer_rwsem);
 	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		ret = consumer_filter(uc);
+		ret = consumer_filter(uc, ctx, mm);
 		if (ret)
 			break;
 	}
@@ -772,10 +774,12 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 
 		if (is_register) {
 			/* consult only the "caller", new consumer. */
-			if (consumer_filter(uprobe->consumers))
+			if (consumer_filter(uprobe->consumers,
+					UPROBE_FILTER_REGISTER, mm))
 				err = install_breakpoint(uprobe, mm, vma, info->vaddr);
 		} else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
-			if (!filter_chain(uprobe))
+			if (!filter_chain(uprobe,
+					UPROBE_FILTER_UNREGISTER, mm))
 				err |= remove_breakpoint(uprobe, mm, info->vaddr);
 		}
 
@@ -968,7 +972,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	 */
 	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
 		if (!fatal_signal_pending(current) &&
-		    filter_chain(uprobe)) {
+		    filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) {
 			unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
 			install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
 		}
-- 
cgit v1.2.3


From da1816b1caeccdff04531e763bb35d7caa3ed19f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 29 Dec 2012 17:49:11 +0100
Subject: uprobes: Teach handler_chain() to filter out the probed task

Currrently the are 2 problems with pre-filtering:

1. It is not possible to add/remove a task (mm) after uprobe_register()

2. A forked child inherits all breakpoints and uprobe_consumer can not
   control this.

This patch does the first step to improve the filtering. handler_chain()
removes the breakpoints installed by this uprobe from current->mm if all
handlers return UPROBE_HANDLER_REMOVE.

Note that handler_chain() relies on ->register_rwsem to avoid the race
with uprobe_register/unregister which can add/del a consumer, or even
remove and then insert the new uprobe at the same address.

Perhaps we will add uprobe_apply_mm(uprobe, mm, is_register) and teach
copy_mm() to do filter(UPROBE_FILTER_FORK), but I think this change makes
sense anyway.

Note: instead of checking the retcode from uc->handler, we could add
uc->filter(UPROBE_FILTER_BPHIT). But I think this is not optimal to
call 2 hooks in a row. This buys nothing, and if handler/filter do
something nontrivial they will probably do the same work twice.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 include/linux/uprobes.h |  3 +++
 kernel/events/uprobes.c | 58 ++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 51 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index c2df6934fdc6..95d0002efda5 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,6 +35,9 @@ struct inode;
 # include <asm/uprobes.h>
 #endif
 
+#define UPROBE_HANDLER_REMOVE		1
+#define UPROBE_HANDLER_MASK		1
+
 enum uprobe_filter_ctx {
 	UPROBE_FILTER_REGISTER,
 	UPROBE_FILTER_UNREGISTER,
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c2737be3c4b8..04c104ad9522 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -440,16 +440,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	return uprobe;
 }
 
-static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
-{
-	struct uprobe_consumer *uc;
-
-	down_read(&uprobe->register_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next)
-		uc->handler(uc, regs);
-	up_read(&uprobe->register_rwsem);
-}
-
 static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
 	down_write(&uprobe->consumer_rwsem);
@@ -882,6 +872,33 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 	put_uprobe(uprobe);
 }
 
+static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int err = 0;
+
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long vaddr;
+		loff_t offset;
+
+		if (!valid_vma(vma, false) ||
+		    vma->vm_file->f_mapping->host != uprobe->inode)
+			continue;
+
+		offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
+		if (uprobe->offset <  offset ||
+		    uprobe->offset >= offset + vma->vm_end - vma->vm_start)
+			continue;
+
+		vaddr = offset_to_vaddr(vma, uprobe->offset);
+		err |= remove_breakpoint(uprobe, mm, vaddr);
+	}
+	up_read(&mm->mmap_sem);
+
+	return err;
+}
+
 static struct rb_node *
 find_node_in_range(struct inode *inode, loff_t min, loff_t max)
 {
@@ -1435,6 +1452,27 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	return uprobe;
 }
 
+static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	struct uprobe_consumer *uc;
+	int remove = UPROBE_HANDLER_REMOVE;
+
+	down_read(&uprobe->register_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		int rc = uc->handler(uc, regs);
+
+		WARN(rc & ~UPROBE_HANDLER_MASK,
+			"bad rc=0x%x from %pf()\n", rc, uc->handler);
+		remove &= rc;
+	}
+
+	if (remove && uprobe->consumers) {
+		WARN_ON(!uprobe_is_active(uprobe));
+		unapply_uprobe(uprobe, current->mm);
+	}
+	up_read(&uprobe->register_rwsem);
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
-- 
cgit v1.2.3


From f22c1bb6b4706be3502b378cb14564449b15f983 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 2 Feb 2013 16:27:52 +0100
Subject: perf: Introduce hw_perf_event->tp_target and ->tp_list

sys_perf_event_open()->perf_init_event(event) is called before
find_get_context(event), this means that event->ctx == NULL when
class->reg(TRACE_REG_PERF_REGISTER/OPEN) is called and thus it
can't know if this event is per-task or system-wide.

This patch adds hw_perf_event->tp_target for PERF_TYPE_TRACEPOINT,
this is analogous to PERF_TYPE_BREAKPOINT/bp_target we already have.
The patch also moves ->bp_target up so that it can overlap with the
new member, this can help the compiler to generate the better code.

trace_uprobe_register() will use it for prefiltering to avoid the
unnecessary breakpoints in mm's we do not want to trace.

->tp_target doesn't have its own reference, but we can rely on the
fact that either sys_perf_event_open() holds a reference, or it is
equal to event->ctx->task. So this pointer is always valid until
free_event().

Also add the "struct list_head tp_list" into this union. It is not
strictly necessary, but it can simplify the next changes and we can
add it for free.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/perf_event.h | 9 +++++++--
 kernel/events/core.c       | 5 ++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 42adf012145d..e47ee462c2f2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -135,16 +135,21 @@ struct hw_perf_event {
 		struct { /* software */
 			struct hrtimer	hrtimer;
 		};
+		struct { /* tracepoint */
+			struct task_struct	*tp_target;
+			/* for tp_event->class */
+			struct list_head	tp_list;
+		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
-			struct arch_hw_breakpoint	info;
-			struct list_head		bp_list;
 			/*
 			 * Crufty hack to avoid the chicken and egg
 			 * problem hw_breakpoint has with context
 			 * creation and event initalization.
 			 */
 			struct task_struct		*bp_target;
+			struct arch_hw_breakpoint	info;
+			struct list_head		bp_list;
 		};
 #endif
 	};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 301079d06f24..e2d4323c6ae6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6162,11 +6162,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	if (task) {
 		event->attach_state = PERF_ATTACH_TASK;
+
+		if (attr->type == PERF_TYPE_TRACEPOINT)
+			event->hw.tp_target = task;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		/*
 		 * hw_breakpoint is a bit difficult here..
 		 */
-		if (attr->type == PERF_TYPE_BREAKPOINT)
+		else if (attr->type == PERF_TYPE_BREAKPOINT)
 			event->hw.bp_target = task;
 #endif
 	}
-- 
cgit v1.2.3


From bdf8647c44766590ed02f9a84a450a796558b753 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 3 Feb 2013 19:21:12 +0100
Subject: uprobes: Introduce uprobe_apply()

Currently it is not possible to change the filtering constraints after
uprobe_register(), so a consumer can not, say, start to trace a task/mm
which was previously filtered out, or remove the no longer needed bp's.

Introduce uprobe_apply() which simply does register_for_each_vma() again
to consult uprobe_consumer->filter() and install/remove the breakpoints.
The only complication is that register_for_each_vma() can no longer
assume that uprobe->consumers should be consulter if is_register == T,
so we change it to accept "struct uprobe_consumer *new" instead.

Unlike uprobe_register(), uprobe_apply(true) doesn't do "unregister" if
register_for_each_vma() fails, it is up to caller to handle the error.

Note: we probably need to cleanup the current interface, it is strange
that uprobe_apply/unregister need inode/offset. We should either change
uprobe_register() to return "struct uprobe *", or add a private ->uprobe
member in uprobe_consumer. And in the long term uprobe_apply() should
take a single argument, uprobe or consumer, even "bool add" should go
away.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h |  6 ++++++
 kernel/events/uprobes.c | 39 +++++++++++++++++++++++++++++++++++----
 2 files changed, 41 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 95d0002efda5..02b83db8e2c5 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -101,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
+extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_mmap(struct vm_area_struct *vma);
 extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
@@ -124,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
 	return -ENOSYS;
 }
+static inline int
+uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
+{
+	return -ENOSYS;
+}
 static inline void
 uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 221fc58f59e3..a567c8c7ef31 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -733,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 	return curr;
 }
 
-static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
+static int
+register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 {
+	bool is_register = !!new;
 	struct map_info *info;
 	int err = 0;
 
@@ -765,7 +767,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 
 		if (is_register) {
 			/* consult only the "caller", new consumer. */
-			if (consumer_filter(uprobe->consumers,
+			if (consumer_filter(new,
 					UPROBE_FILTER_REGISTER, mm))
 				err = install_breakpoint(uprobe, mm, vma, info->vaddr);
 		} else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
@@ -788,7 +790,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
 	consumer_add(uprobe, uc);
-	return register_for_each_vma(uprobe, true);
+	return register_for_each_vma(uprobe, uc);
 }
 
 static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
@@ -798,7 +800,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
 	if (!consumer_del(uprobe, uc))	/* WARN? */
 		return;
 
-	err = register_for_each_vma(uprobe, false);
+	err = register_for_each_vma(uprobe, NULL);
 	/* TODO : cant unregister? schedule a worker thread */
 	if (!uprobe->consumers && !err)
 		delete_uprobe(uprobe);
@@ -854,6 +856,35 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 }
 EXPORT_SYMBOL_GPL(uprobe_register);
 
+/*
+ * uprobe_apply - unregister a already registered probe.
+ * @inode: the file in which the probe has to be removed.
+ * @offset: offset from the start of the file.
+ * @uc: consumer which wants to add more or remove some breakpoints
+ * @add: add or remove the breakpoints
+ */
+int uprobe_apply(struct inode *inode, loff_t offset,
+			struct uprobe_consumer *uc, bool add)
+{
+	struct uprobe *uprobe;
+	struct uprobe_consumer *con;
+	int ret = -ENOENT;
+
+	uprobe = find_uprobe(inode, offset);
+	if (!uprobe)
+		return ret;
+
+	down_write(&uprobe->register_rwsem);
+	for (con = uprobe->consumers; con && con != uc ; con = con->next)
+		;
+	if (con)
+		ret = register_for_each_vma(uprobe, add ? uc : NULL);
+	up_write(&uprobe->register_rwsem);
+	put_uprobe(uprobe);
+
+	return ret;
+}
+
 /*
  * uprobe_unregister - unregister a already registered probe.
  * @inode: the file in which the probe has to be removed.
-- 
cgit v1.2.3


From e5e67305885eb12849b5475764b0542f03dc2b59 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 8 Feb 2013 10:17:15 +0000
Subject: skbuff: Move definition of NETDEV_FRAG_PAGE_MAX_SIZE

In order to address the fact that some devices cannot support the full 32K
frag size we need to have the value accessible somewhere so that we can use it
to do comparisons against what the device can support.  As such I am moving
the values out of skbuff.c and into skbuff.h.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++++
 net/core/skbuff.c      | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0259b719bebf..d7573c37a51d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1832,6 +1832,10 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 		kfree_skb(skb);
 }
 
+#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
+
 extern void *netdev_alloc_frag(unsigned int fragsz);
 
 extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 55f7ef6ada6d..6114c1143564 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -351,10 +351,6 @@ struct netdev_alloc_cache {
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
-#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
-#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
-#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
-
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
-- 
cgit v1.2.3


From d6d3c4e656513dcea61ce900f0ecb9ca820ee7cd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Feb 2013 15:30:56 -0500
Subject: OF: convert devtree lock from rw_lock to raw spinlock

With the locking cleanup in place (from "OF: Fixup resursive
locking code paths"), we can now do the conversion from the
rw_lock to a raw spinlock as required for preempt-rt.

The previous cleanup and this conversion were originally
separate since they predated when mainline got raw spinlock (in
commit c2f21ce2e31286a "locking: Implement new raw_spinlock").

So, at that point in time, the cleanup was considered plausible
for mainline, but not this conversion.  In any case, we've kept
them separate as it makes for easier review and better bisection.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[PG: taken from preempt-rt, update subject & add a commit log]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 arch/sparc/kernel/prom_common.c |   4 +-
 drivers/of/base.c               | 100 ++++++++++++++++++++++------------------
 include/linux/of.h              |   2 +-
 3 files changed, 59 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
index 1303021748c8..9f20566b0773 100644
--- a/arch/sparc/kernel/prom_common.c
+++ b/arch/sparc/kernel/prom_common.c
@@ -64,7 +64,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
 	err = -ENODEV;
 
 	mutex_lock(&of_set_property_mutex);
-	write_lock(&devtree_lock);
+	raw_spin_lock(&devtree_lock);
 	prevp = &dp->properties;
 	while (*prevp) {
 		struct property *prop = *prevp;
@@ -91,7 +91,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
 		}
 		prevp = &(*prevp)->next;
 	}
-	write_unlock(&devtree_lock);
+	raw_spin_unlock(&devtree_lock);
 	mutex_unlock(&of_set_property_mutex);
 
 	/* XXX Upate procfs if necessary... */
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 16ee7a08e044..f86be5594a15 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -55,7 +55,7 @@ static DEFINE_MUTEX(of_aliases_mutex);
 /* use when traversing tree through the allnext, child, sibling,
  * or parent members of struct device_node.
  */
-DEFINE_RWLOCK(devtree_lock);
+DEFINE_RAW_SPINLOCK(devtree_lock);
 
 int of_n_addr_cells(struct device_node *np)
 {
@@ -188,10 +188,11 @@ struct property *of_find_property(const struct device_node *np,
 				  int *lenp)
 {
 	struct property *pp;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	pp = __of_find_property(np, name, lenp);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	return pp;
 }
@@ -209,13 +210,13 @@ struct device_node *of_find_all_nodes(struct device_node *prev)
 {
 	struct device_node *np;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock(&devtree_lock);
 	np = prev ? prev->allnext : of_allnodes;
 	for (; np != NULL; np = np->allnext)
 		if (of_node_get(np))
 			break;
 	of_node_put(prev);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock(&devtree_lock);
 	return np;
 }
 EXPORT_SYMBOL(of_find_all_nodes);
@@ -274,11 +275,12 @@ static int __of_device_is_compatible(const struct device_node *device,
 int of_device_is_compatible(const struct device_node *device,
 		const char *compat)
 {
+	unsigned long flags;
 	int res;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	res = __of_device_is_compatible(device, compat);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return res;
 }
 EXPORT_SYMBOL(of_device_is_compatible);
@@ -340,13 +342,14 @@ EXPORT_SYMBOL(of_device_is_available);
 struct device_node *of_get_parent(const struct device_node *node)
 {
 	struct device_node *np;
+	unsigned long flags;
 
 	if (!node)
 		return NULL;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = of_node_get(node->parent);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_get_parent);
@@ -365,14 +368,15 @@ EXPORT_SYMBOL(of_get_parent);
 struct device_node *of_get_next_parent(struct device_node *node)
 {
 	struct device_node *parent;
+	unsigned long flags;
 
 	if (!node)
 		return NULL;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	parent = of_node_get(node->parent);
 	of_node_put(node);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return parent;
 }
 
@@ -388,14 +392,15 @@ struct device_node *of_get_next_child(const struct device_node *node,
 	struct device_node *prev)
 {
 	struct device_node *next;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	next = prev ? prev->sibling : node->child;
 	for (; next; next = next->sibling)
 		if (of_node_get(next))
 			break;
 	of_node_put(prev);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return next;
 }
 EXPORT_SYMBOL(of_get_next_child);
@@ -413,7 +418,7 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
 {
 	struct device_node *next;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock(&devtree_lock);
 	next = prev ? prev->sibling : node->child;
 	for (; next; next = next->sibling) {
 		if (!of_device_is_available(next))
@@ -422,7 +427,7 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
 			break;
 	}
 	of_node_put(prev);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock(&devtree_lock);
 	return next;
 }
 EXPORT_SYMBOL(of_get_next_available_child);
@@ -460,14 +465,15 @@ EXPORT_SYMBOL(of_get_child_by_name);
 struct device_node *of_find_node_by_path(const char *path)
 {
 	struct device_node *np = of_allnodes;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	for (; np; np = np->allnext) {
 		if (np->full_name && (of_node_cmp(np->full_name, path) == 0)
 		    && of_node_get(np))
 			break;
 	}
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_by_path);
@@ -487,15 +493,16 @@ struct device_node *of_find_node_by_name(struct device_node *from,
 	const char *name)
 {
 	struct device_node *np;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext)
 		if (np->name && (of_node_cmp(np->name, name) == 0)
 		    && of_node_get(np))
 			break;
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_by_name);
@@ -516,15 +523,16 @@ struct device_node *of_find_node_by_type(struct device_node *from,
 	const char *type)
 {
 	struct device_node *np;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext)
 		if (np->type && (of_node_cmp(np->type, type) == 0)
 		    && of_node_get(np))
 			break;
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_by_type);
@@ -547,8 +555,9 @@ struct device_node *of_find_compatible_node(struct device_node *from,
 	const char *type, const char *compatible)
 {
 	struct device_node *np;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext) {
 		if (type
@@ -559,7 +568,7 @@ struct device_node *of_find_compatible_node(struct device_node *from,
 			break;
 	}
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_compatible_node);
@@ -581,8 +590,9 @@ struct device_node *of_find_node_with_property(struct device_node *from,
 {
 	struct device_node *np;
 	struct property *pp;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext) {
 		for (pp = np->properties; pp; pp = pp->next) {
@@ -594,7 +604,7 @@ struct device_node *of_find_node_with_property(struct device_node *from,
 	}
 out:
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_with_property);
@@ -635,10 +645,11 @@ const struct of_device_id *of_match_node(const struct of_device_id *matches,
 					 const struct device_node *node)
 {
 	const struct of_device_id *match;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	match = __of_match_node(matches, node);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return match;
 }
 EXPORT_SYMBOL(of_match_node);
@@ -662,11 +673,12 @@ struct device_node *of_find_matching_node_and_match(struct device_node *from,
 {
 	struct device_node *np;
 	const struct of_device_id *m;
+	unsigned long flags;
 
 	if (match)
 		*match = NULL;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext) {
 		m = __of_match_node(matches, np);
@@ -677,7 +689,7 @@ struct device_node *of_find_matching_node_and_match(struct device_node *from,
 		}
 	}
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_matching_node_and_match);
@@ -720,12 +732,12 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 {
 	struct device_node *np;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock(&devtree_lock);
 	for (np = of_allnodes; np; np = np->allnext)
 		if (np->phandle == handle)
 			break;
 	of_node_get(np);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock(&devtree_lock);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_by_phandle);
@@ -1197,18 +1209,18 @@ int of_add_property(struct device_node *np, struct property *prop)
 		return rc;
 
 	prop->next = NULL;
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
 	while (*next) {
 		if (strcmp(prop->name, (*next)->name) == 0) {
 			/* duplicate ! don't insert it */
-			write_unlock_irqrestore(&devtree_lock, flags);
+			raw_spin_unlock_irqrestore(&devtree_lock, flags);
 			return -1;
 		}
 		next = &(*next)->next;
 	}
 	*next = prop;
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 #ifdef CONFIG_PROC_DEVICETREE
 	/* try to add to proc as well if it was initialized */
@@ -1238,7 +1250,7 @@ int of_remove_property(struct device_node *np, struct property *prop)
 	if (rc)
 		return rc;
 
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
 	while (*next) {
 		if (*next == prop) {
@@ -1251,7 +1263,7 @@ int of_remove_property(struct device_node *np, struct property *prop)
 		}
 		next = &(*next)->next;
 	}
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	if (!found)
 		return -ENODEV;
@@ -1291,7 +1303,7 @@ int of_update_property(struct device_node *np, struct property *newprop)
 	if (!oldprop)
 		return of_add_property(np, newprop);
 
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
 	while (*next) {
 		if (*next == oldprop) {
@@ -1305,7 +1317,7 @@ int of_update_property(struct device_node *np, struct property *newprop)
 		}
 		next = &(*next)->next;
 	}
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	if (!found)
 		return -ENODEV;
@@ -1378,12 +1390,12 @@ int of_attach_node(struct device_node *np)
 	if (rc)
 		return rc;
 
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np->sibling = np->parent->child;
 	np->allnext = of_allnodes;
 	np->parent->child = np;
 	of_allnodes = np;
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	of_add_proc_dt_entry(np);
 	return 0;
@@ -1426,17 +1438,17 @@ int of_detach_node(struct device_node *np)
 	if (rc)
 		return rc;
 
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 
 	if (of_node_check_flag(np, OF_DETACHED)) {
 		/* someone already detached it */
-		write_unlock_irqrestore(&devtree_lock, flags);
+		raw_spin_unlock_irqrestore(&devtree_lock, flags);
 		return rc;
 	}
 
 	parent = np->parent;
 	if (!parent) {
-		write_unlock_irqrestore(&devtree_lock, flags);
+		raw_spin_unlock_irqrestore(&devtree_lock, flags);
 		return rc;
 	}
 
@@ -1463,7 +1475,7 @@ int of_detach_node(struct device_node *np)
 	}
 
 	of_node_set_flag(np, OF_DETACHED);
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	of_remove_proc_dt_entry(np);
 	return rc;
diff --git a/include/linux/of.h b/include/linux/of.h
index 5ebcc5c8e423..bb35c423b1f9 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -92,7 +92,7 @@ static inline void of_node_put(struct device_node *node) { }
 extern struct device_node *of_allnodes;
 extern struct device_node *of_chosen;
 extern struct device_node *of_aliases;
-extern rwlock_t devtree_lock;
+extern raw_spinlock_t devtree_lock;
 
 static inline bool of_have_populated_dt(void)
 {
-- 
cgit v1.2.3


From 84e345e4e209cbe796c88fa2ad1732d7121ec100 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Fri, 8 Feb 2013 17:59:53 -0500
Subject: time, Fix setting of hardware clock in NTP code

At init time, if the system time is "warped" forward in warp_clock()
it will differ from the hardware clock by sys_tz.tz_minuteswest.  This time
difference is not taken into account when ntp updates the hardware clock,
and this causes the system time to jump forward by this offset every reboot.

The kernel must take this offset into account when writing the system time
to the hardware clock in the ntp code.  This patch adds
persistent_clock_is_local which indicates that an offset has been applied
in warp_clock() and accounts for the "warp" before writing the hardware
clock.

x86 does not have this problem as rtc writes are software limited to a
+/-15 minute window relative to the current rtc time.  Other arches, such
as powerpc, however do a full synchronization of the system time to the
rtc and will see this problem.

[v2]: generated against tip/timers/core

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/time.h | 1 +
 kernel/time.c        | 8 ++++++++
 kernel/time/ntp.c    | 8 ++++++--
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 476e1d7b2c37..a3ab6a814a9c 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -128,6 +128,7 @@ static inline bool has_persistent_clock(void)
 
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
+extern int persistent_clock_is_local;
 extern int update_persistent_clock(struct timespec now);
 void timekeeping_init(void);
 extern int timekeeping_suspended;
diff --git a/kernel/time.c b/kernel/time.c
index d226c6a3fd28..c2a27dd93142 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -114,6 +114,12 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
 	return 0;
 }
 
+/*
+ * Indicates if there is an offset between the system clock and the hardware
+ * clock/persistent clock/rtc.
+ */
+int persistent_clock_is_local;
+
 /*
  * Adjust the time obtained from the CMOS to be UTC time instead of
  * local time.
@@ -135,6 +141,8 @@ static inline void warp_clock(void)
 	struct timespec adjust;
 
 	adjust = current_kernel_time();
+	if (sys_tz.tz_minuteswest != 0)
+		persistent_clock_is_local = 1;
 	adjust.tv_sec += sys_tz.tz_minuteswest * 60;
 	do_settimeofday(&adjust);
 }
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 313b161504b7..b10a42bb0165 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -511,13 +511,17 @@ static void sync_cmos_clock(struct work_struct *work)
 
 	getnstimeofday(&now);
 	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) {
+		struct timespec adjust = now;
+
 		fail = -ENODEV;
+		if (persistent_clock_is_local)
+			adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
-		fail = update_persistent_clock(now);
+		fail = update_persistent_clock(adjust);
 #endif
 #ifdef CONFIG_RTC_SYSTOHC
 		if (fail == -ENODEV)
-			fail = rtc_set_ntp_time(now);
+			fail = rtc_set_ntp_time(adjust);
 #endif
 	}
 
-- 
cgit v1.2.3


From 7e73c5ae6e7991a6c01f6d096ff8afaef4458c36 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 6 Feb 2013 13:00:36 +0100
Subject: PM: Introduce suspend state PM_SUSPEND_FREEZE

PM_SUSPEND_FREEZE state is a general state that
does not need any platform specific support, it equals
frozen processes + suspended devices + idle processors.

Compared with PM_SUSPEND_MEMORY,
PM_SUSPEND_FREEZE saves less power
because the system is still in a running state.
PM_SUSPEND_FREEZE has less resume latency because it does not
touch BIOS, and the processors are in idle state.

Compared with RTPM/idle,
PM_SUSPEND_FREEZE saves more power as
1. the processor has longer sleep time because processes are frozen.
   The deeper c-state the processor supports, more power saving we can get.
2. PM_SUSPEND_FREEZE uses system suspend code path, thus we can get
   more power saving from the devices that does not have good RTPM support.

This state is useful for
1) platforms that do not have STR, or have a broken STR.
2) platforms that have an extremely low power idle state,
   which can be used to replace STR.

The following describes how PM_SUSPEND_FREEZE state works.
1. echo freeze > /sys/power/state
2. the processes are frozen.
3. all the devices are suspended.
4. all the processors are blocked by a wait queue
5. all the processors idles and enters (Deep) c-state.
6. an interrupt fires.
7. a processor is woken up and handles the irq.
8. if it is a general event,
   a) the irq handler runs and quites.
   b) goto step 4.
9. if it is a real wake event, say, power button pressing, keyboard touch, mouse moving,
   a) the irq handler runs and activate the wakeup source
   b) wakeup_source_activate() notifies the wait queue.
   c) system starts resuming from PM_SUSPEND_FREEZE
10. all the devices are resumed.
11. all the processes are unfrozen.
12. system is back to working.

Known Issue:
The wakeup of this new PM_SUSPEND_FREEZE state may behave differently
from the previous suspend state.
Take ACPI platform for example, there are some GPEs that only enabled
when the system is in sleep state, to wake the system backk from S3/S4.
But we are not touching these GPEs during transition to PM_SUSPEND_FREEZE.
This means we may lose some wake event.
But on the other hand, as we do not disable all the Interrupts during
PM_SUSPEND_FREEZE, we may get some extra "wakeup" Interrupts, that are
not available for S3/S4.

The patches has been tested on an old Sony laptop, and here are the results:

Average Power:
1. RPTM/idle for half an hour:
   14.8W, 12.6W, 14.1W, 12.5W, 14.4W, 13.2W, 12.9W
2. Freeze for half an hour:
   11W, 10.4W, 9.4W, 11.3W 10.5W
3. RTPM/idle for three hours:
   11.6W
4. Freeze for three hours:
   10W
5. Suspend to Memory:
   0.5~0.9W

Average Resume Latency:
1. RTPM/idle with a black screen: (From pressing keyboard to screen back)
   Less than 0.2s
2. Freeze: (From pressing power button to screen back)
   2.50s
3. Suspend to Memory: (From pressing power button to screen back)
   4.33s

>From the results, we can see that all the platforms should benefit from
this patch, even if it does not have Low Power S0.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeup.c |  6 ++++
 include/linux/suspend.h     |  6 +++-
 kernel/power/main.c         |  2 +-
 kernel/power/suspend.c      | 69 ++++++++++++++++++++++++++++++++++++---------
 4 files changed, 68 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index e6ee5e80e546..79715e7fa43e 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -382,6 +382,12 @@ static void wakeup_source_activate(struct wakeup_source *ws)
 {
 	unsigned int cec;
 
+	/*
+	 * active wakeup source should bring the system
+	 * out of PM_SUSPEND_FREEZE state
+	 */
+	freeze_wake();
+
 	ws->active = true;
 	ws->active_count++;
 	ws->last_time = ktime_get();
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 0c808d7fa579..d4e3f16d5e89 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -34,8 +34,10 @@ static inline void pm_restore_console(void)
 typedef int __bitwise suspend_state_t;
 
 #define PM_SUSPEND_ON		((__force suspend_state_t) 0)
-#define PM_SUSPEND_STANDBY	((__force suspend_state_t) 1)
+#define PM_SUSPEND_FREEZE	((__force suspend_state_t) 1)
+#define PM_SUSPEND_STANDBY	((__force suspend_state_t) 2)
 #define PM_SUSPEND_MEM		((__force suspend_state_t) 3)
+#define PM_SUSPEND_MIN		PM_SUSPEND_FREEZE
 #define PM_SUSPEND_MAX		((__force suspend_state_t) 4)
 
 enum suspend_stat_step {
@@ -192,6 +194,7 @@ struct platform_suspend_ops {
  */
 extern void suspend_set_ops(const struct platform_suspend_ops *ops);
 extern int suspend_valid_only_mem(suspend_state_t state);
+extern void freeze_wake(void);
 
 /**
  * arch_suspend_disable_irqs - disable IRQs for suspend
@@ -217,6 +220,7 @@ extern int pm_suspend(suspend_state_t state);
 
 static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {}
 static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; }
+static inline void freeze_wake(void) {}
 #endif /* !CONFIG_SUSPEND */
 
 /* struct pbe is used for creating lists of pages that should be restored
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 1c16f9167de1..b1c26a92ca9f 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -313,7 +313,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
 static suspend_state_t decode_state(const char *buf, size_t n)
 {
 #ifdef CONFIG_SUSPEND
-	suspend_state_t state = PM_SUSPEND_STANDBY;
+	suspend_state_t state = PM_SUSPEND_MIN;
 	const char * const *s;
 #endif
 	char *p;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c8b7446b27df..d4feda084a3a 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -30,12 +30,38 @@
 #include "power.h"
 
 const char *const pm_states[PM_SUSPEND_MAX] = {
+	[PM_SUSPEND_FREEZE]	= "freeze",
 	[PM_SUSPEND_STANDBY]	= "standby",
 	[PM_SUSPEND_MEM]	= "mem",
 };
 
 static const struct platform_suspend_ops *suspend_ops;
 
+static bool need_suspend_ops(suspend_state_t state)
+{
+	return !!(state > PM_SUSPEND_FREEZE);
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head);
+static bool suspend_freeze_wake;
+
+static void freeze_begin(void)
+{
+	suspend_freeze_wake = false;
+}
+
+static void freeze_enter(void)
+{
+	wait_event(suspend_freeze_wait_head, suspend_freeze_wake);
+}
+
+void freeze_wake(void)
+{
+	suspend_freeze_wake = true;
+	wake_up(&suspend_freeze_wait_head);
+}
+EXPORT_SYMBOL_GPL(freeze_wake);
+
 /**
  * suspend_set_ops - Set the global suspend method table.
  * @ops: Suspend operations to use.
@@ -50,8 +76,11 @@ EXPORT_SYMBOL_GPL(suspend_set_ops);
 
 bool valid_state(suspend_state_t state)
 {
+	if (state == PM_SUSPEND_FREEZE)
+		return true;
 	/*
-	 * All states need lowlevel support and need to be valid to the lowlevel
+	 * PM_SUSPEND_STANDBY and PM_SUSPEND_MEMORY states need lowlevel
+	 * support and need to be valid to the lowlevel
 	 * implementation, no valid callback implies that none are valid.
 	 */
 	return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
@@ -89,11 +118,11 @@ static int suspend_test(int level)
  * hibernation).  Run suspend notifiers, allocate the "suspend" console and
  * freeze processes.
  */
-static int suspend_prepare(void)
+static int suspend_prepare(suspend_state_t state)
 {
 	int error;
 
-	if (!suspend_ops || !suspend_ops->enter)
+	if (need_suspend_ops(state) && (!suspend_ops || !suspend_ops->enter))
 		return -EPERM;
 
 	pm_prepare_console();
@@ -137,7 +166,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 {
 	int error;
 
-	if (suspend_ops->prepare) {
+	if (need_suspend_ops(state) && suspend_ops->prepare) {
 		error = suspend_ops->prepare();
 		if (error)
 			goto Platform_finish;
@@ -149,12 +178,23 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 		goto Platform_finish;
 	}
 
-	if (suspend_ops->prepare_late) {
+	if (need_suspend_ops(state) && suspend_ops->prepare_late) {
 		error = suspend_ops->prepare_late();
 		if (error)
 			goto Platform_wake;
 	}
 
+	/*
+	 * PM_SUSPEND_FREEZE equals
+	 * frozen processes + suspended devices + idle processors.
+	 * Thus we should invoke freeze_enter() soon after
+	 * all the devices are suspended.
+	 */
+	if (state == PM_SUSPEND_FREEZE) {
+		freeze_enter();
+		goto Platform_wake;
+	}
+
 	if (suspend_test(TEST_PLATFORM))
 		goto Platform_wake;
 
@@ -182,13 +222,13 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	enable_nonboot_cpus();
 
  Platform_wake:
-	if (suspend_ops->wake)
+	if (need_suspend_ops(state) && suspend_ops->wake)
 		suspend_ops->wake();
 
 	dpm_resume_start(PMSG_RESUME);
 
  Platform_finish:
-	if (suspend_ops->finish)
+	if (need_suspend_ops(state) && suspend_ops->finish)
 		suspend_ops->finish();
 
 	return error;
@@ -203,11 +243,11 @@ int suspend_devices_and_enter(suspend_state_t state)
 	int error;
 	bool wakeup = false;
 
-	if (!suspend_ops)
+	if (need_suspend_ops(state) && !suspend_ops)
 		return -ENOSYS;
 
 	trace_machine_suspend(state);
-	if (suspend_ops->begin) {
+	if (need_suspend_ops(state) && suspend_ops->begin) {
 		error = suspend_ops->begin(state);
 		if (error)
 			goto Close;
@@ -226,7 +266,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 
 	do {
 		error = suspend_enter(state, &wakeup);
-	} while (!error && !wakeup
+	} while (!error && !wakeup && need_suspend_ops(state)
 		&& suspend_ops->suspend_again && suspend_ops->suspend_again());
 
  Resume_devices:
@@ -236,13 +276,13 @@ int suspend_devices_and_enter(suspend_state_t state)
 	ftrace_start();
 	resume_console();
  Close:
-	if (suspend_ops->end)
+	if (need_suspend_ops(state) && suspend_ops->end)
 		suspend_ops->end();
 	trace_machine_suspend(PWR_EVENT_EXIT);
 	return error;
 
  Recover_platform:
-	if (suspend_ops->recover)
+	if (need_suspend_ops(state) && suspend_ops->recover)
 		suspend_ops->recover();
 	goto Resume_devices;
 }
@@ -278,12 +318,15 @@ static int enter_state(suspend_state_t state)
 	if (!mutex_trylock(&pm_mutex))
 		return -EBUSY;
 
+	if (state == PM_SUSPEND_FREEZE)
+		freeze_begin();
+
 	printk(KERN_INFO "PM: Syncing filesystems ... ");
 	sys_sync();
 	printk("done.\n");
 
 	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
-	error = suspend_prepare();
+	error = suspend_prepare(state);
 	if (error)
 		goto Unlock;
 
-- 
cgit v1.2.3


From 957d1282bb8c07e682e142b9237cd9fcb8348a0b Mon Sep 17 00:00:00 2001
From: Li Fei <fei.li@intel.com>
Date: Fri, 1 Feb 2013 08:56:03 +0000
Subject: suspend: enable freeze timeout configuration through sys

At present, the value of timeout for freezing is 20s, which is
meaningless in case that one thread is frozen with mutex locked
and another thread is trying to lock the mutex, as this time of
freezing will fail unavoidably.
And if there is no new wakeup event registered, the system will
waste at most 20s for such meaningless trying of freezing.

With this patch, the value of timeout can be configured to smaller
value, so such meaningless trying of freezing will be aborted in
earlier time, and later freezing can be also triggered in earlier
time. And more power will be saved.
In normal case on mobile phone, it costs real little time to freeze
processes. On some platform, it only costs about 20ms to freeze
user space processes and 10ms to freeze kernel freezable threads.

Signed-off-by: Liu Chuansheng <chuansheng.liu@intel.com>
Signed-off-by: Li Fei <fei.li@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/freezing-of-tasks.txt |  5 +++++
 include/linux/freezer.h                   |  5 +++++
 kernel/power/main.c                       | 27 +++++++++++++++++++++++++++
 kernel/power/process.c                    |  4 ++--
 4 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index 6ec291ea1c78..85894d83b352 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -223,3 +223,8 @@ since they ask the freezer to skip freezing this task, since it is anyway
 only after the entire suspend/hibernation sequence is complete.
 So, to summarize, use [un]lock_system_sleep() instead of directly using
 mutex_[un]lock(&pm_mutex). That would prevent freezing failures.
+
+V. Miscellaneous
+/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
+all user space processes or all freezable kernel threads, in unit of millisecond.
+The default value is 20000, with range of unsigned integer.
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index e4238ceaa4d6..e70df40d84f6 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -12,6 +12,11 @@ extern atomic_t system_freezing_cnt;	/* nr of freezing conds in effect */
 extern bool pm_freezing;		/* PM freezing in effect */
 extern bool pm_nosig_freezing;		/* PM nosig freezing in effect */
 
+/*
+ * Timeout for stopping processes
+ */
+extern unsigned int freeze_timeout_msecs;
+
 /*
  * Check if a process has been frozen
  */
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b1c26a92ca9f..d77663bfedeb 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -553,6 +553,30 @@ power_attr(pm_trace_dev_match);
 
 #endif /* CONFIG_PM_TRACE */
 
+#ifdef CONFIG_FREEZER
+static ssize_t pm_freeze_timeout_show(struct kobject *kobj,
+				      struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", freeze_timeout_msecs);
+}
+
+static ssize_t pm_freeze_timeout_store(struct kobject *kobj,
+				       struct kobj_attribute *attr,
+				       const char *buf, size_t n)
+{
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val))
+		return -EINVAL;
+
+	freeze_timeout_msecs = val;
+	return n;
+}
+
+power_attr(pm_freeze_timeout);
+
+#endif	/* CONFIG_FREEZER*/
+
 static struct attribute * g[] = {
 	&state_attr.attr,
 #ifdef CONFIG_PM_TRACE
@@ -575,6 +599,9 @@ static struct attribute * g[] = {
 #ifdef CONFIG_PM_SLEEP_DEBUG
 	&pm_print_times_attr.attr,
 #endif
+#endif
+#ifdef CONFIG_FREEZER
+	&pm_freeze_timeout_attr.attr,
 #endif
 	NULL,
 };
diff --git a/kernel/power/process.c b/kernel/power/process.c
index d5a258b60c6f..98088e0e71e8 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -21,7 +21,7 @@
 /* 
  * Timeout for stopping processes
  */
-#define TIMEOUT	(20 * HZ)
+unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC;
 
 static int try_to_freeze_tasks(bool user_only)
 {
@@ -36,7 +36,7 @@ static int try_to_freeze_tasks(bool user_only)
 
 	do_gettimeofday(&start);
 
-	end_time = jiffies + TIMEOUT;
+	end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs);
 
 	if (!user_only)
 		freeze_workqueues_begin();
-- 
cgit v1.2.3


From 710a03e9349f027cf2d77559973fc06076a4042a Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Tue, 15 Jan 2013 16:37:56 +0000
Subject: ARM: ux500: remove irq_base property from platform_data

AB8500 GPIO no longer handles its GPIO IRQs. Instead, the AB8500
core driver has taken back the responsibility. Prior to this
happening, the AB8500 GPIO driver provided a set of virtual IRQs
which were used as a pass-through. These virtual IRQs had a base
of MOP500_AB8500_VIR_GPIO_IRQ_BASE, which was passed though pdata.
We don't need to do this anymore, so we're pulling out the
property from the structure.

Cc: arm@kernel.org
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/board-mop500.c     | 1 -
 include/linux/mfd/abx500/ab8500-gpio.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index b6f14ee507ca..b8781caa54b8 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -92,7 +92,6 @@ static struct platform_device snowball_gpio_en_3v3_regulator_dev = {
 
 static struct abx500_gpio_platform_data ab8500_gpio_pdata = {
 	.gpio_base		= MOP500_AB8500_PIN_GPIO(1),
-	.irq_base		= MOP500_AB8500_VIR_GPIO_IRQ_BASE,
 };
 
 /* ab8500-codec */
diff --git a/include/linux/mfd/abx500/ab8500-gpio.h b/include/linux/mfd/abx500/ab8500-gpio.h
index e8c8281e194a..172b2f201ae0 100644
--- a/include/linux/mfd/abx500/ab8500-gpio.h
+++ b/include/linux/mfd/abx500/ab8500-gpio.h
@@ -16,7 +16,6 @@
 
 struct abx500_gpio_platform_data {
 	int gpio_base;
-	u32 irq_base;
 };
 
 enum abx500_gpio_pull_updown {
-- 
cgit v1.2.3


From 095c3752e673c0ba039a2f67fd867297fde75ae7 Mon Sep 17 00:00:00 2001
From: Andreas Larsson <andreas@gaisler.com>
Date: Tue, 29 Jan 2013 15:53:41 +0100
Subject: spi: Document cs_gpios and cs_gpio in kernel-doc

This adds missing kernel-doc entries for cs_gpios in struct spi_master and
cs_gpio in struct spi_device.

Signed-off-by: Andreas Larsson <andreas@gaisler.com>
[grant.likely: tweaked the language of the descriptions]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/spi/spi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index f62918946d86..30e9c50a5e20 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -57,6 +57,8 @@ extern struct bus_type spi_bus_type;
  * @modalias: Name of the driver to use with this device, or an alias
  *	for that name.  This appears in the sysfs "modalias" attribute
  *	for driver coldplugging, and in uevents used for hotplugging
+ * @cs_gpio: gpio number of the chipselect line (optional, -EINVAL when
+ *	when not using a GPIO line)
  *
  * A @spi_device is used to interchange data between an SPI slave
  * (usually a discrete chip) and CPU memory.
@@ -258,6 +260,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @unprepare_transfer_hardware: there are currently no more messages on the
  *	queue so the subsystem notifies the driver that it may relax the
  *	hardware by issuing this call
+ * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
+ *	number. Any individual value may be -EINVAL for CS lines that
+ *	are not GPIOs (driven by the SPI controller itself).
  *
  * Each SPI master controller can communicate with one or more @spi_device
  * children.  These make a small bus, sharing MOSI, MISO and SCK signals
-- 
cgit v1.2.3


From d021c344051af91f42c5ba9fdedc176740cbd238 Mon Sep 17 00:00:00 2001
From: Andy King <acking@vmware.com>
Date: Wed, 6 Feb 2013 14:23:56 +0000
Subject: VSOCK: Introduce VM Sockets

VM Sockets allows communication between virtual machines and the hypervisor.
User level applications both in a virtual machine and on the host can use the
VM Sockets API, which facilitates fast and efficient communication between
guest virtual machines and their host.  A socket address family, designed to be
compatible with UDP and TCP at the interface level, is provided.

Today, VM Sockets is used by various VMware Tools components inside the guest
for zero-config, network-less access to VMware host services.  In addition to
this, VMware's users are using VM Sockets for various applications, where
network access of the virtual machine is restricted or non-existent.  Examples
of this are VMs communicating with device proxies for proprietary hardware
running as host applications and automated testing of applications running
within virtual machines.

The VMware VM Sockets are similar to other socket types, like Berkeley UNIX
socket interface.  The VM Sockets module supports both connection-oriented
stream sockets like TCP, and connectionless datagram sockets like UDP. The VM
Sockets protocol family is defined as "AF_VSOCK" and the socket operations
split for SOCK_DGRAM and SOCK_STREAM.

For additional information about the use of VM Sockets, please refer to the
VM Sockets Programming Guide available at:

https://www.vmware.com/support/developer/vmci-sdk/

Signed-off-by: George Zhang <georgezhang@vmware.com>
Signed-off-by: Dmitry Torokhov <dtor@vmware.com>
Signed-off-by: Andy king <acking@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h                       |    4 +-
 include/uapi/linux/vm_sockets.h              |  171 ++
 net/Kconfig                                  |    1 +
 net/Makefile                                 |    1 +
 net/vmw_vsock/Kconfig                        |   28 +
 net/vmw_vsock/Makefile                       |    7 +
 net/vmw_vsock/af_vsock.c                     | 2015 ++++++++++++++++++++++++
 net/vmw_vsock/af_vsock.h                     |  175 +++
 net/vmw_vsock/vmci_transport.c               | 2157 ++++++++++++++++++++++++++
 net/vmw_vsock/vmci_transport.h               |  139 ++
 net/vmw_vsock/vmci_transport_notify.c        |  680 ++++++++
 net/vmw_vsock/vmci_transport_notify.h        |   83 +
 net/vmw_vsock/vmci_transport_notify_qstate.c |  438 ++++++
 net/vmw_vsock/vsock_addr.c                   |   86 +
 net/vmw_vsock/vsock_addr.h                   |   32 +
 net/vmw_vsock/vsock_version.h                |   22 +
 16 files changed, 6038 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/vm_sockets.h
 create mode 100644 net/vmw_vsock/Kconfig
 create mode 100644 net/vmw_vsock/Makefile
 create mode 100644 net/vmw_vsock/af_vsock.c
 create mode 100644 net/vmw_vsock/af_vsock.h
 create mode 100644 net/vmw_vsock/vmci_transport.c
 create mode 100644 net/vmw_vsock/vmci_transport.h
 create mode 100644 net/vmw_vsock/vmci_transport_notify.c
 create mode 100644 net/vmw_vsock/vmci_transport_notify.h
 create mode 100644 net/vmw_vsock/vmci_transport_notify_qstate.c
 create mode 100644 net/vmw_vsock/vsock_addr.c
 create mode 100644 net/vmw_vsock/vsock_addr.h
 create mode 100644 net/vmw_vsock/vsock_version.h

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 9a546ff853dc..2b9f74b0ffea 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -178,7 +178,8 @@ struct ucred {
 #define AF_CAIF		37	/* CAIF sockets			*/
 #define AF_ALG		38	/* Algorithm sockets		*/
 #define AF_NFC		39	/* NFC sockets			*/
-#define AF_MAX		40	/* For now.. */
+#define AF_VSOCK	40	/* vSockets			*/
+#define AF_MAX		41	/* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -221,6 +222,7 @@ struct ucred {
 #define PF_CAIF		AF_CAIF
 #define PF_ALG		AF_ALG
 #define PF_NFC		AF_NFC
+#define PF_VSOCK	AF_VSOCK
 #define PF_MAX		AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
new file mode 100644
index 000000000000..f7f2e99dec84
--- /dev/null
+++ b/include/uapi/linux/vm_sockets.h
@@ -0,0 +1,171 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VM_SOCKETS_H_
+#define _VM_SOCKETS_H_
+
+#if !defined(__KERNEL__)
+#include <sys/socket.h>
+#endif
+
+/* Option name for STREAM socket buffer size.  Use as the option name in
+ * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
+ * specifies the size of the buffer underlying a vSockets STREAM socket.
+ * Value is clamped to the MIN and MAX.
+ */
+
+#define SO_VM_SOCKETS_BUFFER_SIZE 0
+
+/* Option name for STREAM socket minimum buffer size.  Use as the option name
+ * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
+ * specifies the minimum size allowed for the buffer underlying a vSockets
+ * STREAM socket.
+ */
+
+#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1
+
+/* Option name for STREAM socket maximum buffer size.  Use as the option name
+ * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long
+ * that specifies the maximum size allowed for the buffer underlying a
+ * vSockets STREAM socket.
+ */
+
+#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2
+
+/* Option name for socket peer's host-specific VM ID.  Use as the option name
+ * in getsockopt(3) to get a host-specific identifier for the peer endpoint's
+ * VM.  The identifier is a signed integer.
+ * Only available for hypervisor endpoints.
+ */
+
+#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3
+
+/* Option name for socket's service label.  Use as the option name in
+ * setsockopt(3) or getsockopt(3) to set or get the service label for a socket.
+ * The service label is a C-style NUL-terminated string.  Only available for
+ * hypervisor endpoints.
+ */
+
+#define SO_VM_SOCKETS_SERVICE_LABEL 4
+
+/* Option name for determining if a socket is trusted.  Use as the option name
+ * in getsockopt(3) to determine if a socket is trusted.  The value is a
+ * signed integer.
+ */
+
+#define SO_VM_SOCKETS_TRUSTED 5
+
+/* Option name for STREAM socket connection timeout.  Use as the option name
+ * in setsockopt(3) or getsockopt(3) to set or get the connection
+ * timeout for a STREAM socket.
+ */
+
+#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6
+
+/* Option name for using non-blocking send/receive.  Use as the option name
+ * for setsockopt(3) or getsockopt(3) to set or get the non-blocking
+ * transmit/receive flag for a STREAM socket.  This flag determines whether
+ * send() and recv() can be called in non-blocking contexts for the given
+ * socket.  The value is a signed integer.
+ *
+ * This option is only relevant to kernel endpoints, where descheduling the
+ * thread of execution is not allowed, for example, while holding a spinlock.
+ * It is not to be confused with conventional non-blocking socket operations.
+ *
+ * Only available for hypervisor endpoints.
+ */
+
+#define SO_VM_SOCKETS_NONBLOCK_TXRX 7
+
+/* The vSocket equivalent of INADDR_ANY.  This works for the svm_cid field of
+ * sockaddr_vm and indicates the context ID of the current endpoint.
+ */
+
+#define VMADDR_CID_ANY -1U
+
+/* Bind to any available port.  Works for the svm_port field of
+ * sockaddr_vm.
+ */
+
+#define VMADDR_PORT_ANY -1U
+
+/* Use this as the destination CID in an address when referring to the
+ * hypervisor.  VMCI relies on it being 0, but this would be useful for other
+ * transports too.
+ */
+
+#define VMADDR_CID_HYPERVISOR 0
+
+/* This CID is specific to VMCI and can be considered reserved (even VMCI
+ * doesn't use it anymore, it's a legacy value from an older release).
+ */
+
+#define VMADDR_CID_RESERVED 1
+
+/* Use this as the destination CID in an address when referring to the host
+ * (any process other than the hypervisor).  VMCI relies on it being 2, but
+ * this would be useful for other transports too.
+ */
+
+#define VMADDR_CID_HOST 2
+
+/* Invalid vSockets version. */
+
+#define VM_SOCKETS_INVALID_VERSION -1U
+
+/* The epoch (first) component of the vSockets version.  A single byte
+ * representing the epoch component of the vSockets version.
+ */
+
+#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24)
+
+/* The major (second) component of the vSockets version.   A single byte
+ * representing the major component of the vSockets version.  Typically
+ * changes for every major release of a product.
+ */
+
+#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16)
+
+/* The minor (third) component of the vSockets version.  Two bytes representing
+ * the minor component of the vSockets version.
+ */
+
+#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF))
+
+/* Address structure for vSockets.   The address family should be set to
+ * whatever vmci_sock_get_af_value_fd() returns.  The structure members should
+ * all align on their natural boundaries without resorting to compiler packing
+ * directives.  The total size of this structure should be exactly the same as
+ * that of struct sockaddr.
+ */
+
+struct sockaddr_vm {
+	sa_family_t svm_family;
+	unsigned short svm_reserved1;
+	unsigned int svm_port;
+	unsigned int svm_cid;
+	unsigned char svm_zero[sizeof(struct sockaddr) -
+			       sizeof(sa_family_t) -
+			       sizeof(unsigned short) -
+			       sizeof(unsigned int) - sizeof(unsigned int)];
+};
+
+#define IOCTL_VM_SOCKETS_GET_LOCAL_CID		_IO(7, 0xb9)
+
+#if defined(__KERNEL__)
+int vm_sockets_get_local_cid(void);
+#endif
+
+#endif
diff --git a/net/Kconfig b/net/Kconfig
index c31348e70aad..5a1888bb036d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -217,6 +217,7 @@ source "net/dcb/Kconfig"
 source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
 source "net/openvswitch/Kconfig"
+source "net/vmw_vsock/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/Makefile b/net/Makefile
index c5aa8b3b49dc..091e7b04f301 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -69,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB)		+= ceph/
 obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
 obj-$(CONFIG_NFC)		+= nfc/
 obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
+obj-$(CONFIG_VSOCKETS)	+= vmw_vsock/
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
new file mode 100644
index 000000000000..b5fa7e40cdcb
--- /dev/null
+++ b/net/vmw_vsock/Kconfig
@@ -0,0 +1,28 @@
+#
+# Vsock protocol
+#
+
+config VSOCKETS
+	tristate "Virtual Socket protocol"
+	help
+	  Virtual Socket Protocol is a socket protocol similar to TCP/IP
+	  allowing comunication between Virtual Machines and hypervisor
+	  or host.
+
+	  You should also select one or more hypervisor-specific transports
+	  below.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called vsock. If unsure, say N.
+
+config VMWARE_VMCI_VSOCKETS
+	tristate "VMware VMCI transport for Virtual Sockets"
+	depends on VSOCKETS && VMWARE_VMCI
+	help
+	  This module implements a VMCI transport for Virtual Sockets.
+
+	  Enable this transport if your Virtual Machine runs on a VMware
+	  hypervisor.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called vmw_vsock_vmci_transport. If unsure, say N.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
new file mode 100644
index 000000000000..2ce52d70f224
--- /dev/null
+++ b/net/vmw_vsock/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_VSOCKETS) += vsock.o
+obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
+
+vsock-y += af_vsock.o vsock_addr.o
+
+vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
+	vmci_transport_notify_qstate.o
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
new file mode 100644
index 000000000000..54bb7bdf92d3
--- /dev/null
+++ b/net/vmw_vsock/af_vsock.c
@@ -0,0 +1,2015 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+/* Implementation notes:
+ *
+ * - There are two kinds of sockets: those created by user action (such as
+ * calling socket(2)) and those created by incoming connection request packets.
+ *
+ * - There are two "global" tables, one for bound sockets (sockets that have
+ * specified an address that they are responsible for) and one for connected
+ * sockets (sockets that have established a connection with another socket).
+ * These tables are "global" in that all sockets on the system are placed
+ * within them. - Note, though, that the bound table contains an extra entry
+ * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in
+ * that list. The bound table is used solely for lookup of sockets when packets
+ * are received and that's not necessary for SOCK_DGRAM sockets since we create
+ * a datagram handle for each and need not perform a lookup.  Keeping SOCK_DGRAM
+ * sockets out of the bound hash buckets will reduce the chance of collisions
+ * when looking for SOCK_STREAM sockets and prevents us from having to check the
+ * socket type in the hash table lookups.
+ *
+ * - Sockets created by user action will either be "client" sockets that
+ * initiate a connection or "server" sockets that listen for connections; we do
+ * not support simultaneous connects (two "client" sockets connecting).
+ *
+ * - "Server" sockets are referred to as listener sockets throughout this
+ * implementation because they are in the SS_LISTEN state.  When a connection
+ * request is received (the second kind of socket mentioned above), we create a
+ * new socket and refer to it as a pending socket.  These pending sockets are
+ * placed on the pending connection list of the listener socket.  When future
+ * packets are received for the address the listener socket is bound to, we
+ * check if the source of the packet is from one that has an existing pending
+ * connection.  If it does, we process the packet for the pending socket.  When
+ * that socket reaches the connected state, it is removed from the listener
+ * socket's pending list and enqueued in the listener socket's accept queue.
+ * Callers of accept(2) will accept connected sockets from the listener socket's
+ * accept queue.  If the socket cannot be accepted for some reason then it is
+ * marked rejected.  Once the connection is accepted, it is owned by the user
+ * process and the responsibility for cleanup falls with that user process.
+ *
+ * - It is possible that these pending sockets will never reach the connected
+ * state; in fact, we may never receive another packet after the connection
+ * request.  Because of this, we must schedule a cleanup function to run in the
+ * future, after some amount of time passes where a connection should have been
+ * established.  This function ensures that the socket is off all lists so it
+ * cannot be retrieved, then drops all references to the socket so it is cleaned
+ * up (sock_put() -> sk_free() -> our sk_destruct implementation).  Note this
+ * function will also cleanup rejected sockets, those that reach the connected
+ * state but leave it before they have been accepted.
+ *
+ * - Sockets created by user action will be cleaned up when the user process
+ * calls close(2), causing our release implementation to be called. Our release
+ * implementation will perform some cleanup then drop the last reference so our
+ * sk_destruct implementation is invoked.  Our sk_destruct implementation will
+ * perform additional cleanup that's common for both types of sockets.
+ *
+ * - A socket's reference count is what ensures that the structure won't be
+ * freed.  Each entry in a list (such as the "global" bound and connected tables
+ * and the listener socket's pending list and connected queue) ensures a
+ * reference.  When we defer work until process context and pass a socket as our
+ * argument, we must ensure the reference count is increased to ensure the
+ * socket isn't freed before the function is run; the deferred function will
+ * then drop the reference.
+ */
+
+#include <linux/types.h>
+
+#define EXPORT_SYMTAB
+#include <linux/bitops.h>
+#include <linux/cred.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/smp.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <net/sock.h>
+
+#include "af_vsock.h"
+#include "vsock_version.h"
+
+static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
+static void vsock_sk_destruct(struct sock *sk);
+static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+
+/* Protocol family. */
+static struct proto vsock_proto = {
+	.name = "AF_VSOCK",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct vsock_sock),
+};
+
+/* The default peer timeout indicates how long we will wait for a peer response
+ * to a control message.
+ */
+#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
+
+#define SS_LISTEN 255
+
+static const struct vsock_transport *transport;
+static DEFINE_MUTEX(vsock_register_mutex);
+
+/**** EXPORTS ****/
+
+/* Get the ID of the local context.  This is transport dependent. */
+
+int vm_sockets_get_local_cid(void)
+{
+	return transport->get_local_cid();
+}
+EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
+
+/**** UTILS ****/
+
+/* Each bound VSocket is stored in the bind hash table and each connected
+ * VSocket is stored in the connected hash table.
+ *
+ * Unbound sockets are all put on the same list attached to the end of the hash
+ * table (vsock_unbound_sockets).  Bound sockets are added to the hash table in
+ * the bucket that their local address hashes to (vsock_bound_sockets(addr)
+ * represents the list that addr hashes to).
+ *
+ * Specifically, we initialize the vsock_bind_table array to a size of
+ * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through
+ * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and
+ * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.  The hash function
+ * mods with VSOCK_HASH_SIZE - 1 to ensure this.
+ */
+#define VSOCK_HASH_SIZE         251
+#define MAX_PORT_RETRIES        24
+
+#define VSOCK_HASH(addr)        ((addr)->svm_port % (VSOCK_HASH_SIZE - 1))
+#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
+#define vsock_unbound_sockets     (&vsock_bind_table[VSOCK_HASH_SIZE])
+
+/* XXX This can probably be implemented in a better way. */
+#define VSOCK_CONN_HASH(src, dst)				\
+	(((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1))
+#define vsock_connected_sockets(src, dst)		\
+	(&vsock_connected_table[VSOCK_CONN_HASH(src, dst)])
+#define vsock_connected_sockets_vsk(vsk)				\
+	vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
+
+static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+static DEFINE_SPINLOCK(vsock_table_lock);
+
+static __init void vsock_init_tables(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++)
+		INIT_LIST_HEAD(&vsock_bind_table[i]);
+
+	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
+		INIT_LIST_HEAD(&vsock_connected_table[i]);
+}
+
+static void __vsock_insert_bound(struct list_head *list,
+				 struct vsock_sock *vsk)
+{
+	sock_hold(&vsk->sk);
+	list_add(&vsk->bound_table, list);
+}
+
+static void __vsock_insert_connected(struct list_head *list,
+				     struct vsock_sock *vsk)
+{
+	sock_hold(&vsk->sk);
+	list_add(&vsk->connected_table, list);
+}
+
+static void __vsock_remove_bound(struct vsock_sock *vsk)
+{
+	list_del_init(&vsk->bound_table);
+	sock_put(&vsk->sk);
+}
+
+static void __vsock_remove_connected(struct vsock_sock *vsk)
+{
+	list_del_init(&vsk->connected_table);
+	sock_put(&vsk->sk);
+}
+
+static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
+{
+	struct vsock_sock *vsk;
+
+	list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table)
+		if (vsock_addr_equals_addr_any(addr, &vsk->local_addr))
+			return sk_vsock(vsk);
+
+	return NULL;
+}
+
+static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
+						  struct sockaddr_vm *dst)
+{
+	struct vsock_sock *vsk;
+
+	list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
+			    connected_table) {
+		if (vsock_addr_equals_addr(src, &vsk->remote_addr)
+		    && vsock_addr_equals_addr(dst, &vsk->local_addr)) {
+			return sk_vsock(vsk);
+		}
+	}
+
+	return NULL;
+}
+
+static bool __vsock_in_bound_table(struct vsock_sock *vsk)
+{
+	return !list_empty(&vsk->bound_table);
+}
+
+static bool __vsock_in_connected_table(struct vsock_sock *vsk)
+{
+	return !list_empty(&vsk->connected_table);
+}
+
+static void vsock_insert_unbound(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_insert_bound(vsock_unbound_sockets, vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+
+void vsock_insert_connected(struct vsock_sock *vsk)
+{
+	struct list_head *list = vsock_connected_sockets(
+		&vsk->remote_addr, &vsk->local_addr);
+
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_insert_connected(list, vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_insert_connected);
+
+void vsock_remove_bound(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_remove_bound(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_bound);
+
+void vsock_remove_connected(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_remove_connected(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_connected);
+
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
+{
+	struct sock *sk;
+
+	spin_lock_bh(&vsock_table_lock);
+	sk = __vsock_find_bound_socket(addr);
+	if (sk)
+		sock_hold(sk);
+
+	spin_unlock_bh(&vsock_table_lock);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(vsock_find_bound_socket);
+
+struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
+					 struct sockaddr_vm *dst)
+{
+	struct sock *sk;
+
+	spin_lock_bh(&vsock_table_lock);
+	sk = __vsock_find_connected_socket(src, dst);
+	if (sk)
+		sock_hold(sk);
+
+	spin_unlock_bh(&vsock_table_lock);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
+
+static bool vsock_in_bound_table(struct vsock_sock *vsk)
+{
+	bool ret;
+
+	spin_lock_bh(&vsock_table_lock);
+	ret = __vsock_in_bound_table(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return ret;
+}
+
+static bool vsock_in_connected_table(struct vsock_sock *vsk)
+{
+	bool ret;
+
+	spin_lock_bh(&vsock_table_lock);
+	ret = __vsock_in_connected_table(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return ret;
+}
+
+void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+{
+	int i;
+
+	spin_lock_bh(&vsock_table_lock);
+
+	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
+		struct vsock_sock *vsk;
+		list_for_each_entry(vsk, &vsock_connected_table[i],
+				    connected_table);
+			fn(sk_vsock(vsk));
+	}
+
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket);
+
+void vsock_add_pending(struct sock *listener, struct sock *pending)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vpending;
+
+	vlistener = vsock_sk(listener);
+	vpending = vsock_sk(pending);
+
+	sock_hold(pending);
+	sock_hold(listener);
+	list_add_tail(&vpending->pending_links, &vlistener->pending_links);
+}
+EXPORT_SYMBOL_GPL(vsock_add_pending);
+
+void vsock_remove_pending(struct sock *listener, struct sock *pending)
+{
+	struct vsock_sock *vpending = vsock_sk(pending);
+
+	list_del_init(&vpending->pending_links);
+	sock_put(listener);
+	sock_put(pending);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_pending);
+
+void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vconnected;
+
+	vlistener = vsock_sk(listener);
+	vconnected = vsock_sk(connected);
+
+	sock_hold(connected);
+	sock_hold(listener);
+	list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue);
+}
+EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
+
+static struct sock *vsock_dequeue_accept(struct sock *listener)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vconnected;
+
+	vlistener = vsock_sk(listener);
+
+	if (list_empty(&vlistener->accept_queue))
+		return NULL;
+
+	vconnected = list_entry(vlistener->accept_queue.next,
+				struct vsock_sock, accept_queue);
+
+	list_del_init(&vconnected->accept_queue);
+	sock_put(listener);
+	/* The caller will need a reference on the connected socket so we let
+	 * it call sock_put().
+	 */
+
+	return sk_vsock(vconnected);
+}
+
+static bool vsock_is_accept_queue_empty(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	return list_empty(&vsk->accept_queue);
+}
+
+static bool vsock_is_pending(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	return !list_empty(&vsk->pending_links);
+}
+
+static int vsock_send_shutdown(struct sock *sk, int mode)
+{
+	return transport->shutdown(vsock_sk(sk), mode);
+}
+
+void vsock_pending_work(struct work_struct *work)
+{
+	struct sock *sk;
+	struct sock *listener;
+	struct vsock_sock *vsk;
+	bool cleanup;
+
+	vsk = container_of(work, struct vsock_sock, dwork.work);
+	sk = sk_vsock(vsk);
+	listener = vsk->listener;
+	cleanup = true;
+
+	lock_sock(listener);
+	lock_sock(sk);
+
+	if (vsock_is_pending(sk)) {
+		vsock_remove_pending(listener, sk);
+	} else if (!vsk->rejected) {
+		/* We are not on the pending list and accept() did not reject
+		 * us, so we must have been accepted by our user process.  We
+		 * just need to drop our references to the sockets and be on
+		 * our way.
+		 */
+		cleanup = false;
+		goto out;
+	}
+
+	listener->sk_ack_backlog--;
+
+	/* We need to remove ourself from the global connected sockets list so
+	 * incoming packets can't find this socket, and to reduce the reference
+	 * count.
+	 */
+	if (vsock_in_connected_table(vsk))
+		vsock_remove_connected(vsk);
+
+	sk->sk_state = SS_FREE;
+
+out:
+	release_sock(sk);
+	release_sock(listener);
+	if (cleanup)
+		sock_put(sk);
+
+	sock_put(sk);
+	sock_put(listener);
+}
+EXPORT_SYMBOL_GPL(vsock_pending_work);
+
+/**** SOCKET OPERATIONS ****/
+
+static int __vsock_bind_stream(struct vsock_sock *vsk,
+			       struct sockaddr_vm *addr)
+{
+	static u32 port = LAST_RESERVED_PORT + 1;
+	struct sockaddr_vm new_addr;
+
+	vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);
+
+	if (addr->svm_port == VMADDR_PORT_ANY) {
+		bool found = false;
+		unsigned int i;
+
+		for (i = 0; i < MAX_PORT_RETRIES; i++) {
+			if (port <= LAST_RESERVED_PORT)
+				port = LAST_RESERVED_PORT + 1;
+
+			new_addr.svm_port = port++;
+
+			if (!__vsock_find_bound_socket(&new_addr)) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			return -EADDRNOTAVAIL;
+	} else {
+		/* If port is in reserved range, ensure caller
+		 * has necessary privileges.
+		 */
+		if (addr->svm_port <= LAST_RESERVED_PORT &&
+		    !capable(CAP_NET_BIND_SERVICE)) {
+			return -EACCES;
+		}
+
+		if (__vsock_find_bound_socket(&new_addr))
+			return -EADDRINUSE;
+	}
+
+	vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port);
+
+	/* Remove stream sockets from the unbound list and add them to the hash
+	 * table for easy lookup by its address.  The unbound list is simply an
+	 * extra entry at the end of the hash table, a trick used by AF_UNIX.
+	 */
+	__vsock_remove_bound(vsk);
+	__vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk);
+
+	return 0;
+}
+
+static int __vsock_bind_dgram(struct vsock_sock *vsk,
+			      struct sockaddr_vm *addr)
+{
+	return transport->dgram_bind(vsk, addr);
+}
+
+static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	u32 cid;
+	int retval;
+
+	/* First ensure this socket isn't already bound. */
+	if (vsock_addr_bound(&vsk->local_addr))
+		return -EINVAL;
+
+	/* Now bind to the provided address or select appropriate values if
+	 * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY).  Note that
+	 * like AF_INET prevents binding to a non-local IP address (in most
+	 * cases), we only allow binding to the local CID.
+	 */
+	cid = transport->get_local_cid();
+	if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY)
+		return -EADDRNOTAVAIL;
+
+	switch (sk->sk_socket->type) {
+	case SOCK_STREAM:
+		spin_lock_bh(&vsock_table_lock);
+		retval = __vsock_bind_stream(vsk, addr);
+		spin_unlock_bh(&vsock_table_lock);
+		break;
+
+	case SOCK_DGRAM:
+		retval = __vsock_bind_dgram(vsk, addr);
+		break;
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	return retval;
+}
+
+struct sock *__vsock_create(struct net *net,
+			    struct socket *sock,
+			    struct sock *parent,
+			    gfp_t priority,
+			    unsigned short type)
+{
+	struct sock *sk;
+	struct vsock_sock *psk;
+	struct vsock_sock *vsk;
+
+	sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk);
+
+	/* sk->sk_type is normally set in sock_init_data, but only if sock is
+	 * non-NULL. We make sure that our sockets always have a type by
+	 * setting it here if needed.
+	 */
+	if (!sock)
+		sk->sk_type = type;
+
+	vsk = vsock_sk(sk);
+	vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+	vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+	sk->sk_destruct = vsock_sk_destruct;
+	sk->sk_backlog_rcv = vsock_queue_rcv_skb;
+	sk->sk_state = 0;
+	sock_reset_flag(sk, SOCK_DONE);
+
+	INIT_LIST_HEAD(&vsk->bound_table);
+	INIT_LIST_HEAD(&vsk->connected_table);
+	vsk->listener = NULL;
+	INIT_LIST_HEAD(&vsk->pending_links);
+	INIT_LIST_HEAD(&vsk->accept_queue);
+	vsk->rejected = false;
+	vsk->sent_request = false;
+	vsk->ignore_connecting_rst = false;
+	vsk->peer_shutdown = 0;
+
+	psk = parent ? vsock_sk(parent) : NULL;
+	if (parent) {
+		vsk->trusted = psk->trusted;
+		vsk->owner = get_cred(psk->owner);
+		vsk->connect_timeout = psk->connect_timeout;
+	} else {
+		vsk->trusted = capable(CAP_NET_ADMIN);
+		vsk->owner = get_current_cred();
+		vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
+	}
+
+	if (transport->init(vsk, psk) < 0) {
+		sk_free(sk);
+		return NULL;
+	}
+
+	if (sock)
+		vsock_insert_unbound(vsk);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(__vsock_create);
+
+static void __vsock_release(struct sock *sk)
+{
+	if (sk) {
+		struct sk_buff *skb;
+		struct sock *pending;
+		struct vsock_sock *vsk;
+
+		vsk = vsock_sk(sk);
+		pending = NULL;	/* Compiler warning. */
+
+		if (vsock_in_bound_table(vsk))
+			vsock_remove_bound(vsk);
+
+		if (vsock_in_connected_table(vsk))
+			vsock_remove_connected(vsk);
+
+		transport->release(vsk);
+
+		lock_sock(sk);
+		sock_orphan(sk);
+		sk->sk_shutdown = SHUTDOWN_MASK;
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)))
+			kfree_skb(skb);
+
+		/* Clean up any sockets that never were accepted. */
+		while ((pending = vsock_dequeue_accept(sk)) != NULL) {
+			__vsock_release(pending);
+			sock_put(pending);
+		}
+
+		release_sock(sk);
+		sock_put(sk);
+	}
+}
+
+static void vsock_sk_destruct(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	transport->destruct(vsk);
+
+	/* When clearing these addresses, there's no need to set the family and
+	 * possibly register the address family with the kernel.
+	 */
+	vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+	vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+	put_cred(vsk->owner);
+}
+
+static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+
+	err = sock_queue_rcv_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+
+	return err;
+}
+
+s64 vsock_stream_has_data(struct vsock_sock *vsk)
+{
+	return transport->stream_has_data(vsk);
+}
+EXPORT_SYMBOL_GPL(vsock_stream_has_data);
+
+s64 vsock_stream_has_space(struct vsock_sock *vsk)
+{
+	return transport->stream_has_space(vsk);
+}
+EXPORT_SYMBOL_GPL(vsock_stream_has_space);
+
+static int vsock_release(struct socket *sock)
+{
+	__vsock_release(sock->sk);
+	sock->sk = NULL;
+	sock->state = SS_FREE;
+
+	return 0;
+}
+
+static int
+vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	int err;
+	struct sock *sk;
+	struct sockaddr_vm *vm_addr;
+
+	sk = sock->sk;
+
+	if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0)
+		return -EINVAL;
+
+	lock_sock(sk);
+	err = __vsock_bind(sk, vm_addr);
+	release_sock(sk);
+
+	return err;
+}
+
+static int vsock_getname(struct socket *sock,
+			 struct sockaddr *addr, int *addr_len, int peer)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *vm_addr;
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	lock_sock(sk);
+
+	if (peer) {
+		if (sock->state != SS_CONNECTED) {
+			err = -ENOTCONN;
+			goto out;
+		}
+		vm_addr = &vsk->remote_addr;
+	} else {
+		vm_addr = &vsk->local_addr;
+	}
+
+	if (!vm_addr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* sys_getsockname() and sys_getpeername() pass us a
+	 * MAX_SOCK_ADDR-sized buffer and don't set addr_len.  Unfortunately
+	 * that macro is defined in socket.c instead of .h, so we hardcode its
+	 * value here.
+	 */
+	BUILD_BUG_ON(sizeof(*vm_addr) > 128);
+	memcpy(addr, vm_addr, sizeof(*vm_addr));
+	*addr_len = sizeof(*vm_addr);
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_shutdown(struct socket *sock, int mode)
+{
+	int err;
+	struct sock *sk;
+
+	/* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses
+	 * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode
+	 * here like the other address families do.  Note also that the
+	 * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3),
+	 * which is what we want.
+	 */
+	mode++;
+
+	if ((mode & ~SHUTDOWN_MASK) || !mode)
+		return -EINVAL;
+
+	/* If this is a STREAM socket and it is not connected then bail out
+	 * immediately.  If it is a DGRAM socket then we must first kick the
+	 * socket so that it wakes up from any sleeping calls, for example
+	 * recv(), and then afterwards return the error.
+	 */
+
+	sk = sock->sk;
+	if (sock->state == SS_UNCONNECTED) {
+		err = -ENOTCONN;
+		if (sk->sk_type == SOCK_STREAM)
+			return err;
+	} else {
+		sock->state = SS_DISCONNECTING;
+		err = 0;
+	}
+
+	/* Receive and send shutdowns are treated alike. */
+	mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
+	if (mode) {
+		lock_sock(sk);
+		sk->sk_shutdown |= mode;
+		sk->sk_state_change(sk);
+		release_sock(sk);
+
+		if (sk->sk_type == SOCK_STREAM) {
+			sock_reset_flag(sk, SOCK_DONE);
+			vsock_send_shutdown(sk, mode);
+		}
+	}
+
+	return err;
+}
+
+static unsigned int vsock_poll(struct file *file, struct socket *sock,
+			       poll_table *wait)
+{
+	struct sock *sk;
+	unsigned int mask;
+	struct vsock_sock *vsk;
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
+
+	if (sk->sk_err)
+		/* Signify that there has been an error on this socket. */
+		mask |= POLLERR;
+
+	/* INET sockets treat local write shutdown and peer write shutdown as a
+	 * case of POLLHUP set.
+	 */
+	if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
+	    ((sk->sk_shutdown & SEND_SHUTDOWN) &&
+	     (vsk->peer_shutdown & SEND_SHUTDOWN))) {
+		mask |= POLLHUP;
+	}
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN ||
+	    vsk->peer_shutdown & SEND_SHUTDOWN) {
+		mask |= POLLRDHUP;
+	}
+
+	if (sock->type == SOCK_DGRAM) {
+		/* For datagram sockets we can read if there is something in
+		 * the queue and write as long as the socket isn't shutdown for
+		 * sending.
+		 */
+		if (!skb_queue_empty(&sk->sk_receive_queue) ||
+		    (sk->sk_shutdown & RCV_SHUTDOWN)) {
+			mask |= POLLIN | POLLRDNORM;
+		}
+
+		if (!(sk->sk_shutdown & SEND_SHUTDOWN))
+			mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	} else if (sock->type == SOCK_STREAM) {
+		lock_sock(sk);
+
+		/* Listening sockets that have connections in their accept
+		 * queue can be read.
+		 */
+		if (sk->sk_state == SS_LISTEN
+		    && !vsock_is_accept_queue_empty(sk))
+			mask |= POLLIN | POLLRDNORM;
+
+		/* If there is something in the queue then we can read. */
+		if (transport->stream_is_active(vsk) &&
+		    !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+			bool data_ready_now = false;
+			int ret = transport->notify_poll_in(
+					vsk, 1, &data_ready_now);
+			if (ret < 0) {
+				mask |= POLLERR;
+			} else {
+				if (data_ready_now)
+					mask |= POLLIN | POLLRDNORM;
+
+			}
+		}
+
+		/* Sockets whose connections have been closed, reset, or
+		 * terminated should also be considered read, and we check the
+		 * shutdown flag for that.
+		 */
+		if (sk->sk_shutdown & RCV_SHUTDOWN ||
+		    vsk->peer_shutdown & SEND_SHUTDOWN) {
+			mask |= POLLIN | POLLRDNORM;
+		}
+
+		/* Connected sockets that can produce data can be written. */
+		if (sk->sk_state == SS_CONNECTED) {
+			if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+				bool space_avail_now = false;
+				int ret = transport->notify_poll_out(
+						vsk, 1, &space_avail_now);
+				if (ret < 0) {
+					mask |= POLLERR;
+				} else {
+					if (space_avail_now)
+						/* Remove POLLWRBAND since INET
+						 * sockets are not setting it.
+						 */
+						mask |= POLLOUT | POLLWRNORM;
+
+				}
+			}
+		}
+
+		/* Simulate INET socket poll behaviors, which sets
+		 * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
+		 * but local send is not shutdown.
+		 */
+		if (sk->sk_state == SS_UNCONNECTED) {
+			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
+				mask |= POLLOUT | POLLWRNORM;
+
+		}
+
+		release_sock(sk);
+	}
+
+	return mask;
+}
+
+static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
+			       struct msghdr *msg, size_t len)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *remote_addr;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	/* For now, MSG_DONTWAIT is always assumed... */
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr)) {
+		struct sockaddr_vm local_addr;
+
+		vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+		err = __vsock_bind(sk, &local_addr);
+		if (err != 0)
+			goto out;
+
+	}
+
+	/* If the provided message contains an address, use that.  Otherwise
+	 * fall back on the socket's remote handle (if it has been connected).
+	 */
+	if (msg->msg_name &&
+	    vsock_addr_cast(msg->msg_name, msg->msg_namelen,
+			    &remote_addr) == 0) {
+		/* Ensure this address is of the right type and is a valid
+		 * destination.
+		 */
+
+		if (remote_addr->svm_cid == VMADDR_CID_ANY)
+			remote_addr->svm_cid = transport->get_local_cid();
+
+		if (!vsock_addr_bound(remote_addr)) {
+			err = -EINVAL;
+			goto out;
+		}
+	} else if (sock->state == SS_CONNECTED) {
+		remote_addr = &vsk->remote_addr;
+
+		if (remote_addr->svm_cid == VMADDR_CID_ANY)
+			remote_addr->svm_cid = transport->get_local_cid();
+
+		/* XXX Should connect() or this function ensure remote_addr is
+		 * bound?
+		 */
+		if (!vsock_addr_bound(&vsk->remote_addr)) {
+			err = -EINVAL;
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!transport->dgram_allow(remote_addr->svm_cid,
+				    remote_addr->svm_port)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len);
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_dgram_connect(struct socket *sock,
+			       struct sockaddr *addr, int addr_len, int flags)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *remote_addr;
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	err = vsock_addr_cast(addr, addr_len, &remote_addr);
+	if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) {
+		lock_sock(sk);
+		vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY,
+				VMADDR_PORT_ANY);
+		sock->state = SS_UNCONNECTED;
+		release_sock(sk);
+		return 0;
+	} else if (err != 0)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr)) {
+		struct sockaddr_vm local_addr;
+
+		vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+		err = __vsock_bind(sk, &local_addr);
+		if (err != 0)
+			goto out;
+
+	}
+
+	if (!transport->dgram_allow(remote_addr->svm_cid,
+				    remote_addr->svm_port)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr));
+	sock->state = SS_CONNECTED;
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock,
+			       struct msghdr *msg, size_t len, int flags)
+{
+	return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len,
+					flags);
+}
+
+static const struct proto_ops vsock_dgram_ops = {
+	.family = PF_VSOCK,
+	.owner = THIS_MODULE,
+	.release = vsock_release,
+	.bind = vsock_bind,
+	.connect = vsock_dgram_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = vsock_getname,
+	.poll = vsock_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = sock_no_listen,
+	.shutdown = vsock_shutdown,
+	.setsockopt = sock_no_setsockopt,
+	.getsockopt = sock_no_getsockopt,
+	.sendmsg = vsock_dgram_sendmsg,
+	.recvmsg = vsock_dgram_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static void vsock_connect_timeout(struct work_struct *work)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+
+	vsk = container_of(work, struct vsock_sock, dwork.work);
+	sk = sk_vsock(vsk);
+
+	lock_sock(sk);
+	if (sk->sk_state == SS_CONNECTING &&
+	    (sk->sk_shutdown != SHUTDOWN_MASK)) {
+		sk->sk_state = SS_UNCONNECTED;
+		sk->sk_err = ETIMEDOUT;
+		sk->sk_error_report(sk);
+	}
+	release_sock(sk);
+
+	sock_put(sk);
+}
+
+static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
+				int addr_len, int flags)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *remote_addr;
+	long timeout;
+	DEFINE_WAIT(wait);
+
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	/* XXX AF_UNSPEC should make us disconnect like AF_INET. */
+	switch (sock->state) {
+	case SS_CONNECTED:
+		err = -EISCONN;
+		goto out;
+	case SS_DISCONNECTING:
+		err = -EINVAL;
+		goto out;
+	case SS_CONNECTING:
+		/* This continues on so we can move sock into the SS_CONNECTED
+		 * state once the connection has completed (at which point err
+		 * will be set to zero also).  Otherwise, we will either wait
+		 * for the connection or return -EALREADY should this be a
+		 * non-blocking call.
+		 */
+		err = -EALREADY;
+		break;
+	default:
+		if ((sk->sk_state == SS_LISTEN) ||
+		    vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* The hypervisor and well-known contexts do not have socket
+		 * endpoints.
+		 */
+		if (!transport->stream_allow(remote_addr->svm_cid,
+					     remote_addr->svm_port)) {
+			err = -ENETUNREACH;
+			goto out;
+		}
+
+		/* Set the remote address that we are connecting to. */
+		memcpy(&vsk->remote_addr, remote_addr,
+		       sizeof(vsk->remote_addr));
+
+		/* Autobind this socket to the local address if necessary. */
+		if (!vsock_addr_bound(&vsk->local_addr)) {
+			struct sockaddr_vm local_addr;
+
+			vsock_addr_init(&local_addr, VMADDR_CID_ANY,
+					VMADDR_PORT_ANY);
+			err = __vsock_bind(sk, &local_addr);
+			if (err != 0)
+				goto out;
+
+		}
+
+		sk->sk_state = SS_CONNECTING;
+
+		err = transport->connect(vsk);
+		if (err < 0)
+			goto out;
+
+		/* Mark sock as connecting and set the error code to in
+		 * progress in case this is a non-blocking connect.
+		 */
+		sock->state = SS_CONNECTING;
+		err = -EINPROGRESS;
+	}
+
+	/* The receive path will handle all communication until we are able to
+	 * enter the connected state.  Here we wait for the connection to be
+	 * completed or a notification of an error.
+	 */
+	timeout = vsk->connect_timeout;
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+		if (flags & O_NONBLOCK) {
+			/* If we're not going to block, we schedule a timeout
+			 * function to generate a timeout on the connection
+			 * attempt, in case the peer doesn't respond in a
+			 * timely manner. We hold on to the socket until the
+			 * timeout fires.
+			 */
+			sock_hold(sk);
+			INIT_DELAYED_WORK(&vsk->dwork,
+					  vsock_connect_timeout);
+			schedule_delayed_work(&vsk->dwork, timeout);
+
+			/* Skip ahead to preserve error code set above. */
+			goto out_wait;
+		}
+
+		release_sock(sk);
+		timeout = schedule_timeout(timeout);
+		lock_sock(sk);
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeout);
+			goto out_wait_error;
+		} else if (timeout == 0) {
+			err = -ETIMEDOUT;
+			goto out_wait_error;
+		}
+
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+	}
+
+	if (sk->sk_err) {
+		err = -sk->sk_err;
+		goto out_wait_error;
+	} else
+		err = 0;
+
+out_wait:
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+
+out_wait_error:
+	sk->sk_state = SS_UNCONNECTED;
+	sock->state = SS_UNCONNECTED;
+	goto out_wait;
+}
+
+static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *listener;
+	int err;
+	struct sock *connected;
+	struct vsock_sock *vconnected;
+	long timeout;
+	DEFINE_WAIT(wait);
+
+	err = 0;
+	listener = sock->sk;
+
+	lock_sock(listener);
+
+	if (sock->type != SOCK_STREAM) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (listener->sk_state != SS_LISTEN) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Wait for children sockets to appear; these are the new sockets
+	 * created upon connection establishment.
+	 */
+	timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
+	prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
+
+	while ((connected = vsock_dequeue_accept(listener)) == NULL &&
+	       listener->sk_err == 0) {
+		release_sock(listener);
+		timeout = schedule_timeout(timeout);
+		lock_sock(listener);
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeout);
+			goto out_wait;
+		} else if (timeout == 0) {
+			err = -EAGAIN;
+			goto out_wait;
+		}
+
+		prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
+	}
+
+	if (listener->sk_err)
+		err = -listener->sk_err;
+
+	if (connected) {
+		listener->sk_ack_backlog--;
+
+		lock_sock(connected);
+		vconnected = vsock_sk(connected);
+
+		/* If the listener socket has received an error, then we should
+		 * reject this socket and return.  Note that we simply mark the
+		 * socket rejected, drop our reference, and let the cleanup
+		 * function handle the cleanup; the fact that we found it in
+		 * the listener's accept queue guarantees that the cleanup
+		 * function hasn't run yet.
+		 */
+		if (err) {
+			vconnected->rejected = true;
+			release_sock(connected);
+			sock_put(connected);
+			goto out_wait;
+		}
+
+		newsock->state = SS_CONNECTED;
+		sock_graft(connected, newsock);
+		release_sock(connected);
+		sock_put(connected);
+	}
+
+out_wait:
+	finish_wait(sk_sleep(listener), &wait);
+out:
+	release_sock(listener);
+	return err;
+}
+
+static int vsock_listen(struct socket *sock, int backlog)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+
+	sk = sock->sk;
+
+	lock_sock(sk);
+
+	if (sock->type != SOCK_STREAM) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (sock->state != SS_UNCONNECTED) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	vsk = vsock_sk(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_state = SS_LISTEN;
+
+	err = 0;
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_stream_setsockopt(struct socket *sock,
+				   int level,
+				   int optname,
+				   char __user *optval,
+				   unsigned int optlen)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	u64 val;
+
+	if (level != AF_VSOCK)
+		return -ENOPROTOOPT;
+
+#define COPY_IN(_v)                                       \
+	do {						  \
+		if (optlen < sizeof(_v)) {		  \
+			err = -EINVAL;			  \
+			goto exit;			  \
+		}					  \
+		if (copy_from_user(&_v, optval, sizeof(_v)) != 0) {	\
+			err = -EFAULT;					\
+			goto exit;					\
+		}							\
+	} while (0)
+
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case SO_VM_SOCKETS_BUFFER_SIZE:
+		COPY_IN(val);
+		transport->set_buffer_size(vsk, val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
+		COPY_IN(val);
+		transport->set_max_buffer_size(vsk, val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
+		COPY_IN(val);
+		transport->set_min_buffer_size(vsk, val);
+		break;
+
+	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
+		struct timeval tv;
+		COPY_IN(tv);
+		if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
+		    tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) {
+			vsk->connect_timeout = tv.tv_sec * HZ +
+			    DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ));
+			if (vsk->connect_timeout == 0)
+				vsk->connect_timeout =
+				    VSOCK_DEFAULT_CONNECT_TIMEOUT;
+
+		} else {
+			err = -ERANGE;
+		}
+		break;
+	}
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+#undef COPY_IN
+
+exit:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_stream_getsockopt(struct socket *sock,
+				   int level, int optname,
+				   char __user *optval,
+				   int __user *optlen)
+{
+	int err;
+	int len;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	u64 val;
+
+	if (level != AF_VSOCK)
+		return -ENOPROTOOPT;
+
+	err = get_user(len, optlen);
+	if (err != 0)
+		return err;
+
+#define COPY_OUT(_v)                            \
+	do {					\
+		if (len < sizeof(_v))		\
+			return -EINVAL;		\
+						\
+		len = sizeof(_v);		\
+		if (copy_to_user(optval, &_v, len) != 0)	\
+			return -EFAULT;				\
+								\
+	} while (0)
+
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	switch (optname) {
+	case SO_VM_SOCKETS_BUFFER_SIZE:
+		val = transport->get_buffer_size(vsk);
+		COPY_OUT(val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
+		val = transport->get_max_buffer_size(vsk);
+		COPY_OUT(val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
+		val = transport->get_min_buffer_size(vsk);
+		COPY_OUT(val);
+		break;
+
+	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
+		struct timeval tv;
+		tv.tv_sec = vsk->connect_timeout / HZ;
+		tv.tv_usec =
+		    (vsk->connect_timeout -
+		     tv.tv_sec * HZ) * (1000000 / HZ);
+		COPY_OUT(tv);
+		break;
+	}
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	err = put_user(len, optlen);
+	if (err != 0)
+		return -EFAULT;
+
+#undef COPY_OUT
+
+	return 0;
+}
+
+static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
+				struct msghdr *msg, size_t len)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	ssize_t total_written;
+	long timeout;
+	int err;
+	struct vsock_transport_send_notify_data send_data;
+
+	DEFINE_WAIT(wait);
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	total_written = 0;
+	err = 0;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	/* Callers should not provide a destination with stream sockets. */
+	if (msg->msg_namelen) {
+		err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* Send data only if both sides are not shutdown in the direction. */
+	if (sk->sk_shutdown & SEND_SHUTDOWN ||
+	    vsk->peer_shutdown & RCV_SHUTDOWN) {
+		err = -EPIPE;
+		goto out;
+	}
+
+	if (sk->sk_state != SS_CONNECTED ||
+	    !vsock_addr_bound(&vsk->local_addr)) {
+		err = -ENOTCONN;
+		goto out;
+	}
+
+	if (!vsock_addr_bound(&vsk->remote_addr)) {
+		err = -EDESTADDRREQ;
+		goto out;
+	}
+
+	/* Wait for room in the produce queue to enqueue our user's data. */
+	timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+	err = transport->notify_send_init(vsk, &send_data);
+	if (err < 0)
+		goto out;
+
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (total_written < len) {
+		ssize_t written;
+
+		while (vsock_stream_has_space(vsk) == 0 &&
+		       sk->sk_err == 0 &&
+		       !(sk->sk_shutdown & SEND_SHUTDOWN) &&
+		       !(vsk->peer_shutdown & RCV_SHUTDOWN)) {
+
+			/* Don't wait for non-blocking sockets. */
+			if (timeout == 0) {
+				err = -EAGAIN;
+				goto out_wait;
+			}
+
+			err = transport->notify_send_pre_block(vsk, &send_data);
+			if (err < 0)
+				goto out_wait;
+
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeout);
+				goto out_wait;
+			} else if (timeout == 0) {
+				err = -EAGAIN;
+				goto out_wait;
+			}
+
+			prepare_to_wait(sk_sleep(sk), &wait,
+					TASK_INTERRUPTIBLE);
+		}
+
+		/* These checks occur both as part of and after the loop
+		 * conditional since we need to check before and after
+		 * sleeping.
+		 */
+		if (sk->sk_err) {
+			err = -sk->sk_err;
+			goto out_wait;
+		} else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
+			   (vsk->peer_shutdown & RCV_SHUTDOWN)) {
+			err = -EPIPE;
+			goto out_wait;
+		}
+
+		err = transport->notify_send_pre_enqueue(vsk, &send_data);
+		if (err < 0)
+			goto out_wait;
+
+		/* Note that enqueue will only write as many bytes as are free
+		 * in the produce queue, so we don't need to ensure len is
+		 * smaller than the queue size.  It is the caller's
+		 * responsibility to check how many bytes we were able to send.
+		 */
+
+		written = transport->stream_enqueue(
+				vsk, msg->msg_iov,
+				len - total_written);
+		if (written < 0) {
+			err = -ENOMEM;
+			goto out_wait;
+		}
+
+		total_written += written;
+
+		err = transport->notify_send_post_enqueue(
+				vsk, written, &send_data);
+		if (err < 0)
+			goto out_wait;
+
+	}
+
+out_wait:
+	if (total_written > 0)
+		err = total_written;
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+}
+
+
+static int
+vsock_stream_recvmsg(struct kiocb *kiocb,
+		     struct socket *sock,
+		     struct msghdr *msg, size_t len, int flags)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	int err;
+	size_t target;
+	ssize_t copied;
+	long timeout;
+	struct vsock_transport_recv_notify_data recv_data;
+
+	DEFINE_WAIT(wait);
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != SS_CONNECTED) {
+		/* Recvmsg is supposed to return 0 if a peer performs an
+		 * orderly shutdown. Differentiate between that case and when a
+		 * peer has not connected or a local shutdown occured with the
+		 * SOCK_DONE flag.
+		 */
+		if (sock_flag(sk, SOCK_DONE))
+			err = 0;
+		else
+			err = -ENOTCONN;
+
+		goto out;
+	}
+
+	if (flags & MSG_OOB) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* We don't check peer_shutdown flag here since peer may actually shut
+	 * down, but there can be data in the queue that a local socket can
+	 * receive.
+	 */
+	if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		err = 0;
+		goto out;
+	}
+
+	/* It is valid on Linux to pass in a zero-length receive buffer.  This
+	 * is not an error.  We may as well bail out now.
+	 */
+	if (!len) {
+		err = 0;
+		goto out;
+	}
+
+	/* We must not copy less than target bytes into the user's buffer
+	 * before returning successfully, so we wait for the consume queue to
+	 * have that much data to consume before dequeueing.  Note that this
+	 * makes it impossible to handle cases where target is greater than the
+	 * queue size.
+	 */
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+	if (target >= transport->stream_rcvhiwat(vsk)) {
+		err = -ENOMEM;
+		goto out;
+	}
+	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	copied = 0;
+
+	err = transport->notify_recv_init(vsk, target, &recv_data);
+	if (err < 0)
+		goto out;
+
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (1) {
+		s64 ready = vsock_stream_has_data(vsk);
+
+		if (ready < 0) {
+			/* Invalid queue pair content. XXX This should be
+			 * changed to a connection reset in a later change.
+			 */
+
+			err = -ENOMEM;
+			goto out_wait;
+		} else if (ready > 0) {
+			ssize_t read;
+
+			err = transport->notify_recv_pre_dequeue(
+					vsk, target, &recv_data);
+			if (err < 0)
+				break;
+
+			read = transport->stream_dequeue(
+					vsk, msg->msg_iov,
+					len - copied, flags);
+			if (read < 0) {
+				err = -ENOMEM;
+				break;
+			}
+
+			copied += read;
+
+			err = transport->notify_recv_post_dequeue(
+					vsk, target, read,
+					!(flags & MSG_PEEK), &recv_data);
+			if (err < 0)
+				goto out_wait;
+
+			if (read >= target || flags & MSG_PEEK)
+				break;
+
+			target -= read;
+		} else {
+			if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN)
+			    || (vsk->peer_shutdown & SEND_SHUTDOWN)) {
+				break;
+			}
+			/* Don't wait for non-blocking sockets. */
+			if (timeout == 0) {
+				err = -EAGAIN;
+				break;
+			}
+
+			err = transport->notify_recv_pre_block(
+					vsk, target, &recv_data);
+			if (err < 0)
+				break;
+
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeout);
+				break;
+			} else if (timeout == 0) {
+				err = -EAGAIN;
+				break;
+			}
+
+			prepare_to_wait(sk_sleep(sk), &wait,
+					TASK_INTERRUPTIBLE);
+		}
+	}
+
+	if (sk->sk_err)
+		err = -sk->sk_err;
+	else if (sk->sk_shutdown & RCV_SHUTDOWN)
+		err = 0;
+
+	if (copied > 0) {
+		/* We only do these additional bookkeeping/notification steps
+		 * if we actually copied something out of the queue pair
+		 * instead of just peeking ahead.
+		 */
+
+		if (!(flags & MSG_PEEK)) {
+			/* If the other side has shutdown for sending and there
+			 * is nothing more to read, then modify the socket
+			 * state.
+			 */
+			if (vsk->peer_shutdown & SEND_SHUTDOWN) {
+				if (vsock_stream_has_data(vsk) <= 0) {
+					sk->sk_state = SS_UNCONNECTED;
+					sock_set_flag(sk, SOCK_DONE);
+					sk->sk_state_change(sk);
+				}
+			}
+		}
+		err = copied;
+	}
+
+out_wait:
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+}
+
+static const struct proto_ops vsock_stream_ops = {
+	.family = PF_VSOCK,
+	.owner = THIS_MODULE,
+	.release = vsock_release,
+	.bind = vsock_bind,
+	.connect = vsock_stream_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = vsock_accept,
+	.getname = vsock_getname,
+	.poll = vsock_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = vsock_listen,
+	.shutdown = vsock_shutdown,
+	.setsockopt = vsock_stream_setsockopt,
+	.getsockopt = vsock_stream_getsockopt,
+	.sendmsg = vsock_stream_sendmsg,
+	.recvmsg = vsock_stream_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static int vsock_create(struct net *net, struct socket *sock,
+			int protocol, int kern)
+{
+	if (!sock)
+		return -EINVAL;
+
+	if (protocol)
+		return -EPROTONOSUPPORT;
+
+	switch (sock->type) {
+	case SOCK_DGRAM:
+		sock->ops = &vsock_dgram_ops;
+		break;
+	case SOCK_STREAM:
+		sock->ops = &vsock_stream_ops;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	sock->state = SS_UNCONNECTED;
+
+	return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM;
+}
+
+static const struct net_proto_family vsock_family_ops = {
+	.family = AF_VSOCK,
+	.create = vsock_create,
+	.owner = THIS_MODULE,
+};
+
+static long vsock_dev_do_ioctl(struct file *filp,
+			       unsigned int cmd, void __user *ptr)
+{
+	u32 __user *p = ptr;
+	int retval = 0;
+
+	switch (cmd) {
+	case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
+		if (put_user(transport->get_local_cid(), p) != 0)
+			retval = -EFAULT;
+		break;
+
+	default:
+		pr_err("Unknown ioctl %d\n", cmd);
+		retval = -EINVAL;
+	}
+
+	return retval;
+}
+
+static long vsock_dev_ioctl(struct file *filp,
+			    unsigned int cmd, unsigned long arg)
+{
+	return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg);
+}
+
+#ifdef CONFIG_COMPAT
+static long vsock_dev_compat_ioctl(struct file *filp,
+				   unsigned int cmd, unsigned long arg)
+{
+	return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations vsock_device_ops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= vsock_dev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= vsock_dev_compat_ioctl,
+#endif
+	.open		= nonseekable_open,
+};
+
+static struct miscdevice vsock_device = {
+	.name		= "vsock",
+	.minor		= MISC_DYNAMIC_MINOR,
+	.fops		= &vsock_device_ops,
+};
+
+static int __vsock_core_init(void)
+{
+	int err;
+
+	vsock_init_tables();
+
+	err = misc_register(&vsock_device);
+	if (err) {
+		pr_err("Failed to register misc device\n");
+		return -ENOENT;
+	}
+
+	err = proto_register(&vsock_proto, 1);	/* we want our slab */
+	if (err) {
+		pr_err("Cannot register vsock protocol\n");
+		goto err_misc_deregister;
+	}
+
+	err = sock_register(&vsock_family_ops);
+	if (err) {
+		pr_err("could not register af_vsock (%d) address family: %d\n",
+		       AF_VSOCK, err);
+		goto err_unregister_proto;
+	}
+
+	return 0;
+
+err_unregister_proto:
+	proto_unregister(&vsock_proto);
+err_misc_deregister:
+	misc_deregister(&vsock_device);
+	return err;
+}
+
+int vsock_core_init(const struct vsock_transport *t)
+{
+	int retval = mutex_lock_interruptible(&vsock_register_mutex);
+	if (retval)
+		return retval;
+
+	if (transport) {
+		retval = -EBUSY;
+		goto out;
+	}
+
+	transport = t;
+	retval = __vsock_core_init();
+	if (retval)
+		transport = NULL;
+
+out:
+	mutex_unlock(&vsock_register_mutex);
+	return retval;
+}
+EXPORT_SYMBOL_GPL(vsock_core_init);
+
+void vsock_core_exit(void)
+{
+	mutex_lock(&vsock_register_mutex);
+
+	misc_deregister(&vsock_device);
+	sock_unregister(AF_VSOCK);
+	proto_unregister(&vsock_proto);
+
+	/* We do not want the assignment below re-ordered. */
+	mb();
+	transport = NULL;
+
+	mutex_unlock(&vsock_register_mutex);
+}
+EXPORT_SYMBOL_GPL(vsock_core_exit);
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMware Virtual Socket Family");
+MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING);
+MODULE_LICENSE("GPL v2");
diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h
new file mode 100644
index 000000000000..7d64d3609ec9
--- /dev/null
+++ b/net/vmw_vsock/af_vsock.h
@@ -0,0 +1,175 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __AF_VSOCK_H__
+#define __AF_VSOCK_H__
+
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/vm_sockets.h>
+
+#include "vsock_addr.h"
+
+#define LAST_RESERVED_PORT 1023
+
+#define vsock_sk(__sk)    ((struct vsock_sock *)__sk)
+#define sk_vsock(__vsk)   (&(__vsk)->sk)
+
+struct vsock_sock {
+	/* sk must be the first member. */
+	struct sock sk;
+	struct sockaddr_vm local_addr;
+	struct sockaddr_vm remote_addr;
+	/* Links for the global tables of bound and connected sockets. */
+	struct list_head bound_table;
+	struct list_head connected_table;
+	/* Accessed without the socket lock held. This means it can never be
+	 * modified outsided of socket create or destruct.
+	 */
+	bool trusted;
+	bool cached_peer_allow_dgram;	/* Dgram communication allowed to
+					 * cached peer?
+					 */
+	u32 cached_peer;  /* Context ID of last dgram destination check. */
+	const struct cred *owner;
+	/* Rest are SOCK_STREAM only. */
+	long connect_timeout;
+	/* Listening socket that this came from. */
+	struct sock *listener;
+	/* Used for pending list and accept queue during connection handshake.
+	 * The listening socket is the head for both lists.  Sockets created
+	 * for connection requests are placed in the pending list until they
+	 * are connected, at which point they are put in the accept queue list
+	 * so they can be accepted in accept().  If accept() cannot accept the
+	 * connection, it is marked as rejected so the cleanup function knows
+	 * to clean up the socket.
+	 */
+	struct list_head pending_links;
+	struct list_head accept_queue;
+	bool rejected;
+	struct delayed_work dwork;
+	u32 peer_shutdown;
+	bool sent_request;
+	bool ignore_connecting_rst;
+
+	/* Private to transport. */
+	void *trans;
+};
+
+s64 vsock_stream_has_data(struct vsock_sock *vsk);
+s64 vsock_stream_has_space(struct vsock_sock *vsk);
+void vsock_pending_work(struct work_struct *work);
+struct sock *__vsock_create(struct net *net,
+			    struct socket *sock,
+			    struct sock *parent,
+			    gfp_t priority, unsigned short type);
+
+/**** TRANSPORT ****/
+
+struct vsock_transport_recv_notify_data {
+	u64 data1; /* Transport-defined. */
+	u64 data2; /* Transport-defined. */
+	bool notify_on_block;
+};
+
+struct vsock_transport_send_notify_data {
+	u64 data1; /* Transport-defined. */
+	u64 data2; /* Transport-defined. */
+};
+
+struct vsock_transport {
+	/* Initialize/tear-down socket. */
+	int (*init)(struct vsock_sock *, struct vsock_sock *);
+	void (*destruct)(struct vsock_sock *);
+	void (*release)(struct vsock_sock *);
+
+	/* Connections. */
+	int (*connect)(struct vsock_sock *);
+
+	/* DGRAM. */
+	int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
+	int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk,
+			     struct msghdr *msg, size_t len, int flags);
+	int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
+			     struct iovec *, size_t len);
+	bool (*dgram_allow)(u32 cid, u32 port);
+
+	/* STREAM. */
+	/* TODO: stream_bind() */
+	ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *,
+				  size_t len, int flags);
+	ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *,
+				  size_t len);
+	s64 (*stream_has_data)(struct vsock_sock *);
+	s64 (*stream_has_space)(struct vsock_sock *);
+	u64 (*stream_rcvhiwat)(struct vsock_sock *);
+	bool (*stream_is_active)(struct vsock_sock *);
+	bool (*stream_allow)(u32 cid, u32 port);
+
+	/* Notification. */
+	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
+	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
+	int (*notify_recv_init)(struct vsock_sock *, size_t,
+		struct vsock_transport_recv_notify_data *);
+	int (*notify_recv_pre_block)(struct vsock_sock *, size_t,
+		struct vsock_transport_recv_notify_data *);
+	int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t,
+		struct vsock_transport_recv_notify_data *);
+	int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t,
+		ssize_t, bool, struct vsock_transport_recv_notify_data *);
+	int (*notify_send_init)(struct vsock_sock *,
+		struct vsock_transport_send_notify_data *);
+	int (*notify_send_pre_block)(struct vsock_sock *,
+		struct vsock_transport_send_notify_data *);
+	int (*notify_send_pre_enqueue)(struct vsock_sock *,
+		struct vsock_transport_send_notify_data *);
+	int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
+		struct vsock_transport_send_notify_data *);
+
+	/* Shutdown. */
+	int (*shutdown)(struct vsock_sock *, int);
+
+	/* Buffer sizes. */
+	void (*set_buffer_size)(struct vsock_sock *, u64);
+	void (*set_min_buffer_size)(struct vsock_sock *, u64);
+	void (*set_max_buffer_size)(struct vsock_sock *, u64);
+	u64 (*get_buffer_size)(struct vsock_sock *);
+	u64 (*get_min_buffer_size)(struct vsock_sock *);
+	u64 (*get_max_buffer_size)(struct vsock_sock *);
+
+	/* Addressing. */
+	u32 (*get_local_cid)(void);
+};
+
+/**** CORE ****/
+
+int vsock_core_init(const struct vsock_transport *t);
+void vsock_core_exit(void);
+
+/**** UTILS ****/
+
+void vsock_release_pending(struct sock *pending);
+void vsock_add_pending(struct sock *listener, struct sock *pending);
+void vsock_remove_pending(struct sock *listener, struct sock *pending);
+void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
+void vsock_insert_connected(struct vsock_sock *vsk);
+void vsock_remove_bound(struct vsock_sock *vsk);
+void vsock_remove_connected(struct vsock_sock *vsk);
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
+struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
+					 struct sockaddr_vm *dst);
+void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+
+#endif /* __AF_VSOCK_H__ */
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
new file mode 100644
index 000000000000..e8a87cf37072
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport.c
@@ -0,0 +1,2157 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#define EXPORT_SYMTAB
+#include <linux/bitops.h>
+#include <linux/cred.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/smp.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <net/sock.h>
+
+#include "af_vsock.h"
+#include "vmci_transport_notify.h"
+
+static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
+static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
+static void vmci_transport_peer_attach_cb(u32 sub_id,
+					  const struct vmci_event_data *ed,
+					  void *client_data);
+static void vmci_transport_peer_detach_cb(u32 sub_id,
+					  const struct vmci_event_data *ed,
+					  void *client_data);
+static void vmci_transport_recv_pkt_work(struct work_struct *work);
+static int vmci_transport_recv_listen(struct sock *sk,
+				      struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_server(
+					struct sock *sk,
+					struct sock *pending,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_client(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_client_negotiate(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_client_invalid(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connected(struct sock *sk,
+					 struct vmci_transport_packet *pkt);
+static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
+static u16 vmci_transport_new_proto_supported_versions(void);
+static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
+						  bool old_pkt_proto);
+
+struct vmci_transport_recv_pkt_info {
+	struct work_struct work;
+	struct sock *sk;
+	struct vmci_transport_packet pkt;
+};
+
+static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
+							   VMCI_INVALID_ID };
+static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+
+static int PROTOCOL_OVERRIDE = -1;
+
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN   128
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE       262144
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144
+
+/* The default peer timeout indicates how long we will wait for a peer response
+ * to a control message.
+ */
+#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
+
+#define SS_LISTEN 255
+
+/* Helper function to convert from a VMCI error code to a VSock error code. */
+
+static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
+{
+	int err;
+
+	switch (vmci_error) {
+	case VMCI_ERROR_NO_MEM:
+		err = ENOMEM;
+		break;
+	case VMCI_ERROR_DUPLICATE_ENTRY:
+	case VMCI_ERROR_ALREADY_EXISTS:
+		err = EADDRINUSE;
+		break;
+	case VMCI_ERROR_NO_ACCESS:
+		err = EPERM;
+		break;
+	case VMCI_ERROR_NO_RESOURCES:
+		err = ENOBUFS;
+		break;
+	case VMCI_ERROR_INVALID_RESOURCE:
+		err = EHOSTUNREACH;
+		break;
+	case VMCI_ERROR_INVALID_ARGS:
+	default:
+		err = EINVAL;
+	}
+
+	return err > 0 ? -err : err;
+}
+
+static inline void
+vmci_transport_packet_init(struct vmci_transport_packet *pkt,
+			   struct sockaddr_vm *src,
+			   struct sockaddr_vm *dst,
+			   u8 type,
+			   u64 size,
+			   u64 mode,
+			   struct vmci_transport_waiting_info *wait,
+			   u16 proto,
+			   struct vmci_handle handle)
+{
+	/* We register the stream control handler as an any cid handle so we
+	 * must always send from a source address of VMADDR_CID_ANY
+	 */
+	pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
+				       VMCI_TRANSPORT_PACKET_RID);
+	pkt->dg.dst = vmci_make_handle(dst->svm_cid,
+				       VMCI_TRANSPORT_PACKET_RID);
+	pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
+	pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
+	pkt->type = type;
+	pkt->src_port = src->svm_port;
+	pkt->dst_port = dst->svm_port;
+	memset(&pkt->proto, 0, sizeof(pkt->proto));
+	memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
+		pkt->u.size = 0;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
+		pkt->u.size = size;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
+	case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
+		pkt->u.handle = handle;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+	case VMCI_TRANSPORT_PACKET_TYPE_RST:
+		pkt->u.size = 0;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
+		pkt->u.mode = mode;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
+		memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
+		pkt->u.size = size;
+		pkt->proto = proto;
+		break;
+	}
+}
+
+static inline void
+vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
+				    struct sockaddr_vm *local,
+				    struct sockaddr_vm *remote)
+{
+	vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
+	vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
+}
+
+static int
+__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
+				  struct sockaddr_vm *src,
+				  struct sockaddr_vm *dst,
+				  enum vmci_transport_packet_type type,
+				  u64 size,
+				  u64 mode,
+				  struct vmci_transport_waiting_info *wait,
+				  u16 proto,
+				  struct vmci_handle handle,
+				  bool convert_error)
+{
+	int err;
+
+	vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
+				   proto, handle);
+	err = vmci_datagram_send(&pkt->dg);
+	if (convert_error && (err < 0))
+		return vmci_transport_error_to_vsock_error(err);
+
+	return err;
+}
+
+static int
+vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
+				      enum vmci_transport_packet_type type,
+				      u64 size,
+				      u64 mode,
+				      struct vmci_transport_waiting_info *wait,
+				      struct vmci_handle handle)
+{
+	struct vmci_transport_packet reply;
+	struct sockaddr_vm src, dst;
+
+	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) {
+		return 0;
+	} else {
+		vmci_transport_packet_get_addresses(pkt, &src, &dst);
+		return __vmci_transport_send_control_pkt(&reply, &src, &dst,
+							 type,
+							 size, mode, wait,
+							 VSOCK_PROTO_INVALID,
+							 handle, true);
+	}
+}
+
+static int
+vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
+				   struct sockaddr_vm *dst,
+				   enum vmci_transport_packet_type type,
+				   u64 size,
+				   u64 mode,
+				   struct vmci_transport_waiting_info *wait,
+				   struct vmci_handle handle)
+{
+	/* Note that it is safe to use a single packet across all CPUs since
+	 * two tasklets of the same type are guaranteed to not ever run
+	 * simultaneously. If that ever changes, or VMCI stops using tasklets,
+	 * we can use per-cpu packets.
+	 */
+	static struct vmci_transport_packet pkt;
+
+	return __vmci_transport_send_control_pkt(&pkt, src, dst, type,
+						 size, mode, wait,
+						 VSOCK_PROTO_INVALID, handle,
+						 false);
+}
+
+static int
+vmci_transport_send_control_pkt(struct sock *sk,
+				enum vmci_transport_packet_type type,
+				u64 size,
+				u64 mode,
+				struct vmci_transport_waiting_info *wait,
+				u16 proto,
+				struct vmci_handle handle)
+{
+	struct vmci_transport_packet *pkt;
+	struct vsock_sock *vsk;
+	int err;
+
+	vsk = vsock_sk(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr))
+		return -EINVAL;
+
+	if (!vsock_addr_bound(&vsk->remote_addr))
+		return -EINVAL;
+
+	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
+						&vsk->remote_addr, type, size,
+						mode, wait, proto, handle,
+						true);
+	kfree(pkt);
+
+	return err;
+}
+
+static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
+					struct sockaddr_vm *src,
+					struct vmci_transport_packet *pkt)
+{
+	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
+		return 0;
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_RST, 0,
+					0, NULL, VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_reset(struct sock *sk,
+				     struct vmci_transport_packet *pkt)
+{
+	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
+		return 0;
+	return vmci_transport_send_control_pkt(sk,
+					VMCI_TRANSPORT_PACKET_TYPE_RST,
+					0, 0, NULL, VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
+{
+	return vmci_transport_send_control_pkt(
+					sk,
+					VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
+					size, 0, NULL,
+					VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
+					  u16 version)
+{
+	return vmci_transport_send_control_pkt(
+					sk,
+					VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
+					size, 0, NULL, version,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_qp_offer(struct sock *sk,
+					struct vmci_handle handle)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
+					0, NULL,
+					VSOCK_PROTO_INVALID, handle);
+}
+
+static int vmci_transport_send_attach(struct sock *sk,
+				      struct vmci_handle handle)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
+					0, 0, NULL, VSOCK_PROTO_INVALID,
+					handle);
+}
+
+static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
+{
+	return vmci_transport_reply_control_pkt_fast(
+						pkt,
+						VMCI_TRANSPORT_PACKET_TYPE_RST,
+						0, 0, NULL,
+						VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
+					  struct sockaddr_vm *src)
+{
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_INVALID,
+					0, 0, NULL, VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
+				 struct sockaddr_vm *src)
+{
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
+					0, NULL, VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
+				struct sockaddr_vm *src)
+{
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
+					0, NULL, VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_wrote(struct sock *sk)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
+					0, NULL, VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_read(struct sock *sk)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
+					0, NULL, VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_waiting_write(struct sock *sk,
+				      struct vmci_transport_waiting_info *wait)
+{
+	return vmci_transport_send_control_pkt(
+				sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
+				0, 0, wait, VSOCK_PROTO_INVALID,
+				VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_waiting_read(struct sock *sk,
+				     struct vmci_transport_waiting_info *wait)
+{
+	return vmci_transport_send_control_pkt(
+				sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
+				0, 0, wait, VSOCK_PROTO_INVALID,
+				VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
+{
+	return vmci_transport_send_control_pkt(
+					&vsk->sk,
+					VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
+					0, mode, NULL,
+					VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
+{
+	return vmci_transport_send_control_pkt(sk,
+					VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
+					size, 0, NULL,
+					VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
+					     u16 version)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
+					size, 0, NULL, version,
+					VMCI_INVALID_HANDLE);
+}
+
+static struct sock *vmci_transport_get_pending(
+					struct sock *listener,
+					struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vpending;
+	struct sock *pending;
+
+	vlistener = vsock_sk(listener);
+
+	list_for_each_entry(vpending, &vlistener->pending_links,
+			    pending_links) {
+		struct sockaddr_vm src;
+		struct sockaddr_vm dst;
+
+		vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
+		vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
+
+		if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
+		    vsock_addr_equals_addr(&dst, &vpending->local_addr)) {
+			pending = sk_vsock(vpending);
+			sock_hold(pending);
+			goto found;
+		}
+	}
+
+	pending = NULL;
+found:
+	return pending;
+
+}
+
+static void vmci_transport_release_pending(struct sock *pending)
+{
+	sock_put(pending);
+}
+
+/* We allow two kinds of sockets to communicate with a restricted VM: 1)
+ * trusted sockets 2) sockets from applications running as the same user as the
+ * VM (this is only true for the host side and only when using hosted products)
+ */
+
+static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
+{
+	return vsock->trusted ||
+	       vmci_is_context_owner(peer_cid, vsock->owner->uid);
+}
+
+/* We allow sending datagrams to and receiving datagrams from a restricted VM
+ * only if it is trusted as described in vmci_transport_is_trusted.
+ */
+
+static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
+{
+	if (vsock->cached_peer != peer_cid) {
+		vsock->cached_peer = peer_cid;
+		if (!vmci_transport_is_trusted(vsock, peer_cid) &&
+		    (vmci_context_get_priv_flags(peer_cid) &
+		     VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
+			vsock->cached_peer_allow_dgram = false;
+		} else {
+			vsock->cached_peer_allow_dgram = true;
+		}
+	}
+
+	return vsock->cached_peer_allow_dgram;
+}
+
+static int
+vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
+				struct vmci_handle *handle,
+				u64 produce_size,
+				u64 consume_size,
+				u32 peer, u32 flags, bool trusted)
+{
+	int err = 0;
+
+	if (trusted) {
+		/* Try to allocate our queue pair as trusted. This will only
+		 * work if vsock is running in the host.
+		 */
+
+		err = vmci_qpair_alloc(qpair, handle, produce_size,
+				       consume_size,
+				       peer, flags,
+				       VMCI_PRIVILEGE_FLAG_TRUSTED);
+		if (err != VMCI_ERROR_NO_ACCESS)
+			goto out;
+
+	}
+
+	err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
+			       peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
+out:
+	if (err < 0) {
+		pr_err("Could not attach to queue pair with %d\n",
+		       err);
+		err = vmci_transport_error_to_vsock_error(err);
+	}
+
+	return err;
+}
+
+static int
+vmci_transport_datagram_create_hnd(u32 resource_id,
+				   u32 flags,
+				   vmci_datagram_recv_cb recv_cb,
+				   void *client_data,
+				   struct vmci_handle *out_handle)
+{
+	int err = 0;
+
+	/* Try to allocate our datagram handler as trusted. This will only work
+	 * if vsock is running in the host.
+	 */
+
+	err = vmci_datagram_create_handle_priv(resource_id, flags,
+					       VMCI_PRIVILEGE_FLAG_TRUSTED,
+					       recv_cb,
+					       client_data, out_handle);
+
+	if (err == VMCI_ERROR_NO_ACCESS)
+		err = vmci_datagram_create_handle(resource_id, flags,
+						  recv_cb, client_data,
+						  out_handle);
+
+	return err;
+}
+
+/* This is invoked as part of a tasklet that's scheduled when the VMCI
+ * interrupt fires.  This is run in bottom-half context and if it ever needs to
+ * sleep it should defer that work to a work queue.
+ */
+
+static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
+{
+	struct sock *sk;
+	size_t size;
+	struct sk_buff *skb;
+	struct vsock_sock *vsk;
+
+	sk = (struct sock *)data;
+
+	/* This handler is privileged when this module is running on the host.
+	 * We will get datagrams from all endpoints (even VMs that are in a
+	 * restricted context). If we get one from a restricted context then
+	 * the destination socket must be trusted.
+	 *
+	 * NOTE: We access the socket struct without holding the lock here.
+	 * This is ok because the field we are interested is never modified
+	 * outside of the create and destruct socket functions.
+	 */
+	vsk = vsock_sk(sk);
+	if (!vmci_transport_allow_dgram(vsk, dg->src.context))
+		return VMCI_ERROR_NO_ACCESS;
+
+	size = VMCI_DG_SIZE(dg);
+
+	/* Attach the packet to the socket's receive queue as an sk_buff. */
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (skb) {
+		/* sk_receive_skb() will do a sock_put(), so hold here. */
+		sock_hold(sk);
+		skb_put(skb, size);
+		memcpy(skb->data, dg, size);
+		sk_receive_skb(sk, skb, 0);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+static bool vmci_transport_stream_allow(u32 cid, u32 port)
+{
+	static const u32 non_socket_contexts[] = {
+		VMADDR_CID_HYPERVISOR,
+		VMADDR_CID_RESERVED,
+	};
+	int i;
+
+	BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts));
+
+	for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) {
+		if (cid == non_socket_contexts[i])
+			return false;
+	}
+
+	return true;
+}
+
+/* This is invoked as part of a tasklet that's scheduled when the VMCI
+ * interrupt fires.  This is run in bottom-half context but it defers most of
+ * its work to the packet handling work queue.
+ */
+
+static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
+{
+	struct sock *sk;
+	struct sockaddr_vm dst;
+	struct sockaddr_vm src;
+	struct vmci_transport_packet *pkt;
+	struct vsock_sock *vsk;
+	bool bh_process_pkt;
+	int err;
+
+	sk = NULL;
+	err = VMCI_SUCCESS;
+	bh_process_pkt = false;
+
+	/* Ignore incoming packets from contexts without sockets, or resources
+	 * that aren't vsock implementations.
+	 */
+
+	if (!vmci_transport_stream_allow(dg->src.context, -1)
+	    || VMCI_TRANSPORT_PACKET_RID != dg->src.resource)
+		return VMCI_ERROR_NO_ACCESS;
+
+	if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
+		/* Drop datagrams that do not contain full VSock packets. */
+		return VMCI_ERROR_INVALID_ARGS;
+
+	pkt = (struct vmci_transport_packet *)dg;
+
+	/* Find the socket that should handle this packet.  First we look for a
+	 * connected socket and if there is none we look for a socket bound to
+	 * the destintation address.
+	 */
+	vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
+	vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
+
+	sk = vsock_find_connected_socket(&src, &dst);
+	if (!sk) {
+		sk = vsock_find_bound_socket(&dst);
+		if (!sk) {
+			/* We could not find a socket for this specified
+			 * address.  If this packet is a RST, we just drop it.
+			 * If it is another packet, we send a RST.  Note that
+			 * we do not send a RST reply to RSTs so that we do not
+			 * continually send RSTs between two endpoints.
+			 *
+			 * Note that since this is a reply, dst is src and src
+			 * is dst.
+			 */
+			if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
+				pr_err("unable to send reset\n");
+
+			err = VMCI_ERROR_NOT_FOUND;
+			goto out;
+		}
+	}
+
+	/* If the received packet type is beyond all types known to this
+	 * implementation, reply with an invalid message.  Hopefully this will
+	 * help when implementing backwards compatibility in the future.
+	 */
+	if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) {
+		vmci_transport_send_invalid_bh(&dst, &src);
+		err = VMCI_ERROR_INVALID_ARGS;
+		goto out;
+	}
+
+	/* This handler is privileged when this module is running on the host.
+	 * We will get datagram connect requests from all endpoints (even VMs
+	 * that are in a restricted context). If we get one from a restricted
+	 * context then the destination socket must be trusted.
+	 *
+	 * NOTE: We access the socket struct without holding the lock here.
+	 * This is ok because the field we are interested is never modified
+	 * outside of the create and destruct socket functions.
+	 */
+	vsk = vsock_sk(sk);
+	if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) {
+		err = VMCI_ERROR_NO_ACCESS;
+		goto out;
+	}
+
+	/* We do most everything in a work queue, but let's fast path the
+	 * notification of reads and writes to help data transfer performance.
+	 * We can only do this if there is no process context code executing
+	 * for this socket since that may change the state.
+	 */
+	bh_lock_sock(sk);
+
+	if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED)
+		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
+				sk, pkt, true, &dst, &src,
+				&bh_process_pkt);
+
+	bh_unlock_sock(sk);
+
+	if (!bh_process_pkt) {
+		struct vmci_transport_recv_pkt_info *recv_pkt_info;
+
+		recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC);
+		if (!recv_pkt_info) {
+			if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
+				pr_err("unable to send reset\n");
+
+			err = VMCI_ERROR_NO_MEM;
+			goto out;
+		}
+
+		recv_pkt_info->sk = sk;
+		memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt));
+		INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work);
+
+		schedule_work(&recv_pkt_info->work);
+		/* Clear sk so that the reference count incremented by one of
+		 * the Find functions above is not decremented below.  We need
+		 * that reference count for the packet handler we've scheduled
+		 * to run.
+		 */
+		sk = NULL;
+	}
+
+out:
+	if (sk)
+		sock_put(sk);
+
+	return err;
+}
+
+static void vmci_transport_peer_attach_cb(u32 sub_id,
+					  const struct vmci_event_data *e_data,
+					  void *client_data)
+{
+	struct sock *sk = client_data;
+	const struct vmci_event_payload_qp *e_payload;
+	struct vsock_sock *vsk;
+
+	e_payload = vmci_event_data_const_payload(e_data);
+
+	vsk = vsock_sk(sk);
+
+	/* We don't ask for delayed CBs when we subscribe to this event (we
+	 * pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
+	 * guarantees in that case about what context we might be running in,
+	 * so it could be BH or process, blockable or non-blockable.  So we
+	 * need to account for all possible contexts here.
+	 */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	/* XXX This is lame, we should provide a way to lookup sockets by
+	 * qp_handle.
+	 */
+	if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
+				 e_payload->handle)) {
+		/* XXX This doesn't do anything, but in the future we may want
+		 * to set a flag here to verify the attach really did occur and
+		 * we weren't just sent a datagram claiming it was.
+		 */
+		goto out;
+	}
+
+out:
+	bh_unlock_sock(sk);
+	local_bh_enable();
+}
+
+static void vmci_transport_handle_detach(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+	if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
+		sock_set_flag(sk, SOCK_DONE);
+
+		/* On a detach the peer will not be sending or receiving
+		 * anymore.
+		 */
+		vsk->peer_shutdown = SHUTDOWN_MASK;
+
+		/* We should not be sending anymore since the peer won't be
+		 * there to receive, but we can still receive if there is data
+		 * left in our consume queue.
+		 */
+		if (vsock_stream_has_data(vsk) <= 0) {
+			if (sk->sk_state == SS_CONNECTING) {
+				/* The peer may detach from a queue pair while
+				 * we are still in the connecting state, i.e.,
+				 * if the peer VM is killed after attaching to
+				 * a queue pair, but before we complete the
+				 * handshake. In that case, we treat the detach
+				 * event like a reset.
+				 */
+
+				sk->sk_state = SS_UNCONNECTED;
+				sk->sk_err = ECONNRESET;
+				sk->sk_error_report(sk);
+				return;
+			}
+			sk->sk_state = SS_UNCONNECTED;
+		}
+		sk->sk_state_change(sk);
+	}
+}
+
+static void vmci_transport_peer_detach_cb(u32 sub_id,
+					  const struct vmci_event_data *e_data,
+					  void *client_data)
+{
+	struct sock *sk = client_data;
+	const struct vmci_event_payload_qp *e_payload;
+	struct vsock_sock *vsk;
+
+	e_payload = vmci_event_data_const_payload(e_data);
+	vsk = vsock_sk(sk);
+	if (vmci_handle_is_invalid(e_payload->handle))
+		return;
+
+	/* Same rules for locking as for peer_attach_cb(). */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	/* XXX This is lame, we should provide a way to lookup sockets by
+	 * qp_handle.
+	 */
+	if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
+				 e_payload->handle))
+		vmci_transport_handle_detach(sk);
+
+	bh_unlock_sock(sk);
+	local_bh_enable();
+}
+
+static void vmci_transport_qp_resumed_cb(u32 sub_id,
+					 const struct vmci_event_data *e_data,
+					 void *client_data)
+{
+	vsock_for_each_connected_socket(vmci_transport_handle_detach);
+}
+
+static void vmci_transport_recv_pkt_work(struct work_struct *work)
+{
+	struct vmci_transport_recv_pkt_info *recv_pkt_info;
+	struct vmci_transport_packet *pkt;
+	struct sock *sk;
+
+	recv_pkt_info =
+		container_of(work, struct vmci_transport_recv_pkt_info, work);
+	sk = recv_pkt_info->sk;
+	pkt = &recv_pkt_info->pkt;
+
+	lock_sock(sk);
+
+	switch (sk->sk_state) {
+	case SS_LISTEN:
+		vmci_transport_recv_listen(sk, pkt);
+		break;
+	case SS_CONNECTING:
+		/* Processing of pending connections for servers goes through
+		 * the listening socket, so see vmci_transport_recv_listen()
+		 * for that path.
+		 */
+		vmci_transport_recv_connecting_client(sk, pkt);
+		break;
+	case SS_CONNECTED:
+		vmci_transport_recv_connected(sk, pkt);
+		break;
+	default:
+		/* Because this function does not run in the same context as
+		 * vmci_transport_recv_stream_cb it is possible that the
+		 * socket has closed. We need to let the other side know or it
+		 * could be sitting in a connect and hang forever. Send a
+		 * reset to prevent that.
+		 */
+		vmci_transport_send_reset(sk, pkt);
+		goto out;
+	}
+
+out:
+	release_sock(sk);
+	kfree(recv_pkt_info);
+	/* Release reference obtained in the stream callback when we fetched
+	 * this socket out of the bound or connected list.
+	 */
+	sock_put(sk);
+}
+
+static int vmci_transport_recv_listen(struct sock *sk,
+				      struct vmci_transport_packet *pkt)
+{
+	struct sock *pending;
+	struct vsock_sock *vpending;
+	int err;
+	u64 qp_size;
+	bool old_request = false;
+	bool old_pkt_proto = false;
+
+	err = 0;
+
+	/* Because we are in the listen state, we could be receiving a packet
+	 * for ourself or any previous connection requests that we received.
+	 * If it's the latter, we try to find a socket in our list of pending
+	 * connections and, if we do, call the appropriate handler for the
+	 * state that that socket is in.  Otherwise we try to service the
+	 * connection request.
+	 */
+	pending = vmci_transport_get_pending(sk, pkt);
+	if (pending) {
+		lock_sock(pending);
+		switch (pending->sk_state) {
+		case SS_CONNECTING:
+			err = vmci_transport_recv_connecting_server(sk,
+								    pending,
+								    pkt);
+			break;
+		default:
+			vmci_transport_send_reset(pending, pkt);
+			err = -EINVAL;
+		}
+
+		if (err < 0)
+			vsock_remove_pending(sk, pending);
+
+		release_sock(pending);
+		vmci_transport_release_pending(pending);
+
+		return err;
+	}
+
+	/* The listen state only accepts connection requests.  Reply with a
+	 * reset unless we received a reset.
+	 */
+
+	if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST ||
+	      pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) {
+		vmci_transport_reply_reset(pkt);
+		return -EINVAL;
+	}
+
+	if (pkt->u.size == 0) {
+		vmci_transport_reply_reset(pkt);
+		return -EINVAL;
+	}
+
+	/* If this socket can't accommodate this connection request, we send a
+	 * reset.  Otherwise we create and initialize a child socket and reply
+	 * with a connection negotiation.
+	 */
+	if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
+		vmci_transport_reply_reset(pkt);
+		return -ECONNREFUSED;
+	}
+
+	pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
+				 sk->sk_type);
+	if (!pending) {
+		vmci_transport_send_reset(sk, pkt);
+		return -ENOMEM;
+	}
+
+	vpending = vsock_sk(pending);
+
+	vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context,
+			pkt->dst_port);
+	vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
+			pkt->src_port);
+
+	/* If the proposed size fits within our min/max, accept it. Otherwise
+	 * propose our own size.
+	 */
+	if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
+	    pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
+		qp_size = pkt->u.size;
+	} else {
+		qp_size = vmci_trans(vpending)->queue_pair_size;
+	}
+
+	/* Figure out if we are using old or new requests based on the
+	 * overrides pkt types sent by our peer.
+	 */
+	if (vmci_transport_old_proto_override(&old_pkt_proto)) {
+		old_request = old_pkt_proto;
+	} else {
+		if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST)
+			old_request = true;
+		else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)
+			old_request = false;
+
+	}
+
+	if (old_request) {
+		/* Handle a REQUEST (or override) */
+		u16 version = VSOCK_PROTO_INVALID;
+		if (vmci_transport_proto_to_notify_struct(
+			pending, &version, true))
+			err = vmci_transport_send_negotiate(pending, qp_size);
+		else
+			err = -EINVAL;
+
+	} else {
+		/* Handle a REQUEST2 (or override) */
+		int proto_int = pkt->proto;
+		int pos;
+		u16 active_proto_version = 0;
+
+		/* The list of possible protocols is the intersection of all
+		 * protocols the client supports ... plus all the protocols we
+		 * support.
+		 */
+		proto_int &= vmci_transport_new_proto_supported_versions();
+
+		/* We choose the highest possible protocol version and use that
+		 * one.
+		 */
+		pos = fls(proto_int);
+		if (pos) {
+			active_proto_version = (1 << (pos - 1));
+			if (vmci_transport_proto_to_notify_struct(
+				pending, &active_proto_version, false))
+				err = vmci_transport_send_negotiate2(pending,
+							qp_size,
+							active_proto_version);
+			else
+				err = -EINVAL;
+
+		} else {
+			err = -EINVAL;
+		}
+	}
+
+	if (err < 0) {
+		vmci_transport_send_reset(sk, pkt);
+		sock_put(pending);
+		err = vmci_transport_error_to_vsock_error(err);
+		goto out;
+	}
+
+	vsock_add_pending(sk, pending);
+	sk->sk_ack_backlog++;
+
+	pending->sk_state = SS_CONNECTING;
+	vmci_trans(vpending)->produce_size =
+		vmci_trans(vpending)->consume_size = qp_size;
+	vmci_trans(vpending)->queue_pair_size = qp_size;
+
+	vmci_trans(vpending)->notify_ops->process_request(pending);
+
+	/* We might never receive another message for this socket and it's not
+	 * connected to any process, so we have to ensure it gets cleaned up
+	 * ourself.  Our delayed work function will take care of that.  Note
+	 * that we do not ever cancel this function since we have few
+	 * guarantees about its state when calling cancel_delayed_work().
+	 * Instead we hold a reference on the socket for that function and make
+	 * it capable of handling cases where it needs to do nothing but
+	 * release that reference.
+	 */
+	vpending->listener = sk;
+	sock_hold(sk);
+	sock_hold(pending);
+	INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
+	schedule_delayed_work(&vpending->dwork, HZ);
+
+out:
+	return err;
+}
+
+static int
+vmci_transport_recv_connecting_server(struct sock *listener,
+				      struct sock *pending,
+				      struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vpending;
+	struct vmci_handle handle;
+	struct vmci_qp *qpair;
+	bool is_local;
+	u32 flags;
+	u32 detach_sub_id;
+	int err;
+	int skerr;
+
+	vpending = vsock_sk(pending);
+	detach_sub_id = VMCI_INVALID_ID;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
+		if (vmci_handle_is_invalid(pkt->u.handle)) {
+			vmci_transport_send_reset(pending, pkt);
+			skerr = EPROTO;
+			err = -EINVAL;
+			goto destroy;
+		}
+		break;
+	default:
+		/* Close and cleanup the connection. */
+		vmci_transport_send_reset(pending, pkt);
+		skerr = EPROTO;
+		err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
+		goto destroy;
+	}
+
+	/* In order to complete the connection we need to attach to the offered
+	 * queue pair and send an attach notification.  We also subscribe to the
+	 * detach event so we know when our peer goes away, and we do that
+	 * before attaching so we don't miss an event.  If all this succeeds,
+	 * we update our state and wakeup anything waiting in accept() for a
+	 * connection.
+	 */
+
+	/* We don't care about attach since we ensure the other side has
+	 * attached by specifying the ATTACH_ONLY flag below.
+	 */
+	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
+				   vmci_transport_peer_detach_cb,
+				   pending, &detach_sub_id);
+	if (err < VMCI_SUCCESS) {
+		vmci_transport_send_reset(pending, pkt);
+		err = vmci_transport_error_to_vsock_error(err);
+		skerr = -err;
+		goto destroy;
+	}
+
+	vmci_trans(vpending)->detach_sub_id = detach_sub_id;
+
+	/* Now attach to the queue pair the client created. */
+	handle = pkt->u.handle;
+
+	/* vpending->local_addr always has a context id so we do not need to
+	 * worry about VMADDR_CID_ANY in this case.
+	 */
+	is_local =
+	    vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid;
+	flags = VMCI_QPFLAG_ATTACH_ONLY;
+	flags |= is_local ? VMCI_QPFLAG_LOCAL : 0;
+
+	err = vmci_transport_queue_pair_alloc(
+					&qpair,
+					&handle,
+					vmci_trans(vpending)->produce_size,
+					vmci_trans(vpending)->consume_size,
+					pkt->dg.src.context,
+					flags,
+					vmci_transport_is_trusted(
+						vpending,
+						vpending->remote_addr.svm_cid));
+	if (err < 0) {
+		vmci_transport_send_reset(pending, pkt);
+		skerr = -err;
+		goto destroy;
+	}
+
+	vmci_trans(vpending)->qp_handle = handle;
+	vmci_trans(vpending)->qpair = qpair;
+
+	/* When we send the attach message, we must be ready to handle incoming
+	 * control messages on the newly connected socket. So we move the
+	 * pending socket to the connected state before sending the attach
+	 * message. Otherwise, an incoming packet triggered by the attach being
+	 * received by the peer may be processed concurrently with what happens
+	 * below after sending the attach message, and that incoming packet
+	 * will find the listening socket instead of the (currently) pending
+	 * socket. Note that enqueueing the socket increments the reference
+	 * count, so even if a reset comes before the connection is accepted,
+	 * the socket will be valid until it is removed from the queue.
+	 *
+	 * If we fail sending the attach below, we remove the socket from the
+	 * connected list and move the socket to SS_UNCONNECTED before
+	 * releasing the lock, so a pending slow path processing of an incoming
+	 * packet will not see the socket in the connected state in that case.
+	 */
+	pending->sk_state = SS_CONNECTED;
+
+	vsock_insert_connected(vpending);
+
+	/* Notify our peer of our attach. */
+	err = vmci_transport_send_attach(pending, handle);
+	if (err < 0) {
+		vsock_remove_connected(vpending);
+		pr_err("Could not send attach\n");
+		vmci_transport_send_reset(pending, pkt);
+		err = vmci_transport_error_to_vsock_error(err);
+		skerr = -err;
+		goto destroy;
+	}
+
+	/* We have a connection. Move the now connected socket from the
+	 * listener's pending list to the accept queue so callers of accept()
+	 * can find it.
+	 */
+	vsock_remove_pending(listener, pending);
+	vsock_enqueue_accept(listener, pending);
+
+	/* Callers of accept() will be be waiting on the listening socket, not
+	 * the pending socket.
+	 */
+	listener->sk_state_change(listener);
+
+	return 0;
+
+destroy:
+	pending->sk_err = skerr;
+	pending->sk_state = SS_UNCONNECTED;
+	/* As long as we drop our reference, all necessary cleanup will handle
+	 * when the cleanup function drops its reference and our destruct
+	 * implementation is called.  Note that since the listen handler will
+	 * remove pending from the pending list upon our failure, the cleanup
+	 * function won't drop the additional reference, which is why we do it
+	 * here.
+	 */
+	sock_put(pending);
+
+	return err;
+}
+
+static int
+vmci_transport_recv_connecting_client(struct sock *sk,
+				      struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vsk;
+	int err;
+	int skerr;
+
+	vsk = vsock_sk(sk);
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
+		if (vmci_handle_is_invalid(pkt->u.handle) ||
+		    !vmci_handle_is_equal(pkt->u.handle,
+					  vmci_trans(vsk)->qp_handle)) {
+			skerr = EPROTO;
+			err = -EINVAL;
+			goto destroy;
+		}
+
+		/* Signify the socket is connected and wakeup the waiter in
+		 * connect(). Also place the socket in the connected table for
+		 * accounting (it can already be found since it's in the bound
+		 * table).
+		 */
+		sk->sk_state = SS_CONNECTED;
+		sk->sk_socket->state = SS_CONNECTED;
+		vsock_insert_connected(vsk);
+		sk->sk_state_change(sk);
+
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
+		if (pkt->u.size == 0
+		    || pkt->dg.src.context != vsk->remote_addr.svm_cid
+		    || pkt->src_port != vsk->remote_addr.svm_port
+		    || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)
+		    || vmci_trans(vsk)->qpair
+		    || vmci_trans(vsk)->produce_size != 0
+		    || vmci_trans(vsk)->consume_size != 0
+		    || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
+		    || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
+			skerr = EPROTO;
+			err = -EINVAL;
+
+			goto destroy;
+		}
+
+		err = vmci_transport_recv_connecting_client_negotiate(sk, pkt);
+		if (err) {
+			skerr = -err;
+			goto destroy;
+		}
+
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
+		err = vmci_transport_recv_connecting_client_invalid(sk, pkt);
+		if (err) {
+			skerr = -err;
+			goto destroy;
+		}
+
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_RST:
+		/* Older versions of the linux code (WS 6.5 / ESX 4.0) used to
+		 * continue processing here after they sent an INVALID packet.
+		 * This meant that we got a RST after the INVALID. We ignore a
+		 * RST after an INVALID. The common code doesn't send the RST
+		 * ... so we can hang if an old version of the common code
+		 * fails between getting a REQUEST and sending an OFFER back.
+		 * Not much we can do about it... except hope that it doesn't
+		 * happen.
+		 */
+		if (vsk->ignore_connecting_rst) {
+			vsk->ignore_connecting_rst = false;
+		} else {
+			skerr = ECONNRESET;
+			err = 0;
+			goto destroy;
+		}
+
+		break;
+	default:
+		/* Close and cleanup the connection. */
+		skerr = EPROTO;
+		err = -EINVAL;
+		goto destroy;
+	}
+
+	return 0;
+
+destroy:
+	vmci_transport_send_reset(sk, pkt);
+
+	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_err = skerr;
+	sk->sk_error_report(sk);
+	return err;
+}
+
+static int vmci_transport_recv_connecting_client_negotiate(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt)
+{
+	int err;
+	struct vsock_sock *vsk;
+	struct vmci_handle handle;
+	struct vmci_qp *qpair;
+	u32 attach_sub_id;
+	u32 detach_sub_id;
+	bool is_local;
+	u32 flags;
+	bool old_proto = true;
+	bool old_pkt_proto;
+	u16 version;
+
+	vsk = vsock_sk(sk);
+	handle = VMCI_INVALID_HANDLE;
+	attach_sub_id = VMCI_INVALID_ID;
+	detach_sub_id = VMCI_INVALID_ID;
+
+	/* If we have gotten here then we should be past the point where old
+	 * linux vsock could have sent the bogus rst.
+	 */
+	vsk->sent_request = false;
+	vsk->ignore_connecting_rst = false;
+
+	/* Verify that we're OK with the proposed queue pair size */
+	if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
+	    pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
+		err = -EINVAL;
+		goto destroy;
+	}
+
+	/* At this point we know the CID the peer is using to talk to us. */
+
+	if (vsk->local_addr.svm_cid == VMADDR_CID_ANY)
+		vsk->local_addr.svm_cid = pkt->dg.dst.context;
+
+	/* Setup the notify ops to be the highest supported version that both
+	 * the server and the client support.
+	 */
+
+	if (vmci_transport_old_proto_override(&old_pkt_proto)) {
+		old_proto = old_pkt_proto;
+	} else {
+		if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE)
+			old_proto = true;
+		else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2)
+			old_proto = false;
+
+	}
+
+	if (old_proto)
+		version = VSOCK_PROTO_INVALID;
+	else
+		version = pkt->proto;
+
+	if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) {
+		err = -EINVAL;
+		goto destroy;
+	}
+
+	/* Subscribe to attach and detach events first.
+	 *
+	 * XXX We attach once for each queue pair created for now so it is easy
+	 * to find the socket (it's provided), but later we should only
+	 * subscribe once and add a way to lookup sockets by queue pair handle.
+	 */
+	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
+				   vmci_transport_peer_attach_cb,
+				   sk, &attach_sub_id);
+	if (err < VMCI_SUCCESS) {
+		err = vmci_transport_error_to_vsock_error(err);
+		goto destroy;
+	}
+
+	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
+				   vmci_transport_peer_detach_cb,
+				   sk, &detach_sub_id);
+	if (err < VMCI_SUCCESS) {
+		err = vmci_transport_error_to_vsock_error(err);
+		goto destroy;
+	}
+
+	/* Make VMCI select the handle for us. */
+	handle = VMCI_INVALID_HANDLE;
+	is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid;
+	flags = is_local ? VMCI_QPFLAG_LOCAL : 0;
+
+	err = vmci_transport_queue_pair_alloc(&qpair,
+					      &handle,
+					      pkt->u.size,
+					      pkt->u.size,
+					      vsk->remote_addr.svm_cid,
+					      flags,
+					      vmci_transport_is_trusted(
+						  vsk,
+						  vsk->
+						  remote_addr.svm_cid));
+	if (err < 0)
+		goto destroy;
+
+	err = vmci_transport_send_qp_offer(sk, handle);
+	if (err < 0) {
+		err = vmci_transport_error_to_vsock_error(err);
+		goto destroy;
+	}
+
+	vmci_trans(vsk)->qp_handle = handle;
+	vmci_trans(vsk)->qpair = qpair;
+
+	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
+		pkt->u.size;
+
+	vmci_trans(vsk)->attach_sub_id = attach_sub_id;
+	vmci_trans(vsk)->detach_sub_id = detach_sub_id;
+
+	vmci_trans(vsk)->notify_ops->process_negotiate(sk);
+
+	return 0;
+
+destroy:
+	if (attach_sub_id != VMCI_INVALID_ID)
+		vmci_event_unsubscribe(attach_sub_id);
+
+	if (detach_sub_id != VMCI_INVALID_ID)
+		vmci_event_unsubscribe(detach_sub_id);
+
+	if (!vmci_handle_is_invalid(handle))
+		vmci_qpair_detach(&qpair);
+
+	return err;
+}
+
+static int
+vmci_transport_recv_connecting_client_invalid(struct sock *sk,
+					      struct vmci_transport_packet *pkt)
+{
+	int err = 0;
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsk->sent_request) {
+		vsk->sent_request = false;
+		vsk->ignore_connecting_rst = true;
+
+		err = vmci_transport_send_conn_request(
+			sk, vmci_trans(vsk)->queue_pair_size);
+		if (err < 0)
+			err = vmci_transport_error_to_vsock_error(err);
+		else
+			err = 0;
+
+	}
+
+	return err;
+}
+
+static int vmci_transport_recv_connected(struct sock *sk,
+					 struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vsk;
+	bool pkt_processed = false;
+
+	/* In cases where we are closing the connection, it's sufficient to
+	 * mark the state change (and maybe error) and wake up any waiting
+	 * threads. Since this is a connected socket, it's owned by a user
+	 * process and will be cleaned up when the failure is passed back on
+	 * the current or next system call.  Our system call implementations
+	 * must therefore check for error and state changes on entry and when
+	 * being awoken.
+	 */
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
+		if (pkt->u.mode) {
+			vsk = vsock_sk(sk);
+
+			vsk->peer_shutdown |= pkt->u.mode;
+			sk->sk_state_change(sk);
+		}
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_RST:
+		vsk = vsock_sk(sk);
+		/* It is possible that we sent our peer a message (e.g a
+		 * WAITING_READ) right before we got notified that the peer had
+		 * detached. If that happens then we can get a RST pkt back
+		 * from our peer even though there is data available for us to
+		 * read. In that case, don't shutdown the socket completely but
+		 * instead allow the local client to finish reading data off
+		 * the queuepair. Always treat a RST pkt in connected mode like
+		 * a clean shutdown.
+		 */
+		sock_set_flag(sk, SOCK_DONE);
+		vsk->peer_shutdown = SHUTDOWN_MASK;
+		if (vsock_stream_has_data(vsk) <= 0)
+			sk->sk_state = SS_DISCONNECTING;
+
+		sk->sk_state_change(sk);
+		break;
+
+	default:
+		vsk = vsock_sk(sk);
+		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
+				sk, pkt, false, NULL, NULL,
+				&pkt_processed);
+		if (!pkt_processed)
+			return -EINVAL;
+
+		break;
+	}
+
+	return 0;
+}
+
+static int vmci_transport_socket_init(struct vsock_sock *vsk,
+				      struct vsock_sock *psk)
+{
+	vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL);
+	if (!vsk->trans)
+		return -ENOMEM;
+
+	vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
+	vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
+	vmci_trans(vsk)->qpair = NULL;
+	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
+	vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id =
+		VMCI_INVALID_ID;
+	vmci_trans(vsk)->notify_ops = NULL;
+	if (psk) {
+		vmci_trans(vsk)->queue_pair_size =
+			vmci_trans(psk)->queue_pair_size;
+		vmci_trans(vsk)->queue_pair_min_size =
+			vmci_trans(psk)->queue_pair_min_size;
+		vmci_trans(vsk)->queue_pair_max_size =
+			vmci_trans(psk)->queue_pair_max_size;
+	} else {
+		vmci_trans(vsk)->queue_pair_size =
+			VMCI_TRANSPORT_DEFAULT_QP_SIZE;
+		vmci_trans(vsk)->queue_pair_min_size =
+			 VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
+		vmci_trans(vsk)->queue_pair_max_size =
+			VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
+	}
+
+	return 0;
+}
+
+static void vmci_transport_destruct(struct vsock_sock *vsk)
+{
+	if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) {
+		vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id);
+		vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID;
+	}
+
+	if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
+		vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id);
+		vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
+	}
+
+	if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
+		vmci_qpair_detach(&vmci_trans(vsk)->qpair);
+		vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
+		vmci_trans(vsk)->produce_size = 0;
+		vmci_trans(vsk)->consume_size = 0;
+	}
+
+	if (vmci_trans(vsk)->notify_ops)
+		vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
+
+	kfree(vsk->trans);
+	vsk->trans = NULL;
+}
+
+static void vmci_transport_release(struct vsock_sock *vsk)
+{
+	if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
+		vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
+		vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
+	}
+}
+
+static int vmci_transport_dgram_bind(struct vsock_sock *vsk,
+				     struct sockaddr_vm *addr)
+{
+	u32 port;
+	u32 flags;
+	int err;
+
+	/* VMCI will select a resource ID for us if we provide
+	 * VMCI_INVALID_ID.
+	 */
+	port = addr->svm_port == VMADDR_PORT_ANY ?
+			VMCI_INVALID_ID : addr->svm_port;
+
+	if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	flags = addr->svm_cid == VMADDR_CID_ANY ?
+				VMCI_FLAG_ANYCID_DG_HND : 0;
+
+	err = vmci_transport_datagram_create_hnd(port, flags,
+						 vmci_transport_recv_dgram_cb,
+						 &vsk->sk,
+						 &vmci_trans(vsk)->dg_handle);
+	if (err < VMCI_SUCCESS)
+		return vmci_transport_error_to_vsock_error(err);
+	vsock_addr_init(&vsk->local_addr, addr->svm_cid,
+			vmci_trans(vsk)->dg_handle.resource);
+
+	return 0;
+}
+
+static int vmci_transport_dgram_enqueue(
+	struct vsock_sock *vsk,
+	struct sockaddr_vm *remote_addr,
+	struct iovec *iov,
+	size_t len)
+{
+	int err;
+	struct vmci_datagram *dg;
+
+	if (len > VMCI_MAX_DG_PAYLOAD_SIZE)
+		return -EMSGSIZE;
+
+	if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid))
+		return -EPERM;
+
+	/* Allocate a buffer for the user's message and our packet header. */
+	dg = kmalloc(len + sizeof(*dg), GFP_KERNEL);
+	if (!dg)
+		return -ENOMEM;
+
+	memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len);
+
+	dg->dst = vmci_make_handle(remote_addr->svm_cid,
+				   remote_addr->svm_port);
+	dg->src = vmci_make_handle(vsk->local_addr.svm_cid,
+				   vsk->local_addr.svm_port);
+	dg->payload_size = len;
+
+	err = vmci_datagram_send(dg);
+	kfree(dg);
+	if (err < 0)
+		return vmci_transport_error_to_vsock_error(err);
+
+	return err - sizeof(*dg);
+}
+
+static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
+					struct vsock_sock *vsk,
+					struct msghdr *msg, size_t len,
+					int flags)
+{
+	int err;
+	int noblock;
+	struct vmci_datagram *dg;
+	size_t payload_len;
+	struct sk_buff *skb;
+
+	noblock = flags & MSG_DONTWAIT;
+
+	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
+		return -EOPNOTSUPP;
+
+	/* Retrieve the head sk_buff from the socket's receive queue. */
+	err = 0;
+	skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
+	if (err)
+		return err;
+
+	if (!skb)
+		return -EAGAIN;
+
+	dg = (struct vmci_datagram *)skb->data;
+	if (!dg)
+		/* err is 0, meaning we read zero bytes. */
+		goto out;
+
+	payload_len = dg->payload_size;
+	/* Ensure the sk_buff matches the payload size claimed in the packet. */
+	if (payload_len != skb->len - sizeof(*dg)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (payload_len > len) {
+		payload_len = len;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	/* Place the datagram payload in the user's iovec. */
+	err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov,
+		payload_len);
+	if (err)
+		goto out;
+
+	msg->msg_namelen = 0;
+	if (msg->msg_name) {
+		struct sockaddr_vm *vm_addr;
+
+		/* Provide the address of the sender. */
+		vm_addr = (struct sockaddr_vm *)msg->msg_name;
+		vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
+		msg->msg_namelen = sizeof(*vm_addr);
+	}
+	err = payload_len;
+
+out:
+	skb_free_datagram(&vsk->sk, skb);
+	return err;
+}
+
+static bool vmci_transport_dgram_allow(u32 cid, u32 port)
+{
+	if (cid == VMADDR_CID_HYPERVISOR) {
+		/* Registrations of PBRPC Servers do not modify VMX/Hypervisor
+		 * state and are allowed.
+		 */
+		return port == VMCI_UNITY_PBRPC_REGISTER;
+	}
+
+	return true;
+}
+
+static int vmci_transport_connect(struct vsock_sock *vsk)
+{
+	int err;
+	bool old_pkt_proto = false;
+	struct sock *sk = &vsk->sk;
+
+	if (vmci_transport_old_proto_override(&old_pkt_proto) &&
+		old_pkt_proto) {
+		err = vmci_transport_send_conn_request(
+			sk, vmci_trans(vsk)->queue_pair_size);
+		if (err < 0) {
+			sk->sk_state = SS_UNCONNECTED;
+			return err;
+		}
+	} else {
+		int supported_proto_versions =
+			vmci_transport_new_proto_supported_versions();
+		err = vmci_transport_send_conn_request2(
+				sk, vmci_trans(vsk)->queue_pair_size,
+				supported_proto_versions);
+		if (err < 0) {
+			sk->sk_state = SS_UNCONNECTED;
+			return err;
+		}
+
+		vsk->sent_request = true;
+	}
+
+	return err;
+}
+
+static ssize_t vmci_transport_stream_dequeue(
+	struct vsock_sock *vsk,
+	struct iovec *iov,
+	size_t len,
+	int flags)
+{
+	if (flags & MSG_PEEK)
+		return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0);
+	else
+		return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0);
+}
+
+static ssize_t vmci_transport_stream_enqueue(
+	struct vsock_sock *vsk,
+	struct iovec *iov,
+	size_t len)
+{
+	return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0);
+}
+
+static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
+{
+	return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair);
+}
+
+static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk)
+{
+	return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair);
+}
+
+static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->consume_size;
+}
+
+static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
+{
+	return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
+}
+
+static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->queue_pair_size;
+}
+
+static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->queue_pair_min_size;
+}
+
+static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->queue_pair_max_size;
+}
+
+static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+	if (val < vmci_trans(vsk)->queue_pair_min_size)
+		vmci_trans(vsk)->queue_pair_min_size = val;
+	if (val > vmci_trans(vsk)->queue_pair_max_size)
+		vmci_trans(vsk)->queue_pair_max_size = val;
+	vmci_trans(vsk)->queue_pair_size = val;
+}
+
+static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
+					       u64 val)
+{
+	if (val > vmci_trans(vsk)->queue_pair_size)
+		vmci_trans(vsk)->queue_pair_size = val;
+	vmci_trans(vsk)->queue_pair_min_size = val;
+}
+
+static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
+					       u64 val)
+{
+	if (val < vmci_trans(vsk)->queue_pair_size)
+		vmci_trans(vsk)->queue_pair_size = val;
+	vmci_trans(vsk)->queue_pair_max_size = val;
+}
+
+static int vmci_transport_notify_poll_in(
+	struct vsock_sock *vsk,
+	size_t target,
+	bool *data_ready_now)
+{
+	return vmci_trans(vsk)->notify_ops->poll_in(
+			&vsk->sk, target, data_ready_now);
+}
+
+static int vmci_transport_notify_poll_out(
+	struct vsock_sock *vsk,
+	size_t target,
+	bool *space_available_now)
+{
+	return vmci_trans(vsk)->notify_ops->poll_out(
+			&vsk->sk, target, space_available_now);
+}
+
+static int vmci_transport_notify_recv_init(
+	struct vsock_sock *vsk,
+	size_t target,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_init(
+			&vsk->sk, target,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_recv_pre_block(
+	struct vsock_sock *vsk,
+	size_t target,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_pre_block(
+			&vsk->sk, target,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_recv_pre_dequeue(
+	struct vsock_sock *vsk,
+	size_t target,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_pre_dequeue(
+			&vsk->sk, target,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_recv_post_dequeue(
+	struct vsock_sock *vsk,
+	size_t target,
+	ssize_t copied,
+	bool data_read,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_post_dequeue(
+			&vsk->sk, target, copied, data_read,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_init(
+	struct vsock_sock *vsk,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_init(
+			&vsk->sk,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_pre_block(
+	struct vsock_sock *vsk,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_pre_block(
+			&vsk->sk,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_pre_enqueue(
+	struct vsock_sock *vsk,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_pre_enqueue(
+			&vsk->sk,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_post_enqueue(
+	struct vsock_sock *vsk,
+	ssize_t written,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_post_enqueue(
+			&vsk->sk, written,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static bool vmci_transport_old_proto_override(bool *old_pkt_proto)
+{
+	if (PROTOCOL_OVERRIDE != -1) {
+		if (PROTOCOL_OVERRIDE == 0)
+			*old_pkt_proto = true;
+		else
+			*old_pkt_proto = false;
+
+		pr_info("Proto override in use\n");
+		return true;
+	}
+
+	return false;
+}
+
+static bool vmci_transport_proto_to_notify_struct(struct sock *sk,
+						  u16 *proto,
+						  bool old_pkt_proto)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (old_pkt_proto) {
+		if (*proto != VSOCK_PROTO_INVALID) {
+			pr_err("Can't set both an old and new protocol\n");
+			return false;
+		}
+		vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops;
+		goto exit;
+	}
+
+	switch (*proto) {
+	case VSOCK_PROTO_PKT_ON_NOTIFY:
+		vmci_trans(vsk)->notify_ops =
+			&vmci_transport_notify_pkt_q_state_ops;
+		break;
+	default:
+		pr_err("Unknown notify protocol version\n");
+		return false;
+	}
+
+exit:
+	vmci_trans(vsk)->notify_ops->socket_init(sk);
+	return true;
+}
+
+static u16 vmci_transport_new_proto_supported_versions(void)
+{
+	if (PROTOCOL_OVERRIDE != -1)
+		return PROTOCOL_OVERRIDE;
+
+	return VSOCK_PROTO_ALL_SUPPORTED;
+}
+
+static u32 vmci_transport_get_local_cid(void)
+{
+	return vmci_get_context_id();
+}
+
+static struct vsock_transport vmci_transport = {
+	.init = vmci_transport_socket_init,
+	.destruct = vmci_transport_destruct,
+	.release = vmci_transport_release,
+	.connect = vmci_transport_connect,
+	.dgram_bind = vmci_transport_dgram_bind,
+	.dgram_dequeue = vmci_transport_dgram_dequeue,
+	.dgram_enqueue = vmci_transport_dgram_enqueue,
+	.dgram_allow = vmci_transport_dgram_allow,
+	.stream_dequeue = vmci_transport_stream_dequeue,
+	.stream_enqueue = vmci_transport_stream_enqueue,
+	.stream_has_data = vmci_transport_stream_has_data,
+	.stream_has_space = vmci_transport_stream_has_space,
+	.stream_rcvhiwat = vmci_transport_stream_rcvhiwat,
+	.stream_is_active = vmci_transport_stream_is_active,
+	.stream_allow = vmci_transport_stream_allow,
+	.notify_poll_in = vmci_transport_notify_poll_in,
+	.notify_poll_out = vmci_transport_notify_poll_out,
+	.notify_recv_init = vmci_transport_notify_recv_init,
+	.notify_recv_pre_block = vmci_transport_notify_recv_pre_block,
+	.notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue,
+	.notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue,
+	.notify_send_init = vmci_transport_notify_send_init,
+	.notify_send_pre_block = vmci_transport_notify_send_pre_block,
+	.notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
+	.notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
+	.shutdown = vmci_transport_shutdown,
+	.set_buffer_size = vmci_transport_set_buffer_size,
+	.set_min_buffer_size = vmci_transport_set_min_buffer_size,
+	.set_max_buffer_size = vmci_transport_set_max_buffer_size,
+	.get_buffer_size = vmci_transport_get_buffer_size,
+	.get_min_buffer_size = vmci_transport_get_min_buffer_size,
+	.get_max_buffer_size = vmci_transport_get_max_buffer_size,
+	.get_local_cid = vmci_transport_get_local_cid,
+};
+
+static int __init vmci_transport_init(void)
+{
+	int err;
+
+	/* Create the datagram handle that we will use to send and receive all
+	 * VSocket control messages for this context.
+	 */
+	err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID,
+						 VMCI_FLAG_ANYCID_DG_HND,
+						 vmci_transport_recv_stream_cb,
+						 NULL,
+						 &vmci_transport_stream_handle);
+	if (err < VMCI_SUCCESS) {
+		pr_err("Unable to create datagram handle. (%d)\n", err);
+		return vmci_transport_error_to_vsock_error(err);
+	}
+
+	err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED,
+				   vmci_transport_qp_resumed_cb,
+				   NULL, &vmci_transport_qp_resumed_sub_id);
+	if (err < VMCI_SUCCESS) {
+		pr_err("Unable to subscribe to resumed event. (%d)\n", err);
+		err = vmci_transport_error_to_vsock_error(err);
+		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+		goto err_destroy_stream_handle;
+	}
+
+	err = vsock_core_init(&vmci_transport);
+	if (err < 0)
+		goto err_unsubscribe;
+
+	return 0;
+
+err_unsubscribe:
+	vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
+err_destroy_stream_handle:
+	vmci_datagram_destroy_handle(vmci_transport_stream_handle);
+	return err;
+}
+module_init(vmci_transport_init);
+
+static void __exit vmci_transport_exit(void)
+{
+	if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
+		if (vmci_datagram_destroy_handle(
+			vmci_transport_stream_handle) != VMCI_SUCCESS)
+			pr_err("Couldn't destroy datagram handle\n");
+		vmci_transport_stream_handle = VMCI_INVALID_HANDLE;
+	}
+
+	if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) {
+		vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
+		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+	}
+
+	vsock_core_exit();
+}
+module_exit(vmci_transport_exit);
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("vmware_vsock");
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
new file mode 100644
index 000000000000..1bf991803ec0
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport.h
@@ -0,0 +1,139 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VMCI_TRANSPORT_H_
+#define _VMCI_TRANSPORT_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+
+#include "vsock_addr.h"
+#include "af_vsock.h"
+
+/* If the packet format changes in a release then this should change too. */
+#define VMCI_TRANSPORT_PACKET_VERSION 1
+
+/* The resource ID on which control packets are sent. */
+#define VMCI_TRANSPORT_PACKET_RID 1
+
+#define VSOCK_PROTO_INVALID        0
+#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0)
+#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY)
+
+#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans))
+
+enum vmci_transport_packet_type {
+	VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0,
+	VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
+	VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
+	VMCI_TRANSPORT_PACKET_TYPE_OFFER,
+	VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
+	VMCI_TRANSPORT_PACKET_TYPE_WROTE,
+	VMCI_TRANSPORT_PACKET_TYPE_READ,
+	VMCI_TRANSPORT_PACKET_TYPE_RST,
+	VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
+	VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
+	VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
+	VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
+	VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
+	VMCI_TRANSPORT_PACKET_TYPE_MAX
+};
+
+struct vmci_transport_waiting_info {
+	u64 generation;
+	u64 offset;
+};
+
+/* Control packet type for STREAM sockets.  DGRAMs have no control packets nor
+ * special packet header for data packets, they are just raw VMCI DGRAM
+ * messages.  For STREAMs, control packets are sent over the control channel
+ * while data is written and read directly from queue pairs with no packet
+ * format.
+ */
+struct vmci_transport_packet {
+	struct vmci_datagram dg;
+	u8 version;
+	u8 type;
+	u16 proto;
+	u32 src_port;
+	u32 dst_port;
+	u32 _reserved2;
+	union {
+		u64 size;
+		u64 mode;
+		struct vmci_handle handle;
+		struct vmci_transport_waiting_info wait;
+	} u;
+};
+
+struct vmci_transport_notify_pkt {
+	u64 write_notify_window;
+	u64 write_notify_min_window;
+	bool peer_waiting_read;
+	bool peer_waiting_write;
+	bool peer_waiting_write_detected;
+	bool sent_waiting_read;
+	bool sent_waiting_write;
+	struct vmci_transport_waiting_info peer_waiting_read_info;
+	struct vmci_transport_waiting_info peer_waiting_write_info;
+	u64 produce_q_generation;
+	u64 consume_q_generation;
+};
+
+struct vmci_transport_notify_pkt_q_state {
+	u64 write_notify_window;
+	u64 write_notify_min_window;
+	bool peer_waiting_write;
+	bool peer_waiting_write_detected;
+};
+
+union vmci_transport_notify {
+	struct vmci_transport_notify_pkt pkt;
+	struct vmci_transport_notify_pkt_q_state pkt_q_state;
+};
+
+/* Our transport-specific data. */
+struct vmci_transport {
+	/* For DGRAMs. */
+	struct vmci_handle dg_handle;
+	/* For STREAMs. */
+	struct vmci_handle qp_handle;
+	struct vmci_qp *qpair;
+	u64 produce_size;
+	u64 consume_size;
+	u64 queue_pair_size;
+	u64 queue_pair_min_size;
+	u64 queue_pair_max_size;
+	u32 attach_sub_id;
+	u32 detach_sub_id;
+	union vmci_transport_notify notify;
+	struct vmci_transport_notify_ops *notify_ops;
+};
+
+int vmci_transport_register(void);
+void vmci_transport_unregister(void);
+
+int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
+				 struct sockaddr_vm *src);
+int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
+				struct sockaddr_vm *src);
+int vmci_transport_send_wrote(struct sock *sk);
+int vmci_transport_send_read(struct sock *sk);
+int vmci_transport_send_waiting_write(struct sock *sk,
+				      struct vmci_transport_waiting_info *wait);
+int vmci_transport_send_waiting_read(struct sock *sk,
+				     struct vmci_transport_waiting_info *wait);
+
+#endif
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
new file mode 100644
index 000000000000..9a730744e7bc
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -0,0 +1,680 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+
+#include "vmci_transport_notify.h"
+
+#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name)
+
+static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	bool retval;
+	u64 notify_limit;
+
+	if (!PKT_FIELD(vsk, peer_waiting_write))
+		return false;
+
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	/* When the sender blocks, we take that as a sign that the sender is
+	 * faster than the receiver. To reduce the transmit rate of the sender,
+	 * we delay the sending of the read notification by decreasing the
+	 * write_notify_window. The notification is delayed until the number of
+	 * bytes used in the queue drops below the write_notify_window.
+	 */
+
+	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
+		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
+		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+		} else {
+			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
+			if (PKT_FIELD(vsk, write_notify_window) <
+			    PKT_FIELD(vsk, write_notify_min_window))
+				PKT_FIELD(vsk, write_notify_window) =
+				    PKT_FIELD(vsk, write_notify_min_window);
+
+		}
+	}
+	notify_limit = vmci_trans(vsk)->consume_size -
+		PKT_FIELD(vsk, write_notify_window);
+#else
+	notify_limit = 0;
+#endif
+
+	/* For now we ignore the wait information and just see if the free
+	 * space exceeds the notify limit.  Note that improving this function
+	 * to be more intelligent will not require a protocol change and will
+	 * retain compatibility between endpoints with mixed versions of this
+	 * function.
+	 *
+	 * The notify_limit is used to delay notifications in the case where
+	 * flow control is enabled. Below the test is expressed in terms of
+	 * free space in the queue: if free_space > ConsumeSize -
+	 * write_notify_window then notify An alternate way of expressing this
+	 * is to rewrite the expression to use the data ready in the receive
+	 * queue: if write_notify_window > bufferReady then notify as
+	 * free_space == ConsumeSize - bufferReady.
+	 */
+	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
+		notify_limit;
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	if (retval) {
+		/*
+		 * Once we notify the peer, we reset the detected flag so the
+		 * next wait will again cause a decrease in the window size.
+		 */
+
+		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	}
+#endif
+	return retval;
+#else
+	return true;
+#endif
+}
+
+static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	if (!PKT_FIELD(vsk, peer_waiting_read))
+		return false;
+
+	/* For now we ignore the wait information and just see if there is any
+	 * data for our peer to read.  Note that improving this function to be
+	 * more intelligent will not require a protocol change and will retain
+	 * compatibility between endpoints with mixed versions of this
+	 * function.
+	 */
+	return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
+#else
+	return true;
+#endif
+}
+
+static void
+vmci_transport_handle_waiting_read(struct sock *sk,
+				   struct vmci_transport_packet *pkt,
+				   bool bottom_half,
+				   struct sockaddr_vm *dst,
+				   struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, peer_waiting_read) = true;
+	memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
+
+	if (vmci_transport_notify_waiting_read(vsk)) {
+		bool sent;
+
+		if (bottom_half)
+			sent = vmci_transport_send_wrote_bh(dst, src) > 0;
+		else
+			sent = vmci_transport_send_wrote(sk) > 0;
+
+		if (sent)
+			PKT_FIELD(vsk, peer_waiting_read) = false;
+	}
+#endif
+}
+
+static void
+vmci_transport_handle_waiting_write(struct sock *sk,
+				    struct vmci_transport_packet *pkt,
+				    bool bottom_half,
+				    struct sockaddr_vm *dst,
+				    struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, peer_waiting_write) = true;
+	memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		bool sent;
+
+		if (bottom_half)
+			sent = vmci_transport_send_read_bh(dst, src) > 0;
+		else
+			sent = vmci_transport_send_read(sk) > 0;
+
+		if (sent)
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+	}
+#endif
+}
+
+static void
+vmci_transport_handle_read(struct sock *sk,
+			   struct vmci_transport_packet *pkt,
+			   bool bottom_half,
+			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+	PKT_FIELD(vsk, sent_waiting_write) = false;
+#endif
+
+	sk->sk_write_space(sk);
+}
+
+static bool send_waiting_read(struct sock *sk, u64 room_needed)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+	struct vmci_transport_waiting_info waiting_info;
+	u64 tail;
+	u64 head;
+	u64 room_left;
+	bool ret;
+
+	vsk = vsock_sk(sk);
+
+	if (PKT_FIELD(vsk, sent_waiting_read))
+		return true;
+
+	if (PKT_FIELD(vsk, write_notify_window) <
+			vmci_trans(vsk)->consume_size)
+		PKT_FIELD(vsk, write_notify_window) =
+		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
+			vmci_trans(vsk)->consume_size);
+
+	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
+	room_left = vmci_trans(vsk)->consume_size - head;
+	if (room_needed >= room_left) {
+		waiting_info.offset = room_needed - room_left;
+		waiting_info.generation =
+		    PKT_FIELD(vsk, consume_q_generation) + 1;
+	} else {
+		waiting_info.offset = head + room_needed;
+		waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
+	}
+
+	ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
+	if (ret)
+		PKT_FIELD(vsk, sent_waiting_read) = true;
+
+	return ret;
+#else
+	return true;
+#endif
+}
+
+static bool send_waiting_write(struct sock *sk, u64 room_needed)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+	struct vmci_transport_waiting_info waiting_info;
+	u64 tail;
+	u64 head;
+	u64 room_left;
+	bool ret;
+
+	vsk = vsock_sk(sk);
+
+	if (PKT_FIELD(vsk, sent_waiting_write))
+		return true;
+
+	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
+	room_left = vmci_trans(vsk)->produce_size - tail;
+	if (room_needed + 1 >= room_left) {
+		/* Wraps around to current generation. */
+		waiting_info.offset = room_needed + 1 - room_left;
+		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
+	} else {
+		waiting_info.offset = tail + room_needed + 1;
+		waiting_info.generation =
+		    PKT_FIELD(vsk, produce_q_generation) - 1;
+	}
+
+	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
+	if (ret)
+		PKT_FIELD(vsk, sent_waiting_write) = true;
+
+	return ret;
+#else
+	return true;
+#endif
+}
+
+static int vmci_transport_send_read_notification(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+	bool sent_read;
+	unsigned int retries;
+	int err;
+
+	vsk = vsock_sk(sk);
+	sent_read = false;
+	retries = 0;
+	err = 0;
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		/* Notify the peer that we have read, retrying the send on
+		 * failure up to our maximum value.  XXX For now we just log
+		 * the failure, but later we should schedule a work item to
+		 * handle the resend until it succeeds.  That would require
+		 * keeping track of work items in the vsk and cleaning them up
+		 * upon socket close.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_read &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_read(sk);
+			if (err >= 0)
+				sent_read = true;
+
+			retries++;
+		}
+
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
+			pr_err("%p unable to send read notify to peer\n", sk);
+		else
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+#endif
+
+	}
+	return err;
+}
+
+static void
+vmci_transport_handle_wrote(struct sock *sk,
+			    struct vmci_transport_packet *pkt,
+			    bool bottom_half,
+			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk = vsock_sk(sk);
+	PKT_FIELD(vsk, sent_waiting_read) = false;
+#endif
+	sk->sk_data_ready(sk, 0);
+}
+
+static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_read) = false;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	PKT_FIELD(vsk, sent_waiting_read) = false;
+	PKT_FIELD(vsk, sent_waiting_write) = false;
+	PKT_FIELD(vsk, produce_q_generation) = 0;
+	PKT_FIELD(vsk, consume_q_generation) = 0;
+
+	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
+	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
+}
+
+static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
+{
+}
+
+static int
+vmci_transport_notify_pkt_poll_in(struct sock *sk,
+				  size_t target, bool *data_ready_now)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsock_stream_has_data(vsk)) {
+		*data_ready_now = true;
+	} else {
+		/* We can't read right now because there is nothing in the
+		 * queue. Ask for notifications when there is something to
+		 * read.
+		 */
+		if (sk->sk_state == SS_CONNECTED) {
+			if (!send_waiting_read(sk, 1))
+				return -1;
+
+		}
+		*data_ready_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_poll_out(struct sock *sk,
+				   size_t target, bool *space_avail_now)
+{
+	s64 produce_q_free_space;
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	produce_q_free_space = vsock_stream_has_space(vsk);
+	if (produce_q_free_space > 0) {
+		*space_avail_now = true;
+		return 0;
+	} else if (produce_q_free_space == 0) {
+		/* This is a connected socket but we can't currently send data.
+		 * Notify the peer that we are waiting if the queue is full. We
+		 * only send a waiting write if the queue is full because
+		 * otherwise we end up in an infinite WAITING_WRITE, READ,
+		 * WAITING_WRITE, READ, etc. loop. Treat failing to send the
+		 * notification as a socket error, passing that back through
+		 * the mask.
+		 */
+		if (!send_waiting_write(sk, 1))
+			return -1;
+
+		*space_avail_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_init(
+			struct sock *sk,
+			size_t target,
+			struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
+	data->consume_head = 0;
+	data->produce_tail = 0;
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	data->notify_on_block = false;
+
+	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
+		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
+		if (PKT_FIELD(vsk, write_notify_window) <
+		    PKT_FIELD(vsk, write_notify_min_window)) {
+			/* If the current window is smaller than the new
+			 * minimal window size, we need to reevaluate whether
+			 * we need to notify the sender. If the number of ready
+			 * bytes are smaller than the new window, we need to
+			 * send a notification to the sender before we block.
+			 */
+
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+			data->notify_on_block = true;
+		}
+	}
+#endif
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_block(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	int err = 0;
+
+	/* Notify our peer that we are waiting for data to read. */
+	if (!send_waiting_read(sk, target)) {
+		err = -EHOSTUNREACH;
+		return err;
+	}
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	if (data->notify_on_block) {
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+		data->notify_on_block = false;
+	}
+#endif
+
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_dequeue(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	/* Now consume up to len bytes from the queue.  Note that since we have
+	 * the socket locked we should copy at least ready bytes.
+	 */
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
+				       &data->produce_tail,
+				       &data->consume_head);
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_post_dequeue(
+				struct sock *sk,
+				size_t target,
+				ssize_t copied,
+				bool data_read,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk;
+	int err;
+
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	if (data_read) {
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+		/* Detect a wrap-around to maintain queue generation.  Note
+		 * that this is safe since we hold the socket lock across the
+		 * two queue pair operations.
+		 */
+		if (copied >=
+			vmci_trans(vsk)->consume_size - data->consume_head)
+			PKT_FIELD(vsk, consume_q_generation)++;
+#endif
+
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+	}
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_send_init(
+			struct sock *sk,
+			struct vmci_transport_send_notify_data *data)
+{
+#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
+	data->consume_head = 0;
+	data->produce_tail = 0;
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_block(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	/* Notify our peer that we are waiting for room to write. */
+	if (!send_waiting_write(sk, 1))
+		return -EHOSTUNREACH;
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_enqueue(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
+				       &data->produce_tail,
+				       &data->consume_head);
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_post_enqueue(
+				struct sock *sk,
+				ssize_t written,
+				struct vmci_transport_send_notify_data *data)
+{
+	int err = 0;
+	struct vsock_sock *vsk;
+	bool sent_wrote = false;
+	int retries = 0;
+
+	vsk = vsock_sk(sk);
+
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	/* Detect a wrap-around to maintain queue generation.  Note that this
+	 * is safe since we hold the socket lock across the two queue pair
+	 * operations.
+	 */
+	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
+		PKT_FIELD(vsk, produce_q_generation)++;
+
+#endif
+
+	if (vmci_transport_notify_waiting_read(vsk)) {
+		/* Notify the peer that we have written, retrying the send on
+		 * failure up to our maximum value. See the XXX comment for the
+		 * corresponding piece of code in StreamRecvmsg() for potential
+		 * improvements.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_wrote &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_wrote(sk);
+			if (err >= 0)
+				sent_wrote = true;
+
+			retries++;
+		}
+
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			pr_err("%p unable to send wrote notify to peer\n", sk);
+			return err;
+		} else {
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+			PKT_FIELD(vsk, peer_waiting_read) = false;
+#endif
+		}
+	}
+	return err;
+}
+
+static void
+vmci_transport_notify_pkt_handle_pkt(
+			struct sock *sk,
+			struct vmci_transport_packet *pkt,
+			bool bottom_half,
+			struct sockaddr_vm *dst,
+			struct sockaddr_vm *src, bool *pkt_processed)
+{
+	bool processed = false;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
+		vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
+						    dst, src);
+		processed = true;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
+		vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
+						   dst, src);
+		processed = true;
+		break;
+	}
+
+	if (pkt_processed)
+		*pkt_processed = processed;
+}
+
+static void vmci_transport_notify_pkt_process_request(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+/* Socket control packet based operations. */
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
+	vmci_transport_notify_pkt_socket_init,
+	vmci_transport_notify_pkt_socket_destruct,
+	vmci_transport_notify_pkt_poll_in,
+	vmci_transport_notify_pkt_poll_out,
+	vmci_transport_notify_pkt_handle_pkt,
+	vmci_transport_notify_pkt_recv_init,
+	vmci_transport_notify_pkt_recv_pre_block,
+	vmci_transport_notify_pkt_recv_pre_dequeue,
+	vmci_transport_notify_pkt_recv_post_dequeue,
+	vmci_transport_notify_pkt_send_init,
+	vmci_transport_notify_pkt_send_pre_block,
+	vmci_transport_notify_pkt_send_pre_enqueue,
+	vmci_transport_notify_pkt_send_post_enqueue,
+	vmci_transport_notify_pkt_process_request,
+	vmci_transport_notify_pkt_process_negotiate,
+};
diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h
new file mode 100644
index 000000000000..7df793249b6c
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport_notify.h
@@ -0,0 +1,83 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __VMCI_TRANSPORT_NOTIFY_H__
+#define __VMCI_TRANSPORT_NOTIFY_H__
+
+#include <linux/types.h>
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/vm_sockets.h>
+
+#include "vmci_transport.h"
+
+/* Comment this out to compare with old protocol. */
+#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+/* Comment this out to remove flow control for "new" protocol */
+#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1
+#endif
+
+#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS       10
+
+struct vmci_transport_recv_notify_data {
+	u64 consume_head;
+	u64 produce_tail;
+	bool notify_on_block;
+};
+
+struct vmci_transport_send_notify_data {
+	u64 consume_head;
+	u64 produce_tail;
+};
+
+/* Socket notification callbacks. */
+struct vmci_transport_notify_ops {
+	void (*socket_init) (struct sock *sk);
+	void (*socket_destruct) (struct vsock_sock *vsk);
+	int (*poll_in) (struct sock *sk, size_t target,
+			  bool *data_ready_now);
+	int (*poll_out) (struct sock *sk, size_t target,
+			   bool *space_avail_now);
+	void (*handle_notify_pkt) (struct sock *sk,
+				   struct vmci_transport_packet *pkt,
+				   bool bottom_half, struct sockaddr_vm *dst,
+				   struct sockaddr_vm *src,
+				   bool *pkt_processed);
+	int (*recv_init) (struct sock *sk, size_t target,
+			  struct vmci_transport_recv_notify_data *data);
+	int (*recv_pre_block) (struct sock *sk, size_t target,
+			       struct vmci_transport_recv_notify_data *data);
+	int (*recv_pre_dequeue) (struct sock *sk, size_t target,
+				 struct vmci_transport_recv_notify_data *data);
+	int (*recv_post_dequeue) (struct sock *sk, size_t target,
+				  ssize_t copied, bool data_read,
+				  struct vmci_transport_recv_notify_data *data);
+	int (*send_init) (struct sock *sk,
+			  struct vmci_transport_send_notify_data *data);
+	int (*send_pre_block) (struct sock *sk,
+			       struct vmci_transport_send_notify_data *data);
+	int (*send_pre_enqueue) (struct sock *sk,
+				 struct vmci_transport_send_notify_data *data);
+	int (*send_post_enqueue) (struct sock *sk, ssize_t written,
+				  struct vmci_transport_send_notify_data *data);
+	void (*process_request) (struct sock *sk);
+	void (*process_negotiate) (struct sock *sk);
+};
+
+extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
+extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
+
+#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
new file mode 100644
index 000000000000..622bd7aa1016
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -0,0 +1,438 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+
+#include "vmci_transport_notify.h"
+
+#define PKT_FIELD(vsk, field_name) \
+	(vmci_trans(vsk)->notify.pkt_q_state.field_name)
+
+static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
+{
+	bool retval;
+	u64 notify_limit;
+
+	if (!PKT_FIELD(vsk, peer_waiting_write))
+		return false;
+
+	/* When the sender blocks, we take that as a sign that the sender is
+	 * faster than the receiver. To reduce the transmit rate of the sender,
+	 * we delay the sending of the read notification by decreasing the
+	 * write_notify_window. The notification is delayed until the number of
+	 * bytes used in the queue drops below the write_notify_window.
+	 */
+
+	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
+		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
+		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+		} else {
+			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
+			if (PKT_FIELD(vsk, write_notify_window) <
+			    PKT_FIELD(vsk, write_notify_min_window))
+				PKT_FIELD(vsk, write_notify_window) =
+				    PKT_FIELD(vsk, write_notify_min_window);
+
+		}
+	}
+	notify_limit = vmci_trans(vsk)->consume_size -
+		PKT_FIELD(vsk, write_notify_window);
+
+	/* The notify_limit is used to delay notifications in the case where
+	 * flow control is enabled. Below the test is expressed in terms of
+	 * free space in the queue: if free_space > ConsumeSize -
+	 * write_notify_window then notify An alternate way of expressing this
+	 * is to rewrite the expression to use the data ready in the receive
+	 * queue: if write_notify_window > bufferReady then notify as
+	 * free_space == ConsumeSize - bufferReady.
+	 */
+
+	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
+		notify_limit;
+
+	if (retval) {
+		/* Once we notify the peer, we reset the detected flag so the
+		 * next wait will again cause a decrease in the window size.
+		 */
+
+		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	}
+	return retval;
+}
+
+static void
+vmci_transport_handle_read(struct sock *sk,
+			   struct vmci_transport_packet *pkt,
+			   bool bottom_half,
+			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+	sk->sk_write_space(sk);
+}
+
+static void
+vmci_transport_handle_wrote(struct sock *sk,
+			    struct vmci_transport_packet *pkt,
+			    bool bottom_half,
+			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+	sk->sk_data_ready(sk, 0);
+}
+
+static void vsock_block_update_write_window(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size)
+		PKT_FIELD(vsk, write_notify_window) =
+		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
+			vmci_trans(vsk)->consume_size);
+}
+
+static int vmci_transport_send_read_notification(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+	bool sent_read;
+	unsigned int retries;
+	int err;
+
+	vsk = vsock_sk(sk);
+	sent_read = false;
+	retries = 0;
+	err = 0;
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		/* Notify the peer that we have read, retrying the send on
+		 * failure up to our maximum value.  XXX For now we just log
+		 * the failure, but later we should schedule a work item to
+		 * handle the resend until it succeeds.  That would require
+		 * keeping track of work items in the vsk and cleaning them up
+		 * upon socket close.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_read &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_read(sk);
+			if (err >= 0)
+				sent_read = true;
+
+			retries++;
+		}
+
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read)
+			pr_err("%p unable to send read notification to peer\n",
+			       sk);
+		else
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+
+	}
+	return err;
+}
+
+static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+}
+
+static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
+{
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+}
+
+static int
+vmci_transport_notify_pkt_poll_in(struct sock *sk,
+				  size_t target, bool *data_ready_now)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsock_stream_has_data(vsk)) {
+		*data_ready_now = true;
+	} else {
+		/* We can't read right now because there is nothing in the
+		 * queue. Ask for notifications when there is something to
+		 * read.
+		 */
+		if (sk->sk_state == SS_CONNECTED)
+			vsock_block_update_write_window(sk);
+		*data_ready_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_poll_out(struct sock *sk,
+				   size_t target, bool *space_avail_now)
+{
+	s64 produce_q_free_space;
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	produce_q_free_space = vsock_stream_has_space(vsk);
+	if (produce_q_free_space > 0) {
+		*space_avail_now = true;
+		return 0;
+	} else if (produce_q_free_space == 0) {
+		/* This is a connected socket but we can't currently send data.
+		 * Nothing else to do.
+		 */
+		*space_avail_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_init(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	data->consume_head = 0;
+	data->produce_tail = 0;
+	data->notify_on_block = false;
+
+	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
+		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
+		if (PKT_FIELD(vsk, write_notify_window) <
+		    PKT_FIELD(vsk, write_notify_min_window)) {
+			/* If the current window is smaller than the new
+			 * minimal window size, we need to reevaluate whether
+			 * we need to notify the sender. If the number of ready
+			 * bytes are smaller than the new window, we need to
+			 * send a notification to the sender before we block.
+			 */
+
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+			data->notify_on_block = true;
+		}
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_block(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	int err = 0;
+
+	vsock_block_update_write_window(sk);
+
+	if (data->notify_on_block) {
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+		data->notify_on_block = false;
+	}
+
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_recv_post_dequeue(
+				struct sock *sk,
+				size_t target,
+				ssize_t copied,
+				bool data_read,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk;
+	int err;
+	bool was_full = false;
+	u64 free_space;
+
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	if (data_read) {
+		smp_mb();
+
+		free_space =
+			vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair);
+		was_full = free_space == copied;
+
+		if (was_full)
+			PKT_FIELD(vsk, peer_waiting_write) = true;
+
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+		/* See the comment in
+		 * vmci_transport_notify_pkt_send_post_enqueue().
+		 */
+		sk->sk_data_ready(sk, 0);
+	}
+
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_send_init(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	data->consume_head = 0;
+	data->produce_tail = 0;
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_post_enqueue(
+				struct sock *sk,
+				ssize_t written,
+				struct vmci_transport_send_notify_data *data)
+{
+	int err = 0;
+	struct vsock_sock *vsk;
+	bool sent_wrote = false;
+	bool was_empty;
+	int retries = 0;
+
+	vsk = vsock_sk(sk);
+
+	smp_mb();
+
+	was_empty =
+		vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written;
+	if (was_empty) {
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_wrote &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_wrote(sk);
+			if (err >= 0)
+				sent_wrote = true;
+
+			retries++;
+		}
+	}
+
+	if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) {
+		pr_err("%p unable to send wrote notification to peer\n",
+		       sk);
+		return err;
+	}
+
+	return err;
+}
+
+static void
+vmci_transport_notify_pkt_handle_pkt(
+				struct sock *sk,
+				struct vmci_transport_packet *pkt,
+				bool bottom_half,
+				struct sockaddr_vm *dst,
+				struct sockaddr_vm *src, bool *pkt_processed)
+{
+	bool processed = false;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	}
+
+	if (pkt_processed)
+		*pkt_processed = processed;
+}
+
+static void vmci_transport_notify_pkt_process_request(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_dequeue(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	return 0; /* NOP for QState. */
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_block(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	return 0; /* NOP for QState. */
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_enqueue(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	return 0; /* NOP for QState. */
+}
+
+/* Socket always on control packet based operations. */
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
+	vmci_transport_notify_pkt_socket_init,
+	vmci_transport_notify_pkt_socket_destruct,
+	vmci_transport_notify_pkt_poll_in,
+	vmci_transport_notify_pkt_poll_out,
+	vmci_transport_notify_pkt_handle_pkt,
+	vmci_transport_notify_pkt_recv_init,
+	vmci_transport_notify_pkt_recv_pre_block,
+	vmci_transport_notify_pkt_recv_pre_dequeue,
+	vmci_transport_notify_pkt_recv_post_dequeue,
+	vmci_transport_notify_pkt_send_init,
+	vmci_transport_notify_pkt_send_pre_block,
+	vmci_transport_notify_pkt_send_pre_enqueue,
+	vmci_transport_notify_pkt_send_post_enqueue,
+	vmci_transport_notify_pkt_process_request,
+	vmci_transport_notify_pkt_process_negotiate,
+};
diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c
new file mode 100644
index 000000000000..b7df1aea7c59
--- /dev/null
+++ b/net/vmw_vsock/vsock_addr.c
@@ -0,0 +1,86 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+
+#include "vsock_addr.h"
+
+void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port)
+{
+	memset(addr, 0, sizeof(*addr));
+	addr->svm_family = AF_VSOCK;
+	addr->svm_cid = cid;
+	addr->svm_port = port;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_init);
+
+int vsock_addr_validate(const struct sockaddr_vm *addr)
+{
+	if (!addr)
+		return -EFAULT;
+
+	if (addr->svm_family != AF_VSOCK)
+		return -EAFNOSUPPORT;
+
+	if (addr->svm_zero[0] != 0)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_validate);
+
+bool vsock_addr_bound(const struct sockaddr_vm *addr)
+{
+	return addr->svm_port != VMADDR_PORT_ANY;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_bound);
+
+void vsock_addr_unbind(struct sockaddr_vm *addr)
+{
+	vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+}
+EXPORT_SYMBOL_GPL(vsock_addr_unbind);
+
+bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
+			    const struct sockaddr_vm *other)
+{
+	return addr->svm_cid == other->svm_cid &&
+		addr->svm_port == other->svm_port;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_equals_addr);
+
+bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr,
+				const struct sockaddr_vm *other)
+{
+	return (addr->svm_cid == VMADDR_CID_ANY ||
+		other->svm_cid == VMADDR_CID_ANY ||
+		addr->svm_cid == other->svm_cid) &&
+	       addr->svm_port == other->svm_port;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any);
+
+int vsock_addr_cast(const struct sockaddr *addr,
+		    size_t len, struct sockaddr_vm **out_addr)
+{
+	if (len < sizeof(**out_addr))
+		return -EFAULT;
+
+	*out_addr = (struct sockaddr_vm *)addr;
+	return vsock_addr_validate(*out_addr);
+}
+EXPORT_SYMBOL_GPL(vsock_addr_cast);
diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h
new file mode 100644
index 000000000000..cdfbcefdf843
--- /dev/null
+++ b/net/vmw_vsock/vsock_addr.h
@@ -0,0 +1,32 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VSOCK_ADDR_H_
+#define _VSOCK_ADDR_H_
+
+#include <linux/vm_sockets.h>
+
+void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port);
+int vsock_addr_validate(const struct sockaddr_vm *addr);
+bool vsock_addr_bound(const struct sockaddr_vm *addr);
+void vsock_addr_unbind(struct sockaddr_vm *addr);
+bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
+			    const struct sockaddr_vm *other);
+bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr,
+				const struct sockaddr_vm *other);
+int vsock_addr_cast(const struct sockaddr *addr, size_t len,
+		    struct sockaddr_vm **out_addr);
+
+#endif
diff --git a/net/vmw_vsock/vsock_version.h b/net/vmw_vsock/vsock_version.h
new file mode 100644
index 000000000000..4df7f5e2151c
--- /dev/null
+++ b/net/vmw_vsock/vsock_version.h
@@ -0,0 +1,22 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2011-2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VSOCK_VERSION_H_
+#define _VSOCK_VERSION_H_
+
+#define VSOCK_DRIVER_VERSION_PARTS	{ 1, 0, 0, 0 }
+#define VSOCK_DRIVER_VERSION_STRING	"1.0.0.0-k"
+
+#endif /* _VSOCK_VERSION_H_ */
-- 
cgit v1.2.3


From febf018d22347b5df94066bca05d0c11a84e839d Mon Sep 17 00:00:00 2001
From: David Ward <david.ward@ll.mit.edu>
Date: Fri, 8 Feb 2013 17:17:06 +0000
Subject: net/802: Implement Multiple Registration Protocol (MRP)

Initial implementation of the Multiple Registration Protocol (MRP)
from IEEE 802.1Q-2011, based on the existing implementation of the
Generic Attribute Registration Protocol (GARP).

Signed-off-by: David Ward <david.ward@ll.mit.edu>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   2 +
 include/net/mrp.h         | 143 ++++++++
 net/802/Kconfig           |   3 +
 net/802/Makefile          |   1 +
 net/802/mrp.c             | 895 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 1044 insertions(+)
 create mode 100644 include/net/mrp.h
 create mode 100644 net/802/mrp.c

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ab2774eb49e8..25bd46f52877 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1290,6 +1290,8 @@ struct net_device {
 	};
 	/* GARP */
 	struct garp_port __rcu	*garp_port;
+	/* MRP */
+	struct mrp_port __rcu	*mrp_port;
 
 	/* class/net/name entry */
 	struct device		dev;
diff --git a/include/net/mrp.h b/include/net/mrp.h
new file mode 100644
index 000000000000..4fbf02aa2ec1
--- /dev/null
+++ b/include/net/mrp.h
@@ -0,0 +1,143 @@
+#ifndef _NET_MRP_H
+#define _NET_MRP_H
+
+#define MRP_END_MARK		0x0
+
+struct mrp_pdu_hdr {
+	u8	version;
+};
+
+struct mrp_msg_hdr {
+	u8	attrtype;
+	u8	attrlen;
+};
+
+struct mrp_vecattr_hdr {
+	__be16	lenflags;
+	unsigned char	firstattrvalue[];
+#define MRP_VECATTR_HDR_LEN_MASK cpu_to_be16(0x1FFF)
+#define MRP_VECATTR_HDR_FLAG_LA cpu_to_be16(0x2000)
+};
+
+enum mrp_vecattr_event {
+	MRP_VECATTR_EVENT_NEW,
+	MRP_VECATTR_EVENT_JOIN_IN,
+	MRP_VECATTR_EVENT_IN,
+	MRP_VECATTR_EVENT_JOIN_MT,
+	MRP_VECATTR_EVENT_MT,
+	MRP_VECATTR_EVENT_LV,
+	__MRP_VECATTR_EVENT_MAX
+};
+
+struct mrp_skb_cb {
+	struct mrp_msg_hdr	*mh;
+	struct mrp_vecattr_hdr	*vah;
+	unsigned char		attrvalue[];
+};
+
+static inline struct mrp_skb_cb *mrp_cb(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct mrp_skb_cb) >
+		     FIELD_SIZEOF(struct sk_buff, cb));
+	return (struct mrp_skb_cb *)skb->cb;
+}
+
+enum mrp_applicant_state {
+	MRP_APPLICANT_INVALID,
+	MRP_APPLICANT_VO,
+	MRP_APPLICANT_VP,
+	MRP_APPLICANT_VN,
+	MRP_APPLICANT_AN,
+	MRP_APPLICANT_AA,
+	MRP_APPLICANT_QA,
+	MRP_APPLICANT_LA,
+	MRP_APPLICANT_AO,
+	MRP_APPLICANT_QO,
+	MRP_APPLICANT_AP,
+	MRP_APPLICANT_QP,
+	__MRP_APPLICANT_MAX
+};
+#define MRP_APPLICANT_MAX	(__MRP_APPLICANT_MAX - 1)
+
+enum mrp_event {
+	MRP_EVENT_NEW,
+	MRP_EVENT_JOIN,
+	MRP_EVENT_LV,
+	MRP_EVENT_TX,
+	MRP_EVENT_R_NEW,
+	MRP_EVENT_R_JOIN_IN,
+	MRP_EVENT_R_IN,
+	MRP_EVENT_R_JOIN_MT,
+	MRP_EVENT_R_MT,
+	MRP_EVENT_R_LV,
+	MRP_EVENT_R_LA,
+	MRP_EVENT_REDECLARE,
+	MRP_EVENT_PERIODIC,
+	__MRP_EVENT_MAX
+};
+#define MRP_EVENT_MAX		(__MRP_EVENT_MAX - 1)
+
+enum mrp_tx_action {
+	MRP_TX_ACTION_NONE,
+	MRP_TX_ACTION_S_NEW,
+	MRP_TX_ACTION_S_JOIN_IN,
+	MRP_TX_ACTION_S_JOIN_IN_OPTIONAL,
+	MRP_TX_ACTION_S_IN_OPTIONAL,
+	MRP_TX_ACTION_S_LV,
+};
+
+struct mrp_attr {
+	struct rb_node			node;
+	enum mrp_applicant_state	state;
+	u8				type;
+	u8				len;
+	unsigned char			value[];
+};
+
+enum mrp_applications {
+	MRP_APPLICATION_MVRP,
+	__MRP_APPLICATION_MAX
+};
+#define MRP_APPLICATION_MAX	(__MRP_APPLICATION_MAX - 1)
+
+struct mrp_application {
+	enum mrp_applications	type;
+	unsigned int		maxattr;
+	struct packet_type	pkttype;
+	unsigned char		group_address[ETH_ALEN];
+	u8			version;
+};
+
+struct mrp_applicant {
+	struct mrp_application	*app;
+	struct net_device	*dev;
+	struct timer_list	join_timer;
+
+	spinlock_t		lock;
+	struct sk_buff_head	queue;
+	struct sk_buff		*pdu;
+	struct rb_root		mad;
+	struct rcu_head		rcu;
+};
+
+struct mrp_port {
+	struct mrp_applicant __rcu	*applicants[MRP_APPLICATION_MAX + 1];
+	struct rcu_head			rcu;
+};
+
+extern int	mrp_register_application(struct mrp_application *app);
+extern void	mrp_unregister_application(struct mrp_application *app);
+
+extern int	mrp_init_applicant(struct net_device *dev,
+				    struct mrp_application *app);
+extern void	mrp_uninit_applicant(struct net_device *dev,
+				      struct mrp_application *app);
+
+extern int	mrp_request_join(const struct net_device *dev,
+				  const struct mrp_application *app,
+				  const void *value, u8 len, u8 type);
+extern void	mrp_request_leave(const struct net_device *dev,
+				   const struct mrp_application *app,
+				   const void *value, u8 len, u8 type);
+
+#endif /* _NET_MRP_H */
diff --git a/net/802/Kconfig b/net/802/Kconfig
index be33d27c8e69..80d4bf78905d 100644
--- a/net/802/Kconfig
+++ b/net/802/Kconfig
@@ -5,3 +5,6 @@ config STP
 config GARP
 	tristate
 	select STP
+
+config MRP
+	tristate
diff --git a/net/802/Makefile b/net/802/Makefile
index a30d6e385aed..37e654d6615e 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_IPX)	+= p8022.o psnap.o p8023.o
 obj-$(CONFIG_ATALK)	+= p8022.o psnap.o
 obj-$(CONFIG_STP)	+= stp.o
 obj-$(CONFIG_GARP)	+= garp.o
+obj-$(CONFIG_MRP)	+= mrp.o
diff --git a/net/802/mrp.c b/net/802/mrp.c
new file mode 100644
index 000000000000..47a9e14c8ba7
--- /dev/null
+++ b/net/802/mrp.c
@@ -0,0 +1,895 @@
+/*
+ *	IEEE 802.1Q Multiple Registration Protocol (MRP)
+ *
+ *	Copyright (c) 2012 Massachusetts Institute of Technology
+ *
+ *	Adapted from code in net/802/garp.c
+ *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/mrp.h>
+#include <asm/unaligned.h>
+
+static unsigned int mrp_join_time __read_mostly = 200;
+module_param(mrp_join_time, uint, 0644);
+MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+MODULE_LICENSE("GPL");
+
+static const u8
+mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = {
+	[MRP_APPLICANT_VO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VO,
+	},
+	[MRP_APPLICANT_VP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VP,
+	},
+	[MRP_APPLICANT_VN] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VN,
+	},
+	[MRP_APPLICANT_AN] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_AN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AN,
+	},
+	[MRP_APPLICANT_AA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AA,
+	},
+	[MRP_APPLICANT_QA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AA,
+	},
+	[MRP_APPLICANT_LA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_LA,
+	},
+	[MRP_APPLICANT_AO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AO,
+	},
+	[MRP_APPLICANT_QO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_QO,
+	},
+	[MRP_APPLICANT_AP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AP,
+	},
+	[MRP_APPLICANT_QP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AP,
+	},
+};
+
+static const u8
+mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = {
+	[MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW,
+	[MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW,
+	[MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL,
+	[MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV,
+	[MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL,
+};
+
+static void mrp_attrvalue_inc(void *value, u8 len)
+{
+	u8 *v = (u8 *)value;
+
+	/* Add 1 to the last byte. If it becomes zero,
+	 * go to the previous byte and repeat.
+	 */
+	while (len > 0 && !++v[--len])
+		;
+}
+
+static int mrp_attr_cmp(const struct mrp_attr *attr,
+			 const void *value, u8 len, u8 type)
+{
+	if (attr->type != type)
+		return attr->type - type;
+	if (attr->len != len)
+		return attr->len - len;
+	return memcmp(attr->value, value, len);
+}
+
+static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app,
+					const void *value, u8 len, u8 type)
+{
+	struct rb_node *parent = app->mad.rb_node;
+	struct mrp_attr *attr;
+	int d;
+
+	while (parent) {
+		attr = rb_entry(parent, struct mrp_attr, node);
+		d = mrp_attr_cmp(attr, value, len, type);
+		if (d > 0)
+			parent = parent->rb_left;
+		else if (d < 0)
+			parent = parent->rb_right;
+		else
+			return attr;
+	}
+	return NULL;
+}
+
+static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app,
+					const void *value, u8 len, u8 type)
+{
+	struct rb_node *parent = NULL, **p = &app->mad.rb_node;
+	struct mrp_attr *attr;
+	int d;
+
+	while (*p) {
+		parent = *p;
+		attr = rb_entry(parent, struct mrp_attr, node);
+		d = mrp_attr_cmp(attr, value, len, type);
+		if (d > 0)
+			p = &parent->rb_left;
+		else if (d < 0)
+			p = &parent->rb_right;
+		else {
+			/* The attribute already exists; re-use it. */
+			return attr;
+		}
+	}
+	attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC);
+	if (!attr)
+		return attr;
+	attr->state = MRP_APPLICANT_VO;
+	attr->type  = type;
+	attr->len   = len;
+	memcpy(attr->value, value, len);
+
+	rb_link_node(&attr->node, parent, p);
+	rb_insert_color(&attr->node, &app->mad);
+	return attr;
+}
+
+static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr)
+{
+	rb_erase(&attr->node, &app->mad);
+	kfree(attr);
+}
+
+static int mrp_pdu_init(struct mrp_applicant *app)
+{
+	struct sk_buff *skb;
+	struct mrp_pdu_hdr *ph;
+
+	skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev),
+			GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	skb->dev = app->dev;
+	skb->protocol = app->app->pkttype.type;
+	skb_reserve(skb, LL_RESERVED_SPACE(app->dev));
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
+	ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph));
+	ph->version = app->app->version;
+
+	app->pdu = skb;
+	return 0;
+}
+
+static int mrp_pdu_append_end_mark(struct mrp_applicant *app)
+{
+	__be16 *endmark;
+
+	if (skb_tailroom(app->pdu) < sizeof(*endmark))
+		return -1;
+	endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark));
+	put_unaligned(MRP_END_MARK, endmark);
+	return 0;
+}
+
+static void mrp_pdu_queue(struct mrp_applicant *app)
+{
+	if (!app->pdu)
+		return;
+
+	if (mrp_cb(app->pdu)->mh)
+		mrp_pdu_append_end_mark(app);
+	mrp_pdu_append_end_mark(app);
+
+	dev_hard_header(app->pdu, app->dev, ntohs(app->app->pkttype.type),
+			app->app->group_address, app->dev->dev_addr,
+			app->pdu->len);
+
+	skb_queue_tail(&app->queue, app->pdu);
+	app->pdu = NULL;
+}
+
+static void mrp_queue_xmit(struct mrp_applicant *app)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&app->queue)))
+		dev_queue_xmit(skb);
+}
+
+static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app,
+				  u8 attrtype, u8 attrlen)
+{
+	struct mrp_msg_hdr *mh;
+
+	if (mrp_cb(app->pdu)->mh) {
+		if (mrp_pdu_append_end_mark(app) < 0)
+			return -1;
+		mrp_cb(app->pdu)->mh = NULL;
+		mrp_cb(app->pdu)->vah = NULL;
+	}
+
+	if (skb_tailroom(app->pdu) < sizeof(*mh))
+		return -1;
+	mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh));
+	mh->attrtype = attrtype;
+	mh->attrlen = attrlen;
+	mrp_cb(app->pdu)->mh = mh;
+	return 0;
+}
+
+static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app,
+				      const void *firstattrvalue, u8 attrlen)
+{
+	struct mrp_vecattr_hdr *vah;
+
+	if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen)
+		return -1;
+	vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu,
+						  sizeof(*vah) + attrlen);
+	put_unaligned(0, &vah->lenflags);
+	memcpy(vah->firstattrvalue, firstattrvalue, attrlen);
+	mrp_cb(app->pdu)->vah = vah;
+	memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen);
+	return 0;
+}
+
+static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app,
+					const struct mrp_attr *attr,
+					enum mrp_vecattr_event vaevent)
+{
+	u16 len, pos;
+	u8 *vaevents;
+	int err;
+again:
+	if (!app->pdu) {
+		err = mrp_pdu_init(app);
+		if (err < 0)
+			return err;
+	}
+
+	/* If there is no Message header in the PDU, or the Message header is
+	 * for a different attribute type, add an EndMark (if necessary) and a
+	 * new Message header to the PDU.
+	 */
+	if (!mrp_cb(app->pdu)->mh ||
+	    mrp_cb(app->pdu)->mh->attrtype != attr->type ||
+	    mrp_cb(app->pdu)->mh->attrlen != attr->len) {
+		if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0)
+			goto queue;
+	}
+
+	/* If there is no VectorAttribute header for this Message in the PDU,
+	 * or this attribute's value does not sequentially follow the previous
+	 * attribute's value, add a new VectorAttribute header to the PDU.
+	 */
+	if (!mrp_cb(app->pdu)->vah ||
+	    memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) {
+		if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0)
+			goto queue;
+	}
+
+	len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags));
+	pos = len % 3;
+
+	/* Events are packed into Vectors in the PDU, three to a byte. Add a
+	 * byte to the end of the Vector if necessary.
+	 */
+	if (!pos) {
+		if (skb_tailroom(app->pdu) < sizeof(u8))
+			goto queue;
+		vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8));
+	} else {
+		vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8));
+	}
+
+	switch (pos) {
+	case 0:
+		*vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX *
+				       __MRP_VECATTR_EVENT_MAX);
+		break;
+	case 1:
+		*vaevents += vaevent * __MRP_VECATTR_EVENT_MAX;
+		break;
+	case 2:
+		*vaevents += vaevent;
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	/* Increment the length of the VectorAttribute in the PDU, as well as
+	 * the value of the next attribute that would continue its Vector.
+	 */
+	put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags);
+	mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len);
+
+	return 0;
+
+queue:
+	mrp_pdu_queue(app);
+	goto again;
+}
+
+static void mrp_attr_event(struct mrp_applicant *app,
+			   struct mrp_attr *attr, enum mrp_event event)
+{
+	enum mrp_applicant_state state;
+
+	state = mrp_applicant_state_table[attr->state][event];
+	if (state == MRP_APPLICANT_INVALID) {
+		WARN_ON(1);
+		return;
+	}
+
+	if (event == MRP_EVENT_TX) {
+		/* When appending the attribute fails, don't update its state
+		 * in order to retry at the next TX event.
+		 */
+
+		switch (mrp_tx_action_table[attr->state]) {
+		case MRP_TX_ACTION_NONE:
+		case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL:
+		case MRP_TX_ACTION_S_IN_OPTIONAL:
+			break;
+		case MRP_TX_ACTION_S_NEW:
+			if (mrp_pdu_append_vecattr_event(
+				    app, attr, MRP_VECATTR_EVENT_NEW) < 0)
+				return;
+			break;
+		case MRP_TX_ACTION_S_JOIN_IN:
+			if (mrp_pdu_append_vecattr_event(
+				    app, attr, MRP_VECATTR_EVENT_JOIN_IN) < 0)
+				return;
+			break;
+		case MRP_TX_ACTION_S_LV:
+			if (mrp_pdu_append_vecattr_event(
+				    app, attr, MRP_VECATTR_EVENT_LV) < 0)
+				return;
+			/* As a pure applicant, sending a leave message
+			 * implies that the attribute was unregistered and
+			 * can be destroyed.
+			 */
+			mrp_attr_destroy(app, attr);
+			return;
+		default:
+			WARN_ON(1);
+		}
+	}
+
+	attr->state = state;
+}
+
+int mrp_request_join(const struct net_device *dev,
+		     const struct mrp_application *appl,
+		     const void *value, u8 len, u8 type)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+	struct mrp_attr *attr;
+
+	if (sizeof(struct mrp_skb_cb) + len >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return -ENOMEM;
+
+	spin_lock_bh(&app->lock);
+	attr = mrp_attr_create(app, value, len, type);
+	if (!attr) {
+		spin_unlock_bh(&app->lock);
+		return -ENOMEM;
+	}
+	mrp_attr_event(app, attr, MRP_EVENT_JOIN);
+	spin_unlock_bh(&app->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mrp_request_join);
+
+void mrp_request_leave(const struct net_device *dev,
+		       const struct mrp_application *appl,
+		       const void *value, u8 len, u8 type)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+	struct mrp_attr *attr;
+
+	if (sizeof(struct mrp_skb_cb) + len >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return;
+
+	spin_lock_bh(&app->lock);
+	attr = mrp_attr_lookup(app, value, len, type);
+	if (!attr) {
+		spin_unlock_bh(&app->lock);
+		return;
+	}
+	mrp_attr_event(app, attr, MRP_EVENT_LV);
+	spin_unlock_bh(&app->lock);
+}
+EXPORT_SYMBOL_GPL(mrp_request_leave);
+
+static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event)
+{
+	struct rb_node *node, *next;
+	struct mrp_attr *attr;
+
+	for (node = rb_first(&app->mad);
+	     next = node ? rb_next(node) : NULL, node != NULL;
+	     node = next) {
+		attr = rb_entry(node, struct mrp_attr, node);
+		mrp_attr_event(app, attr, event);
+	}
+}
+
+static void mrp_join_timer_arm(struct mrp_applicant *app)
+{
+	unsigned long delay;
+
+	delay = (u64)msecs_to_jiffies(mrp_join_time) * net_random() >> 32;
+	mod_timer(&app->join_timer, jiffies + delay);
+}
+
+static void mrp_join_timer(unsigned long data)
+{
+	struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+	spin_lock(&app->lock);
+	mrp_mad_event(app, MRP_EVENT_TX);
+	mrp_pdu_queue(app);
+	spin_unlock(&app->lock);
+
+	mrp_queue_xmit(app);
+	mrp_join_timer_arm(app);
+}
+
+static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
+{
+	__be16 endmark;
+
+	if (skb_copy_bits(skb, *offset, &endmark, sizeof(endmark)) < 0)
+		return -1;
+	if (endmark == MRP_END_MARK) {
+		*offset += sizeof(endmark);
+		return -1;
+	}
+	return 0;
+}
+
+static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app,
+					struct sk_buff *skb,
+					enum mrp_vecattr_event vaevent)
+{
+	struct mrp_attr *attr;
+	enum mrp_event event;
+
+	attr = mrp_attr_lookup(app, mrp_cb(skb)->attrvalue,
+			       mrp_cb(skb)->mh->attrlen,
+			       mrp_cb(skb)->mh->attrtype);
+	if (attr == NULL)
+		return;
+
+	switch (vaevent) {
+	case MRP_VECATTR_EVENT_NEW:
+		event = MRP_EVENT_R_NEW;
+		break;
+	case MRP_VECATTR_EVENT_JOIN_IN:
+		event = MRP_EVENT_R_JOIN_IN;
+		break;
+	case MRP_VECATTR_EVENT_IN:
+		event = MRP_EVENT_R_IN;
+		break;
+	case MRP_VECATTR_EVENT_JOIN_MT:
+		event = MRP_EVENT_R_JOIN_MT;
+		break;
+	case MRP_VECATTR_EVENT_MT:
+		event = MRP_EVENT_R_MT;
+		break;
+	case MRP_VECATTR_EVENT_LV:
+		event = MRP_EVENT_R_LV;
+		break;
+	default:
+		return;
+	}
+
+	mrp_attr_event(app, attr, event);
+}
+
+static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
+				 struct sk_buff *skb, int *offset)
+{
+	struct mrp_vecattr_hdr _vah;
+	u16 valen;
+	u8 vaevents, vaevent;
+
+	mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, sizeof(_vah),
+					      &_vah);
+	if (!mrp_cb(skb)->vah)
+		return -1;
+	*offset += sizeof(_vah);
+
+	if (get_unaligned(&mrp_cb(skb)->vah->lenflags) &
+	    MRP_VECATTR_HDR_FLAG_LA)
+		mrp_mad_event(app, MRP_EVENT_R_LA);
+	valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) &
+			    MRP_VECATTR_HDR_LEN_MASK);
+
+	/* The VectorAttribute structure in a PDU carries event information
+	 * about one or more attributes having consecutive values. Only the
+	 * value for the first attribute is contained in the structure. So
+	 * we make a copy of that value, and then increment it each time we
+	 * advance to the next event in its Vector.
+	 */
+	if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return -1;
+	if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue,
+			  mrp_cb(skb)->mh->attrlen) < 0)
+		return -1;
+	*offset += mrp_cb(skb)->mh->attrlen;
+
+	/* In a VectorAttribute, the Vector contains events which are packed
+	 * three to a byte. We process one byte of the Vector at a time.
+	 */
+	while (valen > 0) {
+		if (skb_copy_bits(skb, *offset, &vaevents,
+				  sizeof(vaevents)) < 0)
+			return -1;
+		*offset += sizeof(vaevents);
+
+		/* Extract and process the first event. */
+		vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX *
+				      __MRP_VECATTR_EVENT_MAX);
+		if (vaevent >= __MRP_VECATTR_EVENT_MAX) {
+			/* The byte is malformed; stop processing. */
+			return -1;
+		}
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+
+		/* If present, extract and process the second event. */
+		if (!--valen)
+			break;
+		mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+				  mrp_cb(skb)->mh->attrlen);
+		vaevents %= (__MRP_VECATTR_EVENT_MAX *
+			     __MRP_VECATTR_EVENT_MAX);
+		vaevent = vaevents / __MRP_VECATTR_EVENT_MAX;
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+
+		/* If present, extract and process the third event. */
+		if (!--valen)
+			break;
+		mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+				  mrp_cb(skb)->mh->attrlen);
+		vaevents %= __MRP_VECATTR_EVENT_MAX;
+		vaevent = vaevents;
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+	}
+	return 0;
+}
+
+static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb,
+			     int *offset)
+{
+	struct mrp_msg_hdr _mh;
+
+	mrp_cb(skb)->mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh);
+	if (!mrp_cb(skb)->mh)
+		return -1;
+	*offset += sizeof(_mh);
+
+	if (mrp_cb(skb)->mh->attrtype == 0 ||
+	    mrp_cb(skb)->mh->attrtype > app->app->maxattr ||
+	    mrp_cb(skb)->mh->attrlen == 0)
+		return -1;
+
+	while (skb->len > *offset) {
+		if (mrp_pdu_parse_end_mark(skb, offset) < 0)
+			break;
+		if (mrp_pdu_parse_vecattr(app, skb, offset) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+int mrp_rcv(struct sk_buff *skb, struct net_device *dev,
+	    struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct mrp_application *appl = container_of(pt, struct mrp_application,
+						    pkttype);
+	struct mrp_port *port;
+	struct mrp_applicant *app;
+	struct mrp_pdu_hdr _ph;
+	const struct mrp_pdu_hdr *ph;
+	int offset = skb_network_offset(skb);
+
+	/* If the interface is in promiscuous mode, drop the packet if
+	 * it was unicast to another host.
+	 */
+	if (unlikely(skb->pkt_type == PACKET_OTHERHOST))
+		goto out;
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto out;
+	port = rcu_dereference(dev->mrp_port);
+	if (unlikely(!port))
+		goto out;
+	app = rcu_dereference(port->applicants[appl->type]);
+	if (unlikely(!app))
+		goto out;
+
+	ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph);
+	if (!ph)
+		goto out;
+	offset += sizeof(_ph);
+
+	if (ph->version != app->app->version)
+		goto out;
+
+	spin_lock(&app->lock);
+	while (skb->len > offset) {
+		if (mrp_pdu_parse_end_mark(skb, &offset) < 0)
+			break;
+		if (mrp_pdu_parse_msg(app, skb, &offset) < 0)
+			break;
+	}
+	spin_unlock(&app->lock);
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int mrp_init_port(struct net_device *dev)
+{
+	struct mrp_port *port;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+	rcu_assign_pointer(dev->mrp_port, port);
+	return 0;
+}
+
+static void mrp_release_port(struct net_device *dev)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	unsigned int i;
+
+	for (i = 0; i <= MRP_APPLICATION_MAX; i++) {
+		if (rtnl_dereference(port->applicants[i]))
+			return;
+	}
+	RCU_INIT_POINTER(dev->mrp_port, NULL);
+	kfree_rcu(port, rcu);
+}
+
+int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
+{
+	struct mrp_applicant *app;
+	int err;
+
+	ASSERT_RTNL();
+
+	if (!rtnl_dereference(dev->mrp_port)) {
+		err = mrp_init_port(dev);
+		if (err < 0)
+			goto err1;
+	}
+
+	err = -ENOMEM;
+	app = kzalloc(sizeof(*app), GFP_KERNEL);
+	if (!app)
+		goto err2;
+
+	err = dev_mc_add(dev, appl->group_address);
+	if (err < 0)
+		goto err3;
+
+	app->dev = dev;
+	app->app = appl;
+	app->mad = RB_ROOT;
+	spin_lock_init(&app->lock);
+	skb_queue_head_init(&app->queue);
+	rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
+	setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
+	mrp_join_timer_arm(app);
+	return 0;
+
+err3:
+	kfree(app);
+err2:
+	mrp_release_port(dev);
+err1:
+	return err;
+}
+EXPORT_SYMBOL_GPL(mrp_init_applicant);
+
+void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+
+	ASSERT_RTNL();
+
+	RCU_INIT_POINTER(port->applicants[appl->type], NULL);
+
+	/* Delete timer and generate a final TX event to flush out
+	 * all pending messages before the applicant is gone.
+	 */
+	del_timer_sync(&app->join_timer);
+	mrp_mad_event(app, MRP_EVENT_TX);
+	mrp_pdu_queue(app);
+	mrp_queue_xmit(app);
+
+	dev_mc_del(dev, appl->group_address);
+	kfree_rcu(app, rcu);
+	mrp_release_port(dev);
+}
+EXPORT_SYMBOL_GPL(mrp_uninit_applicant);
+
+int mrp_register_application(struct mrp_application *appl)
+{
+	appl->pkttype.func = mrp_rcv;
+	dev_add_pack(&appl->pkttype);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mrp_register_application);
+
+void mrp_unregister_application(struct mrp_application *appl)
+{
+	dev_remove_pack(&appl->pkttype);
+}
+EXPORT_SYMBOL_GPL(mrp_unregister_application);
-- 
cgit v1.2.3


From 2c5e89338493882719f8d138f8f2717ee9a04153 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sun, 10 Feb 2013 03:50:18 +0000
Subject: ipv6: by default join ff01::1 and in case of forwarding ff01::2 and
 ff05:2

Cc: Erik Hugne <erik.hugne@ericsson.com>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h |  9 +++++++++
 net/ipv6/addrconf.c | 15 +++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index a16e19349ec0..34edf1f6c9a3 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -36,4 +36,13 @@ extern const struct in6_addr in6addr_linklocal_allnodes;
 extern const struct in6_addr in6addr_linklocal_allrouters;
 #define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \
 		{ { { 0xff,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } }
+extern const struct in6_addr in6addr_interfacelocal_allnodes;
+#define IN6ADDR_INTERFACELOCAL_ALLNODES_INIT \
+		{ { { 0xff,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } }
+extern const struct in6_addr in6addr_interfacelocal_allrouters;
+#define IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT \
+		{ { { 0xff,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } }
+extern const struct in6_addr in6addr_sitelocal_allrouters;
+#define IN6ADDR_SITELOCAL_ALLROUTERS_INIT \
+		{ { { 0xff,5,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } }
 #endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index bd9f9360f769..86c235d05aba 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -244,6 +244,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
 
 /* Check if a valid qdisc is available */
 static inline bool addrconf_qdisc_ok(const struct net_device *dev)
@@ -428,6 +431,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	/* protected by rtnl_lock */
 	rcu_assign_pointer(dev->ip6_ptr, ndev);
 
+	/* Join interface-local all-node multicast group */
+	ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
+
 	/* Join all-node multicast group */
 	ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
 
@@ -611,10 +617,15 @@ static void dev_forward_change(struct inet6_dev *idev)
 	if (idev->cnf.forwarding)
 		dev_disable_lro(dev);
 	if (dev->flags & IFF_MULTICAST) {
-		if (idev->cnf.forwarding)
+		if (idev->cnf.forwarding) {
 			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
-		else
+			ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters);
+			ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters);
+		} else {
 			ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
+			ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters);
+			ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters);
+		}
 	}
 
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
-- 
cgit v1.2.3


From 57fa8273a246d6abbbfdc995da6293b0040807d8 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 8 Feb 2013 18:51:05 -0500
Subject: cpuidle: remove vestage definition of cpuidle_state_usage.driver_data

This field is no longer used.

Signed-off-by: Len Brown <len.brown@intel.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/linux/cpuidle.h | 22 ----------------------
 1 file changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 24cd1037b6d6..480c14dc1ddd 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -32,8 +32,6 @@ struct cpuidle_driver;
  ****************************/
 
 struct cpuidle_state_usage {
-	void		*driver_data;
-
 	unsigned long long	disable;
 	unsigned long long	usage;
 	unsigned long long	time; /* in US */
@@ -62,26 +60,6 @@ struct cpuidle_state {
 
 #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
 
-/**
- * cpuidle_get_statedata - retrieves private driver state data
- * @st_usage: the state usage statistics
- */
-static inline void *cpuidle_get_statedata(struct cpuidle_state_usage *st_usage)
-{
-	return st_usage->driver_data;
-}
-
-/**
- * cpuidle_set_statedata - stores private driver state data
- * @st_usage: the state usage statistics
- * @data: the private data
- */
-static inline void
-cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data)
-{
-	st_usage->driver_data = data;
-}
-
 struct cpuidle_device {
 	unsigned int		registered:1;
 	unsigned int		enabled:1;
-- 
cgit v1.2.3


From 2cde6acd49daca58b96f1fbc697492825511ad31 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 11 Feb 2013 10:25:30 +0000
Subject: netpoll: Fix __netpoll_rcu_free so that it can hold the rtnl lock

__netpoll_rcu_free is used to free netpoll structures when the rtnl_lock is
already held.  The mechanism is used to asynchronously call __netpoll_cleanup
outside of the holding of the rtnl_lock, so as to avoid deadlock.
Unfortunately, __netpoll_cleanup modifies pointers (dev->np), which means the
rtnl_lock must be held while calling it.  Further, it cannot be held, because
rcu callbacks may be issued in softirq contexts, which cannot sleep.

Fix this by converting the rcu callback to a work queue that is guaranteed to
get scheduled in process context, so that we can hold the rtnl properly while
calling __netpoll_cleanup

Tested successfully by myself.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: "David S. Miller" <davem@davemloft.net>
CC: Cong Wang <amwang@redhat.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  2 +-
 include/linux/netpoll.h         |  4 ++--
 net/8021q/vlan_dev.c            |  2 +-
 net/bridge/br_device.c          |  2 +-
 net/core/netpoll.c              | 16 ++++++++++------
 5 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 22399374b1e1..94c1534dd578 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1249,7 +1249,7 @@ static inline void slave_disable_netpoll(struct slave *slave)
 		return;
 
 	slave->np = NULL;
-	__netpoll_free_rcu(np);
+	__netpoll_free_async(np);
 }
 static inline bool slave_dev_support_netpoll(struct net_device *slave_dev)
 {
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index ab856d507b7e..9d7d8c64f7c8 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -32,7 +32,7 @@ struct netpoll {
 	u8 remote_mac[ETH_ALEN];
 
 	struct list_head rx; /* rx_np list element */
-	struct rcu_head rcu;
+	struct work_struct cleanup_work;
 };
 
 struct netpoll_info {
@@ -68,7 +68,7 @@ int netpoll_setup(struct netpoll *np);
 int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void __netpoll_cleanup(struct netpoll *np);
-void __netpoll_free_rcu(struct netpoll *np);
+void __netpoll_free_async(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo);
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 34df5b3c9b75..19cf81bf9f69 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -733,7 +733,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
 
 	vlan->netpoll = NULL;
 
-	__netpoll_free_rcu(netpoll);
+	__netpoll_free_async(netpoll);
 }
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ba6fb2d60940..ca98fa5b2c78 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -265,7 +265,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 	p->np = NULL;
 
-	__netpoll_free_rcu(np);
+	__netpoll_free_async(np);
 }
 
 #endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index edcd9ad95304..c536474e2260 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -61,6 +61,7 @@ static struct srcu_struct netpoll_srcu;
 
 static void zap_completion_queue(void);
 static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
+static void netpoll_async_cleanup(struct work_struct *work);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -1020,6 +1021,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 
 	np->dev = ndev;
 	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
+	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
 
 	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
 	    !ndev->netdev_ops->ndo_poll_controller) {
@@ -1255,25 +1257,27 @@ void __netpoll_cleanup(struct netpoll *np)
 		if (ops->ndo_netpoll_cleanup)
 			ops->ndo_netpoll_cleanup(np->dev);
 
-		RCU_INIT_POINTER(np->dev->npinfo, NULL);
+		rcu_assign_pointer(np->dev->npinfo, NULL);
 		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
 	}
 }
 EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
+static void netpoll_async_cleanup(struct work_struct *work)
 {
-	struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);
+	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
 
+	rtnl_lock();
 	__netpoll_cleanup(np);
+	rtnl_unlock();
 	kfree(np);
 }
 
-void __netpoll_free_rcu(struct netpoll *np)
+void __netpoll_free_async(struct netpoll *np)
 {
-	call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
+	schedule_work(&np->cleanup_work);
 }
-EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
+EXPORT_SYMBOL_GPL(__netpoll_free_async);
 
 void netpoll_cleanup(struct netpoll *np)
 {
-- 
cgit v1.2.3


From d9ba8f9e6298af71ec1c1fd3d88c3ef68abd0ec3 Mon Sep 17 00:00:00 2001
From: Mugunthan V N <mugunthanvnm@ti.com>
Date: Mon, 11 Feb 2013 09:52:20 +0000
Subject: driver: net: ethernet: cpsw: dual emac interface implementation

The CPSW switch can act as Dual EMAC by segregating the switch ports
using VLAN and port VLAN as per the TRM description in
14.3.2.10.2 Dual Mac Mode

Following CPSW components will be common for both the interfaces.
* Interrupt source is common for both eth interfaces
* Interrupt pacing is common for both interfaces
* Hardware statistics is common for all the ports
* CPDMA is common for both eth interface
* CPTS is common for both the interface and it should not be enabled on
  both the interface as timestamping information doesn't contain port
  information.

Constrains
* Reserved VID of One port should not be used in other interface which will
  enable switching functionality
* Same VID must not be used in both the interface which will enable switching
  functionality

Signed-off-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/cpsw.txt |   2 +
 drivers/net/ethernet/ti/cpsw.c                 | 335 +++++++++++++++++++++----
 include/linux/platform_data/cpsw.h             |   3 +
 3 files changed, 288 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index 6ddd0286a9b7..ecfdf756d10f 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -24,6 +24,8 @@ Required properties:
 Optional properties:
 - ti,hwmods		: Must be "cpgmac0"
 - no_bd_ram		: Must be 0 or 1
+- dual_emac		: Specifies Switch to act as Dual EMAC
+- dual_emac_res_vlan	: Specifies VID to be used to segregate the ports
 
 Note: "ti,hwmods" field is used to fetch the base address and irq
 resources from TI, omap hwmod data base during device registration.
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 4b964bb02d4c..4ceed6e0f1be 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -122,6 +122,10 @@ do {								\
 #define CPSW_VLAN_AWARE		BIT(1)
 #define CPSW_ALE_VLAN_AWARE	1
 
+#define CPSW_FIFO_NORMAL_MODE		(0 << 15)
+#define CPSW_FIFO_DUAL_MAC_MODE		(1 << 15)
+#define CPSW_FIFO_RATE_LIMIT_MODE	(2 << 15)
+
 #define cpsw_enable_irq(priv)	\
 	do {			\
 		u32 i;		\
@@ -254,7 +258,7 @@ struct cpsw_ss_regs {
 struct cpsw_host_regs {
 	u32	max_blks;
 	u32	blk_cnt;
-	u32	flow_thresh;
+	u32	tx_in_ctl;
 	u32	port_vlan;
 	u32	tx_pri_map;
 	u32	cpdma_tx_pri_map;
@@ -281,6 +285,9 @@ struct cpsw_slave {
 	u32				mac_control;
 	struct cpsw_slave_data		*data;
 	struct phy_device		*phy;
+	struct net_device		*ndev;
+	u32				port_vlan;
+	u32				open_stat;
 };
 
 static inline u32 slave_read(struct cpsw_slave *slave, u32 offset)
@@ -320,15 +327,63 @@ struct cpsw_priv {
 	u32 irqs_table[4];
 	u32 num_irqs;
 	struct cpts *cpts;
+	u32 emac_port;
 };
 
 #define napi_to_priv(napi)	container_of(napi, struct cpsw_priv, napi)
-#define for_each_slave(priv, func, arg...)			\
-	do {							\
-		int idx;					\
-		for (idx = 0; idx < (priv)->data.slaves; idx++)	\
-			(func)((priv)->slaves + idx, ##arg);	\
+#define for_each_slave(priv, func, arg...)				\
+	do {								\
+		int idx;						\
+		if (priv->data.dual_emac)				\
+			(func)((priv)->slaves + priv->emac_port, ##arg);\
+		else							\
+			for (idx = 0; idx < (priv)->data.slaves; idx++)	\
+				(func)((priv)->slaves + idx, ##arg);	\
+	} while (0)
+#define cpsw_get_slave_ndev(priv, __slave_no__)				\
+	(priv->slaves[__slave_no__].ndev)
+#define cpsw_get_slave_priv(priv, __slave_no__)				\
+	((priv->slaves[__slave_no__].ndev) ?				\
+		netdev_priv(priv->slaves[__slave_no__].ndev) : NULL)	\
+
+#define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb)		\
+	do {								\
+		if (!priv->data.dual_emac)				\
+			break;						\
+		if (CPDMA_RX_SOURCE_PORT(status) == 1) {		\
+			ndev = cpsw_get_slave_ndev(priv, 0);		\
+			priv = netdev_priv(ndev);			\
+			skb->dev = ndev;				\
+		} else if (CPDMA_RX_SOURCE_PORT(status) == 2) {		\
+			ndev = cpsw_get_slave_ndev(priv, 1);		\
+			priv = netdev_priv(ndev);			\
+			skb->dev = ndev;				\
+		}							\
 	} while (0)
+#define cpsw_add_mcast(priv, addr)					\
+	do {								\
+		if (priv->data.dual_emac) {				\
+			struct cpsw_slave *slave = priv->slaves +	\
+						priv->emac_port;	\
+			int slave_port = cpsw_get_slave_port(priv,	\
+						slave->slave_num);	\
+			cpsw_ale_add_mcast(priv->ale, addr,		\
+				1 << slave_port | 1 << priv->host_port,	\
+				ALE_VLAN, slave->port_vlan, 0);		\
+		} else {						\
+			cpsw_ale_add_mcast(priv->ale, addr,		\
+				ALE_ALL_PORTS << priv->host_port,	\
+				0, 0, 0);				\
+		}							\
+	} while (0)
+
+static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num)
+{
+	if (priv->host_port == 0)
+		return slave_num + 1;
+	else
+		return slave_num;
+}
 
 static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 {
@@ -348,8 +403,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 
 		/* program multicast address list into ALE register */
 		netdev_for_each_mc_addr(ha, ndev) {
-			cpsw_ale_add_mcast(priv->ale, (u8 *)ha->addr,
-				ALE_ALL_PORTS << priv->host_port, 0, 0, 0);
+			cpsw_add_mcast(priv, (u8 *)ha->addr);
 		}
 	}
 }
@@ -396,6 +450,8 @@ void cpsw_rx_handler(void *token, int len, int status)
 	struct cpsw_priv	*priv = netdev_priv(ndev);
 	int			ret = 0;
 
+	cpsw_dual_emac_src_port_detect(status, priv, ndev, skb);
+
 	/* free and bail if we are shutting down */
 	if (unlikely(!netif_running(ndev)) ||
 			unlikely(!netif_carrier_ok(ndev))) {
@@ -437,18 +493,17 @@ static irqreturn_t cpsw_interrupt(int irq, void *dev_id)
 		cpsw_intr_disable(priv);
 		cpsw_disable_irq(priv);
 		napi_schedule(&priv->napi);
+	} else {
+		priv = cpsw_get_slave_priv(priv, 1);
+		if (likely(priv) && likely(netif_running(priv->ndev))) {
+			cpsw_intr_disable(priv);
+			cpsw_disable_irq(priv);
+			napi_schedule(&priv->napi);
+		}
 	}
 	return IRQ_HANDLED;
 }
 
-static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num)
-{
-	if (priv->host_port == 0)
-		return slave_num + 1;
-	else
-		return slave_num;
-}
-
 static int cpsw_poll(struct napi_struct *napi, int budget)
 {
 	struct cpsw_priv	*priv = napi_to_priv(napi);
@@ -566,6 +621,54 @@ static inline int __show_stat(char *buf, int maxlen, const char *name, u32 val)
 				leader + strlen(name), val);
 }
 
+static int cpsw_common_res_usage_state(struct cpsw_priv *priv)
+{
+	u32 i;
+	u32 usage_count = 0;
+
+	if (!priv->data.dual_emac)
+		return 0;
+
+	for (i = 0; i < priv->data.slaves; i++)
+		if (priv->slaves[i].open_stat)
+			usage_count++;
+
+	return usage_count;
+}
+
+static inline int cpsw_tx_packet_submit(struct net_device *ndev,
+			struct cpsw_priv *priv, struct sk_buff *skb)
+{
+	if (!priv->data.dual_emac)
+		return cpdma_chan_submit(priv->txch, skb, skb->data,
+				  skb->len, 0, GFP_KERNEL);
+
+	if (ndev == cpsw_get_slave_ndev(priv, 0))
+		return cpdma_chan_submit(priv->txch, skb, skb->data,
+				  skb->len, 1, GFP_KERNEL);
+	else
+		return cpdma_chan_submit(priv->txch, skb, skb->data,
+				  skb->len, 2, GFP_KERNEL);
+}
+
+static inline void cpsw_add_dual_emac_def_ale_entries(
+		struct cpsw_priv *priv, struct cpsw_slave *slave,
+		u32 slave_port)
+{
+	u32 port_mask = 1 << slave_port | 1 << priv->host_port;
+
+	if (priv->version == CPSW_VERSION_1)
+		slave_write(slave, slave->port_vlan, CPSW1_PORT_VLAN);
+	else
+		slave_write(slave, slave->port_vlan, CPSW2_PORT_VLAN);
+	cpsw_ale_add_vlan(priv->ale, slave->port_vlan, port_mask,
+			  port_mask, port_mask, 0);
+	cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+			   port_mask, ALE_VLAN, slave->port_vlan, 0);
+	cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
+		priv->host_port, ALE_VLAN, slave->port_vlan);
+}
+
 static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 {
 	char name[32];
@@ -595,8 +698,11 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 
 	slave_port = cpsw_get_slave_port(priv, slave->slave_num);
 
-	cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
-			   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
+	if (priv->data.dual_emac)
+		cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
+	else
+		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+				   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
 	slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
 				 &cpsw_adjust_link, slave->data->phy_if);
@@ -634,6 +740,7 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
 static void cpsw_init_host_port(struct cpsw_priv *priv)
 {
 	u32 control_reg;
+	u32 fifo_mode;
 
 	/* soft reset the controller and initialize ale */
 	soft_reset("cpsw", &priv->regs->soft_reset);
@@ -645,6 +752,9 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
 	control_reg = readl(&priv->regs->control);
 	control_reg |= CPSW_VLAN_AWARE;
 	writel(control_reg, &priv->regs->control);
+	fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
+		     CPSW_FIFO_NORMAL_MODE;
+	writel(fifo_mode, &priv->host_port_regs->tx_in_ctl);
 
 	/* setup host port priority mapping */
 	__raw_writel(CPDMA_TX_PRIORITY_MAP,
@@ -654,9 +764,12 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
 	cpsw_ale_control_set(priv->ale, priv->host_port,
 			     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
-	cpsw_ale_add_ucast(priv->ale, priv->mac_addr, priv->host_port, 0, 0);
-	cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
-			   1 << priv->host_port, 0, 0, ALE_MCAST_FWD_2);
+	if (!priv->data.dual_emac) {
+		cpsw_ale_add_ucast(priv->ale, priv->mac_addr, priv->host_port,
+				   0, 0);
+		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+				   1 << priv->host_port, 0, 0, ALE_MCAST_FWD_2);
+	}
 }
 
 static int cpsw_ndo_open(struct net_device *ndev)
@@ -665,7 +778,8 @@ static int cpsw_ndo_open(struct net_device *ndev)
 	int i, ret;
 	u32 reg;
 
-	cpsw_intr_disable(priv);
+	if (!cpsw_common_res_usage_state(priv))
+		cpsw_intr_disable(priv);
 	netif_carrier_off(ndev);
 
 	pm_runtime_get_sync(&priv->pdev->dev);
@@ -677,46 +791,54 @@ static int cpsw_ndo_open(struct net_device *ndev)
 		 CPSW_RTL_VERSION(reg));
 
 	/* initialize host and slave ports */
-	cpsw_init_host_port(priv);
+	if (!cpsw_common_res_usage_state(priv))
+		cpsw_init_host_port(priv);
 	for_each_slave(priv, cpsw_slave_open, priv);
 
 	/* Add default VLAN */
-	cpsw_add_default_vlan(priv);
+	if (!priv->data.dual_emac)
+		cpsw_add_default_vlan(priv);
 
-	/* setup tx dma to fixed prio and zero offset */
-	cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
-	cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
+	if (!cpsw_common_res_usage_state(priv)) {
+		/* setup tx dma to fixed prio and zero offset */
+		cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
+		cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
 
-	/* disable priority elevation and enable statistics on all ports */
-	__raw_writel(0, &priv->regs->ptype);
+		/* disable priority elevation */
+		__raw_writel(0, &priv->regs->ptype);
 
-	/* enable statistics collection only on the host port */
-	__raw_writel(0x7, &priv->regs->stat_port_en);
+		/* enable statistics collection only on all ports */
+		__raw_writel(0x7, &priv->regs->stat_port_en);
 
-	if (WARN_ON(!priv->data.rx_descs))
-		priv->data.rx_descs = 128;
+		if (WARN_ON(!priv->data.rx_descs))
+			priv->data.rx_descs = 128;
 
-	for (i = 0; i < priv->data.rx_descs; i++) {
-		struct sk_buff *skb;
+		for (i = 0; i < priv->data.rx_descs; i++) {
+			struct sk_buff *skb;
 
-		ret = -ENOMEM;
-		skb = netdev_alloc_skb_ip_align(priv->ndev,
-						priv->rx_packet_max);
-		if (!skb)
-			break;
-		ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
+			ret = -ENOMEM;
+			skb = netdev_alloc_skb_ip_align(priv->ndev,
+							priv->rx_packet_max);
+			if (!skb)
+				break;
+			ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
 					skb_tailroom(skb), 0, GFP_KERNEL);
-		if (WARN_ON(ret < 0))
-			break;
+			if (WARN_ON(ret < 0))
+				break;
+		}
+		/* continue even if we didn't manage to submit all
+		 * receive descs
+		 */
+		cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
 	}
-	/* continue even if we didn't manage to submit all receive descs */
-	cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
 
 	cpdma_ctlr_start(priv->dma);
 	cpsw_intr_enable(priv);
 	napi_enable(&priv->napi);
 	cpdma_ctlr_eoi(priv->dma);
 
+	if (priv->data.dual_emac)
+		priv->slaves[priv->emac_port].open_stat = true;
 	return 0;
 }
 
@@ -737,12 +859,17 @@ static int cpsw_ndo_stop(struct net_device *ndev)
 	netif_stop_queue(priv->ndev);
 	napi_disable(&priv->napi);
 	netif_carrier_off(priv->ndev);
-	cpsw_intr_disable(priv);
-	cpdma_ctlr_int_ctrl(priv->dma, false);
-	cpdma_ctlr_stop(priv->dma);
-	cpsw_ale_stop(priv->ale);
+
+	if (cpsw_common_res_usage_state(priv) <= 1) {
+		cpsw_intr_disable(priv);
+		cpdma_ctlr_int_ctrl(priv->dma, false);
+		cpdma_ctlr_stop(priv->dma);
+		cpsw_ale_stop(priv->ale);
+	}
 	for_each_slave(priv, cpsw_slave_stop, priv);
 	pm_runtime_put_sync(&priv->pdev->dev);
+	if (priv->data.dual_emac)
+		priv->slaves[priv->emac_port].open_stat = false;
 	return 0;
 }
 
@@ -766,8 +893,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 
 	skb_tx_timestamp(skb);
 
-	ret = cpdma_chan_submit(priv->txch, skb, skb->data,
-				skb->len, 0, GFP_KERNEL);
+	ret = cpsw_tx_packet_submit(ndev, priv, skb);
 	if (unlikely(ret != 0)) {
 		cpsw_err(priv, tx_err, "desc submit failed\n");
 		goto fail;
@@ -836,9 +962,14 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
 
 static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
 {
-	struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave];
+	struct cpsw_slave *slave;
 	u32 ctrl, mtype;
 
+	if (priv->data.dual_emac)
+		slave = &priv->slaves[priv->emac_port];
+	else
+		slave = &priv->slaves[priv->data.cpts_active_slave];
+
 	ctrl = slave_read(slave, CPSW2_CONTROL);
 	ctrl &= ~CTRL_ALL_TS_MASK;
 
@@ -1124,6 +1255,7 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
 	slave->data	= data;
 	slave->regs	= regs + slave_reg_ofs;
 	slave->sliver	= regs + sliver_reg_ofs;
+	slave->port_vlan = data->dual_emac_res_vlan;
 }
 
 static int cpsw_probe_dt(struct cpsw_platform_data *data,
@@ -1204,6 +1336,9 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->mac_control = prop;
 
+	if (!of_property_read_u32(node, "dual_emac", &prop))
+		data->dual_emac = prop;
+
 	/*
 	 * Populate all the child nodes here...
 	 */
@@ -1237,6 +1372,18 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 		if (mac_addr)
 			memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN);
 
+		if (data->dual_emac) {
+			if (of_property_read_u32(node, "dual_emac_res_vlan",
+						 &prop)) {
+				pr_err("Missing dual_emac_res_vlan in DT.\n");
+				slave_data->dual_emac_res_vlan = i+1;
+				pr_err("Using %d as Reserved VLAN for %d slave\n",
+				       slave_data->dual_emac_res_vlan, i);
+			} else {
+				slave_data->dual_emac_res_vlan = prop;
+			}
+		}
+
 		i++;
 	}
 
@@ -1247,6 +1394,79 @@ error_ret:
 	return ret;
 }
 
+static int cpsw_probe_dual_emac(struct platform_device *pdev,
+				struct cpsw_priv *priv)
+{
+	struct cpsw_platform_data	*data = &priv->data;
+	struct net_device		*ndev;
+	struct cpsw_priv		*priv_sl2;
+	int ret = 0, i;
+
+	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
+	if (!ndev) {
+		pr_err("cpsw: error allocating net_device\n");
+		return -ENOMEM;
+	}
+
+	priv_sl2 = netdev_priv(ndev);
+	spin_lock_init(&priv_sl2->lock);
+	priv_sl2->data = *data;
+	priv_sl2->pdev = pdev;
+	priv_sl2->ndev = ndev;
+	priv_sl2->dev  = &ndev->dev;
+	priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
+	priv_sl2->rx_packet_max = max(rx_packet_max, 128);
+
+	if (is_valid_ether_addr(data->slave_data[1].mac_addr)) {
+		memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr,
+			ETH_ALEN);
+		pr_info("cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr);
+	} else {
+		random_ether_addr(priv_sl2->mac_addr);
+		pr_info("cpsw: Random MACID = %pM\n", priv_sl2->mac_addr);
+	}
+	memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN);
+
+	priv_sl2->slaves = priv->slaves;
+	priv_sl2->clk = priv->clk;
+
+	priv_sl2->cpsw_res = priv->cpsw_res;
+	priv_sl2->regs = priv->regs;
+	priv_sl2->host_port = priv->host_port;
+	priv_sl2->host_port_regs = priv->host_port_regs;
+	priv_sl2->wr_regs = priv->wr_regs;
+	priv_sl2->dma = priv->dma;
+	priv_sl2->txch = priv->txch;
+	priv_sl2->rxch = priv->rxch;
+	priv_sl2->ale = priv->ale;
+	priv_sl2->emac_port = 1;
+	priv->slaves[1].ndev = ndev;
+	priv_sl2->cpts = priv->cpts;
+	priv_sl2->version = priv->version;
+
+	for (i = 0; i < priv->num_irqs; i++) {
+		priv_sl2->irqs_table[i] = priv->irqs_table[i];
+		priv_sl2->num_irqs = priv->num_irqs;
+	}
+
+	ndev->features |= NETIF_F_HW_VLAN_FILTER;
+
+	ndev->netdev_ops = &cpsw_netdev_ops;
+	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
+	netif_napi_add(ndev, &priv_sl2->napi, cpsw_poll, CPSW_POLL_WEIGHT);
+
+	/* register the network device */
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+	ret = register_netdev(ndev);
+	if (ret) {
+		pr_err("cpsw: error registering net device\n");
+		free_netdev(ndev);
+		ret = -ENODEV;
+	}
+
+	return ret;
+}
+
 static int cpsw_probe(struct platform_device *pdev)
 {
 	struct cpsw_platform_data	*data = pdev->dev.platform_data;
@@ -1310,6 +1530,9 @@ static int cpsw_probe(struct platform_device *pdev)
 	for (i = 0; i < data->slaves; i++)
 		priv->slaves[i].slave_num = i;
 
+	priv->slaves[0].ndev = ndev;
+	priv->emac_port = 0;
+
 	priv->clk = clk_get(&pdev->dev, "fck");
 	if (IS_ERR(priv->clk)) {
 		dev_err(&pdev->dev, "fck is not found\n");
@@ -1484,6 +1707,14 @@ static int cpsw_probe(struct platform_device *pdev)
 	cpsw_notice(priv, probe, "initialized device (regs %x, irq %d)\n",
 		  priv->cpsw_res->start, ndev->irq);
 
+	if (priv->data.dual_emac) {
+		ret = cpsw_probe_dual_emac(pdev, priv);
+		if (ret) {
+			cpsw_err(priv, probe, "error probe slave 2 emac interface\n");
+			goto clean_irq_ret;
+		}
+	}
+
 	return 0;
 
 clean_irq_ret:
diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h
index e962cfd552e3..798fb80b024b 100644
--- a/include/linux/platform_data/cpsw.h
+++ b/include/linux/platform_data/cpsw.h
@@ -21,6 +21,8 @@ struct cpsw_slave_data {
 	char		phy_id[MII_BUS_ID_SIZE];
 	int		phy_if;
 	u8		mac_addr[ETH_ALEN];
+	u16		dual_emac_res_vlan;	/* Reserved VLAN for DualEMAC */
+
 };
 
 struct cpsw_platform_data {
@@ -36,6 +38,7 @@ struct cpsw_platform_data {
 	u32	rx_descs;	/* Number of Rx Descriptios */
 	u32	mac_control;	/* Mac control register */
 	u16	default_vlan;	/* Def VLAN for ALE lookup in VLAN aware mode*/
+	bool	dual_emac;	/* Enable Dual EMAC mode */
 };
 
 #endif /* __CPSW_H__ */
-- 
cgit v1.2.3


From bd69f73f2c81eed9a398708b8c4bb3409ba1b0f9 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sun, 10 Feb 2013 22:57:21 +0000
Subject: of: Create function for counting number of phandles in a property

This patch creates of_count_phandle_with_args(), a new function for
counting the number of phandle+argument tuples in a given property. This
is better than the existing method of parsing each phandle individually
until parsing fails which is a horribly slow way to do the count.

Tested on ARM using the selftest code.

v3: - Rebased on top of selftest code cleanup patch
v2: - fix bug where of_parse_phandle_with_args() could behave like _count_.
    - made of_gpio_named_count() into a static inline regardless of CONFIG_OF_GPIO

Tested-by: Andreas Larsson <andreas@gaisler.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Rob Herring <rob.herring@calxeda.com>
---
 drivers/of/base.c     | 41 +++++++++++++++++++++++++++++++++++++----
 drivers/of/selftest.c | 15 +++++++++++++++
 include/linux/of.h    |  9 +++++++++
 3 files changed, 61 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index c5572cb87a88..321d3ef05006 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1091,9 +1091,10 @@ EXPORT_SYMBOL(of_parse_phandle);
  * To get a device_node of the `node2' node you may call this:
  * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
  */
-int of_parse_phandle_with_args(const struct device_node *np, const char *list_name,
-				const char *cells_name, int index,
-				struct of_phandle_args *out_args)
+static int __of_parse_phandle_with_args(const struct device_node *np,
+					const char *list_name,
+					const char *cells_name, int index,
+					struct of_phandle_args *out_args)
 {
 	const __be32 *list, *list_end;
 	int rc = 0, size, cur_index = 0;
@@ -1183,15 +1184,47 @@ int of_parse_phandle_with_args(const struct device_node *np, const char *list_na
 	 * Unlock node before returning result; will be one of:
 	 * -ENOENT : index is for empty phandle
 	 * -EINVAL : parsing error on data
+	 * [1..n]  : Number of phandle (count mode; when index = -1)
 	 */
-	rc = -ENOENT;
+	rc = index < 0 ? cur_index : -ENOENT;
  err:
 	if (node)
 		of_node_put(node);
 	return rc;
 }
+
+int of_parse_phandle_with_args(const struct device_node *np, const char *list_name,
+				const char *cells_name, int index,
+				struct of_phandle_args *out_args)
+{
+	if (index < 0)
+		return -EINVAL;
+	return __of_parse_phandle_with_args(np, list_name, cells_name, index, out_args);
+}
 EXPORT_SYMBOL(of_parse_phandle_with_args);
 
+/**
+ * of_count_phandle_with_args() - Find the number of phandles references in a property
+ * @np:		pointer to a device tree node containing a list
+ * @list_name:	property name that contains a list
+ * @cells_name:	property name that specifies phandles' arguments count
+ *
+ * Returns the number of phandle + argument tuples within a property. It
+ * is a typical pattern to encode a list of phandle and variable
+ * arguments into a single property. The number of arguments is encoded
+ * by a property in the phandle-target node. For example, a gpios
+ * property would contain a list of GPIO specifies consisting of a
+ * phandle and 1 or more arguments. The number of arguments are
+ * determined by the #gpio-cells property in the node pointed to by the
+ * phandle.
+ */
+int of_count_phandle_with_args(const struct device_node *np, const char *list_name,
+				const char *cells_name)
+{
+	return __of_parse_phandle_with_args(np, list_name, cells_name, -1, NULL);
+}
+EXPORT_SYMBOL(of_count_phandle_with_args);
+
 #if defined(CONFIG_OF_DYNAMIC)
 static int of_property_notify(int action, struct device_node *np,
 			      struct property *prop)
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index 01b4174a3b46..0eb5c38b4e07 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -36,6 +36,9 @@ static void __init of_selftest_parse_phandle_with_args(void)
 		return;
 	}
 
+	rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells");
+	selftest(rc == 7, "of_count_phandle_with_args() returned %i, expected 7\n", rc);
+
 	for (i = 0; i < 8; i++) {
 		bool passed = true;
 		rc = of_parse_phandle_with_args(np, "phandle-list",
@@ -94,21 +97,33 @@ static void __init of_selftest_parse_phandle_with_args(void)
 	rc = of_parse_phandle_with_args(np, "phandle-list-missing",
 					"#phandle-cells", 0, &args);
 	selftest(rc == -ENOENT, "expected:%i got:%i\n", -ENOENT, rc);
+	rc = of_count_phandle_with_args(np, "phandle-list-missing",
+					"#phandle-cells");
+	selftest(rc == -ENOENT, "expected:%i got:%i\n", -ENOENT, rc);
 
 	/* Check for missing cells property */
 	rc = of_parse_phandle_with_args(np, "phandle-list",
 					"#phandle-cells-missing", 0, &args);
 	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
+	rc = of_count_phandle_with_args(np, "phandle-list",
+					"#phandle-cells-missing");
+	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 
 	/* Check for bad phandle in list */
 	rc = of_parse_phandle_with_args(np, "phandle-list-bad-phandle",
 					"#phandle-cells", 0, &args);
 	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
+	rc = of_count_phandle_with_args(np, "phandle-list-bad-phandle",
+					"#phandle-cells");
+	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 
 	/* Check for incorrectly formed argument list */
 	rc = of_parse_phandle_with_args(np, "phandle-list-bad-args",
 					"#phandle-cells", 1, &args);
 	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
+	rc = of_count_phandle_with_args(np, "phandle-list-bad-args",
+					"#phandle-cells");
+	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 }
 
 static void __init of_selftest_property_match_string(void)
diff --git a/include/linux/of.h b/include/linux/of.h
index b9e1b911f0eb..a0f129284948 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -277,6 +277,8 @@ extern struct device_node *of_parse_phandle(const struct device_node *np,
 extern int of_parse_phandle_with_args(const struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
 	struct of_phandle_args *out_args);
+extern int of_count_phandle_with_args(const struct device_node *np,
+	const char *list_name, const char *cells_name);
 
 extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align));
 extern int of_alias_get_id(struct device_node *np, const char *stem);
@@ -467,6 +469,13 @@ static inline int of_parse_phandle_with_args(struct device_node *np,
 	return -ENOSYS;
 }
 
+static inline int of_count_phandle_with_args(struct device_node *np,
+					     const char *list_name,
+					     const char *cells_name)
+{
+	return -ENOSYS;
+}
+
 static inline int of_alias_get_id(struct device_node *np, const char *stem)
 {
 	return -ENOSYS;
-- 
cgit v1.2.3


From e80beb27d2f81a1c3c8887e0e0a82d77bb392d28 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 12 Feb 2013 17:48:37 +0000
Subject: gpio: Make of_count_named_gpios() use new
 of_count_phandle_with_args()

This patch replaces the horribly coded of_count_named_gpios() with a
call to of_count_phandle_with_args() which is far more efficient. This
also changes the return value of of_gpio_count() & of_gpio_named_count()
from 'unsigned int' to 'int' so that it can return an error code. All
the users of that function are fixed up to correctly handle a negative
return value.

v2: Split GPIO portion into a separate patch

Tested-by: Andreas Larsson <andreas@gaisler.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Rob Herring <rob.herring@calxeda.com>
---
 drivers/gpio/gpiolib-of.c              | 35 -----------------------------
 drivers/hwmon/gpio-fan.c               |  4 ++--
 drivers/input/keyboard/matrix_keypad.c |  8 +++----
 drivers/net/phy/mdio-mux-gpio.c        |  4 ++--
 drivers/spi/spi-fsl-spi.c              |  4 ++--
 drivers/spi/spi-oc-tiny.c              |  8 +++----
 drivers/spi/spi-ppc4xx.c               |  4 ++--
 drivers/spi/spi.c                      |  5 ++---
 include/linux/of_gpio.h                | 40 +++++++++++++++++++---------------
 9 files changed, 41 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index d542a141811a..dd8a2129222f 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -88,41 +88,6 @@ int of_get_named_gpio_flags(struct device_node *np, const char *propname,
 }
 EXPORT_SYMBOL(of_get_named_gpio_flags);
 
-/**
- * of_gpio_named_count - Count GPIOs for a device
- * @np:		device node to count GPIOs for
- * @propname:	property name containing gpio specifier(s)
- *
- * The function returns the count of GPIOs specified for a node.
- *
- * Note that the empty GPIO specifiers counts too. For example,
- *
- * gpios = <0
- *          &pio1 1 2
- *          0
- *          &pio2 3 4>;
- *
- * defines four GPIOs (so this function will return 4), two of which
- * are not specified.
- */
-unsigned int of_gpio_named_count(struct device_node *np, const char* propname)
-{
-	unsigned int cnt = 0;
-
-	do {
-		int ret;
-
-		ret = of_parse_phandle_with_args(np, propname, "#gpio-cells",
-						 cnt, NULL);
-		/* A hole in the gpios = <> counts anyway. */
-		if (ret < 0 && ret != -EEXIST)
-			break;
-	} while (++cnt);
-
-	return cnt;
-}
-EXPORT_SYMBOL(of_gpio_named_count);
-
 /**
  * of_gpio_simple_xlate - translate gpio_spec to the GPIO number and flags
  * @gc:		pointer to the gpio_chip structure
diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c
index 4e04c1228e51..39781945a5d2 100644
--- a/drivers/hwmon/gpio-fan.c
+++ b/drivers/hwmon/gpio-fan.c
@@ -422,7 +422,7 @@ static int gpio_fan_get_of_pdata(struct device *dev,
 
 	/* Fill GPIO pin array */
 	pdata->num_ctrl = of_gpio_count(node);
-	if (!pdata->num_ctrl) {
+	if (pdata->num_ctrl <= 0) {
 		dev_err(dev, "gpios DT property empty / missing");
 		return -ENODEV;
 	}
@@ -477,7 +477,7 @@ static int gpio_fan_get_of_pdata(struct device *dev,
 	pdata->speed = speed;
 
 	/* Alarm GPIO if one exists */
-	if (of_gpio_named_count(node, "alarm-gpios")) {
+	if (of_gpio_named_count(node, "alarm-gpios") > 0) {
 		struct gpio_fan_alarm *alarm;
 		int val;
 		enum of_gpio_flags flags;
diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c
index f4ff0dda7597..71d77192ac1e 100644
--- a/drivers/input/keyboard/matrix_keypad.c
+++ b/drivers/input/keyboard/matrix_keypad.c
@@ -403,7 +403,7 @@ matrix_keypad_parse_dt(struct device *dev)
 	struct matrix_keypad_platform_data *pdata;
 	struct device_node *np = dev->of_node;
 	unsigned int *gpios;
-	int i;
+	int i, nrow, ncol;
 
 	if (!np) {
 		dev_err(dev, "device lacks DT data\n");
@@ -416,9 +416,9 @@ matrix_keypad_parse_dt(struct device *dev)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	pdata->num_row_gpios = of_gpio_named_count(np, "row-gpios");
-	pdata->num_col_gpios = of_gpio_named_count(np, "col-gpios");
-	if (!pdata->num_row_gpios || !pdata->num_col_gpios) {
+	pdata->num_row_gpios = nrow = of_gpio_named_count(np, "row-gpios");
+	pdata->num_col_gpios = ncol = of_gpio_named_count(np, "col-gpios");
+	if (nrow <= 0 || ncol <= 0) {
 		dev_err(dev, "number of keypad rows/columns not specified\n");
 		return ERR_PTR(-EINVAL);
 	}
diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c
index 0c9accb1c14f..e91d7d736ae2 100644
--- a/drivers/net/phy/mdio-mux-gpio.c
+++ b/drivers/net/phy/mdio-mux-gpio.c
@@ -53,7 +53,7 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
 {
 	enum of_gpio_flags f;
 	struct mdio_mux_gpio_state *s;
-	unsigned int num_gpios;
+	int num_gpios;
 	unsigned int n;
 	int r;
 
@@ -61,7 +61,7 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
 		return -ENODEV;
 
 	num_gpios = of_gpio_count(pdev->dev.of_node);
-	if (num_gpios == 0 || num_gpios > MDIO_MUX_GPIO_MAX_BITS)
+	if (num_gpios <= 0 || num_gpios > MDIO_MUX_GPIO_MAX_BITS)
 		return -ENODEV;
 
 	s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 1a7f6359d998..086a9eef2e05 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -947,12 +947,12 @@ static int of_fsl_spi_get_chipselects(struct device *dev)
 	struct device_node *np = dev->of_node;
 	struct fsl_spi_platform_data *pdata = dev->platform_data;
 	struct mpc8xxx_spi_probe_info *pinfo = to_of_pinfo(pdata);
-	unsigned int ngpios;
+	int ngpios;
 	int i = 0;
 	int ret;
 
 	ngpios = of_gpio_count(np);
-	if (!ngpios) {
+	if (ngpios <= 0) {
 		/*
 		 * SPI w/o chip-select line. One SPI device is still permitted
 		 * though.
diff --git a/drivers/spi/spi-oc-tiny.c b/drivers/spi/spi-oc-tiny.c
index 432e66ec3088..cb2e284bd814 100644
--- a/drivers/spi/spi-oc-tiny.c
+++ b/drivers/spi/spi-oc-tiny.c
@@ -54,7 +54,7 @@ struct tiny_spi {
 	unsigned int txc, rxc;
 	const u8 *txp;
 	u8 *rxp;
-	unsigned int gpio_cs_count;
+	int gpio_cs_count;
 	int *gpio_cs;
 };
 
@@ -74,7 +74,7 @@ static void tiny_spi_chipselect(struct spi_device *spi, int is_active)
 {
 	struct tiny_spi *hw = tiny_spi_to_hw(spi);
 
-	if (hw->gpio_cs_count) {
+	if (hw->gpio_cs_count > 0) {
 		gpio_set_value(hw->gpio_cs[spi->chip_select],
 			(spi->mode & SPI_CS_HIGH) ? is_active : !is_active);
 	}
@@ -254,7 +254,7 @@ static int tiny_spi_of_probe(struct platform_device *pdev)
 	if (!np)
 		return 0;
 	hw->gpio_cs_count = of_gpio_count(np);
-	if (hw->gpio_cs_count) {
+	if (hw->gpio_cs_count > 0) {
 		hw->gpio_cs = devm_kzalloc(&pdev->dev,
 				hw->gpio_cs_count * sizeof(unsigned int),
 				GFP_KERNEL);
@@ -352,7 +352,7 @@ static int tiny_spi_probe(struct platform_device *pdev)
 			goto exit_gpio;
 		gpio_direction_output(hw->gpio_cs[i], 1);
 	}
-	hw->bitbang.master->num_chipselect = max(1U, hw->gpio_cs_count);
+	hw->bitbang.master->num_chipselect = max(1, hw->gpio_cs_count);
 
 	/* register our spi controller */
 	err = spi_bitbang_start(&hw->bitbang);
diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 7a85f22b6474..af3e6e756dc9 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -419,7 +419,7 @@ static int __init spi_ppc4xx_of_probe(struct platform_device *op)
 	 * This includes both "null" gpio's and real ones.
 	 */
 	num_gpios = of_gpio_count(np);
-	if (num_gpios) {
+	if (num_gpios > 0) {
 		int i;
 
 		hw->gpios = kzalloc(sizeof(int) * num_gpios, GFP_KERNEL);
@@ -471,7 +471,7 @@ static int __init spi_ppc4xx_of_probe(struct platform_device *op)
 		SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST;
 
 	/* this many pins in all GPIO controllers */
-	bbp->master->num_chipselect = num_gpios;
+	bbp->master->num_chipselect = num_gpios > 0 ? num_gpios : 0;
 
 	/* Get the clock for the OPB */
 	opbnp = of_find_compatible_node(NULL, NULL, "ibm,opb");
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 19ee901577da..21c47482d9fd 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1059,15 +1059,14 @@ EXPORT_SYMBOL_GPL(spi_alloc_master);
 #ifdef CONFIG_OF
 static int of_spi_register_master(struct spi_master *master)
 {
-	u16 nb;
-	int i, *cs;
+	int nb, i, *cs;
 	struct device_node *np = master->dev.of_node;
 
 	if (!np)
 		return 0;
 
 	nb = of_gpio_named_count(np, "cs-gpios");
-	master->num_chipselect = max(nb, master->num_chipselect);
+	master->num_chipselect = max(nb, (int)master->num_chipselect);
 
 	if (nb < 1)
 		return 0;
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index c454f5796747..a83dc6f5008e 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -50,9 +50,6 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc)
 extern int of_get_named_gpio_flags(struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags);
 
-extern unsigned int of_gpio_named_count(struct device_node *np,
-					const char* propname);
-
 extern int of_mm_gpiochip_add(struct device_node *np,
 			      struct of_mm_gpio_chip *mm_gc);
 
@@ -71,12 +68,6 @@ static inline int of_get_named_gpio_flags(struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline unsigned int of_gpio_named_count(struct device_node *np,
-					const char* propname)
-{
-	return 0;
-}
-
 static inline int of_gpio_simple_xlate(struct gpio_chip *gc,
 				       const struct of_phandle_args *gpiospec,
 				       u32 *flags)
@@ -90,22 +81,37 @@ static inline void of_gpiochip_remove(struct gpio_chip *gc) { }
 #endif /* CONFIG_OF_GPIO */
 
 /**
- * of_gpio_count - Count GPIOs for a device
+ * of_gpio_named_count() - Count GPIOs for a device
  * @np:		device node to count GPIOs for
+ * @propname:	property name containing gpio specifier(s)
  *
  * The function returns the count of GPIOs specified for a node.
+ * Note that the empty GPIO specifiers count too. Returns either
+ *   Number of gpios defined in property,
+ *   -EINVAL for an incorrectly formed gpios property, or
+ *   -ENOENT for a missing gpios property
  *
- * Note that the empty GPIO specifiers counts too. For example,
- *
+ * Example:
  * gpios = <0
- *          &pio1 1 2
+ *          &gpio1 1 2
  *          0
- *          &pio2 3 4>;
+ *          &gpio2 3 4>;
+ *
+ * The above example defines four GPIOs, two of which are not specified.
+ * This function will return '4'
+ */
+static inline int of_gpio_named_count(struct device_node *np, const char* propname)
+{
+	return of_count_phandle_with_args(np, propname, "#gpio-cells");
+}
+
+/**
+ * of_gpio_count() - Count GPIOs for a device
+ * @np:		device node to count GPIOs for
  *
- * defines four GPIOs (so this function will return 4), two of which
- * are not specified.
+ * Same as of_gpio_named_count, but hard coded to use the 'gpios' property
  */
-static inline unsigned int of_gpio_count(struct device_node *np)
+static inline int of_gpio_count(struct device_node *np)
 {
 	return of_gpio_named_count(np, "gpios");
 }
-- 
cgit v1.2.3


From 64fd7401c5e4cf7c64452ecd9b700a55a5ebea50 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hp.com>
Date: Mon, 11 Feb 2013 22:33:20 +0000
Subject: ACPI: Remove the use of CONFIG_ACPI_CONTAINER_MODULE

config ACPI_CONTAINER has been changed to bool (y/n), and its
module option is no longer valid.  So, remove the use of
CONFIG_ACPI_CONTAINER_MODULE.

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c6ccd9fa8f08..bcbdd7484e58 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -363,8 +363,7 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 #if defined(CONFIG_ACPI_HOTPLUG_CPU) &&			\
 	(defined(CONFIG_ACPI_HOTPLUG_MEMORY) ||		\
 	 defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) &&	\
-	(defined(CONFIG_ACPI_CONTAINER) ||		\
-	 defined(CONFIG_ACPI_CONTAINER_MODULE))
+	defined(CONFIG_ACPI_CONTAINER)
 #define ACPI_HOTPLUG_OST
 #endif
 
-- 
cgit v1.2.3


From ceaa1fef65a7c2e017b260b879b310dd24888083 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Mon, 11 Feb 2013 05:50:17 +0000
Subject: tcp: adding a per-socket timestamp offset

This functionality is used for restoring tcp sockets. A tcp timestamp
depends on how long a system has been running, so it's differ for each
host. The solution is to set a per-socket offset.

A per-socket offset for a TIME_WAIT socket is inherited from a proper
tcp socket.

tcp_request_sock doesn't have a timestamp offset, because the repair
mode for them are not implemented.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 3 +++
 net/ipv4/tcp.c           | 2 ++
 net/ipv4/tcp_minisocks.c | 2 ++
 3 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 6d0d46138ae8..f28408c07dc2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -162,6 +162,8 @@ struct tcp_sock {
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
 	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
 
+	u32	tsoffset;	/* timestamp offset */
+
 	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
 	unsigned long	tsq_flags;
 
@@ -353,6 +355,7 @@ struct tcp_timewait_sock {
 	u32			  tw_rcv_nxt;
 	u32			  tw_snd_nxt;
 	u32			  tw_rcv_wnd;
+	u32			  tw_ts_offset;
 	u32			  tw_ts_recent;
 	long			  tw_ts_recent_stamp;
 #ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2c7e5963c2ea..8a90bda96038 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk)
 	tcp_enable_early_retrans(tp);
 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
 
+	tp->tsoffset = 0;
+
 	sk->sk_state = TCP_CLOSE;
 
 	sk->sk_write_space = sk_stream_write_space;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f0409287b5f4..4dfc99f54f67 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -288,6 +288,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		tcptw->tw_rcv_wnd	= tcp_receive_window(tp);
 		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+		tcptw->tw_ts_offset	= tp->tsoffset;
 
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
@@ -499,6 +500,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 			newtp->rx_opt.ts_recent_stamp = 0;
 			newtp->tcp_header_len = sizeof(struct tcphdr);
 		}
+		newtp->tsoffset = 0;
 #ifdef CONFIG_TCP_MD5SIG
 		newtp->md5sig_info = NULL;	/*XXX*/
 		if (newtp->af_specific->md5_lookup(sk, newsk))
-- 
cgit v1.2.3


From c9af6db4c11ccc6c3e7f19bbc15d54023956f97c Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Mon, 11 Feb 2013 09:27:41 +0000
Subject: net: Fix possible wrong checksum generation.

Patch cef401de7be8c4e (net: fix possible wrong checksum
generation) fixed wrong checksum calculation but it broke TSO by
defining new GSO type but not a netdev feature for that type.
net_gso_ok() would not allow hardware checksum/segmentation
offload of such packets without the feature.

Following patch fixes TSO and wrong checksum. This patch uses
same logic that Eric Dumazet used. Patch introduces new flag
SKBTX_SHARED_FRAG if at least one frag can be modified by
the user. but SKBTX_SHARED_FRAG flag is kept in skb shared
info tx_flags rather than gso_type.

tx_flags is better compared to gso_type since we can have skb with
shared frag without gso packet. It does not link SHARED_FRAG to
GSO, So there is no need to define netdev feature for this.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c    |  4 ++--
 drivers/net/tun.c        | 13 +++++--------
 drivers/net/virtio_net.c | 13 +++++--------
 include/linux/skbuff.h   | 17 +++++++++--------
 net/core/skbuff.c        |  5 ++---
 net/ipv4/af_inet.c       |  1 -
 net/ipv4/ip_output.c     |  1 +
 net/ipv4/tcp.c           |  4 +---
 net/ipv4/tcp_input.c     |  4 ++--
 net/ipv4/tcp_output.c    |  4 ++--
 net/ipv6/ip6_offload.c   |  1 -
 11 files changed, 29 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index b181dfb3d6d6..97243011d319 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -543,7 +543,6 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		skb->data_len += len;
 		skb->len += len;
 		skb->truesize += truesize;
-		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (len) {
 			int off = base & ~PAGE_MASK;
@@ -599,7 +598,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
 
 	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
-		skb_shinfo(skb)->gso_type |= gso_type;
+		skb_shinfo(skb)->gso_type = gso_type;
 
 		/* Header must be checked, and gso_segs computed. */
 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -743,6 +742,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	if (zerocopy) {
 		skb_shinfo(skb)->destructor_arg = m->msg_control;
 		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 	}
 	if (vlan)
 		macvlan_start_xmit(skb, vlan->dev);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b1038c0e2240..b6f45c5d84d5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1019,7 +1019,6 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		skb->data_len += len;
 		skb->len += len;
 		skb->truesize += truesize;
-		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (len) {
 			int off = base & ~PAGE_MASK;
@@ -1165,18 +1164,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	}
 
 	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-		unsigned short gso_type = 0;
-
 		pr_debug("GSO!\n");
 		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 		case VIRTIO_NET_HDR_GSO_TCPV4:
-			gso_type = SKB_GSO_TCPV4;
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 			break;
 		case VIRTIO_NET_HDR_GSO_TCPV6:
-			gso_type = SKB_GSO_TCPV6;
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 			break;
 		case VIRTIO_NET_HDR_GSO_UDP:
-			gso_type = SKB_GSO_UDP;
+			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 			break;
 		default:
 			tun->dev->stats.rx_frame_errors++;
@@ -1185,10 +1182,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		}
 
 		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-			gso_type |= SKB_GSO_TCP_ECN;
+			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 
 		skb_shinfo(skb)->gso_size = gso.gso_size;
-		skb_shinfo(skb)->gso_type |= gso_type;
 		if (skb_shinfo(skb)->gso_size == 0) {
 			tun->dev->stats.rx_frame_errors++;
 			kfree_skb(skb);
@@ -1204,6 +1200,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	if (zerocopy) {
 		skb_shinfo(skb)->destructor_arg = msg_control;
 		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 	}
 
 	skb_reset_network_header(skb);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 381a2d8d8a81..192c91c8e799 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -227,7 +227,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
 	skb->len += size;
 	skb->truesize += PAGE_SIZE;
 	skb_shinfo(skb)->nr_frags++;
-	skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
+	skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 	*len -= size;
 }
 
@@ -387,18 +387,16 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
 
 	if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-		unsigned short gso_type = 0;
-
 		pr_debug("GSO!\n");
 		switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 		case VIRTIO_NET_HDR_GSO_TCPV4:
-			gso_type = SKB_GSO_TCPV4;
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 			break;
 		case VIRTIO_NET_HDR_GSO_UDP:
-			gso_type = SKB_GSO_UDP;
+			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 			break;
 		case VIRTIO_NET_HDR_GSO_TCPV6:
-			gso_type = SKB_GSO_TCPV6;
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 			break;
 		default:
 			net_warn_ratelimited("%s: bad gso type %u.\n",
@@ -407,7 +405,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		}
 
 		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-			gso_type |= SKB_GSO_TCP_ECN;
+			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 
 		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
 		if (skb_shinfo(skb)->gso_size == 0) {
@@ -415,7 +413,6 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 			goto frame_err;
 		}
 
-		skb_shinfo(skb)->gso_type |= gso_type;
 		/* Header must be checked, and gso_segs computed. */
 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 		skb_shinfo(skb)->gso_segs = 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d7573c37a51d..9da99520ccd5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -230,6 +230,13 @@ enum {
 
 	/* generate wifi status information (where possible) */
 	SKBTX_WIFI_STATUS = 1 << 4,
+
+	/* This indicates at least one fragment might be overwritten
+	 * (as in vmsplice(), sendfile() ...)
+	 * If we need to compute a TX checksum, we'll need to copy
+	 * all frags to avoid possible bad checksum
+	 */
+	SKBTX_SHARED_FRAG = 1 << 5,
 };
 
 /*
@@ -307,13 +314,6 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
-
-	/* This indicates at least one fragment might be overwritten
-	 * (as in vmsplice(), sendfile() ...)
-	 * If we need to compute a TX checksum, we'll need to copy
-	 * all frags to avoid possible bad checksum
-	 */
-	SKB_GSO_SHARED_FRAG = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2220,7 +2220,8 @@ static inline int skb_linearize(struct sk_buff *skb)
  */
 static inline bool skb_has_shared_frag(const struct sk_buff *skb)
 {
-	return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG;
+	return skb_is_nonlinear(skb) &&
+	       skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
 }
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 21a22cce6e53..6c1ad09f8796 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2326,8 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 {
 	int pos = skb_headlen(skb);
 
-	skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type;
-
+	skb_shinfo(skb)->tx_flags = skb_shinfo(skb1)->tx_flags & SKBTX_SHARED_FRAG;
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
 	else		/* Second chunk has no header, nothing to copy. */
@@ -2833,7 +2832,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_copy_from_linear_data_offset(skb, offset,
 						 skb_put(nskb, hsize), hsize);
 
-		skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
+		skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
 
 		while (pos < offset + len && i < nfrags) {
 			*frag = skb_shinfo(skb)->frags[i];
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1aec92bf8018..e6e5d8506336 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1287,7 +1287,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
-		       SKB_GSO_SHARED_FRAG |
 		       0)))
 		goto out;
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3e98ed2bff55..5e12dca7b3dd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -598,6 +598,7 @@ slow_path:
 	/* for offloaded checksums cleanup checksum before fragmentation */
 	if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb))
 		goto fail;
+	iph = ip_hdr(skb);
 
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;		/* Where to start from */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 801b07b796f0..1f0bedb8622f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -897,8 +897,7 @@ new_segment:
 			get_page(page);
 			skb_fill_page_desc(skb, i, page, offset, copy);
 		}
-
-		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 
 		skb->len += copy;
 		skb->data_len += copy;
@@ -3044,7 +3043,6 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 			       SKB_GSO_DODGY |
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
-			       SKB_GSO_SHARED_FRAG |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d9bfaea34322..a759e19496d2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1239,13 +1239,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 */
 	if (!skb_shinfo(prev)->gso_size) {
 		skb_shinfo(prev)->gso_size = mss;
-		skb_shinfo(prev)->gso_type |= sk->sk_gso_type;
+		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
 	}
 
 	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
 	if (skb_shinfo(skb)->gso_segs <= 1) {
 		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
+		skb_shinfo(skb)->gso_type = 0;
 	}
 
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 564bf89d9fd3..6182d90e97b0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1133,7 +1133,6 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
-	skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
 	if (skb->len <= mss_now || !sk_can_gso(sk) ||
 	    skb->ip_summed == CHECKSUM_NONE) {
 		/* Avoid the costly divide in the normal
@@ -1141,10 +1140,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 		 */
 		skb_shinfo(skb)->gso_segs = 1;
 		skb_shinfo(skb)->gso_size = 0;
+		skb_shinfo(skb)->gso_type = 0;
 	} else {
 		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
 		skb_shinfo(skb)->gso_size = mss_now;
-		skb_shinfo(skb)->gso_type |= sk->sk_gso_type;
+		skb_shinfo(skb)->gso_type = sk->sk_gso_type;
 	}
 }
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d141fc32a2ea..f26f0da7f095 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -100,7 +100,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_TCPV6 |
-		       SKB_GSO_SHARED_FRAG |
 		       0)))
 		goto out;
 
-- 
cgit v1.2.3


From 407af3299ef1ac7e87ce3fb530e32a009d1a9efd Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Wed, 13 Feb 2013 12:00:12 +0000
Subject: bridge: Add netlink interface to configure vlans on bridge ports

Add a netlink interface to add and remove vlan configuration on bridge port.
The interface uses the RTM_SETLINK message and encodes the vlan
configuration inside the IFLA_AF_SPEC.  It is possble to include multiple
vlans to either add or remove in a single message.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h      |   2 +
 include/uapi/linux/if_bridge.h |   9 +++
 net/bridge/br_device.c         |   1 +
 net/bridge/br_if.c             |   1 +
 net/bridge/br_netlink.c        | 139 +++++++++++++++++++++++++++++++++++------
 net/bridge/br_private.h        |   1 +
 net/core/rtnetlink.c           |  72 +++++++++++++++++++++
 7 files changed, 207 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 25bd46f52877..1b90f9401000 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1020,6 +1020,8 @@ struct net_device_ops {
 	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
 						      u32 pid, u32 seq,
 						      struct net_device *dev);
+	int			(*ndo_bridge_dellink)(struct net_device *dev,
+						      struct nlmsghdr *nlh);
 	int			(*ndo_change_carrier)(struct net_device *dev,
 						      bool new_carrier);
 };
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 5db297514aec..3ca9817ca7e8 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -108,15 +108,24 @@ struct __fdb_entry {
  * [IFLA_AF_SPEC] = {
  *     [IFLA_BRIDGE_FLAGS]
  *     [IFLA_BRIDGE_MODE]
+ *     [IFLA_BRIDGE_VLAN_INFO]
  * }
  */
 enum {
 	IFLA_BRIDGE_FLAGS,
 	IFLA_BRIDGE_MODE,
+	IFLA_BRIDGE_VLAN_INFO,
 	__IFLA_BRIDGE_MAX,
 };
 #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
 
+#define BRIDGE_VLAN_INFO_MASTER	(1<<0)	/* Operate on Bridge device as well */
+
+struct bridge_vlan_info {
+	u16 flags;
+	u16 vid;
+};
+
 /* Bridge multicast database attributes
  * [MDBA_MDB] = {
  *     [MDBA_MDB_ENTRY] = {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 35a2c2c84f33..091bedf266a0 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -316,6 +316,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_fdb_dump		 = br_fdb_dump,
 	.ndo_bridge_getlink	 = br_getlink,
 	.ndo_bridge_setlink	 = br_setlink,
+	.ndo_bridge_dellink	 = br_dellink,
 };
 
 static void br_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index af9d65ab4001..335c60cebfd1 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -23,6 +23,7 @@
 #include <linux/if_ether.h>
 #include <linux/slab.h>
 #include <net/sock.h>
+#include <linux/if_vlan.h>
 
 #include "br_private.h"
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 39ca9796f3f7..534a9f4587a9 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -16,6 +16,7 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <uapi/linux/if_bridge.h>
 
 #include "br_private.h"
 #include "br_private_stp.h"
@@ -119,10 +120,14 @@ nla_put_failure:
  */
 void br_ifinfo_notify(int event, struct net_bridge_port *port)
 {
-	struct net *net = dev_net(port->dev);
+	struct net *net;
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
+	if (!port)
+		return;
+
+	net = dev_net(port->dev);
 	br_debug(port->br, "port %u(%s) event %d\n",
 		 (unsigned int)port->port_no, port->dev->name, event);
 
@@ -144,6 +149,7 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 }
 
+
 /*
  * Dump information about all ports, in response to GETLINK
  */
@@ -162,6 +168,64 @@ out:
 	return err;
 }
 
+const struct nla_policy ifla_br_policy[IFLA_MAX+1] = {
+	[IFLA_BRIDGE_FLAGS]	= { .type = NLA_U16 },
+	[IFLA_BRIDGE_MODE]	= { .type = NLA_U16 },
+	[IFLA_BRIDGE_VLAN_INFO]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct bridge_vlan_info), },
+};
+
+static int br_afspec(struct net_bridge *br,
+		     struct net_bridge_port *p,
+		     struct nlattr *af_spec,
+		     int cmd)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MAX+1];
+	int err = 0;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy);
+	if (err)
+		return err;
+
+	if (tb[IFLA_BRIDGE_VLAN_INFO]) {
+		struct bridge_vlan_info *vinfo;
+
+		vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
+
+		if (vinfo->vid >= VLAN_N_VID)
+			return -EINVAL;
+
+		switch (cmd) {
+		case RTM_SETLINK:
+			if (p) {
+				err = nbp_vlan_add(p, vinfo->vid);
+				if (err)
+					break;
+
+				if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
+					err = br_vlan_add(p->br, vinfo->vid);
+			} else
+				err = br_vlan_add(br, vinfo->vid);
+
+			if (err)
+				break;
+
+			break;
+
+		case RTM_DELLINK:
+			if (p) {
+				nbp_vlan_delete(p, vinfo->vid);
+				if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
+					br_vlan_delete(p->br, vinfo->vid);
+			} else
+				br_vlan_delete(br, vinfo->vid);
+			break;
+		}
+	}
+
+	return err;
+}
+
 static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_STATE]	= { .type = NLA_U8 },
 	[IFLA_BRPORT_COST]	= { .type = NLA_U32 },
@@ -241,6 +305,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 {
 	struct ifinfomsg *ifm;
 	struct nlattr *protinfo;
+	struct nlattr *afspec;
 	struct net_bridge_port *p;
 	struct nlattr *tb[IFLA_BRPORT_MAX + 1];
 	int err;
@@ -248,38 +313,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 	ifm = nlmsg_data(nlh);
 
 	protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
-	if (!protinfo)
+	afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+	if (!protinfo && !afspec)
 		return 0;
 
 	p = br_port_get_rtnl(dev);
-	if (!p)
+	/* We want to accept dev as bridge itself if the AF_SPEC
+	 * is set to see if someone is setting vlan info on the brigde
+	 */
+	if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec))
 		return -EINVAL;
 
-	if (protinfo->nla_type & NLA_F_NESTED) {
-		err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
-				       protinfo, ifla_brport_policy);
+	if (p && protinfo) {
+		if (protinfo->nla_type & NLA_F_NESTED) {
+			err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
+					       protinfo, ifla_brport_policy);
+			if (err)
+				return err;
+
+			spin_lock_bh(&p->br->lock);
+			err = br_setport(p, tb);
+			spin_unlock_bh(&p->br->lock);
+		} else {
+			/* Binary compatability with old RSTP */
+			if (nla_len(protinfo) < sizeof(u8))
+				return -EINVAL;
+
+			spin_lock_bh(&p->br->lock);
+			err = br_set_port_state(p, nla_get_u8(protinfo));
+			spin_unlock_bh(&p->br->lock);
+		}
 		if (err)
-			return err;
-
-		spin_lock_bh(&p->br->lock);
-		err = br_setport(p, tb);
-		spin_unlock_bh(&p->br->lock);
-	} else {
-		/* Binary compatability with old RSTP */
-		if (nla_len(protinfo) < sizeof(u8))
-			return -EINVAL;
+			goto out;
+	}
 
-		spin_lock_bh(&p->br->lock);
-		err = br_set_port_state(p, nla_get_u8(protinfo));
-		spin_unlock_bh(&p->br->lock);
+	if (afspec) {
+		err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
+				afspec, RTM_SETLINK);
 	}
 
 	if (err == 0)
 		br_ifinfo_notify(RTM_NEWLINK, p);
 
+out:
 	return err;
 }
 
+/* Delete port information */
+int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
+{
+	struct ifinfomsg *ifm;
+	struct nlattr *afspec;
+	struct net_bridge_port *p;
+	int err;
+
+	ifm = nlmsg_data(nlh);
+
+	afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+	if (!afspec)
+		return 0;
+
+	p = br_port_get_rtnl(dev);
+	/* We want to accept dev as bridge itself as well */
+	if (!p && !(dev->priv_flags & IFF_EBRIDGE))
+		return -EINVAL;
+
+	err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
+			afspec, RTM_DELLINK);
+
+	return err;
+}
 static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 {
 	if (tb[IFLA_ADDRESS]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f0f24610d111..a42f9d49a64e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -713,6 +713,7 @@ extern int br_netlink_init(void);
 extern void br_netlink_fini(void);
 extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
+extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg);
 extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 		      struct net_device *dev);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c1e4db60eeca..2c9ccbfbd93c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2464,6 +2464,77 @@ out:
 	return err;
 }
 
+static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			       void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifinfomsg *ifm;
+	struct net_device *dev;
+	struct nlattr *br_spec, *attr = NULL;
+	int rem, err = -EOPNOTSUPP;
+	u16 oflags, flags = 0;
+	bool have_flags = false;
+
+	if (nlmsg_len(nlh) < sizeof(*ifm))
+		return -EINVAL;
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_family != AF_BRIDGE)
+		return -EPFNOSUPPORT;
+
+	dev = __dev_get_by_index(net, ifm->ifi_index);
+	if (!dev) {
+		pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+		return -ENODEV;
+	}
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+	if (br_spec) {
+		nla_for_each_nested(attr, br_spec, rem) {
+			if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+				have_flags = true;
+				flags = nla_get_u16(attr);
+				break;
+			}
+		}
+	}
+
+	oflags = flags;
+
+	if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
+		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+
+		if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+		if (err)
+			goto out;
+
+		flags &= ~BRIDGE_FLAGS_MASTER;
+	}
+
+	if ((flags & BRIDGE_FLAGS_SELF)) {
+		if (!dev->netdev_ops->ndo_bridge_dellink)
+			err = -EOPNOTSUPP;
+		else
+			err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+
+		if (!err)
+			flags &= ~BRIDGE_FLAGS_SELF;
+	}
+
+	if (have_flags)
+		memcpy(nla_data(attr), &flags, sizeof(flags));
+	/* Generate event to notify upper layer of bridge change */
+	if (!err)
+		err = rtnl_bridge_notify(dev, oflags);
+out:
+	return err;
+}
+
 /* Protected by RTNL sempahore.  */
 static struct rtattr **rta_buf;
 static int rtattr_max;
@@ -2647,6 +2718,7 @@ void __init rtnetlink_init(void)
 	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
 
 	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
+	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
 }
 
-- 
cgit v1.2.3


From 6cbdceeb1cb12c7d620161925a8c3e81daadb2e4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Wed, 13 Feb 2013 12:00:13 +0000
Subject: bridge: Dump vlan information from a bridge port

Using the RTM_GETLINK dump the vlan filter list of a given
bridge port.  The information depends on setting the filter
flag similar to how nic VF info is dumped.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  3 +-
 include/linux/netdevice.h                     |  3 +-
 include/uapi/linux/rtnetlink.h                |  1 +
 net/bridge/br_netlink.c                       | 94 +++++++++++++++++++++++----
 net/bridge/br_private.h                       |  3 +-
 net/bridge/br_vlan.c                          |  2 +
 net/core/rtnetlink.c                          | 16 +++--
 7 files changed, 104 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6999269b3a4a..4e2aa47193cb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7079,7 +7079,8 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev,
 }
 
 static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-				    struct net_device *dev)
+				    struct net_device *dev,
+				    u32 filter_mask)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	u16 mode;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1b90f9401000..1964ca66df56 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1019,7 +1019,8 @@ struct net_device_ops {
 						      struct nlmsghdr *nlh);
 	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
 						      u32 pid, u32 seq,
-						      struct net_device *dev);
+						      struct net_device *dev,
+						      u32 filter_mask);
 	int			(*ndo_bridge_dellink)(struct net_device *dev,
 						      struct nlmsghdr *nlh);
 	int			(*ndo_change_carrier)(struct net_device *dev,
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 7a5eb196ade9..7a2144e1afae 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -630,6 +630,7 @@ struct tcamsg {
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
+#define RTEXT_FILTER_BRVLAN	(1 << 1)
 
 /* End of information exported to user level */
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 534a9f4587a9..fe1980d5a7e4 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -65,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb,
  * Create one netlink message for one interface
  * Contains port and master info as well as carrier and bridge state.
  */
-static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port,
-			  u32 pid, u32 seq, int event, unsigned int flags)
+static int br_fill_ifinfo(struct sk_buff *skb,
+			  const struct net_bridge_port *port,
+			  u32 pid, u32 seq, int event, unsigned int flags,
+			  u32 filter_mask, const struct net_device *dev)
 {
-	const struct net_bridge *br = port->br;
-	const struct net_device *dev = port->dev;
+	const struct net_bridge *br;
 	struct ifinfomsg *hdr;
 	struct nlmsghdr *nlh;
 	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
 
+	if (port)
+		br = port->br;
+	else
+		br = netdev_priv(dev);
+
 	br_debug(br, "br_fill_info event %d port %s master %s\n",
 		     event, dev->name, br->dev->name);
 
@@ -99,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
 	     nla_put_u32(skb, IFLA_LINK, dev->iflink)))
 		goto nla_put_failure;
 
-	if (event == RTM_NEWLINK) {
+	if (event == RTM_NEWLINK && port) {
 		struct nlattr *nest
 			= nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
 
@@ -108,6 +114,40 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
 		nla_nest_end(skb, nest);
 	}
 
+	/* Check if  the VID information is requested */
+	if (filter_mask & RTEXT_FILTER_BRVLAN) {
+		struct nlattr *af;
+		const struct net_port_vlans *pv;
+		struct bridge_vlan_info vinfo;
+		u16 vid;
+
+		if (port)
+			pv = nbp_get_vlan_info(port);
+		else
+			pv = br_get_vlan_info(br);
+
+		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN))
+			goto done;
+
+		af = nla_nest_start(skb, IFLA_AF_SPEC);
+		if (!af)
+			goto nla_put_failure;
+
+		for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+		     vid < BR_VLAN_BITMAP_LEN;
+		     vid = find_next_bit(pv->vlan_bitmap,
+					 BR_VLAN_BITMAP_LEN, vid+1)) {
+			vinfo.vid = vid;
+			vinfo.flags = 0;
+			if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
+				    sizeof(vinfo), &vinfo))
+				goto nla_put_failure;
+		}
+
+		nla_nest_end(skb, af);
+	}
+
+done:
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -135,7 +175,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
 	if (skb == NULL)
 		goto errout;
 
-	err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
+	err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -154,16 +194,17 @@ errout:
  * Dump information about all ports, in response to GETLINK
  */
 int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-	       struct net_device *dev)
+	       struct net_device *dev, u32 filter_mask)
 {
 	int err = 0;
 	struct net_bridge_port *port = br_port_get_rcu(dev);
 
-	/* not a bridge port */
-	if (!port)
+	/* not a bridge port and  */
+	if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN))
 		goto out;
 
-	err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI);
+	err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI,
+			     filter_mask, dev);
 out:
 	return err;
 }
@@ -395,6 +436,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
+static size_t br_get_link_af_size(const struct net_device *dev)
+{
+	struct net_port_vlans *pv;
+
+	if (br_port_exists(dev))
+		pv = nbp_get_vlan_info(br_port_get_rcu(dev));
+	else if (dev->priv_flags & IFF_EBRIDGE)
+		pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
+	else
+		return 0;
+
+	if (!pv)
+		return 0;
+
+	/* Each VLAN is returned in bridge_vlan_info along with flags */
+	return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info));
+}
+
+struct rtnl_af_ops br_af_ops = {
+	.family			= AF_BRIDGE,
+	.get_link_af_size	= br_get_link_af_size,
+};
+
 struct rtnl_link_ops br_link_ops __read_mostly = {
 	.kind		= "bridge",
 	.priv_size	= sizeof(struct net_bridge),
@@ -408,11 +472,18 @@ int __init br_netlink_init(void)
 	int err;
 
 	br_mdb_init();
-	err = rtnl_link_register(&br_link_ops);
+	err = rtnl_af_register(&br_af_ops);
 	if (err)
 		goto out;
 
+	err = rtnl_link_register(&br_link_ops);
+	if (err)
+		goto out_af;
+
 	return 0;
+
+out_af:
+	rtnl_af_unregister(&br_af_ops);
 out:
 	br_mdb_uninit();
 	return err;
@@ -421,5 +492,6 @@ out:
 void __exit br_netlink_fini(void)
 {
 	br_mdb_uninit();
+	rtnl_af_unregister(&br_af_ops);
 	rtnl_link_unregister(&br_link_ops);
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a42f9d49a64e..ce2235255c2f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -73,6 +73,7 @@ struct net_port_vlans {
 	}				parent;
 	struct rcu_head			rcu;
 	unsigned long			vlan_bitmap[BR_VLAN_BITMAP_LEN];
+	u16				num_vlans;
 };
 
 struct net_bridge_fdb_entry
@@ -715,7 +716,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
 extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg);
 extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-		      struct net_device *dev);
+		      struct net_device *dev, u32 filter_mask);
 
 #ifdef CONFIG_SYSFS
 /* br_sysfs_if.c */
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index d8690bfe63d4..f2bf5a197ea3 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -28,6 +28,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid)
 	}
 
 	set_bit(vid, v->vlan_bitmap);
+	v->num_vlans++;
 	return 0;
 }
 
@@ -44,6 +45,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
 	}
 
 	clear_bit(vid, v->vlan_bitmap);
+	v->num_vlans--;
 	if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
 		if (v->port_idx)
 			rcu_assign_pointer(v->parent.port->vlan_info, NULL);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2c9ccbfbd93c..f3a112ec86d5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2315,6 +2315,13 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 	int idx = 0;
 	u32 portid = NETLINK_CB(cb->skb).portid;
 	u32 seq = cb->nlh->nlmsg_seq;
+	struct nlattr *extfilt;
+	u32 filter_mask = 0;
+
+	extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg),
+				  IFLA_EXT_MASK);
+	if (extfilt)
+		filter_mask = nla_get_u32(extfilt);
 
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
@@ -2324,14 +2331,15 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 		if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
 			if (idx >= cb->args[0] &&
 			    br_dev->netdev_ops->ndo_bridge_getlink(
-				    skb, portid, seq, dev) < 0)
+				    skb, portid, seq, dev, filter_mask) < 0)
 				break;
 			idx++;
 		}
 
 		if (ops->ndo_bridge_getlink) {
 			if (idx >= cb->args[0] &&
-			    ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0)
+			    ops->ndo_bridge_getlink(skb, portid, seq, dev,
+						    filter_mask) < 0)
 				break;
 			idx++;
 		}
@@ -2372,14 +2380,14 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
 
 	if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
 	    br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
-		err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+		err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
 		if (err < 0)
 			goto errout;
 	}
 
 	if ((flags & BRIDGE_FLAGS_SELF) &&
 	    dev->netdev_ops->ndo_bridge_getlink) {
-		err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+		err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
 		if (err < 0)
 			goto errout;
 	}
-- 
cgit v1.2.3


From 1690be63a27b20ae65c792729a44f5970561ffa4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Wed, 13 Feb 2013 12:00:18 +0000
Subject: bridge: Add vlan support to static neighbors

When a user adds bridge neighbors, allow him to specify VLAN id.
If the VLAN id is not specified, the neighbor will be added
for VLANs currently in the ports filter list.  If no VLANs are
configured on the port, we use vlan 0 and only add 1 entry.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Acked-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |   2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |   1 +
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |   4 +-
 drivers/net/macvlan.c                            |   2 +-
 drivers/net/vxlan.c                              |   3 +-
 include/linux/netdevice.h                        |   4 +-
 include/uapi/linux/neighbour.h                   |   1 +
 net/bridge/br_fdb.c                              | 148 ++++++++++++++++++++---
 net/bridge/br_private.h                          |   6 +-
 net/core/rtnetlink.c                             |  26 ++--
 10 files changed, 162 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 4e2aa47193cb..1c0efcb7920f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7002,7 +7002,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	return err;
 }
 
-static int ixgbe_ndo_fdb_del(struct ndmsg *ndm,
+static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 			     struct net_device *dev,
 			     const unsigned char *addr)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 937bcc3d3212..5088dc5c3d1a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1959,6 +1959,7 @@ static int mlx4_en_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 }
 
 static int mlx4_en_fdb_del(struct ndmsg *ndm,
+			   struct nlattr *tb[],
 			   struct net_device *dev,
 			   const unsigned char *addr)
 {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index b745194391a1..b95316831587 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -247,8 +247,8 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p)
 	return 0;
 }
 
-static int qlcnic_fdb_del(struct ndmsg *ndm, struct net_device *netdev,
-			const unsigned char *addr)
+static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+			struct net_device *netdev, const unsigned char *addr)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	int err = -EOPNOTSUPP;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e4b8078e88a9..defcd8a85744 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -599,7 +599,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	return err;
 }
 
-static int macvlan_fdb_del(struct ndmsg *ndm,
+static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 			   struct net_device *dev,
 			   const unsigned char *addr)
 {
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 72485b9b9005..9d70421cf3a0 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -393,7 +393,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 }
 
 /* Delete entry (via netlink) */
-static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
+static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
+			    struct net_device *dev,
 			    const unsigned char *addr)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1964ca66df56..9deb672d999f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -884,7 +884,8 @@ struct netdev_fcoe_hbainfo {
  *		      struct net_device *dev,
  *		      const unsigned char *addr, u16 flags)
  *	Adds an FDB entry to dev for addr.
- * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev,
+ * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
+ *		      struct net_device *dev,
  *		      const unsigned char *addr)
  *	Deletes the FDB entry from dev coresponding to addr.
  * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
@@ -1008,6 +1009,7 @@ struct net_device_ops {
 					       const unsigned char *addr,
 					       u16 flags);
 	int			(*ndo_fdb_del)(struct ndmsg *ndm,
+					       struct nlattr *tb[],
 					       struct net_device *dev,
 					       const unsigned char *addr);
 	int			(*ndo_fdb_dump)(struct sk_buff *skb,
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 275e5d65dcb2..adb068c53c4e 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -20,6 +20,7 @@ enum {
 	NDA_LLADDR,
 	NDA_CACHEINFO,
 	NDA_PROBES,
+	NDA_VLAN,
 	__NDA_MAX
 };
 
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 276a52254606..4b75ad43aa85 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -505,6 +505,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
 	ci.ndm_refcnt	 = 0;
 	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
 		goto nla_put_failure;
+
+	if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id))
+		goto nla_put_failure;
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -516,6 +520,7 @@ static inline size_t fdb_nlmsg_size(void)
 {
 	return NLMSG_ALIGN(sizeof(struct ndmsg))
 		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+		+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
 		+ nla_total_size(sizeof(struct nda_cacheinfo));
 }
 
@@ -617,6 +622,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
 	return 0;
 }
 
+static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
+	       const unsigned char *addr, u16 nlh_flags, u16 vid)
+{
+	int err = 0;
+
+	if (ndm->ndm_flags & NTF_USE) {
+		rcu_read_lock();
+		br_fdb_update(p->br, p, addr, vid);
+		rcu_read_unlock();
+	} else {
+		spin_lock_bh(&p->br->hash_lock);
+		err = fdb_add_entry(p, addr, ndm->ndm_state,
+				    nlh_flags, vid);
+		spin_unlock_bh(&p->br->hash_lock);
+	}
+
+	return err;
+}
+
 /* Add new permanent fdb entry with RTM_NEWNEIGH */
 int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	       struct net_device *dev,
@@ -624,12 +648,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 {
 	struct net_bridge_port *p;
 	int err = 0;
+	struct net_port_vlans *pv;
+	unsigned short vid = VLAN_N_VID;
 
 	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
 		pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
 		return -EINVAL;
 	}
 
+	if (tb[NDA_VLAN]) {
+		if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
+			return -EINVAL;
+		}
+
+		vid = nla_get_u16(tb[NDA_VLAN]);
+
+		if (vid >= VLAN_N_VID) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
+				vid);
+			return -EINVAL;
+		}
+	}
+
 	p = br_port_get_rtnl(dev);
 	if (p == NULL) {
 		pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
@@ -637,41 +678,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EINVAL;
 	}
 
-	if (ndm->ndm_flags & NTF_USE) {
-		rcu_read_lock();
-		br_fdb_update(p->br, p, addr, 0);
-		rcu_read_unlock();
+	pv = nbp_get_vlan_info(p);
+	if (vid != VLAN_N_VID) {
+		if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
+			pr_info("bridge: RTM_NEWNEIGH with unconfigured "
+				"vlan %d on port %s\n", vid, dev->name);
+			return -EINVAL;
+		}
+
+		/* VID was specified, so use it. */
+		err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
 	} else {
-		spin_lock_bh(&p->br->hash_lock);
-		err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags,
-				0);
-		spin_unlock_bh(&p->br->hash_lock);
+		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+			err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+			goto out;
+		}
+
+		/* We have vlans configured on this port and user didn't
+		 * specify a VLAN.  To be nice, add/update entry for every
+		 * vlan on this port.
+		 */
+		vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+		while (vid < BR_VLAN_BITMAP_LEN) {
+			err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+			if (err)
+				goto out;
+			vid = find_next_bit(pv->vlan_bitmap,
+					    BR_VLAN_BITMAP_LEN, vid+1);
+		}
 	}
 
+out:
 	return err;
 }
 
-static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
+static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr,
+			      u16 vlan)
 {
-	struct net_bridge *br = p->br;
-	struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)];
 	struct net_bridge_fdb_entry *fdb;
 
-	fdb = fdb_find(head, addr, 0);
+	fdb = fdb_find(head, addr, vlan);
 	if (!fdb)
 		return -ENOENT;
 
-	fdb_delete(p->br, fdb);
+	fdb_delete(br, fdb);
 	return 0;
 }
 
+static int __br_fdb_delete(struct net_bridge_port *p,
+			   const unsigned char *addr, u16 vid)
+{
+	int err;
+
+	spin_lock_bh(&p->br->hash_lock);
+	err = fdb_delete_by_addr(p->br, addr, vid);
+	spin_unlock_bh(&p->br->hash_lock);
+
+	return err;
+}
+
 /* Remove neighbor entry with RTM_DELNEIGH */
-int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
+int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
+		  struct net_device *dev,
 		  const unsigned char *addr)
 {
 	struct net_bridge_port *p;
 	int err;
+	struct net_port_vlans *pv;
+	unsigned short vid = VLAN_N_VID;
 
+	if (tb[NDA_VLAN]) {
+		if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
+			return -EINVAL;
+		}
+
+		vid = nla_get_u16(tb[NDA_VLAN]);
+
+		if (vid >= VLAN_N_VID) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
+				vid);
+			return -EINVAL;
+		}
+	}
 	p = br_port_get_rtnl(dev);
 	if (p == NULL) {
 		pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
@@ -679,9 +769,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
 		return -EINVAL;
 	}
 
-	spin_lock_bh(&p->br->hash_lock);
-	err = fdb_delete_by_addr(p, addr);
-	spin_unlock_bh(&p->br->hash_lock);
+	pv = nbp_get_vlan_info(p);
+	if (vid != VLAN_N_VID) {
+		if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
+			pr_info("bridge: RTM_DELNEIGH with unconfigured "
+				"vlan %d on port %s\n", vid, dev->name);
+			return -EINVAL;
+		}
 
+		err = __br_fdb_delete(p, addr, vid);
+	} else {
+		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+			err = __br_fdb_delete(p, addr, 0);
+			goto out;
+		}
+
+		/* We have vlans configured on this port and user didn't
+		 * specify a VLAN.  To be nice, add/update entry for every
+		 * vlan on this port.
+		 */
+		err = -ENOENT;
+		vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+		while (vid < BR_VLAN_BITMAP_LEN) {
+			err &= __br_fdb_delete(p, addr, vid);
+			vid = find_next_bit(pv->vlan_bitmap,
+					    BR_VLAN_BITMAP_LEN, vid+1);
+		}
+	}
+out:
 	return err;
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 22915c8e9961..799dbb37e5a2 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -388,7 +388,7 @@ extern void br_fdb_update(struct net_bridge *br,
 			  const unsigned char *addr,
 			  u16 vid);
 
-extern int br_fdb_delete(struct ndmsg *ndm,
+extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 			 struct net_device *dev,
 			 const unsigned char *addr);
 extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[],
@@ -577,13 +577,13 @@ extern void nbp_vlan_flush(struct net_bridge_port *port);
 static inline struct net_port_vlans *br_get_vlan_info(
 						const struct net_bridge *br)
 {
-	return rcu_dereference(br->vlan_info);
+	return rcu_dereference_rtnl(br->vlan_info);
 }
 
 static inline struct net_port_vlans *nbp_get_vlan_info(
 						const struct net_bridge_port *p)
 {
-	return rcu_dereference(p->vlan_info);
+	return rcu_dereference_rtnl(p->vlan_info);
 }
 
 /* Since bridge now depends on 8021Q module, but the time bridge sees the
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f3a112ec86d5..d8aa20f6a46e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2119,13 +2119,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct ndmsg *ndm;
-	struct nlattr *llattr;
+	struct nlattr *tb[NDA_MAX+1];
 	struct net_device *dev;
 	int err = -EINVAL;
 	__u8 *addr;
 
-	if (nlmsg_len(nlh) < sizeof(*ndm))
-		return -EINVAL;
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+	if (err < 0)
+		return err;
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex == 0) {
@@ -2139,13 +2143,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		return -ENODEV;
 	}
 
-	llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR);
-	if (llattr == NULL || nla_len(llattr) != ETH_ALEN) {
-		pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n");
+	if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
+		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+		return -EINVAL;
+	}
+
+	addr = nla_data(tb[NDA_LLADDR]);
+	if (!is_valid_ether_addr(addr)) {
+		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
 		return -EINVAL;
 	}
 
-	addr = nla_data(llattr);
 	err = -EOPNOTSUPP;
 
 	/* Support fdb on master device the net/bridge default case */
@@ -2155,7 +2163,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		const struct net_device_ops *ops = br_dev->netdev_ops;
 
 		if (ops->ndo_fdb_del)
-			err = ops->ndo_fdb_del(ndm, dev, addr);
+			err = ops->ndo_fdb_del(ndm, tb, dev, addr);
 
 		if (err)
 			goto out;
@@ -2165,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	/* Embedded bridge, macvlan, and any other device support */
 	if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) {
-		err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr);
+		err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
 
 		if (!err) {
 			rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
-- 
cgit v1.2.3


From 112202d9098aae2c36436e5178c0cf3ced423c7b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 13 Feb 2013 19:29:12 -0800
Subject: workqueue: rename cpu_workqueue to pool_workqueue

workqueue has moved away from global_cwqs to worker_pools and with the
scheduled custom worker pools, wforkqueues will be associated with
pools which don't have anything to do with CPUs.  The workqueue code
went through significant amount of changes recently and mass renaming
isn't likely to hurt much additionally.  Let's replace 'cpu' with
'pool' so that it reflects the current design.

* s/struct cpu_workqueue_struct/struct pool_workqueue/
* s/cpu_wq/pool_wq/
* s/cwq/pwq/

This patch is purely cosmetic.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h        |  12 +-
 include/trace/events/workqueue.h |  10 +-
 kernel/workqueue.c               | 433 +++++++++++++++++++--------------------
 kernel/workqueue_internal.h      |   2 +-
 4 files changed, 228 insertions(+), 229 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a3d7556510c3..8afab27cdbc2 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -27,7 +27,7 @@ void delayed_work_timer_fn(unsigned long __data);
 enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
 	WORK_STRUCT_DELAYED_BIT	= 1,	/* work item is delayed */
-	WORK_STRUCT_CWQ_BIT	= 2,	/* data points to cwq */
+	WORK_STRUCT_PWQ_BIT	= 2,	/* data points to pwq */
 	WORK_STRUCT_LINKED_BIT	= 3,	/* next work is linked to this one */
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 	WORK_STRUCT_STATIC_BIT	= 4,	/* static initializer (debugobjects) */
@@ -40,7 +40,7 @@ enum {
 
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
 	WORK_STRUCT_DELAYED	= 1 << WORK_STRUCT_DELAYED_BIT,
-	WORK_STRUCT_CWQ		= 1 << WORK_STRUCT_CWQ_BIT,
+	WORK_STRUCT_PWQ		= 1 << WORK_STRUCT_PWQ_BIT,
 	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 	WORK_STRUCT_STATIC	= 1 << WORK_STRUCT_STATIC_BIT,
@@ -60,14 +60,14 @@ enum {
 	WORK_CPU_END		= NR_CPUS + 1,
 
 	/*
-	 * Reserve 7 bits off of cwq pointer w/ debugobjects turned
-	 * off.  This makes cwqs aligned to 256 bytes and allows 15
-	 * workqueue flush colors.
+	 * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
+	 * This makes pwqs aligned to 256 bytes and allows 15 workqueue
+	 * flush colors.
 	 */
 	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
 				  WORK_STRUCT_COLOR_BITS,
 
-	/* data contains off-queue information when !WORK_STRUCT_CWQ */
+	/* data contains off-queue information when !WORK_STRUCT_PWQ */
 	WORK_OFFQ_FLAG_BASE	= WORK_STRUCT_FLAG_BITS,
 
 	WORK_OFFQ_CANCELING	= (1 << WORK_OFFQ_FLAG_BASE),
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index 4e798e384a6a..bf0e18ba6cfb 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(workqueue_work,
 /**
  * workqueue_queue_work - called when a work gets queued
  * @req_cpu:	the requested cpu
- * @cwq:	pointer to struct cpu_workqueue_struct
+ * @pwq:	pointer to struct pool_workqueue
  * @work:	pointer to struct work_struct
  *
  * This event occurs when a work is queued immediately or once a
@@ -36,10 +36,10 @@ DECLARE_EVENT_CLASS(workqueue_work,
  */
 TRACE_EVENT(workqueue_queue_work,
 
-	TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq,
+	TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq,
 		 struct work_struct *work),
 
-	TP_ARGS(req_cpu, cwq, work),
+	TP_ARGS(req_cpu, pwq, work),
 
 	TP_STRUCT__entry(
 		__field( void *,	work	)
@@ -52,9 +52,9 @@ TRACE_EVENT(workqueue_queue_work,
 	TP_fast_assign(
 		__entry->work		= work;
 		__entry->function	= work->func;
-		__entry->workqueue	= cwq->wq;
+		__entry->workqueue	= pwq->wq;
 		__entry->req_cpu	= req_cpu;
-		__entry->cpu		= cwq->pool->cpu;
+		__entry->cpu		= pwq->pool->cpu;
 	),
 
 	TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u",
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ea7f696f1060..0f1a264d0f22 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -154,11 +154,12 @@ struct worker_pool {
 } ____cacheline_aligned_in_smp;
 
 /*
- * The per-CPU workqueue.  The lower WORK_STRUCT_FLAG_BITS of
- * work_struct->data are used for flags and thus cwqs need to be
- * aligned at two's power of the number of flag bits.
+ * The per-pool workqueue.  While queued, the lower WORK_STRUCT_FLAG_BITS
+ * of work_struct->data are used for flags and the remaining high bits
+ * point to the pwq; thus, pwqs need to be aligned at two's power of the
+ * number of flag bits.
  */
-struct cpu_workqueue_struct {
+struct pool_workqueue {
 	struct worker_pool	*pool;		/* I: the associated pool */
 	struct workqueue_struct *wq;		/* I: the owning workqueue */
 	int			work_color;	/* L: current color */
@@ -207,16 +208,16 @@ typedef unsigned long mayday_mask_t;
 struct workqueue_struct {
 	unsigned int		flags;		/* W: WQ_* flags */
 	union {
-		struct cpu_workqueue_struct __percpu	*pcpu;
-		struct cpu_workqueue_struct		*single;
+		struct pool_workqueue __percpu		*pcpu;
+		struct pool_workqueue			*single;
 		unsigned long				v;
-	} cpu_wq;				/* I: cwq's */
+	} pool_wq;				/* I: pwq's */
 	struct list_head	list;		/* W: list of all workqueues */
 
 	struct mutex		flush_mutex;	/* protects wq flushing */
 	int			work_color;	/* F: current work color */
 	int			flush_color;	/* F: current flush color */
-	atomic_t		nr_cwqs_to_flush; /* flush in progress */
+	atomic_t		nr_pwqs_to_flush; /* flush in progress */
 	struct wq_flusher	*first_flusher;	/* F: first flusher */
 	struct list_head	flusher_queue;	/* F: flush waiters */
 	struct list_head	flusher_overflow; /* F: flush overflow list */
@@ -225,7 +226,7 @@ struct workqueue_struct {
 	struct worker		*rescuer;	/* I: rescue worker */
 
 	int			nr_drainers;	/* W: drain in progress */
-	int			saved_max_active; /* W: saved cwq max_active */
+	int			saved_max_active; /* W: saved pwq max_active */
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map	lockdep_map;
 #endif
@@ -268,7 +269,7 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
 	return WORK_CPU_END;
 }
 
-static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
+static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask,
 				 struct workqueue_struct *wq)
 {
 	return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
@@ -284,7 +285,7 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
  *
  * for_each_wq_cpu()		: possible CPUs + WORK_CPU_UNBOUND
  * for_each_online_wq_cpu()	: online CPUs + WORK_CPU_UNBOUND
- * for_each_cwq_cpu()		: possible CPUs for bound workqueues,
+ * for_each_pwq_cpu()		: possible CPUs for bound workqueues,
  *				  WORK_CPU_UNBOUND for unbound workqueues
  */
 #define for_each_wq_cpu(cpu)						\
@@ -297,10 +298,10 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
 	     (cpu) < WORK_CPU_END;					\
 	     (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
 
-#define for_each_cwq_cpu(cpu, wq)					\
-	for ((cpu) = __next_cwq_cpu(-1, cpu_possible_mask, (wq));	\
+#define for_each_pwq_cpu(cpu, wq)					\
+	for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq));	\
 	     (cpu) < WORK_CPU_END;					\
-	     (cpu) = __next_cwq_cpu((cpu), cpu_possible_mask, (wq)))
+	     (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq)))
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
@@ -479,14 +480,14 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
 	return &pools[highpri];
 }
 
-static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
-					    struct workqueue_struct *wq)
+static struct pool_workqueue *get_pwq(unsigned int cpu,
+				      struct workqueue_struct *wq)
 {
 	if (!(wq->flags & WQ_UNBOUND)) {
 		if (likely(cpu < nr_cpu_ids))
-			return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
+			return per_cpu_ptr(wq->pool_wq.pcpu, cpu);
 	} else if (likely(cpu == WORK_CPU_UNBOUND))
-		return wq->cpu_wq.single;
+		return wq->pool_wq.single;
 	return NULL;
 }
 
@@ -507,18 +508,18 @@ static int work_next_color(int color)
 }
 
 /*
- * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data
- * contain the pointer to the queued cwq.  Once execution starts, the flag
+ * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
+ * contain the pointer to the queued pwq.  Once execution starts, the flag
  * is cleared and the high bits contain OFFQ flags and pool ID.
  *
- * set_work_cwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
- * and clear_work_data() can be used to set the cwq, pool or clear
+ * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
+ * and clear_work_data() can be used to set the pwq, pool or clear
  * work->data.  These functions should only be called while the work is
  * owned - ie. while the PENDING bit is set.
  *
- * get_work_pool() and get_work_cwq() can be used to obtain the pool or cwq
+ * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
  * corresponding to a work.  Pool is available once the work has been
- * queued anywhere after initialization until it is sync canceled.  cwq is
+ * queued anywhere after initialization until it is sync canceled.  pwq is
  * available only while the work item is queued.
  *
  * %WORK_OFFQ_CANCELING is used to mark a work item which is being
@@ -533,12 +534,11 @@ static inline void set_work_data(struct work_struct *work, unsigned long data,
 	atomic_long_set(&work->data, data | flags | work_static(work));
 }
 
-static void set_work_cwq(struct work_struct *work,
-			 struct cpu_workqueue_struct *cwq,
+static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
 			 unsigned long extra_flags)
 {
-	set_work_data(work, (unsigned long)cwq,
-		      WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
+	set_work_data(work, (unsigned long)pwq,
+		      WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
 }
 
 static void set_work_pool_and_keep_pending(struct work_struct *work,
@@ -567,11 +567,11 @@ static void clear_work_data(struct work_struct *work)
 	set_work_data(work, WORK_STRUCT_NO_POOL, 0);
 }
 
-static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
+static struct pool_workqueue *get_work_pwq(struct work_struct *work)
 {
 	unsigned long data = atomic_long_read(&work->data);
 
-	if (data & WORK_STRUCT_CWQ)
+	if (data & WORK_STRUCT_PWQ)
 		return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
 	else
 		return NULL;
@@ -589,8 +589,8 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
 	struct worker_pool *pool;
 	int pool_id;
 
-	if (data & WORK_STRUCT_CWQ)
-		return ((struct cpu_workqueue_struct *)
+	if (data & WORK_STRUCT_PWQ)
+		return ((struct pool_workqueue *)
 			(data & WORK_STRUCT_WQ_DATA_MASK))->pool;
 
 	pool_id = data >> WORK_OFFQ_POOL_SHIFT;
@@ -613,8 +613,8 @@ static int get_work_pool_id(struct work_struct *work)
 {
 	unsigned long data = atomic_long_read(&work->data);
 
-	if (data & WORK_STRUCT_CWQ)
-		return ((struct cpu_workqueue_struct *)
+	if (data & WORK_STRUCT_PWQ)
+		return ((struct pool_workqueue *)
 			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
 
 	return data >> WORK_OFFQ_POOL_SHIFT;
@@ -632,7 +632,7 @@ static bool work_is_canceling(struct work_struct *work)
 {
 	unsigned long data = atomic_long_read(&work->data);
 
-	return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING);
+	return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
 }
 
 /*
@@ -961,67 +961,67 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
 		*nextp = n;
 }
 
-static void cwq_activate_delayed_work(struct work_struct *work)
+static void pwq_activate_delayed_work(struct work_struct *work)
 {
-	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
+	struct pool_workqueue *pwq = get_work_pwq(work);
 
 	trace_workqueue_activate_work(work);
-	move_linked_works(work, &cwq->pool->worklist, NULL);
+	move_linked_works(work, &pwq->pool->worklist, NULL);
 	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
-	cwq->nr_active++;
+	pwq->nr_active++;
 }
 
-static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
+static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
 {
-	struct work_struct *work = list_first_entry(&cwq->delayed_works,
+	struct work_struct *work = list_first_entry(&pwq->delayed_works,
 						    struct work_struct, entry);
 
-	cwq_activate_delayed_work(work);
+	pwq_activate_delayed_work(work);
 }
 
 /**
- * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
- * @cwq: cwq of interest
+ * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
+ * @pwq: pwq of interest
  * @color: color of work which left the queue
  *
  * A work either has completed or is removed from pending queue,
- * decrement nr_in_flight of its cwq and handle workqueue flushing.
+ * decrement nr_in_flight of its pwq and handle workqueue flushing.
  *
  * CONTEXT:
  * spin_lock_irq(pool->lock).
  */
-static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
+static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
 {
 	/* ignore uncolored works */
 	if (color == WORK_NO_COLOR)
 		return;
 
-	cwq->nr_in_flight[color]--;
+	pwq->nr_in_flight[color]--;
 
-	cwq->nr_active--;
-	if (!list_empty(&cwq->delayed_works)) {
+	pwq->nr_active--;
+	if (!list_empty(&pwq->delayed_works)) {
 		/* one down, submit a delayed one */
-		if (cwq->nr_active < cwq->max_active)
-			cwq_activate_first_delayed(cwq);
+		if (pwq->nr_active < pwq->max_active)
+			pwq_activate_first_delayed(pwq);
 	}
 
 	/* is flush in progress and are we at the flushing tip? */
-	if (likely(cwq->flush_color != color))
+	if (likely(pwq->flush_color != color))
 		return;
 
 	/* are there still in-flight works? */
-	if (cwq->nr_in_flight[color])
+	if (pwq->nr_in_flight[color])
 		return;
 
-	/* this cwq is done, clear flush_color */
-	cwq->flush_color = -1;
+	/* this pwq is done, clear flush_color */
+	pwq->flush_color = -1;
 
 	/*
-	 * If this was the last cwq, wake up the first flusher.  It
+	 * If this was the last pwq, wake up the first flusher.  It
 	 * will handle the rest.
 	 */
-	if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
-		complete(&cwq->wq->first_flusher->done);
+	if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
+		complete(&pwq->wq->first_flusher->done);
 }
 
 /**
@@ -1053,7 +1053,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 			       unsigned long *flags)
 {
 	struct worker_pool *pool;
-	struct cpu_workqueue_struct *cwq;
+	struct pool_workqueue *pwq;
 
 	local_irq_save(*flags);
 
@@ -1084,31 +1084,31 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 
 	spin_lock(&pool->lock);
 	/*
-	 * work->data is guaranteed to point to cwq only while the work
-	 * item is queued on cwq->wq, and both updating work->data to point
-	 * to cwq on queueing and to pool on dequeueing are done under
-	 * cwq->pool->lock.  This in turn guarantees that, if work->data
-	 * points to cwq which is associated with a locked pool, the work
+	 * work->data is guaranteed to point to pwq only while the work
+	 * item is queued on pwq->wq, and both updating work->data to point
+	 * to pwq on queueing and to pool on dequeueing are done under
+	 * pwq->pool->lock.  This in turn guarantees that, if work->data
+	 * points to pwq which is associated with a locked pool, the work
 	 * item is currently queued on that pool.
 	 */
-	cwq = get_work_cwq(work);
-	if (cwq && cwq->pool == pool) {
+	pwq = get_work_pwq(work);
+	if (pwq && pwq->pool == pool) {
 		debug_work_deactivate(work);
 
 		/*
 		 * A delayed work item cannot be grabbed directly because
 		 * it might have linked NO_COLOR work items which, if left
-		 * on the delayed_list, will confuse cwq->nr_active
+		 * on the delayed_list, will confuse pwq->nr_active
 		 * management later on and cause stall.  Make sure the work
 		 * item is activated before grabbing.
 		 */
 		if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
-			cwq_activate_delayed_work(work);
+			pwq_activate_delayed_work(work);
 
 		list_del_init(&work->entry);
-		cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work));
+		pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work));
 
-		/* work->data points to cwq iff queued, point to pool */
+		/* work->data points to pwq iff queued, point to pool */
 		set_work_pool_and_keep_pending(work, pool->id);
 
 		spin_unlock(&pool->lock);
@@ -1125,25 +1125,24 @@ fail:
 
 /**
  * insert_work - insert a work into a pool
- * @cwq: cwq @work belongs to
+ * @pwq: pwq @work belongs to
  * @work: work to insert
  * @head: insertion point
  * @extra_flags: extra WORK_STRUCT_* flags to set
  *
- * Insert @work which belongs to @cwq after @head.  @extra_flags is or'd to
+ * Insert @work which belongs to @pwq after @head.  @extra_flags is or'd to
  * work_struct flags.
  *
  * CONTEXT:
  * spin_lock_irq(pool->lock).
  */
-static void insert_work(struct cpu_workqueue_struct *cwq,
-			struct work_struct *work, struct list_head *head,
-			unsigned int extra_flags)
+static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
+			struct list_head *head, unsigned int extra_flags)
 {
-	struct worker_pool *pool = cwq->pool;
+	struct worker_pool *pool = pwq->pool;
 
 	/* we own @work, set data and link */
-	set_work_cwq(work, cwq, extra_flags);
+	set_work_pwq(work, pwq, extra_flags);
 	list_add_tail(&work->entry, head);
 
 	/*
@@ -1170,13 +1169,13 @@ static bool is_chained_work(struct workqueue_struct *wq)
 	 * Return %true iff I'm a worker execuing a work item on @wq.  If
 	 * I'm @worker, it's safe to dereference it without locking.
 	 */
-	return worker && worker->current_cwq->wq == wq;
+	return worker && worker->current_pwq->wq == wq;
 }
 
 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
-	struct cpu_workqueue_struct *cwq;
+	struct pool_workqueue *pwq;
 	struct list_head *worklist;
 	unsigned int work_flags;
 	unsigned int req_cpu = cpu;
@@ -1196,7 +1195,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	    WARN_ON_ONCE(!is_chained_work(wq)))
 		return;
 
-	/* determine the cwq to use */
+	/* determine the pwq to use */
 	if (!(wq->flags & WQ_UNBOUND)) {
 		struct worker_pool *last_pool;
 
@@ -1209,54 +1208,54 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 		 * work needs to be queued on that cpu to guarantee
 		 * non-reentrancy.
 		 */
-		cwq = get_cwq(cpu, wq);
+		pwq = get_pwq(cpu, wq);
 		last_pool = get_work_pool(work);
 
-		if (last_pool && last_pool != cwq->pool) {
+		if (last_pool && last_pool != pwq->pool) {
 			struct worker *worker;
 
 			spin_lock(&last_pool->lock);
 
 			worker = find_worker_executing_work(last_pool, work);
 
-			if (worker && worker->current_cwq->wq == wq) {
-				cwq = get_cwq(last_pool->cpu, wq);
+			if (worker && worker->current_pwq->wq == wq) {
+				pwq = get_pwq(last_pool->cpu, wq);
 			} else {
 				/* meh... not running there, queue here */
 				spin_unlock(&last_pool->lock);
-				spin_lock(&cwq->pool->lock);
+				spin_lock(&pwq->pool->lock);
 			}
 		} else {
-			spin_lock(&cwq->pool->lock);
+			spin_lock(&pwq->pool->lock);
 		}
 	} else {
-		cwq = get_cwq(WORK_CPU_UNBOUND, wq);
-		spin_lock(&cwq->pool->lock);
+		pwq = get_pwq(WORK_CPU_UNBOUND, wq);
+		spin_lock(&pwq->pool->lock);
 	}
 
-	/* cwq determined, queue */
-	trace_workqueue_queue_work(req_cpu, cwq, work);
+	/* pwq determined, queue */
+	trace_workqueue_queue_work(req_cpu, pwq, work);
 
 	if (WARN_ON(!list_empty(&work->entry))) {
-		spin_unlock(&cwq->pool->lock);
+		spin_unlock(&pwq->pool->lock);
 		return;
 	}
 
-	cwq->nr_in_flight[cwq->work_color]++;
-	work_flags = work_color_to_flags(cwq->work_color);
+	pwq->nr_in_flight[pwq->work_color]++;
+	work_flags = work_color_to_flags(pwq->work_color);
 
-	if (likely(cwq->nr_active < cwq->max_active)) {
+	if (likely(pwq->nr_active < pwq->max_active)) {
 		trace_workqueue_activate_work(work);
-		cwq->nr_active++;
-		worklist = &cwq->pool->worklist;
+		pwq->nr_active++;
+		worklist = &pwq->pool->worklist;
 	} else {
 		work_flags |= WORK_STRUCT_DELAYED;
-		worklist = &cwq->delayed_works;
+		worklist = &pwq->delayed_works;
 	}
 
-	insert_work(cwq, work, worklist, work_flags);
+	insert_work(pwq, work, worklist, work_flags);
 
-	spin_unlock(&cwq->pool->lock);
+	spin_unlock(&pwq->pool->lock);
 }
 
 /**
@@ -1661,14 +1660,14 @@ static void rebind_workers(struct worker_pool *pool)
 
 		/*
 		 * wq doesn't really matter but let's keep @worker->pool
-		 * and @cwq->pool consistent for sanity.
+		 * and @pwq->pool consistent for sanity.
 		 */
 		if (std_worker_pool_pri(worker->pool))
 			wq = system_highpri_wq;
 		else
 			wq = system_wq;
 
-		insert_work(get_cwq(pool->cpu, wq), rebind_work,
+		insert_work(get_pwq(pool->cpu, wq), rebind_work,
 			    worker->scheduled.next,
 			    work_color_to_flags(WORK_NO_COLOR));
 	}
@@ -1845,15 +1844,15 @@ static void idle_worker_timeout(unsigned long __pool)
 
 static bool send_mayday(struct work_struct *work)
 {
-	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
-	struct workqueue_struct *wq = cwq->wq;
+	struct pool_workqueue *pwq = get_work_pwq(work);
+	struct workqueue_struct *wq = pwq->wq;
 	unsigned int cpu;
 
 	if (!(wq->flags & WQ_RESCUER))
 		return false;
 
 	/* mayday mayday mayday */
-	cpu = cwq->pool->cpu;
+	cpu = pwq->pool->cpu;
 	/* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
 	if (cpu == WORK_CPU_UNBOUND)
 		cpu = 0;
@@ -2082,9 +2081,9 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 __releases(&pool->lock)
 __acquires(&pool->lock)
 {
-	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
+	struct pool_workqueue *pwq = get_work_pwq(work);
 	struct worker_pool *pool = worker->pool;
-	bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
+	bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
 	int work_color;
 	struct worker *collision;
 #ifdef CONFIG_LOCKDEP
@@ -2125,7 +2124,7 @@ __acquires(&pool->lock)
 	hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
 	worker->current_work = work;
 	worker->current_func = work->func;
-	worker->current_cwq = cwq;
+	worker->current_pwq = pwq;
 	work_color = get_work_color(work);
 
 	list_del_init(&work->entry);
@@ -2154,7 +2153,7 @@ __acquires(&pool->lock)
 
 	spin_unlock_irq(&pool->lock);
 
-	lock_map_acquire_read(&cwq->wq->lockdep_map);
+	lock_map_acquire_read(&pwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
 	trace_workqueue_execute_start(work);
 	worker->current_func(work);
@@ -2164,7 +2163,7 @@ __acquires(&pool->lock)
 	 */
 	trace_workqueue_execute_end(work);
 	lock_map_release(&lockdep_map);
-	lock_map_release(&cwq->wq->lockdep_map);
+	lock_map_release(&pwq->wq->lockdep_map);
 
 	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 		pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
@@ -2185,8 +2184,8 @@ __acquires(&pool->lock)
 	hash_del(&worker->hentry);
 	worker->current_work = NULL;
 	worker->current_func = NULL;
-	worker->current_cwq = NULL;
-	cwq_dec_nr_in_flight(cwq, work_color);
+	worker->current_pwq = NULL;
+	pwq_dec_nr_in_flight(pwq, work_color);
 }
 
 /**
@@ -2353,8 +2352,8 @@ repeat:
 	 */
 	for_each_mayday_cpu(cpu, wq->mayday_mask) {
 		unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
-		struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq);
-		struct worker_pool *pool = cwq->pool;
+		struct pool_workqueue *pwq = get_pwq(tcpu, wq);
+		struct worker_pool *pool = pwq->pool;
 		struct work_struct *work, *n;
 
 		__set_current_state(TASK_RUNNING);
@@ -2370,7 +2369,7 @@ repeat:
 		 */
 		BUG_ON(!list_empty(&rescuer->scheduled));
 		list_for_each_entry_safe(work, n, &pool->worklist, entry)
-			if (get_work_cwq(work) == cwq)
+			if (get_work_pwq(work) == pwq)
 				move_linked_works(work, scheduled, &n);
 
 		process_scheduled_works(rescuer);
@@ -2405,7 +2404,7 @@ static void wq_barrier_func(struct work_struct *work)
 
 /**
  * insert_wq_barrier - insert a barrier work
- * @cwq: cwq to insert barrier into
+ * @pwq: pwq to insert barrier into
  * @barr: wq_barrier to insert
  * @target: target work to attach @barr to
  * @worker: worker currently executing @target, NULL if @target is not executing
@@ -2422,12 +2421,12 @@ static void wq_barrier_func(struct work_struct *work)
  * after a work with LINKED flag set.
  *
  * Note that when @worker is non-NULL, @target may be modified
- * underneath us, so we can't reliably determine cwq from @target.
+ * underneath us, so we can't reliably determine pwq from @target.
  *
  * CONTEXT:
  * spin_lock_irq(pool->lock).
  */
-static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
+static void insert_wq_barrier(struct pool_workqueue *pwq,
 			      struct wq_barrier *barr,
 			      struct work_struct *target, struct worker *worker)
 {
@@ -2460,23 +2459,23 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 	}
 
 	debug_work_activate(&barr->work);
-	insert_work(cwq, &barr->work, head,
+	insert_work(pwq, &barr->work, head,
 		    work_color_to_flags(WORK_NO_COLOR) | linked);
 }
 
 /**
- * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
+ * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
  * @wq: workqueue being flushed
  * @flush_color: new flush color, < 0 for no-op
  * @work_color: new work color, < 0 for no-op
  *
- * Prepare cwqs for workqueue flushing.
+ * Prepare pwqs for workqueue flushing.
  *
- * If @flush_color is non-negative, flush_color on all cwqs should be
- * -1.  If no cwq has in-flight commands at the specified color, all
- * cwq->flush_color's stay at -1 and %false is returned.  If any cwq
- * has in flight commands, its cwq->flush_color is set to
- * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
+ * If @flush_color is non-negative, flush_color on all pwqs should be
+ * -1.  If no pwq has in-flight commands at the specified color, all
+ * pwq->flush_color's stay at -1 and %false is returned.  If any pwq
+ * has in flight commands, its pwq->flush_color is set to
+ * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
  * wakeup logic is armed and %true is returned.
  *
  * The caller should have initialized @wq->first_flusher prior to
@@ -2484,7 +2483,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
  * @flush_color is negative, no flush color update is done and %false
  * is returned.
  *
- * If @work_color is non-negative, all cwqs should have the same
+ * If @work_color is non-negative, all pwqs should have the same
  * work_color which is previous to @work_color and all will be
  * advanced to @work_color.
  *
@@ -2495,42 +2494,42 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
  * %true if @flush_color >= 0 and there's something to flush.  %false
  * otherwise.
  */
-static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
+static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
 				      int flush_color, int work_color)
 {
 	bool wait = false;
 	unsigned int cpu;
 
 	if (flush_color >= 0) {
-		BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
-		atomic_set(&wq->nr_cwqs_to_flush, 1);
+		BUG_ON(atomic_read(&wq->nr_pwqs_to_flush));
+		atomic_set(&wq->nr_pwqs_to_flush, 1);
 	}
 
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
-		struct worker_pool *pool = cwq->pool;
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
+		struct worker_pool *pool = pwq->pool;
 
 		spin_lock_irq(&pool->lock);
 
 		if (flush_color >= 0) {
-			BUG_ON(cwq->flush_color != -1);
+			BUG_ON(pwq->flush_color != -1);
 
-			if (cwq->nr_in_flight[flush_color]) {
-				cwq->flush_color = flush_color;
-				atomic_inc(&wq->nr_cwqs_to_flush);
+			if (pwq->nr_in_flight[flush_color]) {
+				pwq->flush_color = flush_color;
+				atomic_inc(&wq->nr_pwqs_to_flush);
 				wait = true;
 			}
 		}
 
 		if (work_color >= 0) {
-			BUG_ON(work_color != work_next_color(cwq->work_color));
-			cwq->work_color = work_color;
+			BUG_ON(work_color != work_next_color(pwq->work_color));
+			pwq->work_color = work_color;
 		}
 
 		spin_unlock_irq(&pool->lock);
 	}
 
-	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
+	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
 		complete(&wq->first_flusher->done);
 
 	return wait;
@@ -2581,7 +2580,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 
 			wq->first_flusher = &this_flusher;
 
-			if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
+			if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
 						       wq->work_color)) {
 				/* nothing to flush, done */
 				wq->flush_color = next_color;
@@ -2592,7 +2591,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 			/* wait in queue */
 			BUG_ON(wq->flush_color == this_flusher.flush_color);
 			list_add_tail(&this_flusher.list, &wq->flusher_queue);
-			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
+			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
 		}
 	} else {
 		/*
@@ -2659,7 +2658,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 
 			list_splice_tail_init(&wq->flusher_overflow,
 					      &wq->flusher_queue);
-			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
+			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
 		}
 
 		if (list_empty(&wq->flusher_queue)) {
@@ -2669,7 +2668,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 
 		/*
 		 * Need to flush more colors.  Make the next flusher
-		 * the new first flusher and arm cwqs.
+		 * the new first flusher and arm pwqs.
 		 */
 		BUG_ON(wq->flush_color == wq->work_color);
 		BUG_ON(wq->flush_color != next->flush_color);
@@ -2677,7 +2676,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 		list_del_init(&next->list);
 		wq->first_flusher = next;
 
-		if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
+		if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
 			break;
 
 		/*
@@ -2720,13 +2719,13 @@ void drain_workqueue(struct workqueue_struct *wq)
 reflush:
 	flush_workqueue(wq);
 
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
 		bool drained;
 
-		spin_lock_irq(&cwq->pool->lock);
-		drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
-		spin_unlock_irq(&cwq->pool->lock);
+		spin_lock_irq(&pwq->pool->lock);
+		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
+		spin_unlock_irq(&pwq->pool->lock);
 
 		if (drained)
 			continue;
@@ -2749,7 +2748,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 {
 	struct worker *worker = NULL;
 	struct worker_pool *pool;
-	struct cpu_workqueue_struct *cwq;
+	struct pool_workqueue *pwq;
 
 	might_sleep();
 	pool = get_work_pool(work);
@@ -2758,18 +2757,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 
 	spin_lock_irq(&pool->lock);
 	/* see the comment in try_to_grab_pending() with the same code */
-	cwq = get_work_cwq(work);
-	if (cwq) {
-		if (unlikely(cwq->pool != pool))
+	pwq = get_work_pwq(work);
+	if (pwq) {
+		if (unlikely(pwq->pool != pool))
 			goto already_gone;
 	} else {
 		worker = find_worker_executing_work(pool, work);
 		if (!worker)
 			goto already_gone;
-		cwq = worker->current_cwq;
+		pwq = worker->current_pwq;
 	}
 
-	insert_wq_barrier(cwq, barr, work, worker);
+	insert_wq_barrier(pwq, barr, work, worker);
 	spin_unlock_irq(&pool->lock);
 
 	/*
@@ -2778,11 +2777,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 	 * flusher is not running on the same workqueue by verifying write
 	 * access.
 	 */
-	if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER)
-		lock_map_acquire(&cwq->wq->lockdep_map);
+	if (pwq->wq->saved_max_active == 1 || pwq->wq->flags & WQ_RESCUER)
+		lock_map_acquire(&pwq->wq->lockdep_map);
 	else
-		lock_map_acquire_read(&cwq->wq->lockdep_map);
-	lock_map_release(&cwq->wq->lockdep_map);
+		lock_map_acquire_read(&pwq->wq->lockdep_map);
+	lock_map_release(&pwq->wq->lockdep_map);
 
 	return true;
 already_gone:
@@ -3092,46 +3091,46 @@ int keventd_up(void)
 	return system_wq != NULL;
 }
 
-static int alloc_cwqs(struct workqueue_struct *wq)
+static int alloc_pwqs(struct workqueue_struct *wq)
 {
 	/*
-	 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
+	 * pwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
 	 * Make sure that the alignment isn't lower than that of
 	 * unsigned long long.
 	 */
-	const size_t size = sizeof(struct cpu_workqueue_struct);
+	const size_t size = sizeof(struct pool_workqueue);
 	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
 				   __alignof__(unsigned long long));
 
 	if (!(wq->flags & WQ_UNBOUND))
-		wq->cpu_wq.pcpu = __alloc_percpu(size, align);
+		wq->pool_wq.pcpu = __alloc_percpu(size, align);
 	else {
 		void *ptr;
 
 		/*
-		 * Allocate enough room to align cwq and put an extra
+		 * Allocate enough room to align pwq and put an extra
 		 * pointer at the end pointing back to the originally
 		 * allocated pointer which will be used for free.
 		 */
 		ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
 		if (ptr) {
-			wq->cpu_wq.single = PTR_ALIGN(ptr, align);
-			*(void **)(wq->cpu_wq.single + 1) = ptr;
+			wq->pool_wq.single = PTR_ALIGN(ptr, align);
+			*(void **)(wq->pool_wq.single + 1) = ptr;
 		}
 	}
 
 	/* just in case, make sure it's actually aligned */
-	BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
-	return wq->cpu_wq.v ? 0 : -ENOMEM;
+	BUG_ON(!IS_ALIGNED(wq->pool_wq.v, align));
+	return wq->pool_wq.v ? 0 : -ENOMEM;
 }
 
-static void free_cwqs(struct workqueue_struct *wq)
+static void free_pwqs(struct workqueue_struct *wq)
 {
 	if (!(wq->flags & WQ_UNBOUND))
-		free_percpu(wq->cpu_wq.pcpu);
-	else if (wq->cpu_wq.single) {
-		/* the pointer to free is stored right after the cwq */
-		kfree(*(void **)(wq->cpu_wq.single + 1));
+		free_percpu(wq->pool_wq.pcpu);
+	else if (wq->pool_wq.single) {
+		/* the pointer to free is stored right after the pwq */
+		kfree(*(void **)(wq->pool_wq.single + 1));
 	}
 }
 
@@ -3185,25 +3184,25 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	wq->flags = flags;
 	wq->saved_max_active = max_active;
 	mutex_init(&wq->flush_mutex);
-	atomic_set(&wq->nr_cwqs_to_flush, 0);
+	atomic_set(&wq->nr_pwqs_to_flush, 0);
 	INIT_LIST_HEAD(&wq->flusher_queue);
 	INIT_LIST_HEAD(&wq->flusher_overflow);
 
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	INIT_LIST_HEAD(&wq->list);
 
-	if (alloc_cwqs(wq) < 0)
+	if (alloc_pwqs(wq) < 0)
 		goto err;
 
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-		BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
-		cwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI);
-		cwq->wq = wq;
-		cwq->flush_color = -1;
-		cwq->max_active = max_active;
-		INIT_LIST_HEAD(&cwq->delayed_works);
+		BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
+		pwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI);
+		pwq->wq = wq;
+		pwq->flush_color = -1;
+		pwq->max_active = max_active;
+		INIT_LIST_HEAD(&pwq->delayed_works);
 	}
 
 	if (flags & WQ_RESCUER) {
@@ -3234,8 +3233,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	spin_lock(&workqueue_lock);
 
 	if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
-		for_each_cwq_cpu(cpu, wq)
-			get_cwq(cpu, wq)->max_active = 0;
+		for_each_pwq_cpu(cpu, wq)
+			get_pwq(cpu, wq)->max_active = 0;
 
 	list_add(&wq->list, &workqueues);
 
@@ -3244,7 +3243,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	return wq;
 err:
 	if (wq) {
-		free_cwqs(wq);
+		free_pwqs(wq);
 		free_mayday_mask(wq->mayday_mask);
 		kfree(wq->rescuer);
 		kfree(wq);
@@ -3275,14 +3274,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	spin_unlock(&workqueue_lock);
 
 	/* sanity check */
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
 		int i;
 
 		for (i = 0; i < WORK_NR_COLORS; i++)
-			BUG_ON(cwq->nr_in_flight[i]);
-		BUG_ON(cwq->nr_active);
-		BUG_ON(!list_empty(&cwq->delayed_works));
+			BUG_ON(pwq->nr_in_flight[i]);
+		BUG_ON(pwq->nr_active);
+		BUG_ON(!list_empty(&pwq->delayed_works));
 	}
 
 	if (wq->flags & WQ_RESCUER) {
@@ -3291,29 +3290,29 @@ void destroy_workqueue(struct workqueue_struct *wq)
 		kfree(wq->rescuer);
 	}
 
-	free_cwqs(wq);
+	free_pwqs(wq);
 	kfree(wq);
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
 
 /**
- * cwq_set_max_active - adjust max_active of a cwq
- * @cwq: target cpu_workqueue_struct
+ * pwq_set_max_active - adjust max_active of a pwq
+ * @pwq: target pool_workqueue
  * @max_active: new max_active value.
  *
- * Set @cwq->max_active to @max_active and activate delayed works if
+ * Set @pwq->max_active to @max_active and activate delayed works if
  * increased.
  *
  * CONTEXT:
  * spin_lock_irq(pool->lock).
  */
-static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active)
+static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active)
 {
-	cwq->max_active = max_active;
+	pwq->max_active = max_active;
 
-	while (!list_empty(&cwq->delayed_works) &&
-	       cwq->nr_active < cwq->max_active)
-		cwq_activate_first_delayed(cwq);
+	while (!list_empty(&pwq->delayed_works) &&
+	       pwq->nr_active < pwq->max_active)
+		pwq_activate_first_delayed(pwq);
 }
 
 /**
@@ -3336,15 +3335,15 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 
 	wq->saved_max_active = max_active;
 
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
-		struct worker_pool *pool = cwq->pool;
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
+		struct worker_pool *pool = pwq->pool;
 
 		spin_lock_irq(&pool->lock);
 
 		if (!(wq->flags & WQ_FREEZABLE) ||
 		    !(pool->flags & POOL_FREEZING))
-			cwq_set_max_active(cwq, max_active);
+			pwq_set_max_active(pwq, max_active);
 
 		spin_unlock_irq(&pool->lock);
 	}
@@ -3367,9 +3366,9 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active);
  */
 bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
 {
-	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+	struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-	return !list_empty(&cwq->delayed_works);
+	return !list_empty(&pwq->delayed_works);
 }
 EXPORT_SYMBOL_GPL(workqueue_congested);
 
@@ -3408,7 +3407,7 @@ EXPORT_SYMBOL_GPL(work_busy);
  * CPU hotplug.
  *
  * There are two challenges in supporting CPU hotplug.  Firstly, there
- * are a lot of assumptions on strong associations among work, cwq and
+ * are a lot of assumptions on strong associations among work, pwq and
  * pool which make migrating pending and scheduled works very
  * difficult to implement without impacting hot paths.  Secondly,
  * worker pools serve mix of short, long and very long running works making
@@ -3612,11 +3611,11 @@ void freeze_workqueues_begin(void)
 			pool->flags |= POOL_FREEZING;
 
 			list_for_each_entry(wq, &workqueues, list) {
-				struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+				struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-				if (cwq && cwq->pool == pool &&
+				if (pwq && pwq->pool == pool &&
 				    (wq->flags & WQ_FREEZABLE))
-					cwq->max_active = 0;
+					pwq->max_active = 0;
 			}
 
 			spin_unlock_irq(&pool->lock);
@@ -3655,13 +3654,13 @@ bool freeze_workqueues_busy(void)
 		 * to peek without lock.
 		 */
 		list_for_each_entry(wq, &workqueues, list) {
-			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+			struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-			if (!cwq || !(wq->flags & WQ_FREEZABLE))
+			if (!pwq || !(wq->flags & WQ_FREEZABLE))
 				continue;
 
-			BUG_ON(cwq->nr_active < 0);
-			if (cwq->nr_active) {
+			BUG_ON(pwq->nr_active < 0);
+			if (pwq->nr_active) {
 				busy = true;
 				goto out_unlock;
 			}
@@ -3701,14 +3700,14 @@ void thaw_workqueues(void)
 			pool->flags &= ~POOL_FREEZING;
 
 			list_for_each_entry(wq, &workqueues, list) {
-				struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+				struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-				if (!cwq || cwq->pool != pool ||
+				if (!pwq || pwq->pool != pool ||
 				    !(wq->flags & WQ_FREEZABLE))
 					continue;
 
 				/* restore max_active and repopulate worklist */
-				cwq_set_max_active(cwq, wq->saved_max_active);
+				pwq_set_max_active(pwq, wq->saved_max_active);
 			}
 
 			wake_up_worker(pool);
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 328be4a269aa..07650264ec15 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -28,7 +28,7 @@ struct worker {
 
 	struct work_struct	*current_work;	/* L: work being processed */
 	work_func_t		current_func;	/* L: current_work's fn */
-	struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
+	struct pool_workqueue	*current_pwq; /* L: current_work's pwq */
 	struct list_head	scheduled;	/* L: scheduled works */
 	struct task_struct	*task;		/* I: worker task */
 	struct worker_pool	*pool;		/* I: the associated pool */
-- 
cgit v1.2.3


From 7d7e499f7333f68b7e7f67d14b9c1480913b4afb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Jan 2013 12:11:12 +0000
Subject: smpboot: Allow selfparking per cpu threads

The stop machine threads are still killed when a cpu goes offline. The
reason is that the thread is used to bring the cpu down, so it can't
be parked along with the other per cpu threads.

Allow a per cpu thread to be excluded from automatic parking, so it
can park itself once it's done

Add a create callback function as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Arjan van de Veen <arjan@infradead.org>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Weinberger <rw@linutronix.de>
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20130131120741.553993267@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/smpboot.h | 5 +++++
 kernel/smpboot.c        | 5 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index e0106d8581d3..c65dee059913 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -14,6 +14,8 @@ struct smpboot_thread_data;
  * @thread_should_run:	Check whether the thread should run or not. Called with
  *			preemption disabled.
  * @thread_fn:		The associated thread function
+ * @create:		Optional setup function, called when the thread gets
+ *			created (Not called from the thread context)
  * @setup:		Optional setup function, called when the thread gets
  *			operational the first time
  * @cleanup:		Optional cleanup function, called when the thread
@@ -22,6 +24,7 @@ struct smpboot_thread_data;
  *			parked (cpu offline)
  * @unpark:		Optional unpark function, called when the thread is
  *			unparked (cpu online)
+ * @selfparking:	Thread is not parked by the park function.
  * @thread_comm:	The base name of the thread
  */
 struct smp_hotplug_thread {
@@ -29,10 +32,12 @@ struct smp_hotplug_thread {
 	struct list_head		list;
 	int				(*thread_should_run)(unsigned int cpu);
 	void				(*thread_fn)(unsigned int cpu);
+	void				(*create)(unsigned int cpu);
 	void				(*setup)(unsigned int cpu);
 	void				(*cleanup)(unsigned int cpu, bool online);
 	void				(*park)(unsigned int cpu);
 	void				(*unpark)(unsigned int cpu);
+	bool				selfparking;
 	const char			*thread_comm;
 };
 
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index d6c5fc054242..d4abac261779 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -183,9 +183,10 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 		kfree(td);
 		return PTR_ERR(tsk);
 	}
-
 	get_task_struct(tsk);
 	*per_cpu_ptr(ht->store, cpu) = tsk;
+	if (ht->create)
+		ht->create(cpu);
 	return 0;
 }
 
@@ -225,7 +226,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 {
 	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
 
-	if (tsk)
+	if (tsk && !ht->selfparking)
 		kthread_park(tsk);
 }
 
-- 
cgit v1.2.3


From 878ec67b3ac528a2b6d44055f049cdbb9cfda30c Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Thu, 14 Feb 2013 17:39:08 +0100
Subject: regmap: mmio: add register clock support

Some mmio devices have a dedicated interface clock that needs
to be enabled to access their registers. This patch optionally
enables a clock before accessing registers in the regmap_bus
callbacks.

I added (devm_)regmap_init_mmio_clk variants of the init
functions that have an added clk_id string parameter. This
is passed to clk_get to request the clock from the clk
framework.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap-mmio.c | 79 ++++++++++++++++++++++++++++++++-------
 include/linux/regmap.h            | 47 ++++++++++++++++++++---
 2 files changed, 107 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index f05fc74dd84a..98745dd77e8c 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -16,6 +16,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/io.h>
@@ -26,6 +27,7 @@
 struct regmap_mmio_context {
 	void __iomem *regs;
 	unsigned val_bytes;
+	struct clk *clk;
 };
 
 static int regmap_mmio_gather_write(void *context,
@@ -34,9 +36,16 @@ static int regmap_mmio_gather_write(void *context,
 {
 	struct regmap_mmio_context *ctx = context;
 	u32 offset;
+	int ret;
 
 	BUG_ON(reg_size != 4);
 
+	if (ctx->clk) {
+		ret = clk_enable(ctx->clk);
+		if (ret < 0)
+			return ret;
+	}
+
 	offset = *(u32 *)reg;
 
 	while (val_size) {
@@ -64,6 +73,9 @@ static int regmap_mmio_gather_write(void *context,
 		offset += ctx->val_bytes;
 	}
 
+	if (ctx->clk)
+		clk_disable(ctx->clk);
+
 	return 0;
 }
 
@@ -80,9 +92,16 @@ static int regmap_mmio_read(void *context,
 {
 	struct regmap_mmio_context *ctx = context;
 	u32 offset;
+	int ret;
 
 	BUG_ON(reg_size != 4);
 
+	if (ctx->clk) {
+		ret = clk_enable(ctx->clk);
+		if (ret < 0)
+			return ret;
+	}
+
 	offset = *(u32 *)reg;
 
 	while (val_size) {
@@ -110,11 +129,20 @@ static int regmap_mmio_read(void *context,
 		offset += ctx->val_bytes;
 	}
 
+	if (ctx->clk)
+		clk_disable(ctx->clk);
+
 	return 0;
 }
 
 static void regmap_mmio_free_context(void *context)
 {
+	struct regmap_mmio_context *ctx = context;
+
+	if (ctx->clk) {
+		clk_unprepare(ctx->clk);
+		clk_put(ctx->clk);
+	}
 	kfree(context);
 }
 
@@ -128,11 +156,14 @@ static struct regmap_bus regmap_mmio = {
 	.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
 };
 
-static struct regmap_mmio_context *regmap_mmio_gen_context(void __iomem *regs,
+static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev,
+					const char *clk_id,
+					void __iomem *regs,
 					const struct regmap_config *config)
 {
 	struct regmap_mmio_context *ctx;
 	int min_stride;
+	int ret;
 
 	if (config->reg_bits != 32)
 		return ERR_PTR(-EINVAL);
@@ -179,37 +210,59 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(void __iomem *regs,
 	ctx->regs = regs;
 	ctx->val_bytes = config->val_bits / 8;
 
+	if (clk_id == NULL)
+		return ctx;
+
+	ctx->clk = clk_get(dev, clk_id);
+	if (IS_ERR(ctx->clk)) {
+		ret = PTR_ERR(ctx->clk);
+		goto err_free;
+	}
+
+	ret = clk_prepare(ctx->clk);
+	if (ret < 0) {
+		clk_put(ctx->clk);
+		goto err_free;
+	}
+
 	return ctx;
+
+err_free:
+	kfree(ctx);
+
+	return ERR_PTR(ret);
 }
 
 /**
- * regmap_init_mmio(): Initialise register map
+ * regmap_init_mmio_clk(): Initialise register map with register clock
  *
  * @dev: Device that will be interacted with
+ * @clk_id: register clock consumer ID
  * @regs: Pointer to memory-mapped IO region
  * @config: Configuration for register map
  *
  * The return value will be an ERR_PTR() on error or a valid pointer to
  * a struct regmap.
  */
-struct regmap *regmap_init_mmio(struct device *dev,
-				void __iomem *regs,
-				const struct regmap_config *config)
+struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+				    void __iomem *regs,
+				    const struct regmap_config *config)
 {
 	struct regmap_mmio_context *ctx;
 
-	ctx = regmap_mmio_gen_context(regs, config);
+	ctx = regmap_mmio_gen_context(dev, clk_id, regs, config);
 	if (IS_ERR(ctx))
 		return ERR_CAST(ctx);
 
 	return regmap_init(dev, &regmap_mmio, ctx, config);
 }
-EXPORT_SYMBOL_GPL(regmap_init_mmio);
+EXPORT_SYMBOL_GPL(regmap_init_mmio_clk);
 
 /**
- * devm_regmap_init_mmio(): Initialise managed register map
+ * devm_regmap_init_mmio_clk(): Initialise managed register map with clock
  *
  * @dev: Device that will be interacted with
+ * @clk_id: register clock consumer ID
  * @regs: Pointer to memory-mapped IO region
  * @config: Configuration for register map
  *
@@ -217,18 +270,18 @@ EXPORT_SYMBOL_GPL(regmap_init_mmio);
  * to a struct regmap.  The regmap will be automatically freed by the
  * device management code.
  */
-struct regmap *devm_regmap_init_mmio(struct device *dev,
-				     void __iomem *regs,
-				     const struct regmap_config *config)
+struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+					 void __iomem *regs,
+					 const struct regmap_config *config)
 {
 	struct regmap_mmio_context *ctx;
 
-	ctx = regmap_mmio_gen_context(regs, config);
+	ctx = regmap_mmio_gen_context(dev, clk_id, regs, config);
 	if (IS_ERR(ctx))
 		return ERR_CAST(ctx);
 
 	return devm_regmap_init(dev, &regmap_mmio, ctx, config);
 }
-EXPORT_SYMBOL_GPL(devm_regmap_init_mmio);
+EXPORT_SYMBOL_GPL(devm_regmap_init_mmio_clk);
 
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..f0480a3297e4 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -285,9 +285,9 @@ struct regmap *regmap_init_i2c(struct i2c_client *i2c,
 			       const struct regmap_config *config);
 struct regmap *regmap_init_spi(struct spi_device *dev,
 			       const struct regmap_config *config);
-struct regmap *regmap_init_mmio(struct device *dev,
-				void __iomem *regs,
-				const struct regmap_config *config);
+struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+				    void __iomem *regs,
+				    const struct regmap_config *config);
 
 struct regmap *devm_regmap_init(struct device *dev,
 				const struct regmap_bus *bus,
@@ -297,9 +297,44 @@ struct regmap *devm_regmap_init_i2c(struct i2c_client *i2c,
 				    const struct regmap_config *config);
 struct regmap *devm_regmap_init_spi(struct spi_device *dev,
 				    const struct regmap_config *config);
-struct regmap *devm_regmap_init_mmio(struct device *dev,
-				     void __iomem *regs,
-				     const struct regmap_config *config);
+struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+					 void __iomem *regs,
+					 const struct regmap_config *config);
+
+/**
+ * regmap_init_mmio(): Initialise register map
+ *
+ * @dev: Device that will be interacted with
+ * @regs: Pointer to memory-mapped IO region
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+static inline struct regmap *regmap_init_mmio(struct device *dev,
+					void __iomem *regs,
+					const struct regmap_config *config)
+{
+	return regmap_init_mmio_clk(dev, NULL, regs, config);
+}
+
+/**
+ * devm_regmap_init_mmio(): Initialise managed register map
+ *
+ * @dev: Device that will be interacted with
+ * @regs: Pointer to memory-mapped IO region
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+static inline struct regmap *devm_regmap_init_mmio(struct device *dev,
+					void __iomem *regs,
+					const struct regmap_config *config)
+{
+	return devm_regmap_init_mmio_clk(dev, NULL, regs, config);
+}
 
 void regmap_exit(struct regmap *map);
 int regmap_reinit_cache(struct regmap *map,
-- 
cgit v1.2.3


From 7bf9b9a0f0372d45b581f00173505fb76a9c5d23 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Dec 2012 18:45:41 +0100
Subject: wireless: define operating mode action frame

Define the action frame format, the VHT category
and its action types and the field format and EID
for operating mode notifications. The frame may
be used outside of VHT context as well, so don't
include "VHT" in the names.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7e8a498efe6d..67c1a6c45837 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -714,6 +714,30 @@ enum ieee80211_ht_chanwidth_values {
 	IEEE80211_HT_CHANWIDTH_ANY = 1,
 };
 
+/**
+ * enum ieee80211_opmode_bits - VHT operating mode field bits
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK: channel width mask
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: 20 MHz channel width
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: 80 MHz channel width
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: 160 MHz or 80+80 MHz channel width
+ * @IEEE80211_OPMODE_NOTIF_RX_NSS_MASK: number of spatial streams mask
+ *	(the NSS value is the value of this field + 1)
+ * @IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT: number of spatial streams shift
+ * @IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF: indicates streams in SU-MIMO PPDU
+ *	using a beamforming steering matrix
+ */
+enum ieee80211_vht_opmode_bits {
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK	= 3,
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ	= 0,
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ	= 1,
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ	= 2,
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ	= 3,
+	IEEE80211_OPMODE_NOTIF_RX_NSS_MASK	= 0x70,
+	IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT	= 4,
+	IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF	= 0x80,
+};
+
 #define WLAN_SA_QUERY_TR_ID_LEN 2
 
 struct ieee80211_mgmt {
@@ -844,6 +868,10 @@ struct ieee80211_mgmt {
 					__le16 capability;
 					u8 variable[0];
 				} __packed tdls_discover_resp;
+				struct {
+					u8 action_code;
+					u8 operating_mode;
+				} __packed vht_opmode_notif;
 			} u;
 		} __packed action;
 	} u;
@@ -1598,6 +1626,7 @@ enum ieee80211_eid {
 
 	WLAN_EID_VHT_CAPABILITY = 191,
 	WLAN_EID_VHT_OPERATION = 192,
+	WLAN_EID_OPMODE_NOTIF = 199,
 
 	/* 802.11ad */
 	WLAN_EID_NON_TX_BSSID_CAP =  83,
@@ -1652,6 +1681,7 @@ enum ieee80211_category {
 	WLAN_CATEGORY_WMM = 17,
 	WLAN_CATEGORY_FST = 18,
 	WLAN_CATEGORY_UNPROT_DMG = 20,
+	WLAN_CATEGORY_VHT = 21,
 	WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
 	WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
 };
@@ -1677,6 +1707,13 @@ enum ieee80211_ht_actioncode {
 	WLAN_HT_ACTION_ASEL_IDX_FEEDBACK = 7,
 };
 
+/* VHT action codes */
+enum ieee80211_vht_actioncode {
+	WLAN_VHT_ACTION_COMPRESSED_BF = 0,
+	WLAN_VHT_ACTION_GROUPID_MGMT = 1,
+	WLAN_VHT_ACTION_OPMODE_NOTIF = 2,
+};
+
 /* Self Protected Action codes */
 enum ieee80211_self_protected_actioncode {
 	WLAN_SP_RESERVED = 0,
-- 
cgit v1.2.3


From 0af83d3df5863224336a18c24a14fda542b712f5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Dec 2012 18:55:36 +0100
Subject: mac80211: handle VHT operating mode notification

Handle the operating mode notification action frame.
When the supported streams or the bandwidth change
let the driver and rate control algorithm know.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  |  1 +
 include/net/mac80211.h     |  3 ++
 net/mac80211/ht.c          |  4 ++
 net/mac80211/ieee80211_i.h |  3 ++
 net/mac80211/rx.c          | 30 +++++++++++++++
 net/mac80211/sta_info.h    |  4 ++
 net/mac80211/vht.c         | 93 +++++++++++++++++++++++++++++++++++++++++-----
 7 files changed, 128 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 67c1a6c45837..12b5996533ec 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1301,6 +1301,7 @@ struct ieee80211_vht_operation {
 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454			0x00000002
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ		0x00000004
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ	0x00000008
+#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK			0x0000000C
 #define IEEE80211_VHT_CAP_RXLDPC				0x00000010
 #define IEEE80211_VHT_CAP_SHORT_GI_80				0x00000020
 #define IEEE80211_VHT_CAP_SHORT_GI_160				0x00000040
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a608ab9879b4..b7fb311e83f4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2118,11 +2118,14 @@ enum ieee80211_frame_release_type {
  * @IEEE80211_RC_SUPP_RATES_CHANGED: The supported rate set of this peer
  *	changed (in IBSS mode) due to discovering more information about
  *	the peer.
+ * @IEEE80211_RC_NSS_CHANGED: N_SS (number of spatial streams) was changed
+ *	by the peer
  */
 enum ieee80211_rate_control_changed {
 	IEEE80211_RC_BW_CHANGED		= BIT(0),
 	IEEE80211_RC_SMPS_CHANGED	= BIT(1),
 	IEEE80211_RC_SUPP_RATES_CHANGED	= BIT(2),
+	IEEE80211_RC_NSS_CHANGED	= BIT(3),
 };
 
 /**
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index a64b4f0d373f..797969bc26e1 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -212,6 +212,10 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
 		changed = true;
 	sta->sta.bandwidth = bw;
 
+	sta->cur_max_bandwidth =
+		ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+				IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
+
 	return changed;
 }
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3b13af4e6c49..4947c91c6c86 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1433,6 +1433,9 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 					 struct sta_info *sta);
 enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
 void ieee80211_sta_set_rx_nss(struct sta_info *sta);
+void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
+				 struct sta_info *sta, u8 opmode,
+				 enum ieee80211_band band);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8a861a50b12f..1617e0bd4ca6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2435,6 +2435,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			goto invalid;
 		}
 
+		break;
+	case WLAN_CATEGORY_VHT:
+		if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+		    sdata->vif.type != NL80211_IFTYPE_MESH_POINT &&
+		    sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+		    sdata->vif.type != NL80211_IFTYPE_AP &&
+		    sdata->vif.type != NL80211_IFTYPE_ADHOC)
+			break;
+
+		/* verify action code is present */
+		if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+			goto invalid;
+
+		switch (mgmt->u.action.u.vht_opmode_notif.action_code) {
+		case WLAN_VHT_ACTION_OPMODE_NOTIF: {
+			u8 opmode;
+
+			/* verify opmode is present */
+			if (len < IEEE80211_MIN_ACTION_SIZE + 2)
+				goto invalid;
+
+			opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
+
+			ieee80211_vht_handle_opmode(rx->sdata, rx->sta,
+						    opmode, status->band);
+			goto handled;
+		}
+		default:
+			break;
+		}
 		break;
 	case WLAN_CATEGORY_BACK:
 		if (sdata->vif.type != NL80211_IFTYPE_STATION &&
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 03c42f8d39f0..63dfdb5e91da 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -297,6 +297,8 @@ struct sta_ampdu_mlme {
  * @sta_state: duplicates information about station state (for debug)
  * @beacon_loss_count: number of times beacon loss has triggered
  * @rcu_head: RCU head used for freeing this station struct
+ * @cur_max_bandwidth: maximum bandwidth to use for TX to the station,
+ *	taken from HT/VHT capabilities or VHT operating mode notification
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -398,6 +400,8 @@ struct sta_info {
 	} debugfs;
 #endif
 
+	enum ieee80211_sta_rx_bandwidth cur_max_bandwidth;
+
 	unsigned int lost_packets;
 	unsigned int beacon_loss_count;
 
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 67436e3efbbd..0951f74e7ff5 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -10,6 +10,7 @@
 #include <linux/export.h>
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
+#include "rate.h"
 
 
 void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
@@ -39,6 +40,15 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 	memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs,
 	       sizeof(struct ieee80211_vht_mcs_info));
 
+	switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
+	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
+	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+		break;
+	default:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+	}
+
 	sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
 }
 
@@ -46,12 +56,13 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	u32 cap = sta->sta.vht_cap.cap;
+	enum ieee80211_sta_rx_bandwidth bw;
 
-	if (!sta->sta.vht_cap.vht_supported)
-		return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+	if (!sta->sta.vht_cap.vht_supported) {
+		bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
 				IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
-
-	/* TODO: handle VHT opmode notification data */
+		goto check_max;
+	}
 
 	switch (sdata->vif.bss_conf.chandef.width) {
 	default:
@@ -60,19 +71,31 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
 	case NL80211_CHAN_WIDTH_20_NOHT:
 	case NL80211_CHAN_WIDTH_20:
 	case NL80211_CHAN_WIDTH_40:
-		return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+		bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
 				IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
+		break;
 	case NL80211_CHAN_WIDTH_160:
-		if (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)
-			return IEEE80211_STA_RX_BW_160;
+		if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) ==
+				IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) {
+			bw = IEEE80211_STA_RX_BW_160;
+			break;
+		}
 		/* fall through */
 	case NL80211_CHAN_WIDTH_80P80:
-		if (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
-			return IEEE80211_STA_RX_BW_160;
+		if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) ==
+				IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) {
+			bw = IEEE80211_STA_RX_BW_160;
+			break;
+		}
 		/* fall through */
 	case NL80211_CHAN_WIDTH_80:
-		return IEEE80211_STA_RX_BW_80;
+		bw = IEEE80211_STA_RX_BW_80;
 	}
+
+ check_max:
+	if (bw > sta->cur_max_bandwidth)
+		bw = sta->cur_max_bandwidth;
+	return bw;
 }
 
 void ieee80211_sta_set_rx_nss(struct sta_info *sta)
@@ -115,3 +138,53 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
 	ht_rx_nss = max(ht_rx_nss, vht_rx_nss);
 	sta->sta.rx_nss = max_t(u8, 1, ht_rx_nss);
 }
+
+void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
+				 struct sta_info *sta, u8 opmode,
+				 enum ieee80211_band band)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_supported_band *sband;
+	enum ieee80211_sta_rx_bandwidth new_bw;
+	u32 changed = 0;
+	u8 nss;
+
+	sband = local->hw.wiphy->bands[band];
+
+	/* ignore - no support for BF yet */
+	if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)
+		return;
+
+	nss = opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK;
+	nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT;
+	nss += 1;
+
+	if (sta->sta.rx_nss != nss) {
+		sta->sta.rx_nss = nss;
+		changed |= IEEE80211_RC_NSS_CHANGED;
+	}
+
+	switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
+	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
+		break;
+	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40;
+		break;
+	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+		break;
+	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+		break;
+	}
+
+	new_bw = ieee80211_sta_cur_vht_bw(sta);
+	if (new_bw != sta->sta.bandwidth) {
+		sta->sta.bandwidth = new_bw;
+		changed |= IEEE80211_RC_NSS_CHANGED;
+	}
+
+	if (changed)
+		rate_control_rate_update(local, sband, sta, changed);
+}
-- 
cgit v1.2.3


From 4a3cb702b05868f67c4ee3da3380461c5b90b4ca Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 12 Feb 2013 16:43:19 +0100
Subject: mac80211: constify IE parsing

Make all the parsed IE pointers const, and propagate
the change to all the users etc.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h   |  2 +-
 net/mac80211/ht.c           |  2 +-
 net/mac80211/ieee80211_i.h  | 87 +++++++++++++++++++++++----------------------
 net/mac80211/mesh.h         | 11 +++---
 net/mac80211/mesh_hwmp.c    | 42 ++++++++++++----------
 net/mac80211/mesh_pathtbl.c | 11 +++---
 net/mac80211/mlme.c         | 15 ++++----
 net/mac80211/util.c         |  4 +--
 net/mac80211/vht.c          |  9 ++---
 9 files changed, 96 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 12b5996533ec..e085fcf52b26 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2152,7 +2152,7 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu)
  * @tim_len: length of the TIM IE
  * @aid: the AID to look for
  */
-static inline bool ieee80211_check_tim(struct ieee80211_tim_ie *tim,
+static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim,
 				       u8 tim_len, u16 aid)
 {
 	u8 mask;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 797969bc26e1..b84147ac5b4c 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -94,7 +94,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 
 bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
 				       struct ieee80211_supported_band *sband,
-				       struct ieee80211_ht_cap *ht_cap_ie,
+				       const struct ieee80211_ht_cap *ht_cap_ie,
 				       struct sta_info *sta)
 {
 	struct ieee80211_sta_ht_cap ht_cap;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d1074442f1b5..d702f7dc321b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1163,41 +1163,41 @@ struct ieee80211_ra_tid {
 
 /* Parsed Information Elements */
 struct ieee802_11_elems {
-	u8 *ie_start;
+	const u8 *ie_start;
 	size_t total_len;
 
 	/* pointers to IEs */
-	u8 *ssid;
-	u8 *supp_rates;
-	u8 *fh_params;
-	u8 *ds_params;
-	u8 *cf_params;
-	struct ieee80211_tim_ie *tim;
-	u8 *ibss_params;
-	u8 *challenge;
-	u8 *rsn;
-	u8 *erp_info;
-	u8 *ext_supp_rates;
-	u8 *wmm_info;
-	u8 *wmm_param;
-	struct ieee80211_ht_cap *ht_cap_elem;
-	struct ieee80211_ht_operation *ht_operation;
-	struct ieee80211_vht_cap *vht_cap_elem;
-	struct ieee80211_vht_operation *vht_operation;
-	struct ieee80211_meshconf_ie *mesh_config;
-	u8 *mesh_id;
-	u8 *peering;
-	__le16 *awake_window;
-	u8 *preq;
-	u8 *prep;
-	u8 *perr;
-	struct ieee80211_rann_ie *rann;
-	struct ieee80211_channel_sw_ie *ch_switch_ie;
-	u8 *country_elem;
-	u8 *pwr_constr_elem;
-	u8 *quiet_elem;	/* first quite element */
-	u8 *timeout_int;
-	u8 *opmode_notif;
+	const u8 *ssid;
+	const u8 *supp_rates;
+	const u8 *fh_params;
+	const u8 *ds_params;
+	const u8 *cf_params;
+	const struct ieee80211_tim_ie *tim;
+	const u8 *ibss_params;
+	const u8 *challenge;
+	const u8 *rsn;
+	const u8 *erp_info;
+	const u8 *ext_supp_rates;
+	const u8 *wmm_info;
+	const u8 *wmm_param;
+	const struct ieee80211_ht_cap *ht_cap_elem;
+	const struct ieee80211_ht_operation *ht_operation;
+	const struct ieee80211_vht_cap *vht_cap_elem;
+	const struct ieee80211_vht_operation *vht_operation;
+	const struct ieee80211_meshconf_ie *mesh_config;
+	const u8 *mesh_id;
+	const u8 *peering;
+	const __le16 *awake_window;
+	const u8 *preq;
+	const u8 *prep;
+	const u8 *perr;
+	const struct ieee80211_rann_ie *rann;
+	const struct ieee80211_channel_sw_ie *ch_switch_ie;
+	const u8 *country_elem;
+	const u8 *pwr_constr_elem;
+	const u8 *quiet_elem;	/* first quite element */
+	const u8 *timeout_int;
+	const u8 *opmode_notif;
 
 	/* length of them, respectively */
 	u8 ssid_len;
@@ -1276,10 +1276,10 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata);
 int ieee80211_max_network_latency(struct notifier_block *nb,
 				  unsigned long data, void *dummy);
 int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
-void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
-				      struct ieee80211_channel_sw_ie *sw_elem,
-				      struct ieee80211_bss *bss,
-				      u64 timestamp);
+void
+ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
+				 const struct ieee80211_channel_sw_ie *sw_elem,
+				 struct ieee80211_bss *bss, u64 timestamp);
 void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
@@ -1387,7 +1387,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_sta_ht_cap *ht_cap);
 bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
 				       struct ieee80211_supported_band *sband,
-				       struct ieee80211_ht_cap *ht_cap_ie,
+				       const struct ieee80211_ht_cap *ht_cap_ie,
 				       struct sta_info *sta);
 void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 			  const u8 *da, u16 tid,
@@ -1428,10 +1428,11 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
 u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs);
 
 /* VHT */
-void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
-					 struct ieee80211_supported_band *sband,
-					 struct ieee80211_vht_cap *vht_cap_ie,
-					 struct sta_info *sta);
+void
+ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
+				    struct ieee80211_supported_band *sband,
+				    const struct ieee80211_vht_cap *vht_cap_ie,
+				    struct sta_info *sta);
 enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
 void ieee80211_sta_set_rx_nss(struct sta_info *sta);
 void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
@@ -1555,7 +1556,7 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local,
 
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg, u16 status,
-			 u8 *extra, size_t extra_len, const u8 *bssid,
+			 const u8 *extra, size_t extra_len, const u8 *bssid,
 			 const u8 *da, const u8 *key, u8 key_len, u8 key_idx,
 			 u32 tx_flags);
 void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
@@ -1606,7 +1607,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
 
 /* channel management */
 void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
-				  struct ieee80211_ht_operation *ht_oper,
+				  const struct ieee80211_ht_operation *ht_oper,
 				  struct cfg80211_chan_def *chandef);
 
 int __must_check
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 7ad035f0cacc..a1bad310f2e9 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -265,8 +265,8 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
 int mesh_nexthop_resolve(struct sk_buff *skb,
 			 struct ieee80211_sub_if_data *sdata);
 void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata);
-struct mesh_path *mesh_path_lookup(u8 *dst,
-		struct ieee80211_sub_if_data *sdata);
+struct mesh_path *mesh_path_lookup(const u8 *dst,
+				   struct ieee80211_sub_if_data *sdata);
 struct mesh_path *mpp_path_lookup(u8 *dst,
 				  struct ieee80211_sub_if_data *sdata);
 int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata);
@@ -276,7 +276,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop);
 void mesh_path_expire(struct ieee80211_sub_if_data *sdata);
 void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
 		struct ieee80211_mgmt *mgmt, size_t len);
-int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata);
+int mesh_path_add(const u8 *dst, struct ieee80211_sub_if_data *sdata);
 
 int mesh_path_add_gate(struct mesh_path *mpath);
 int mesh_path_send_to_gates(struct mesh_path *mpath);
@@ -301,8 +301,9 @@ void mesh_sta_cleanup(struct sta_info *sta);
 void mesh_mpath_table_grow(void);
 void mesh_mpp_table_grow(void);
 /* Mesh paths */
-int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode,
-		       const u8 *ra, struct ieee80211_sub_if_data *sdata);
+int mesh_path_error_tx(u8 ttl, const u8 *target, __le32 target_sn,
+		       __le16 target_rcode, const u8 *ra,
+		       struct ieee80211_sub_if_data *sdata);
 void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta);
 void mesh_path_flush_pending(struct mesh_path *mpath);
 void mesh_path_tx_pending(struct mesh_path *mpath);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index f0dd8742ed42..585c1e26cca8 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -30,14 +30,14 @@
 
 static void mesh_queue_preq(struct mesh_path *, u8);
 
-static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae)
+static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae)
 {
 	if (ae)
 		offset += 6;
 	return get_unaligned_le32(preq_elem + offset);
 }
 
-static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae)
+static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae)
 {
 	if (ae)
 		offset += 6;
@@ -102,10 +102,13 @@ enum mpath_frame_type {
 static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
 static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
-		u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target,
-		__le32 target_sn, const u8 *da, u8 hop_count, u8 ttl,
-		__le32 lifetime, __le32 metric, __le32 preq_id,
-		struct ieee80211_sub_if_data *sdata)
+				  const u8 *orig_addr, __le32 orig_sn,
+				  u8 target_flags, const u8 *target,
+				  __le32 target_sn, const u8 *da,
+				  u8 hop_count, u8 ttl,
+				  __le32 lifetime, __le32 metric,
+				  __le32 preq_id,
+				  struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
@@ -235,7 +238,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
  * also acquires in the TX path.  To avoid a deadlock we don't transmit the
  * frame directly but add it to the pending queue instead.
  */
-int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
+int mesh_path_error_tx(u8 ttl, const u8 *target, __le32 target_sn,
 		       __le16 target_rcode, const u8 *ra,
 		       struct ieee80211_sub_if_data *sdata)
 {
@@ -369,14 +372,14 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
  * path routing information is updated.
  */
 static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
-			    struct ieee80211_mgmt *mgmt,
-			    u8 *hwmp_ie, enum mpath_frame_type action)
+			       struct ieee80211_mgmt *mgmt,
+			       const u8 *hwmp_ie, enum mpath_frame_type action)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct mesh_path *mpath;
 	struct sta_info *sta;
 	bool fresh_info;
-	u8 *orig_addr, *ta;
+	const u8 *orig_addr, *ta;
 	u32 orig_sn, orig_metric;
 	unsigned long orig_lifetime, exp_time;
 	u32 last_hop_metric, new_metric;
@@ -511,11 +514,11 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 
 static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_mgmt *mgmt,
-				    u8 *preq_elem, u32 metric)
+				    const u8 *preq_elem, u32 metric)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_path *mpath = NULL;
-	u8 *target_addr, *orig_addr;
+	const u8 *target_addr, *orig_addr;
 	const u8 *da;
 	u8 target_flags, ttl, flags;
 	u32 orig_sn, target_sn, lifetime, orig_metric;
@@ -648,11 +651,11 @@ next_hop_deref_protected(struct mesh_path *mpath)
 
 static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_mgmt *mgmt,
-				    u8 *prep_elem, u32 metric)
+				    const u8 *prep_elem, u32 metric)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_path *mpath;
-	u8 *target_addr, *orig_addr;
+	const u8 *target_addr, *orig_addr;
 	u8 ttl, hopcount, flags;
 	u8 next_hop[ETH_ALEN];
 	u32 target_sn, orig_sn, lifetime;
@@ -711,12 +714,13 @@ fail:
 }
 
 static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
-			     struct ieee80211_mgmt *mgmt, u8 *perr_elem)
+				    struct ieee80211_mgmt *mgmt,
+				    const u8 *perr_elem)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_path *mpath;
 	u8 ttl;
-	u8 *ta, *target_addr;
+	const u8 *ta, *target_addr;
 	u32 target_sn;
 	u16 target_rcode;
 
@@ -758,15 +762,15 @@ endperr:
 }
 
 static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
-				struct ieee80211_mgmt *mgmt,
-				struct ieee80211_rann_ie *rann)
+				    struct ieee80211_mgmt *mgmt,
+				    const struct ieee80211_rann_ie *rann)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 	struct mesh_path *mpath;
 	u8 ttl, flags, hopcount;
-	u8 *orig_addr;
+	const u8 *orig_addr;
 	u32 orig_sn, metric, metric_txsta, interval;
 	bool root_is_gate;
 
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index d5786c3eaee2..2ce4c4023a97 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -181,7 +181,7 @@ errcopy:
 	return -ENOMEM;
 }
 
-static u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata,
+static u32 mesh_table_hash(const u8 *addr, struct ieee80211_sub_if_data *sdata,
 			   struct mesh_table *tbl)
 {
 	/* Use last four bytes of hw addr and interface index as hash index */
@@ -326,8 +326,8 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath,
 }
 
 
-static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst,
-					  struct ieee80211_sub_if_data *sdata)
+static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst,
+				      struct ieee80211_sub_if_data *sdata)
 {
 	struct mesh_path *mpath;
 	struct hlist_node *n;
@@ -359,7 +359,8 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst,
  *
  * Locking: must be called within a read rcu section.
  */
-struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata)
+struct mesh_path *mesh_path_lookup(const u8 *dst,
+				   struct ieee80211_sub_if_data *sdata)
 {
 	return mpath_lookup(rcu_dereference(mesh_paths), dst, sdata);
 }
@@ -494,7 +495,7 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata)
  *
  * State: the initial state of the new path is set to 0
  */
-int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
+int mesh_path_add(const u8 *dst, struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_local *local = sdata->local;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 05b229e3b226..7a8cd789e487 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1041,10 +1041,10 @@ static void ieee80211_chswitch_timer(unsigned long data)
 	ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
 }
 
-void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
-				      struct ieee80211_channel_sw_ie *sw_elem,
-				      struct ieee80211_bss *bss,
-				      u64 timestamp)
+void
+ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
+				 const struct ieee80211_channel_sw_ie *sw_elem,
+				 struct ieee80211_bss *bss, u64 timestamp)
 {
 	struct cfg80211_bss *cbss =
 		container_of((void *)bss, struct cfg80211_bss, priv);
@@ -1479,13 +1479,14 @@ void ieee80211_dfs_cac_timer_work(struct work_struct *work)
 /* MLME */
 static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				     struct ieee80211_sub_if_data *sdata,
-				     u8 *wmm_param, size_t wmm_param_len)
+				     const u8 *wmm_param, size_t wmm_param_len)
 {
 	struct ieee80211_tx_queue_params params;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	size_t left;
 	int count;
-	u8 *pos, uapsd_queues = 0;
+	const u8 *pos;
+	u8 uapsd_queues = 0;
 
 	if (!local->ops->conf_tx)
 		return false;
@@ -2670,7 +2671,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period;
 
 		if (elems->tim && !elems->parse_error) {
-			struct ieee80211_tim_ie *tim_ie = elems->tim;
+			const struct ieee80211_tim_ie *tim_ie = elems->tim;
 			sdata->u.mgd.dtim_period = tim_ie->dtim_period;
 		}
 	}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index e24ff38606a9..0f38f43ac62e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1035,7 +1035,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
 
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg, u16 status,
-			 u8 *extra, size_t extra_len, const u8 *da,
+			 const u8 *extra, size_t extra_len, const u8 *da,
 			 const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx,
 			 u32 tx_flags)
 {
@@ -1947,7 +1947,7 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 }
 
 void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
-				  struct ieee80211_ht_operation *ht_oper,
+				  const struct ieee80211_ht_operation *ht_oper,
 				  struct cfg80211_chan_def *chandef)
 {
 	enum nl80211_channel_type channel_type;
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index a9549fcc5a04..a2c2258bc84e 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -13,10 +13,11 @@
 #include "rate.h"
 
 
-void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
-					 struct ieee80211_supported_band *sband,
-					 struct ieee80211_vht_cap *vht_cap_ie,
-					 struct sta_info *sta)
+void
+ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
+				    struct ieee80211_supported_band *sband,
+				    const struct ieee80211_vht_cap *vht_cap_ie,
+				    struct sta_info *sta)
 {
 	struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
 
-- 
cgit v1.2.3


From c6f9d6c3bdeb337809d667ef2a41597229a1ce57 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 11 Feb 2013 14:27:08 +0100
Subject: mac80211: advertise operating mode notification capability

Use the new extended capabilities advertising to advertise
the fact that operating mode notification is supported.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 2 ++
 net/mac80211/main.c       | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e085fcf52b26..7e24fe0cfbcd 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1776,6 +1776,8 @@ enum ieee80211_tdls_actioncode {
 #define WLAN_EXT_CAPA5_TDLS_ENABLED	BIT(5)
 #define WLAN_EXT_CAPA5_TDLS_PROHIBITED	BIT(6)
 
+#define WLAN_EXT_CAPA8_OPMODE_NOTIF	BIT(6)
+
 /* TDLS specific payload type in the LLC/SNAP header */
 #define WLAN_TDLS_SNAP_RFTYPE	0x2
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 9cdbc774cfd7..035344bc6b9c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -501,6 +501,11 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = {
 	},
 };
 
+static const u8 extended_capabilities[] = {
+	0, 0, 0, 0, 0, 0, 0,
+	WLAN_EXT_CAPA8_OPMODE_NOTIF,
+};
+
 struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 					const struct ieee80211_ops *ops)
 {
@@ -557,6 +562,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 			WIPHY_FLAG_REPORTS_OBSS |
 			WIPHY_FLAG_OFFCHAN_TX;
 
+	wiphy->extended_capabilities = extended_capabilities;
+	wiphy->extended_capabilities_mask = extended_capabilities;
+	wiphy->extended_capabilities_len = ARRAY_SIZE(extended_capabilities);
+
 	if (ops->remain_on_channel)
 		wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL;
 
-- 
cgit v1.2.3


From ea27c396174d5a4576853cbe7aeabeb9f7cba6e1 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@linaro.org>
Date: Tue, 12 Feb 2013 01:10:57 +0800
Subject: pinctrl: generic: rename input schmitt disable

Rename PIN_CONFIG_INPUT_SCHMITT_DISABLE to
PIN_CONFIG_INPUT_SCHMITT_ENABLE. It's used to make it more generialize.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinconf-generic.c       | 2 +-
 include/linux/pinctrl/pinconf-generic.h | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index e5948f8172af..06c304ac6f7d 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -41,7 +41,7 @@ struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL),
-	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_DISABLE, "input schmitt disabled", NULL),
+	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_ENABLE, "input schmitt enabled", NULL),
 	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT, "input schmitt trigger", NULL),
 	PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "time units"),
 	PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector"),
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 3e7909aa5c03..72474e18f1e0 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -48,7 +48,9 @@
  *	argument is ignored.
  * @PIN_CONFIG_DRIVE_STRENGTH: the pin will output the current passed as
  * 	argument. The argument is in mA.
- * @PIN_CONFIG_INPUT_SCHMITT_DISABLE: disable schmitt-trigger mode on the pin.
+ * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin.
+ *      If the argument != 0, schmitt-trigger mode is enabled. If it's 0,
+ *      schmitt-trigger mode is disabled.
  * @PIN_CONFIG_INPUT_SCHMITT: this will configure an input pin to run in
  *	schmitt-trigger mode. If the schmitt-trigger has adjustable hysteresis,
  *	the threshold value is given on a custom format as argument when
@@ -82,7 +84,7 @@ enum pin_config_param {
 	PIN_CONFIG_DRIVE_OPEN_DRAIN,
 	PIN_CONFIG_DRIVE_OPEN_SOURCE,
 	PIN_CONFIG_DRIVE_STRENGTH,
-	PIN_CONFIG_INPUT_SCHMITT_DISABLE,
+	PIN_CONFIG_INPUT_SCHMITT_ENABLE,
 	PIN_CONFIG_INPUT_SCHMITT,
 	PIN_CONFIG_INPUT_DEBOUNCE,
 	PIN_CONFIG_POWER_SOURCE,
-- 
cgit v1.2.3


From 14bbd6a565e1bcdc240d44687edb93f721cfdf99 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 14 Feb 2013 09:44:49 +0000
Subject: net: Add skb_unclone() helper function.

This function will be used in next GRE_GSO patch. This patch does
not change any functionality.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Eric Dumazet <edumazet@google.com>
---
 drivers/net/ppp/ppp_generic.c           |  3 +--
 include/linux/skbuff.h                  | 10 ++++++++++
 net/ipv4/ah4.c                          |  3 +--
 net/ipv4/ip_fragment.c                  |  2 +-
 net/ipv4/tcp_output.c                   |  2 +-
 net/ipv4/xfrm4_input.c                  |  2 +-
 net/ipv4/xfrm4_mode_tunnel.c            |  3 +--
 net/ipv6/ah6.c                          |  3 +--
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/reassembly.c                   |  2 +-
 net/ipv6/xfrm6_mode_tunnel.c            |  3 +--
 net/sched/act_ipt.c                     |  6 ++----
 net/sched/act_pedit.c                   |  3 +--
 13 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 0b2706abe3e3..4fd754e74eb2 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1805,8 +1805,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 		/* the filter instructions are constructed assuming
 		   a four-byte PPP header on each packet */
 		if (ppp->pass_filter || ppp->active_filter) {
-			if (skb_cloned(skb) &&
-			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			if (skb_unclone(skb, GFP_ATOMIC))
 				goto err;
 
 			*skb_push(skb, 2) = 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9da99520ccd5..ca6ee7d93edb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -804,6 +804,16 @@ static inline int skb_cloned(const struct sk_buff *skb)
 	       (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1;
 }
 
+static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
+{
+	might_sleep_if(pri & __GFP_WAIT);
+
+	if (skb_cloned(skb))
+		return pskb_expand_head(skb, 0, 0, pri);
+
+	return 0;
+}
+
 /**
  *	skb_header_cloned - is the header a clone
  *	@skb: buffer to check
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a69b4e4a02b5..2e7f1948216f 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -321,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* We are going to _remove_ AH header to keep sockets happy,
 	 * so... Later this can change. */
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 1211613c6c34..b6d30acb600c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -590,7 +590,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(head, GFP_ATOMIC))
 		goto out_nomem;
 
 	/* If the first fragment is fragmented itself, we split
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6182d90e97b0..fd0cea114b5d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1331,7 +1331,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 /* Remove acked data from a packet in the transmit queue. */
 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 {
-	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		return -ENOMEM;
 
 	__pskb_trim_head(skb, len);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 06814b6216dc..1f12c8b45864 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * header and optional ESP marker bytes) and then modify the
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
-	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto drop;
 
 	/* Now we can update and verify the packet length... */
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ddee0a099a2c..1162ace30838 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -142,8 +142,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	for_each_input_rcu(rcv_notify_handlers, handler)
 		handler->handler(skb);
 
-	if (skb_cloned(skb) &&
-	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+	if (err = skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 384233188ac1..bb02e176cb70 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -521,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* We are going to _remove_ AH header to keep sockets happy,
 	 * so... Later this can change. */
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c674f158efa8..b89a8c3186cd 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -368,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 	}
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
+	if (skb_unclone(head, GFP_ATOMIC)) {
 		pr_debug("skb is cloned but can't expand head");
 		goto out_oom;
 	}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index bab2c270f292..e354743ed426 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -404,7 +404,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(head, GFP_ATOMIC))
 		goto out_oom;
 
 	/* If the first fragment is fragmented itself, we split
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9f2095b19ad0..93c41a81c4c3 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -69,8 +69,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
 
-	if (skb_cloned(skb) &&
-	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+	if (err = skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0fb9e3f567e6..e0f6de64afec 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
 	struct tcf_ipt *ipt = a->priv;
 	struct xt_action_param par;
 
-	if (skb_cloned(skb)) {
-		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-			return TC_ACT_UNSPEC;
-	}
+	if (skb_unclone(skb, GFP_ATOMIC))
+		return TC_ACT_UNSPEC;
 
 	spin_lock(&ipt->tcf_lock);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 0c3faddf3f2c..7ed78c9e505c 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -131,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 	int i, munged = 0;
 	unsigned int off;
 
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		return p->tcf_action;
 
 	off = skb_network_offset(skb);
-- 
cgit v1.2.3


From 05e8ef4ab2d8087d360e814d14da20b9f7fb2283 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 14 Feb 2013 09:44:55 +0000
Subject: net: factor out skb_mac_gso_segment() from skb_gso_segment()

This function will be used in next GRE_GSO patch. This patch does
not change any functionality. It only exports skb_mac_gso_segment()
function.

[ Use skb_reset_mac_len() -DaveM ]

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 79 ++++++++++++++++++++++++++++-------------------
 2 files changed, 50 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9deb672d999f..920361bc27e7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2671,6 +2671,8 @@ extern void netdev_upper_dev_unlink(struct net_device *dev,
 extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features, bool tx_path);
+extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+					  netdev_features_t features);
 
 static inline
 struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
diff --git a/net/core/dev.c b/net/core/dev.c
index f44473696b8b..67deae60214c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2327,37 +2327,20 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
-/* openvswitch calls this on rx path, so we need a different check.
- */
-static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
-{
-	if (tx_path)
-		return skb->ip_summed != CHECKSUM_PARTIAL;
-	else
-		return skb->ip_summed == CHECKSUM_NONE;
-}
-
 /**
- *	__skb_gso_segment - Perform segmentation on skb.
+ *	skb_mac_gso_segment - mac layer segmentation handler.
  *	@skb: buffer to segment
  *	@features: features for the output path (see dev->features)
- *	@tx_path: whether it is called in TX path
- *
- *	This function segments the given skb and returns a list of segments.
- *
- *	It may return NULL if the skb requires no segmentation.  This is
- *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
-				  netdev_features_t features, bool tx_path)
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	int vlan_depth = ETH_HLEN;
-	int err;
 
 	while (type == htons(ETH_P_8021Q)) {
+		int vlan_depth = ETH_HLEN;
 		struct vlan_hdr *vh;
 
 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
@@ -2368,22 +2351,14 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 		vlan_depth += VLAN_HLEN;
 	}
 
-	skb_reset_mac_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb_needs_check(skb, tx_path))) {
-		skb_warn_bad_offload(skb);
-
-		if (skb_header_cloned(skb) &&
-		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
-			return ERR_PTR(err);
-	}
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
 		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+				int err;
+
 				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
@@ -2401,6 +2376,48 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 
 	return segs;
 }
+EXPORT_SYMBOL(skb_mac_gso_segment);
+
+
+/* openvswitch calls this on rx path, so we need a different check.
+ */
+static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+{
+	if (tx_path)
+		return skb->ip_summed != CHECKSUM_PARTIAL;
+	else
+		return skb->ip_summed == CHECKSUM_NONE;
+}
+
+/**
+ *	__skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *	@tx_path: whether it is called in TX path
+ *
+ *	This function segments the given skb and returns a list of segments.
+ *
+ *	It may return NULL if the skb requires no segmentation.  This is
+ *	only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path)
+{
+	if (unlikely(skb_needs_check(skb, tx_path))) {
+		int err;
+
+		skb_warn_bad_offload(skb);
+
+		if (skb_header_cloned(skb) &&
+		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+			return ERR_PTR(err);
+	}
+
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	return skb_mac_gso_segment(skb, features);
+}
 EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
-- 
cgit v1.2.3


From 68c331631143f5f039baac99a650e0b9e1ea02b6 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 14 Feb 2013 14:02:41 +0000
Subject: v4 GRE: Add TCP segmentation offload for GRE

Following patch adds GRE protocol offload handler so that
skb_gso_segment() can segment GRE packets.
SKB GSO CB is added to keep track of total header length so that
skb_segment can push entire header. e.g. in case of GRE, skb_segment
need to push inner and outer headers to every segment.
New NETIF_F_GRE_GSO feature is added for devices which support HW
GRE TSO offload. Currently none of devices support it therefore GRE GSO
always fall backs to software GSO.

[ Compute pkt_len before ip_local_out() invocation. -DaveM ]

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |   3 +-
 include/linux/skbuff.h          |  17 ++++++
 net/core/dev.c                  |   1 +
 net/core/ethtool.c              |   1 +
 net/core/skbuff.c               |   6 +-
 net/ipv4/af_inet.c              |   1 +
 net/ipv4/gre.c                  | 118 ++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ip_gre.c               |  82 +++++++++++++++++++++++++---
 net/ipv4/tcp.c                  |   1 +
 net/ipv4/udp.c                  |   3 +-
 net/ipv6/ip6_offload.c          |   1 +
 net/ipv6/udp_offload.c          |   3 +-
 12 files changed, 226 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 5ac32123035a..3dd39340430e 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -41,7 +41,7 @@ enum {
 	NETIF_F_TSO_ECN_BIT,		/* ... TCP ECN support */
 	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
-	NETIF_F_GSO_RESERVED1,		/* ... free (fill GSO_MASK to 8 bits) */
+	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
 	/**/NETIF_F_GSO_LAST,		/* [can't be last bit, see GSO_MASK] */
 	NETIF_F_GSO_RESERVED2		/* ... free (fill GSO_MASK to 8 bits) */
 		= NETIF_F_GSO_LAST,
@@ -102,6 +102,7 @@ enum {
 #define NETIF_F_VLAN_CHALLENGED	__NETIF_F(VLAN_CHALLENGED)
 #define NETIF_F_RXFCS		__NETIF_F(RXFCS)
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
+#define NETIF_F_GRE_GSO		__NETIF_F(GSO_GRE)
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca6ee7d93edb..821c7f45d2a7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -314,6 +314,8 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
+
+	SKB_GSO_GRE = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2732,6 +2734,21 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 }
 #endif
 
+/* Keeps track of mac header offset relative to skb->head.
+ * It is useful for TSO of Tunneling protocol. e.g. GRE.
+ * For non-tunnel skb it points to skb_mac_header() and for
+ * tunnel skb it points to outer mac header. */
+struct skb_gso_cb {
+	int mac_offset;
+};
+#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
+
+static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
+{
+	return (skb_mac_header(inner_skb) - inner_skb->head) -
+		SKB_GSO_CB(inner_skb)->mac_offset;
+}
+
 static inline bool skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff --git a/net/core/dev.c b/net/core/dev.c
index 67deae60214c..1cd6297fd34b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2413,6 +2413,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 			return ERR_PTR(err);
 	}
 
+	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
 	skb_reset_mac_header(skb);
 	skb_reset_mac_len(skb);
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d9d55209db67..3e9b2c3e30f0 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
 	[NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
 	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
+	[NETIF_F_GSO_GRE_BIT] =		 "tx-gre-segmentation",
 
 	[NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
 	[NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6c1ad09f8796..2a3ca33c30aa 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2738,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 	unsigned int mss = skb_shinfo(skb)->gso_size;
 	unsigned int doffset = skb->data - skb_mac_header(skb);
 	unsigned int offset = doffset;
+	unsigned int tnl_hlen = skb_tnl_header_len(skb);
 	unsigned int headroom;
 	unsigned int len;
 	int sg = !!(features & NETIF_F_SG);
@@ -2814,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_set_network_header(nskb, skb->mac_len);
 		nskb->transport_header = (nskb->network_header +
 					  skb_network_header_len(skb));
-		skb_copy_from_linear_data(skb, nskb->data, doffset);
+
+		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+						 nskb->data - tnl_hlen,
+						 doffset + tnl_hlen);
 
 		if (fskb != skb_shinfo(skb)->frag_list)
 			continue;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e6e5d8506336..e225a4e5b572 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1287,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_GRE |
 		       0)))
 		goto out;
 
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 42a491055c76..7a4c710c4cdd 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -19,6 +19,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/netdevice.h>
+#include <linux/if_tunnel.h>
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
@@ -26,6 +27,11 @@
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
 static DEFINE_SPINLOCK(gre_proto_lock);
+struct gre_base_hdr {
+	__be16 flags;
+	__be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
 {
@@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	rcu_read_unlock();
 }
 
+static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
+				       netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	netdev_features_t enc_features;
+	int ghl = GRE_HEADER_SECTION;
+	struct gre_base_hdr *greh;
+	int mac_len = skb->mac_len;
+	int tnl_hlen;
+	bool csum;
+
+	if (unlikely(skb_shinfo(skb)->gso_type &
+				~(SKB_GSO_TCPV4 |
+				  SKB_GSO_TCPV6 |
+				  SKB_GSO_UDP |
+				  SKB_GSO_DODGY |
+				  SKB_GSO_TCP_ECN |
+				  SKB_GSO_GRE)))
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
+		goto out;
+
+	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+
+	if (greh->flags & GRE_KEY)
+		ghl += GRE_HEADER_SECTION;
+	if (greh->flags & GRE_SEQ)
+		ghl += GRE_HEADER_SECTION;
+	if (greh->flags & GRE_CSUM) {
+		ghl += GRE_HEADER_SECTION;
+		csum = true;
+	} else
+		csum = false;
+
+	/* setup inner skb. */
+	if (greh->protocol == htons(ETH_P_TEB)) {
+		struct ethhdr *eth = eth_hdr(skb);
+		skb->protocol = eth->h_proto;
+	} else {
+		skb->protocol = greh->protocol;
+	}
+
+	skb->encapsulation = 0;
+
+	if (unlikely(!pskb_may_pull(skb, ghl)))
+		goto out;
+	__skb_pull(skb, ghl);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, skb_inner_network_offset(skb));
+	skb->mac_len = skb_inner_network_offset(skb);
+
+	/* segment inner packet. */
+	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+	segs = skb_mac_gso_segment(skb, enc_features);
+	if (!segs || IS_ERR(segs))
+		goto out;
+
+	skb = segs;
+	tnl_hlen = skb_tnl_header_len(skb);
+	do {
+		__skb_push(skb, ghl);
+		if (csum) {
+			__be32 *pcsum;
+
+			if (skb_has_shared_frag(skb)) {
+				int err;
+
+				err = __skb_linearize(skb);
+				if (err) {
+					kfree_skb(segs);
+					segs = ERR_PTR(err);
+					goto out;
+				}
+			}
+
+			greh = (struct gre_base_hdr *)(skb->data);
+			pcsum = (__be32 *)(greh + 1);
+			*pcsum = 0;
+			*(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+		}
+		__skb_push(skb, tnl_hlen - ghl);
+
+		skb_reset_mac_header(skb);
+		skb_set_network_header(skb, mac_len);
+		skb->mac_len = mac_len;
+	} while ((skb = skb->next));
+out:
+	return segs;
+}
+
+static int gre_gso_send_check(struct sk_buff *skb)
+{
+	if (!skb->encapsulation)
+		return -EINVAL;
+	return 0;
+}
+
 static const struct net_protocol net_gre_protocol = {
 	.handler     = gre_rcv,
 	.err_handler = gre_err,
 	.netns_ok    = 1,
 };
 
+static const struct net_offload gre_offload = {
+	.callbacks = {
+		.gso_send_check =	gre_gso_send_check,
+		.gso_segment    =	gre_gso_segment,
+	},
+};
+
 static int __init gre_init(void)
 {
 	pr_info("GRE over IPv4 demultiplexor driver\n");
@@ -127,11 +238,18 @@ static int __init gre_init(void)
 		return -EAGAIN;
 	}
 
+	if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
+		pr_err("can't add protocol offload\n");
+		inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+		return -EAGAIN;
+	}
+
 	return 0;
 }
 
 static void __exit gre_exit(void)
 {
+	inet_del_offload(&gre_offload, IPPROTO_GRE);
 	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 00a14b9864ea..a56f1182c176 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -735,8 +735,33 @@ drop:
 	return 0;
 }
 
+static struct sk_buff *handle_offloads(struct sk_buff *skb)
+{
+	int err;
+
+	if (skb_is_gso(skb)) {
+		err = skb_unclone(skb, GFP_ATOMIC);
+		if (unlikely(err))
+			goto error;
+		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
+		return skb;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			goto error;
+	}
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return skb;
+
+error:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr  *old_iph;
 	const struct iphdr  *tiph;
@@ -751,10 +776,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	__be32 dst;
 	int    mtu;
 	u8     ttl;
+	int    err;
+	int    pkt_len;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL &&
-	    skb_checksum_help(skb))
-		goto tx_error;
+	skb = handle_offloads(skb);
+	if (IS_ERR(skb)) {
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	if (!skb->encapsulation) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
 
 	old_iph = ip_hdr(skb);
 
@@ -855,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	if (skb->protocol == htons(ETH_P_IP)) {
 		df |= (old_iph->frag_off&htons(IP_DF));
 
-		if ((old_iph->frag_off&htons(IP_DF)) &&
+		if (!skb_is_gso(skb) &&
+		    (old_iph->frag_off&htons(IP_DF)) &&
 		    mtu < ntohs(old_iph->tot_len)) {
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 			ip_rt_put(rt);
@@ -875,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			}
 		}
 
-		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
+		if (!skb_is_gso(skb) &&
+		    mtu >= IPV6_MIN_MTU &&
+		    mtu < skb->len - tunnel->hlen + gre_hlen) {
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 			ip_rt_put(rt);
 			goto tx_error;
@@ -936,6 +973,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	iph->daddr		=	fl4.daddr;
 	iph->saddr		=	fl4.saddr;
 	iph->ttl		=	ttl;
+	iph->id			=	0;
 
 	if (ttl == 0) {
 		if (skb->protocol == htons(ETH_P_IP))
@@ -964,9 +1002,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			*ptr = tunnel->parms.o_key;
 			ptr--;
 		}
-		if (tunnel->parms.o_flags&GRE_CSUM) {
+		/* Skip GRE checksum if skb is getting offloaded. */
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
+		    (tunnel->parms.o_flags&GRE_CSUM)) {
 			int offset = skb_transport_offset(skb);
 
+			if (skb_has_shared_frag(skb)) {
+				err = __skb_linearize(skb);
+				if (err) {
+					ip_rt_put(rt);
+					goto tx_error;
+				}
+			}
+
 			*ptr = 0;
 			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
 								 skb->len - offset,
@@ -974,7 +1022,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		}
 	}
 
-	iptunnel_xmit(skb, dev);
+	nf_reset(skb);
+
+	pkt_len = skb->len - skb_transport_offset(skb);
+	err = ip_local_out(skb);
+	if (likely(net_xmit_eval(err) == 0)) {
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->tx_bytes += pkt_len;
+		tstats->tx_packets++;
+		u64_stats_update_end(&tstats->syncp);
+	} else {
+		dev->stats.tx_errors++;
+		dev->stats.tx_aborted_errors++;
+	}
 	return NETDEV_TX_OK;
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1044,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 		mtu = 68;
 
 	tunnel->hlen = addend;
+	/* TCP offload with GRE SEQ is not supported. */
+	if (!(tunnel->parms.o_flags & GRE_SEQ)) {
+		dev->features		|= NETIF_F_GSO_SOFTWARE;
+		dev->hw_features	|= NETIF_F_GSO_SOFTWARE;
+	}
 
 	return mtu;
 }
@@ -1593,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev)
 
 	dev->iflink		= 0;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
+
+	dev->features		|= GRE_FEATURES;
+	dev->hw_features	|= GRE_FEATURES;
 }
 
 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1f0bedb8622f..7a5ba48c2cc9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3043,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 			       SKB_GSO_DODGY |
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
+			       SKB_GSO_GRE |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6791aac06ea9..39a5e7a9a77f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2305,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 		/* Packet is from an untrusted source, reset gso_segs. */
 		int type = skb_shinfo(skb)->gso_type;
 
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index f26f0da7f095..8234c1dcdf72 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		     ~(SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_GRE |
 		       SKB_GSO_TCPV6 |
 		       0)))
 		goto out;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 0c8934a317c2..cf05cf073c51 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		/* Packet is from an untrusted source, reset gso_segs. */
 		int type = skb_shinfo(skb)->gso_type;
 
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
-- 
cgit v1.2.3


From 0db3863add5cb249520b31a4ad36f275a30e7ba6 Mon Sep 17 00:00:00 2001
From: Michael Trimarchi <michael@amarulasolutions.com>
Date: Fri, 15 Feb 2013 14:43:38 -0800
Subject: Input: bma150 - make some defines public and fix some comments

Make the constants referring to range and bandwidth public so they can
be used when initializing the platform data fields in the platform code.

Fix also some comments regarding the unit of measurement to use for the
range and bandwidth fields, the values are not actually expected to be
in G or HZ, the code in bma150.c just uses the BMA150_RANGE_xxx and
BMA150_BW_xxx constants like they are with no translation from actual
values in G or HZ.

Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Antonio Ospite <ao2@amarulasolutions.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/bma150.c | 12 ------------
 include/linux/bma150.h      | 16 ++++++++++++++--
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/misc/bma150.c b/drivers/input/misc/bma150.c
index e5d18946bb2e..865c2f9d25b9 100644
--- a/drivers/input/misc/bma150.c
+++ b/drivers/input/misc/bma150.c
@@ -46,18 +46,6 @@
 #define BMA150_POLL_MAX		200
 #define BMA150_POLL_MIN		0
 
-#define BMA150_BW_25HZ		0
-#define BMA150_BW_50HZ		1
-#define BMA150_BW_100HZ		2
-#define BMA150_BW_190HZ		3
-#define BMA150_BW_375HZ		4
-#define BMA150_BW_750HZ		5
-#define BMA150_BW_1500HZ	6
-
-#define BMA150_RANGE_2G		0
-#define BMA150_RANGE_4G		1
-#define BMA150_RANGE_8G		2
-
 #define BMA150_MODE_NORMAL	0
 #define BMA150_MODE_SLEEP	2
 #define BMA150_MODE_WAKE_UP	3
diff --git a/include/linux/bma150.h b/include/linux/bma150.h
index 7911fda23bb4..97ade7cdc870 100644
--- a/include/linux/bma150.h
+++ b/include/linux/bma150.h
@@ -22,6 +22,18 @@
 
 #define BMA150_DRIVER		"bma150"
 
+#define BMA150_RANGE_2G		0
+#define BMA150_RANGE_4G		1
+#define BMA150_RANGE_8G		2
+
+#define BMA150_BW_25HZ		0
+#define BMA150_BW_50HZ		1
+#define BMA150_BW_100HZ		2
+#define BMA150_BW_190HZ		3
+#define BMA150_BW_375HZ		4
+#define BMA150_BW_750HZ		5
+#define BMA150_BW_1500HZ	6
+
 struct bma150_cfg {
 	bool any_motion_int;		/* Set to enable any-motion interrupt */
 	bool hg_int;			/* Set to enable high-G interrupt */
@@ -34,8 +46,8 @@ struct bma150_cfg {
 	unsigned char lg_hyst;		/* Low-G hysterisis */
 	unsigned char lg_dur;		/* Low-G duration */
 	unsigned char lg_thres;		/* Low-G threshold */
-	unsigned char range;		/* BMA0150_RANGE_xxx (in G) */
-	unsigned char bandwidth;	/* BMA0150_BW_xxx (in Hz) */
+	unsigned char range;		/* one of BMA0150_RANGE_xxx */
+	unsigned char bandwidth;	/* one of BMA0150_BW_xxx */
 };
 
 struct bma150_platform_data {
-- 
cgit v1.2.3


From 9eee07d39fa606a191ae65d3c0e12771a80e70ca Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Fri, 15 Feb 2013 17:04:12 -0800
Subject: Input: tegra-kbc - require CONFIG_OF, remove platform data

Tegra only supports, and always enables, device tree. Remove all ifdefs
and runtime checks for DT support from the driver. Platform data is
therefore no longer required. Delete the header that defines it, and
rework the driver to parse the device tree directly into struct
tegra_kbc.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/Kconfig     |   2 +-
 drivers/input/keyboard/tegra-kbc.c | 195 +++++++++++++++++--------------------
 include/linux/input/tegra_kbc.h    |  62 ------------
 3 files changed, 93 insertions(+), 166 deletions(-)
 delete mode 100644 include/linux/input/tegra_kbc.h

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 72377737eec8..992137cf3a64 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -420,7 +420,7 @@ config KEYBOARD_NOMADIK
 
 config KEYBOARD_TEGRA
 	tristate "NVIDIA Tegra internal matrix keyboard controller support"
-	depends on ARCH_TEGRA
+	depends on ARCH_TEGRA && OF
 	select INPUT_MATRIXKMAP
 	help
 	  Say Y here if you want to use a matrix keyboard connected directly
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index 4526bdd36022..d89e7d392d1e 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -29,9 +29,16 @@
 #include <linux/of.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
-#include <linux/input/tegra_kbc.h>
+#include <linux/input/matrix_keypad.h>
 #include <mach/clk.h>
 
+#define KBC_MAX_GPIO	24
+#define KBC_MAX_KPENT	8
+
+#define KBC_MAX_ROW	16
+#define KBC_MAX_COL	8
+#define KBC_MAX_KEY	(KBC_MAX_ROW * KBC_MAX_COL)
+
 #define KBC_MAX_DEBOUNCE_CNT	0x3ffu
 
 /* KBC row scan time and delay for beginning the row scan. */
@@ -67,10 +74,27 @@
 
 #define KBC_ROW_SHIFT	3
 
+enum tegra_pin_type {
+	PIN_CFG_IGNORE,
+	PIN_CFG_COL,
+	PIN_CFG_ROW,
+};
+
+struct tegra_kbc_pin_cfg {
+	enum tegra_pin_type type;
+	unsigned char num;
+};
+
 struct tegra_kbc {
+	struct device *dev;
+	unsigned int debounce_cnt;
+	unsigned int repeat_cnt;
+	struct tegra_kbc_pin_cfg pin_cfg[KBC_MAX_GPIO];
+	const struct matrix_keymap_data *keymap_data;
+	bool wakeup;
 	void __iomem *mmio;
 	struct input_dev *idev;
-	unsigned int irq;
+	int irq;
 	spinlock_t lock;
 	unsigned int repoll_dly;
 	unsigned long cp_dly_jiffies;
@@ -78,7 +102,6 @@ struct tegra_kbc {
 	bool use_fn_map;
 	bool use_ghost_filter;
 	bool keypress_caused_wake;
-	const struct tegra_kbc_platform_data *pdata;
 	unsigned short keycode[KBC_MAX_KEY * 2];
 	unsigned short current_keys[KBC_MAX_KPENT];
 	unsigned int num_pressed_keys;
@@ -286,12 +309,11 @@ static irqreturn_t tegra_kbc_isr(int irq, void *args)
 
 static void tegra_kbc_setup_wakekeys(struct tegra_kbc *kbc, bool filter)
 {
-	const struct tegra_kbc_platform_data *pdata = kbc->pdata;
 	int i;
 	unsigned int rst_val;
 
 	/* Either mask all keys or none. */
-	rst_val = (filter && !pdata->wakeup) ? ~0 : 0;
+	rst_val = (filter && !kbc->wakeup) ? ~0 : 0;
 
 	for (i = 0; i < KBC_MAX_ROW; i++)
 		writel(rst_val, kbc->mmio + KBC_ROW0_MASK_0 + i * 4);
@@ -299,7 +321,6 @@ static void tegra_kbc_setup_wakekeys(struct tegra_kbc *kbc, bool filter)
 
 static void tegra_kbc_config_pins(struct tegra_kbc *kbc)
 {
-	const struct tegra_kbc_platform_data *pdata = kbc->pdata;
 	int i;
 
 	for (i = 0; i < KBC_MAX_GPIO; i++) {
@@ -315,13 +336,13 @@ static void tegra_kbc_config_pins(struct tegra_kbc *kbc)
 		row_cfg &= ~r_mask;
 		col_cfg &= ~c_mask;
 
-		switch (pdata->pin_cfg[i].type) {
+		switch (kbc->pin_cfg[i].type) {
 		case PIN_CFG_ROW:
-			row_cfg |= ((pdata->pin_cfg[i].num << 1) | 1) << r_shft;
+			row_cfg |= ((kbc->pin_cfg[i].num << 1) | 1) << r_shft;
 			break;
 
 		case PIN_CFG_COL:
-			col_cfg |= ((pdata->pin_cfg[i].num << 1) | 1) << c_shft;
+			col_cfg |= ((kbc->pin_cfg[i].num << 1) | 1) << c_shft;
 			break;
 
 		case PIN_CFG_IGNORE:
@@ -335,7 +356,6 @@ static void tegra_kbc_config_pins(struct tegra_kbc *kbc)
 
 static int tegra_kbc_start(struct tegra_kbc *kbc)
 {
-	const struct tegra_kbc_platform_data *pdata = kbc->pdata;
 	unsigned int debounce_cnt;
 	u32 val = 0;
 
@@ -350,10 +370,10 @@ static int tegra_kbc_start(struct tegra_kbc *kbc)
 	tegra_kbc_config_pins(kbc);
 	tegra_kbc_setup_wakekeys(kbc, false);
 
-	writel(pdata->repeat_cnt, kbc->mmio + KBC_RPT_DLY_0);
+	writel(kbc->repeat_cnt, kbc->mmio + KBC_RPT_DLY_0);
 
 	/* Keyboard debounce count is maximum of 12 bits. */
-	debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
+	debounce_cnt = min(kbc->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
 	val = KBC_DEBOUNCE_CNT_SHIFT(debounce_cnt);
 	val |= KBC_FIFO_TH_CNT_SHIFT(1); /* set fifo interrupt threshold to 1 */
 	val |= KBC_CONTROL_FIFO_CNT_INT_EN;  /* interrupt on FIFO threshold */
@@ -420,21 +440,20 @@ static void tegra_kbc_close(struct input_dev *dev)
 	return tegra_kbc_stop(kbc);
 }
 
-static bool
-tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata,
-			struct device *dev, unsigned int *num_rows)
+static bool tegra_kbc_check_pin_cfg(const struct tegra_kbc *kbc,
+					unsigned int *num_rows)
 {
 	int i;
 
 	*num_rows = 0;
 
 	for (i = 0; i < KBC_MAX_GPIO; i++) {
-		const struct tegra_kbc_pin_cfg *pin_cfg = &pdata->pin_cfg[i];
+		const struct tegra_kbc_pin_cfg *pin_cfg = &kbc->pin_cfg[i];
 
 		switch (pin_cfg->type) {
 		case PIN_CFG_ROW:
 			if (pin_cfg->num >= KBC_MAX_ROW) {
-				dev_err(dev,
+				dev_err(kbc->dev,
 					"pin_cfg[%d]: invalid row number %d\n",
 					i, pin_cfg->num);
 				return false;
@@ -444,7 +463,7 @@ tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata,
 
 		case PIN_CFG_COL:
 			if (pin_cfg->num >= KBC_MAX_COL) {
-				dev_err(dev,
+				dev_err(kbc->dev,
 					"pin_cfg[%d]: invalid column number %d\n",
 					i, pin_cfg->num);
 				return false;
@@ -455,7 +474,7 @@ tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata,
 			break;
 
 		default:
-			dev_err(dev,
+			dev_err(kbc->dev,
 				"pin_cfg[%d]: invalid entry type %d\n",
 				pin_cfg->type, pin_cfg->num);
 			return false;
@@ -465,12 +484,9 @@ tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata,
 	return true;
 }
 
-#ifdef CONFIG_OF
-static struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata(
-	struct platform_device *pdev)
+static int tegra_kbc_parse_dt(struct tegra_kbc *kbc)
 {
-	struct tegra_kbc_platform_data *pdata;
-	struct device_node *np = pdev->dev.of_node;
+	struct device_node *np = kbc->dev->of_node;
 	u32 prop;
 	int i;
 	u32 num_rows = 0;
@@ -480,109 +496,96 @@ static struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata(
 	int proplen;
 	int ret;
 
-	if (!np) {
-		dev_err(&pdev->dev, "device tree data is missing\n");
-		return ERR_PTR(-ENOENT);
-	}
-
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return ERR_PTR(-ENOMEM);
-
 	if (!of_property_read_u32(np, "nvidia,debounce-delay-ms", &prop))
-		pdata->debounce_cnt = prop;
+		kbc->debounce_cnt = prop;
 
 	if (!of_property_read_u32(np, "nvidia,repeat-delay-ms", &prop))
-		pdata->repeat_cnt = prop;
+		kbc->repeat_cnt = prop;
 
 	if (of_find_property(np, "nvidia,needs-ghost-filter", NULL))
-		pdata->use_ghost_filter = true;
+		kbc->use_ghost_filter = true;
 
 	if (of_find_property(np, "nvidia,wakeup-source", NULL))
-		pdata->wakeup = true;
+		kbc->wakeup = true;
 
 	if (!of_get_property(np, "nvidia,kbc-row-pins", &proplen)) {
-		dev_err(&pdev->dev, "property nvidia,kbc-row-pins not found\n");
-		return ERR_PTR(-ENOENT);
+		dev_err(kbc->dev, "property nvidia,kbc-row-pins not found\n");
+		return -ENOENT;
 	}
 	num_rows = proplen / sizeof(u32);
 
 	if (!of_get_property(np, "nvidia,kbc-col-pins", &proplen)) {
-		dev_err(&pdev->dev, "property nvidia,kbc-col-pins not found\n");
-		return ERR_PTR(-ENOENT);
+		dev_err(kbc->dev, "property nvidia,kbc-col-pins not found\n");
+		return -ENOENT;
 	}
 	num_cols = proplen / sizeof(u32);
 
 	if (!of_get_property(np, "linux,keymap", &proplen)) {
-		dev_err(&pdev->dev, "property linux,keymap not found\n");
-		return ERR_PTR(-ENOENT);
+		dev_err(kbc->dev, "property linux,keymap not found\n");
+		return -ENOENT;
 	}
 
 	if (!num_rows || !num_cols || ((num_rows + num_cols) > KBC_MAX_GPIO)) {
-		dev_err(&pdev->dev,
+		dev_err(kbc->dev,
 			"keypad rows/columns not porperly specified\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	/* Set all pins as non-configured */
 	for (i = 0; i < KBC_MAX_GPIO; i++)
-		pdata->pin_cfg[i].type = PIN_CFG_IGNORE;
+		kbc->pin_cfg[i].type = PIN_CFG_IGNORE;
 
 	ret = of_property_read_u32_array(np, "nvidia,kbc-row-pins",
 				rows_cfg, num_rows);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "Rows configurations are not proper\n");
-		return ERR_PTR(-EINVAL);
+		dev_err(kbc->dev, "Rows configurations are not proper\n");
+		return -EINVAL;
 	}
 
 	ret = of_property_read_u32_array(np, "nvidia,kbc-col-pins",
 				cols_cfg, num_cols);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "Cols configurations are not proper\n");
-		return ERR_PTR(-EINVAL);
+		dev_err(kbc->dev, "Cols configurations are not proper\n");
+		return -EINVAL;
 	}
 
 	for (i = 0; i < num_rows; i++) {
-		pdata->pin_cfg[rows_cfg[i]].type = PIN_CFG_ROW;
-		pdata->pin_cfg[rows_cfg[i]].num = i;
+		kbc->pin_cfg[rows_cfg[i]].type = PIN_CFG_ROW;
+		kbc->pin_cfg[rows_cfg[i]].num = i;
 	}
 
 	for (i = 0; i < num_cols; i++) {
-		pdata->pin_cfg[cols_cfg[i]].type = PIN_CFG_COL;
-		pdata->pin_cfg[cols_cfg[i]].num = i;
+		kbc->pin_cfg[cols_cfg[i]].type = PIN_CFG_COL;
+		kbc->pin_cfg[cols_cfg[i]].num = i;
 	}
 
-	return pdata;
-}
-#else
-static inline struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata(
-	struct platform_device *pdev)
-{
-	dev_err(&pdev->dev, "platform data is missing\n");
-	return ERR_PTR(-EINVAL);
+	return 0;
 }
-#endif
 
 static int tegra_kbc_probe(struct platform_device *pdev)
 {
-	const struct tegra_kbc_platform_data *pdata = pdev->dev.platform_data;
 	struct tegra_kbc *kbc;
-	struct input_dev *input_dev;
 	struct resource *res;
-	int irq;
 	int err;
 	int num_rows = 0;
 	unsigned int debounce_cnt;
 	unsigned int scan_time_rows;
 	unsigned int keymap_rows = KBC_MAX_KEY;
 
-	if (!pdata)
-		pdata = tegra_kbc_dt_parse_pdata(pdev);
+	kbc = devm_kzalloc(&pdev->dev, sizeof(*kbc), GFP_KERNEL);
+	if (!kbc) {
+		dev_err(&pdev->dev, "failed to alloc memory for kbc\n");
+		return -ENOMEM;
+	}
+
+	kbc->dev = &pdev->dev;
+	spin_lock_init(&kbc->lock);
 
-	if (IS_ERR(pdata))
-		return PTR_ERR(pdata);
+	err = tegra_kbc_parse_dt(kbc);
+	if (err)
+		return err;
 
-	if (!tegra_kbc_check_pin_cfg(pdata, &pdev->dev, &num_rows))
+	if (!tegra_kbc_check_pin_cfg(kbc, &num_rows))
 		return -EINVAL;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -591,28 +594,18 @@ static int tegra_kbc_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
+	kbc->irq = platform_get_irq(pdev, 0);
+	if (kbc->irq < 0) {
 		dev_err(&pdev->dev, "failed to get keyboard IRQ\n");
 		return -ENXIO;
 	}
 
-	kbc = devm_kzalloc(&pdev->dev, sizeof(*kbc), GFP_KERNEL);
-	if (!kbc) {
-		dev_err(&pdev->dev, "failed to alloc memory for kbc\n");
-		return -ENOMEM;
-	}
-
-	input_dev = devm_input_allocate_device(&pdev->dev);
-	if (!input_dev) {
+	kbc->idev = devm_input_allocate_device(&pdev->dev);
+	if (!kbc->idev) {
 		dev_err(&pdev->dev, "failed to allocate input device\n");
 		return -ENOMEM;
 	}
 
-	kbc->pdata = pdata;
-	kbc->idev = input_dev;
-	kbc->irq = irq;
-	spin_lock_init(&kbc->lock);
 	setup_timer(&kbc->timer, tegra_kbc_keypress_timer, (unsigned long)kbc);
 
 	kbc->mmio = devm_request_and_ioremap(&pdev->dev, res);
@@ -633,36 +626,32 @@ static int tegra_kbc_probe(struct platform_device *pdev)
 	 * the rows. There is an additional delay before the row scanning
 	 * starts. The repoll delay is computed in milliseconds.
 	 */
-	debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
+	debounce_cnt = min(kbc->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
 	scan_time_rows = (KBC_ROW_SCAN_TIME + debounce_cnt) * num_rows;
-	kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + pdata->repeat_cnt;
+	kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + kbc->repeat_cnt;
 	kbc->repoll_dly = DIV_ROUND_UP(kbc->repoll_dly, KBC_CYCLE_MS);
 
-	kbc->wakeup_key = pdata->wakeup_key;
-	kbc->use_fn_map = pdata->use_fn_map;
-	kbc->use_ghost_filter = pdata->use_ghost_filter;
-
-	input_dev->name = pdev->name;
-	input_dev->id.bustype = BUS_HOST;
-	input_dev->dev.parent = &pdev->dev;
-	input_dev->open = tegra_kbc_open;
-	input_dev->close = tegra_kbc_close;
+	kbc->idev->name = pdev->name;
+	kbc->idev->id.bustype = BUS_HOST;
+	kbc->idev->dev.parent = &pdev->dev;
+	kbc->idev->open = tegra_kbc_open;
+	kbc->idev->close = tegra_kbc_close;
 
-	if (pdata->keymap_data && pdata->use_fn_map)
+	if (kbc->keymap_data && kbc->use_fn_map)
 		keymap_rows *= 2;
 
-	err = matrix_keypad_build_keymap(pdata->keymap_data, NULL,
+	err = matrix_keypad_build_keymap(kbc->keymap_data, NULL,
 					 keymap_rows, KBC_MAX_COL,
-					 kbc->keycode, input_dev);
+					 kbc->keycode, kbc->idev);
 	if (err) {
 		dev_err(&pdev->dev, "failed to setup keymap\n");
 		return err;
 	}
 
-	__set_bit(EV_REP, input_dev->evbit);
-	input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+	__set_bit(EV_REP, kbc->idev->evbit);
+	input_set_capability(kbc->idev, EV_MSC, MSC_SCAN);
 
-	input_set_drvdata(input_dev, kbc);
+	input_set_drvdata(kbc->idev, kbc);
 
 	err = devm_request_irq(&pdev->dev, kbc->irq, tegra_kbc_isr,
 			  IRQF_NO_SUSPEND | IRQF_TRIGGER_HIGH, pdev->name, kbc);
@@ -680,7 +669,7 @@ static int tegra_kbc_probe(struct platform_device *pdev)
 	}
 
 	platform_set_drvdata(pdev, kbc);
-	device_init_wakeup(&pdev->dev, pdata->wakeup);
+	device_init_wakeup(&pdev->dev, kbc->wakeup);
 
 	return 0;
 }
diff --git a/include/linux/input/tegra_kbc.h b/include/linux/input/tegra_kbc.h
deleted file mode 100644
index a13025612939..000000000000
--- a/include/linux/input/tegra_kbc.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Platform definitions for tegra-kbc keyboard input driver
- *
- * Copyright (c) 2010-2011, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef ASMARM_ARCH_TEGRA_KBC_H
-#define ASMARM_ARCH_TEGRA_KBC_H
-
-#include <linux/types.h>
-#include <linux/input/matrix_keypad.h>
-
-#define KBC_MAX_GPIO	24
-#define KBC_MAX_KPENT	8
-
-#define KBC_MAX_ROW	16
-#define KBC_MAX_COL	8
-#define KBC_MAX_KEY	(KBC_MAX_ROW * KBC_MAX_COL)
-
-enum tegra_pin_type {
-	PIN_CFG_IGNORE,
-	PIN_CFG_COL,
-	PIN_CFG_ROW,
-};
-
-struct tegra_kbc_pin_cfg {
-	enum tegra_pin_type type;
-	unsigned char num;
-};
-
-struct tegra_kbc_wake_key {
-	u8 row:4;
-	u8 col:4;
-};
-
-struct tegra_kbc_platform_data {
-	unsigned int debounce_cnt;
-	unsigned int repeat_cnt;
-
-	struct tegra_kbc_pin_cfg pin_cfg[KBC_MAX_GPIO];
-	const struct matrix_keymap_data *keymap_data;
-
-	u32 wakeup_key;
-	bool wakeup;
-	bool use_fn_map;
-	bool use_ghost_filter;
-};
-#endif
-- 
cgit v1.2.3


From ac6324e7021dfa917ce4f9a836318c3e46fbb84e Mon Sep 17 00:00:00 2001
From: Pali Rohár <pali.rohar@gmail.com>
Date: Sun, 10 Feb 2013 18:32:18 +0100
Subject: bq2415x_charger: Add support for offline and 100mA mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Renamed mode BQ2415X_MODE_NONE to BQ2415X_MODE_OFF because this mode
  turning chaging completly off

* Added new mode BQ2415X_MODE_NONE which enable charging with maximal
  current limit 100mA (this is minimal safe value for bq2415x chips)

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/bq2415x_charger.c       | 25 ++++++++++++++++++-------
 include/linux/power/bq2415x_charger.h |  3 ++-
 2 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c
index ca70365e9410..ca91396fc48e 100644
--- a/drivers/power/bq2415x_charger.c
+++ b/drivers/power/bq2415x_charger.c
@@ -733,12 +733,10 @@ static int bq2415x_set_mode(struct bq2415x_device *bq, enum bq2415x_mode mode)
 	int charger = 0;
 	int boost = 0;
 
-	if (mode == BQ2415X_MODE_HOST_CHARGER ||
-		mode == BQ2415X_MODE_DEDICATED_CHARGER)
-			charger = 1;
-
 	if (mode == BQ2415X_MODE_BOOST)
 		boost = 1;
+	else if (mode != BQ2415X_MODE_OFF)
+		charger = 1;
 
 	if (!charger)
 		ret = bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE);
@@ -750,6 +748,10 @@ static int bq2415x_set_mode(struct bq2415x_device *bq, enum bq2415x_mode mode)
 		return ret;
 
 	switch (mode) {
+	case BQ2415X_MODE_OFF:
+		dev_dbg(bq->dev, "changing mode to: Offline\n");
+		ret = bq2415x_set_current_limit(bq, 100);
+		break;
 	case BQ2415X_MODE_NONE:
 		dev_dbg(bq->dev, "changing mode to: N/A\n");
 		ret = bq2415x_set_current_limit(bq, 100);
@@ -842,7 +844,7 @@ static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg)
 	dev_err(bq->dev, "%s\n", msg);
 	if (bq->automode > 0)
 		bq->automode = 0;
-	bq2415x_set_mode(bq, BQ2415X_MODE_NONE);
+	bq2415x_set_mode(bq, BQ2415X_MODE_OFF);
 	bq2415x_set_autotimer(bq, 0);
 }
 
@@ -1135,6 +1137,10 @@ static ssize_t bq2415x_sysfs_set_mode(struct device *dev,
 			return -ENOSYS;
 		bq->automode = 1;
 		mode = bq->reported_mode;
+	} else if (strncmp(buf, "off", 3) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_OFF;
 	} else if (strncmp(buf, "none", 4) == 0) {
 		if (bq->automode > 0)
 			bq->automode = 0;
@@ -1182,6 +1188,9 @@ static ssize_t bq2415x_sysfs_show_mode(struct device *dev,
 		ret += sprintf(buf+ret, "auto (");
 
 	switch (bq->mode) {
+	case BQ2415X_MODE_OFF:
+		ret += sprintf(buf+ret, "off");
+		break;
 	case BQ2415X_MODE_NONE:
 		ret += sprintf(buf+ret, "none");
 		break;
@@ -1216,6 +1225,8 @@ static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev,
 		return -EINVAL;
 
 	switch (bq->reported_mode) {
+	case BQ2415X_MODE_OFF:
+		return sprintf(buf, "off\n");
 	case BQ2415X_MODE_NONE:
 		return sprintf(buf, "none\n");
 	case BQ2415X_MODE_HOST_CHARGER:
@@ -1535,8 +1546,8 @@ static int bq2415x_probe(struct i2c_client *client,
 	bq->dev = &client->dev;
 	bq->chip = id->driver_data;
 	bq->name = name;
-	bq->mode = BQ2415X_MODE_NONE;
-	bq->reported_mode = BQ2415X_MODE_NONE;
+	bq->mode = BQ2415X_MODE_OFF;
+	bq->reported_mode = BQ2415X_MODE_OFF;
 	bq->autotimer = 0;
 	bq->automode = 0;
 
diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h
index 97a1665eaeaf..8dcc0f46fc0a 100644
--- a/include/linux/power/bq2415x_charger.h
+++ b/include/linux/power/bq2415x_charger.h
@@ -75,7 +75,8 @@
 
 /* Supported modes with maximal current limit */
 enum bq2415x_mode {
-	BQ2415X_MODE_NONE,		/* unknown or no charger (100mA) */
+	BQ2415X_MODE_OFF,		/* offline mode (charger disabled) */
+	BQ2415X_MODE_NONE,		/* unknown charger (100mA) */
 	BQ2415X_MODE_HOST_CHARGER,	/* usb host/hub charger (500mA) */
 	BQ2415X_MODE_DEDICATED_CHARGER, /* dedicated charger (unlimited) */
 	BQ2415X_MODE_BOOST,		/* boost mode (charging disabled) */
-- 
cgit v1.2.3


From 558bd3e8dc7a798c5c845f90cf038b9bbd2df2b8 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 9 Feb 2013 21:51:27 -0500
Subject: PM idle: remove global declaration of pm_idle

pm_idle appears in no generic Linux code,
it appears only in architecture-specific code.

Thus, pm_idle should not be declared in pm.h.

Architectures that  use an idle function pointer
should delcare one local to their architecture,
and/or use cpuidle.

Signed-off-by: Len Brown <len.brown@intel.com>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Cc: linux-pm@vger.kernel.org
---
 include/linux/pm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 03d7bb145311..97bcf23e045a 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -31,7 +31,6 @@
 /*
  * Callbacks for platform drivers to implement.
  */
-extern void (*pm_idle)(void);
 extern void (*pm_power_off)(void);
 extern void (*pm_power_off_prepare)(void);
 
-- 
cgit v1.2.3


From b4278c961aca320839964e23cfc7906ff61af0c2 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Mon, 18 Feb 2013 01:34:55 +0000
Subject: net: proc: remove proc_net_fops_create

proc_net_fops_create has been replaced by proc_create,
we can remove it now.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/proc_net.c      | 8 --------
 include/linux/proc_fs.h | 3 ---
 2 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index fe72cd073dea..30f7c678424b 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -177,14 +177,6 @@ const struct file_operations proc_net_operations = {
 	.readdir	= proc_tgid_net_readdir,
 };
 
-
-struct proc_dir_entry *proc_net_fops_create(struct net *net,
-	const char *name, umode_t mode, const struct file_operations *fops)
-{
-	return proc_create(name, mode, net->proc_net, fops);
-}
-EXPORT_SYMBOL_GPL(proc_net_fops_create);
-
 void proc_net_remove(struct net *net, const char *name)
 {
 	remove_proc_entry(name, net->proc_net);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 32676b35d2f5..35ee1891ae25 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -171,8 +171,6 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
 	return res;
 }
  
-extern struct proc_dir_entry *proc_net_fops_create(struct net *net,
-	const char *name, umode_t mode, const struct file_operations *fops);
 extern void proc_net_remove(struct net *net, const char *name);
 extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 	struct proc_dir_entry *parent);
@@ -184,7 +182,6 @@ extern int proc_alloc_inum(unsigned int *pino);
 extern void proc_free_inum(unsigned int inum);
 #else
 
-#define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
 static inline void proc_net_remove(struct net *net, const char *name) {}
 
 static inline void proc_flush_task(struct task_struct *task)
-- 
cgit v1.2.3


From c2399059a389ba686fa7f45d8913a708914752c4 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Mon, 18 Feb 2013 01:34:57 +0000
Subject: net: proc: remove proc_net_remove

proc_net_remove has been replaced by remove_proc_entry.
we can remove it now.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/proc_net.c      | 6 ------
 include/linux/proc_fs.h | 3 ---
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 30f7c678424b..3131a03d7d37 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -177,12 +177,6 @@ const struct file_operations proc_net_operations = {
 	.readdir	= proc_tgid_net_readdir,
 };
 
-void proc_net_remove(struct net *net, const char *name)
-{
-	remove_proc_entry(name, net->proc_net);
-}
-EXPORT_SYMBOL_GPL(proc_net_remove);
-
 static __net_init int proc_net_ns_init(struct net *net)
 {
 	struct proc_dir_entry *netd, *net_statd;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 35ee1891ae25..319f69422667 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -171,7 +171,6 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
 	return res;
 }
  
-extern void proc_net_remove(struct net *net, const char *name);
 extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 	struct proc_dir_entry *parent);
 
@@ -182,8 +181,6 @@ extern int proc_alloc_inum(unsigned int *pino);
 extern void proc_free_inum(unsigned int inum);
 #else
 
-static inline void proc_net_remove(struct net *net, const char *name) {}
-
 static inline void proc_flush_task(struct task_struct *task)
 {
 }
-- 
cgit v1.2.3


From 900ff8c6321418dafa03c22e215cb9646a2541b9 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 18 Feb 2013 19:20:33 +0000
Subject: net: move procfs code to net/core/net-procfs.c

Similar to net/core/net-sysfs.c, group procfs code to
a single unit.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  36 +++-
 net/core/Makefile         |   1 +
 net/core/dev.c            | 384 +-----------------------------------------
 net/core/dev_addr_lists.c |  74 ---------
 net/core/net-procfs.c     | 414 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 450 insertions(+), 459 deletions(-)
 create mode 100644 net/core/net-procfs.c

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 920361bc27e7..f111b4f038f3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2692,9 +2692,9 @@ extern void		net_enable_timestamp(void);
 extern void		net_disable_timestamp(void);
 
 #ifdef CONFIG_PROC_FS
-extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
-extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
-extern void dev_seq_stop(struct seq_file *seq, void *v);
+extern int __init dev_proc_init(void);
+#else
+#define dev_proc_init() 0
 #endif
 
 extern int netdev_class_create_file(struct class_attribute *class_attr);
@@ -2896,4 +2896,34 @@ do {								\
 })
 #endif
 
+/*
+ *	The list of packet types we will receive (as opposed to discard)
+ *	and the routines to invoke.
+ *
+ *	Why 16. Because with 16 the only overlap we get on a hash of the
+ *	low nibble of the protocol value is RARP/SNAP/X.25.
+ *
+ *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
+ *             sure which should go first, but I bet it won't make much
+ *             difference if we are running VLANs.  The good news is that
+ *             this protocol won't be in the list unless compiled in, so
+ *             the average user (w/out VLANs) will not be adversely affected.
+ *             --BLG
+ *
+ *		0800	IP
+ *		8100    802.1Q VLAN
+ *		0001	802.3
+ *		0002	AX.25
+ *		0004	802.2
+ *		8035	RARP
+ *		0005	SNAP
+ *		0805	X.25
+ *		0806	ARP
+ *		8137	IPX
+ *		0009	Localtalk
+ *		86DD	IPv6
+ */
+#define PTYPE_HASH_SIZE	(16)
+#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
+
 #endif	/* _LINUX_NETDEVICE_H */
diff --git a/net/core/Makefile b/net/core/Makefile
index 0c5e3618c80b..b33b996f5dd6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -13,6 +13,7 @@ obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
+obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
diff --git a/net/core/dev.c b/net/core/dev.c
index decf55f9ad80..8d9ddb09f208 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -97,8 +97,6 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <net/dst.h>
 #include <net/pkt_sched.h>
@@ -110,7 +108,6 @@
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
-#include <net/wext.h>
 #include <net/iw_handler.h>
 #include <asm/current.h>
 #include <linux/audit.h>
@@ -141,41 +138,10 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
-/*
- *	The list of packet types we will receive (as opposed to discard)
- *	and the routines to invoke.
- *
- *	Why 16. Because with 16 the only overlap we get on a hash of the
- *	low nibble of the protocol value is RARP/SNAP/X.25.
- *
- *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
- *             sure which should go first, but I bet it won't make much
- *             difference if we are running VLANs.  The good news is that
- *             this protocol won't be in the list unless compiled in, so
- *             the average user (w/out VLANs) will not be adversely affected.
- *             --BLG
- *
- *		0800	IP
- *		8100    802.1Q VLAN
- *		0001	802.3
- *		0002	AX.25
- *		0004	802.2
- *		8035	RARP
- *		0005	SNAP
- *		0805	X.25
- *		0806	ARP
- *		8137	IPX
- *		0009	Localtalk
- *		86DD	IPv6
- */
-
-#define PTYPE_HASH_SIZE	(16)
-#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
-
 static DEFINE_SPINLOCK(ptype_lock);
 static DEFINE_SPINLOCK(offload_lock);
-static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-static struct list_head ptype_all __read_mostly;	/* Taps */
+struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
 
 /*
@@ -4217,352 +4183,6 @@ softnet_break:
 	goto out;
 }
 
-#ifdef CONFIG_PROC_FS
-
-#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
-
-#define get_bucket(x) ((x) >> BUCKET_SPACE)
-#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
-#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
-
-static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
-{
-	struct net *net = seq_file_net(seq);
-	struct net_device *dev;
-	struct hlist_node *p;
-	struct hlist_head *h;
-	unsigned int count = 0, offset = get_offset(*pos);
-
-	h = &net->dev_name_head[get_bucket(*pos)];
-	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
-		if (++count == offset)
-			return dev;
-	}
-
-	return NULL;
-}
-
-static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
-{
-	struct net_device *dev;
-	unsigned int bucket;
-
-	do {
-		dev = dev_from_same_bucket(seq, pos);
-		if (dev)
-			return dev;
-
-		bucket = get_bucket(*pos) + 1;
-		*pos = set_bucket_offset(bucket, 1);
-	} while (bucket < NETDEV_HASHENTRIES);
-
-	return NULL;
-}
-
-/*
- *	This is invoked by the /proc filesystem handler to display a device
- *	in detail.
- */
-void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
-{
-	rcu_read_lock();
-	if (!*pos)
-		return SEQ_START_TOKEN;
-
-	if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
-		return NULL;
-
-	return dev_from_bucket(seq, pos);
-}
-
-void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return dev_from_bucket(seq, pos);
-}
-
-void dev_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
-{
-	struct rtnl_link_stats64 temp;
-	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
-
-	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
-		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
-		   dev->name, stats->rx_bytes, stats->rx_packets,
-		   stats->rx_errors,
-		   stats->rx_dropped + stats->rx_missed_errors,
-		   stats->rx_fifo_errors,
-		   stats->rx_length_errors + stats->rx_over_errors +
-		    stats->rx_crc_errors + stats->rx_frame_errors,
-		   stats->rx_compressed, stats->multicast,
-		   stats->tx_bytes, stats->tx_packets,
-		   stats->tx_errors, stats->tx_dropped,
-		   stats->tx_fifo_errors, stats->collisions,
-		   stats->tx_carrier_errors +
-		    stats->tx_aborted_errors +
-		    stats->tx_window_errors +
-		    stats->tx_heartbeat_errors,
-		   stats->tx_compressed);
-}
-
-/*
- *	Called from the PROCfs module. This now uses the new arbitrary sized
- *	/proc/net interface to create /proc/net/dev
- */
-static int dev_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "Inter-|   Receive                            "
-			      "                    |  Transmit\n"
-			      " face |bytes    packets errs drop fifo frame "
-			      "compressed multicast|bytes    packets errs "
-			      "drop fifo colls carrier compressed\n");
-	else
-		dev_seq_printf_stats(seq, v);
-	return 0;
-}
-
-static struct softnet_data *softnet_get_online(loff_t *pos)
-{
-	struct softnet_data *sd = NULL;
-
-	while (*pos < nr_cpu_ids)
-		if (cpu_online(*pos)) {
-			sd = &per_cpu(softnet_data, *pos);
-			break;
-		} else
-			++*pos;
-	return sd;
-}
-
-static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	return softnet_get_online(pos);
-}
-
-static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return softnet_get_online(pos);
-}
-
-static void softnet_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int softnet_seq_show(struct seq_file *seq, void *v)
-{
-	struct softnet_data *sd = v;
-
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-		   sd->processed, sd->dropped, sd->time_squeeze, 0,
-		   0, 0, 0, 0, /* was fastroute */
-		   sd->cpu_collision, sd->received_rps);
-	return 0;
-}
-
-static const struct seq_operations dev_seq_ops = {
-	.start = dev_seq_start,
-	.next  = dev_seq_next,
-	.stop  = dev_seq_stop,
-	.show  = dev_seq_show,
-};
-
-static int dev_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dev_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = dev_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-static const struct seq_operations softnet_seq_ops = {
-	.start = softnet_seq_start,
-	.next  = softnet_seq_next,
-	.stop  = softnet_seq_stop,
-	.show  = softnet_seq_show,
-};
-
-static int softnet_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &softnet_seq_ops);
-}
-
-static const struct file_operations softnet_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = softnet_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
-static void *ptype_get_idx(loff_t pos)
-{
-	struct packet_type *pt = NULL;
-	loff_t i = 0;
-	int t;
-
-	list_for_each_entry_rcu(pt, &ptype_all, list) {
-		if (i == pos)
-			return pt;
-		++i;
-	}
-
-	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
-		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
-			if (i == pos)
-				return pt;
-			++i;
-		}
-	}
-	return NULL;
-}
-
-static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
-{
-	rcu_read_lock();
-	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct packet_type *pt;
-	struct list_head *nxt;
-	int hash;
-
-	++*pos;
-	if (v == SEQ_START_TOKEN)
-		return ptype_get_idx(0);
-
-	pt = v;
-	nxt = pt->list.next;
-	if (pt->type == htons(ETH_P_ALL)) {
-		if (nxt != &ptype_all)
-			goto found;
-		hash = 0;
-		nxt = ptype_base[0].next;
-	} else
-		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
-
-	while (nxt == &ptype_base[hash]) {
-		if (++hash >= PTYPE_HASH_SIZE)
-			return NULL;
-		nxt = ptype_base[hash].next;
-	}
-found:
-	return list_entry(nxt, struct packet_type, list);
-}
-
-static void ptype_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-static int ptype_seq_show(struct seq_file *seq, void *v)
-{
-	struct packet_type *pt = v;
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "Type Device      Function\n");
-	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
-		if (pt->type == htons(ETH_P_ALL))
-			seq_puts(seq, "ALL ");
-		else
-			seq_printf(seq, "%04x", ntohs(pt->type));
-
-		seq_printf(seq, " %-8s %pF\n",
-			   pt->dev ? pt->dev->name : "", pt->func);
-	}
-
-	return 0;
-}
-
-static const struct seq_operations ptype_seq_ops = {
-	.start = ptype_seq_start,
-	.next  = ptype_seq_next,
-	.stop  = ptype_seq_stop,
-	.show  = ptype_seq_show,
-};
-
-static int ptype_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ptype_seq_ops,
-			sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ptype_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = ptype_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-
-static int __net_init dev_proc_net_init(struct net *net)
-{
-	int rc = -ENOMEM;
-
-	if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
-		goto out;
-	if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
-			 &softnet_seq_fops))
-		goto out_dev;
-	if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
-		goto out_softnet;
-
-	if (wext_proc_init(net))
-		goto out_ptype;
-	rc = 0;
-out:
-	return rc;
-out_ptype:
-	remove_proc_entry("ptype", net->proc_net);
-out_softnet:
-	remove_proc_entry("softnet_stat", net->proc_net);
-out_dev:
-	remove_proc_entry("dev", net->proc_net);
-	goto out;
-}
-
-static void __net_exit dev_proc_net_exit(struct net *net)
-{
-	wext_proc_exit(net);
-
-	remove_proc_entry("ptype", net->proc_net);
-	remove_proc_entry("softnet_stat", net->proc_net);
-	remove_proc_entry("dev", net->proc_net);
-}
-
-static struct pernet_operations __net_initdata dev_proc_ops = {
-	.init = dev_proc_net_init,
-	.exit = dev_proc_net_exit,
-};
-
-static int __init dev_proc_init(void)
-{
-	return register_pernet_subsys(&dev_proc_ops);
-}
-#else
-#define dev_proc_init() 0
-#endif	/* CONFIG_PROC_FS */
-
-
 struct netdev_upper {
 	struct net_device *dev;
 	bool master;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 89562529df45..bd2eb9d3e369 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -15,7 +15,6 @@
 #include <linux/rtnetlink.h>
 #include <linux/export.h>
 #include <linux/list.h>
-#include <linux/proc_fs.h>
 
 /*
  * General list handling functions
@@ -727,76 +726,3 @@ void dev_mc_init(struct net_device *dev)
 	__hw_addr_init(&dev->mc);
 }
 EXPORT_SYMBOL(dev_mc_init);
-
-#ifdef CONFIG_PROC_FS
-#include <linux/seq_file.h>
-
-static int dev_mc_seq_show(struct seq_file *seq, void *v)
-{
-	struct netdev_hw_addr *ha;
-	struct net_device *dev = v;
-
-	if (v == SEQ_START_TOKEN)
-		return 0;
-
-	netif_addr_lock_bh(dev);
-	netdev_for_each_mc_addr(ha, dev) {
-		int i;
-
-		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
-			   dev->name, ha->refcount, ha->global_use);
-
-		for (i = 0; i < dev->addr_len; i++)
-			seq_printf(seq, "%02x", ha->addr[i]);
-
-		seq_putc(seq, '\n');
-	}
-	netif_addr_unlock_bh(dev);
-	return 0;
-}
-
-static const struct seq_operations dev_mc_seq_ops = {
-	.start = dev_seq_start,
-	.next  = dev_seq_next,
-	.stop  = dev_seq_stop,
-	.show  = dev_mc_seq_show,
-};
-
-static int dev_mc_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_mc_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = dev_mc_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-#endif
-
-static int __net_init dev_mc_net_init(struct net *net)
-{
-	if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops))
-		return -ENOMEM;
-	return 0;
-}
-
-static void __net_exit dev_mc_net_exit(struct net *net)
-{
-	remove_proc_entry("dev_mcast", net->proc_net);
-}
-
-static struct pernet_operations __net_initdata dev_mc_net_ops = {
-	.init = dev_mc_net_init,
-	.exit = dev_mc_net_exit,
-};
-
-void __init dev_mcast_init(void)
-{
-	register_pernet_subsys(&dev_mc_net_ops);
-}
-
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
new file mode 100644
index 000000000000..ac87066491e9
--- /dev/null
+++ b/net/core/net-procfs.c
@@ -0,0 +1,414 @@
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/wext.h>
+
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
+
+#define get_bucket(x) ((x) >> BUCKET_SPACE)
+#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
+#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
+
+extern struct list_head ptype_all __read_mostly;
+extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
+{
+	struct net *net = seq_file_net(seq);
+	struct net_device *dev;
+	struct hlist_node *p;
+	struct hlist_head *h;
+	unsigned int count = 0, offset = get_offset(*pos);
+
+	h = &net->dev_name_head[get_bucket(*pos)];
+	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
+		if (++count == offset)
+			return dev;
+	}
+
+	return NULL;
+}
+
+static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
+{
+	struct net_device *dev;
+	unsigned int bucket;
+
+	do {
+		dev = dev_from_same_bucket(seq, pos);
+		if (dev)
+			return dev;
+
+		bucket = get_bucket(*pos) + 1;
+		*pos = set_bucket_offset(bucket, 1);
+	} while (bucket < NETDEV_HASHENTRIES);
+
+	return NULL;
+}
+
+/*
+ *	This is invoked by the /proc filesystem handler to display a device
+ *	in detail.
+ */
+static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+	if (!*pos)
+		return SEQ_START_TOKEN;
+
+	if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
+		return NULL;
+
+	return dev_from_bucket(seq, pos);
+}
+
+static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return dev_from_bucket(seq, pos);
+}
+
+static void dev_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
+{
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+
+	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
+		   dev->name, stats->rx_bytes, stats->rx_packets,
+		   stats->rx_errors,
+		   stats->rx_dropped + stats->rx_missed_errors,
+		   stats->rx_fifo_errors,
+		   stats->rx_length_errors + stats->rx_over_errors +
+		    stats->rx_crc_errors + stats->rx_frame_errors,
+		   stats->rx_compressed, stats->multicast,
+		   stats->tx_bytes, stats->tx_packets,
+		   stats->tx_errors, stats->tx_dropped,
+		   stats->tx_fifo_errors, stats->collisions,
+		   stats->tx_carrier_errors +
+		    stats->tx_aborted_errors +
+		    stats->tx_window_errors +
+		    stats->tx_heartbeat_errors,
+		   stats->tx_compressed);
+}
+
+/*
+ *	Called from the PROCfs module. This now uses the new arbitrary sized
+ *	/proc/net interface to create /proc/net/dev
+ */
+static int dev_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "Inter-|   Receive                            "
+			      "                    |  Transmit\n"
+			      " face |bytes    packets errs drop fifo frame "
+			      "compressed multicast|bytes    packets errs "
+			      "drop fifo colls carrier compressed\n");
+	else
+		dev_seq_printf_stats(seq, v);
+	return 0;
+}
+
+static struct softnet_data *softnet_get_online(loff_t *pos)
+{
+	struct softnet_data *sd = NULL;
+
+	while (*pos < nr_cpu_ids)
+		if (cpu_online(*pos)) {
+			sd = &per_cpu(softnet_data, *pos);
+			break;
+		} else
+			++*pos;
+	return sd;
+}
+
+static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return softnet_get_online(pos);
+}
+
+static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return softnet_get_online(pos);
+}
+
+static void softnet_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int softnet_seq_show(struct seq_file *seq, void *v)
+{
+	struct softnet_data *sd = v;
+
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		   sd->processed, sd->dropped, sd->time_squeeze, 0,
+		   0, 0, 0, 0, /* was fastroute */
+		   sd->cpu_collision, sd->received_rps);
+	return 0;
+}
+
+static const struct seq_operations dev_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_seq_show,
+};
+
+static int dev_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &dev_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+static const struct seq_operations softnet_seq_ops = {
+	.start = softnet_seq_start,
+	.next  = softnet_seq_next,
+	.stop  = softnet_seq_stop,
+	.show  = softnet_seq_show,
+};
+
+static int softnet_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &softnet_seq_ops);
+}
+
+static const struct file_operations softnet_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = softnet_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static void *ptype_get_idx(loff_t pos)
+{
+	struct packet_type *pt = NULL;
+	loff_t i = 0;
+	int t;
+
+	list_for_each_entry_rcu(pt, &ptype_all, list) {
+		if (i == pos)
+			return pt;
+		++i;
+	}
+
+	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
+		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
+			if (i == pos)
+				return pt;
+			++i;
+		}
+	}
+	return NULL;
+}
+
+static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct packet_type *pt;
+	struct list_head *nxt;
+	int hash;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ptype_get_idx(0);
+
+	pt = v;
+	nxt = pt->list.next;
+	if (pt->type == htons(ETH_P_ALL)) {
+		if (nxt != &ptype_all)
+			goto found;
+		hash = 0;
+		nxt = ptype_base[0].next;
+	} else
+		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
+
+	while (nxt == &ptype_base[hash]) {
+		if (++hash >= PTYPE_HASH_SIZE)
+			return NULL;
+		nxt = ptype_base[hash].next;
+	}
+found:
+	return list_entry(nxt, struct packet_type, list);
+}
+
+static void ptype_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+static int ptype_seq_show(struct seq_file *seq, void *v)
+{
+	struct packet_type *pt = v;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "Type Device      Function\n");
+	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+		if (pt->type == htons(ETH_P_ALL))
+			seq_puts(seq, "ALL ");
+		else
+			seq_printf(seq, "%04x", ntohs(pt->type));
+
+		seq_printf(seq, " %-8s %pF\n",
+			   pt->dev ? pt->dev->name : "", pt->func);
+	}
+
+	return 0;
+}
+
+static const struct seq_operations ptype_seq_ops = {
+	.start = ptype_seq_start,
+	.next  = ptype_seq_next,
+	.stop  = ptype_seq_stop,
+	.show  = ptype_seq_show,
+};
+
+static int ptype_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &ptype_seq_ops,
+			sizeof(struct seq_net_private));
+}
+
+static const struct file_operations ptype_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ptype_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+
+static int __net_init dev_proc_net_init(struct net *net)
+{
+	int rc = -ENOMEM;
+
+	if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
+		goto out;
+	if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
+			 &softnet_seq_fops))
+		goto out_dev;
+	if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
+		goto out_softnet;
+
+	if (wext_proc_init(net))
+		goto out_ptype;
+	rc = 0;
+out:
+	return rc;
+out_ptype:
+	remove_proc_entry("ptype", net->proc_net);
+out_softnet:
+	remove_proc_entry("softnet_stat", net->proc_net);
+out_dev:
+	remove_proc_entry("dev", net->proc_net);
+	goto out;
+}
+
+static void __net_exit dev_proc_net_exit(struct net *net)
+{
+	wext_proc_exit(net);
+
+	remove_proc_entry("ptype", net->proc_net);
+	remove_proc_entry("softnet_stat", net->proc_net);
+	remove_proc_entry("dev", net->proc_net);
+}
+
+static struct pernet_operations __net_initdata dev_proc_ops = {
+	.init = dev_proc_net_init,
+	.exit = dev_proc_net_exit,
+};
+
+int __init dev_proc_init(void)
+{
+	return register_pernet_subsys(&dev_proc_ops);
+}
+
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct netdev_hw_addr *ha;
+	struct net_device *dev = v;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	netif_addr_lock_bh(dev);
+	netdev_for_each_mc_addr(ha, dev) {
+		int i;
+
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, ha->refcount, ha->global_use);
+
+		for (i = 0; i < dev->addr_len; i++)
+			seq_printf(seq, "%02x", ha->addr[i]);
+
+		seq_putc(seq, '\n');
+	}
+	netif_addr_unlock_bh(dev);
+	return 0;
+}
+
+static const struct seq_operations dev_mc_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_mc_seq_show,
+};
+
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &dev_mc_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_mc_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_mc_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+static int __net_init dev_mc_net_init(struct net *net)
+{
+	if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+	remove_proc_entry("dev_mcast", net->proc_net);
+}
+
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+	.init = dev_mc_net_init,
+	.exit = dev_mc_net_exit,
+};
+
+void __init dev_mcast_init(void)
+{
+	register_pernet_subsys(&dev_mc_net_ops);
+}
-- 
cgit v1.2.3


From 77852fea6e2442a0e654a9292060489895de18c7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 16 Feb 2013 09:46:48 +0100
Subject: sched/rt: Add <linux/sched/rt.h> header to <linux/init_task.h>

IA64 relied on it through sched.h inclusion:

  arch/ia64/kernel/init_task.c:38:11: error: 'MAX_PRIO' undeclared here (not in a function)
  arch/ia64/kernel/init_task.c:38:11: error: 'RR_TIMESLICE' undeclared here (not in a function)

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-xaan1twswggedMR0airtpjui@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index cc898b871cef..5cd0f0949927 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,6 +12,7 @@
 #include <linux/securebits.h>
 #include <linux/seqlock.h>
 #include <net/net_namespace.h>
+#include <linux/sched/rt.h>
 
 #ifdef CONFIG_SMP
 # define INIT_PUSHABLE_TASKS(tsk)					\
-- 
cgit v1.2.3


From 4fc1a601f147abe3bfb4d70fe718110ed21953e1 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Tue, 19 Feb 2013 00:43:10 +0000
Subject: net: proc: fix build failed when procfs is not configured

commit d4beaa66add8aebf83ab16d2fde4e4de8dac36df
"net: proc: change proc_net_fops_create to proc_create"
uses proc_create to replace proc_net_fops_create, when
CONFIG_PROC isn't configured, some build error will
occurs.

net/packet/af_packet.c: In function 'packet_net_init':
net/packet/af_packet.c:3831:48: error: 'packet_seq_fops' undeclared (first use in this function)
net/packet/af_packet.c:3831:48: note: each undeclared identifier is reported only once for each function it appears in

There may be other build fails like above,this patch
change proc_create from function to macros when CONFIG_PROC
is not configured,just like what proc_net_fops_create did
before this commit.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/proc_fs.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 319f69422667..d0a1f2ca1c3f 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -187,12 +187,9 @@ static inline void proc_flush_task(struct task_struct *task)
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	umode_t mode, struct proc_dir_entry *parent) { return NULL; }
-static inline struct proc_dir_entry *proc_create(const char *name,
-	umode_t mode, struct proc_dir_entry *parent,
-	const struct file_operations *proc_fops)
-{
-	return NULL;
-}
+
+#define proc_create(name, mode, parent, fops)  ({ (void)(mode), NULL; })
+
 static inline struct proc_dir_entry *proc_create_data(const char *name,
 	umode_t mode, struct proc_dir_entry *parent,
 	const struct file_operations *proc_fops, void *data)
-- 
cgit v1.2.3


From cd0615746ba0f6643fb984345ae6ee0b73404ca6 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 19 Feb 2013 02:47:05 +0000
Subject: net: fix a build failure when !CONFIG_PROC_FS

When !CONFIG_PROC_FS dev_mcast_init() is not defined,
actually we can just merge dev_mcast_init() into
dev_proc_init().

Reported-by: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 net/core/dev.c            |  1 -
 net/core/net-procfs.c     | 12 +++++-------
 3 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f111b4f038f3..b3d00fa4b314 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2646,7 +2646,6 @@ extern void		netdev_notify_peers(struct net_device *dev);
 extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
-extern void		dev_mcast_init(void);
 extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 					       struct rtnl_link_stats64 *storage);
 extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
diff --git a/net/core/dev.c b/net/core/dev.c
index 8d9ddb09f208..17bc535115d3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6260,7 +6260,6 @@ static int __init net_dev_init(void)
 
 	hotcpu_notifier(dev_cpu_callback, 0);
 	dst_init();
-	dev_mcast_init();
 	rc = 0;
 out:
 	return rc;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index ac87066491e9..0f6bb6f8d391 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -341,11 +341,6 @@ static struct pernet_operations __net_initdata dev_proc_ops = {
 	.exit = dev_proc_net_exit,
 };
 
-int __init dev_proc_init(void)
-{
-	return register_pernet_subsys(&dev_proc_ops);
-}
-
 static int dev_mc_seq_show(struct seq_file *seq, void *v)
 {
 	struct netdev_hw_addr *ha;
@@ -408,7 +403,10 @@ static struct pernet_operations __net_initdata dev_mc_net_ops = {
 	.exit = dev_mc_net_exit,
 };
 
-void __init dev_mcast_init(void)
+int __init dev_proc_init(void)
 {
-	register_pernet_subsys(&dev_mc_net_ops);
+	int ret = register_pernet_subsys(&dev_proc_ops);
+	if (!ret)
+		return register_pernet_subsys(&dev_mc_net_ops);
+	return ret;
 }
-- 
cgit v1.2.3