From 967fc04671feea4dbf780c9e55a0bc8fcf68a14e Mon Sep 17 00:00:00 2001
From: Gregory Haskins <ghaskins@novell.com>
Date: Mon, 29 Dec 2008 09:39:52 -0500
Subject: sched: add sched_class->needs_post_schedule() member

We currently run class->post_schedule() outside of the rq->lock, which
means that we need to test for the need to post_schedule outside of
the lock to avoid a forced reacquistion.  This is currently not a problem
as we only look at rq->rt.overloaded.  However, we want to enhance this
going forward to look at more state to reduce the need to post_schedule to
a bare minimum set.  Therefore, we introduce a new member-func called
needs_post_schedule() which tests for the post_schedule condtion without
actually performing the work.  Therefore it is safe to call this
function before the rq->lock is released, because we are guaranteed not
to drop the lock at an intermediate point (such as what post_schedule()
may do).

We will use this later in the series

[ rostedt: removed paranoid BUG_ON ]

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e5f928a079e8..836a86c32a65 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1012,6 +1012,7 @@ struct sched_class {
 			      struct rq *busiest, struct sched_domain *sd,
 			      enum cpu_idle_type idle);
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+	int (*needs_post_schedule) (struct rq *this_rq);
 	void (*post_schedule) (struct rq *this_rq);
 	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
 
-- 
cgit v1.2.3


From 4075134e40804821f90866d7de56802e4dcecb1e Mon Sep 17 00:00:00 2001
From: Gregory Haskins <ghaskins@novell.com>
Date: Mon, 29 Dec 2008 09:39:53 -0500
Subject: plist: fix PLIST_NODE_INIT to work with debug enabled

It seems that PLIST_NODE_INIT breaks if used and DEBUG_PI_LIST is defined.
Since there are no current users of PLIST_NODE_INIT, this has gone
undetected.  This patch fixes the build issue that enables the
DEBUG_PI_LIST later in the series when we use it in init_task.h

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---
 include/linux/plist.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/plist.h b/include/linux/plist.h
index 85de2f055874..45926d77d6ac 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -96,6 +96,10 @@ struct plist_node {
 # define PLIST_HEAD_LOCK_INIT(_lock)
 #endif
 
+#define _PLIST_HEAD_INIT(head)				\
+	.prio_list = LIST_HEAD_INIT((head).prio_list),	\
+	.node_list = LIST_HEAD_INIT((head).node_list)
+
 /**
  * PLIST_HEAD_INIT - static struct plist_head initializer
  * @head:	struct plist_head variable name
@@ -103,8 +107,7 @@ struct plist_node {
  */
 #define PLIST_HEAD_INIT(head, _lock)			\
 {							\
-	.prio_list = LIST_HEAD_INIT((head).prio_list),	\
-	.node_list = LIST_HEAD_INIT((head).node_list),	\
+        _PLIST_HEAD_INIT(head),                         \
 	PLIST_HEAD_LOCK_INIT(&(_lock))			\
 }
 
@@ -116,7 +119,7 @@ struct plist_node {
 #define PLIST_NODE_INIT(node, __prio)			\
 {							\
 	.prio  = (__prio),				\
-	.plist = PLIST_HEAD_INIT((node).plist, NULL),	\
+	.plist = { _PLIST_HEAD_INIT((node).plist) }, 	\
 }
 
 /**
-- 
cgit v1.2.3


From 917b627d4d981dc614519d7b34ea31a976b14e12 Mon Sep 17 00:00:00 2001
From: Gregory Haskins <ghaskins@novell.com>
Date: Mon, 29 Dec 2008 09:39:53 -0500
Subject: sched: create "pushable_tasks" list to limit pushing to one attempt

The RT scheduler employs a "push/pull" design to actively balance tasks
within the system (on a per disjoint cpuset basis).  When a task is
awoken, it is immediately determined if there are any lower priority
cpus which should be preempted.  This is opposed to the way normal
SCHED_OTHER tasks behave, which will wait for a periodic rebalancing
operation to occur before spreading out load.

When a particular RQ has more than 1 active RT task, it is said to
be in an "overloaded" state.  Once this occurs, the system enters
the active balancing mode, where it will try to push the task away,
or persuade a different cpu to pull it over.  The system will stay
in this state until the system falls back below the <= 1 queued RT
task per RQ.

However, the current implementation suffers from a limitation in the
push logic.  Once overloaded, all tasks (other than current) on the
RQ are analyzed on every push operation, even if it was previously
unpushable (due to affinity, etc).  Whats more, the operation stops
at the first task that is unpushable and will not look at items
lower in the queue.  This causes two problems:

1) We can have the same tasks analyzed over and over again during each
   push, which extends out the fast path in the scheduler for no
   gain.  Consider a RQ that has dozens of tasks that are bound to a
   core.  Each one of those tasks will be encountered and skipped
   for each push operation while they are queued.

2) There may be lower-priority tasks under the unpushable task that
   could have been successfully pushed, but will never be considered
   until either the unpushable task is cleared, or a pull operation
   succeeds.  The net result is a potential latency source for mid
   priority tasks.

This patch aims to rectify these two conditions by introducing a new
priority sorted list: "pushable_tasks".  A task is added to the list
each time a task is activated or preempted.  It is removed from the
list any time it is deactivated, made current, or fails to push.

This works because a task only needs to be attempted to push once.
After an initial failure to push, the other cpus will eventually try to
pull the task when the conditions are proper.  This also solves the
problem that we don't completely analyze all tasks due to encountering
an unpushable tasks.  Now every task will have a push attempted (when
appropriate).

This reduces latency both by shorting the critical section of the
rq->lock for certain workloads, and by making sure the algorithm
considers all eligible tasks in the system.

[ rostedt: added a couple more BUG_ONs ]

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Acked-by: Steven Rostedt <srostedt@redhat.com>
---
 include/linux/init_task.h |   1 +
 include/linux/sched.h     |   1 +
 kernel/sched.c            |   4 ++
 kernel/sched_rt.c         | 119 +++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 107 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 23fd8909b9e5..6851225f44a7 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -140,6 +140,7 @@ extern struct group_info init_groups;
 		.nr_cpus_allowed = NR_CPUS,				\
 	},								\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
+	.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
 	.ptraced	= LIST_HEAD_INIT(tsk.ptraced),			\
 	.ptrace_entry	= LIST_HEAD_INIT(tsk.ptrace_entry),		\
 	.real_parent	= &tsk,						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 836a86c32a65..440cabb2d432 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1179,6 +1179,7 @@ struct task_struct {
 #endif
 
 	struct list_head tasks;
+	struct plist_node pushable_tasks;
 
 	struct mm_struct *mm, *active_mm;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 3acbad8991a2..24ab80c28765 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -471,6 +471,7 @@ struct rt_rq {
 #ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
 	int overloaded;
+	struct plist_head pushable_tasks;
 #endif
 	int rt_throttled;
 	u64 rt_time;
@@ -2481,6 +2482,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
+	plist_node_init(&p->pushable_tasks, MAX_PRIO);
+
 	put_cpu();
 }
 
@@ -8237,6 +8240,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 #ifdef CONFIG_SMP
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
+	plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
 #endif
 
 	rt_rq->rt_time = 0;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b0b6ea4ed674..fe9da6084c87 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -49,6 +49,24 @@ static void update_rt_migration(struct rq *rq)
 		rq->rt.overloaded = 0;
 	}
 }
+
+static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
+{
+	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
+	plist_node_init(&p->pushable_tasks, p->prio);
+	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
+}
+
+static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
+{
+	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
+}
+
+#else
+
+#define enqueue_pushable_task(rq, p) do { } while (0)
+#define dequeue_pushable_task(rq, p) do { } while (0)
+
 #endif /* CONFIG_SMP */
 
 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
@@ -751,6 +769,9 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 
 	enqueue_rt_entity(rt_se);
 
+	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
+		enqueue_pushable_task(rq, p);
+
 	inc_cpu_load(rq, p->se.load.weight);
 }
 
@@ -761,6 +782,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se);
 
+	dequeue_pushable_task(rq, p);
+
 	dec_cpu_load(rq, p->se.load.weight);
 }
 
@@ -911,7 +934,7 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
 	return next;
 }
 
-static struct task_struct *pick_next_task_rt(struct rq *rq)
+static struct task_struct *_pick_next_task_rt(struct rq *rq)
 {
 	struct sched_rt_entity *rt_se;
 	struct task_struct *p;
@@ -933,6 +956,18 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 
 	p = rt_task_of(rt_se);
 	p->se.exec_start = rq->clock;
+
+	return p;
+}
+
+static struct task_struct *pick_next_task_rt(struct rq *rq)
+{
+	struct task_struct *p = _pick_next_task_rt(rq);
+
+	/* The running task is never eligible for pushing */
+	if (p)
+		dequeue_pushable_task(rq, p);
+
 	return p;
 }
 
@@ -940,6 +975,13 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);
 	p->se.exec_start = 0;
+
+	/*
+	 * The previous task needs to be made eligible for pushing
+	 * if it is still active
+	 */
+	if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
+		enqueue_pushable_task(rq, p);
 }
 
 #ifdef CONFIG_SMP
@@ -1116,6 +1158,31 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 	return lowest_rq;
 }
 
+static inline int has_pushable_tasks(struct rq *rq)
+{
+	return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
+static struct task_struct *pick_next_pushable_task(struct rq *rq)
+{
+	struct task_struct *p;
+
+	if (!has_pushable_tasks(rq))
+		return NULL;
+
+	p = plist_first_entry(&rq->rt.pushable_tasks,
+			      struct task_struct, pushable_tasks);
+
+	BUG_ON(rq->cpu != task_cpu(p));
+	BUG_ON(task_current(rq, p));
+	BUG_ON(p->rt.nr_cpus_allowed <= 1);
+
+	BUG_ON(!p->se.on_rq);
+	BUG_ON(!rt_task(p));
+
+	return p;
+}
+
 /*
  * If the current CPU has more than one RT task, see if the non
  * running task can migrate over to a CPU that is running a task
@@ -1125,13 +1192,12 @@ static int push_rt_task(struct rq *rq)
 {
 	struct task_struct *next_task;
 	struct rq *lowest_rq;
-	int ret = 0;
 	int paranoid = RT_MAX_TRIES;
 
 	if (!rq->rt.overloaded)
 		return 0;
 
-	next_task = pick_next_highest_task_rt(rq, -1);
+	next_task = pick_next_pushable_task(rq);
 	if (!next_task)
 		return 0;
 
@@ -1163,12 +1229,19 @@ static int push_rt_task(struct rq *rq)
 		 * so it is possible that next_task has changed.
 		 * If it has, then try again.
 		 */
-		task = pick_next_highest_task_rt(rq, -1);
+		task = pick_next_pushable_task(rq);
 		if (unlikely(task != next_task) && task && paranoid--) {
 			put_task_struct(next_task);
 			next_task = task;
 			goto retry;
 		}
+
+		/*
+		 * Once we have failed to push this task, we will not
+		 * try again, since the other cpus will pull from us
+		 * when they are ready
+		 */
+		dequeue_pushable_task(rq, next_task);
 		goto out;
 	}
 
@@ -1180,23 +1253,12 @@ static int push_rt_task(struct rq *rq)
 
 	double_unlock_balance(rq, lowest_rq);
 
-	ret = 1;
 out:
 	put_task_struct(next_task);
 
-	return ret;
+	return 1;
 }
 
-/*
- * TODO: Currently we just use the second highest prio task on
- *       the queue, and stop when it can't migrate (or there's
- *       no more RT tasks).  There may be a case where a lower
- *       priority RT task has a different affinity than the
- *       higher RT task. In this case the lower RT task could
- *       possibly be able to migrate where as the higher priority
- *       RT task could not.  We currently ignore this issue.
- *       Enhancements are welcome!
- */
 static void push_rt_tasks(struct rq *rq)
 {
 	/* push_rt_task will return true if it moved an RT */
@@ -1295,7 +1357,7 @@ static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
  */
 static int needs_post_schedule_rt(struct rq *rq)
 {
-	return rq->rt.overloaded ? 1 : 0;
+	return has_pushable_tasks(rq);
 }
 
 static void post_schedule_rt(struct rq *rq)
@@ -1317,7 +1379,7 @@ static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
-	    rq->rt.overloaded &&
+	    has_pushable_tasks(rq) &&
 	    p->rt.nr_cpus_allowed > 1)
 		push_rt_tasks(rq);
 }
@@ -1354,6 +1416,24 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
 		struct rq *rq = task_rq(p);
 
+		if (!task_current(rq, p)) {
+			/*
+			 * Make sure we dequeue this task from the pushable list
+			 * before going further.  It will either remain off of
+			 * the list because we are no longer pushable, or it
+			 * will be requeued.
+			 */
+			if (p->rt.nr_cpus_allowed > 1)
+				dequeue_pushable_task(rq, p);
+
+			/*
+			 * Requeue if our weight is changing and still > 1
+			 */
+			if (weight > 1)
+				enqueue_pushable_task(rq, p);
+
+		}
+
 		if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
 			rq->rt.rt_nr_migratory++;
 		} else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
@@ -1538,6 +1618,9 @@ static void set_curr_task_rt(struct rq *rq)
 	struct task_struct *p = rq->curr;
 
 	p->se.exec_start = rq->clock;
+
+	/* The running task is never eligible for pushing */
+	dequeue_pushable_task(rq, p);
 }
 
 static const struct sched_class rt_sched_class = {
-- 
cgit v1.2.3


From 5762ba1873b0bb9faa631aaa02f533c2b9837f82 Mon Sep 17 00:00:00 2001
From: Sebastien Dugue <sebastien.dugue@bull.net>
Date: Mon, 1 Dec 2008 14:09:07 +0100
Subject: hrtimers: allow the hot-unplugging of all cpus

Impact: fix CPU hotplug hang on Power6 testbox

On architectures that support offlining all cpus (at least powerpc/pseries),
hot-unpluging the tick_do_timer_cpu can result in a system hang.

This comes from the fact that if the cpu going down happens to be the
cpu doing the tick, then as the tick_do_timer_cpu handover happens after the
cpu is dead (via the CPU_DEAD notification), we're left without ticks,
jiffies are frozen and any task relying on timers (msleep, ...) is stuck.
That's particularly the case for the cpu looping in __cpu_die() waiting
for the dying cpu to be dead.

This patch addresses this by having the tick_do_timer_cpu handover happen
earlier during the CPU_DYING notification. For this, a new clockevent
notification type is introduced (CLOCK_EVT_NOTIFY_CPU_DYING) which is triggered
in hrtimer_cpu_notify().

Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/clockchips.h |  1 +
 kernel/hrtimer.c           |  4 ++++
 kernel/time/tick-common.c  | 26 +++++++++++++++++++-------
 3 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index ed3a5d473e52..c6de413c5dd1 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -36,6 +36,7 @@ enum clock_event_nofitiers {
 	CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
 	CLOCK_EVT_NOTIFY_SUSPEND,
 	CLOCK_EVT_NOTIFY_RESUME,
+	CLOCK_EVT_NOTIFY_CPU_DYING,
 	CLOCK_EVT_NOTIFY_CPU_DEAD,
 };
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c2a69b89ac61..61cb933395ba 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1609,6 +1609,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
+		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	{
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index df12434b43ca..457d281258ee 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -273,6 +273,21 @@ out_bc:
 	return ret;
 }
 
+/*
+ * Transfer the do_timer job away from a dying cpu.
+ *
+ * Called with interrupts disabled.
+ */
+static void tick_handover_do_timer(int *cpup)
+{
+	if (*cpup == tick_do_timer_cpu) {
+		int cpu = first_cpu(cpu_online_map);
+
+		tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu :
+			TICK_DO_TIMER_NONE;
+	}
+}
+
 /*
  * Shutdown an event device on a given cpu:
  *
@@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup)
 		clockevents_exchange_device(dev, NULL);
 		td->evtdev = NULL;
 	}
-	/* Transfer the do_timer job away from this cpu */
-	if (*cpup == tick_do_timer_cpu) {
-		int cpu = first_cpu(cpu_online_map);
-
-		tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu :
-			TICK_DO_TIMER_NONE;
-	}
 	spin_unlock_irqrestore(&tick_device_lock, flags);
 }
 
@@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
 		tick_broadcast_oneshot_control(reason);
 		break;
 
+	case CLOCK_EVT_NOTIFY_CPU_DYING:
+		tick_handover_do_timer(dev);
+		break;
+
 	case CLOCK_EVT_NOTIFY_CPU_DEAD:
 		tick_shutdown_broadcast_oneshot(dev);
 		tick_shutdown_broadcast(dev);
-- 
cgit v1.2.3


From 01d07820a0df6b6134c1bb75b1e84c9d0cdab3be Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Sun, 4 Jan 2009 03:11:05 +0900
Subject: sparseirq: make for_each_irq_desc() more robust

Raja reported for_each_irq_desc() has possibility unsafeness:

if anyone write folliwing code, for_each_irq_desc() doesn't work
as intended:

(right now this code does not exist at all)

 if (safe)
   for_each_irq_desc(irq, desc) {
      ...
   }
 else
   panic();

Reported-by: Raja R Harinath <harinath@hurrynot.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irqnr.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 86af92e9e84c..52ebbb4b161d 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -28,13 +28,17 @@ extern struct irq_desc *irq_to_desc(unsigned int irq);
 # define for_each_irq_desc(irq, desc)					\
 	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
 	     irq++, desc = irq_to_desc(irq))				\
-		if (desc)
+		if (!desc)						\
+			;						\
+		else
 
 
 # define for_each_irq_desc_reverse(irq, desc)				\
 	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0;	\
 	     irq--, desc = irq_to_desc(irq))				\
-		if (desc)
+		if (!desc)						\
+			;						\
+		else
 
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
-- 
cgit v1.2.3


From d7e51e66899f95dabc89b4d4c6674a6e50fa37fc Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 7 Jan 2009 15:03:13 -0800
Subject: sparseirq: make some func to be used with genirq

Impact: clean up sparseirq fallout on random.c

Ingo suggested to change some ifdef from SPARSE_IRQ to GENERIC_HARDIRQS
so we could some #ifdef later if all arch support genirq

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/char/random.c        | 2 +-
 drivers/pci/intr_remapping.c | 2 +-
 include/linux/irq.h          | 6 ++----
 include/linux/kernel_stat.h  | 6 +++---
 kernel/irq/handle.c          | 7 ++++---
 5 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 7c13581ca9cd..a778918c8f42 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,7 +558,7 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-#ifndef CONFIG_SPARSE_IRQ
+#ifndef CONFIG_GENERIC_HARDIRQS
 
 static struct timer_rand_state *irq_timer_state[NR_IRQS];
 
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index f78371b22529..3d604132a04f 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -20,7 +20,7 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-#ifdef CONFIG_SPARSE_IRQ
+#ifdef CONFIG_GENERIC_HARDIRQS
 static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu)
 {
 	struct irq_2_iommu *iommu;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index f899b502f186..e9a878978c85 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -160,12 +160,10 @@ struct irq_2_iommu;
  */
 struct irq_desc {
 	unsigned int		irq;
-#ifdef CONFIG_SPARSE_IRQ
 	struct timer_rand_state *timer_rand_state;
 	unsigned int            *kstat_irqs;
-# ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_INTR_REMAP
 	struct irq_2_iommu      *irq_2_iommu;
-# endif
 #endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
@@ -202,13 +200,13 @@ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc
 extern struct irq_desc irq_desc[NR_IRQS];
 #else /* CONFIG_SPARSE_IRQ */
 extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
+#endif /* CONFIG_SPARSE_IRQ */
 
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
 #define kstat_incr_irqs_this_cpu(irqno, DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()]++)
 
-#endif /* CONFIG_SPARSE_IRQ */
 
 extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
 
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 570d20413119..a3431b164bea 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,7 +28,7 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-#ifndef CONFIG_SPARSE_IRQ
+#ifndef CONFIG_GENERIC_HARDIRQS
        unsigned int irqs[NR_IRQS];
 #endif
 };
@@ -41,7 +41,7 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
-#ifndef CONFIG_SPARSE_IRQ
+#ifndef CONFIG_GENERIC_HARDIRQS
 #define kstat_irqs_this_cpu(irq) \
 	(kstat_this_cpu.irqs[irq])
 
@@ -55,7 +55,7 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
 #endif
 
 
-#ifndef CONFIG_SPARSE_IRQ
+#ifndef CONFIG_GENERIC_HARDIRQS
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
        return kstat_cpu(cpu).irqs[irq];
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c20db0be9173..48299a8a22f8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -213,6 +213,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
+static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
 int __init early_irq_init(void)
 {
 	struct irq_desc *desc;
@@ -222,8 +223,10 @@ int __init early_irq_init(void)
 	desc = irq_desc;
 	count = ARRAY_SIZE(irq_desc);
 
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
 		desc[i].irq = i;
+		desc[i].kstat_irqs = kstat_irqs_all[i];
+	}
 
 	return arch_early_irq_init();
 }
@@ -451,12 +454,10 @@ void early_init_irq_lock_class(void)
 	}
 }
 
-#ifdef CONFIG_SPARSE_IRQ
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	return desc ? desc->kstat_irqs[cpu] : 0;
 }
-#endif
 EXPORT_SYMBOL(kstat_irqs_cpu);
 
-- 
cgit v1.2.3


From 831451ac4e44d3a20b581ce726ef1d1144373f7d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 14 Jan 2009 12:39:18 +0100
Subject: sched: introduce avg_wakeup

Introduce a new avg_wakeup statistic.

avg_wakeup is a measure of how frequently a task wakes up other tasks, it
represents the average time between wakeups, with a limit of avg_runtime
for when it doesn't wake up anybody.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  3 +++
 kernel/sched.c        | 36 ++++++++++++++++++++++++++++++------
 kernel/sched_debug.c  |  1 +
 3 files changed, 34 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4cae9b81a1f8..daf4e07bc978 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1046,6 +1046,9 @@ struct sched_entity {
 	u64			exec_max;
 	u64			slice_max;
 
+	u64			start_runtime;
+	u64			avg_wakeup;
+
 	u64			nr_migrations;
 	u64			nr_migrations_cold;
 	u64			nr_failed_migrations_affine;
diff --git a/kernel/sched.c b/kernel/sched.c
index 8be2c13b50d0..86f5a063f0b9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1705,6 +1705,9 @@ static void update_avg(u64 *avg, u64 sample)
 
 static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
+	if (wakeup)
+		p->se.start_runtime = p->se.sum_exec_runtime;
+
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, wakeup);
 	p->se.on_rq = 1;
@@ -1712,10 +1715,15 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (sleep && p->se.last_wakeup) {
-		update_avg(&p->se.avg_overlap,
-			   p->se.sum_exec_runtime - p->se.last_wakeup);
-		p->se.last_wakeup = 0;
+	if (sleep) {
+		if (p->se.last_wakeup) {
+			update_avg(&p->se.avg_overlap,
+				p->se.sum_exec_runtime - p->se.last_wakeup);
+			p->se.last_wakeup = 0;
+		} else {
+			update_avg(&p->se.avg_wakeup,
+				sysctl_sched_wakeup_granularity);
+		}
 	}
 
 	sched_info_dequeued(p);
@@ -2345,6 +2353,22 @@ out_activate:
 	activate_task(rq, p, 1);
 	success = 1;
 
+	/*
+	 * Only attribute actual wakeups done by this task.
+	 */
+	if (!in_interrupt()) {
+		struct sched_entity *se = &current->se;
+		u64 sample = se->sum_exec_runtime;
+
+		if (se->last_wakeup)
+			sample -= se->last_wakeup;
+		else
+			sample -= se->start_runtime;
+		update_avg(&se->avg_wakeup, sample);
+
+		se->last_wakeup = se->sum_exec_runtime;
+	}
+
 out_running:
 	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, sync);
@@ -2355,8 +2379,6 @@ out_running:
 		p->sched_class->task_wake_up(rq, p);
 #endif
 out:
-	current->se.last_wakeup = current->se.sum_exec_runtime;
-
 	task_rq_unlock(rq, &flags);
 
 	return success;
@@ -2386,6 +2408,8 @@ static void __sched_fork(struct task_struct *p)
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.last_wakeup		= 0;
 	p->se.avg_overlap		= 0;
+	p->se.start_runtime		= 0;
+	p->se.avg_wakeup		= sysctl_sched_wakeup_granularity;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start		= 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 16eeba4e4169..2b1260f0e800 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -397,6 +397,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.vruntime);
 	PN(se.sum_exec_runtime);
 	PN(se.avg_overlap);
+	PN(se.avg_wakeup);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
-- 
cgit v1.2.3


From 34cb61359b503d7aff6447acb037a5efd6ce93b2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 16 Jan 2009 13:36:06 +0100
Subject: sched: fix !CONFIG_SCHEDSTATS build failure

Stephen Rothwell reported this linux-next build failure with !CONFIG_SCHEDSTATS:

| In file included from kernel/sched.c:1703:
| kernel/sched_fair.c: In function 'adaptive_gran':
| kernel/sched_fair.c:1324: error: 'struct sched_entity' has no member named 'avg_wakeup'

The start_runtime and avg_wakeup metrics are now not just for statistics,
but also for scheduling - so they always need to be available. (Also
move out the nr_migrations fields - for future perfcounters usage.)

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index daf4e07bc978..5d56b54350a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1031,6 +1031,10 @@ struct sched_entity {
 	u64			last_wakeup;
 	u64			avg_overlap;
 
+	u64			start_runtime;
+	u64			avg_wakeup;
+	u64			nr_migrations;
+
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
 	u64			wait_max;
@@ -1046,10 +1050,6 @@ struct sched_entity {
 	u64			exec_max;
 	u64			slice_max;
 
-	u64			start_runtime;
-	u64			avg_wakeup;
-
-	u64			nr_migrations;
 	u64			nr_migrations_cold;
 	u64			nr_failed_migrations_affine;
 	u64			nr_failed_migrations_running;
-- 
cgit v1.2.3


From 74296a8ed6aa3c5bf672808ada690de7ba323ecc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 16 Jan 2009 17:43:50 +0100
Subject: irq: provide debug_poll_all_shared_irqs() method under
 CONFIG_DEBUG_SHIRQ

Provide a shared interrupt debug facility under CONFIG_DEBUG_SHIRQ:
it uses the existing irqpoll facilities to iterate through all
registered interrupt handlers and call those which can handle shared
IRQ lines.

This can be handy for suspend/resume debugging: if we call this function
early during resume we can trigger crashes in those drivers which have
incorrect assumptions about when exactly their ISRs will be called
during suspend/resume.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/interrupt.h |  6 ++++++
 kernel/irq/spurious.c     | 14 +++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9127f6b51a39..468e3a25a4a1 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -462,6 +462,12 @@ static inline void init_irq_proc(void)
 }
 #endif
 
+#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ)
+extern void debug_poll_all_shared_irqs(void);
+#else
+static inline void debug_poll_all_shared_irqs(void) { }
+#endif
+
 int show_interrupts(struct seq_file *p, void *v);
 
 struct irq_desc;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dd364c11e56e..4d568294de3e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -104,7 +104,7 @@ static int misrouted_irq(int irq)
 	return ok;
 }
 
-static void poll_spurious_irqs(unsigned long dummy)
+static void poll_all_shared_irqs(void)
 {
 	struct irq_desc *desc;
 	int i;
@@ -123,11 +123,23 @@ static void poll_spurious_irqs(unsigned long dummy)
 
 		try_one_irq(i, desc);
 	}
+}
+
+static void poll_spurious_irqs(unsigned long dummy)
+{
+	poll_all_shared_irqs();
 
 	mod_timer(&poll_spurious_irq_timer,
 		  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 }
 
+#ifdef CONFIG_DEBUG_SHIRQ
+void debug_poll_all_shared_irqs(void)
+{
+	poll_all_shared_irqs();
+}
+#endif
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
-- 
cgit v1.2.3


From 5803c5122acb31ebf5f76b1a9925e2c72c4436e1 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 9 Jan 2009 13:01:08 +0000
Subject: arcnet: convert to internal stats

Use pre-existing network_device_stats inside network_device rather than own
private structure.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arcnet/arc-rawmode.c |  2 +-
 drivers/net/arcnet/arcnet.c      | 38 ++++++++++----------------------
 drivers/net/arcnet/capmode.c     |  2 +-
 drivers/net/arcnet/rfc1051.c     | 12 +++++-----
 drivers/net/arcnet/rfc1201.c     | 47 ++++++++++++++++++++--------------------
 include/linux/arcdevice.h        |  2 --
 6 files changed, 42 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c
index 3ff9affb1a91..da017cbb5f64 100644
--- a/drivers/net/arcnet/arc-rawmode.c
+++ b/drivers/net/arcnet/arc-rawmode.c
@@ -102,7 +102,7 @@ static void rx(struct net_device *dev, int bufnum,
 	skb = alloc_skb(length + ARC_HDR_SIZE, GFP_ATOMIC);
 	if (skb == NULL) {
 		BUGMSG(D_NORMAL, "Memory squeeze, dropping packet.\n");
-		lp->stats.rx_dropped++;
+		dev->stats.rx_dropped++;
 		return;
 	}
 	skb_put(skb, length + ARC_HDR_SIZE);
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 6b53e5ed125c..34b9a4d0da30 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -105,7 +105,6 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev,
 			 unsigned short type, const void *daddr,
 			 const void *saddr, unsigned len);
 static int arcnet_rebuild_header(struct sk_buff *skb);
-static struct net_device_stats *arcnet_get_stats(struct net_device *dev);
 static int go_tx(struct net_device *dev);
 
 static int debug = ARCNET_DEBUG;
@@ -347,7 +346,6 @@ static void arcdev_setup(struct net_device *dev)
 	dev->stop = arcnet_close;
 	dev->hard_start_xmit = arcnet_send_packet;
 	dev->tx_timeout = arcnet_timeout;
-	dev->get_stats = arcnet_get_stats;
 }
 
 struct net_device *alloc_arcdev(char *name)
@@ -583,8 +581,8 @@ static int arcnet_rebuild_header(struct sk_buff *skb)
 	} else {
 		BUGMSG(D_NORMAL,
 		       "I don't understand ethernet protocol %Xh addresses!\n", type);
-		lp->stats.tx_errors++;
-		lp->stats.tx_aborted_errors++;
+		dev->stats.tx_errors++;
+		dev->stats.tx_aborted_errors++;
 	}
 
 	/* if we couldn't resolve the address... give up. */
@@ -645,7 +643,7 @@ static int arcnet_send_packet(struct sk_buff *skb, struct net_device *dev)
 		    !proto->ack_tx) {
 			/* done right away and we don't want to acknowledge
 			   the package later - forget about it now */
-			lp->stats.tx_bytes += skb->len;
+			dev->stats.tx_bytes += skb->len;
 			freeskb = 1;
 		} else {
 			/* do it the 'split' way */
@@ -709,7 +707,7 @@ static int go_tx(struct net_device *dev)
 	/* start sending */
 	ACOMMAND(TXcmd | (lp->cur_tx << 3));
 
-	lp->stats.tx_packets++;
+	dev->stats.tx_packets++;
 	lp->lasttrans_dest = lp->lastload_dest;
 	lp->lastload_dest = 0;
 	lp->excnak_pending = 0;
@@ -732,11 +730,11 @@ static void arcnet_timeout(struct net_device *dev)
 		msg = " - missed IRQ?";
 	} else {
 		msg = "";
-		lp->stats.tx_aborted_errors++;
+		dev->stats.tx_aborted_errors++;
 		lp->timed_out = 1;
 		ACOMMAND(NOTXcmd | (lp->cur_tx << 3));
 	}
-	lp->stats.tx_errors++;
+	dev->stats.tx_errors++;
 
 	/* make sure we didn't miss a TX or a EXC NAK IRQ */
 	AINTMASK(0);
@@ -865,8 +863,8 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 						       "transmit was not acknowledged! "
 						       "(status=%Xh, dest=%02Xh)\n",
 						       status, lp->lasttrans_dest);
-						lp->stats.tx_errors++;
-						lp->stats.tx_carrier_errors++;
+						dev->stats.tx_errors++;
+						dev->stats.tx_carrier_errors++;
 					} else {
 						BUGMSG(D_DURING,
 						       "broadcast was not acknowledged; that's normal "
@@ -905,7 +903,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 				if (txbuf != -1) {
 					if (lp->outgoing.proto->continue_tx(dev, txbuf)) {
 						/* that was the last segment */
-						lp->stats.tx_bytes += lp->outgoing.skb->len;
+						dev->stats.tx_bytes += lp->outgoing.skb->len;
 						if(!lp->outgoing.proto->ack_tx)
 						  {
 						    dev_kfree_skb_irq(lp->outgoing.skb);
@@ -930,7 +928,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 		}
 		if (status & lp->intmask & RECONflag) {
 			ACOMMAND(CFLAGScmd | CONFIGclear);
-			lp->stats.tx_carrier_errors++;
+			dev->stats.tx_carrier_errors++;
 
 			BUGMSG(D_RECON, "Network reconfiguration detected (status=%Xh)\n",
 			       status);
@@ -1038,8 +1036,8 @@ static void arcnet_rx(struct net_device *dev, int bufnum)
 	       "(%d+4 bytes)\n",
 	       bufnum, pkt.hard.source, pkt.hard.dest, length);
 
-	lp->stats.rx_packets++;
-	lp->stats.rx_bytes += length + ARC_HDR_SIZE;
+	dev->stats.rx_packets++;
+	dev->stats.rx_bytes += length + ARC_HDR_SIZE;
 
 	/* call the right receiver for the protocol */
 	if (arc_proto_map[soft->proto]->is_ip) {
@@ -1067,18 +1065,6 @@ static void arcnet_rx(struct net_device *dev, int bufnum)
 }
 
 
-
-/* 
- * Get the current statistics.  This may be called with the card open or
- * closed.
- */
-static struct net_device_stats *arcnet_get_stats(struct net_device *dev)
-{
-	struct arcnet_local *lp = netdev_priv(dev);
-	return &lp->stats;
-}
-
-
 static void null_rx(struct net_device *dev, int bufnum,
 		    struct archdr *pkthdr, int length)
 {
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 30580bbe252d..1613929ff301 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -119,7 +119,7 @@ static void rx(struct net_device *dev, int bufnum,
 	skb = alloc_skb(length + ARC_HDR_SIZE + sizeof(int), GFP_ATOMIC);
 	if (skb == NULL) {
 		BUGMSG(D_NORMAL, "Memory squeeze, dropping packet.\n");
-		lp->stats.rx_dropped++;
+		dev->stats.rx_dropped++;
 		return;
 	}
 	skb_put(skb, length + ARC_HDR_SIZE + sizeof(int));
diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c
index 49d39a9cb696..06f8fa2f8f2f 100644
--- a/drivers/net/arcnet/rfc1051.c
+++ b/drivers/net/arcnet/rfc1051.c
@@ -88,7 +88,6 @@ MODULE_LICENSE("GPL");
  */
 static __be16 type_trans(struct sk_buff *skb, struct net_device *dev)
 {
-	struct arcnet_local *lp = netdev_priv(dev);
 	struct archdr *pkt = (struct archdr *) skb->data;
 	struct arc_rfc1051 *soft = &pkt->soft.rfc1051;
 	int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE;
@@ -112,8 +111,8 @@ static __be16 type_trans(struct sk_buff *skb, struct net_device *dev)
 		return htons(ETH_P_ARP);
 
 	default:
-		lp->stats.rx_errors++;
-		lp->stats.rx_crc_errors++;
+		dev->stats.rx_errors++;
+		dev->stats.rx_crc_errors++;
 		return 0;
 	}
 
@@ -140,7 +139,7 @@ static void rx(struct net_device *dev, int bufnum,
 	skb = alloc_skb(length + ARC_HDR_SIZE, GFP_ATOMIC);
 	if (skb == NULL) {
 		BUGMSG(D_NORMAL, "Memory squeeze, dropping packet.\n");
-		lp->stats.rx_dropped++;
+		dev->stats.rx_dropped++;
 		return;
 	}
 	skb_put(skb, length + ARC_HDR_SIZE);
@@ -168,7 +167,6 @@ static void rx(struct net_device *dev, int bufnum,
 static int build_header(struct sk_buff *skb, struct net_device *dev,
 			unsigned short type, uint8_t daddr)
 {
-	struct arcnet_local *lp = netdev_priv(dev);
 	int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE;
 	struct archdr *pkt = (struct archdr *) skb_push(skb, hdr_size);
 	struct arc_rfc1051 *soft = &pkt->soft.rfc1051;
@@ -184,8 +182,8 @@ static int build_header(struct sk_buff *skb, struct net_device *dev,
 	default:
 		BUGMSG(D_NORMAL, "RFC1051: I don't understand protocol %d (%Xh)\n",
 		       type, type);
-		lp->stats.tx_errors++;
-		lp->stats.tx_aborted_errors++;
+		dev->stats.tx_errors++;
+		dev->stats.tx_aborted_errors++;
 		return 0;
 	}
 
diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c
index 2303d3a1f4b6..745530651c45 100644
--- a/drivers/net/arcnet/rfc1201.c
+++ b/drivers/net/arcnet/rfc1201.c
@@ -92,7 +92,6 @@ static __be16 type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct archdr *pkt = (struct archdr *) skb->data;
 	struct arc_rfc1201 *soft = &pkt->soft.rfc1201;
-	struct arcnet_local *lp = netdev_priv(dev);
 	int hdr_size = ARC_HDR_SIZE + RFC1201_HDR_SIZE;
 
 	/* Pull off the arcnet header. */
@@ -121,8 +120,8 @@ static __be16 type_trans(struct sk_buff *skb, struct net_device *dev)
 	case ARC_P_NOVELL_EC:
 		return htons(ETH_P_802_3);
 	default:
-		lp->stats.rx_errors++;
-		lp->stats.rx_crc_errors++;
+		dev->stats.rx_errors++;
+		dev->stats.rx_crc_errors++;
 		return 0;
 	}
 
@@ -172,8 +171,8 @@ static void rx(struct net_device *dev, int bufnum,
 			 in->sequence, soft->split_flag, soft->sequence);
 			lp->rfc1201.aborted_seq = soft->sequence;
 			dev_kfree_skb_irq(in->skb);
-			lp->stats.rx_errors++;
-			lp->stats.rx_missed_errors++;
+			dev->stats.rx_errors++;
+			dev->stats.rx_missed_errors++;
 			in->skb = NULL;
 		}
 		in->sequence = soft->sequence;
@@ -181,7 +180,7 @@ static void rx(struct net_device *dev, int bufnum,
 		skb = alloc_skb(length + ARC_HDR_SIZE, GFP_ATOMIC);
 		if (skb == NULL) {
 			BUGMSG(D_NORMAL, "Memory squeeze, dropping packet.\n");
-			lp->stats.rx_dropped++;
+			dev->stats.rx_dropped++;
 			return;
 		}
 		skb_put(skb, length + ARC_HDR_SIZE);
@@ -213,7 +212,7 @@ static void rx(struct net_device *dev, int bufnum,
 					BUGMSG(D_EXTRA,
 					       "ARP source address was 00h, set to %02Xh.\n",
 					       saddr);
-					lp->stats.rx_crc_errors++;
+					dev->stats.rx_crc_errors++;
 					*cptr = saddr;
 				} else {
 					BUGMSG(D_DURING, "ARP source address (%Xh) is fine.\n",
@@ -222,8 +221,8 @@ static void rx(struct net_device *dev, int bufnum,
 			} else {
 				BUGMSG(D_NORMAL, "funny-shaped ARP packet. (%Xh, %Xh)\n",
 				       arp->ar_hln, arp->ar_pln);
-				lp->stats.rx_errors++;
-				lp->stats.rx_crc_errors++;
+				dev->stats.rx_errors++;
+				dev->stats.rx_crc_errors++;
 			}
 		}
 		BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx");
@@ -257,8 +256,8 @@ static void rx(struct net_device *dev, int bufnum,
 			       soft->split_flag);
 			dev_kfree_skb_irq(in->skb);
 			in->skb = NULL;
-			lp->stats.rx_errors++;
-			lp->stats.rx_missed_errors++;
+			dev->stats.rx_errors++;
+			dev->stats.rx_missed_errors++;
 			in->lastpacket = in->numpackets = 0;
 		}
 		if (soft->split_flag & 1) {	/* first packet in split */
@@ -269,8 +268,8 @@ static void rx(struct net_device *dev, int bufnum,
 				       "(splitflag=%d, seq=%d)\n",
 				       in->sequence, soft->split_flag,
 				       soft->sequence);
-				lp->stats.rx_errors++;
-				lp->stats.rx_missed_errors++;
+				dev->stats.rx_errors++;
+				dev->stats.rx_missed_errors++;
 				dev_kfree_skb_irq(in->skb);
 			}
 			in->sequence = soft->sequence;
@@ -281,8 +280,8 @@ static void rx(struct net_device *dev, int bufnum,
 				BUGMSG(D_EXTRA, "incoming packet more than 16 segments; dropping. (splitflag=%d)\n",
 				       soft->split_flag);
 				lp->rfc1201.aborted_seq = soft->sequence;
-				lp->stats.rx_errors++;
-				lp->stats.rx_length_errors++;
+				dev->stats.rx_errors++;
+				dev->stats.rx_length_errors++;
 				return;
 			}
 			in->skb = skb = alloc_skb(508 * in->numpackets + ARC_HDR_SIZE,
@@ -290,7 +289,7 @@ static void rx(struct net_device *dev, int bufnum,
 			if (skb == NULL) {
 				BUGMSG(D_NORMAL, "(split) memory squeeze, dropping packet.\n");
 				lp->rfc1201.aborted_seq = soft->sequence;
-				lp->stats.rx_dropped++;
+				dev->stats.rx_dropped++;
 				return;
 			}
 			skb->dev = dev;
@@ -314,8 +313,8 @@ static void rx(struct net_device *dev, int bufnum,
 					       "first! (splitflag=%d, seq=%d, aborted=%d)\n",
 					soft->split_flag, soft->sequence,
 					       lp->rfc1201.aborted_seq);
-					lp->stats.rx_errors++;
-					lp->stats.rx_missed_errors++;
+					dev->stats.rx_errors++;
+					dev->stats.rx_missed_errors++;
 				}
 				return;
 			}
@@ -325,8 +324,8 @@ static void rx(struct net_device *dev, int bufnum,
 				if (packetnum <= in->lastpacket - 1) {
 					BUGMSG(D_EXTRA, "duplicate splitpacket ignored! (splitflag=%d)\n",
 					       soft->split_flag);
-					lp->stats.rx_errors++;
-					lp->stats.rx_frame_errors++;
+					dev->stats.rx_errors++;
+					dev->stats.rx_frame_errors++;
 					return;
 				}
 				/* "bad" duplicate, kill reassembly */
@@ -336,8 +335,8 @@ static void rx(struct net_device *dev, int bufnum,
 				lp->rfc1201.aborted_seq = soft->sequence;
 				dev_kfree_skb_irq(in->skb);
 				in->skb = NULL;
-				lp->stats.rx_errors++;
-				lp->stats.rx_missed_errors++;
+				dev->stats.rx_errors++;
+				dev->stats.rx_missed_errors++;
 				in->lastpacket = in->numpackets = 0;
 				return;
 			}
@@ -404,8 +403,8 @@ static int build_header(struct sk_buff *skb, struct net_device *dev,
 	default:
 		BUGMSG(D_NORMAL, "RFC1201: I don't understand protocol %d (%Xh)\n",
 		       type, type);
-		lp->stats.tx_errors++;
-		lp->stats.tx_aborted_errors++;
+		dev->stats.tx_errors++;
+		dev->stats.tx_aborted_errors++;
 		return 0;
 	}
 
diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h
index a1916078fd08..ef0d6b7df44c 100644
--- a/include/linux/arcdevice.h
+++ b/include/linux/arcdevice.h
@@ -235,8 +235,6 @@ struct Outgoing {
 
 
 struct arcnet_local {
-	struct net_device_stats stats;
-
 	uint8_t config,		/* current value of CONFIG register */
 		timeout,	/* Extended timeout for COM20020 */
 		backplane,	/* Backplane flag for COM20020 */
-- 
cgit v1.2.3


From bca5b8939f107e498b3fdc92b3a2d286a868d347 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 9 Jan 2009 13:01:09 +0000
Subject: arcnet: convert to net_device_ops

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arcnet/arcnet.c | 33 ++++++++++++++++-----------------
 include/linux/arcdevice.h   |  7 ++++++-
 2 files changed, 22 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 34b9a4d0da30..a80d4a30a464 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -95,12 +95,12 @@ EXPORT_SYMBOL(arcnet_unregister_proto);
 EXPORT_SYMBOL(arcnet_debug);
 EXPORT_SYMBOL(alloc_arcdev);
 EXPORT_SYMBOL(arcnet_interrupt);
+EXPORT_SYMBOL(arcnet_open);
+EXPORT_SYMBOL(arcnet_close);
+EXPORT_SYMBOL(arcnet_send_packet);
+EXPORT_SYMBOL(arcnet_timeout);
 
 /* Internal function prototypes */
-static int arcnet_open(struct net_device *dev);
-static int arcnet_close(struct net_device *dev);
-static int arcnet_send_packet(struct sk_buff *skb, struct net_device *dev);
-static void arcnet_timeout(struct net_device *dev);
 static int arcnet_header(struct sk_buff *skb, struct net_device *dev,
 			 unsigned short type, const void *daddr,
 			 const void *saddr, unsigned len);
@@ -321,11 +321,18 @@ static const struct header_ops arcnet_header_ops = {
 	.rebuild = arcnet_rebuild_header,
 };
 
+static const struct net_device_ops arcnet_netdev_ops = {
+	.ndo_open	= arcnet_open,
+	.ndo_stop	= arcnet_close,
+	.ndo_start_xmit = arcnet_send_packet,
+	.ndo_tx_timeout = arcnet_timeout,
+};
 
 /* Setup a struct device for ARCnet. */
 static void arcdev_setup(struct net_device *dev)
 {
 	dev->type = ARPHRD_ARCNET;
+	dev->netdev_ops = &arcnet_netdev_ops;
 	dev->header_ops = &arcnet_header_ops;
 	dev->hard_header_len = sizeof(struct archdr);
 	dev->mtu = choose_mtu();
@@ -338,17 +345,9 @@ static void arcdev_setup(struct net_device *dev)
 	/* New-style flags. */
 	dev->flags = IFF_BROADCAST;
 
-	/*
-	 * Put in this stuff here, so we don't have to export the symbols to
-	 * the chipset drivers.
-	 */
-	dev->open = arcnet_open;
-	dev->stop = arcnet_close;
-	dev->hard_start_xmit = arcnet_send_packet;
-	dev->tx_timeout = arcnet_timeout;
 }
 
-struct net_device *alloc_arcdev(char *name)
+struct net_device *alloc_arcdev(const char *name)
 {
 	struct net_device *dev;
 
@@ -370,7 +369,7 @@ struct net_device *alloc_arcdev(char *name)
  * that "should" only need to be set once at boot, so that there is
  * non-reboot way to recover if something goes wrong.
  */
-static int arcnet_open(struct net_device *dev)
+int arcnet_open(struct net_device *dev)
 {
 	struct arcnet_local *lp = netdev_priv(dev);
 	int count, newmtu, error;
@@ -470,7 +469,7 @@ static int arcnet_open(struct net_device *dev)
 
 
 /* The inverse routine to arcnet_open - shuts down the card. */
-static int arcnet_close(struct net_device *dev)
+int arcnet_close(struct net_device *dev)
 {
 	struct arcnet_local *lp = netdev_priv(dev);
 
@@ -599,7 +598,7 @@ static int arcnet_rebuild_header(struct sk_buff *skb)
 
 
 /* Called by the kernel in order to transmit a packet. */
-static int arcnet_send_packet(struct sk_buff *skb, struct net_device *dev)
+int arcnet_send_packet(struct sk_buff *skb, struct net_device *dev)
 {
 	struct arcnet_local *lp = netdev_priv(dev);
 	struct archdr *pkt;
@@ -718,7 +717,7 @@ static int go_tx(struct net_device *dev)
 
 
 /* Called by the kernel when transmit times out */
-static void arcnet_timeout(struct net_device *dev)
+void arcnet_timeout(struct net_device *dev)
 {
 	unsigned long flags;
 	struct arcnet_local *lp = netdev_priv(dev);
diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h
index ef0d6b7df44c..cd4bcb6989ce 100644
--- a/include/linux/arcdevice.h
+++ b/include/linux/arcdevice.h
@@ -333,7 +333,12 @@ void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc);
 
 void arcnet_unregister_proto(struct ArcProto *proto);
 irqreturn_t arcnet_interrupt(int irq, void *dev_id);
-struct net_device *alloc_arcdev(char *name);
+struct net_device *alloc_arcdev(const char *name);
+
+int arcnet_open(struct net_device *dev);
+int arcnet_close(struct net_device *dev);
+int arcnet_send_packet(struct sk_buff *skb, struct net_device *dev);
+void arcnet_timeout(struct net_device *dev);
 
 #endif				/* __KERNEL__ */
 #endif				/* _LINUX_ARCDEVICE_H */
-- 
cgit v1.2.3


From a1799af4d7deefccdaa9d222a886fa1373dbb49a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 9 Jan 2009 13:01:10 +0000
Subject: com20020: convert to net_devic_ops

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arcnet/com20020-isa.c |  2 ++
 drivers/net/arcnet/com20020-pci.c |  3 +++
 drivers/net/arcnet/com20020.c     | 10 ++++++++--
 include/linux/com20020.h          |  1 +
 4 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c
index ea53a940272f..db08fc24047a 100644
--- a/drivers/net/arcnet/com20020-isa.c
+++ b/drivers/net/arcnet/com20020-isa.c
@@ -151,6 +151,8 @@ static int __init com20020_init(void)
 	if (node && node != 0xff)
 		dev->dev_addr[0] = node;
 
+	dev->netdev_ops = &com20020_netdev_ops;
+
 	lp = netdev_priv(dev);
 	lp->backplane = backplane;
 	lp->clockp = clockp & 7;
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 8b51f632581d..dbf4de39754d 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -72,6 +72,9 @@ static int __devinit com20020pci_probe(struct pci_dev *pdev, const struct pci_de
 	dev = alloc_arcdev(device);
 	if (!dev)
 		return -ENOMEM;
+
+	dev->netdev_ops = &com20020_netdev_ops;
+
 	lp = netdev_priv(dev);
 
 	pci_set_drvdata(pdev, dev);
diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c
index 103688358fb8..bbe8f2ccdadb 100644
--- a/drivers/net/arcnet/com20020.c
+++ b/drivers/net/arcnet/com20020.c
@@ -149,6 +149,14 @@ int com20020_check(struct net_device *dev)
 	return 0;
 }
 
+const struct net_device_ops com20020_netdev_ops = {
+	.ndo_open	= arcnet_open,
+	.ndo_stop	= arcnet_close,
+	.ndo_start_xmit = arcnet_send_packet,
+	.ndo_tx_timeout = arcnet_timeout,
+	.ndo_set_multicast_list = com20020_set_mc_list,
+};
+
 /* Set up the struct net_device associated with this card.  Called after
  * probing succeeds.
  */
@@ -170,8 +178,6 @@ int com20020_found(struct net_device *dev, int shared)
 	lp->hw.copy_from_card = com20020_copy_from_card;
 	lp->hw.close = com20020_close;
 
-	dev->set_multicast_list = com20020_set_mc_list;
-
 	if (!dev->dev_addr[0])
 		dev->dev_addr[0] = inb(ioaddr + BUS_ALIGN*8);	/* FIXME: do this some other way! */
 
diff --git a/include/linux/com20020.h b/include/linux/com20020.h
index ac6d9a43e085..350afa773f8f 100644
--- a/include/linux/com20020.h
+++ b/include/linux/com20020.h
@@ -29,6 +29,7 @@
 
 int com20020_check(struct net_device *dev);
 int com20020_found(struct net_device *dev, int shared);
+const struct net_device_ops com20020_netdev_ops;
 
 /* The number of low I/O ports used by the card. */
 #define ARCNET_TOTAL_SIZE 8
-- 
cgit v1.2.3


From 9fd3238e95046b61d518ddacaa767fa09f31b0d0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 9 Jan 2009 13:01:19 +0000
Subject: ibmtr: convert to internal network_device_stats

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tokenring/ibmtr.c | 29 ++++++-----------------------
 include/linux/ibmtr.h         |  2 +-
 2 files changed, 7 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c
index fa7bce6e0c6d..f195d54ae421 100644
--- a/drivers/net/tokenring/ibmtr.c
+++ b/drivers/net/tokenring/ibmtr.c
@@ -200,7 +200,6 @@ static void 	tr_rx(struct net_device *dev);
 static void	ibmtr_reset_timer(struct timer_list*tmr,struct net_device *dev);
 static void	tok_rerun(unsigned long dev_addr);
 static void	ibmtr_readlog(struct net_device *dev);
-static struct 	net_device_stats *tok_get_stats(struct net_device *dev);
 static int	ibmtr_change_mtu(struct net_device *dev, int mtu);
 static void	find_turbo_adapters(int *iolist);
 
@@ -825,7 +824,6 @@ static int __devinit trdev_init(struct net_device *dev)
 	dev->open = tok_open;
 	dev->stop = tok_close;
 	dev->hard_start_xmit = tok_send_packet;
-	dev->get_stats = tok_get_stats;
 	dev->set_multicast_list = tok_set_multicast_list;
 	dev->change_mtu = ibmtr_change_mtu;
 
@@ -1460,7 +1458,7 @@ static irqreturn_t tok_interrupt(int irq, void *dev_id)
 					"%02X\n",
 					(int)retcode, (int)readb(ti->ssb + 6));
 			else
-				ti->tr_stats.tx_packets++;
+				dev->stats.tx_packets++;
 			break;
 		case XMIT_XID_CMD:
 			DPRINTK("xmit xid ret_code: %02X\n",
@@ -1646,7 +1644,7 @@ static void tr_tx(struct net_device *dev)
 		break;
 	}
 	writeb(RESP_IN_ASB, ti->mmio + ACA_OFFSET + ACA_SET + ISRA_ODD);
-	ti->tr_stats.tx_bytes += ti->current_skb->len;
+	dev->stats.tx_bytes += ti->current_skb->len;
 	dev_kfree_skb_irq(ti->current_skb);
 	ti->current_skb = NULL;
 	netif_wake_queue(dev);
@@ -1722,7 +1720,7 @@ static void tr_rx(struct net_device *dev)
 	if (readb(llc + offsetof(struct trllc, llc)) != UI_CMD) {
 		SET_PAGE(ti->asb_page);
 		writeb(DATA_LOST, ti->asb + RETCODE_OFST);
-		ti->tr_stats.rx_dropped++;
+		dev->stats.rx_dropped++;
 		writeb(RESP_IN_ASB, ti->mmio + ACA_OFFSET + ACA_SET + ISRA_ODD);
 		return;
 	}
@@ -1757,7 +1755,7 @@ static void tr_rx(struct net_device *dev)
 
 	if (!(skb = dev_alloc_skb(skb_size))) {
 		DPRINTK("out of memory. frame dropped.\n");
-		ti->tr_stats.rx_dropped++;
+		dev->stats.rx_dropped++;
 		SET_PAGE(ti->asb_page);
 		writeb(DATA_LOST, ti->asb + offsetof(struct asb_rec, ret_code));
 		writeb(RESP_IN_ASB, ti->mmio + ACA_OFFSET + ACA_SET + ISRA_ODD);
@@ -1813,8 +1811,8 @@ static void tr_rx(struct net_device *dev)
 
 	writeb(RESP_IN_ASB, ti->mmio + ACA_OFFSET + ACA_SET + ISRA_ODD);
 
-	ti->tr_stats.rx_bytes += skb->len;
-	ti->tr_stats.rx_packets++;
+	dev->stats.rx_bytes += skb->len;
+	dev->stats.rx_packets++;
 
 	skb->protocol = tr_type_trans(skb, dev);
 	if (IPv4_p) {
@@ -1876,21 +1874,6 @@ static void ibmtr_readlog(struct net_device *dev)
 
 /*****************************************************************************/
 
-/* tok_get_stats():  Basically a scaffold routine which will return
-   the address of the tr_statistics structure associated with
-   this device -- the tr.... structure is an ethnet look-alike
-   so at least for this iteration may suffice.   */
-
-static struct net_device_stats *tok_get_stats(struct net_device *dev)
-{
-
-	struct tok_info *toki;
-	toki = netdev_priv(dev);
-	return (struct net_device_stats *) &toki->tr_stats;
-}
-
-/*****************************************************************************/
-
 static int ibmtr_change_mtu(struct net_device *dev, int mtu)
 {
 	struct tok_info *ti = netdev_priv(dev);
diff --git a/include/linux/ibmtr.h b/include/linux/ibmtr.h
index 1c7a0dd5536a..06695b74d405 100644
--- a/include/linux/ibmtr.h
+++ b/include/linux/ibmtr.h
@@ -207,7 +207,7 @@ struct tok_info {
 	unsigned short exsap_station_id;
 	unsigned short global_int_enable;
 	struct sk_buff *current_skb;
-	struct net_device_stats tr_stats;
+
 	unsigned char auto_speedsave;
 	open_state			open_status, sap_status;
 	enum {MANUAL, AUTOMATIC}	open_mode;
-- 
cgit v1.2.3


From 5a7616af604caf0d436a1ed0d4298bb25cd77d67 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 9 Jan 2009 13:01:35 +0000
Subject: hdlcdrv: convert to internal net_device_stats

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Thomas Sailer <t.sailer@alumni.ethz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/hdlcdrv.c | 27 +++++++--------------------
 include/linux/hdlcdrv.h        |  1 -
 2 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index 8eba61a1d4ab..1215a49c38f1 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -154,7 +154,7 @@ static void hdlc_rx_flag(struct net_device *dev, struct hdlcdrv_state *s)
 	pkt_len = s->hdlcrx.len - 2 + 1; /* KISS kludge */
 	if (!(skb = dev_alloc_skb(pkt_len))) {
 		printk("%s: memory squeeze, dropping packet\n", dev->name);
-		s->stats.rx_dropped++;
+		dev->stats.rx_dropped++;
 		return;
 	}
 	cp = skb_put(skb, pkt_len);
@@ -162,7 +162,7 @@ static void hdlc_rx_flag(struct net_device *dev, struct hdlcdrv_state *s)
 	memcpy(cp, s->hdlcrx.buffer, pkt_len - 1);
 	skb->protocol = ax25_type_trans(skb, dev);
 	netif_rx(skb);
-	s->stats.rx_packets++;
+	dev->stats.rx_packets++;
 }
 
 void hdlcdrv_receiver(struct net_device *dev, struct hdlcdrv_state *s)
@@ -326,7 +326,7 @@ void hdlcdrv_transmitter(struct net_device *dev, struct hdlcdrv_state *s)
 			s->hdlctx.len = pkt_len+2; /* the appended CRC */
 			s->hdlctx.tx_state = 2;
 			s->hdlctx.bitstream = 0;
-			s->stats.tx_packets++;
+			dev->stats.tx_packets++;
 			break;
 		case 2:
 			if (!s->hdlctx.len) {
@@ -426,19 +426,6 @@ static int hdlcdrv_set_mac_address(struct net_device *dev, void *addr)
 	return 0;                                         
 }
 
-/* --------------------------------------------------------------------- */
-
-static struct net_device_stats *hdlcdrv_get_stats(struct net_device *dev)
-{
-	struct hdlcdrv_state *sm = netdev_priv(dev);
-
-	/* 
-	 * Get the current statistics.  This may be called with the
-	 * card open or closed. 
-	 */
-	return &sm->stats;
-}
-
 /* --------------------------------------------------------------------- */
 /*
  * Open/initialize the board. This is called (in the current kernel)
@@ -568,10 +555,10 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		bi.data.cs.ptt = hdlcdrv_ptt(s);
 		bi.data.cs.dcd = s->hdlcrx.dcd;
 		bi.data.cs.ptt_keyed = s->ptt_keyed;
-		bi.data.cs.tx_packets = s->stats.tx_packets;
-		bi.data.cs.tx_errors = s->stats.tx_errors;
-		bi.data.cs.rx_packets = s->stats.rx_packets;
-		bi.data.cs.rx_errors = s->stats.rx_errors;
+		bi.data.cs.tx_packets = dev->stats.tx_packets;
+		bi.data.cs.tx_errors = dev->stats.tx_errors;
+		bi.data.cs.rx_packets = dev->stats.rx_packets;
+		bi.data.cs.rx_errors = dev->stats.rx_errors;
 		break;		
 
 	case HDLCDRVCTL_OLDGETSTAT:
diff --git a/include/linux/hdlcdrv.h b/include/linux/hdlcdrv.h
index bf6302f6b5f8..0821bac62b83 100644
--- a/include/linux/hdlcdrv.h
+++ b/include/linux/hdlcdrv.h
@@ -241,7 +241,6 @@ struct hdlcdrv_state {
 	struct hdlcdrv_bitbuffer bitbuf_hdlc;
 #endif /* HDLCDRV_DEBUG */
 
-	struct net_device_stats stats;
 	int ptt_keyed;
 
 	/* queued skb for transmission */
-- 
cgit v1.2.3


From 7cdc15f5f9db71e9c92422918ab9f8df0d31f81f Mon Sep 17 00:00:00 2001
From: Krzysztof Hałasa <khc@pm.waw.pl>
Date: Thu, 8 Jan 2009 19:46:54 +0100
Subject: WAN: Generic HDLC now uses IFF_WAN_HDLC private flag.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc.c | 3 ++-
 include/linux/if.h     | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index 1f2a140c9f7c..d83cd7884e05 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -106,7 +106,7 @@ static int hdlc_device_event(struct notifier_block *this, unsigned long event,
 	if (dev_net(dev) != &init_net)
 		return NOTIFY_DONE;
 
-	if (dev->get_stats != hdlc_get_stats)
+	if (!(dev->priv_flags & IFF_WAN_HDLC))
 		return NOTIFY_DONE; /* not an HDLC device */
 
 	if (event != NETDEV_CHANGE)
@@ -235,6 +235,7 @@ static void hdlc_setup_dev(struct net_device *dev)
 	 */
 	dev->get_stats		 = hdlc_get_stats;
 	dev->flags		 = IFF_POINTOPOINT | IFF_NOARP;
+	dev->priv_flags		 = IFF_WAN_HDLC;
 	dev->mtu		 = HDLC_MAX_MTU;
 	dev->type		 = ARPHRD_RAWHDLC;
 	dev->hard_header_len	 = 16;
diff --git a/include/linux/if.h b/include/linux/if.h
index 2a6e29620a96..1108f3e099e3 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -66,6 +66,7 @@
 #define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
 #define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/
 #define IFF_MASTER_ARPMON 0x100		/* bonding master, ARP mon in use */
+#define IFF_WAN_HDLC	0x200		/* WAN HDLC device		*/
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
-- 
cgit v1.2.3


From 991990a12de42281f81b4e3a6471586d2d0caf6a Mon Sep 17 00:00:00 2001
From: Krzysztof Hałasa <khc@pm.waw.pl>
Date: Thu, 8 Jan 2009 22:52:11 +0100
Subject: WAN: Convert generic HDLC drivers to netdev_ops.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also remove unneeded last_rx update from Synclink drivers.
Synclink part mostly by Stephen Hemminger.

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/pcmcia/synclink_cs.c | 18 +++++++++++-------
 drivers/char/synclink.c           | 18 +++++++++++-------
 drivers/char/synclink_gt.c        | 18 +++++++++++-------
 drivers/char/synclinkmp.c         | 18 +++++++++++-------
 drivers/net/wan/c101.c            | 12 ++++++++----
 drivers/net/wan/cosa.c            | 14 ++++++++++----
 drivers/net/wan/dscc4.c           | 18 +++++++++++-------
 drivers/net/wan/farsync.c         | 18 ++++++++++++------
 drivers/net/wan/hdlc.c            | 14 +++++++++++---
 drivers/net/wan/hdlc_cisco.c      |  1 -
 drivers/net/wan/hdlc_fr.c         | 28 +++++++++-------------------
 drivers/net/wan/hdlc_ppp.c        |  2 --
 drivers/net/wan/hdlc_raw.c        |  3 ---
 drivers/net/wan/hdlc_raw_eth.c    |  8 ++------
 drivers/net/wan/hdlc_x25.c        |  2 +-
 drivers/net/wan/hostess_sv11.c    | 12 +++++++++---
 drivers/net/wan/ixp4xx_hss.c      | 12 +++++++++---
 drivers/net/wan/lmc/lmc_main.c    | 19 +++++++++++--------
 drivers/net/wan/lmc/lmc_proto.c   | 17 +----------------
 drivers/net/wan/n2.c              | 12 ++++++++----
 drivers/net/wan/pc300too.c        | 12 ++++++++----
 drivers/net/wan/pci200syn.c       | 12 ++++++++----
 drivers/net/wan/sealevel.c        | 12 +++++++++---
 drivers/net/wan/wanxl.c           | 14 ++++++++++----
 include/linux/hdlc.h              |  5 +++++
 25 files changed, 186 insertions(+), 133 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index dc073e167abc..5608a1e5a3b3 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -4311,10 +4311,17 @@ static void hdlcdev_rx(MGSLPC_INFO *info, char *buf, int size)
 	dev->stats.rx_bytes += size;
 
 	netif_rx(skb);
-
-	dev->last_rx = jiffies;
 }
 
+static const struct net_device_ops hdlcdev_ops = {
+	.ndo_open       = hdlcdev_open,
+	.ndo_stop       = hdlcdev_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = hdlcdev_ioctl,
+	.ndo_tx_timeout = hdlcdev_tx_timeout,
+};
+
 /**
  * called by device driver when adding device instance
  * do generic HDLC initialization
@@ -4341,11 +4348,8 @@ static int hdlcdev_init(MGSLPC_INFO *info)
 	dev->irq       = info->irq_level;
 
 	/* network layer callbacks and settings */
-	dev->do_ioctl       = hdlcdev_ioctl;
-	dev->open           = hdlcdev_open;
-	dev->stop           = hdlcdev_close;
-	dev->tx_timeout     = hdlcdev_tx_timeout;
-	dev->watchdog_timeo = 10*HZ;
+	dev->netdev_ops	    = &hdlcdev_ops;
+	dev->watchdog_timeo = 10 * HZ;
 	dev->tx_queue_len   = 50;
 
 	/* generic HDLC layer callbacks and settings */
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index b8063d4cad32..0057a8f58cb1 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -8007,10 +8007,17 @@ static void hdlcdev_rx(struct mgsl_struct *info, char *buf, int size)
 	dev->stats.rx_bytes += size;
 
 	netif_rx(skb);
-
-	dev->last_rx = jiffies;
 }
 
+static const struct net_device_ops hdlcdev_ops = {
+	.ndo_open       = hdlcdev_open,
+	.ndo_stop       = hdlcdev_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = hdlcdev_ioctl,
+	.ndo_tx_timeout = hdlcdev_tx_timeout,
+};
+
 /**
  * called by device driver when adding device instance
  * do generic HDLC initialization
@@ -8038,11 +8045,8 @@ static int hdlcdev_init(struct mgsl_struct *info)
 	dev->dma       = info->dma_level;
 
 	/* network layer callbacks and settings */
-	dev->do_ioctl       = hdlcdev_ioctl;
-	dev->open           = hdlcdev_open;
-	dev->stop           = hdlcdev_close;
-	dev->tx_timeout     = hdlcdev_tx_timeout;
-	dev->watchdog_timeo = 10*HZ;
+	dev->netdev_ops     = &hdlcdev_ops;
+	dev->watchdog_timeo = 10 * HZ;
 	dev->tx_queue_len   = 50;
 
 	/* generic HDLC layer callbacks and settings */
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index f329f459817c..efb3dc928a43 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -1763,10 +1763,17 @@ static void hdlcdev_rx(struct slgt_info *info, char *buf, int size)
 	dev->stats.rx_bytes += size;
 
 	netif_rx(skb);
-
-	dev->last_rx = jiffies;
 }
 
+static const struct net_device_ops hdlcdev_ops = {
+	.ndo_open       = hdlcdev_open,
+	.ndo_stop       = hdlcdev_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = hdlcdev_ioctl,
+	.ndo_tx_timeout = hdlcdev_tx_timeout,
+};
+
 /**
  * called by device driver when adding device instance
  * do generic HDLC initialization
@@ -1794,11 +1801,8 @@ static int hdlcdev_init(struct slgt_info *info)
 	dev->irq       = info->irq_level;
 
 	/* network layer callbacks and settings */
-	dev->do_ioctl       = hdlcdev_ioctl;
-	dev->open           = hdlcdev_open;
-	dev->stop           = hdlcdev_close;
-	dev->tx_timeout     = hdlcdev_tx_timeout;
-	dev->watchdog_timeo = 10*HZ;
+	dev->netdev_ops	    = &hdlcdev_ops;
+	dev->watchdog_timeo = 10 * HZ;
 	dev->tx_queue_len   = 50;
 
 	/* generic HDLC layer callbacks and settings */
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 7b0c5b2dd263..8eb6c89a980e 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -1907,10 +1907,17 @@ static void hdlcdev_rx(SLMP_INFO *info, char *buf, int size)
 	dev->stats.rx_bytes += size;
 
 	netif_rx(skb);
-
-	dev->last_rx = jiffies;
 }
 
+static const struct net_device_ops hdlcdev_ops = {
+	.ndo_open       = hdlcdev_open,
+	.ndo_stop       = hdlcdev_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = hdlcdev_ioctl,
+	.ndo_tx_timeout = hdlcdev_tx_timeout,
+};
+
 /**
  * called by device driver when adding device instance
  * do generic HDLC initialization
@@ -1938,11 +1945,8 @@ static int hdlcdev_init(SLMP_INFO *info)
 	dev->irq       = info->irq_level;
 
 	/* network layer callbacks and settings */
-	dev->do_ioctl       = hdlcdev_ioctl;
-	dev->open           = hdlcdev_open;
-	dev->stop           = hdlcdev_close;
-	dev->tx_timeout     = hdlcdev_tx_timeout;
-	dev->watchdog_timeo = 10*HZ;
+	dev->netdev_ops	    = &hdlcdev_ops;
+	dev->watchdog_timeo = 10 * HZ;
 	dev->tx_queue_len   = 50;
 
 	/* generic HDLC layer callbacks and settings */
diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index b46897996f7e..9693b0fd323d 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -296,7 +296,13 @@ static void c101_destroy_card(card_t *card)
 	kfree(card);
 }
 
-
+static const struct net_device_ops c101_ops = {
+	.ndo_open       = c101_open,
+	.ndo_stop       = c101_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = c101_ioctl,
+};
 
 static int __init c101_run(unsigned long irq, unsigned long winbase)
 {
@@ -367,9 +373,7 @@ static int __init c101_run(unsigned long irq, unsigned long winbase)
 	dev->mem_start = winbase;
 	dev->mem_end = winbase + C101_MAPPED_RAM_SIZE - 1;
 	dev->tx_queue_len = 50;
-	dev->do_ioctl = c101_ioctl;
-	dev->open = c101_open;
-	dev->stop = c101_close;
+	dev->netdev_ops = &c101_ops;
 	hdlc->attach = sca_attach;
 	hdlc->xmit = sca_xmit;
 	card->settings.clock_type = CLOCK_EXT;
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index d80b72e22dea..0d7ba117ef60 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -427,6 +427,15 @@ static void __exit cosa_exit(void)
 }
 module_exit(cosa_exit);
 
+static const struct net_device_ops cosa_ops = {
+	.ndo_open       = cosa_net_open,
+	.ndo_stop       = cosa_net_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = cosa_net_ioctl,
+	.ndo_tx_timeout = cosa_net_timeout,
+};
+
 static int cosa_probe(int base, int irq, int dma)
 {
 	struct cosa_data *cosa = cosa_cards+nr_cards;
@@ -575,10 +584,7 @@ static int cosa_probe(int base, int irq, int dma)
 		}
 		dev_to_hdlc(chan->netdev)->attach = cosa_net_attach;
 		dev_to_hdlc(chan->netdev)->xmit = cosa_net_tx;
-		chan->netdev->open = cosa_net_open;
-		chan->netdev->stop = cosa_net_close;
-		chan->netdev->do_ioctl = cosa_net_ioctl;
-		chan->netdev->tx_timeout = cosa_net_timeout;
+		chan->netdev->netdev_ops = &cosa_ops;
 		chan->netdev->watchdog_timeo = TX_TIMEOUT;
 		chan->netdev->base_addr = chan->cosa->datareg;
 		chan->netdev->irq = chan->cosa->irq;
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index 888025db2f02..8face5db8f32 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -883,6 +883,15 @@ static inline int dscc4_set_quartz(struct dscc4_dev_priv *dpriv, int hz)
 	return ret;
 }
 
+static const struct net_device_ops dscc4_ops = {
+	.ndo_open       = dscc4_open,
+	.ndo_stop       = dscc4_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = dscc4_ioctl,
+	.ndo_tx_timeout = dscc4_tx_timeout,
+};
+
 static int dscc4_found1(struct pci_dev *pdev, void __iomem *ioaddr)
 {
 	struct dscc4_pci_priv *ppriv;
@@ -916,13 +925,8 @@ static int dscc4_found1(struct pci_dev *pdev, void __iomem *ioaddr)
 		hdlc_device *hdlc = dev_to_hdlc(d);
 
 	        d->base_addr = (unsigned long)ioaddr;
-		d->init = NULL;
 	        d->irq = pdev->irq;
-	        d->open = dscc4_open;
-	        d->stop = dscc4_close;
-		d->set_multicast_list = NULL;
-	        d->do_ioctl = dscc4_ioctl;
-		d->tx_timeout = dscc4_tx_timeout;
+		d->netdev_ops = &dscc4_ops;
 		d->watchdog_timeo = TX_TIMEOUT;
 		SET_NETDEV_DEV(d, &pdev->dev);
 
@@ -1048,7 +1052,7 @@ static int dscc4_open(struct net_device *dev)
 	struct dscc4_pci_priv *ppriv;
 	int ret = -EAGAIN;
 
-	if ((dscc4_loopback_check(dpriv) < 0) || !dev->hard_start_xmit)
+	if ((dscc4_loopback_check(dpriv) < 0))
 		goto err;
 
 	if ((ret = hdlc_open(dev)))
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 48a2c9d28950..00945f7c1e9b 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -2424,6 +2424,15 @@ fst_init_card(struct fst_card_info *card)
 	       type_strings[card->type], card->irq, card->nports);
 }
 
+static const struct net_device_ops fst_ops = {
+	.ndo_open       = fst_open,
+	.ndo_stop       = fst_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = fst_ioctl,
+	.ndo_tx_timeout = fst_tx_timeout,
+};
+
 /*
  *      Initialise card when detected.
  *      Returns 0 to indicate success, or errno otherwise.
@@ -2565,12 +2574,9 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                 dev->base_addr   = card->pci_conf;
                 dev->irq         = card->irq;
 
-                dev->tx_queue_len          = FST_TX_QUEUE_LEN;
-                dev->open                  = fst_open;
-                dev->stop                  = fst_close;
-                dev->do_ioctl              = fst_ioctl;
-                dev->watchdog_timeo        = FST_TX_TIMEOUT;
-                dev->tx_timeout            = fst_tx_timeout;
+		dev->netdev_ops = &fst_ops;
+		dev->tx_queue_len = FST_TX_QUEUE_LEN;
+		dev->watchdog_timeo = FST_TX_TIMEOUT;
                 hdlc->attach = fst_attach;
                 hdlc->xmit   = fst_start_xmit;
 	}
diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index dbc179887f8b..43da8bd72973 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -44,7 +44,7 @@ static const char* version = "HDLC support module revision 1.22";
 
 static struct hdlc_proto *first_proto;
 
-static int hdlc_change_mtu(struct net_device *dev, int new_mtu)
+int hdlc_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if ((new_mtu < 68) || (new_mtu > HDLC_MAX_MTU))
 		return -EINVAL;
@@ -66,7 +66,15 @@ static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev,
 	return hdlc->proto->netif_rx(skb);
 }
 
+int hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	hdlc_device *hdlc = dev_to_hdlc(dev);
 
+	if (hdlc->proto->xmit)
+		return hdlc->proto->xmit(skb, dev);
+
+	return hdlc->xmit(skb, dev); /* call hardware driver directly */
+}
 
 static inline void hdlc_proto_start(struct net_device *dev)
 {
@@ -231,8 +239,6 @@ static void hdlc_setup_dev(struct net_device *dev)
 	dev->hard_header_len	 = 16;
 	dev->addr_len		 = 0;
 	dev->header_ops		 = &hdlc_null_ops;
-
-	dev->change_mtu		 = hdlc_change_mtu;
 }
 
 static void hdlc_setup(struct net_device *dev)
@@ -330,6 +336,8 @@ MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
 MODULE_DESCRIPTION("HDLC support module");
 MODULE_LICENSE("GPL v2");
 
+EXPORT_SYMBOL(hdlc_change_mtu);
+EXPORT_SYMBOL(hdlc_start_xmit);
 EXPORT_SYMBOL(hdlc_open);
 EXPORT_SYMBOL(hdlc_close);
 EXPORT_SYMBOL(hdlc_ioctl);
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index 44e64b15dbd1..af3fd4fead8a 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -382,7 +382,6 @@ static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 		memcpy(&state(hdlc)->settings, &new_settings, size);
 		spin_lock_init(&state(hdlc)->lock);
-		dev->hard_start_xmit = hdlc->xmit;
 		dev->header_ops = &cisco_header_ops;
 		dev->type = ARPHRD_CISCO;
 		netif_dormant_on(dev);
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index f1ddd7c3459c..70e57cebc955 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -444,18 +444,6 @@ static int pvc_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-
-
-static int pvc_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < 68) || (new_mtu > HDLC_MAX_MTU))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
-
-
 static inline void fr_log_dlci_active(pvc_device *pvc)
 {
 	printk(KERN_INFO "%s: DLCI %d [%s%s%s]%s %s\n",
@@ -1068,6 +1056,14 @@ static void pvc_setup(struct net_device *dev)
 	dev->addr_len = 2;
 }
 
+static const struct net_device_ops pvc_ops = {
+	.ndo_open       = pvc_open,
+	.ndo_stop       = pvc_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = pvc_xmit,
+	.ndo_do_ioctl   = pvc_ioctl,
+};
+
 static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 {
 	hdlc_device *hdlc = dev_to_hdlc(frad);
@@ -1104,11 +1100,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 		*(__be16*)dev->dev_addr = htons(dlci);
 		dlci_to_q922(dev->broadcast, dlci);
 	}
-	dev->hard_start_xmit = pvc_xmit;
-	dev->open = pvc_open;
-	dev->stop = pvc_close;
-	dev->do_ioctl = pvc_ioctl;
-	dev->change_mtu = pvc_change_mtu;
+	dev->netdev_ops = &pvc_ops;
 	dev->mtu = HDLC_MAX_MTU;
 	dev->tx_queue_len = 0;
 	dev->ml_priv = pvc;
@@ -1260,8 +1252,6 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 			state(hdlc)->dce_pvc_count = 0;
 		}
 		memcpy(&state(hdlc)->settings, &new_settings, size);
-
-		dev->hard_start_xmit = hdlc->xmit;
 		dev->type = ARPHRD_FRAD;
 		return 0;
 
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 57fe714c1c7f..7b8a5eae201d 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -558,7 +558,6 @@ out:
 	return NET_RX_DROP;
 }
 
-
 static void ppp_timer(unsigned long arg)
 {
 	struct proto *proto = (struct proto *)arg;
@@ -679,7 +678,6 @@ static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
 		ppp->keepalive_interval = 10;
 		ppp->keepalive_timeout = 60;
 
-		dev->hard_start_xmit = hdlc->xmit;
 		dev->hard_header_len = sizeof(struct hdlc_header);
 		dev->header_ops = &ppp_header_ops;
 		dev->type = ARPHRD_PPP;
diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c
index 8612311748f4..6e92c64ebd0f 100644
--- a/drivers/net/wan/hdlc_raw.c
+++ b/drivers/net/wan/hdlc_raw.c
@@ -30,8 +30,6 @@ static __be16 raw_type_trans(struct sk_buff *skb, struct net_device *dev)
 	return __constant_htons(ETH_P_IP);
 }
 
-
-
 static struct hdlc_proto proto = {
 	.type_trans	= raw_type_trans,
 	.ioctl		= raw_ioctl,
@@ -86,7 +84,6 @@ static int raw_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 		memcpy(hdlc->state, &new_settings, size);
-		dev->hard_start_xmit = hdlc->xmit;
 		dev->type = ARPHRD_RAWHDLC;
 		netif_dormant_off(dev);
 		return 0;
diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c
index a13fc3207520..49e68f5ca5f2 100644
--- a/drivers/net/wan/hdlc_raw_eth.c
+++ b/drivers/net/wan/hdlc_raw_eth.c
@@ -45,6 +45,7 @@ static int eth_tx(struct sk_buff *skb, struct net_device *dev)
 
 static struct hdlc_proto proto = {
 	.type_trans	= eth_type_trans,
+	.xmit		= eth_tx,
 	.ioctl		= raw_eth_ioctl,
 	.module		= THIS_MODULE,
 };
@@ -56,9 +57,7 @@ static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
 	const size_t size = sizeof(raw_hdlc_proto);
 	raw_hdlc_proto new_settings;
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	int result;
-	int (*old_ch_mtu)(struct net_device *, int);
-	int old_qlen;
+	int result, old_qlen;
 
 	switch (ifr->ifr_settings.type) {
 	case IF_GET_PROTO:
@@ -99,11 +98,8 @@ static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 		memcpy(hdlc->state, &new_settings, size);
-		dev->hard_start_xmit = eth_tx;
-		old_ch_mtu = dev->change_mtu;
 		old_qlen = dev->tx_queue_len;
 		ether_setup(dev);
-		dev->change_mtu = old_ch_mtu;
 		dev->tx_queue_len = old_qlen;
 		random_ether_addr(dev->dev_addr);
 		netif_dormant_off(dev);
diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index cbcbf6f0414c..b1dc29ed1583 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -184,6 +184,7 @@ static struct hdlc_proto proto = {
 	.close		= x25_close,
 	.ioctl		= x25_ioctl,
 	.netif_rx	= x25_rx,
+	.xmit		= x25_xmit,
 	.module		= THIS_MODULE,
 };
 
@@ -213,7 +214,6 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 		if ((result = attach_hdlc_protocol(dev, &proto, 0)))
 			return result;
-		dev->hard_start_xmit = x25_xmit;
 		dev->type = ARPHRD_X25;
 		netif_dormant_off(dev);
 		return 0;
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index af54f0cf1b35..567d4f5062d6 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -173,6 +173,14 @@ static int hostess_attach(struct net_device *dev, unsigned short encoding,
  *	Description block for a Comtrol Hostess SV11 card
  */
 
+static const struct net_device_ops hostess_ops = {
+	.ndo_open       = hostess_open,
+	.ndo_stop       = hostess_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = hostess_ioctl,
+};
+
 static struct z8530_dev *sv11_init(int iobase, int irq)
 {
 	struct z8530_dev *sv;
@@ -267,9 +275,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
 
 	dev_to_hdlc(netdev)->attach = hostess_attach;
 	dev_to_hdlc(netdev)->xmit = hostess_queue_xmit;
-	netdev->open = hostess_open;
-	netdev->stop = hostess_close;
-	netdev->do_ioctl = hostess_ioctl;
+	netdev->netdev_ops = &hostess_ops;
 	netdev->base_addr = iobase;
 	netdev->irq = irq;
 
diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 0dbd85b0162d..7e8bbba2cc1b 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -1230,6 +1230,14 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
  * initialization
  ****************************************************************************/
 
+static const struct net_device_ops hss_hdlc_ops = {
+	.ndo_open       = hss_hdlc_open,
+	.ndo_stop       = hss_hdlc_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = hss_hdlc_ioctl,
+};
+
 static int __devinit hss_init_one(struct platform_device *pdev)
 {
 	struct port *port;
@@ -1254,9 +1262,7 @@ static int __devinit hss_init_one(struct platform_device *pdev)
 	hdlc = dev_to_hdlc(dev);
 	hdlc->attach = hss_hdlc_attach;
 	hdlc->xmit = hss_hdlc_xmit;
-	dev->open = hss_hdlc_open;
-	dev->stop = hss_hdlc_close;
-	dev->do_ioctl = hss_hdlc_ioctl;
+	dev->netdev_ops = &hss_hdlc_ops;
 	dev->tx_queue_len = 100;
 	port->clock_type = CLOCK_EXT;
 	port->clock_rate = 2048000;
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index feac3b99f8fe..45b1822c962d 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -806,6 +806,16 @@ static int lmc_attach(struct net_device *dev, unsigned short encoding,
 	return -EINVAL;
 }
 
+static const struct net_device_ops lmc_ops = {
+	.ndo_open       = lmc_open,
+	.ndo_stop       = lmc_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = lmc_ioctl,
+	.ndo_tx_timeout = lmc_driver_timeout,
+	.ndo_get_stats  = lmc_get_stats,
+};
+
 static int __devinit lmc_init_one(struct pci_dev *pdev,
 				  const struct pci_device_id *ent)
 {
@@ -849,11 +859,7 @@ static int __devinit lmc_init_one(struct pci_dev *pdev,
 	dev->type = ARPHRD_HDLC;
 	dev_to_hdlc(dev)->xmit = lmc_start_xmit;
 	dev_to_hdlc(dev)->attach = lmc_attach;
-	dev->open = lmc_open;
-	dev->stop = lmc_close;
-	dev->get_stats = lmc_get_stats;
-	dev->do_ioctl = lmc_ioctl;
-	dev->tx_timeout = lmc_driver_timeout;
+	dev->netdev_ops = &lmc_ops;
 	dev->watchdog_timeo = HZ; /* 1 second */
 	dev->tx_queue_len = 100;
 	sc->lmc_device = dev;
@@ -1059,9 +1065,6 @@ static int lmc_open(struct net_device *dev)
     if ((err = lmc_proto_open(sc)) != 0)
 	    return err;
 
-    dev->do_ioctl = lmc_ioctl;
-
-
     netif_start_queue(dev);
     sc->extra_stats.tx_tbusy0++;
 
diff --git a/drivers/net/wan/lmc/lmc_proto.c b/drivers/net/wan/lmc/lmc_proto.c
index 94b4c208b013..044a48175c42 100644
--- a/drivers/net/wan/lmc/lmc_proto.c
+++ b/drivers/net/wan/lmc/lmc_proto.c
@@ -51,30 +51,15 @@
 void lmc_proto_attach(lmc_softc_t *sc) /*FOLD00*/
 {
     lmc_trace(sc->lmc_device, "lmc_proto_attach in");
-    switch(sc->if_type){
-    case LMC_PPP:
-        {
-            struct net_device *dev = sc->lmc_device;
-            dev->do_ioctl = lmc_ioctl;
-        }
-        break;
-    case LMC_NET:
-        {
+    if (sc->if_type == LMC_NET) {
             struct net_device *dev = sc->lmc_device;
             /*
 	     * They set a few basics because they don't use HDLC
              */
             dev->flags |= IFF_POINTOPOINT;
-
             dev->hard_header_len = 0;
             dev->addr_len = 0;
         }
-    case LMC_RAW: /* Setup the task queue, maybe we should notify someone? */
-        {
-        }
-    default:
-        break;
-    }
     lmc_trace(sc->lmc_device, "lmc_proto_attach out");
 }
 
diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c
index 697715ae80f4..83da596e2052 100644
--- a/drivers/net/wan/n2.c
+++ b/drivers/net/wan/n2.c
@@ -324,7 +324,13 @@ static void n2_destroy_card(card_t *card)
 	kfree(card);
 }
 
-
+static const struct net_device_ops n2_ops = {
+	.ndo_open       = n2_open,
+	.ndo_stop       = n2_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = n2_ioctl,
+};
 
 static int __init n2_run(unsigned long io, unsigned long irq,
 			 unsigned long winbase, long valid0, long valid1)
@@ -460,9 +466,7 @@ static int __init n2_run(unsigned long io, unsigned long irq,
 		dev->mem_start = winbase;
 		dev->mem_end = winbase + USE_WINDOWSIZE - 1;
 		dev->tx_queue_len = 50;
-		dev->do_ioctl = n2_ioctl;
-		dev->open = n2_open;
-		dev->stop = n2_close;
+		dev->netdev_ops = &n2_ops;
 		hdlc->attach = sca_attach;
 		hdlc->xmit = sca_xmit;
 		port->settings.clock_type = CLOCK_EXT;
diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index f247e5d9002a..60ece54bdd94 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -287,7 +287,13 @@ static void pc300_pci_remove_one(struct pci_dev *pdev)
 	kfree(card);
 }
 
-
+static const struct net_device_ops pc300_ops = {
+	.ndo_open       = pc300_open,
+	.ndo_stop       = pc300_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = pc300_ioctl,
+};
 
 static int __devinit pc300_pci_init_one(struct pci_dev *pdev,
 					const struct pci_device_id *ent)
@@ -448,9 +454,7 @@ static int __devinit pc300_pci_init_one(struct pci_dev *pdev,
 		dev->mem_start = ramphys;
 		dev->mem_end = ramphys + ramsize - 1;
 		dev->tx_queue_len = 50;
-		dev->do_ioctl = pc300_ioctl;
-		dev->open = pc300_open;
-		dev->stop = pc300_close;
+		dev->netdev_ops = &pc300_ops;
 		hdlc->attach = sca_attach;
 		hdlc->xmit = sca_xmit;
 		port->settings.clock_type = CLOCK_EXT;
diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c
index 1104d3a692f7..e035d8c57e11 100644
--- a/drivers/net/wan/pci200syn.c
+++ b/drivers/net/wan/pci200syn.c
@@ -265,7 +265,13 @@ static void pci200_pci_remove_one(struct pci_dev *pdev)
 	kfree(card);
 }
 
-
+static const struct net_device_ops pci200_ops = {
+	.ndo_open       = pci200_open,
+	.ndo_stop       = pci200_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = pci200_ioctl,
+};
 
 static int __devinit pci200_pci_init_one(struct pci_dev *pdev,
 					 const struct pci_device_id *ent)
@@ -395,9 +401,7 @@ static int __devinit pci200_pci_init_one(struct pci_dev *pdev,
 		dev->mem_start = ramphys;
 		dev->mem_end = ramphys + ramsize - 1;
 		dev->tx_queue_len = 50;
-		dev->do_ioctl = pci200_ioctl;
-		dev->open = pci200_open;
-		dev->stop = pci200_close;
+		dev->netdev_ops = &pci200_ops;
 		hdlc->attach = sca_attach;
 		hdlc->xmit = sca_xmit;
 		port->settings.clock_type = CLOCK_EXT;
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 0941a26f6e3f..23b269027453 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -169,6 +169,14 @@ static int sealevel_attach(struct net_device *dev, unsigned short encoding,
 	return -EINVAL;
 }
 
+static const struct net_device_ops sealevel_ops = {
+	.ndo_open       = sealevel_open,
+	.ndo_stop       = sealevel_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = sealevel_ioctl,
+};
+
 static int slvl_setup(struct slvl_device *sv, int iobase, int irq)
 {
 	struct net_device *dev = alloc_hdlcdev(sv);
@@ -177,9 +185,7 @@ static int slvl_setup(struct slvl_device *sv, int iobase, int irq)
 
 	dev_to_hdlc(dev)->attach = sealevel_attach;
 	dev_to_hdlc(dev)->xmit = sealevel_queue_xmit;
-	dev->open = sealevel_open;
-	dev->stop = sealevel_close;
-	dev->do_ioctl = sealevel_ioctl;
+	dev->netdev_ops = &sealevel_ops;
 	dev->base_addr = iobase;
 	dev->irq = irq;
 
diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index 4bffb67ebcae..887acb0dc807 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -547,6 +547,15 @@ static void wanxl_pci_remove_one(struct pci_dev *pdev)
 
 #include "wanxlfw.inc"
 
+static const struct net_device_ops wanxl_ops = {
+	.ndo_open       = wanxl_open,
+	.ndo_stop       = wanxl_close,
+	.ndo_change_mtu = hdlc_change_mtu,
+	.ndo_start_xmit = hdlc_start_xmit,
+	.ndo_do_ioctl   = wanxl_ioctl,
+	.ndo_get_stats  = wanxl_get_stats,
+};
+
 static int __devinit wanxl_pci_init_one(struct pci_dev *pdev,
 					const struct pci_device_id *ent)
 {
@@ -777,12 +786,9 @@ static int __devinit wanxl_pci_init_one(struct pci_dev *pdev,
 		hdlc = dev_to_hdlc(dev);
 		spin_lock_init(&port->lock);
 		dev->tx_queue_len = 50;
-		dev->do_ioctl = wanxl_ioctl;
-		dev->open = wanxl_open;
-		dev->stop = wanxl_close;
+		dev->netdev_ops = &wanxl_ops;
 		hdlc->attach = wanxl_attach;
 		hdlc->xmit = wanxl_xmit;
-		dev->get_stats = wanxl_get_stats;
 		port->card = card;
 		port->node = i;
 		get_status(port)->clocking = CLOCK_EXT;
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index fd47a151665e..6a6e701f1631 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -38,6 +38,7 @@ struct hdlc_proto {
 	int (*ioctl)(struct net_device *dev, struct ifreq *ifr);
 	__be16 (*type_trans)(struct sk_buff *skb, struct net_device *dev);
 	int (*netif_rx)(struct sk_buff *skb);
+	int (*xmit)(struct sk_buff *skb, struct net_device *dev);
 	struct module *module;
 	struct hdlc_proto *next; /* next protocol in the list */
 };
@@ -102,6 +103,10 @@ static __inline__ void debug_frame(const struct sk_buff *skb)
 int hdlc_open(struct net_device *dev);
 /* Must be called by hardware driver when HDLC device is being closed */
 void hdlc_close(struct net_device *dev);
+/* May be used by hardware driver */
+int hdlc_change_mtu(struct net_device *dev, int new_mtu);
+/* Must be pointed to by hw driver's dev->netdev_ops->ndo_start_xmit */
+int hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev);
 
 int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
 			 size_t size);
-- 
cgit v1.2.3


From 5ec99fdf8e1a6ee90fce22c2fce94871ab44e8ba Mon Sep 17 00:00:00 2001
From: Cesar Eduardo Barros <cesarb@cesarb.net>
Date: Wed, 14 Jan 2009 20:34:04 -0800
Subject: sc92031: use device id directly instead of made-up name

Instead of making up a name for the device ids, put them directly in the
device id table. Also move the vendor id to pci_ids.h.

Signed-off-by: Cesar Eduardo Barros <cesarb@cesarb.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sc92031.c   | 8 ++------
 include/linux/pci_ids.h | 2 ++
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c
index d24acdcdb157..00dfddbf2f9b 100644
--- a/drivers/net/sc92031.c
+++ b/drivers/net/sc92031.c
@@ -31,10 +31,6 @@
 
 #include <asm/irq.h>
 
-#define PCI_VENDOR_ID_SILAN		0x1904
-#define PCI_DEVICE_ID_SILAN_SC92031	0x2031
-#define PCI_DEVICE_ID_SILAN_8139D	0x8139
-
 #define SC92031_NAME "sc92031"
 
 /* BAR 0 is MMIO, BAR 1 is PIO */
@@ -1592,8 +1588,8 @@ out:
 }
 
 static struct pci_device_id sc92031_pci_device_id_table[] __devinitdata = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_SILAN, PCI_DEVICE_ID_SILAN_SC92031) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_SILAN, PCI_DEVICE_ID_SILAN_8139D) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_SILAN, 0x2031) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_SILAN, 0x8139) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, sc92031_pci_device_id_table);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index d56ad9c21c09..302423afa136 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2209,6 +2209,8 @@
 
 #define PCI_VENDOR_ID_TOPSPIN		0x1867
 
+#define PCI_VENDOR_ID_SILAN		0x1904
+
 #define PCI_VENDOR_ID_TDI               0x192E
 #define PCI_DEVICE_ID_TDI_EHCI          0x0101
 
-- 
cgit v1.2.3


From 288379f050284087578b77e04f040b57db3db3f8 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 19 Jan 2009 16:43:59 -0800
Subject: net: Remove redundant NAPI functions

Following the removal of the unused struct net_device * parameter from
the NAPI functions named *netif_rx_* in commit 908a7a1, they are
exactly equivalent to the corresponding *napi_* functions and are
therefore redundant.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/nes/nes_hw.c      |  2 +-
 drivers/infiniband/hw/nes/nes_nic.c     |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c |  6 ++--
 drivers/net/8139cp.c                    |  6 ++--
 drivers/net/8139too.c                   |  6 ++--
 drivers/net/amd8111e.c                  |  6 ++--
 drivers/net/arm/ep93xx_eth.c            |  8 +++---
 drivers/net/arm/ixp4xx_eth.c            | 12 ++++----
 drivers/net/atl1e/atl1e_main.c          |  6 ++--
 drivers/net/b44.c                       |  6 ++--
 drivers/net/bnx2.c                      | 12 ++++----
 drivers/net/bnx2x_main.c                |  6 ++--
 drivers/net/cassini.c                   |  8 +++---
 drivers/net/chelsio/sge.c               |  4 +--
 drivers/net/cpmac.c                     | 10 +++----
 drivers/net/e100.c                      |  6 ++--
 drivers/net/e1000/e1000_main.c          | 10 +++----
 drivers/net/e1000e/netdev.c             | 14 ++++-----
 drivers/net/ehea/ehea_main.c            |  8 +++---
 drivers/net/enic/enic_main.c            | 12 ++++----
 drivers/net/epic100.c                   |  6 ++--
 drivers/net/forcedeth.c                 | 10 +++----
 drivers/net/fs_enet/fs_enet-main.c      |  4 +--
 drivers/net/gianfar.c                   |  6 ++--
 drivers/net/ibmveth.c                   |  8 +++---
 drivers/net/igb/igb_main.c              | 12 ++++----
 drivers/net/ixgb/ixgb_main.c            |  6 ++--
 drivers/net/ixgbe/ixgbe_main.c          | 12 ++++----
 drivers/net/ixp2000/ixpdev.c            |  4 +--
 drivers/net/jme.h                       |  6 ++--
 drivers/net/korina.c                    |  4 +--
 drivers/net/macb.c                      | 10 +++----
 drivers/net/mlx4/en_rx.c                |  4 +--
 drivers/net/myri10ge/myri10ge.c         |  6 ++--
 drivers/net/natsemi.c                   |  6 ++--
 drivers/net/netxen/netxen_nic_main.c    |  2 +-
 drivers/net/niu.c                       |  6 ++--
 drivers/net/pasemi_mac.c                |  6 ++--
 drivers/net/pcnet32.c                   |  6 ++--
 drivers/net/qla3xxx.c                   |  6 ++--
 drivers/net/qlge/qlge_main.c            |  6 ++--
 drivers/net/r6040.c                     |  4 +--
 drivers/net/r8169.c                     |  6 ++--
 drivers/net/s2io.c                      |  8 +++---
 drivers/net/sb1250-mac.c                |  6 ++--
 drivers/net/sfc/efx.c                   |  4 +--
 drivers/net/sfc/efx.h                   |  2 +-
 drivers/net/skge.c                      |  6 ++--
 drivers/net/smsc911x.c                  |  8 +++---
 drivers/net/smsc9420.c                  |  4 +--
 drivers/net/spider_net.c                | 12 ++++----
 drivers/net/starfire.c                  |  6 ++--
 drivers/net/sungem.c                    |  6 ++--
 drivers/net/tc35815.c                   |  6 ++--
 drivers/net/tehuti.c                    |  6 ++--
 drivers/net/tg3.c                       | 14 ++++-----
 drivers/net/tsi108_eth.c                |  8 +++---
 drivers/net/tulip/interrupt.c           | 10 +++----
 drivers/net/typhoon.c                   |  6 ++--
 drivers/net/ucc_geth.c                  |  6 ++--
 drivers/net/via-rhine.c                 |  4 +--
 drivers/net/virtio_net.c                | 12 ++++----
 drivers/net/wan/hd64572.c               |  4 +--
 drivers/net/wan/ixp4xx_hss.c            | 12 ++++----
 drivers/net/xen-netfront.c              |  8 +++---
 include/linux/netdevice.h               | 50 ---------------------------------
 66 files changed, 227 insertions(+), 277 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 5d139db1b771..53df9de23423 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -2541,7 +2541,7 @@ static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic
 {
 	struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
 
-	netif_rx_schedule(&nesvnic->napi);
+	napi_schedule(&nesvnic->napi);
 }
 
 
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 57a47cf7e513..f5484ad1279b 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -111,7 +111,7 @@ static int nes_netdev_poll(struct napi_struct *napi, int budget)
 	nes_nic_ce_handler(nesdev, nescq);
 
 	if (nescq->cqes_pending == 0) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		/* clear out completed cqes and arm */
 		nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
 				nescq->cq_number | (nescq->cqe_allocs_pending << 16));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index a1925810be3c..da6082739839 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -446,11 +446,11 @@ poll_more:
 		if (dev->features & NETIF_F_LRO)
 			lro_flush_all(&priv->lro.lro_mgr);
 
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		if (unlikely(ib_req_notify_cq(priv->recv_cq,
 					      IB_CQ_NEXT_COMP |
 					      IB_CQ_REPORT_MISSED_EVENTS)) &&
-		    netif_rx_reschedule(napi))
+		    napi_reschedule(napi))
 			goto poll_more;
 	}
 
@@ -462,7 +462,7 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 	struct net_device *dev = dev_ptr;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	netif_rx_schedule(&priv->napi);
+	napi_schedule(&priv->napi);
 }
 
 static void drain_tx_cq(struct net_device *dev)
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 4e19ae3ce6be..35517b06ec3f 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -604,7 +604,7 @@ rx_next:
 
 		spin_lock_irqsave(&cp->lock, flags);
 		cpw16_f(IntrMask, cp_intr_mask);
-		__netif_rx_complete(napi);
+		__napi_complete(napi);
 		spin_unlock_irqrestore(&cp->lock, flags);
 	}
 
@@ -641,9 +641,9 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance)
 	}
 
 	if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr))
-		if (netif_rx_schedule_prep(&cp->napi)) {
+		if (napi_schedule_prep(&cp->napi)) {
 			cpw16_f(IntrMask, cp_norx_intr_mask);
-			__netif_rx_schedule(&cp->napi);
+			__napi_schedule(&cp->napi);
 		}
 
 	if (status & (TxOK | TxErr | TxEmpty | SWInt))
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index a5b24202d564..5341da604e84 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -2128,7 +2128,7 @@ static int rtl8139_poll(struct napi_struct *napi, int budget)
 		 */
 		spin_lock_irqsave(&tp->lock, flags);
 		RTL_W16_F(IntrMask, rtl8139_intr_mask);
-		__netif_rx_complete(napi);
+		__napi_complete(napi);
 		spin_unlock_irqrestore(&tp->lock, flags);
 	}
 	spin_unlock(&tp->rx_lock);
@@ -2178,9 +2178,9 @@ static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance)
 	/* Receive packets are processed by poll routine.
 	   If not running start it now. */
 	if (status & RxAckBits){
-		if (netif_rx_schedule_prep(&tp->napi)) {
+		if (napi_schedule_prep(&tp->napi)) {
 			RTL_W16_F (IntrMask, rtl8139_norx_intr_mask);
-			__netif_rx_schedule(&tp->napi);
+			__napi_schedule(&tp->napi);
 		}
 	}
 
diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index 7709992bb6bf..cb9c95d3ed0a 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -831,7 +831,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget)
 	if (rx_pkt_limit > 0) {
 		/* Receive descriptor is empty now */
 		spin_lock_irqsave(&lp->lock, flags);
-		__netif_rx_complete(napi);
+		__napi_complete(napi);
 		writel(VAL0|RINTEN0, mmio + INTEN0);
 		writel(VAL2 | RDMD0, mmio + CMD0);
 		spin_unlock_irqrestore(&lp->lock, flags);
@@ -1170,11 +1170,11 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id)
 
 	/* Check if Receive Interrupt has occurred. */
 	if (intr0 & RINT0) {
-		if (netif_rx_schedule_prep(&lp->napi)) {
+		if (napi_schedule_prep(&lp->napi)) {
 			/* Disable receive interupts */
 			writel(RINTEN0, mmio + INTEN0);
 			/* Schedule a polling routine */
-			__netif_rx_schedule(&lp->napi);
+			__napi_schedule(&lp->napi);
 		} else if (intren0 & RINTEN0) {
 			printk("************Driver bug! \
 				interrupt while in poll\n");
diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index 3ec20cc18b0c..cc7708775da0 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -298,7 +298,7 @@ poll_some_more:
 		int more = 0;
 
 		spin_lock_irq(&ep->rx_lock);
-		__netif_rx_complete(napi);
+		__napi_complete(napi);
 		wrl(ep, REG_INTEN, REG_INTEN_TX | REG_INTEN_RX);
 		if (ep93xx_have_more_rx(ep)) {
 			wrl(ep, REG_INTEN, REG_INTEN_TX);
@@ -307,7 +307,7 @@ poll_some_more:
 		}
 		spin_unlock_irq(&ep->rx_lock);
 
-		if (more && netif_rx_reschedule(napi))
+		if (more && napi_reschedule(napi))
 			goto poll_some_more;
 	}
 
@@ -415,9 +415,9 @@ static irqreturn_t ep93xx_irq(int irq, void *dev_id)
 
 	if (status & REG_INTSTS_RX) {
 		spin_lock(&ep->rx_lock);
-		if (likely(netif_rx_schedule_prep(&ep->napi))) {
+		if (likely(napi_schedule_prep(&ep->napi))) {
 			wrl(ep, REG_INTEN, REG_INTEN_TX);
-			__netif_rx_schedule(&ep->napi);
+			__napi_schedule(&ep->napi);
 		}
 		spin_unlock(&ep->rx_lock);
 	}
diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index 5fce1d5c1a1a..5fe17d5eaa54 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -473,7 +473,7 @@ static void eth_rx_irq(void *pdev)
 	printk(KERN_DEBUG "%s: eth_rx_irq\n", dev->name);
 #endif
 	qmgr_disable_irq(port->plat->rxq);
-	netif_rx_schedule(&port->napi);
+	napi_schedule(&port->napi);
 }
 
 static int eth_poll(struct napi_struct *napi, int budget)
@@ -498,16 +498,16 @@ static int eth_poll(struct napi_struct *napi, int budget)
 
 		if ((n = queue_get_desc(rxq, port, 0)) < 0) {
 #if DEBUG_RX
-			printk(KERN_DEBUG "%s: eth_poll netif_rx_complete\n",
+			printk(KERN_DEBUG "%s: eth_poll napi_complete\n",
 			       dev->name);
 #endif
-			netif_rx_complete(napi);
+			napi_complete(napi);
 			qmgr_enable_irq(rxq);
 			if (!qmgr_stat_empty(rxq) &&
-			    netif_rx_reschedule(napi)) {
+			    napi_reschedule(napi)) {
 #if DEBUG_RX
 				printk(KERN_DEBUG "%s: eth_poll"
-				       " netif_rx_reschedule successed\n",
+				       " napi_reschedule successed\n",
 				       dev->name);
 #endif
 				qmgr_disable_irq(rxq);
@@ -1036,7 +1036,7 @@ static int eth_open(struct net_device *dev)
 	}
 	ports_open++;
 	/* we may already have RX data, enables IRQ */
-	netif_rx_schedule(&port->napi);
+	napi_schedule(&port->napi);
 	return 0;
 }
 
diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c
index bb9094d4cbc9..c758884728a5 100644
--- a/drivers/net/atl1e/atl1e_main.c
+++ b/drivers/net/atl1e/atl1e_main.c
@@ -1326,9 +1326,9 @@ static irqreturn_t atl1e_intr(int irq, void *data)
 			AT_WRITE_REG(hw, REG_IMR,
 				     IMR_NORMAL_MASK & ~ISR_RX_EVENT);
 			AT_WRITE_FLUSH(hw);
-			if (likely(netif_rx_schedule_prep(
+			if (likely(napi_schedule_prep(
 				   &adapter->napi)))
-				__netif_rx_schedule(&adapter->napi);
+				__napi_schedule(&adapter->napi);
 		}
 	} while (--max_ints > 0);
 	/* re-enable Interrupt*/
@@ -1514,7 +1514,7 @@ static int atl1e_clean(struct napi_struct *napi, int budget)
 	/* If no Tx and not enough Rx work done, exit the polling mode */
 	if (work_done < budget) {
 quit_polling:
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		imr_data = AT_READ_REG(&adapter->hw, REG_IMR);
 		AT_WRITE_REG(&adapter->hw, REG_IMR, imr_data | ISR_RX_EVENT);
 		/* test debug */
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index c38512ebcea6..92aaaa1ee9f1 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -874,7 +874,7 @@ static int b44_poll(struct napi_struct *napi, int budget)
 	}
 
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		b44_enable_ints(bp);
 	}
 
@@ -906,13 +906,13 @@ static irqreturn_t b44_interrupt(int irq, void *dev_id)
 			goto irq_ack;
 		}
 
-		if (netif_rx_schedule_prep(&bp->napi)) {
+		if (napi_schedule_prep(&bp->napi)) {
 			/* NOTE: These writes are posted by the readback of
 			 *       the ISTAT register below.
 			 */
 			bp->istat = istat;
 			__b44_disable_ints(bp);
-			__netif_rx_schedule(&bp->napi);
+			__napi_schedule(&bp->napi);
 		} else {
 			printk(KERN_ERR PFX "%s: Error, poll already scheduled\n",
 			       dev->name);
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index d4a3dac21dcf..e817802b2483 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -3053,7 +3053,7 @@ bnx2_msi(int irq, void *dev_instance)
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	netif_rx_schedule(&bnapi->napi);
+	napi_schedule(&bnapi->napi);
 
 	return IRQ_HANDLED;
 }
@@ -3070,7 +3070,7 @@ bnx2_msi_1shot(int irq, void *dev_instance)
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	netif_rx_schedule(&bnapi->napi);
+	napi_schedule(&bnapi->napi);
 
 	return IRQ_HANDLED;
 }
@@ -3106,9 +3106,9 @@ bnx2_interrupt(int irq, void *dev_instance)
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	if (netif_rx_schedule_prep(&bnapi->napi)) {
+	if (napi_schedule_prep(&bnapi->napi)) {
 		bnapi->last_status_idx = sblk->status_idx;
-		__netif_rx_schedule(&bnapi->napi);
+		__napi_schedule(&bnapi->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -3218,7 +3218,7 @@ static int bnx2_poll_msix(struct napi_struct *napi, int budget)
 		rmb();
 		if (likely(!bnx2_has_fast_work(bnapi))) {
 
-			netif_rx_complete(napi);
+			napi_complete(napi);
 			REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, bnapi->int_num |
 			       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
 			       bnapi->last_status_idx);
@@ -3251,7 +3251,7 @@ static int bnx2_poll(struct napi_struct *napi, int budget)
 
 		rmb();
 		if (likely(!bnx2_has_work(bnapi))) {
-			netif_rx_complete(napi);
+			napi_complete(napi);
 			if (likely(bp->flags & BNX2_FLAG_USING_MSI_OR_MSIX)) {
 				REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
 				       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index 074374ff93f3..21764bfc048e 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -1647,7 +1647,7 @@ static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
 	prefetch(&fp->status_blk->c_status_block.status_block_index);
 	prefetch(&fp->status_blk->u_status_block.status_block_index);
 
-	netif_rx_schedule(&bnx2x_fp(bp, index, napi));
+	napi_schedule(&bnx2x_fp(bp, index, napi));
 
 	return IRQ_HANDLED;
 }
@@ -1686,7 +1686,7 @@ static irqreturn_t bnx2x_interrupt(int irq, void *dev_instance)
 		prefetch(&fp->status_blk->c_status_block.status_block_index);
 		prefetch(&fp->status_blk->u_status_block.status_block_index);
 
-		netif_rx_schedule(&bnx2x_fp(bp, 0, napi));
+		napi_schedule(&bnx2x_fp(bp, 0, napi));
 
 		status &= ~mask;
 	}
@@ -9339,7 +9339,7 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
 #ifdef BNX2X_STOP_ON_ERROR
 poll_panic:
 #endif
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 		bnx2x_ack_sb(bp, FP_SB_ID(fp), USTORM_ID,
 			     le16_to_cpu(fp->fp_u_idx), IGU_INT_NOP, 1);
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 840b3d1a22f5..bb46be275339 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2506,7 +2506,7 @@ static irqreturn_t cas_interruptN(int irq, void *dev_id)
 	if (status & INTR_RX_DONE_ALT) { /* handle rx separately */
 #ifdef USE_NAPI
 		cas_mask_intr(cp);
-		netif_rx_schedule(&cp->napi);
+		napi_schedule(&cp->napi);
 #else
 		cas_rx_ringN(cp, ring, 0);
 #endif
@@ -2557,7 +2557,7 @@ static irqreturn_t cas_interrupt1(int irq, void *dev_id)
 	if (status & INTR_RX_DONE_ALT) { /* handle rx separately */
 #ifdef USE_NAPI
 		cas_mask_intr(cp);
-		netif_rx_schedule(&cp->napi);
+		napi_schedule(&cp->napi);
 #else
 		cas_rx_ringN(cp, 1, 0);
 #endif
@@ -2613,7 +2613,7 @@ static irqreturn_t cas_interrupt(int irq, void *dev_id)
 	if (status & INTR_RX_DONE) {
 #ifdef USE_NAPI
 		cas_mask_intr(cp);
-		netif_rx_schedule(&cp->napi);
+		napi_schedule(&cp->napi);
 #else
 		cas_rx_ringN(cp, 0, 0);
 #endif
@@ -2691,7 +2691,7 @@ rx_comp:
 #endif
 	spin_unlock_irqrestore(&cp->lock, flags);
 	if (enable_intr) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		cas_unmask_intr(cp);
 	}
 	return credits;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index d984b7995763..840da83fb3cf 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1612,7 +1612,7 @@ int t1_poll(struct napi_struct *napi, int budget)
 	int work_done = process_responses(adapter, budget);
 
 	if (likely(work_done < budget)) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		writel(adapter->sge->respQ.cidx,
 		       adapter->regs + A_SG_SLEEPING);
 	}
@@ -1630,7 +1630,7 @@ irqreturn_t t1_interrupt(int irq, void *data)
 
 		if (napi_schedule_prep(&adapter->napi)) {
 			if (process_pure_responses(adapter))
-				__netif_rx_schedule(&adapter->napi);
+				__napi_schedule(&adapter->napi);
 			else {
 				/* no data, no NAPI needed */
 				writel(sge->respQ.cidx, adapter->regs + A_SG_SLEEPING);
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index f66548751c38..4dad04e91f6d 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -428,7 +428,7 @@ static int cpmac_poll(struct napi_struct *napi, int budget)
 			printk(KERN_WARNING "%s: rx: polling, but no queue\n",
 			       priv->dev->name);
 		spin_unlock(&priv->rx_lock);
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		return 0;
 	}
 
@@ -514,7 +514,7 @@ static int cpmac_poll(struct napi_struct *napi, int budget)
 	if (processed == 0) {
 		/* we ran out of packets to read,
 		 * revert to interrupt-driven mode */
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1);
 		return 0;
 	}
@@ -536,7 +536,7 @@ fatal_error:
 	}
 
 	spin_unlock(&priv->rx_lock);
-	netif_rx_complete(napi);
+	napi_complete(napi);
 	netif_tx_stop_all_queues(priv->dev);
 	napi_disable(&priv->napi);
 
@@ -802,9 +802,9 @@ static irqreturn_t cpmac_irq(int irq, void *dev_id)
 
 	if (status & MAC_INT_RX) {
 		queue = (status >> 8) & 7;
-		if (netif_rx_schedule_prep(&priv->napi)) {
+		if (napi_schedule_prep(&priv->napi)) {
 			cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 1 << queue);
-			__netif_rx_schedule(&priv->napi);
+			__napi_schedule(&priv->napi);
 		}
 	}
 
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 86bb876fb123..861d2eeaa43c 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -1944,9 +1944,9 @@ static irqreturn_t e100_intr(int irq, void *dev_id)
 	if (stat_ack & stat_ack_rnr)
 		nic->ru_running = RU_SUSPENDED;
 
-	if (likely(netif_rx_schedule_prep(&nic->napi))) {
+	if (likely(napi_schedule_prep(&nic->napi))) {
 		e100_disable_irq(nic);
-		__netif_rx_schedule(&nic->napi);
+		__napi_schedule(&nic->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -1962,7 +1962,7 @@ static int e100_poll(struct napi_struct *napi, int budget)
 
 	/* If budget not fully consumed, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		e100_enable_irq(nic);
 	}
 
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 26474c92193f..ffe466e0afb9 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -3687,12 +3687,12 @@ static irqreturn_t e1000_intr_msi(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	if (likely(netif_rx_schedule_prep(&adapter->napi))) {
+	if (likely(napi_schedule_prep(&adapter->napi))) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(&adapter->napi);
+		__napi_schedule(&adapter->napi);
 	} else
 		e1000_irq_enable(adapter);
 
@@ -3747,12 +3747,12 @@ static irqreturn_t e1000_intr(int irq, void *data)
 		ew32(IMC, ~0);
 		E1000_WRITE_FLUSH();
 	}
-	if (likely(netif_rx_schedule_prep(&adapter->napi))) {
+	if (likely(napi_schedule_prep(&adapter->napi))) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(&adapter->napi);
+		__napi_schedule(&adapter->napi);
 	} else
 		/* this really should not happen! if it does it is basically a
 		 * bug, but not a hard error, so enable ints and continue */
@@ -3793,7 +3793,7 @@ static int e1000_clean(struct napi_struct *napi, int budget)
 	if (work_done < budget) {
 		if (likely(adapter->itr_setting & 3))
 			e1000_set_itr(adapter);
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		e1000_irq_enable(adapter);
 	}
 
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 91817d0afcaf..ff5b66adfc42 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -1179,12 +1179,12 @@ static irqreturn_t e1000_intr_msi(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	if (netif_rx_schedule_prep(&adapter->napi)) {
+	if (napi_schedule_prep(&adapter->napi)) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(&adapter->napi);
+		__napi_schedule(&adapter->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -1246,12 +1246,12 @@ static irqreturn_t e1000_intr(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	if (netif_rx_schedule_prep(&adapter->napi)) {
+	if (napi_schedule_prep(&adapter->napi)) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(&adapter->napi);
+		__napi_schedule(&adapter->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -1320,10 +1320,10 @@ static irqreturn_t e1000_intr_msix_rx(int irq, void *data)
 		adapter->rx_ring->set_itr = 0;
 	}
 
-	if (netif_rx_schedule_prep(&adapter->napi)) {
+	if (napi_schedule_prep(&adapter->napi)) {
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(&adapter->napi);
+		__napi_schedule(&adapter->napi);
 	}
 	return IRQ_HANDLED;
 }
@@ -2028,7 +2028,7 @@ clean_rx:
 	if (work_done < budget) {
 		if (adapter->itr_setting & 3)
 			e1000_set_itr(adapter);
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		if (adapter->msix_entries)
 			ew32(IMS, adapter->rx_ring->ims_val);
 		else
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index dfe92264e825..8dc2047da5c0 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -830,7 +830,7 @@ static int ehea_poll(struct napi_struct *napi, int budget)
 	while ((rx != budget) || force_irq) {
 		pr->poll_counter = 0;
 		force_irq = 0;
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		ehea_reset_cq_ep(pr->recv_cq);
 		ehea_reset_cq_ep(pr->send_cq);
 		ehea_reset_cq_n1(pr->recv_cq);
@@ -841,7 +841,7 @@ static int ehea_poll(struct napi_struct *napi, int budget)
 		if (!cqe && !cqe_skb)
 			return rx;
 
-		if (!netif_rx_reschedule(napi))
+		if (!napi_reschedule(napi))
 			return rx;
 
 		cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES);
@@ -859,7 +859,7 @@ static void ehea_netpoll(struct net_device *dev)
 	int i;
 
 	for (i = 0; i < port->num_def_qps; i++)
-		netif_rx_schedule(&port->port_res[i].napi);
+		napi_schedule(&port->port_res[i].napi);
 }
 #endif
 
@@ -867,7 +867,7 @@ static irqreturn_t ehea_recv_irq_handler(int irq, void *param)
 {
 	struct ehea_port_res *pr = param;
 
-	netif_rx_schedule(&pr->napi);
+	napi_schedule(&pr->napi);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 7d60551d538f..4617956821cd 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -411,8 +411,8 @@ static irqreturn_t enic_isr_legacy(int irq, void *data)
 	}
 
 	if (ENIC_TEST_INTR(pba, ENIC_INTX_WQ_RQ)) {
-		if (netif_rx_schedule_prep(&enic->napi))
-			__netif_rx_schedule(&enic->napi);
+		if (napi_schedule_prep(&enic->napi))
+			__napi_schedule(&enic->napi);
 	} else {
 		vnic_intr_unmask(&enic->intr[ENIC_INTX_WQ_RQ]);
 	}
@@ -440,7 +440,7 @@ static irqreturn_t enic_isr_msi(int irq, void *data)
 	 * writes).
 	 */
 
-	netif_rx_schedule(&enic->napi);
+	napi_schedule(&enic->napi);
 
 	return IRQ_HANDLED;
 }
@@ -450,7 +450,7 @@ static irqreturn_t enic_isr_msix_rq(int irq, void *data)
 	struct enic *enic = data;
 
 	/* schedule NAPI polling for RQ cleanup */
-	netif_rx_schedule(&enic->napi);
+	napi_schedule(&enic->napi);
 
 	return IRQ_HANDLED;
 }
@@ -1068,7 +1068,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
 		if (netdev->features & NETIF_F_LRO)
 			lro_flush_all(&enic->lro_mgr);
 
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		vnic_intr_unmask(&enic->intr[ENIC_MSIX_RQ]);
 	}
 
@@ -1112,7 +1112,7 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
 		if (netdev->features & NETIF_F_LRO)
 			lro_flush_all(&enic->lro_mgr);
 
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		vnic_intr_unmask(&enic->intr[ENIC_MSIX_RQ]);
 	}
 
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index a539bc3163cf..b60e27dfcfa7 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -1114,9 +1114,9 @@ static irqreturn_t epic_interrupt(int irq, void *dev_instance)
 
 	if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
 		spin_lock(&ep->napi_lock);
-		if (netif_rx_schedule_prep(&ep->napi)) {
+		if (napi_schedule_prep(&ep->napi)) {
 			epic_napi_irq_off(dev, ep);
-			__netif_rx_schedule(&ep->napi);
+			__napi_schedule(&ep->napi);
 		} else
 			ep->reschedule_in_poll++;
 		spin_unlock(&ep->napi_lock);
@@ -1293,7 +1293,7 @@ rx_action:
 
 		more = ep->reschedule_in_poll;
 		if (!more) {
-			__netif_rx_complete(napi);
+			__napi_complete(napi);
 			outl(EpicNapiEvent, ioaddr + INTSTAT);
 			epic_napi_irq_on(dev, ep);
 		} else
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 5b910cf63740..875509d7d86b 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1760,7 +1760,7 @@ static void nv_do_rx_refill(unsigned long data)
 	struct fe_priv *np = netdev_priv(dev);
 
 	/* Just reschedule NAPI rx processing */
-	netif_rx_schedule(&np->napi);
+	napi_schedule(&np->napi);
 }
 #else
 static void nv_do_rx_refill(unsigned long data)
@@ -3406,7 +3406,7 @@ static irqreturn_t nv_nic_irq(int foo, void *data)
 #ifdef CONFIG_FORCEDETH_NAPI
 		if (events & NVREG_IRQ_RX_ALL) {
 			spin_lock(&np->lock);
-			netif_rx_schedule(&np->napi);
+			napi_schedule(&np->napi);
 
 			/* Disable furthur receive irq's */
 			np->irqmask &= ~NVREG_IRQ_RX_ALL;
@@ -3523,7 +3523,7 @@ static irqreturn_t nv_nic_irq_optimized(int foo, void *data)
 #ifdef CONFIG_FORCEDETH_NAPI
 		if (events & NVREG_IRQ_RX_ALL) {
 			spin_lock(&np->lock);
-			netif_rx_schedule(&np->napi);
+			napi_schedule(&np->napi);
 
 			/* Disable furthur receive irq's */
 			np->irqmask &= ~NVREG_IRQ_RX_ALL;
@@ -3680,7 +3680,7 @@ static int nv_napi_poll(struct napi_struct *napi, int budget)
 		/* re-enable receive interrupts */
 		spin_lock_irqsave(&np->lock, flags);
 
-		__netif_rx_complete(napi);
+		__napi_complete(napi);
 
 		np->irqmask |= NVREG_IRQ_RX_ALL;
 		if (np->msi_flags & NV_MSI_X_ENABLED)
@@ -3706,7 +3706,7 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data)
 	writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus);
 
 	if (events) {
-		netif_rx_schedule(&np->napi);
+		napi_schedule(&np->napi);
 		/* disable receive interrupts on the nic */
 		writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask);
 		pci_push(base);
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index ce900e54d8d1..b037ce9857bf 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -209,7 +209,7 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
 
 	if (received < budget) {
 		/* done */
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		(*fep->ops->napi_enable_rx)(dev);
 	}
 	return received;
@@ -478,7 +478,7 @@ fs_enet_interrupt(int irq, void *dev_id)
 				/* NOTE: it is possible for FCCs in NAPI mode    */
 				/* to submit a spurious interrupt while in poll  */
 				if (napi_ok)
-					__netif_rx_schedule(&fep->napi);
+					__napi_schedule(&fep->napi);
 			}
 		}
 
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index ea530673236e..2e76699f8104 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -1627,9 +1627,9 @@ static void gfar_schedule_cleanup(struct net_device *dev)
 	spin_lock_irqsave(&priv->txlock, flags);
 	spin_lock(&priv->rxlock);
 
-	if (netif_rx_schedule_prep(&priv->napi)) {
+	if (napi_schedule_prep(&priv->napi)) {
 		gfar_write(&priv->regs->imask, IMASK_RTX_DISABLED);
-		__netif_rx_schedule(&priv->napi);
+		__napi_schedule(&priv->napi);
 	}
 
 	spin_unlock(&priv->rxlock);
@@ -1886,7 +1886,7 @@ static int gfar_poll(struct napi_struct *napi, int budget)
 		return budget;
 
 	if (rx_cleaned < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 		/* Clear the halt bit in RSTAT */
 		gfar_write(&priv->regs->rstat, RSTAT_CLEAR_RHALT);
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index dfa6348ac1dc..5c6315df86b9 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -1028,10 +1028,10 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
 
 		ibmveth_assert(lpar_rc == H_SUCCESS);
 
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 		if (ibmveth_rxq_pending_buffer(adapter) &&
-		    netif_rx_reschedule(napi)) {
+		    napi_reschedule(napi)) {
 			lpar_rc = h_vio_signal(adapter->vdev->unit_address,
 					       VIO_IRQ_DISABLE);
 			goto restart_poll;
@@ -1047,11 +1047,11 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
 	unsigned long lpar_rc;
 
-	if (netif_rx_schedule_prep(&adapter->napi)) {
+	if (napi_schedule_prep(&adapter->napi)) {
 		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
 				       VIO_IRQ_DISABLE);
 		ibmveth_assert(lpar_rc == H_SUCCESS);
-		__netif_rx_schedule(&adapter->napi);
+		__napi_schedule(&adapter->napi);
 	}
 	return IRQ_HANDLED;
 }
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index b82b0fb2056c..3806bb9d8bfa 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -3386,8 +3386,8 @@ static irqreturn_t igb_msix_rx(int irq, void *data)
 
 	igb_write_itr(rx_ring);
 
-	if (netif_rx_schedule_prep(&rx_ring->napi))
-		__netif_rx_schedule(&rx_ring->napi);
+	if (napi_schedule_prep(&rx_ring->napi))
+		__napi_schedule(&rx_ring->napi);
 
 #ifdef CONFIG_IGB_DCA
 	if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED)
@@ -3539,7 +3539,7 @@ static irqreturn_t igb_intr_msi(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	netif_rx_schedule(&adapter->rx_ring[0].napi);
+	napi_schedule(&adapter->rx_ring[0].napi);
 
 	return IRQ_HANDLED;
 }
@@ -3577,7 +3577,7 @@ static irqreturn_t igb_intr(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	netif_rx_schedule(&adapter->rx_ring[0].napi);
+	napi_schedule(&adapter->rx_ring[0].napi);
 
 	return IRQ_HANDLED;
 }
@@ -3612,7 +3612,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
 	    !netif_running(netdev)) {
 		if (adapter->itr_setting & 3)
 			igb_set_itr(adapter);
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		if (!test_bit(__IGB_DOWN, &adapter->state))
 			igb_irq_enable(adapter);
 		return 0;
@@ -3638,7 +3638,7 @@ static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget)
 
 	/* If not enough Rx work done, exit the polling mode */
 	if ((work_done == 0) || !netif_running(netdev)) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 		if (adapter->itr_setting & 3) {
 			if (adapter->num_rx_queues == 1)
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index eee28d395682..e2ef16b29700 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1721,14 +1721,14 @@ ixgb_intr(int irq, void *data)
 		if (!test_bit(__IXGB_DOWN, &adapter->flags))
 			mod_timer(&adapter->watchdog_timer, jiffies);
 
-	if (netif_rx_schedule_prep(&adapter->napi)) {
+	if (napi_schedule_prep(&adapter->napi)) {
 
 		/* Disable interrupts and register for poll. The flush
 		  of the posted write is intentionally left out.
 		*/
 
 		IXGB_WRITE_REG(&adapter->hw, IMC, ~0);
-		__netif_rx_schedule(&adapter->napi);
+		__napi_schedule(&adapter->napi);
 	}
 	return IRQ_HANDLED;
 }
@@ -1749,7 +1749,7 @@ ixgb_clean(struct napi_struct *napi, int budget)
 
 	/* If budget not fully consumed, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		if (!test_bit(__IXGB_DOWN, &adapter->flags))
 			ixgb_irq_enable(adapter);
 	}
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index d2f4d5f508b7..7489094bbbc8 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1015,7 +1015,7 @@ static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data)
 	rx_ring = &(adapter->rx_ring[r_idx]);
 	/* disable interrupts on this vector only */
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rx_ring->v_idx);
-	netif_rx_schedule(&q_vector->napi);
+	napi_schedule(&q_vector->napi);
 
 	return IRQ_HANDLED;
 }
@@ -1056,7 +1056,7 @@ static int ixgbe_clean_rxonly(struct napi_struct *napi, int budget)
 
 	/* If all Rx work done, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		if (adapter->itr_setting & 3)
 			ixgbe_set_itr_msix(q_vector);
 		if (!test_bit(__IXGBE_DOWN, &adapter->state))
@@ -1105,7 +1105,7 @@ static int ixgbe_clean_rxonly_many(struct napi_struct *napi, int budget)
 	rx_ring = &(adapter->rx_ring[r_idx]);
 	/* If all Rx work done, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		if (adapter->itr_setting & 3)
 			ixgbe_set_itr_msix(q_vector);
 		if (!test_bit(__IXGBE_DOWN, &adapter->state))
@@ -1381,13 +1381,13 @@ static irqreturn_t ixgbe_intr(int irq, void *data)
 
 	ixgbe_check_fan_failure(adapter, eicr);
 
-	if (netif_rx_schedule_prep(&adapter->q_vector[0].napi)) {
+	if (napi_schedule_prep(&adapter->q_vector[0].napi)) {
 		adapter->tx_ring[0].total_packets = 0;
 		adapter->tx_ring[0].total_bytes = 0;
 		adapter->rx_ring[0].total_packets = 0;
 		adapter->rx_ring[0].total_bytes = 0;
 		/* would disable interrupts here but EIAM disabled it */
-		__netif_rx_schedule(&adapter->q_vector[0].napi);
+		__napi_schedule(&adapter->q_vector[0].napi);
 	}
 
 	return IRQ_HANDLED;
@@ -2317,7 +2317,7 @@ static int ixgbe_poll(struct napi_struct *napi, int budget)
 
 	/* If budget not fully consumed, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		if (adapter->itr_setting & 3)
 			ixgbe_set_itr(adapter);
 		if (!test_bit(__IXGBE_DOWN, &adapter->state))
diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c
index 014745720560..d3bf2f017cc2 100644
--- a/drivers/net/ixp2000/ixpdev.c
+++ b/drivers/net/ixp2000/ixpdev.c
@@ -141,7 +141,7 @@ static int ixpdev_poll(struct napi_struct *napi, int budget)
 			break;
 	} while (ixp2000_reg_read(IXP2000_IRQ_THD_RAW_STATUS_A_0) & 0x00ff);
 
-	netif_rx_complete(napi);
+	napi_complete(napi);
 	ixp2000_reg_write(IXP2000_IRQ_THD_ENABLE_SET_A_0, 0x00ff);
 
 	return rx;
@@ -204,7 +204,7 @@ static irqreturn_t ixpdev_interrupt(int irq, void *dev_id)
 
 		ixp2000_reg_wrb(IXP2000_IRQ_THD_ENABLE_CLEAR_A_0, 0x00ff);
 		if (likely(napi_schedule_prep(&ip->napi))) {
-			__netif_rx_schedule(&ip->napi);
+			__napi_schedule(&ip->napi);
 		} else {
 			printk(KERN_CRIT "ixp2000: irq while polling!!\n");
 		}
diff --git a/drivers/net/jme.h b/drivers/net/jme.h
index 5154411b5e6b..e321c678b11c 100644
--- a/drivers/net/jme.h
+++ b/drivers/net/jme.h
@@ -398,15 +398,15 @@ struct jme_ring {
 #define JME_NAPI_WEIGHT(w) int w
 #define JME_NAPI_WEIGHT_VAL(w) w
 #define JME_NAPI_WEIGHT_SET(w, r)
-#define JME_RX_COMPLETE(dev, napis) netif_rx_complete(napis)
+#define JME_RX_COMPLETE(dev, napis) napi_complete(napis)
 #define JME_NAPI_ENABLE(priv) napi_enable(&priv->napi);
 #define JME_NAPI_DISABLE(priv) \
 	if (!napi_disable_pending(&priv->napi)) \
 		napi_disable(&priv->napi);
 #define JME_RX_SCHEDULE_PREP(priv) \
-	netif_rx_schedule_prep(&priv->napi)
+	napi_schedule_prep(&priv->napi)
 #define JME_RX_SCHEDULE(priv) \
-	__netif_rx_schedule(&priv->napi);
+	__napi_schedule(&priv->napi);
 
 /*
  * Jmac Adapter Private data
diff --git a/drivers/net/korina.c b/drivers/net/korina.c
index 75010cac76ac..38d6649a29c4 100644
--- a/drivers/net/korina.c
+++ b/drivers/net/korina.c
@@ -334,7 +334,7 @@ static irqreturn_t korina_rx_dma_interrupt(int irq, void *dev_id)
 				DMA_STAT_HALT | DMA_STAT_ERR),
 				&lp->rx_dma_regs->dmasm);
 
-		netif_rx_schedule(&lp->napi);
+		napi_schedule(&lp->napi);
 
 		if (dmas & DMA_STAT_ERR)
 			printk(KERN_ERR DRV_NAME "%s: DMA error\n", dev->name);
@@ -468,7 +468,7 @@ static int korina_poll(struct napi_struct *napi, int budget)
 
 	work_done = korina_rx(dev, budget);
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 		writel(readl(&lp->rx_dma_regs->dmasm) &
 			~(DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR),
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index f6c4936e2fa8..dc33d51213d7 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -527,7 +527,7 @@ static int macb_poll(struct napi_struct *napi, int budget)
 		 * this function was called last time, and no packets
 		 * have been received since.
 		 */
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		goto out;
 	}
 
@@ -538,13 +538,13 @@ static int macb_poll(struct napi_struct *napi, int budget)
 		dev_warn(&bp->pdev->dev,
 			 "No RX buffers complete, status = %02lx\n",
 			 (unsigned long)status);
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		goto out;
 	}
 
 	work_done = macb_rx(bp, budget);
 	if (work_done < budget)
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 	/*
 	 * We've done what we can to clean the buffers. Make sure we
@@ -579,7 +579,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 		}
 
 		if (status & MACB_RX_INT_FLAGS) {
-			if (netif_rx_schedule_prep(&bp->napi)) {
+			if (napi_schedule_prep(&bp->napi)) {
 				/*
 				 * There's no point taking any more interrupts
 				 * until we have processed the buffers
@@ -587,7 +587,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 				macb_writel(bp, IDR, MACB_RX_INT_FLAGS);
 				dev_dbg(&bp->pdev->dev,
 					"scheduling RX softirq\n");
-				__netif_rx_schedule(&bp->napi);
+				__napi_schedule(&bp->napi);
 			}
 		}
 
diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c
index c61b0bdca1a4..ac55ebd2f146 100644
--- a/drivers/net/mlx4/en_rx.c
+++ b/drivers/net/mlx4/en_rx.c
@@ -814,7 +814,7 @@ void mlx4_en_rx_irq(struct mlx4_cq *mcq)
 	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
 
 	if (priv->port_up)
-		netif_rx_schedule(&cq->napi);
+		napi_schedule(&cq->napi);
 	else
 		mlx4_en_arm_cq(priv, cq);
 }
@@ -834,7 +834,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 		INC_PERF_COUNTER(priv->pstats.napi_quota);
 	else {
 		/* Done for now */
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		mlx4_en_arm_cq(priv, cq);
 	}
 	return done;
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e9c1296b267e..2dacb8852dc3 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -1514,7 +1514,7 @@ static int myri10ge_poll(struct napi_struct *napi, int budget)
 	work_done = myri10ge_clean_rx_done(ss, budget);
 
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		put_be32(htonl(3), ss->irq_claim);
 	}
 	return work_done;
@@ -1532,7 +1532,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
 	/* an interrupt on a non-zero receive-only slice is implicitly
 	 * valid  since MSI-X irqs are not shared */
 	if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
-		netif_rx_schedule(&ss->napi);
+		napi_schedule(&ss->napi);
 		return (IRQ_HANDLED);
 	}
 
@@ -1543,7 +1543,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
 	/* low bit indicates receives are present, so schedule
 	 * napi poll handler */
 	if (stats->valid & 1)
-		netif_rx_schedule(&ss->napi);
+		napi_schedule(&ss->napi);
 
 	if (!mgp->msi_enabled && !mgp->msix_enabled) {
 		put_be32(0, mgp->irq_deassert);
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index c5dec54251bf..c23a58624a33 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -2198,10 +2198,10 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
 
 	prefetch(&np->rx_skbuff[np->cur_rx % RX_RING_SIZE]);
 
-	if (netif_rx_schedule_prep(&np->napi)) {
+	if (napi_schedule_prep(&np->napi)) {
 		/* Disable interrupts and register for poll */
 		natsemi_irq_disable(dev);
-		__netif_rx_schedule(&np->napi);
+		__napi_schedule(&np->napi);
 	} else
 		printk(KERN_WARNING
 	       	       "%s: Ignoring interrupt, status %#08x, mask %#08x.\n",
@@ -2253,7 +2253,7 @@ static int natsemi_poll(struct napi_struct *napi, int budget)
 		np->intr_status = readl(ioaddr + IntrStatus);
 	} while (np->intr_status);
 
-	netif_rx_complete(napi);
+	napi_complete(napi);
 
 	/* Reenable interrupts providing nothing is trying to shut
 	 * the chip down. */
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index d854f07ef4d3..1139e637f5da 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -1631,7 +1631,7 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget)
 	}
 
 	if ((work_done < budget) && tx_complete) {
-		netif_rx_complete(&adapter->napi);
+		napi_complete(&adapter->napi);
 		netxen_nic_enable_int(adapter);
 	}
 
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 0c0b752315ca..4a5a089fa301 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -3669,7 +3669,7 @@ static int niu_poll(struct napi_struct *napi, int budget)
 	work_done = niu_poll_core(np, lp, budget);
 
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		niu_ldg_rearm(np, lp, 1);
 	}
 	return work_done;
@@ -4088,12 +4088,12 @@ static void __niu_fastpath_interrupt(struct niu *np, int ldg, u64 v0)
 static void niu_schedule_napi(struct niu *np, struct niu_ldg *lp,
 			      u64 v0, u64 v1, u64 v2)
 {
-	if (likely(netif_rx_schedule_prep(&lp->napi))) {
+	if (likely(napi_schedule_prep(&lp->napi))) {
 		lp->v0 = v0;
 		lp->v1 = v1;
 		lp->v2 = v2;
 		__niu_fastpath_interrupt(np, lp->ldg_num, v0);
-		__netif_rx_schedule(&lp->napi);
+		__napi_schedule(&lp->napi);
 	}
 }
 
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index d0349e7d73ea..5eeb5a87b738 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -970,7 +970,7 @@ static irqreturn_t pasemi_mac_rx_intr(int irq, void *data)
 	if (*chan->status & PAS_STATUS_ERROR)
 		reg |= PAS_IOB_DMA_RXCH_RESET_DINTC;
 
-	netif_rx_schedule(&mac->napi);
+	napi_schedule(&mac->napi);
 
 	write_iob_reg(PAS_IOB_DMA_RXCH_RESET(chan->chno), reg);
 
@@ -1010,7 +1010,7 @@ static irqreturn_t pasemi_mac_tx_intr(int irq, void *data)
 
 	mod_timer(&txring->clean_timer, jiffies + (TX_CLEAN_INTERVAL)*2);
 
-	netif_rx_schedule(&mac->napi);
+	napi_schedule(&mac->napi);
 
 	if (reg)
 		write_iob_reg(PAS_IOB_DMA_TXCH_RESET(chan->chno), reg);
@@ -1639,7 +1639,7 @@ static int pasemi_mac_poll(struct napi_struct *napi, int budget)
 	pkts = pasemi_mac_clean_rx(rx_ring(mac), budget);
 	if (pkts < budget) {
 		/* all done, no more packets present */
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 		pasemi_mac_restart_rx_intr(mac);
 		pasemi_mac_restart_tx_intr(mac);
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 665a4286da39..80124fac65fa 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -1397,7 +1397,7 @@ static int pcnet32_poll(struct napi_struct *napi, int budget)
 	if (work_done < budget) {
 		spin_lock_irqsave(&lp->lock, flags);
 
-		__netif_rx_complete(napi);
+		__napi_complete(napi);
 
 		/* clear interrupt masks */
 		val = lp->a.read_csr(ioaddr, CSR3);
@@ -2592,14 +2592,14 @@ pcnet32_interrupt(int irq, void *dev_id)
 				       dev->name, csr0);
 			/* unlike for the lance, there is no restart needed */
 		}
-		if (netif_rx_schedule_prep(&lp->napi)) {
+		if (napi_schedule_prep(&lp->napi)) {
 			u16 val;
 			/* set interrupt masks */
 			val = lp->a.read_csr(ioaddr, CSR3);
 			val |= 0x5f00;
 			lp->a.write_csr(ioaddr, CSR3, val);
 			mmiowb();
-			__netif_rx_schedule(&lp->napi);
+			__napi_schedule(&lp->napi);
 			break;
 		}
 		csr0 = lp->a.read_csr(ioaddr, CSR0);
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index 189ec29ac7a4..8b2823c8dccf 100644
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -2292,7 +2292,7 @@ static int ql_poll(struct napi_struct *napi, int budget)
 
 	if (tx_cleaned + rx_cleaned != budget) {
 		spin_lock_irqsave(&qdev->hw_lock, hw_flags);
-		__netif_rx_complete(napi);
+		__napi_complete(napi);
 		ql_update_small_bufq_prod_index(qdev);
 		ql_update_lrg_bufq_prod_index(qdev);
 		writel(qdev->rsp_consumer_index,
@@ -2351,8 +2351,8 @@ static irqreturn_t ql3xxx_isr(int irq, void *dev_id)
 		spin_unlock(&qdev->adapter_lock);
 	} else if (value & ISP_IMR_DISABLE_CMPL_INT) {
 		ql_disable_interrupts(qdev);
-		if (likely(netif_rx_schedule_prep(&qdev->napi))) {
-			__netif_rx_schedule(&qdev->napi);
+		if (likely(napi_schedule_prep(&qdev->napi))) {
+			__napi_schedule(&qdev->napi);
 		}
 	} else {
 		return IRQ_NONE;
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 45421c8b6010..16eb9dd85286 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -1642,7 +1642,7 @@ static int ql_napi_poll_msix(struct napi_struct *napi, int budget)
 		rx_ring->cq_id);
 
 	if (work_done < budget) {
-		__netif_rx_complete(napi);
+		__napi_complete(napi);
 		ql_enable_completion_interrupt(qdev, rx_ring->irq);
 	}
 	return work_done;
@@ -1727,7 +1727,7 @@ static irqreturn_t qlge_msix_tx_isr(int irq, void *dev_id)
 static irqreturn_t qlge_msix_rx_isr(int irq, void *dev_id)
 {
 	struct rx_ring *rx_ring = dev_id;
-	netif_rx_schedule(&rx_ring->napi);
+	napi_schedule(&rx_ring->napi);
 	return IRQ_HANDLED;
 }
 
@@ -1813,7 +1813,7 @@ static irqreturn_t qlge_isr(int irq, void *dev_id)
 							      &rx_ring->rx_work,
 							      0);
 				else
-					netif_rx_schedule(&rx_ring->napi);
+					napi_schedule(&rx_ring->napi);
 				work_done++;
 			}
 		}
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 72fd9e97c190..cc0f886b0c29 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -677,7 +677,7 @@ static int r6040_poll(struct napi_struct *napi, int budget)
 	work_done = r6040_rx(dev, budget);
 
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		/* Enable RX interrupt */
 		iowrite16(ioread16(ioaddr + MIER) | RX_INTS, ioaddr + MIER);
 	}
@@ -714,7 +714,7 @@ static irqreturn_t r6040_interrupt(int irq, void *dev_id)
 
 		/* Mask off RX interrupt */
 		misr &= ~RX_INTS;
-		netif_rx_schedule(&lp->napi);
+		napi_schedule(&lp->napi);
 	}
 
 	/* TX interrupt request */
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 2c73ca606b35..1c4a980253fe 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -3581,8 +3581,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
 		tp->intr_mask = ~tp->napi_event;
 
-		if (likely(netif_rx_schedule_prep(&tp->napi)))
-			__netif_rx_schedule(&tp->napi);
+		if (likely(napi_schedule_prep(&tp->napi)))
+			__napi_schedule(&tp->napi);
 		else if (netif_msg_intr(tp)) {
 			printk(KERN_INFO "%s: interrupt %04x in poll\n",
 			       dev->name, status);
@@ -3603,7 +3603,7 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 	rtl8169_tx_interrupt(dev, tp, ioaddr);
 
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		tp->intr_mask = 0xffff;
 		/*
 		 * 20040426: the barrier is not strictly required but the
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index f5c57c059bca..2a96a10fd0cf 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -2852,7 +2852,7 @@ static int s2io_poll_msix(struct napi_struct *napi, int budget)
 	s2io_chk_rx_buffers(nic, ring);
 
 	if (pkts_processed < budget_org) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		/*Re Enable MSI-Rx Vector*/
 		addr = (u8 __iomem *)&bar0->xmsi_mask_reg;
 		addr += 7 - ring->ring_no;
@@ -2889,7 +2889,7 @@ static int s2io_poll_inta(struct napi_struct *napi, int budget)
 			break;
 	}
 	if (pkts_processed < budget_org) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		/* Re enable the Rx interrupts for the ring */
 		writeq(0, &bar0->rx_traffic_mask);
 		readl(&bar0->rx_traffic_mask);
@@ -4342,7 +4342,7 @@ static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id)
 		val8 = (ring->ring_no == 0) ? 0x7f : 0xff;
 		writeb(val8, addr);
 		val8 = readb(addr);
-		netif_rx_schedule(&ring->napi);
+		napi_schedule(&ring->napi);
 	} else {
 		rx_intr_handler(ring, 0);
 		s2io_chk_rx_buffers(sp, ring);
@@ -4789,7 +4789,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id)
 
 		if (config->napi) {
 			if (reason & GEN_INTR_RXTRAFFIC) {
-				netif_rx_schedule(&sp->napi);
+				napi_schedule(&sp->napi);
 				writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_mask);
 				writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_int);
 				readl(&bar0->rx_traffic_int);
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index 31e38fae017f..3e11c1d6d792 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -2039,9 +2039,9 @@ static irqreturn_t sbmac_intr(int irq,void *dev_instance)
 		sbdma_tx_process(sc,&(sc->sbm_txdma), 0);
 
 	if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
-		if (netif_rx_schedule_prep(&sc->napi)) {
+		if (napi_schedule_prep(&sc->napi)) {
 			__raw_writeq(0, sc->sbm_imr);
-			__netif_rx_schedule(&sc->napi);
+			__napi_schedule(&sc->napi);
 			/* Depend on the exit from poll to reenable intr */
 		}
 		else {
@@ -2667,7 +2667,7 @@ static int sbmac_poll(struct napi_struct *napi, int budget)
 	sbdma_tx_process(sc, &(sc->sbm_txdma), 1);
 
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 #ifdef CONFIG_SBMAC_COALESCE
 		__raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_TX_CH0) |
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 7673fd92eaf5..77aca5d67b57 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -225,11 +225,11 @@ static int efx_poll(struct napi_struct *napi, int budget)
 
 	if (rx_packets < budget) {
 		/* There is no race here; although napi_disable() will
-		 * only wait for netif_rx_complete(), this isn't a problem
+		 * only wait for napi_complete(), this isn't a problem
 		 * since efx_channel_processed() will have no effect if
 		 * interrupts have already been disabled.
 		 */
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		efx_channel_processed(channel);
 	}
 
diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h
index 0dd7a532c78a..fb1ac0e63c0b 100644
--- a/drivers/net/sfc/efx.h
+++ b/drivers/net/sfc/efx.h
@@ -77,7 +77,7 @@ static inline void efx_schedule_channel(struct efx_channel *channel)
 		  channel->channel, raw_smp_processor_id());
 	channel->work_pending = true;
 
-	netif_rx_schedule(&channel->napi_str);
+	napi_schedule(&channel->napi_str);
 }
 
 #endif /* EFX_EFX_H */
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index c9dbb06f8c94..952d37ffee51 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -3214,7 +3214,7 @@ static int skge_poll(struct napi_struct *napi, int to_do)
 		unsigned long flags;
 
 		spin_lock_irqsave(&hw->hw_lock, flags);
-		__netif_rx_complete(napi);
+		__napi_complete(napi);
 		hw->intr_mask |= napimask[skge->port];
 		skge_write32(hw, B0_IMSK, hw->intr_mask);
 		skge_read32(hw, B0_IMSK);
@@ -3377,7 +3377,7 @@ static irqreturn_t skge_intr(int irq, void *dev_id)
 	if (status & (IS_XA1_F|IS_R1_F)) {
 		struct skge_port *skge = netdev_priv(hw->dev[0]);
 		hw->intr_mask &= ~(IS_XA1_F|IS_R1_F);
-		netif_rx_schedule(&skge->napi);
+		napi_schedule(&skge->napi);
 	}
 
 	if (status & IS_PA_TO_TX1)
@@ -3397,7 +3397,7 @@ static irqreturn_t skge_intr(int irq, void *dev_id)
 
 		if (status & (IS_XA2_F|IS_R2_F)) {
 			hw->intr_mask &= ~(IS_XA2_F|IS_R2_F);
-			netif_rx_schedule(&skge->napi);
+			napi_schedule(&skge->napi);
 		}
 
 		if (status & IS_PA_TO_RX2) {
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index f513bdf1c887..d271ae39c6f3 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -984,7 +984,7 @@ static int smsc911x_poll(struct napi_struct *napi, int budget)
 			/* We processed all packets available.  Tell NAPI it can
 			 * stop polling then re-enable rx interrupts */
 			smsc911x_reg_write(pdata, INT_STS, INT_STS_RSFL_);
-			netif_rx_complete(napi);
+			napi_complete(napi);
 			temp = smsc911x_reg_read(pdata, INT_EN);
 			temp |= INT_EN_RSFL_EN_;
 			smsc911x_reg_write(pdata, INT_EN, temp);
@@ -1485,16 +1485,16 @@ static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id)
 	}
 
 	if (likely(intsts & inten & INT_STS_RSFL_)) {
-		if (likely(netif_rx_schedule_prep(&pdata->napi))) {
+		if (likely(napi_schedule_prep(&pdata->napi))) {
 			/* Disable Rx interrupts */
 			temp = smsc911x_reg_read(pdata, INT_EN);
 			temp &= (~INT_EN_RSFL_EN_);
 			smsc911x_reg_write(pdata, INT_EN, temp);
 			/* Schedule a NAPI poll */
-			__netif_rx_schedule(&pdata->napi);
+			__napi_schedule(&pdata->napi);
 		} else {
 			SMSC_WARNING(RX_ERR,
-				"netif_rx_schedule_prep failed");
+				"napi_schedule_prep failed");
 		}
 		serviced = IRQ_HANDLED;
 	}
diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c
index c14a4c6452c7..79f4c228b030 100644
--- a/drivers/net/smsc9420.c
+++ b/drivers/net/smsc9420.c
@@ -666,7 +666,7 @@ static irqreturn_t smsc9420_isr(int irq, void *dev_id)
 			smsc9420_pci_flush_write(pd);
 
 			ints_to_clear |= (DMAC_STS_RX_ | DMAC_STS_NIS_);
-			netif_rx_schedule(&pd->napi);
+			napi_schedule(&pd->napi);
 		}
 
 		if (ints_to_clear)
@@ -889,7 +889,7 @@ static int smsc9420_rx_poll(struct napi_struct *napi, int budget)
 	smsc9420_pci_flush_write(pd);
 
 	if (work_done < budget) {
-		netif_rx_complete(&pd->napi);
+		napi_complete(&pd->napi);
 
 		/* re-enable RX DMA interrupts */
 		dma_intr_ena = smsc9420_reg_read(pd, DMAC_INTR_ENA);
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 88d2c67788df..7f6b4a4052ee 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -1301,7 +1301,7 @@ static int spider_net_poll(struct napi_struct *napi, int budget)
 	/* if all packets are in the stack, enable interrupts and return 0 */
 	/* if not, return 1 */
 	if (packets_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		spider_net_rx_irq_on(card);
 		card->ignore_rx_ramfull = 0;
 	}
@@ -1528,7 +1528,7 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg,
 			spider_net_refill_rx_chain(card);
 			spider_net_enable_rxdmac(card);
 			card->num_rx_ints ++;
-			netif_rx_schedule(&card->napi);
+			napi_schedule(&card->napi);
 		}
 		show_error = 0;
 		break;
@@ -1548,7 +1548,7 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg,
 		spider_net_refill_rx_chain(card);
 		spider_net_enable_rxdmac(card);
 		card->num_rx_ints ++;
-		netif_rx_schedule(&card->napi);
+		napi_schedule(&card->napi);
 		show_error = 0;
 		break;
 
@@ -1562,7 +1562,7 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg,
 		spider_net_refill_rx_chain(card);
 		spider_net_enable_rxdmac(card);
 		card->num_rx_ints ++;
-		netif_rx_schedule(&card->napi);
+		napi_schedule(&card->napi);
 		show_error = 0;
 		break;
 
@@ -1656,11 +1656,11 @@ spider_net_interrupt(int irq, void *ptr)
 
 	if (status_reg & SPIDER_NET_RXINT ) {
 		spider_net_rx_irq_off(card);
-		netif_rx_schedule(&card->napi);
+		napi_schedule(&card->napi);
 		card->num_rx_ints ++;
 	}
 	if (status_reg & SPIDER_NET_TXINT)
-		netif_rx_schedule(&card->napi);
+		napi_schedule(&card->napi);
 
 	if (status_reg & SPIDER_NET_LINKINT)
 		spider_net_link_reset(netdev);
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index da3a76b18eff..98fe79515bab 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1342,8 +1342,8 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
 		if (intr_status & (IntrRxDone | IntrRxEmpty)) {
 			u32 enable;
 
-			if (likely(netif_rx_schedule_prep(&np->napi))) {
-				__netif_rx_schedule(&np->napi);
+			if (likely(napi_schedule_prep(&np->napi))) {
+				__napi_schedule(&np->napi);
 				enable = readl(ioaddr + IntrEnable);
 				enable &= ~(IntrRxDone | IntrRxEmpty);
 				writel(enable, ioaddr + IntrEnable);
@@ -1587,7 +1587,7 @@ static int netdev_poll(struct napi_struct *napi, int budget)
 		intr_status = readl(ioaddr + IntrStatus);
 	} while (intr_status & (IntrRxDone | IntrRxEmpty));
 
-	netif_rx_complete(napi);
+	napi_complete(napi);
 	intr_status = readl(ioaddr + IntrEnable);
 	intr_status |= IntrRxDone | IntrRxEmpty;
 	writel(intr_status, ioaddr + IntrEnable);
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 86c765d83de1..4942059109f3 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -921,7 +921,7 @@ static int gem_poll(struct napi_struct *napi, int budget)
 		gp->status = readl(gp->regs + GREG_STAT);
 	} while (gp->status & GREG_STAT_NAPI);
 
-	__netif_rx_complete(napi);
+	__napi_complete(napi);
 	gem_enable_ints(gp);
 
 	spin_unlock_irqrestore(&gp->lock, flags);
@@ -944,7 +944,7 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id)
 
 	spin_lock_irqsave(&gp->lock, flags);
 
-	if (netif_rx_schedule_prep(&gp->napi)) {
+	if (napi_schedule_prep(&gp->napi)) {
 		u32 gem_status = readl(gp->regs + GREG_STAT);
 
 		if (gem_status == 0) {
@@ -954,7 +954,7 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id)
 		}
 		gp->status = gem_status;
 		gem_disable_ints(gp);
-		__netif_rx_schedule(&gp->napi);
+		__napi_schedule(&gp->napi);
 	}
 
 	spin_unlock_irqrestore(&gp->lock, flags);
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index bcd0e60cbda9..f42c67e93bf4 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1609,8 +1609,8 @@ static irqreturn_t tc35815_interrupt(int irq, void *dev_id)
 	if (!(dmactl & DMA_IntMask)) {
 		/* disable interrupts */
 		tc_writel(dmactl | DMA_IntMask, &tr->DMA_Ctl);
-		if (netif_rx_schedule_prep(&lp->napi))
-			__netif_rx_schedule(&lp->napi);
+		if (napi_schedule_prep(&lp->napi))
+			__napi_schedule(&lp->napi);
 		else {
 			printk(KERN_ERR "%s: interrupt taken in poll\n",
 			       dev->name);
@@ -1919,7 +1919,7 @@ static int tc35815_poll(struct napi_struct *napi, int budget)
 	spin_unlock(&lp->lock);
 
 	if (received < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		/* enable interrupts */
 		tc_writel(tc_readl(&tr->DMA_Ctl) & ~DMA_IntMask, &tr->DMA_Ctl);
 	}
diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index a7a4dc4d6313..be9f38f8f0bf 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -265,8 +265,8 @@ static irqreturn_t bdx_isr_napi(int irq, void *dev)
 		bdx_isr_extra(priv, isr);
 
 	if (isr & (IR_RX_DESC_0 | IR_TX_FREE_0)) {
-		if (likely(netif_rx_schedule_prep(&priv->napi))) {
-			__netif_rx_schedule(&priv->napi);
+		if (likely(napi_schedule_prep(&priv->napi))) {
+			__napi_schedule(&priv->napi);
 			RET(IRQ_HANDLED);
 		} else {
 			/* NOTE: we get here if intr has slipped into window
@@ -302,7 +302,7 @@ static int bdx_poll(struct napi_struct *napi, int budget)
 		 * device lock and allow waiting tasks (eg rmmod) to advance) */
 		priv->napi_stop = 0;
 
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		bdx_enable_interrupts(priv);
 	}
 	return work_done;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 8b3f84685387..5fa65acb68e5 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -4460,7 +4460,7 @@ static int tg3_poll(struct napi_struct *napi, int budget)
 			sblk->status &= ~SD_STATUS_UPDATED;
 
 		if (likely(!tg3_has_work(tp))) {
-			netif_rx_complete(napi);
+			napi_complete(napi);
 			tg3_restart_ints(tp);
 			break;
 		}
@@ -4470,7 +4470,7 @@ static int tg3_poll(struct napi_struct *napi, int budget)
 
 tx_recovery:
 	/* work_done is guaranteed to be less than budget. */
-	netif_rx_complete(napi);
+	napi_complete(napi);
 	schedule_work(&tp->reset_task);
 	return work_done;
 }
@@ -4519,7 +4519,7 @@ static irqreturn_t tg3_msi_1shot(int irq, void *dev_id)
 	prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]);
 
 	if (likely(!tg3_irq_sync(tp)))
-		netif_rx_schedule(&tp->napi);
+		napi_schedule(&tp->napi);
 
 	return IRQ_HANDLED;
 }
@@ -4544,7 +4544,7 @@ static irqreturn_t tg3_msi(int irq, void *dev_id)
 	 */
 	tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
 	if (likely(!tg3_irq_sync(tp)))
-		netif_rx_schedule(&tp->napi);
+		napi_schedule(&tp->napi);
 
 	return IRQ_RETVAL(1);
 }
@@ -4586,7 +4586,7 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id)
 	sblk->status &= ~SD_STATUS_UPDATED;
 	if (likely(tg3_has_work(tp))) {
 		prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]);
-		netif_rx_schedule(&tp->napi);
+		napi_schedule(&tp->napi);
 	} else {
 		/* No work, shared interrupt perhaps?  re-enable
 		 * interrupts, and flush that PCI write
@@ -4632,7 +4632,7 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id)
 	tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
 	if (tg3_irq_sync(tp))
 		goto out;
-	if (netif_rx_schedule_prep(&tp->napi)) {
+	if (napi_schedule_prep(&tp->napi)) {
 		prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]);
 		/* Update last_tag to mark that this status has been
 		 * seen. Because interrupt may be shared, we may be
@@ -4640,7 +4640,7 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id)
 		 * if tg3_poll() is not scheduled.
 		 */
 		tp->last_tag = sblk->status_tag;
-		__netif_rx_schedule(&tp->napi);
+		__napi_schedule(&tp->napi);
 	}
 out:
 	return IRQ_RETVAL(handled);
diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c
index 75461dbd4876..1138782e5611 100644
--- a/drivers/net/tsi108_eth.c
+++ b/drivers/net/tsi108_eth.c
@@ -888,7 +888,7 @@ static int tsi108_poll(struct napi_struct *napi, int budget)
 
 	if (num_received < budget) {
 		data->rxpending = 0;
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 		TSI_WRITE(TSI108_EC_INTMASK,
 				     TSI_READ(TSI108_EC_INTMASK)
@@ -915,11 +915,11 @@ static void tsi108_rx_int(struct net_device *dev)
 	 *
 	 * This can happen if this code races with tsi108_poll(), which masks
 	 * the interrupts after tsi108_irq_one() read the mask, but before
-	 * netif_rx_schedule is called.  It could also happen due to calls
+	 * napi_schedule is called.  It could also happen due to calls
 	 * from tsi108_check_rxring().
 	 */
 
-	if (netif_rx_schedule_prep(&data->napi)) {
+	if (napi_schedule_prep(&data->napi)) {
 		/* Mask, rather than ack, the receive interrupts.  The ack
 		 * will happen in tsi108_poll().
 		 */
@@ -930,7 +930,7 @@ static void tsi108_rx_int(struct net_device *dev)
 				     | TSI108_INT_RXTHRESH |
 				     TSI108_INT_RXOVERRUN | TSI108_INT_RXERROR |
 				     TSI108_INT_RXWAIT);
-		__netif_rx_schedule(&data->napi);
+		__napi_schedule(&data->napi);
 	} else {
 		if (!netif_running(dev)) {
 			/* This can happen if an interrupt occurs while the
diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c
index 6c3428a37c0b..9f946d421088 100644
--- a/drivers/net/tulip/interrupt.c
+++ b/drivers/net/tulip/interrupt.c
@@ -103,7 +103,7 @@ void oom_timer(unsigned long data)
 {
         struct net_device *dev = (struct net_device *)data;
 	struct tulip_private *tp = netdev_priv(dev);
-	netif_rx_schedule(&tp->napi);
+	napi_schedule(&tp->napi);
 }
 
 int tulip_poll(struct napi_struct *napi, int budget)
@@ -300,7 +300,7 @@ int tulip_poll(struct napi_struct *napi, int budget)
 
          /* Remove us from polling list and enable RX intr. */
 
-         netif_rx_complete(napi);
+         napi_complete(napi);
          iowrite32(tulip_tbl[tp->chip_id].valid_intrs, tp->base_addr+CSR7);
 
          /* The last op happens after poll completion. Which means the following:
@@ -333,10 +333,10 @@ int tulip_poll(struct napi_struct *napi, int budget)
 
          /* Think: timer_pending() was an explicit signature of bug.
           * Timer can be pending now but fired and completed
-          * before we did netif_rx_complete(). See? We would lose it. */
+          * before we did napi_complete(). See? We would lose it. */
 
          /* remove ourselves from the polling list */
-         netif_rx_complete(napi);
+         napi_complete(napi);
 
          return work_done;
 }
@@ -519,7 +519,7 @@ irqreturn_t tulip_interrupt(int irq, void *dev_instance)
 			rxd++;
 			/* Mask RX intrs and add the device to poll list. */
 			iowrite32(tulip_tbl[tp->chip_id].valid_intrs&~RxPollInt, ioaddr + CSR7);
-			netif_rx_schedule(&tp->napi);
+			napi_schedule(&tp->napi);
 
 			if (!(csr5&~(AbnormalIntr|NormalIntr|RxPollInt|TPLnkPass)))
                                break;
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 3af9a9516ccb..dcff5ade6d08 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -1783,7 +1783,7 @@ typhoon_poll(struct napi_struct *napi, int budget)
 	}
 
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		iowrite32(TYPHOON_INTR_NONE,
 				tp->ioaddr + TYPHOON_REG_INTR_MASK);
 		typhoon_post_pci_writes(tp->ioaddr);
@@ -1806,10 +1806,10 @@ typhoon_interrupt(int irq, void *dev_instance)
 
 	iowrite32(intr_status, ioaddr + TYPHOON_REG_INTR_STATUS);
 
-	if (netif_rx_schedule_prep(&tp->napi)) {
+	if (napi_schedule_prep(&tp->napi)) {
 		iowrite32(TYPHOON_INTR_ALL, ioaddr + TYPHOON_REG_INTR_MASK);
 		typhoon_post_pci_writes(ioaddr);
-		__netif_rx_schedule(&tp->napi);
+		__napi_schedule(&tp->napi);
 	} else {
 		printk(KERN_ERR "%s: Error, poll already scheduled\n",
                        dev->name);
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index 11441225bf41..6def6f826a54 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -3251,7 +3251,7 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget)
 		howmany += ucc_geth_rx(ugeth, i, budget - howmany);
 
 	if (howmany < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		setbits32(ugeth->uccf->p_uccm, UCCE_RX_EVENTS);
 	}
 
@@ -3282,10 +3282,10 @@ static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
 
 	/* check for receive events that require processing */
 	if (ucce & UCCE_RX_EVENTS) {
-		if (netif_rx_schedule_prep(&ugeth->napi)) {
+		if (napi_schedule_prep(&ugeth->napi)) {
 			uccm &= ~UCCE_RX_EVENTS;
 			out_be32(uccf->p_uccm, uccm);
-			__netif_rx_schedule(&ugeth->napi);
+			__napi_schedule(&ugeth->napi);
 		}
 	}
 
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index 3b8e63254277..4671436ecf0e 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -589,7 +589,7 @@ static int rhine_napipoll(struct napi_struct *napi, int budget)
 	work_done = rhine_rx(dev, budget);
 
 	if (work_done < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 
 		iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
 			  IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
@@ -1319,7 +1319,7 @@ static irqreturn_t rhine_interrupt(int irq, void *dev_instance)
 				  IntrPCIErr | IntrStatsMax | IntrLinkChange,
 				  ioaddr + IntrEnable);
 
-			netif_rx_schedule(&rp->napi);
+			napi_schedule(&rp->napi);
 		}
 
 		if (intr_status & (IntrTxErrSummary | IntrTxDone)) {
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 43f6523c40be..30ae6d9a12af 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -374,9 +374,9 @@ static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
 	/* Schedule NAPI, Suppress further interrupts if successful. */
-	if (netif_rx_schedule_prep(&vi->napi)) {
+	if (napi_schedule_prep(&vi->napi)) {
 		rvq->vq_ops->disable_cb(rvq);
-		__netif_rx_schedule(&vi->napi);
+		__napi_schedule(&vi->napi);
 	}
 }
 
@@ -402,11 +402,11 @@ again:
 
 	/* Out of packets? */
 	if (received < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
 		    && napi_schedule_prep(napi)) {
 			vi->rvq->vq_ops->disable_cb(vi->rvq);
-			__netif_rx_schedule(napi);
+			__napi_schedule(napi);
 			goto again;
 		}
 	}
@@ -580,9 +580,9 @@ static int virtnet_open(struct net_device *dev)
 	 * won't get another interrupt, so process any outstanding packets
 	 * now.  virtnet_poll wants re-enable the queue, so we disable here.
 	 * We synchronize against interrupts via NAPI_STATE_SCHED */
-	if (netif_rx_schedule_prep(&vi->napi)) {
+	if (napi_schedule_prep(&vi->napi)) {
 		vi->rvq->vq_ops->disable_cb(vi->rvq);
-		__netif_rx_schedule(&vi->napi);
+		__napi_schedule(&vi->napi);
 	}
 	return 0;
 }
diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index 08b3536944fe..497b003d7239 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -341,7 +341,7 @@ static int sca_poll(struct napi_struct *napi, int budget)
 		received = sca_rx_done(port, budget);
 
 	if (received < budget) {
-		netif_rx_complete(napi);
+		napi_complete(napi);
 		enable_intr(port);
 	}
 
@@ -359,7 +359,7 @@ static irqreturn_t sca_intr(int irq, void *dev_id)
 		if (port && (isr0 & (i ? 0x08002200 : 0x00080022))) {
 			handled = 1;
 			disable_intr(port);
-			netif_rx_schedule(&port->napi);
+			napi_schedule(&port->napi);
 		}
 	}
 
diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 7e8bbba2cc1b..3bf7d3f447db 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -622,7 +622,7 @@ static void hss_hdlc_rx_irq(void *pdev)
 	printk(KERN_DEBUG "%s: hss_hdlc_rx_irq\n", dev->name);
 #endif
 	qmgr_disable_irq(queue_ids[port->id].rx);
-	netif_rx_schedule(&port->napi);
+	napi_schedule(&port->napi);
 }
 
 static int hss_hdlc_poll(struct napi_struct *napi, int budget)
@@ -649,15 +649,15 @@ static int hss_hdlc_poll(struct napi_struct *napi, int budget)
 		if ((n = queue_get_desc(rxq, port, 0)) < 0) {
 #if DEBUG_RX
 			printk(KERN_DEBUG "%s: hss_hdlc_poll"
-			       " netif_rx_complete\n", dev->name);
+			       " napi_complete\n", dev->name);
 #endif
-			netif_rx_complete(napi);
+			napi_complete(napi);
 			qmgr_enable_irq(rxq);
 			if (!qmgr_stat_empty(rxq) &&
-			    netif_rx_reschedule(napi)) {
+			    napi_reschedule(napi)) {
 #if DEBUG_RX
 				printk(KERN_DEBUG "%s: hss_hdlc_poll"
-				       " netif_rx_reschedule succeeded\n",
+				       " napi_reschedule succeeded\n",
 				       dev->name);
 #endif
 				qmgr_disable_irq(rxq);
@@ -1069,7 +1069,7 @@ static int hss_hdlc_open(struct net_device *dev)
 	hss_start_hdlc(port);
 
 	/* we may already have RX data, enables IRQ */
-	netif_rx_schedule(&port->napi);
+	napi_schedule(&port->napi);
 	return 0;
 
 err_unlock:
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index cd6184ee08ee..9f102a6535c4 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -196,7 +196,7 @@ static void rx_refill_timeout(unsigned long data)
 {
 	struct net_device *dev = (struct net_device *)data;
 	struct netfront_info *np = netdev_priv(dev);
-	netif_rx_schedule(&np->napi);
+	napi_schedule(&np->napi);
 }
 
 static int netfront_tx_slot_available(struct netfront_info *np)
@@ -328,7 +328,7 @@ static int xennet_open(struct net_device *dev)
 		xennet_alloc_rx_buffers(dev);
 		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
 		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-			netif_rx_schedule(&np->napi);
+			napi_schedule(&np->napi);
 	}
 	spin_unlock_bh(&np->rx_lock);
 
@@ -979,7 +979,7 @@ err:
 
 		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
 		if (!more_to_do)
-			__netif_rx_complete(napi);
+			__napi_complete(napi);
 
 		local_irq_restore(flags);
 	}
@@ -1317,7 +1317,7 @@ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
 		xennet_tx_buf_gc(dev);
 		/* Under tx_lock: protects access to rx shared-ring indexes. */
 		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-			netif_rx_schedule(&np->napi);
+			napi_schedule(&np->napi);
 	}
 
 	spin_unlock_irqrestore(&np->tx_lock, flags);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ec54785d34f9..dd8a35b3e8b2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1574,56 +1574,6 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 	return (1 << debug_value) - 1;
 }
 
-/* Test if receive needs to be scheduled but only if up */
-static inline int netif_rx_schedule_prep(struct napi_struct *napi)
-{
-	return napi_schedule_prep(napi);
-}
-
-/* Add interface to tail of rx poll list. This assumes that _prep has
- * already been called and returned 1.
- */
-static inline void __netif_rx_schedule(struct napi_struct *napi)
-{
-	__napi_schedule(napi);
-}
-
-/* Try to reschedule poll. Called by irq handler. */
-
-static inline void netif_rx_schedule(struct napi_struct *napi)
-{
-	if (netif_rx_schedule_prep(napi))
-		__netif_rx_schedule(napi);
-}
-
-/* Try to reschedule poll. Called by dev->poll() after netif_rx_complete().  */
-static inline int netif_rx_reschedule(struct napi_struct *napi)
-{
-	if (napi_schedule_prep(napi)) {
-		__netif_rx_schedule(napi);
-		return 1;
-	}
-	return 0;
-}
-
-/* same as netif_rx_complete, except that local_irq_save(flags)
- * has already been issued
- */
-static inline void __netif_rx_complete(struct napi_struct *napi)
-{
-	__napi_complete(napi);
-}
-
-/* Remove interface from poll list: it must be in the poll list
- * on current cpu. This primitive is called by dev->poll(), when
- * it completes the work. The device cannot be out of poll list at this
- * moment, it is BUG().
- */
-static inline void netif_rx_complete(struct napi_struct *napi)
-{
-	napi_complete(napi);
-}
-
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 {
 	spin_lock(&txq->_xmit_lock);
-- 
cgit v1.2.3


From f90f92eed74251034f251e3cdf4fa5c4c1f09df0 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Fri, 16 Jan 2009 23:36:30 +0000
Subject: dccp: Initialisation framework for feature negotiation

This initialises feature negotiation from two tables, which are in
turn are initialised from sysctls.

As a novel feature, specifics of the implementation (e.g. that short
seqnos and ECN are not yet available) are advertised for robustness.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h | 19 ---------------
 net/dccp/feat.c      | 65 ++++++++++++++++++++++++++++++++++++++++++++--------
 net/dccp/feat.h      |  2 +-
 3 files changed, 57 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 61734e27abb7..990e97fa1f07 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -369,28 +369,9 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   * Will be used to pass the state from dccp_request_sock to dccp_sock.
   *
   * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
-  * @dccpms_pending - List of features being negotiated
-  * @dccpms_conf -
   */
 struct dccp_minisock {
 	__u64			dccpms_sequence_window;
-	struct list_head	dccpms_pending;
-	struct list_head	dccpms_conf;
-};
-
-struct dccp_opt_conf {
-	__u8			*dccpoc_val;
-	__u8			dccpoc_len;
-};
-
-struct dccp_opt_pend {
-	struct list_head	dccpop_node;
-	__u8			dccpop_type;
-	__u8			dccpop_feat;
-	__u8		        *dccpop_val;
-	__u8			dccpop_len;
-	int			dccpop_conf;
-	struct dccp_opt_conf    *dccpop_sc;
 };
 
 extern void dccp_minisock_init(struct dccp_minisock *dmsk);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 4152308958ab..67ffac9905f8 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1115,23 +1115,70 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 	return 0;	/* ignore FN options in all other states */
 }
 
+/**
+ * dccp_feat_init  -  Seed feature negotiation with host-specific defaults
+ * This initialises global defaults, depending on the value of the sysctls.
+ * These can later be overridden by registering changes via setsockopt calls.
+ * The last link in the chain is finalise_settings, to make sure that between
+ * here and the start of actual feature negotiation no inconsistencies enter.
+ *
+ * All features not appearing below use either defaults or are otherwise
+ * later adjusted through dccp_feat_finalise_settings().
+ */
 int dccp_feat_init(struct sock *sk)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_minisock *dmsk = dccp_msk(sk);
+	struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
+	u8 on = 1, off = 0;
 	int rc;
+	struct {
+		u8 *val;
+		u8 len;
+	} tx, rx;
+
+	/* Non-negotiable (NN) features */
+	rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0,
+				    sysctl_dccp_feat_sequence_window);
+	if (rc)
+		return rc;
+
+	/* Server-priority (SP) features */
+
+	/* Advertise that short seqnos are not supported (7.6.1) */
+	rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1);
+	if (rc)
+		return rc;
 
-	INIT_LIST_HEAD(&dmsk->dccpms_pending);	/* XXX no longer used */
-	INIT_LIST_HEAD(&dmsk->dccpms_conf);	/* XXX no longer used */
+	/* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */
+	rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1);
+	if (rc)
+		return rc;
+
+	/*
+	 * We advertise the available list of CCIDs and reorder according to
+	 * preferences, to avoid failure resulting from negotiating different
+	 * singleton values (which always leads to failure).
+	 * These settings can still (later) be overridden via sockopts.
+	 */
+	if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
+	    ccid_get_builtin_ccids(&rx.val, &rx.len))
+		return -ENOBUFS;
 
-	/* Ack ratio */
-	rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0,
-				dp->dccps_l_ack_ratio);
+	if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) ||
+	    !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len))
+		goto free_ccid_lists;
+
+	rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len);
+	if (rc)
+		goto free_ccid_lists;
+
+	rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len);
+
+free_ccid_lists:
+	kfree(tx.val);
+	kfree(rx.val);
 	return rc;
 }
 
-EXPORT_SYMBOL_GPL(dccp_feat_init);
-
 int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 9b46e2a7866e..5e7b8481cd04 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -113,13 +113,13 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
 #define dccp_feat_debug(type, feat, val)
 #endif /* CONFIG_IP_DCCP_DEBUG */
 
+extern int  dccp_feat_init(struct sock *sk);
 extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
 				  u8 const *list, u8 len);
 extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
 extern int  dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
 				    u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
 extern int  dccp_feat_clone_list(struct list_head const *, struct list_head *);
-extern int  dccp_feat_init(struct sock *sk);
 
 /*
  * Encoding variable-length options and their maximum length.
-- 
cgit v1.2.3


From 792b48780e8b6435d017cef4b5c304876a48653e Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Fri, 16 Jan 2009 23:36:31 +0000
Subject: dccp: Implement both feature-local and feature-remote Sequence Window
 feature

This adds full support for local/remote Sequence Window feature, from which the
  * sequence-number-validity (W) and
  * acknowledgment-number-validity (W') windows
derive as specified in RFC 4340, 7.5.3.

Specifically, the following is contained in this patch:
  * integrated new socket fields into dccp_sk;
  * updated the update_gsr/gss routines with regard to these fields;
  * updated handler code: the Sequence Window feature is located at the TX side,
    so the local feature is meant if the handler-rx flag is false;
  * the initialisation of `rcv_wnd' in reqsk is removed, since
    - rcv_wnd is not used by the code anywhere;
    - sequence number checks are not done in the LISTEN state (cf. 7.5.3);
    - dccp_check_req checks the Ack number validity more rigorously;
  * the `struct dccp_minisock' became empty and is now removed.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.txt |  3 ++-
 include/linux/dccp.h              | 24 ++++--------------------
 net/dccp/dccp.h                   | 16 +++++++---------
 net/dccp/feat.c                   | 13 +++++++++++--
 net/dccp/minisocks.c              | 11 -----------
 net/dccp/proto.c                  |  2 --
 6 files changed, 24 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 7a3bb1abb830..b132e4a3cf0f 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -141,7 +141,8 @@ rx_ccid = 2
 	Default CCID for the receiver-sender half-connection; see tx_ccid.
 
 seq_window = 100
-	The initial sequence window (sec. 7.5.2).
+	The initial sequence window (sec. 7.5.2) of the sender. This influences
+	the local ackno validity and the remote seqno validity windows (7.5.1).
 
 tx_qlen = 5
 	The size of the transmit buffer in packets. A value of 0 corresponds
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 990e97fa1f07..7a0502ab383a 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -363,19 +363,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 /* FIXME: for now we're default to 1 but it should really be 0 */
 #define DCCPF_INITIAL_SEND_NDP_COUNT		1
 
-/**
-  * struct dccp_minisock - Minimal DCCP connection representation
-  *
-  * Will be used to pass the state from dccp_request_sock to dccp_sock.
-  *
-  * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
-  */
-struct dccp_minisock {
-	__u64			dccpms_sequence_window;
-};
-
-extern void dccp_minisock_init(struct dccp_minisock *dmsk);
-
 /**
  * struct dccp_request_sock  -  represent DCCP-specific connection request
  * @dreq_inet_rsk: structure inherited from
@@ -464,13 +451,14 @@ struct dccp_ackvec;
  * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo
  * @dccps_l_ack_ratio - feature-local Ack Ratio
  * @dccps_r_ack_ratio - feature-remote Ack Ratio
+ * @dccps_l_seq_win - local Sequence Window (influences ack number validity)
+ * @dccps_r_seq_win - remote Sequence Window (influences seq number validity)
  * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
  * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
  * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2)
  * @dccps_ndp_count - number of Non Data Packets since last data packet
  * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
  * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
- * @dccps_minisock - associated minisock (accessed via dccp_msk)
  * @dccps_featneg - tracks feature-negotiation state (mostly during handshake)
  * @dccps_hc_rx_ackvec - rx half connection ack vector
  * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
@@ -504,12 +492,13 @@ struct dccp_sock {
 	__u32				dccps_timestamp_time;
 	__u16				dccps_l_ack_ratio;
 	__u16				dccps_r_ack_ratio;
+	__u64				dccps_l_seq_win:48;
+	__u64				dccps_r_seq_win:48;
 	__u8				dccps_pcslen:4;
 	__u8				dccps_pcrlen:4;
 	__u8				dccps_send_ndp_count:1;
 	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
-	struct dccp_minisock		dccps_minisock;
 	struct list_head		dccps_featneg;
 	struct dccp_ackvec		*dccps_hc_rx_ackvec;
 	struct ccid			*dccps_hc_rx_ccid;
@@ -527,11 +516,6 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk)
 	return (struct dccp_sock *)sk;
 }
 
-static inline struct dccp_minisock *dccp_msk(const struct sock *sk)
-{
-	return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock;
-}
-
 static inline const char *dccp_role(const struct sock *sk)
 {
 	switch (dccp_sk(sk)->dccps_role) {
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f2230fc168e1..04ae91898a68 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -409,23 +409,21 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
 static inline void dccp_update_gsr(struct sock *sk, u64 seq)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	const struct dccp_minisock *dmsk = dccp_msk(sk);
 
 	dp->dccps_gsr = seq;
-	dccp_set_seqno(&dp->dccps_swl,
-		       dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4));
-	dccp_set_seqno(&dp->dccps_swh,
-		       dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4);
+	/* Sequence validity window depends on remote Sequence Window (7.5.1) */
+	dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
+	dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
 }
 
 static inline void dccp_update_gss(struct sock *sk, u64 seq)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
-	dp->dccps_awh = dp->dccps_gss = seq;
-	dccp_set_seqno(&dp->dccps_awl,
-		       (dp->dccps_gss -
-			dccp_msk(sk)->dccpms_sequence_window + 1));
+	dp->dccps_gss = seq;
+	/* Ack validity window depends on local Sequence Window value (7.5.1) */
+	dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
+	dp->dccps_awh = dp->dccps_gss;
 }
 
 static inline int dccp_ack_pending(const struct sock *sk)
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 67ffac9905f8..7303f79705d2 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -51,8 +51,17 @@ static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
 
 static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
 {
-	if (!rx)
-		dccp_msk(sk)->dccpms_sequence_window = seq_win;
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	if (rx) {
+		dp->dccps_r_seq_win = seq_win;
+		/* propagate changes to update SWL/SWH */
+		dccp_update_gsr(sk, dp->dccps_gsr);
+	} else {
+		dp->dccps_l_seq_win = seq_win;
+		/* propagate changes to update AWL */
+		dccp_update_gss(sk, dp->dccps_gss);
+	}
 	return 0;
 }
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 6821ae33dd37..5ca49cec95f5 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -42,11 +42,6 @@ struct inet_timewait_death_row dccp_death_row = {
 
 EXPORT_SYMBOL_GPL(dccp_death_row);
 
-void dccp_minisock_init(struct dccp_minisock *dmsk)
-{
-	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
-}
-
 void dccp_time_wait(struct sock *sk, int state, int timeo)
 {
 	struct inet_timewait_sock *tw = NULL;
@@ -110,7 +105,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		struct dccp_request_sock *dreq = dccp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 		struct dccp_sock *newdp = dccp_sk(newsk);
-		struct dccp_minisock *newdmsk = dccp_msk(newsk);
 
 		newdp->dccps_role	    = DCCP_ROLE_SERVER;
 		newdp->dccps_hc_rx_ackvec   = NULL;
@@ -128,10 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		 *    Initialize S.GAR := S.ISS
 		 *    Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
 		 */
-
-		/* See dccp_v4_conn_request */
-		newdmsk->dccpms_sequence_window = req->rcv_wnd;
-
 		newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
 		dccp_update_gss(newsk, dreq->dreq_iss);
 
@@ -290,7 +280,6 @@ int dccp_reqsk_init(struct request_sock *req,
 	inet_rsk(req)->rmt_port	  = dccp_hdr(skb)->dccph_sport;
 	inet_rsk(req)->loc_port	  = dccp_hdr(skb)->dccph_dport;
 	inet_rsk(req)->acked	  = 0;
-	req->rcv_wnd		  = sysctl_dccp_feat_sequence_window;
 	dreq->dreq_timestamp_echo = 0;
 
 	/* inherit feature negotiation options from listening socket */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 945b4d5d23b3..314a1b5c033c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -174,8 +174,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	dccp_minisock_init(&dp->dccps_minisock);
-
 	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
 	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
 	sk->sk_state		= DCCP_CLOSED;
-- 
cgit v1.2.3


From 883ca833e5fb814fb03426c9d35e5489ce43e8da Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Fri, 16 Jan 2009 23:36:32 +0000
Subject: dccp: Initialisation and type-checking of feature sysctls

This patch takes care of initialising and type-checking sysctls
related to feature negotiation. Type checking is important since some
of the sysctls now directly impact the feature-negotiation process.

The sysctls are initialised with the known default values for each
feature.  For the type-checking the value constraints from RFC 4340
are used:

 * Sequence Window uses the specified Wmin=32, the maximum is ulong (4 bytes),
   tested and confirmed that it works up to 4294967295 - for Gbps speed;
 * Ack Ratio is between 0 .. 0xffff (2-byte unsigned integer);
 * CCIDs are between 0 .. 255;
 * request_retries, retries1, retries2 also between 0..255 for good measure;
 * tx_qlen is checked to be non-negative;
 * sync_ratelimit remains as before.

Notes:
------
 1. Die s@sysctl_dccp_feat@sysctl_dccp@g since the sysctls are now in feat.c.
 2. As pointed out by Arnaldo, the pattern of type-checking repeats itself in
    other places, sometimes with exactly the same kind of definitions (e.g.
    "static int zero;"). It may be a good idea (kernel janitors?) to consolidate
    type checking. For the sake of keeping the changeset small and in order not
    to affect other subsystems, I have not strived to generalise here.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h |  8 --------
 net/dccp/dccp.h      |  3 ---
 net/dccp/feat.c      | 11 ++++++++---
 net/dccp/feat.h      |  8 ++++++++
 net/dccp/options.c   |  4 ----
 net/dccp/sysctl.c    | 43 ++++++++++++++++++++++++++++++-------------
 6 files changed, 46 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 7a0502ab383a..7434a8353e23 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -355,14 +355,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 	return __dccp_hdr_len(dccp_hdr(skb));
 }
 
-
-/* initial values for each feature */
-#define DCCPF_INITIAL_SEQUENCE_WINDOW		100
-#define DCCPF_INITIAL_ACK_RATIO			2
-#define DCCPF_INITIAL_CCID			DCCPC_CCID2
-/* FIXME: for now we're default to 1 but it should really be 0 */
-#define DCCPF_INITIAL_SEND_NDP_COUNT		1
-
 /**
  * struct dccp_request_sock  -  represent DCCP-specific connection request
  * @dreq_inet_rsk: structure inherited from
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 04ae91898a68..44a5bc6f6785 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -95,9 +95,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 extern int  sysctl_dccp_request_retries;
 extern int  sysctl_dccp_retries1;
 extern int  sysctl_dccp_retries2;
-extern int  sysctl_dccp_feat_sequence_window;
-extern int  sysctl_dccp_feat_rx_ccid;
-extern int  sysctl_dccp_feat_tx_ccid;
 extern int  sysctl_dccp_tx_qlen;
 extern int  sysctl_dccp_sync_ratelimit;
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 7303f79705d2..12006e9b2472 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -25,6 +25,11 @@
 #include "ccid.h"
 #include "feat.h"
 
+/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */
+unsigned long	sysctl_dccp_sequence_window __read_mostly = 100;
+int		sysctl_dccp_rx_ccid	    __read_mostly = 2,
+		sysctl_dccp_tx_ccid	    __read_mostly = 2;
+
 /*
  * Feature activation handlers.
  *
@@ -1146,7 +1151,7 @@ int dccp_feat_init(struct sock *sk)
 
 	/* Non-negotiable (NN) features */
 	rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0,
-				    sysctl_dccp_feat_sequence_window);
+				    sysctl_dccp_sequence_window);
 	if (rc)
 		return rc;
 
@@ -1172,8 +1177,8 @@ int dccp_feat_init(struct sock *sk)
 	    ccid_get_builtin_ccids(&rx.val, &rx.len))
 		return -ENOBUFS;
 
-	if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) ||
-	    !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len))
+	if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
+	    !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
 		goto free_ccid_lists;
 
 	rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len);
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 5e7b8481cd04..40aa7a10bd5f 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -100,6 +100,13 @@ struct ccid_dependency {
 	u8	val;
 };
 
+/*
+ * Sysctls to seed defaults for feature negotiation
+ */
+extern unsigned long sysctl_dccp_sequence_window;
+extern int	     sysctl_dccp_rx_ccid;
+extern int	     sysctl_dccp_tx_ccid;
+
 #ifdef CONFIG_IP_DCCP_DEBUG
 extern const char *dccp_feat_typename(const u8 type);
 extern const char *dccp_feat_name(const u8 feat);
@@ -114,6 +121,7 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
 #endif /* CONFIG_IP_DCCP_DEBUG */
 
 extern int  dccp_feat_init(struct sock *sk);
+extern void dccp_feat_initialise_sysctls(void);
 extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
 				  u8 const *list, u8 len);
 extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 7b1165c21f51..3e2726c7182d 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -23,10 +23,6 @@
 #include "dccp.h"
 #include "feat.h"
 
-int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
-int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
-int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
-
 u64 dccp_decode_value_var(const u8 *bf, const u8 len)
 {
 	u64 value = 0;
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 018e210875e1..a5a1856234e7 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -18,55 +18,72 @@
 #error This file should not be compiled without CONFIG_SYSCTL defined
 #endif
 
+/* Boundary values */
+static int		zero     = 0,
+			u8_max   = 0xFF;
+static unsigned long	seqw_min = 32;
+
 static struct ctl_table dccp_default_table[] = {
 	{
 		.procname	= "seq_window",
-		.data		= &sysctl_dccp_feat_sequence_window,
-		.maxlen		= sizeof(sysctl_dccp_feat_sequence_window),
+		.data		= &sysctl_dccp_sequence_window,
+		.maxlen		= sizeof(sysctl_dccp_sequence_window),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_doulongvec_minmax,
+		.extra1		= &seqw_min,		/* RFC 4340, 7.5.2 */
 	},
 	{
 		.procname	= "rx_ccid",
-		.data		= &sysctl_dccp_feat_rx_ccid,
-		.maxlen		= sizeof(sysctl_dccp_feat_rx_ccid),
+		.data		= &sysctl_dccp_rx_ccid,
+		.maxlen		= sizeof(sysctl_dccp_rx_ccid),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,		/* RFC 4340, 10. */
 	},
 	{
 		.procname	= "tx_ccid",
-		.data		= &sysctl_dccp_feat_tx_ccid,
-		.maxlen		= sizeof(sysctl_dccp_feat_tx_ccid),
+		.data		= &sysctl_dccp_tx_ccid,
+		.maxlen		= sizeof(sysctl_dccp_tx_ccid),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,		/* RFC 4340, 10. */
 	},
 	{
 		.procname	= "request_retries",
 		.data		= &sysctl_dccp_request_retries,
 		.maxlen		= sizeof(sysctl_dccp_request_retries),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,
 	},
 	{
 		.procname	= "retries1",
 		.data		= &sysctl_dccp_retries1,
 		.maxlen		= sizeof(sysctl_dccp_retries1),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,
 	},
 	{
 		.procname	= "retries2",
 		.data		= &sysctl_dccp_retries2,
 		.maxlen		= sizeof(sysctl_dccp_retries2),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,
 	},
 	{
 		.procname	= "tx_qlen",
 		.data		= &sysctl_dccp_tx_qlen,
 		.maxlen		= sizeof(sysctl_dccp_tx_qlen),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
 	},
 	{
 		.procname	= "sync_ratelimit",
-- 
cgit v1.2.3


From 9f4d26d0f3016cf8813977d624751b94465fa317 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 19 Jan 2009 17:09:49 -0800
Subject: virtio_net: add link status handling

Allow the host to inform us that the link is down by adding
a VIRTIO_NET_F_STATUS which indicates that device status is
available in virtio_net config.

This is currently useful for simulating link down conditions
(e.g. using proposed qemu 'set_link' monitor command) but
would also be needed if we were to support device assignment
via virtio.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (added future masking)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c   | 43 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/virtio_net.h |  5 +++++
 2 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 30ae6d9a12af..9b33d6ebf542 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -42,6 +42,7 @@ struct virtnet_info
 	struct virtqueue *rvq, *svq;
 	struct net_device *dev;
 	struct napi_struct napi;
+	unsigned int status;
 
 	/* The skb we couldn't send because buffers were full. */
 	struct sk_buff *last_xmit_skb;
@@ -611,6 +612,7 @@ static struct ethtool_ops virtnet_ethtool_ops = {
 	.set_tx_csum = virtnet_set_tx_csum,
 	.set_sg = ethtool_op_set_sg,
 	.set_tso = ethtool_op_set_tso,
+	.get_link = ethtool_op_get_link,
 };
 
 #define MIN_MTU 68
@@ -636,6 +638,41 @@ static const struct net_device_ops virtnet_netdev = {
 #endif
 };
 
+static void virtnet_update_status(struct virtnet_info *vi)
+{
+	u16 v;
+
+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS))
+		return;
+
+	vi->vdev->config->get(vi->vdev,
+			      offsetof(struct virtio_net_config, status),
+			      &v, sizeof(v));
+
+	/* Ignore unknown (future) status bits */
+	v &= VIRTIO_NET_S_LINK_UP;
+
+	if (vi->status == v)
+		return;
+
+	vi->status = v;
+
+	if (vi->status & VIRTIO_NET_S_LINK_UP) {
+		netif_carrier_on(vi->dev);
+		netif_wake_queue(vi->dev);
+	} else {
+		netif_carrier_off(vi->dev);
+		netif_stop_queue(vi->dev);
+	}
+}
+
+static void virtnet_config_changed(struct virtio_device *vdev)
+{
+	struct virtnet_info *vi = vdev->priv;
+
+	virtnet_update_status(vi);
+}
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int err;
@@ -738,6 +775,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 		goto unregister;
 	}
 
+	vi->status = VIRTIO_NET_S_LINK_UP;
+	virtnet_update_status(vi);
+
 	pr_debug("virtnet: registered device %s\n", dev->name);
 	return 0;
 
@@ -793,7 +833,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
 	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
 	VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
-	VIRTIO_NET_F_MRG_RXBUF,
+	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS,
 	VIRTIO_F_NOTIFY_ON_EMPTY,
 };
 
@@ -805,6 +845,7 @@ static struct virtio_driver virtio_net = {
 	.id_table =	id_table,
 	.probe =	virtnet_probe,
 	.remove =	__devexit_p(virtnet_remove),
+	.config_changed = virtnet_config_changed,
 };
 
 static int __init init(void)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 5cdd0aa8bde9..f76bd4a753ef 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -21,11 +21,16 @@
 #define VIRTIO_NET_F_HOST_ECN	13	/* Host can handle TSO[6] w/ ECN in. */
 #define VIRTIO_NET_F_HOST_UFO	14	/* Host can handle UFO in. */
 #define VIRTIO_NET_F_MRG_RXBUF	15	/* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS	16	/* virtio_net_config.status available */
+
+#define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
 
 struct virtio_net_config
 {
 	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
 	__u8 mac[6];
+	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
+	__u16 status;
 } __attribute__((packed));
 
 /* This is the first element of the scatter-gather list.  If you don't
-- 
cgit v1.2.3


From 273ec51dd7ceaa76e038875d85061ec856d8905e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 21 Jan 2009 15:55:35 -0800
Subject: net: ppp_generic - introduce net-namespace functionality v2

- Each namespace contains ppp channels and units separately
  with appropriate locks

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp_generic.c   | 275 +++++++++++++++++++++++++++++++-------------
 include/linux/ppp_channel.h |   4 +
 2 files changed, 202 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 7b2728b8f1b7..4405a76ed3da 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -49,6 +49,10 @@
 #include <net/slhc_vj.h>
 #include <asm/atomic.h>
 
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
 #define PPP_VERSION	"2.4.2"
 
 /*
@@ -131,6 +135,7 @@ struct ppp {
 	struct sock_filter *active_filter;/* filter for pkts to reset idle */
 	unsigned pass_len, active_len;
 #endif /* CONFIG_PPP_FILTER */
+	struct net	*ppp_net;	/* the net we belong to */
 };
 
 /*
@@ -155,6 +160,7 @@ struct channel {
 	struct rw_semaphore chan_sem;	/* protects `chan' during chan ioctl */
 	spinlock_t	downl;		/* protects `chan', file.xq dequeue */
 	struct ppp	*ppp;		/* ppp unit we're connected to */
+	struct net	*chan_net;	/* the net channel belongs to */
 	struct list_head clist;		/* link in list of channels per unit */
 	rwlock_t	upl;		/* protects `ppp' */
 #ifdef CONFIG_PPP_MULTILINK
@@ -173,26 +179,35 @@ struct channel {
  * channel.downl.
  */
 
-/*
- * all_ppp_mutex protects the all_ppp_units mapping.
- * It also ensures that finding a ppp unit in the all_ppp_units map
- * and updating its file.refcnt field is atomic.
- */
-static DEFINE_MUTEX(all_ppp_mutex);
 static atomic_t ppp_unit_count = ATOMIC_INIT(0);
-static DEFINE_IDR(ppp_units_idr);
-
-/*
- * all_channels_lock protects all_channels and last_channel_index,
- * and the atomicity of find a channel and updating its file.refcnt
- * field.
- */
-static DEFINE_SPINLOCK(all_channels_lock);
-static LIST_HEAD(all_channels);
-static LIST_HEAD(new_channels);
-static int last_channel_index;
 static atomic_t channel_count = ATOMIC_INIT(0);
 
+/* per-net private data for this module */
+static unsigned int ppp_net_id;
+struct ppp_net {
+	/* units to ppp mapping */
+	struct idr units_idr;
+
+	/*
+	 * all_ppp_mutex protects the units_idr mapping.
+	 * It also ensures that finding a ppp unit in the units_idr
+	 * map and updating its file.refcnt field is atomic.
+	 */
+	struct mutex all_ppp_mutex;
+
+	/* channels */
+	struct list_head all_channels;
+	struct list_head new_channels;
+	int last_channel_index;
+
+	/*
+	 * all_channels_lock protects all_channels and
+	 * last_channel_index, and the atomicity of find
+	 * a channel and updating its file.refcnt field.
+	 */
+	spinlock_t all_channels_lock;
+};
+
 /* Get the PPP protocol number from a skb */
 #define PPP_PROTO(skb)	(((skb)->data[0] << 8) + (skb)->data[1])
 
@@ -216,8 +231,8 @@ static atomic_t channel_count = ATOMIC_INIT(0);
 #define seq_after(a, b)		((s32)((a) - (b)) > 0)
 
 /* Prototypes. */
-static int ppp_unattached_ioctl(struct ppp_file *pf, struct file *file,
-				unsigned int cmd, unsigned long arg);
+static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
+			struct file *file, unsigned int cmd, unsigned long arg);
 static void ppp_xmit_process(struct ppp *ppp);
 static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
 static void ppp_push(struct ppp *ppp);
@@ -240,12 +255,12 @@ static void ppp_ccp_peek(struct ppp *ppp, struct sk_buff *skb, int inbound);
 static void ppp_ccp_closed(struct ppp *ppp);
 static struct compressor *find_compressor(int type);
 static void ppp_get_stats(struct ppp *ppp, struct ppp_stats *st);
-static struct ppp *ppp_create_interface(int unit, int *retp);
+static struct ppp *ppp_create_interface(struct net *net, int unit, int *retp);
 static void init_ppp_file(struct ppp_file *pf, int kind);
 static void ppp_shutdown_interface(struct ppp *ppp);
 static void ppp_destroy_interface(struct ppp *ppp);
-static struct ppp *ppp_find_unit(int unit);
-static struct channel *ppp_find_channel(int unit);
+static struct ppp *ppp_find_unit(struct ppp_net *pn, int unit);
+static struct channel *ppp_find_channel(struct ppp_net *pn, int unit);
 static int ppp_connect_channel(struct channel *pch, int unit);
 static int ppp_disconnect_channel(struct channel *pch);
 static void ppp_destroy_channel(struct channel *pch);
@@ -256,6 +271,14 @@ static void *unit_find(struct idr *p, int n);
 
 static struct class *ppp_class;
 
+/* per net-namespace data */
+static inline struct ppp_net *ppp_pernet(struct net *net)
+{
+	BUG_ON(!net);
+
+	return net_generic(net, ppp_net_id);
+}
+
 /* Translates a PPP protocol number to a NP index (NP == network protocol) */
 static inline int proto_to_npindex(int proto)
 {
@@ -544,7 +567,8 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	int __user *p = argp;
 
 	if (!pf)
-		return ppp_unattached_ioctl(pf, file, cmd, arg);
+		return ppp_unattached_ioctl(current->nsproxy->net_ns,
+					pf, file, cmd, arg);
 
 	if (cmd == PPPIOCDETACH) {
 		/*
@@ -763,12 +787,13 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-static int ppp_unattached_ioctl(struct ppp_file *pf, struct file *file,
-				unsigned int cmd, unsigned long arg)
+static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
+			struct file *file, unsigned int cmd, unsigned long arg)
 {
 	int unit, err = -EFAULT;
 	struct ppp *ppp;
 	struct channel *chan;
+	struct ppp_net *pn;
 	int __user *p = (int __user *)arg;
 
 	lock_kernel();
@@ -777,7 +802,7 @@ static int ppp_unattached_ioctl(struct ppp_file *pf, struct file *file,
 		/* Create a new ppp unit */
 		if (get_user(unit, p))
 			break;
-		ppp = ppp_create_interface(unit, &err);
+		ppp = ppp_create_interface(net, unit, &err);
 		if (!ppp)
 			break;
 		file->private_data = &ppp->file;
@@ -792,29 +817,31 @@ static int ppp_unattached_ioctl(struct ppp_file *pf, struct file *file,
 		/* Attach to an existing ppp unit */
 		if (get_user(unit, p))
 			break;
-		mutex_lock(&all_ppp_mutex);
 		err = -ENXIO;
-		ppp = ppp_find_unit(unit);
+		pn = ppp_pernet(net);
+		mutex_lock(&pn->all_ppp_mutex);
+		ppp = ppp_find_unit(pn, unit);
 		if (ppp) {
 			atomic_inc(&ppp->file.refcnt);
 			file->private_data = &ppp->file;
 			err = 0;
 		}
-		mutex_unlock(&all_ppp_mutex);
+		mutex_unlock(&pn->all_ppp_mutex);
 		break;
 
 	case PPPIOCATTCHAN:
 		if (get_user(unit, p))
 			break;
-		spin_lock_bh(&all_channels_lock);
 		err = -ENXIO;
-		chan = ppp_find_channel(unit);
+		pn = ppp_pernet(net);
+		spin_lock_bh(&pn->all_channels_lock);
+		chan = ppp_find_channel(pn, unit);
 		if (chan) {
 			atomic_inc(&chan->file.refcnt);
 			file->private_data = &chan->file;
 			err = 0;
 		}
-		spin_unlock_bh(&all_channels_lock);
+		spin_unlock_bh(&pn->all_channels_lock);
 		break;
 
 	default:
@@ -834,6 +861,51 @@ static const struct file_operations ppp_device_fops = {
 	.release	= ppp_release
 };
 
+static __net_init int ppp_init_net(struct net *net)
+{
+	struct ppp_net *pn;
+	int err;
+
+	pn = kzalloc(sizeof(*pn), GFP_KERNEL);
+	if (!pn)
+		return -ENOMEM;
+
+	idr_init(&pn->units_idr);
+	mutex_init(&pn->all_ppp_mutex);
+
+	INIT_LIST_HEAD(&pn->all_channels);
+	INIT_LIST_HEAD(&pn->new_channels);
+
+	spin_lock_init(&pn->all_channels_lock);
+
+	err = net_assign_generic(net, ppp_net_id, pn);
+	if (err) {
+		kfree(pn);
+		return err;
+	}
+
+	return 0;
+}
+
+static __net_exit void ppp_exit_net(struct net *net)
+{
+	struct ppp_net *pn;
+
+	pn = net_generic(net, ppp_net_id);
+	idr_destroy(&pn->units_idr);
+	/*
+	 * if someone has cached our net then
+	 * further net_generic call will return NULL
+	 */
+	net_assign_generic(net, ppp_net_id, NULL);
+	kfree(pn);
+}
+
+static __net_initdata struct pernet_operations ppp_net_ops = {
+	.init = ppp_init_net,
+	.exit = ppp_exit_net,
+};
+
 #define PPP_MAJOR	108
 
 /* Called at boot time if ppp is compiled into the kernel,
@@ -843,25 +915,36 @@ static int __init ppp_init(void)
 	int err;
 
 	printk(KERN_INFO "PPP generic driver version " PPP_VERSION "\n");
-	err = register_chrdev(PPP_MAJOR, "ppp", &ppp_device_fops);
-	if (!err) {
-		ppp_class = class_create(THIS_MODULE, "ppp");
-		if (IS_ERR(ppp_class)) {
-			err = PTR_ERR(ppp_class);
-			goto out_chrdev;
-		}
-		device_create(ppp_class, NULL, MKDEV(PPP_MAJOR, 0), NULL,
-			      "ppp");
+
+	err = register_pernet_gen_device(&ppp_net_id, &ppp_net_ops);
+	if (err) {
+		printk(KERN_ERR "failed to register PPP pernet device (%d)\n", err);
+		goto out;
 	}
 
-out:
-	if (err)
+	err = register_chrdev(PPP_MAJOR, "ppp", &ppp_device_fops);
+	if (err) {
 		printk(KERN_ERR "failed to register PPP device (%d)\n", err);
-	return err;
+		goto out_net;
+	}
+
+	ppp_class = class_create(THIS_MODULE, "ppp");
+	if (IS_ERR(ppp_class)) {
+		err = PTR_ERR(ppp_class);
+		goto out_chrdev;
+	}
+
+	/* not a big deal if we fail here :-) */
+	device_create(ppp_class, NULL, MKDEV(PPP_MAJOR, 0), NULL, "ppp");
+
+	return 0;
 
 out_chrdev:
 	unregister_chrdev(PPP_MAJOR, "ppp");
-	goto out;
+out_net:
+	unregister_pernet_gen_device(ppp_net_id, &ppp_net_ops);
+out:
+	return err;
 }
 
 /*
@@ -969,6 +1052,7 @@ static void ppp_setup(struct net_device *dev)
 	dev->tx_queue_len = 3;
 	dev->type = ARPHRD_PPP;
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+	dev->features |= NETIF_F_NETNS_LOCAL;
 }
 
 /*
@@ -1986,19 +2070,27 @@ ppp_mp_reconstruct(struct ppp *ppp)
  * Channel interface.
  */
 
-/*
- * Create a new, unattached ppp channel.
- */
-int
-ppp_register_channel(struct ppp_channel *chan)
+/* Create a new, unattached ppp channel. */
+int ppp_register_channel(struct ppp_channel *chan)
+{
+	return ppp_register_net_channel(current->nsproxy->net_ns, chan);
+}
+
+/* Create a new, unattached ppp channel for specified net. */
+int ppp_register_net_channel(struct net *net, struct ppp_channel *chan)
 {
 	struct channel *pch;
+	struct ppp_net *pn;
 
 	pch = kzalloc(sizeof(struct channel), GFP_KERNEL);
 	if (!pch)
 		return -ENOMEM;
+
+	pn = ppp_pernet(net);
+
 	pch->ppp = NULL;
 	pch->chan = chan;
+	pch->chan_net = net;
 	chan->ppp = pch;
 	init_ppp_file(&pch->file, CHANNEL);
 	pch->file.hdrlen = chan->hdrlen;
@@ -2008,11 +2100,13 @@ ppp_register_channel(struct ppp_channel *chan)
 	init_rwsem(&pch->chan_sem);
 	spin_lock_init(&pch->downl);
 	rwlock_init(&pch->upl);
-	spin_lock_bh(&all_channels_lock);
-	pch->file.index = ++last_channel_index;
-	list_add(&pch->list, &new_channels);
+
+	spin_lock_bh(&pn->all_channels_lock);
+	pch->file.index = ++pn->last_channel_index;
+	list_add(&pch->list, &pn->new_channels);
 	atomic_inc(&channel_count);
-	spin_unlock_bh(&all_channels_lock);
+	spin_unlock_bh(&pn->all_channels_lock);
+
 	return 0;
 }
 
@@ -2053,9 +2147,11 @@ void
 ppp_unregister_channel(struct ppp_channel *chan)
 {
 	struct channel *pch = chan->ppp;
+	struct ppp_net *pn;
 
 	if (!pch)
 		return;		/* should never happen */
+
 	chan->ppp = NULL;
 
 	/*
@@ -2068,9 +2164,12 @@ ppp_unregister_channel(struct ppp_channel *chan)
 	spin_unlock_bh(&pch->downl);
 	up_write(&pch->chan_sem);
 	ppp_disconnect_channel(pch);
-	spin_lock_bh(&all_channels_lock);
+
+	pn = ppp_pernet(pch->chan_net);
+	spin_lock_bh(&pn->all_channels_lock);
 	list_del(&pch->list);
-	spin_unlock_bh(&all_channels_lock);
+	spin_unlock_bh(&pn->all_channels_lock);
+
 	pch->file.dead = 1;
 	wake_up_interruptible(&pch->file.rwait);
 	if (atomic_dec_and_test(&pch->file.refcnt))
@@ -2395,9 +2494,10 @@ ppp_get_stats(struct ppp *ppp, struct ppp_stats *st)
  * unit == -1 means allocate a new number.
  */
 static struct ppp *
-ppp_create_interface(int unit, int *retp)
+ppp_create_interface(struct net *net, int unit, int *retp)
 {
 	struct ppp *ppp;
+	struct ppp_net *pn;
 	struct net_device *dev = NULL;
 	int ret = -ENOMEM;
 	int i;
@@ -2406,6 +2506,8 @@ ppp_create_interface(int unit, int *retp)
 	if (!dev)
 		goto out1;
 
+	pn = ppp_pernet(net);
+
 	ppp = netdev_priv(dev);
 	ppp->dev = dev;
 	ppp->mru = PPP_MRU;
@@ -2421,17 +2523,23 @@ ppp_create_interface(int unit, int *retp)
 	skb_queue_head_init(&ppp->mrq);
 #endif /* CONFIG_PPP_MULTILINK */
 
+	/*
+	 * drum roll: don't forget to set
+	 * the net device is belong to
+	 */
+	dev_net_set(dev, net);
+
 	ret = -EEXIST;
-	mutex_lock(&all_ppp_mutex);
+	mutex_lock(&pn->all_ppp_mutex);
 
 	if (unit < 0) {
-		unit = unit_get(&ppp_units_idr, ppp);
+		unit = unit_get(&pn->units_idr, ppp);
 		if (unit < 0) {
 			*retp = unit;
 			goto out2;
 		}
 	} else {
-		if (unit_find(&ppp_units_idr, unit))
+		if (unit_find(&pn->units_idr, unit))
 			goto out2; /* unit already exists */
 		/*
 		 * if caller need a specified unit number
@@ -2442,7 +2550,7 @@ ppp_create_interface(int unit, int *retp)
 		 * fair but at least pppd will ask us to allocate
 		 * new unit in this case so user is happy :)
 		 */
-		unit = unit_set(&ppp_units_idr, ppp, unit);
+		unit = unit_set(&pn->units_idr, ppp, unit);
 		if (unit < 0)
 			goto out2;
 	}
@@ -2453,20 +2561,22 @@ ppp_create_interface(int unit, int *retp)
 
 	ret = register_netdev(dev);
 	if (ret != 0) {
-		unit_put(&ppp_units_idr, unit);
+		unit_put(&pn->units_idr, unit);
 		printk(KERN_ERR "PPP: couldn't register device %s (%d)\n",
 		       dev->name, ret);
 		goto out2;
 	}
 
+	ppp->ppp_net = net;
+
 	atomic_inc(&ppp_unit_count);
-	mutex_unlock(&all_ppp_mutex);
+	mutex_unlock(&pn->all_ppp_mutex);
 
 	*retp = 0;
 	return ppp;
 
 out2:
-	mutex_unlock(&all_ppp_mutex);
+	mutex_unlock(&pn->all_ppp_mutex);
 	free_netdev(dev);
 out1:
 	*retp = ret;
@@ -2492,7 +2602,11 @@ init_ppp_file(struct ppp_file *pf, int kind)
  */
 static void ppp_shutdown_interface(struct ppp *ppp)
 {
-	mutex_lock(&all_ppp_mutex);
+	struct ppp_net *pn;
+
+	pn = ppp_pernet(ppp->ppp_net);
+	mutex_lock(&pn->all_ppp_mutex);
+
 	/* This will call dev_close() for us. */
 	ppp_lock(ppp);
 	if (!ppp->closing) {
@@ -2502,11 +2616,12 @@ static void ppp_shutdown_interface(struct ppp *ppp)
 	} else
 		ppp_unlock(ppp);
 
-	unit_put(&ppp_units_idr, ppp->file.index);
+	unit_put(&pn->units_idr, ppp->file.index);
 	ppp->file.dead = 1;
 	ppp->owner = NULL;
 	wake_up_interruptible(&ppp->file.rwait);
-	mutex_unlock(&all_ppp_mutex);
+
+	mutex_unlock(&pn->all_ppp_mutex);
 }
 
 /*
@@ -2554,9 +2669,9 @@ static void ppp_destroy_interface(struct ppp *ppp)
  * The caller should have locked the all_ppp_mutex.
  */
 static struct ppp *
-ppp_find_unit(int unit)
+ppp_find_unit(struct ppp_net *pn, int unit)
 {
-	return unit_find(&ppp_units_idr, unit);
+	return unit_find(&pn->units_idr, unit);
 }
 
 /*
@@ -2568,20 +2683,22 @@ ppp_find_unit(int unit)
  * when we have a lot of channels in use.
  */
 static struct channel *
-ppp_find_channel(int unit)
+ppp_find_channel(struct ppp_net *pn, int unit)
 {
 	struct channel *pch;
 
-	list_for_each_entry(pch, &new_channels, list) {
+	list_for_each_entry(pch, &pn->new_channels, list) {
 		if (pch->file.index == unit) {
-			list_move(&pch->list, &all_channels);
+			list_move(&pch->list, &pn->all_channels);
 			return pch;
 		}
 	}
-	list_for_each_entry(pch, &all_channels, list) {
+
+	list_for_each_entry(pch, &pn->all_channels, list) {
 		if (pch->file.index == unit)
 			return pch;
 	}
+
 	return NULL;
 }
 
@@ -2592,11 +2709,14 @@ static int
 ppp_connect_channel(struct channel *pch, int unit)
 {
 	struct ppp *ppp;
+	struct ppp_net *pn;
 	int ret = -ENXIO;
 	int hdrlen;
 
-	mutex_lock(&all_ppp_mutex);
-	ppp = ppp_find_unit(unit);
+	pn = ppp_pernet(pch->chan_net);
+
+	mutex_lock(&pn->all_ppp_mutex);
+	ppp = ppp_find_unit(pn, unit);
 	if (!ppp)
 		goto out;
 	write_lock_bh(&pch->upl);
@@ -2620,7 +2740,7 @@ ppp_connect_channel(struct channel *pch, int unit)
  outl:
 	write_unlock_bh(&pch->upl);
  out:
-	mutex_unlock(&all_ppp_mutex);
+	mutex_unlock(&pn->all_ppp_mutex);
 	return ret;
 }
 
@@ -2677,7 +2797,7 @@ static void __exit ppp_cleanup(void)
 	unregister_chrdev(PPP_MAJOR, "ppp");
 	device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0));
 	class_destroy(ppp_class);
-	idr_destroy(&ppp_units_idr);
+	unregister_pernet_gen_device(ppp_net_id, &ppp_net_ops);
 }
 
 /*
@@ -2743,6 +2863,7 @@ static void *unit_find(struct idr *p, int n)
 module_init(ppp_init);
 module_exit(ppp_cleanup);
 
+EXPORT_SYMBOL(ppp_register_net_channel);
 EXPORT_SYMBOL(ppp_register_channel);
 EXPORT_SYMBOL(ppp_unregister_channel);
 EXPORT_SYMBOL(ppp_channel_index);
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index a942892d6dfe..9d64bdf14770 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -22,6 +22,7 @@
 #include <linux/list.h>
 #include <linux/skbuff.h>
 #include <linux/poll.h>
+#include <net/net_namespace.h>
 
 struct ppp_channel;
 
@@ -56,6 +57,9 @@ extern void ppp_input(struct ppp_channel *, struct sk_buff *);
    that we may have missed a packet. */
 extern void ppp_input_error(struct ppp_channel *, int code);
 
+/* Attach a channel to a given PPP unit in specified net. */
+extern int ppp_register_net_channel(struct net *, struct ppp_channel *);
+
 /* Attach a channel to a given PPP unit. */
 extern int ppp_register_channel(struct ppp_channel *);
 
-- 
cgit v1.2.3


From d52a61c04c6c0814ca270a088feedb126436598e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 22 Jan 2009 00:38:56 -0800
Subject: irq: clean up irq stat methods

David Miller suggested, related to a kstat_irqs related build breakage:

> Either linux/kernel_stat.h provides the kstat_incr_irqs_this_cpu
> interface or linux/irq.h does, not both.

So move them to kernel_stat.h.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h         | 6 ------
 include/linux/kernel_stat.h | 9 ++++++---
 2 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index e9a878978c85..48901e9a33b9 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -202,12 +202,6 @@ extern struct irq_desc irq_desc[NR_IRQS];
 extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
 #endif /* CONFIG_SPARSE_IRQ */
 
-#define kstat_irqs_this_cpu(DESC) \
-	((DESC)->kstat_irqs[smp_processor_id()])
-#define kstat_incr_irqs_this_cpu(irqno, DESC) \
-	((DESC)->kstat_irqs[smp_processor_id()]++)
-
-
 extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
 
 static inline struct irq_desc *
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index a3431b164bea..0c8b89f28a95 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -52,16 +52,19 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
 {
 	kstat_this_cpu.irqs[irq]++;
 }
-#endif
-
 
-#ifndef CONFIG_GENERIC_HARDIRQS
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
        return kstat_cpu(cpu).irqs[irq];
 }
 #else
+#include <linux/irq.h>
 extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
+#define kstat_irqs_this_cpu(DESC) \
+	((DESC)->kstat_irqs[smp_processor_id()])
+#define kstat_incr_irqs_this_cpu(irqno, DESC) \
+	((DESC)->kstat_irqs[smp_processor_id()]++)
+
 #endif
 
 /*
-- 
cgit v1.2.3


From 5c0a66f5f3c9c59e2c341400048e2cff768e67a9 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Thu, 22 Jan 2009 04:56:17 +0000
Subject: netns: ipmr: store netns in struct mfc_cache

This patch stores into struct mfc_cache the network namespace each
mfc_cache belongs to. The new member is mfc_net.

mfc_net is assigned at cache allocation and doesn't change during
the rest of the cache entry life.
A new net parameter is added to ipmr_cache_alloc/ipmr_cache_alloc_unres.

This will help to retrieve the current netns around the IPv4 multicast
routing code.

At the moment, all mfc_cache are allocated in init_net.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h | 15 +++++++++++++++
 net/ipv4/ipmr.c        | 26 +++++++++++++++++---------
 2 files changed, 32 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 8a455694d682..beaf3aae8aaf 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -193,6 +193,9 @@ struct vif_device
 struct mfc_cache 
 {
 	struct mfc_cache *next;			/* Next entry on cache line 	*/
+#ifdef CONFIG_NET_NS
+	struct net *mfc_net;
+#endif
 	__be32 mfc_mcastgrp;			/* Group the entry belongs to 	*/
 	__be32 mfc_origin;			/* Source of packet 		*/
 	vifi_t mfc_parent;			/* Source interface		*/
@@ -215,6 +218,18 @@ struct mfc_cache
 	} mfc_un;
 };
 
+static inline
+struct net *mfc_net(const struct mfc_cache *mfc)
+{
+	return read_pnet(&mfc->mfc_net);
+}
+
+static inline
+void mfc_net_set(struct mfc_cache *mfc, struct net *net)
+{
+	write_pnet(&mfc->mfc_net, hold_net(net));
+}
+
 #define MFC_STATIC		1
 #define MFC_NOTIFY		2
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 75a5f79cc226..8428a0fb5c10 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -327,6 +327,12 @@ static int vif_delete(int vifi, int notify)
 	return 0;
 }
 
+static inline void ipmr_cache_free(struct mfc_cache *c)
+{
+	release_net(mfc_net(c));
+	kmem_cache_free(mrt_cachep, c);
+}
+
 /* Destroy an unresolved cache entry, killing queued skbs
    and reporting error to netlink readers.
  */
@@ -353,7 +359,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 			kfree_skb(skb);
 	}
 
-	kmem_cache_free(mrt_cachep, c);
+	ipmr_cache_free(c);
 }
 
 
@@ -528,22 +534,24 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
 /*
  *	Allocate a multicast cache entry
  */
-static struct mfc_cache *ipmr_cache_alloc(void)
+static struct mfc_cache *ipmr_cache_alloc(struct net *net)
 {
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (c == NULL)
 		return NULL;
 	c->mfc_un.res.minvif = MAXVIFS;
+	mfc_net_set(c, net);
 	return c;
 }
 
-static struct mfc_cache *ipmr_cache_alloc_unres(void)
+static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
 {
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 	if (c == NULL)
 		return NULL;
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10*HZ;
+	mfc_net_set(c, net);
 	return c;
 }
 
@@ -695,7 +703,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 		 */
 
 		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
-		    (c=ipmr_cache_alloc_unres())==NULL) {
+		    (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -718,7 +726,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 			 */
 			spin_unlock_bh(&mfc_unres_lock);
 
-			kmem_cache_free(mrt_cachep, c);
+			ipmr_cache_free(c);
 			kfree_skb(skb);
 			return err;
 		}
@@ -763,7 +771,7 @@ static int ipmr_mfc_delete(struct mfcctl *mfc)
 			*cp = c->next;
 			write_unlock_bh(&mrt_lock);
 
-			kmem_cache_free(mrt_cachep, c);
+			ipmr_cache_free(c);
 			return 0;
 		}
 	}
@@ -796,7 +804,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 		return -EINVAL;
 
-	c = ipmr_cache_alloc();
+	c = ipmr_cache_alloc(&init_net);
 	if (c == NULL)
 		return -ENOMEM;
 
@@ -831,7 +839,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 
 	if (uc) {
 		ipmr_cache_resolve(uc, c);
-		kmem_cache_free(mrt_cachep, uc);
+		ipmr_cache_free(uc);
 	}
 	return 0;
 }
@@ -868,7 +876,7 @@ static void mroute_clean_tables(struct sock *sk)
 			*cp = c->next;
 			write_unlock_bh(&mrt_lock);
 
-			kmem_cache_free(mrt_cachep, c);
+			ipmr_cache_free(c);
 		}
 	}
 
-- 
cgit v1.2.3


From 4feb88e5c694bfe414cbc3ce0e383f7f7038f90b Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Thu, 22 Jan 2009 04:56:23 +0000
Subject: netns: ipmr: enable namespace support in ipv4 multicast routing code

This last patch makes the appropriate changes to use and propagate the
network namespace where needed in IPv4 multicast routing code.

This consists mainly in replacing all the remaining init_net occurences
with current netns pointer retrieved from sockets, net devices or
mfc_caches depending on the routines' contexts.

Some routines receive a new 'struct net' parameter to propagate the current
netns:
* vif_add/vif_delete
* ipmr_new_tunnel
* mroute_clean_tables
* ipmr_cache_find
* ipmr_cache_report
* ipmr_cache_unresolved
* ipmr_mfc_add/ipmr_mfc_delete
* ipmr_get_route
* rt_fill_info (in route.c)

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h |   3 +-
 net/ipv4/ipmr.c        | 243 +++++++++++++++++++++++++++----------------------
 net/ipv4/route.c       |  11 ++-
 3 files changed, 143 insertions(+), 114 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index beaf3aae8aaf..0d45b4e8d367 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -256,7 +256,8 @@ void mfc_net_set(struct mfc_cache *mfc, struct net *net)
 
 #ifdef __KERNEL__
 struct rtmsg;
-extern int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait);
+extern int ipmr_get_route(struct net *net, struct sk_buff *skb,
+			  struct rtmsg *rtm, int nowait);
 #endif
 
 #endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a4fd97f1920c..21a6dc710f20 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -95,7 +95,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 static struct kmem_cache *mrt_cachep __read_mostly;
 
 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
-static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
+static int ipmr_cache_report(struct net *net,
+			     struct sk_buff *pkt, vifi_t vifi, int assert);
 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 
 #ifdef CONFIG_IP_PIMSM_V2
@@ -108,9 +109,11 @@ static struct timer_list ipmr_expire_timer;
 
 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
 {
+	struct net *net = dev_net(dev);
+
 	dev_close(dev);
 
-	dev = __dev_get_by_name(&init_net, "tunl0");
+	dev = __dev_get_by_name(net, "tunl0");
 	if (dev) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 		struct ifreq ifr;
@@ -136,11 +139,11 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
 }
 
 static
-struct net_device *ipmr_new_tunnel(struct vifctl *v)
+struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 {
 	struct net_device  *dev;
 
-	dev = __dev_get_by_name(&init_net, "tunl0");
+	dev = __dev_get_by_name(net, "tunl0");
 
 	if (dev) {
 		const struct net_device_ops *ops = dev->netdev_ops;
@@ -169,7 +172,8 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
 
 		dev = NULL;
 
-		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
+		if (err == 0 &&
+		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
 			dev->flags |= IFF_MULTICAST;
 
 			in_dev = __in_dev_get_rtnl(dev);
@@ -199,10 +203,13 @@ failure:
 
 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct net *net = dev_net(dev);
+
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
 	dev->stats.tx_packets++;
-	ipmr_cache_report(skb, init_net.ipv4.mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
+	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
+			  IGMPMSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
 	return 0;
@@ -269,16 +276,16 @@ failure:
  *	@notify: Set to 1, if the caller is a notifier_call
  */
 
-static int vif_delete(int vifi, int notify)
+static int vif_delete(struct net *net, int vifi, int notify)
 {
 	struct vif_device *v;
 	struct net_device *dev;
 	struct in_device *in_dev;
 
-	if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
+	if (vifi < 0 || vifi >= net->ipv4.maxvif)
 		return -EADDRNOTAVAIL;
 
-	v = &init_net.ipv4.vif_table[vifi];
+	v = &net->ipv4.vif_table[vifi];
 
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
@@ -290,17 +297,17 @@ static int vif_delete(int vifi, int notify)
 	}
 
 #ifdef CONFIG_IP_PIMSM
-	if (vifi == init_net.ipv4.mroute_reg_vif_num)
-		init_net.ipv4.mroute_reg_vif_num = -1;
+	if (vifi == net->ipv4.mroute_reg_vif_num)
+		net->ipv4.mroute_reg_vif_num = -1;
 #endif
 
-	if (vifi+1 == init_net.ipv4.maxvif) {
+	if (vifi+1 == net->ipv4.maxvif) {
 		int tmp;
 		for (tmp=vifi-1; tmp>=0; tmp--) {
-			if (VIF_EXISTS(&init_net, tmp))
+			if (VIF_EXISTS(net, tmp))
 				break;
 		}
-		init_net.ipv4.maxvif = tmp+1;
+		net->ipv4.maxvif = tmp+1;
 	}
 
 	write_unlock_bh(&mrt_lock);
@@ -333,8 +340,9 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 {
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
+	struct net *net = mfc_net(c);
 
-	atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
+	atomic_dec(&net->ipv4.cache_resolve_queue_len);
 
 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
 		if (ip_hdr(skb)->version == 0) {
@@ -346,7 +354,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 			e->error = -ETIMEDOUT;
 			memset(&e->msg, 0, sizeof(e->msg));
 
-			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
 			kfree_skb(skb);
 	}
@@ -401,13 +409,14 @@ out:
 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
 {
 	int vifi;
+	struct net *net = mfc_net(cache);
 
 	cache->mfc_un.res.minvif = MAXVIFS;
 	cache->mfc_un.res.maxvif = 0;
 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 
-	for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
-		if (VIF_EXISTS(&init_net, vifi) &&
+	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
+		if (VIF_EXISTS(net, vifi) &&
 		    ttls[vifi] && ttls[vifi] < 255) {
 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 			if (cache->mfc_un.res.minvif > vifi)
@@ -418,16 +427,16 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
 	}
 }
 
-static int vif_add(struct vifctl *vifc, int mrtsock)
+static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
 {
 	int vifi = vifc->vifc_vifi;
-	struct vif_device *v = &init_net.ipv4.vif_table[vifi];
+	struct vif_device *v = &net->ipv4.vif_table[vifi];
 	struct net_device *dev;
 	struct in_device *in_dev;
 	int err;
 
 	/* Is vif busy ? */
-	if (VIF_EXISTS(&init_net, vifi))
+	if (VIF_EXISTS(net, vifi))
 		return -EADDRINUSE;
 
 	switch (vifc->vifc_flags) {
@@ -437,7 +446,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 		 * Special Purpose VIF in PIM
 		 * All the packets will be sent to the daemon
 		 */
-		if (init_net.ipv4.mroute_reg_vif_num >= 0)
+		if (net->ipv4.mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
 		dev = ipmr_reg_vif();
 		if (!dev)
@@ -451,7 +460,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 		break;
 #endif
 	case VIFF_TUNNEL:
-		dev = ipmr_new_tunnel(vifc);
+		dev = ipmr_new_tunnel(net, vifc);
 		if (!dev)
 			return -ENOBUFS;
 		err = dev_set_allmulti(dev, 1);
@@ -462,7 +471,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 		}
 		break;
 	case 0:
-		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
+		dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
 		if (!dev)
 			return -EADDRNOTAVAIL;
 		err = dev_set_allmulti(dev, 1);
@@ -503,20 +512,22 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 	v->dev = dev;
 #ifdef CONFIG_IP_PIMSM
 	if (v->flags&VIFF_REGISTER)
-		init_net.ipv4.mroute_reg_vif_num = vifi;
+		net->ipv4.mroute_reg_vif_num = vifi;
 #endif
-	if (vifi+1 > init_net.ipv4.maxvif)
-		init_net.ipv4.maxvif = vifi+1;
+	if (vifi+1 > net->ipv4.maxvif)
+		net->ipv4.maxvif = vifi+1;
 	write_unlock_bh(&mrt_lock);
 	return 0;
 }
 
-static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
+static struct mfc_cache *ipmr_cache_find(struct net *net,
+					 __be32 origin,
+					 __be32 mcastgrp)
 {
 	int line = MFC_HASH(mcastgrp, origin);
 	struct mfc_cache *c;
 
-	for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) {
+	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
 		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 			break;
 	}
@@ -576,7 +587,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 				memset(&e->msg, 0, sizeof(e->msg));
 			}
 
-			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
 		} else
 			ip_mr_forward(skb, c, 0);
 	}
@@ -589,7 +600,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
  *	Called under mrt_lock.
  */
 
-static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
+static int ipmr_cache_report(struct net *net,
+			     struct sk_buff *pkt, vifi_t vifi, int assert)
 {
 	struct sk_buff *skb;
 	const int ihl = ip_hdrlen(pkt);
@@ -621,7 +633,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
 		msg->im_mbz = 0;
-		msg->im_vif = init_net.ipv4.mroute_reg_vif_num;
+		msg->im_vif = net->ipv4.mroute_reg_vif_num;
 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 					     sizeof(struct iphdr));
@@ -653,7 +665,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	skb->transport_header = skb->network_header;
 	}
 
-	if (init_net.ipv4.mroute_sk == NULL) {
+	if (net->ipv4.mroute_sk == NULL) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -661,7 +673,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	/*
 	 *	Deliver to mrouted
 	 */
-	ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
+	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
 	if (ret < 0) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
@@ -676,7 +688,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
  */
 
 static int
-ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
+ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 {
 	int err;
 	struct mfc_cache *c;
@@ -684,7 +696,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 
 	spin_lock_bh(&mfc_unres_lock);
 	for (c=mfc_unres_queue; c; c=c->next) {
-		if (net_eq(mfc_net(c), &init_net) &&
+		if (net_eq(mfc_net(c), net) &&
 		    c->mfc_mcastgrp == iph->daddr &&
 		    c->mfc_origin == iph->saddr)
 			break;
@@ -695,8 +707,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 ||
-		    (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
+		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
+		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -713,7 +725,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 		/*
 		 *	Reflect first query at mrouted.
 		 */
-		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
+		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
+		if (err < 0) {
 			/* If the report failed throw the cache entry
 			   out - Brad Parker
 			 */
@@ -724,7 +737,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 			return err;
 		}
 
-		atomic_inc(&init_net.ipv4.cache_resolve_queue_len);
+		atomic_inc(&net->ipv4.cache_resolve_queue_len);
 		c->next = mfc_unres_queue;
 		mfc_unres_queue = c;
 
@@ -750,14 +763,14 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
  *	MFC cache manipulation by user space mroute daemon
  */
 
-static int ipmr_mfc_delete(struct mfcctl *mfc)
+static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 {
 	int line;
 	struct mfc_cache *c, **cp;
 
 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 
-	for (cp = &init_net.ipv4.mfc_cache_array[line];
+	for (cp = &net->ipv4.mfc_cache_array[line];
 	     (c = *cp) != NULL; cp = &c->next) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
@@ -772,14 +785,14 @@ static int ipmr_mfc_delete(struct mfcctl *mfc)
 	return -ENOENT;
 }
 
-static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
+static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 {
 	int line;
 	struct mfc_cache *uc, *c, **cp;
 
 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 
-	for (cp = &init_net.ipv4.mfc_cache_array[line];
+	for (cp = &net->ipv4.mfc_cache_array[line];
 	     (c = *cp) != NULL; cp = &c->next) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
@@ -799,7 +812,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 		return -EINVAL;
 
-	c = ipmr_cache_alloc(&init_net);
+	c = ipmr_cache_alloc(net);
 	if (c == NULL)
 		return -ENOMEM;
 
@@ -811,8 +824,8 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	c->next = init_net.ipv4.mfc_cache_array[line];
-	init_net.ipv4.mfc_cache_array[line] = c;
+	c->next = net->ipv4.mfc_cache_array[line];
+	net->ipv4.mfc_cache_array[line] = c;
 	write_unlock_bh(&mrt_lock);
 
 	/*
@@ -822,11 +835,11 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 	spin_lock_bh(&mfc_unres_lock);
 	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
 	     cp = &uc->next) {
-		if (net_eq(mfc_net(uc), &init_net) &&
+		if (net_eq(mfc_net(uc), net) &&
 		    uc->mfc_origin == c->mfc_origin &&
 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 			*cp = uc->next;
-			atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
+			atomic_dec(&net->ipv4.cache_resolve_queue_len);
 			break;
 		}
 	}
@@ -845,16 +858,16 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct sock *sk)
+static void mroute_clean_tables(struct net *net)
 {
 	int i;
 
 	/*
 	 *	Shut down all active vif entries
 	 */
-	for (i = 0; i < init_net.ipv4.maxvif; i++) {
-		if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
-			vif_delete(i, 0);
+	for (i = 0; i < net->ipv4.maxvif; i++) {
+		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
+			vif_delete(net, i, 0);
 	}
 
 	/*
@@ -863,7 +876,7 @@ static void mroute_clean_tables(struct sock *sk)
 	for (i=0; i<MFC_LINES; i++) {
 		struct mfc_cache *c, **cp;
 
-		cp = &init_net.ipv4.mfc_cache_array[i];
+		cp = &net->ipv4.mfc_cache_array[i];
 		while ((c = *cp) != NULL) {
 			if (c->mfc_flags&MFC_STATIC) {
 				cp = &c->next;
@@ -877,13 +890,13 @@ static void mroute_clean_tables(struct sock *sk)
 		}
 	}
 
-	if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) {
+	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
 		struct mfc_cache *c, **cp;
 
 		spin_lock_bh(&mfc_unres_lock);
 		cp = &mfc_unres_queue;
 		while ((c = *cp) != NULL) {
-			if (!net_eq(mfc_net(c), &init_net)) {
+			if (!net_eq(mfc_net(c), net)) {
 				cp = &c->next;
 				continue;
 			}
@@ -897,15 +910,17 @@ static void mroute_clean_tables(struct sock *sk)
 
 static void mrtsock_destruct(struct sock *sk)
 {
+	struct net *net = sock_net(sk);
+
 	rtnl_lock();
-	if (sk == init_net.ipv4.mroute_sk) {
-		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
+	if (sk == net->ipv4.mroute_sk) {
+		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
 
 		write_lock_bh(&mrt_lock);
-		init_net.ipv4.mroute_sk = NULL;
+		net->ipv4.mroute_sk = NULL;
 		write_unlock_bh(&mrt_lock);
 
-		mroute_clean_tables(sk);
+		mroute_clean_tables(net);
 	}
 	rtnl_unlock();
 }
@@ -922,9 +937,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 	int ret;
 	struct vifctl vif;
 	struct mfcctl mfc;
+	struct net *net = sock_net(sk);
 
 	if (optname != MRT_INIT) {
-		if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
+		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
 			return -EACCES;
 	}
 
@@ -937,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 			return -ENOPROTOOPT;
 
 		rtnl_lock();
-		if (init_net.ipv4.mroute_sk) {
+		if (net->ipv4.mroute_sk) {
 			rtnl_unlock();
 			return -EADDRINUSE;
 		}
@@ -945,15 +961,15 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
 		if (ret == 0) {
 			write_lock_bh(&mrt_lock);
-			init_net.ipv4.mroute_sk = sk;
+			net->ipv4.mroute_sk = sk;
 			write_unlock_bh(&mrt_lock);
 
-			IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
+			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
 		}
 		rtnl_unlock();
 		return ret;
 	case MRT_DONE:
-		if (sk != init_net.ipv4.mroute_sk)
+		if (sk != net->ipv4.mroute_sk)
 			return -EACCES;
 		return ip_ra_control(sk, 0, NULL);
 	case MRT_ADD_VIF:
@@ -966,9 +982,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 			return -ENFILE;
 		rtnl_lock();
 		if (optname == MRT_ADD_VIF) {
-			ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
+			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
 		} else {
-			ret = vif_delete(vif.vifc_vifi, 0);
+			ret = vif_delete(net, vif.vifc_vifi, 0);
 		}
 		rtnl_unlock();
 		return ret;
@@ -985,9 +1001,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 			return -EFAULT;
 		rtnl_lock();
 		if (optname == MRT_DEL_MFC)
-			ret = ipmr_mfc_delete(&mfc);
+			ret = ipmr_mfc_delete(net, &mfc);
 		else
-			ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
+			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
 		rtnl_unlock();
 		return ret;
 		/*
@@ -998,7 +1014,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		int v;
 		if (get_user(v,(int __user *)optval))
 			return -EFAULT;
-		init_net.ipv4.mroute_do_assert = (v) ? 1 : 0;
+		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
 		return 0;
 	}
 #ifdef CONFIG_IP_PIMSM
@@ -1012,11 +1028,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 
 		rtnl_lock();
 		ret = 0;
-		if (v != init_net.ipv4.mroute_do_pim) {
-			init_net.ipv4.mroute_do_pim = v;
-			init_net.ipv4.mroute_do_assert = v;
+		if (v != net->ipv4.mroute_do_pim) {
+			net->ipv4.mroute_do_pim = v;
+			net->ipv4.mroute_do_assert = v;
 #ifdef CONFIG_IP_PIMSM_V2
-			if (init_net.ipv4.mroute_do_pim)
+			if (net->ipv4.mroute_do_pim)
 				ret = inet_add_protocol(&pim_protocol,
 							IPPROTO_PIM);
 			else
@@ -1047,6 +1063,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
 {
 	int olr;
 	int val;
+	struct net *net = sock_net(sk);
 
 	if (optname != MRT_VERSION &&
 #ifdef CONFIG_IP_PIMSM
@@ -1068,10 +1085,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
 		val = 0x0305;
 #ifdef CONFIG_IP_PIMSM
 	else if (optname == MRT_PIM)
-		val = init_net.ipv4.mroute_do_pim;
+		val = net->ipv4.mroute_do_pim;
 #endif
 	else
-		val = init_net.ipv4.mroute_do_assert;
+		val = net->ipv4.mroute_do_assert;
 	if (copy_to_user(optval, &val, olr))
 		return -EFAULT;
 	return 0;
@@ -1087,16 +1104,17 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct sioc_vif_req vr;
 	struct vif_device *vif;
 	struct mfc_cache *c;
+	struct net *net = sock_net(sk);
 
 	switch (cmd) {
 	case SIOCGETVIFCNT:
 		if (copy_from_user(&vr, arg, sizeof(vr)))
 			return -EFAULT;
-		if (vr.vifi >= init_net.ipv4.maxvif)
+		if (vr.vifi >= net->ipv4.maxvif)
 			return -EINVAL;
 		read_lock(&mrt_lock);
-		vif = &init_net.ipv4.vif_table[vr.vifi];
-		if (VIF_EXISTS(&init_net, vr.vifi)) {
+		vif = &net->ipv4.vif_table[vr.vifi];
+		if (VIF_EXISTS(net, vr.vifi)) {
 			vr.icount = vif->pkt_in;
 			vr.ocount = vif->pkt_out;
 			vr.ibytes = vif->bytes_in;
@@ -1114,7 +1132,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 			return -EFAULT;
 
 		read_lock(&mrt_lock);
-		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
+		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
 		if (c) {
 			sr.pktcnt = c->mfc_un.res.pkt;
 			sr.bytecnt = c->mfc_un.res.bytes;
@@ -1136,18 +1154,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
+	struct net *net = dev_net(dev);
 	struct vif_device *v;
 	int ct;
 
-	if (!net_eq(dev_net(dev), &init_net))
+	if (!net_eq(dev_net(dev), net))
 		return NOTIFY_DONE;
 
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
-	v = &init_net.ipv4.vif_table[0];
-	for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
+	v = &net->ipv4.vif_table[0];
+	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
 		if (v->dev == dev)
-			vif_delete(ct, 1);
+			vif_delete(net, ct, 1);
 	}
 	return NOTIFY_DONE;
 }
@@ -1207,8 +1226,9 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
 
 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 {
+	struct net *net = mfc_net(c);
 	const struct iphdr *iph = ip_hdr(skb);
-	struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
+	struct vif_device *vif = &net->ipv4.vif_table[vifi];
 	struct net_device *dev;
 	struct rtable *rt;
 	int    encap = 0;
@@ -1222,7 +1242,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 		vif->bytes_out += skb->len;
 		vif->dev->stats.tx_bytes += skb->len;
 		vif->dev->stats.tx_packets++;
-		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
+		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
 		kfree_skb(skb);
 		return;
 	}
@@ -1235,7 +1255,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 						.saddr = vif->local,
 						.tos = RT_TOS(iph->tos) } },
 				    .proto = IPPROTO_IPIP };
-		if (ip_route_output_key(&init_net, &rt, &fl))
+		if (ip_route_output_key(net, &rt, &fl))
 			goto out_free;
 		encap = sizeof(struct iphdr);
 	} else {
@@ -1244,7 +1264,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 					      { .daddr = iph->daddr,
 						.tos = RT_TOS(iph->tos) } },
 				    .proto = IPPROTO_IPIP };
-		if (ip_route_output_key(&init_net, &rt, &fl))
+		if (ip_route_output_key(net, &rt, &fl))
 			goto out_free;
 	}
 
@@ -1308,9 +1328,10 @@ out_free:
 
 static int ipmr_find_vif(struct net_device *dev)
 {
+	struct net *net = dev_net(dev);
 	int ct;
-	for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) {
-		if (init_net.ipv4.vif_table[ct].dev == dev)
+	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
+		if (net->ipv4.vif_table[ct].dev == dev)
 			break;
 	}
 	return ct;
@@ -1322,6 +1343,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 {
 	int psend = -1;
 	int vif, ct;
+	struct net *net = mfc_net(cache);
 
 	vif = cache->mfc_parent;
 	cache->mfc_un.res.pkt++;
@@ -1330,7 +1352,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
-	if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
+	if (net->ipv4.vif_table[vif].dev != skb->dev) {
 		int true_vifi;
 
 		if (skb->rtable->fl.iif == 0) {
@@ -1351,24 +1373,24 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 		cache->mfc_un.res.wrong_if++;
 		true_vifi = ipmr_find_vif(skb->dev);
 
-		if (true_vifi >= 0 && init_net.ipv4.mroute_do_assert &&
+		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
 		       so that we cannot check that packet arrived on an oif.
 		       It is bad, but otherwise we would need to move pretty
 		       large chunk of pimd to kernel. Ough... --ANK
 		     */
-		    (init_net.ipv4.mroute_do_pim ||
+		    (net->ipv4.mroute_do_pim ||
 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
 		    time_after(jiffies,
 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
 			cache->mfc_un.res.last_assert = jiffies;
-			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
+			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
 		}
 		goto dont_forward;
 	}
 
-	init_net.ipv4.vif_table[vif].pkt_in++;
-	init_net.ipv4.vif_table[vif].bytes_in += skb->len;
+	net->ipv4.vif_table[vif].pkt_in++;
+	net->ipv4.vif_table[vif].bytes_in += skb->len;
 
 	/*
 	 *	Forward the frame
@@ -1408,6 +1430,7 @@ dont_forward:
 int ip_mr_input(struct sk_buff *skb)
 {
 	struct mfc_cache *cache;
+	struct net *net = dev_net(skb->dev);
 	int local = skb->rtable->rt_flags&RTCF_LOCAL;
 
 	/* Packet is looped back after forward, it should not be
@@ -1428,9 +1451,9 @@ int ip_mr_input(struct sk_buff *skb)
 			       that we can forward NO IGMP messages.
 			     */
 			    read_lock(&mrt_lock);
-			    if (init_net.ipv4.mroute_sk) {
+			    if (net->ipv4.mroute_sk) {
 				    nf_reset(skb);
-				    raw_rcv(init_net.ipv4.mroute_sk, skb);
+				    raw_rcv(net->ipv4.mroute_sk, skb);
 				    read_unlock(&mrt_lock);
 				    return 0;
 			    }
@@ -1439,7 +1462,7 @@ int ip_mr_input(struct sk_buff *skb)
 	}
 
 	read_lock(&mrt_lock);
-	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
+	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 
 	/*
 	 *	No usable cache entry
@@ -1459,7 +1482,7 @@ int ip_mr_input(struct sk_buff *skb)
 
 		vif = ipmr_find_vif(skb->dev);
 		if (vif >= 0) {
-			int err = ipmr_cache_unresolved(vif, skb);
+			int err = ipmr_cache_unresolved(net, vif, skb);
 			read_unlock(&mrt_lock);
 
 			return err;
@@ -1490,6 +1513,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
 {
 	struct net_device *reg_dev = NULL;
 	struct iphdr *encap;
+	struct net *net = dev_net(skb->dev);
 
 	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
 	/*
@@ -1504,8 +1528,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
 		return 1;
 
 	read_lock(&mrt_lock);
-	if (init_net.ipv4.mroute_reg_vif_num >= 0)
-		reg_dev = init_net.ipv4.vif_table[init_net.ipv4.mroute_reg_vif_num].dev;
+	if (net->ipv4.mroute_reg_vif_num >= 0)
+		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
 	if (reg_dev)
 		dev_hold(reg_dev);
 	read_unlock(&mrt_lock);
@@ -1540,13 +1564,14 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
 int pim_rcv_v1(struct sk_buff * skb)
 {
 	struct igmphdr *pim;
+	struct net *net = dev_net(skb->dev);
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
 		goto drop;
 
 	pim = igmp_hdr(skb);
 
-	if (!init_net.ipv4.mroute_do_pim ||
+	if (!net->ipv4.mroute_do_pim ||
 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
 		goto drop;
 
@@ -1586,7 +1611,8 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
-	struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
+	struct net *net = mfc_net(c);
+	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
 	u8 *b = skb_tail_pointer(skb);
 	struct rtattr *mp_head;
 
@@ -1602,7 +1628,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
 			nhp->rtnh_flags = 0;
 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-			nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
+			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
 			nhp->rtnh_len = sizeof(*nhp);
 		}
 	}
@@ -1616,14 +1642,15 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
-int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+int ipmr_get_route(struct net *net,
+		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 {
 	int err;
 	struct mfc_cache *cache;
 	struct rtable *rt = skb->rtable;
 
 	read_lock(&mrt_lock);
-	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
+	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
 
 	if (cache == NULL) {
 		struct sk_buff *skb2;
@@ -1654,7 +1681,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 		iph->saddr = rt->rt_src;
 		iph->daddr = rt->rt_dst;
 		iph->version = 0;
-		err = ipmr_cache_unresolved(vif, skb2);
+		err = ipmr_cache_unresolved(net, vif, skb2);
 		read_unlock(&mrt_lock);
 		return err;
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 97f71153584f..6a9e204c8024 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2779,7 +2779,8 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
 	return ip_route_output_flow(net, rp, flp, NULL, 0);
 }
 
-static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+static int rt_fill_info(struct net *net,
+			struct sk_buff *skb, u32 pid, u32 seq, int event,
 			int nowait, unsigned int flags)
 {
 	struct rtable *rt = skb->rtable;
@@ -2844,8 +2845,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		__be32 dst = rt->rt_dst;
 
 		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
-		    IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
-			int err = ipmr_get_route(skb, r, nowait);
+		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
+			int err = ipmr_get_route(net, skb, r, nowait);
 			if (err <= 0) {
 				if (!nowait) {
 					if (err == 0)
@@ -2950,7 +2951,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
-	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+	err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
 			   RTM_NEWROUTE, 0, 0);
 	if (err <= 0)
 		goto errout_free;
@@ -2988,7 +2989,7 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 			if (rt_is_expired(rt))
 				continue;
 			skb->dst = dst_clone(&rt->u.dst);
-			if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
+			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					 1, NLM_F_MULTI) <= 0) {
 				dst_release(xchg(&skb->dst, NULL));
-- 
cgit v1.2.3


From 0db155de988031f925096a7df1bf9633790a2c18 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 23 Jan 2009 22:28:48 -0800
Subject: com20020: Fix allyesconfig build failure.

Reported by Stephen Rothwell.

Due to missing 'extern' in the com20020_netdev_ops declaration,
each file that includes linux/com20020.h gets another copy
defined in it's resulting object file.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/com20020.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/com20020.h b/include/linux/com20020.h
index 350afa773f8f..5dcfb944b6ce 100644
--- a/include/linux/com20020.h
+++ b/include/linux/com20020.h
@@ -29,7 +29,7 @@
 
 int com20020_check(struct net_device *dev);
 int com20020_found(struct net_device *dev, int shared);
-const struct net_device_ops com20020_netdev_ops;
+extern const struct net_device_ops com20020_netdev_ops;
 
 /* The number of low I/O ports used by the card. */
 #define ARCNET_TOTAL_SIZE 8
-- 
cgit v1.2.3


From d5a9e24afb4ab38110ebb777588ea0bd0eacbd0a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 27 Jan 2009 16:22:11 -0800
Subject: net: Allow RX queue selection to seed TX queue hashing.

The idea is that drivers which implement multiqueue RX
pre-seed the SKB by recording the RX queue selected by
the hardware.

If such a seed is found on TX, we'll use that to select
the outgoing TX queue.

This helps get more consistent load balancing on router
and firewall loads.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 15 +++++++++++++++
 net/core/dev.c         |  8 ++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cf2cb50f77d1..a2c2378a9c58 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1904,6 +1904,21 @@ static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_bu
 	to->queue_mapping = from->queue_mapping;
 }
 
+static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue)
+{
+	skb->queue_mapping = rx_queue + 1;
+}
+
+static inline u16 skb_get_rx_queue(struct sk_buff *skb)
+{
+	return skb->queue_mapping - 1;
+}
+
+static inline bool skb_rx_queue_recorded(struct sk_buff *skb)
+{
+	return (skb->queue_mapping != 0);
+}
+
 #ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 5379b0c1190a..b21ad0b47aae 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1722,6 +1722,13 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 		simple_tx_hashrnd_initialized = 1;
 	}
 
+	if (skb_rx_queue_recorded(skb)) {
+		u32 val = skb_get_rx_queue(skb);
+
+		hash = jhash_1word(val, simple_tx_hashrnd);
+		goto out;
+	}
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
@@ -1759,6 +1766,7 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 
 	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
 
+out:
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
 }
 
-- 
cgit v1.2.3


From 9ee677c2276bfcbcf68042ec2718a504af0c5fd7 Mon Sep 17 00:00:00 2001
From: David Kilroy <kilroyd@googlemail.com>
Date: Tue, 23 Dec 2008 14:03:38 +0000
Subject: wireless: Add channel/frequency conversions to ieee80211.h

Added mappings for FHSS, DSSS and OFDM channels - with macros to point
HR DSSS and ERP to the DSSS mappings. Currently just static inline
functions.

Use the new functions in the older fullmac drivers. This eliminates a
number of const static buffers and removes a couple of range checks that
are now redundant.

Signed-off-by: David Kilroy <kilroyd@googlemail.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Richard Farina <sidhayn@gmail.com>
Acked-by: Jeroen Vreeken <pe1rxq@amsat.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/airo.c            |  25 ++-----
 drivers/net/wireless/atmel.c           |  20 +++---
 drivers/net/wireless/orinoco/orinoco.c |  33 ++++------
 drivers/net/wireless/rndis_wlan.c      |  13 ++--
 drivers/net/wireless/wl3501_cs.c       |   9 +--
 drivers/net/wireless/zd1201.c          |   7 +-
 include/linux/ieee80211.h              | 116 +++++++++++++++++++++++++++++++++
 7 files changed, 155 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index fc4322ca669f..2ff588bb0a7c 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -1070,10 +1070,6 @@ static WifiCtlHdr wifictlhdr8023 = {
 	}
 };
 
-// Frequency list (map channels to frequencies)
-static const long frequency_list[] = { 2412, 2417, 2422, 2427, 2432, 2437, 2442,
-				2447, 2452, 2457, 2462, 2467, 2472, 2484 };
-
 // A few details needed for WEP (Wireless Equivalent Privacy)
 #define MAX_KEY_SIZE 13			// 128 (?) bits
 #define MIN_KEY_SIZE  5			// 40 bits RC4 - WEP
@@ -5725,16 +5721,12 @@ static int airo_set_freq(struct net_device *dev,
 	int rc = -EINPROGRESS;		/* Call commit handler */
 
 	/* If setting by frequency, convert to a channel */
-	if((fwrq->e == 1) &&
-	   (fwrq->m >= (int) 2.412e8) &&
-	   (fwrq->m <= (int) 2.487e8)) {
+	if(fwrq->e == 1) {
 		int f = fwrq->m / 100000;
-		int c = 0;
-		while((c < 14) && (f != frequency_list[c]))
-			c++;
+
 		/* Hack to fall through... */
 		fwrq->e = 0;
-		fwrq->m = c + 1;
+		fwrq->m = ieee80211_freq_to_dsss_chan(f);
 	}
 	/* Setting by channel number */
 	if((fwrq->m > 1000) || (fwrq->e > 0))
@@ -5778,7 +5770,7 @@ static int airo_get_freq(struct net_device *dev,
 
 	ch = le16_to_cpu(status_rid.channel);
 	if((ch > 0) && (ch < 15)) {
-		fwrq->m = frequency_list[ch - 1] * 100000;
+		fwrq->m = ieee80211_dsss_chan_to_freq(ch) * 100000;
 		fwrq->e = 1;
 	} else {
 		fwrq->m = ch;
@@ -6795,8 +6787,8 @@ static int airo_get_range(struct net_device *dev,
 	k = 0;
 	for(i = 0; i < 14; i++) {
 		range->freq[k].i = i + 1; /* List index */
-		range->freq[k].m = frequency_list[i] * 100000;
-		range->freq[k++].e = 1;	/* Values in table in MHz -> * 10^5 * 10 */
+		range->freq[k].m = ieee80211_dsss_chan_to_freq(i + 1) * 100000;
+		range->freq[k++].e = 1;	/* Values in MHz -> * 10^5 * 10 */
 	}
 	range->num_frequency = k;
 
@@ -7189,10 +7181,7 @@ static inline char *airo_translate_scan(struct net_device *dev,
 	/* Add frequency */
 	iwe.cmd = SIOCGIWFREQ;
 	iwe.u.freq.m = le16_to_cpu(bss->dsChannel);
-	/* iwe.u.freq.m containt the channel (starting 1), our 
-	 * frequency_list array start at index 0...
-	 */
-	iwe.u.freq.m = frequency_list[iwe.u.freq.m - 1] * 100000;
+	iwe.u.freq.m = ieee80211_dsss_chan_to_freq(iwe.u.freq.m) * 100000;
 	iwe.u.freq.e = 1;
 	current_ev = iwe_stream_add_event(info, current_ev, end_buf,
 					  &iwe, IW_EV_FREQ_LEN);
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index 4223672c4432..91930a2c3c6b 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -2204,9 +2204,6 @@ static int atmel_get_frag(struct net_device *dev,
 	return 0;
 }
 
-static const long frequency_list[] = { 2412, 2417, 2422, 2427, 2432, 2437, 2442,
-				2447, 2452, 2457, 2462, 2467, 2472, 2484 };
-
 static int atmel_set_freq(struct net_device *dev,
 			  struct iw_request_info *info,
 			  struct iw_freq *fwrq,
@@ -2216,16 +2213,12 @@ static int atmel_set_freq(struct net_device *dev,
 	int rc = -EINPROGRESS;		/* Call commit handler */
 
 	/* If setting by frequency, convert to a channel */
-	if ((fwrq->e == 1) &&
-	    (fwrq->m >= (int) 241200000) &&
-	    (fwrq->m <= (int) 248700000)) {
+	if (fwrq->e == 1) {
 		int f = fwrq->m / 100000;
-		int c = 0;
-		while ((c < 14) && (f != frequency_list[c]))
-			c++;
+
 		/* Hack to fall through... */
 		fwrq->e = 0;
-		fwrq->m = c + 1;
+		fwrq->m = ieee80211_freq_to_dsss_chan(f);
 	}
 	/* Setting by channel number */
 	if ((fwrq->m > 1000) || (fwrq->e > 0))
@@ -2384,8 +2377,11 @@ static int atmel_get_range(struct net_device *dev,
 	if (range->num_channels != 0) {
 		for (k = 0, i = channel_table[j].min; i <= channel_table[j].max; i++) {
 			range->freq[k].i = i; /* List index */
-			range->freq[k].m = frequency_list[i - 1] * 100000;
-			range->freq[k++].e = 1;	/* Values in table in MHz -> * 10^5 * 10 */
+
+			/* Values in MHz -> * 10^5 * 10 */
+			range->freq[k].m = (ieee80211_dsss_chan_to_freq(i) *
+					    100000);
+			range->freq[k++].e = 1;
 		}
 		range->num_frequency = k;
 	}
diff --git a/drivers/net/wireless/orinoco/orinoco.c b/drivers/net/wireless/orinoco/orinoco.c
index 45a04faa7818..beb4d1f8c184 100644
--- a/drivers/net/wireless/orinoco/orinoco.c
+++ b/drivers/net/wireless/orinoco/orinoco.c
@@ -178,12 +178,7 @@ static const struct ethtool_ops orinoco_ethtool_ops;
 /* Data tables                                                      */
 /********************************************************************/
 
-/* The frequency of each channel in MHz */
-static const long channel_frequency[] = {
-	2412, 2417, 2422, 2427, 2432, 2437, 2442,
-	2447, 2452, 2457, 2462, 2467, 2472, 2484
-};
-#define NUM_CHANNELS ARRAY_SIZE(channel_frequency)
+#define NUM_CHANNELS 14
 
 /* This tables gives the actual meanings of the bitrate IDs returned
  * by the firmware. */
@@ -3742,13 +3737,13 @@ static int orinoco_hw_get_essid(struct orinoco_private *priv, int *active,
 	return err;       
 }
 
-static long orinoco_hw_get_freq(struct orinoco_private *priv)
+static int orinoco_hw_get_freq(struct orinoco_private *priv)
 {
 	
 	hermes_t *hw = &priv->hw;
 	int err = 0;
 	u16 channel;
-	long freq = 0;
+	int freq = 0;
 	unsigned long flags;
 
 	if (orinoco_lock(priv, &flags) != 0)
@@ -3771,7 +3766,7 @@ static long orinoco_hw_get_freq(struct orinoco_private *priv)
 		goto out;
 
 	}
-	freq = channel_frequency[channel-1] * 100000;
+	freq = ieee80211_dsss_chan_to_freq(channel);
 
  out:
 	orinoco_unlock(priv, &flags);
@@ -3998,7 +3993,8 @@ static int orinoco_ioctl_getiwrange(struct net_device *dev,
 	for (i = 0; i < NUM_CHANNELS; i++) {
 		if (priv->channel_mask & (1 << i)) {
 			range->freq[k].i = i + 1;
-			range->freq[k].m = channel_frequency[i] * 100000;
+			range->freq[k].m = (ieee80211_dsss_chan_to_freq(i + 1) *
+					    100000);
 			range->freq[k].e = 1;
 			k++;
 		}
@@ -4346,16 +4342,15 @@ static int orinoco_ioctl_setfreq(struct net_device *dev,
 		/* Setting by channel number */
 		chan = frq->m;
 	} else {
-		/* Setting by frequency - search the table */
-		int mult = 1;
+		/* Setting by frequency */
+		int denom = 1;
 		int i;
 
+		/* Calculate denominator to rescale to MHz */
 		for (i = 0; i < (6 - frq->e); i++)
-			mult *= 10;
+			denom *= 10;
 
-		for (i = 0; i < NUM_CHANNELS; i++)
-			if (frq->m == (channel_frequency[i] * mult))
-				chan = i+1;
+		chan = ieee80211_freq_to_dsss_chan(frq->m / denom);
 	}
 
 	if ( (chan < 1) || (chan > NUM_CHANNELS) ||
@@ -4392,7 +4387,7 @@ static int orinoco_ioctl_getfreq(struct net_device *dev,
 		return tmp;
 	}
 
-	frq->m = tmp;
+	frq->m = tmp * 100000;
 	frq->e = 1;
 
 	return 0;
@@ -5609,7 +5604,7 @@ static inline char *orinoco_translate_scan(struct net_device *dev,
 		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
 						  &iwe, IW_EV_FREQ_LEN);
 
-		iwe.u.freq.m = channel_frequency[channel-1] * 100000;
+		iwe.u.freq.m = ieee80211_dsss_chan_to_freq(channel) * 100000;
 		iwe.u.freq.e = 1;
 		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
 						  &iwe, IW_EV_FREQ_LEN);
@@ -5760,7 +5755,7 @@ static inline char *orinoco_translate_ext_scan(struct net_device *dev,
 		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
 						  &iwe, IW_EV_FREQ_LEN);
 
-		iwe.u.freq.m = channel_frequency[channel-1] * 100000;
+		iwe.u.freq.m = ieee80211_dsss_chan_to_freq(channel) * 100000;
 		iwe.u.freq.e = 1;
 		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
 						  &iwe, IW_EV_FREQ_LEN);
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index ed93ac41297f..105f214e21f4 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -369,9 +369,6 @@ struct rndis_wext_private {
 };
 
 
-static const int freq_chan[] = { 2412, 2417, 2422, 2427, 2432, 2437, 2442,
-				2447, 2452, 2457, 2462, 2467, 2472, 2484 };
-
 static const int rates_80211g[8] = { 6, 9, 12, 18, 24, 36, 48, 54 };
 
 static const int bcm4320_power_output[4] = { 25, 50, 75, 100 };
@@ -640,8 +637,8 @@ static void dsconfig_to_freq(unsigned int dsconfig, struct iw_freq *freq)
 static int freq_to_dsconfig(struct iw_freq *freq, unsigned int *dsconfig)
 {
 	if (freq->m < 1000 && freq->e == 0) {
-		if (freq->m >= 1 && freq->m <= ARRAY_SIZE(freq_chan))
-			*dsconfig = freq_chan[freq->m - 1] * 1000;
+		if (freq->m >= 1 && freq->m <= 14)
+			*dsconfig = ieee80211_dsss_chan_to_freq(freq->m) * 1000;
 		else
 			return -1;
 	} else {
@@ -1178,11 +1175,11 @@ static int rndis_iw_get_range(struct net_device *dev,
 		range->throughput = 11 * 1000 * 1000 / 2;
 	}
 
-	range->num_channels = ARRAY_SIZE(freq_chan);
+	range->num_channels = 14;
 
-	for (i = 0; i < ARRAY_SIZE(freq_chan) && i < IW_MAX_FREQUENCIES; i++) {
+	for (i = 0; (i < 14) && (i < IW_MAX_FREQUENCIES); i++) {
 		range->freq[i].i = i + 1;
-		range->freq[i].m = freq_chan[i] * 100000;
+		range->freq[i].m = ieee80211_dsss_chan_to_freq(i + 1) * 100000;
 		range->freq[i].e = 1;
 	}
 	range->num_frequency = i;
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index c99a1b6b948f..c8d5c34e8ddf 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -44,6 +44,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/wireless.h>
+#include <linux/ieee80211.h>
 
 #include <net/iw_handler.h>
 
@@ -111,12 +112,6 @@ static void wl3501_release(struct pcmcia_device *link);
  */
 static dev_info_t wl3501_dev_info = "wl3501_cs";
 
-static int wl3501_chan2freq[] = {
-	[0]  = 2412, [1]  = 2417, [2]  = 2422, [3]  = 2427, [4] = 2432,
-	[5]  = 2437, [6]  = 2442, [7]  = 2447, [8]  = 2452, [9] = 2457,
-	[10] = 2462, [11] = 2467, [12] = 2472, [13] = 2477,
-};
-
 static const struct {
 	int reg_domain;
 	int min, max, deflt;
@@ -1510,7 +1505,7 @@ static int wl3501_get_freq(struct net_device *dev, struct iw_request_info *info,
 {
 	struct wl3501_card *this = netdev_priv(dev);
 
-	wrqu->freq.m = wl3501_chan2freq[this->chan - 1] * 100000;
+	wrqu->freq.m = ieee80211_dsss_chan_to_freq(this->chan) * 100000;
 	wrqu->freq.e = 1;
 	return 0;
 }
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index b45c27d42fd8..6226ac2357f8 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -919,10 +919,9 @@ static int zd1201_set_freq(struct net_device *dev,
 	if (freq->e == 0)
 		channel = freq->m;
 	else {
-		if (freq->m >= 2482)
-			channel = 14;
-		if (freq->m >= 2407)
-			channel = (freq->m-2407)/5;
+		channel = ieee80211_freq_to_dsss_chan(freq->m);
+		if (channel < 0)
+			channel = 0;
 	}
 
 	err = zd1201_setconfig16(zd, ZD1201_RID_CNFOWNCHANNEL, channel);
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index c4e6ca1a6306..cade2556af0e 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1185,4 +1185,120 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr)
 		return hdr->addr1;
 }
 
+/**
+ * ieee80211_fhss_chan_to_freq - get channel frequency
+ * @channel: the FHSS channel
+ *
+ * Convert IEEE802.11 FHSS channel to frequency (MHz)
+ * Ref IEEE 802.11-2007 section 14.6
+ */
+static inline int ieee80211_fhss_chan_to_freq(int channel)
+{
+	if ((channel > 1) && (channel < 96))
+		return channel + 2400;
+	else
+		return -1;
+}
+
+/**
+ * ieee80211_freq_to_fhss_chan - get channel
+ * @freq: the channels frequency
+ *
+ * Convert frequency (MHz) to IEEE802.11 FHSS channel
+ * Ref IEEE 802.11-2007 section 14.6
+ */
+static inline int ieee80211_freq_to_fhss_chan(int freq)
+{
+	if ((freq > 2401) && (freq < 2496))
+		return freq - 2400;
+	else
+		return -1;
+}
+
+/**
+ * ieee80211_dsss_chan_to_freq - get channel center frequency
+ * @channel: the DSSS channel
+ *
+ * Convert IEEE802.11 DSSS channel to the center frequency (MHz).
+ * Ref IEEE 802.11-2007 section 15.6
+ */
+static inline int ieee80211_dsss_chan_to_freq(int channel)
+{
+	if ((channel > 0) && (channel < 14))
+		return 2407 + (channel * 5);
+	else if (channel == 14)
+		return 2484;
+	else
+		return -1;
+}
+
+/**
+ * ieee80211_freq_to_dsss_chan - get channel
+ * @freq: the frequency
+ *
+ * Convert frequency (MHz) to IEEE802.11 DSSS channel
+ * Ref IEEE 802.11-2007 section 15.6
+ *
+ * This routine selects the channel with the closest center frequency.
+ */
+static inline int ieee80211_freq_to_dsss_chan(int freq)
+{
+	if ((freq >= 2410) && (freq < 2475))
+		return (freq - 2405) / 5;
+	else if ((freq >= 2482) && (freq < 2487))
+		return 14;
+	else
+		return -1;
+}
+
+/* Convert IEEE802.11 HR DSSS channel to frequency (MHz) and back
+ * Ref IEEE 802.11-2007 section 18.4.6.2
+ *
+ * The channels and frequencies are the same as those defined for DSSS
+ */
+#define ieee80211_hr_chan_to_freq(chan) ieee80211_dsss_chan_to_freq(chan)
+#define ieee80211_freq_to_hr_chan(freq) ieee80211_freq_to_dsss_chan(freq)
+
+/* Convert IEEE802.11 ERP channel to frequency (MHz) and back
+ * Ref IEEE 802.11-2007 section 19.4.2
+ */
+#define ieee80211_erp_chan_to_freq(chan) ieee80211_hr_chan_to_freq(chan)
+#define ieee80211_freq_to_erp_chan(freq) ieee80211_freq_to_hr_chan(freq)
+
+/**
+ * ieee80211_ofdm_chan_to_freq - get channel center frequency
+ * @s_freq: starting frequency == (dotChannelStartingFactor/2) MHz
+ * @channel: the OFDM channel
+ *
+ * Convert IEEE802.11 OFDM channel to center frequency (MHz)
+ * Ref IEEE 802.11-2007 section 17.3.8.3.2
+ */
+static inline int ieee80211_ofdm_chan_to_freq(int s_freq, int channel)
+{
+	if ((channel > 0) && (channel <= 200) &&
+	    (s_freq >= 4000))
+		return s_freq + (channel * 5);
+	else
+		return -1;
+}
+
+/**
+ * ieee80211_freq_to_ofdm_channel - get channel
+ * @s_freq: starting frequency == (dotChannelStartingFactor/2) MHz
+ * @freq: the frequency
+ *
+ * Convert frequency (MHz) to IEEE802.11 OFDM channel
+ * Ref IEEE 802.11-2007 section 17.3.8.3.2
+ *
+ * This routine selects the channel with the closest center frequency.
+ */
+static inline int ieee80211_freq_to_ofdm_chan(int s_freq, int freq)
+{
+	if ((freq > (s_freq + 2)) && (freq <= (s_freq + 1202)) &&
+	    (s_freq >= 4000))
+		return (freq + 2 - s_freq) / 5;
+	else
+		return -1;
+}
+
 #endif /* LINUX_IEEE80211_H */
-- 
cgit v1.2.3


From 6b1c7c67603efdf0b39f6056989b0f8194cdc1f3 Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Thu, 25 Dec 2008 00:39:28 +0100
Subject: b43/ssb: Add SPROM8 extraction and LP-PHY detection

This adds detection code for the LP-PHY and SPROM
extraction code for version 8, which is needed by the LP-PHY and
newer N-PHY.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/b43/main.c | 12 +++++++
 drivers/ssb/b43_pci_bridge.c    |  1 +
 drivers/ssb/pci.c               | 74 ++++++++++++++++++++++++++++++++++-------
 include/linux/ssb/ssb_regs.h    | 36 ++++++++++++++++++++
 4 files changed, 111 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 26e733a5a56e..c627bac87a40 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -97,6 +97,7 @@ static const struct ssb_device_id b43_ssb_tbl[] = {
 	SSB_DEVICE(SSB_VENDOR_BROADCOM, SSB_DEV_80211, 10),
 	SSB_DEVICE(SSB_VENDOR_BROADCOM, SSB_DEV_80211, 11),
 	SSB_DEVICE(SSB_VENDOR_BROADCOM, SSB_DEV_80211, 13),
+	SSB_DEVICE(SSB_VENDOR_BROADCOM, SSB_DEV_80211, 15),
 	SSB_DEVICE(SSB_VENDOR_BROADCOM, SSB_DEV_80211, 16),
 	SSB_DEVTABLE_END
 };
@@ -3755,6 +3756,12 @@ static int b43_phy_versioning(struct b43_wldev *dev)
 		if (phy_rev > 4)
 			unsupported = 1;
 		break;
+#endif
+#ifdef CONFIG_B43_PHY_LP
+	case B43_PHYTYPE_LP:
+		if (phy_rev > 1)
+			unsupported = 1;
+		break;
 #endif
 	default:
 		unsupported = 1;
@@ -3808,6 +3815,10 @@ static int b43_phy_versioning(struct b43_wldev *dev)
 		if (radio_ver != 0x2055 && radio_ver != 0x2056)
 			unsupported = 1;
 		break;
+	case B43_PHYTYPE_LP:
+		if (radio_ver != 0x2062)
+			unsupported = 1;
+		break;
 	default:
 		B43_WARN_ON(1);
 	}
@@ -4402,6 +4413,7 @@ static int b43_wireless_core_attach(struct b43_wldev *dev)
 			break;
 		case B43_PHYTYPE_G:
 		case B43_PHYTYPE_N:
+		case B43_PHYTYPE_LP:
 			have_2ghz_phy = 1;
 			break;
 		default:
diff --git a/drivers/ssb/b43_pci_bridge.c b/drivers/ssb/b43_pci_bridge.c
index 6433a7ed39f8..27a677584a4c 100644
--- a/drivers/ssb/b43_pci_bridge.c
+++ b/drivers/ssb/b43_pci_bridge.c
@@ -21,6 +21,7 @@ static const struct pci_device_id b43_pci_bridge_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4307) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4311) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4312) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4315) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4318) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4319) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4320) },
diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index d5cde051806b..c958ac16423c 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -467,6 +467,51 @@ static void sprom_extract_r45(struct ssb_sprom *out, const u16 *in)
 	/* TODO - get remaining rev 4 stuff needed */
 }
 
+static void sprom_extract_r8(struct ssb_sprom *out, const u16 *in)
+{
+	int i;
+	u16 v;
+
+	/* extract the MAC address */
+	for (i = 0; i < 3; i++) {
+		v = in[SPOFF(SSB_SPROM1_IL0MAC) + i];
+		*(((__be16 *)out->il0mac) + i) = cpu_to_be16(v);
+	}
+	SPEX(country_code, SSB_SPROM8_CCODE, 0xFFFF, 0);
+	SPEX(boardflags_lo, SSB_SPROM8_BFLLO, 0xFFFF, 0);
+	SPEX(boardflags_hi, SSB_SPROM8_BFLHI, 0xFFFF, 0);
+	SPEX(ant_available_a, SSB_SPROM8_ANTAVAIL, SSB_SPROM8_ANTAVAIL_A,
+	     SSB_SPROM8_ANTAVAIL_A_SHIFT);
+	SPEX(ant_available_bg, SSB_SPROM8_ANTAVAIL, SSB_SPROM8_ANTAVAIL_BG,
+	     SSB_SPROM8_ANTAVAIL_BG_SHIFT);
+	SPEX(maxpwr_bg, SSB_SPROM8_MAXP_BG, SSB_SPROM8_MAXP_BG_MASK, 0);
+	SPEX(itssi_bg, SSB_SPROM8_MAXP_BG, SSB_SPROM8_ITSSI_BG,
+	     SSB_SPROM8_ITSSI_BG_SHIFT);
+	SPEX(maxpwr_a, SSB_SPROM8_MAXP_A, SSB_SPROM8_MAXP_A_MASK, 0);
+	SPEX(itssi_a, SSB_SPROM8_MAXP_A, SSB_SPROM8_ITSSI_A,
+	     SSB_SPROM8_ITSSI_A_SHIFT);
+	SPEX(gpio0, SSB_SPROM8_GPIOA, SSB_SPROM8_GPIOA_P0, 0);
+	SPEX(gpio1, SSB_SPROM8_GPIOA, SSB_SPROM8_GPIOA_P1,
+	     SSB_SPROM8_GPIOA_P1_SHIFT);
+	SPEX(gpio2, SSB_SPROM8_GPIOB, SSB_SPROM8_GPIOB_P2, 0);
+	SPEX(gpio3, SSB_SPROM8_GPIOB, SSB_SPROM8_GPIOB_P3,
+	     SSB_SPROM8_GPIOB_P3_SHIFT);
+
+	/* Extract the antenna gain values. */
+	SPEX(antenna_gain.ghz24.a0, SSB_SPROM8_AGAIN01,
+	     SSB_SPROM8_AGAIN0, SSB_SPROM8_AGAIN0_SHIFT);
+	SPEX(antenna_gain.ghz24.a1, SSB_SPROM8_AGAIN01,
+	     SSB_SPROM8_AGAIN1, SSB_SPROM8_AGAIN1_SHIFT);
+	SPEX(antenna_gain.ghz24.a2, SSB_SPROM8_AGAIN23,
+	     SSB_SPROM8_AGAIN2, SSB_SPROM8_AGAIN2_SHIFT);
+	SPEX(antenna_gain.ghz24.a3, SSB_SPROM8_AGAIN23,
+	     SSB_SPROM8_AGAIN3, SSB_SPROM8_AGAIN3_SHIFT);
+	memcpy(&out->antenna_gain.ghz5, &out->antenna_gain.ghz24,
+	       sizeof(out->antenna_gain.ghz5));
+
+	/* TODO - get remaining rev 8 stuff needed */
+}
+
 static int sprom_extract(struct ssb_bus *bus, struct ssb_sprom *out,
 			 const u16 *in, u16 size)
 {
@@ -487,15 +532,25 @@ static int sprom_extract(struct ssb_bus *bus, struct ssb_sprom *out,
 		out->revision = 4;
 		sprom_extract_r45(out, in);
 	} else {
-		if (out->revision == 0)
-			goto unsupported;
-		if (out->revision >= 1 && out->revision <= 3) {
+		switch (out->revision) {
+		case 1:
+		case 2:
+		case 3:
 			sprom_extract_r123(out, in);
-		}
-		if (out->revision == 4 || out->revision == 5)
+			break;
+		case 4:
+		case 5:
 			sprom_extract_r45(out, in);
-		if (out->revision > 5)
-			goto unsupported;
+			break;
+		case 8:
+			sprom_extract_r8(out, in);
+			break;
+		default:
+			ssb_printk(KERN_WARNING PFX "Unsupported SPROM"
+				   "  revision %d detected. Will extract"
+				   " v1\n", out->revision);
+			sprom_extract_r123(out, in);
+		}
 	}
 
 	if (out->boardflags_lo == 0xFFFF)
@@ -504,11 +559,6 @@ static int sprom_extract(struct ssb_bus *bus, struct ssb_sprom *out,
 		out->boardflags_hi = 0;  /* per specs */
 
 	return 0;
-unsupported:
-	ssb_printk(KERN_WARNING PFX "Unsupported SPROM revision %d "
-		   "detected. Will extract v1\n", out->revision);
-	sprom_extract_r123(out, in);
-	return 0;
 }
 
 static int ssb_pci_sprom_get(struct ssb_bus *bus,
diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h
index 99a0f991e850..a01b982b5783 100644
--- a/include/linux/ssb/ssb_regs.h
+++ b/include/linux/ssb/ssb_regs.h
@@ -326,6 +326,42 @@
 #define  SSB_SPROM5_GPIOB_P3		0xFF00	/* Pin 3 */
 #define  SSB_SPROM5_GPIOB_P3_SHIFT	8
 
+/* SPROM Revision 8 */
+#define SSB_SPROM8_BFLLO		0x1084	/* Boardflags (low 16 bits) */
+#define SSB_SPROM8_BFLHI		0x1086	/* Boardflags Hi */
+#define SSB_SPROM8_IL0MAC		0x108C	/* 6 byte MAC address */
+#define SSB_SPROM8_CCODE		0x1092	/* 2 byte country code */
+#define SSB_SPROM8_ANTAVAIL		0x109C  /* Antenna available bitfields*/
+#define SSB_SPROM8_ANTAVAIL_A		0xFF00	/* A-PHY bitfield */
+#define SSB_SPROM8_ANTAVAIL_A_SHIFT	8
+#define SSB_SPROM8_ANTAVAIL_BG		0x00FF	/* B-PHY and G-PHY bitfield */
+#define SSB_SPROM8_ANTAVAIL_BG_SHIFT	0
+#define SSB_SPROM8_AGAIN01		0x109E	/* Antenna Gain (in dBm Q5.2) */
+#define  SSB_SPROM8_AGAIN0		0x00FF	/* Antenna 0 */
+#define  SSB_SPROM8_AGAIN0_SHIFT	0
+#define  SSB_SPROM8_AGAIN1		0xFF00	/* Antenna 1 */
+#define  SSB_SPROM8_AGAIN1_SHIFT	8
+#define SSB_SPROM8_AGAIN23		0x10A0
+#define  SSB_SPROM8_AGAIN2		0x00FF	/* Antenna 2 */
+#define  SSB_SPROM8_AGAIN2_SHIFT	0
+#define  SSB_SPROM8_AGAIN3		0xFF00	/* Antenna 3 */
+#define  SSB_SPROM8_AGAIN3_SHIFT	8
+#define SSB_SPROM8_GPIOA		0x1096	/*Gen. Purpose IO # 0 and 1 */
+#define  SSB_SPROM8_GPIOA_P0		0x00FF	/* Pin 0 */
+#define  SSB_SPROM8_GPIOA_P1		0xFF00	/* Pin 1 */
+#define  SSB_SPROM8_GPIOA_P1_SHIFT	8
+#define SSB_SPROM8_GPIOB		0x1098	/* Gen. Purpose IO # 2 and 3 */
+#define  SSB_SPROM8_GPIOB_P2		0x00FF	/* Pin 2 */
+#define  SSB_SPROM8_GPIOB_P3		0xFF00	/* Pin 3 */
+#define  SSB_SPROM8_GPIOB_P3_SHIFT	8
+#define SSB_SPROM8_MAXP_BG		0x10C0  /* Max Power BG in path 1 */
+#define  SSB_SPROM8_MAXP_BG_MASK	0x00FF  /* Mask for Max Power BG */
+#define  SSB_SPROM8_ITSSI_BG		0xFF00	/* Mask for path 1 itssi_bg */
+#define  SSB_SPROM8_ITSSI_BG_SHIFT	8
+#define SSB_SPROM8_MAXP_A		0x10C8  /* Max Power A in path 1 */
+#define  SSB_SPROM8_MAXP_A_MASK		0x00FF  /* Mask for Max Power A */
+#define  SSB_SPROM8_ITSSI_A		0xFF00	/* Mask for path 1 itssi_a */
+#define  SSB_SPROM8_ITSSI_A_SHIFT	8
 
 /* Values for SSB_SPROM1_BINF_CCODE */
 enum {
-- 
cgit v1.2.3


From 63649b6cf0a964582af2b4d4734e28ca90ec8f5c Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Thu, 1 Jan 2009 15:01:44 -0500
Subject: ath5k: support LEDs on Acer Aspire One netbook

Add vendor ID for Foxconn and use it to set the ath5k LED gpio and
polarity for Acer branded laptops.

base.c:
Changes-licensed-under: 3-Clause-BSD

Reported-by: Maxim Levitsky <maximlevitsky@gmail.com>
Tested-by: Maxim Levitsky <maximlevitsky@gmail.com>
Tested-by: Andreas Mohr <andi@lisas.de>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/base.c | 7 +++++++
 include/linux/pci_ids.h           | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 88618645a7e4..fdf7733e76e1 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -2596,6 +2596,13 @@ ath5k_init_leds(struct ath5k_softc *sc)
 		sc->led_pin = 1;
 		sc->led_on = 1;  /* active high */
 	}
+	/* Pin 3 on Foxconn chips used in Acer Aspire One (0x105b:e008) */
+	if (pdev->subsystem_vendor == PCI_VENDOR_ID_FOXCONN) {
+		__set_bit(ATH_STAT_LEDSOFT, sc->status);
+		sc->led_pin = 3;
+		sc->led_on = 0;  /* active low */
+	}
+
 	if (!test_bit(ATH_STAT_LEDSOFT, sc->status))
 		goto out;
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 302423afa136..5b7a48c1d616 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -834,6 +834,8 @@
 #define PCI_DEVICE_ID_PROMISE_20276	0x5275
 #define PCI_DEVICE_ID_PROMISE_20277	0x7275
 
+#define PCI_VENDOR_ID_FOXCONN		0x105b
+
 #define PCI_VENDOR_ID_UMC		0x1060
 #define PCI_DEVICE_ID_UMC_UM8673F	0x0101
 #define PCI_DEVICE_ID_UMC_UM8886BF	0x673a
-- 
cgit v1.2.3


From 5394af4d86ae51b369ff243c3f75b6f9a74e164b Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 8 Jan 2009 13:31:59 +0200
Subject: mac80211: 802.11w - STA flag for MFP

Add flags for setting STA entries and struct ieee80211_if_sta to
indicate whether management frame protection (MFP) is used.

Signed-off-by: Jouni Malinen <j@w1.fi>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    | 2 ++
 include/net/cfg80211.h     | 2 ++
 net/mac80211/cfg.c         | 4 ++++
 net/mac80211/debugfs_sta.c | 5 +++--
 net/mac80211/ieee80211_i.h | 1 +
 net/mac80211/mlme.c        | 7 +++++--
 net/mac80211/sta_info.h    | 2 ++
 7 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index e86ed59f9ad5..218f0e73a7ae 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -412,12 +412,14 @@ enum nl80211_iftype {
  * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames
  *	with short barker preamble
  * @NL80211_STA_FLAG_WME: station is WME/QoS capable
+ * @NL80211_STA_FLAG_MFP: station uses management frame protection
  */
 enum nl80211_sta_flags {
 	__NL80211_STA_FLAG_INVALID,
 	NL80211_STA_FLAG_AUTHORIZED,
 	NL80211_STA_FLAG_SHORT_PREAMBLE,
 	NL80211_STA_FLAG_WME,
+	NL80211_STA_FLAG_MFP,
 
 	/* keep last */
 	__NL80211_STA_FLAG_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 23c0ab74ded6..6619ed106134 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -112,12 +112,14 @@ struct beacon_parameters {
  * @STATION_FLAG_SHORT_PREAMBLE: station is capable of receiving frames
  *	with short preambles
  * @STATION_FLAG_WME: station is WME/QoS capable
+ * @STATION_FLAG_MFP: station uses management frame protection
  */
 enum station_flags {
 	STATION_FLAG_CHANGED		= 1<<0,
 	STATION_FLAG_AUTHORIZED		= 1<<NL80211_STA_FLAG_AUTHORIZED,
 	STATION_FLAG_SHORT_PREAMBLE	= 1<<NL80211_STA_FLAG_SHORT_PREAMBLE,
 	STATION_FLAG_WME		= 1<<NL80211_STA_FLAG_WME,
+	STATION_FLAG_MFP		= 1<<NL80211_STA_FLAG_MFP,
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9d4e4d846ec1..309d9189aa49 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -630,6 +630,10 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 		sta->flags &= ~WLAN_STA_WME;
 		if (params->station_flags & STATION_FLAG_WME)
 			sta->flags |= WLAN_STA_WME;
+
+		sta->flags &= ~WLAN_STA_MFP;
+		if (params->station_flags & STATION_FLAG_MFP)
+			sta->flags |= WLAN_STA_MFP;
 		spin_unlock_bh(&sta->lock);
 	}
 
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index a2fbe0131312..90230c718b5b 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -67,14 +67,15 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
 	char buf[100];
 	struct sta_info *sta = file->private_data;
 	u32 staflags = get_sta_flags(sta);
-	int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s",
+	int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s",
 		staflags & WLAN_STA_AUTH ? "AUTH\n" : "",
 		staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "",
 		staflags & WLAN_STA_PS ? "PS\n" : "",
 		staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "",
 		staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "",
 		staflags & WLAN_STA_WME ? "WME\n" : "",
-		staflags & WLAN_STA_WDS ? "WDS\n" : "");
+		staflags & WLAN_STA_WDS ? "WDS\n" : "",
+		staflags & WLAN_STA_MFP ? "MFP\n" : "");
 	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
 }
 STA_OPS(flags);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3db6bc3cdaf2..b5f86cb17630 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -260,6 +260,7 @@ struct mesh_preq_queue {
 #define IEEE80211_STA_PRIVACY_INVOKED	BIT(13)
 #define IEEE80211_STA_TKIP_WEP_USED	BIT(14)
 #define IEEE80211_STA_CSA_RECEIVED	BIT(15)
+#define IEEE80211_STA_MFP_ENABLED	BIT(16)
 /* flags for MLME request */
 #define IEEE80211_STA_REQ_SCAN 0
 #define IEEE80211_STA_REQ_DIRECT_PROBE 1
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a1e683e305f0..bc8a7f1a6a15 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1,6 +1,6 @@
 /*
  * BSS client mode implementation
- * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright 2003-2008, Jouni Malinen <j@w1.fi>
  * Copyright 2004, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
@@ -472,7 +472,7 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	/* u.deauth.reason_code == u.disassoc.reason_code */
 	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
 
-	ieee80211_tx_skb(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, ifsta->flags & IEEE80211_STA_MFP_ENABLED);
 }
 
 /* MLME */
@@ -1408,6 +1408,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 
 	rate_control_rate_init(sta);
 
+	if (ifsta->flags & IEEE80211_STA_MFP_ENABLED)
+		set_sta_flags(sta, WLAN_STA_MFP);
+
 	if (elems.wmm_param)
 		set_sta_flags(sta, WLAN_STA_WME);
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index b683d3f5ef8a..d13a44b935e2 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -34,6 +34,7 @@
  * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the
  *	IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next
  *	frame to this station is transmitted.
+ * @WLAN_STA_MFP: Management frame protection is used with this STA.
  */
 enum ieee80211_sta_info_flags {
 	WLAN_STA_AUTH		= 1<<0,
@@ -46,6 +47,7 @@ enum ieee80211_sta_info_flags {
 	WLAN_STA_WDS		= 1<<7,
 	WLAN_STA_PSPOLL		= 1<<8,
 	WLAN_STA_CLEAR_PS_FILT	= 1<<9,
+	WLAN_STA_MFP		= 1<<10,
 };
 
 #define STA_TID_NUM 16
-- 
cgit v1.2.3


From fb7333367632c67d8b6b06fb8d906cdabb11b02a Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 8 Jan 2009 13:32:00 +0200
Subject: mac80211: 802.11w - CCMP for management frames

Extend CCMP to support encryption and decryption of unicast management
frames.

Signed-off-by: Jouni Malinen <j@w1.fi>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 30 ++++++++++++++++++++++++++++++
 net/mac80211/tx.c         | 23 ++++++++++++++++++++++-
 net/mac80211/wpa.c        | 18 ++++++++++++------
 3 files changed, 64 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index cade2556af0e..d5165895f316 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1030,6 +1030,7 @@ enum ieee80211_category {
 	WLAN_CATEGORY_QOS = 1,
 	WLAN_CATEGORY_DLS = 2,
 	WLAN_CATEGORY_BACK = 3,
+	WLAN_CATEGORY_PUBLIC = 4,
 	WLAN_CATEGORY_WMM = 17,
 };
 
@@ -1185,6 +1186,35 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr)
 		return hdr->addr1;
 }
 
+/**
+ * ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame
+ * @hdr: the frame (buffer must include at least the first octet of payload)
+ */
+static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr)
+{
+	if (ieee80211_is_disassoc(hdr->frame_control) ||
+	    ieee80211_is_deauth(hdr->frame_control))
+		return true;
+
+	if (ieee80211_is_action(hdr->frame_control)) {
+		u8 *category;
+
+		/*
+		 * Action frames, excluding Public Action frames, are Robust
+		 * Management Frames. However, if we are looking at a Protected
+		 * frame, skip the check since the data may be encrypted and
+		 * the frame has already been found to be a Robust Management
+		 * Frame (by the other end).
+		 */
+		if (ieee80211_has_protected(hdr->frame_control))
+			return true;
+		category = ((u8 *) hdr) + 24;
+		return *category != WLAN_CATEGORY_PUBLIC;
+	}
+
+	return false;
+}
+
 /**
  * ieee80211_fhss_chan_to_freq - get channel frequency
  * @channel: the FHSS channel
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index cd6bc87eec73..50c6c4fabea5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -330,6 +330,22 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
+static int ieee80211_use_mfp(__le16 fc, struct sta_info *sta,
+			     struct sk_buff *skb)
+{
+	if (!ieee80211_is_mgmt(fc))
+		return 0;
+
+	if (sta == NULL || !test_sta_flags(sta, WLAN_STA_MFP))
+		return 0;
+
+	if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *)
+					    skb->data))
+		return 0;
+
+	return 1;
+}
+
 static ieee80211_tx_result
 ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 {
@@ -428,10 +444,15 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 			if (ieee80211_is_auth(hdr->frame_control))
 				break;
 		case ALG_TKIP:
-		case ALG_CCMP:
 			if (!ieee80211_is_data_present(hdr->frame_control))
 				tx->key = NULL;
 			break;
+		case ALG_CCMP:
+			if (!ieee80211_is_data_present(hdr->frame_control) &&
+			    !ieee80211_use_mfp(hdr->frame_control, tx->sta,
+					       tx->skb))
+				tx->key = NULL;
+			break;
 		}
 	}
 
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 7aa63caf8d50..aff46adde3f0 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -266,7 +266,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
 				int encrypted)
 {
 	__le16 mask_fc;
-	int a4_included;
+	int a4_included, mgmt;
 	u8 qos_tid;
 	u8 *b_0, *aad;
 	u16 data_len, len_a;
@@ -277,12 +277,15 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
 	aad = scratch + 4 * AES_BLOCK_LEN;
 
 	/*
-	 * Mask FC: zero subtype b4 b5 b6
+	 * Mask FC: zero subtype b4 b5 b6 (if not mgmt)
 	 * Retry, PwrMgt, MoreData; set Protected
 	 */
+	mgmt = ieee80211_is_mgmt(hdr->frame_control);
 	mask_fc = hdr->frame_control;
-	mask_fc &= ~cpu_to_le16(0x0070 | IEEE80211_FCTL_RETRY |
+	mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY |
 				IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA);
+	if (!mgmt)
+		mask_fc &= ~cpu_to_le16(0x0070);
 	mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -300,8 +303,10 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
 
 	/* First block, b_0 */
 	b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */
-	/* Nonce: QoS Priority | A2 | PN */
-	b_0[1] = qos_tid;
+	/* Nonce: Nonce Flags | A2 | PN
+	 * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7)
+	 */
+	b_0[1] = qos_tid | (mgmt << 4);
 	memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
 	memcpy(&b_0[8], pn, CCMP_PN_LEN);
 	/* l(m) */
@@ -446,7 +451,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
-	if (!ieee80211_is_data(hdr->frame_control))
+	if (!ieee80211_is_data(hdr->frame_control) &&
+	    !ieee80211_is_robust_mgmt_frame(hdr))
 		return RX_CONTINUE;
 
 	data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN;
-- 
cgit v1.2.3


From 765cb46a3fc856245ea68a7c961ac87c77e4ae2d Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 8 Jan 2009 13:32:01 +0200
Subject: mac80211: 802.11w - Add BIP (AES-128-CMAC)

Implement Broadcast/Multicast Integrity Protocol for management frame
protection. This patch adds the needed definitions for the new
information element (MMIE) and implementation for the new "encryption"
type (though, BIP is actually not encrypting data, it provides only
integrity protection). These routines will be used by a follow-on patch
that enables BIP for multicast/broadcast robust management frames.

Signed-off-by: Jouni Malinen <j@w1.fi>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  10 ++++
 net/mac80211/Makefile      |   1 +
 net/mac80211/aes_cmac.c    | 135 +++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/aes_cmac.h    |  19 +++++++
 net/mac80211/ieee80211_i.h |   2 +-
 net/mac80211/key.h         |  10 ++++
 net/mac80211/wpa.c         | 125 +++++++++++++++++++++++++++++++++++++++++
 net/mac80211/wpa.h         |   5 ++
 8 files changed, 306 insertions(+), 1 deletion(-)
 create mode 100644 net/mac80211/aes_cmac.c
 create mode 100644 net/mac80211/aes_cmac.h

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d5165895f316..cceb9e86c744 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -655,6 +655,15 @@ struct ieee80211_mgmt {
 #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u)
 
 
+/* Management MIC information element (IEEE 802.11w) */
+struct ieee80211_mmie {
+	u8 element_id;
+	u8 length;
+	__le16 key_id;
+	u8 sequence_number[6];
+	u8 mic[8];
+} __attribute__ ((packed));
+
 /* Control frames */
 struct ieee80211_rts {
 	__le16 frame_control;
@@ -1018,6 +1027,7 @@ enum ieee80211_eid {
 	WLAN_EID_HT_INFORMATION = 61,
 	/* 802.11i */
 	WLAN_EID_RSN = 48,
+	WLAN_EID_MMIE = 76 /* 802.11w */,
 	WLAN_EID_WPA = 221,
 	WLAN_EID_GENERIC = 221,
 	WLAN_EID_VENDOR_SPECIFIC = 221,
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 7d4971aa443f..5c6fadfb6a00 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -15,6 +15,7 @@ mac80211-y := \
 	michael.o \
 	tkip.o \
 	aes_ccm.o \
+	aes_cmac.o \
 	cfg.o \
 	rx.o \
 	spectmgmt.o \
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
new file mode 100644
index 000000000000..3d097b3d7b62
--- /dev/null
+++ b/net/mac80211/aes_cmac.c
@@ -0,0 +1,135 @@
+/*
+ * AES-128-CMAC with TLen 16 for IEEE 802.11w BIP
+ * Copyright 2008, Jouni Malinen <j@w1.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+
+#include <net/mac80211.h>
+#include "key.h"
+#include "aes_cmac.h"
+
+#define AES_BLOCK_SIZE 16
+#define AES_CMAC_KEY_LEN 16
+#define CMAC_TLEN 8 /* CMAC TLen = 64 bits (8 octets) */
+#define AAD_LEN 20
+
+
+static void gf_mulx(u8 *pad)
+{
+	int i, carry;
+
+	carry = pad[0] & 0x80;
+	for (i = 0; i < AES_BLOCK_SIZE - 1; i++)
+		pad[i] = (pad[i] << 1) | (pad[i + 1] >> 7);
+	pad[AES_BLOCK_SIZE - 1] <<= 1;
+	if (carry)
+		pad[AES_BLOCK_SIZE - 1] ^= 0x87;
+}
+
+
+static void aes_128_cmac_vector(struct crypto_cipher *tfm, u8 *scratch,
+				size_t num_elem,
+				const u8 *addr[], const size_t *len, u8 *mac)
+{
+	u8 *cbc, *pad;
+	const u8 *pos, *end;
+	size_t i, e, left, total_len;
+
+	cbc = scratch;
+	pad = scratch + AES_BLOCK_SIZE;
+
+	memset(cbc, 0, AES_BLOCK_SIZE);
+
+	total_len = 0;
+	for (e = 0; e < num_elem; e++)
+		total_len += len[e];
+	left = total_len;
+
+	e = 0;
+	pos = addr[0];
+	end = pos + len[0];
+
+	while (left >= AES_BLOCK_SIZE) {
+		for (i = 0; i < AES_BLOCK_SIZE; i++) {
+			cbc[i] ^= *pos++;
+			if (pos >= end) {
+				e++;
+				pos = addr[e];
+				end = pos + len[e];
+			}
+		}
+		if (left > AES_BLOCK_SIZE)
+			crypto_cipher_encrypt_one(tfm, cbc, cbc);
+		left -= AES_BLOCK_SIZE;
+	}
+
+	memset(pad, 0, AES_BLOCK_SIZE);
+	crypto_cipher_encrypt_one(tfm, pad, pad);
+	gf_mulx(pad);
+
+	if (left || total_len == 0) {
+		for (i = 0; i < left; i++) {
+			cbc[i] ^= *pos++;
+			if (pos >= end) {
+				e++;
+				pos = addr[e];
+				end = pos + len[e];
+			}
+		}
+		cbc[left] ^= 0x80;
+		gf_mulx(pad);
+	}
+
+	for (i = 0; i < AES_BLOCK_SIZE; i++)
+		pad[i] ^= cbc[i];
+	crypto_cipher_encrypt_one(tfm, pad, pad);
+	memcpy(mac, pad, CMAC_TLEN);
+}
+
+
+void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad,
+			const u8 *data, size_t data_len, u8 *mic)
+{
+	const u8 *addr[3];
+	size_t len[3];
+	u8 zero[CMAC_TLEN];
+
+	memset(zero, 0, CMAC_TLEN);
+	addr[0] = aad;
+	len[0] = AAD_LEN;
+	addr[1] = data;
+	len[1] = data_len - CMAC_TLEN;
+	addr[2] = zero;
+	len[2] = CMAC_TLEN;
+
+	aes_128_cmac_vector(tfm, scratch, 3, addr, len, mic);
+}
+
+
+struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
+{
+	struct crypto_cipher *tfm;
+
+	tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		return NULL;
+
+	crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
+
+	return tfm;
+}
+
+
+void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm)
+{
+	if (tfm)
+		crypto_free_cipher(tfm);
+}
diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h
new file mode 100644
index 000000000000..0eb9a4831508
--- /dev/null
+++ b/net/mac80211/aes_cmac.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2008, Jouni Malinen <j@w1.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef AES_CMAC_H
+#define AES_CMAC_H
+
+#include <linux/crypto.h>
+
+struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]);
+void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad,
+			const u8 *data, size_t data_len, u8 *mic);
+void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm);
+
+#endif /* AES_CMAC_H */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b5f86cb17630..20af92abd61d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -43,7 +43,7 @@ struct ieee80211_local;
 
 /* Required encryption head and tailroom */
 #define IEEE80211_ENCRYPT_HEADROOM 8
-#define IEEE80211_ENCRYPT_TAILROOM 12
+#define IEEE80211_ENCRYPT_TAILROOM 18
 
 /* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
  * reception of at least three fragmented frames. This limit can be increased
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 425816e0996c..73ac28ca2ede 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -96,6 +96,16 @@ struct ieee80211_key {
 			u8 tx_crypto_buf[6 * AES_BLOCK_LEN];
 			u8 rx_crypto_buf[6 * AES_BLOCK_LEN];
 		} ccmp;
+		struct {
+			u8 tx_pn[6];
+			u8 rx_pn[6];
+			struct crypto_cipher *tfm;
+			u32 replays; /* dot11RSNAStatsCMACReplays */
+			u32 icverrors; /* dot11RSNAStatsCMACICVErrors */
+			/* scratch buffers for virt_to_page() (crypto API) */
+			u8 tx_crypto_buf[2 * AES_BLOCK_LEN];
+			u8 rx_crypto_buf[2 * AES_BLOCK_LEN];
+		} aes_cmac;
 	} u;
 
 	/* number of times this key has been used */
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index aff46adde3f0..53e11e6ff66e 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2002-2004, Instant802 Networks, Inc.
+ * Copyright 2008, Jouni Malinen <j@w1.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +20,7 @@
 #include "michael.h"
 #include "tkip.h"
 #include "aes_ccm.h"
+#include "aes_cmac.h"
 #include "wpa.h"
 
 ieee80211_tx_result
@@ -491,3 +493,126 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 
 	return RX_CONTINUE;
 }
+
+
+static void bip_aad(struct sk_buff *skb, u8 *aad)
+{
+	/* BIP AAD: FC(masked) || A1 || A2 || A3 */
+
+	/* FC type/subtype */
+	aad[0] = skb->data[0];
+	/* Mask FC Retry, PwrMgt, MoreData flags to zero */
+	aad[1] = skb->data[1] & ~(BIT(4) | BIT(5) | BIT(6));
+	/* A1 || A2 || A3 */
+	memcpy(aad + 2, skb->data + 4, 3 * ETH_ALEN);
+}
+
+
+static inline void bip_ipn_swap(u8 *d, const u8 *s)
+{
+	*d++ = s[5];
+	*d++ = s[4];
+	*d++ = s[3];
+	*d++ = s[2];
+	*d++ = s[1];
+	*d = s[0];
+}
+
+
+ieee80211_tx_result
+ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
+{
+	struct sk_buff *skb = tx->skb;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_key *key = tx->key;
+	struct ieee80211_mmie *mmie;
+	u8 *pn, aad[20];
+	int i;
+
+	if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
+		/* hwaccel */
+		info->control.hw_key = &tx->key->conf;
+		return 0;
+	}
+
+	if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
+		return TX_DROP;
+
+	mmie = (struct ieee80211_mmie *) skb_put(skb, sizeof(*mmie));
+	mmie->element_id = WLAN_EID_MMIE;
+	mmie->length = sizeof(*mmie) - 2;
+	mmie->key_id = cpu_to_le16(key->conf.keyidx);
+
+	/* PN = PN + 1 */
+	pn = key->u.aes_cmac.tx_pn;
+
+	for (i = sizeof(key->u.aes_cmac.tx_pn) - 1; i >= 0; i--) {
+		pn[i]++;
+		if (pn[i])
+			break;
+	}
+	bip_ipn_swap(mmie->sequence_number, pn);
+
+	bip_aad(skb, aad);
+
+	/*
+	 * MIC = AES-128-CMAC(IGTK, AAD || Management Frame Body || MMIE, 64)
+	 */
+	ieee80211_aes_cmac(key->u.aes_cmac.tfm, key->u.aes_cmac.tx_crypto_buf,
+			   aad, skb->data + 24, skb->len - 24, mmie->mic);
+
+	return TX_CONTINUE;
+}
+
+
+ieee80211_rx_result
+ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
+{
+	struct sk_buff *skb = rx->skb;
+	struct ieee80211_key *key = rx->key;
+	struct ieee80211_mmie *mmie;
+	u8 aad[20], mic[8], ipn[6];
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+
+	if (!ieee80211_is_mgmt(hdr->frame_control))
+		return RX_CONTINUE;
+
+	if ((rx->status->flag & RX_FLAG_DECRYPTED) &&
+	    (rx->status->flag & RX_FLAG_IV_STRIPPED))
+		return RX_CONTINUE;
+
+	if (skb->len < 24 + sizeof(*mmie))
+		return RX_DROP_UNUSABLE;
+
+	mmie = (struct ieee80211_mmie *)
+		(skb->data + skb->len - sizeof(*mmie));
+	if (mmie->element_id != WLAN_EID_MMIE ||
+	    mmie->length != sizeof(*mmie) - 2)
+		return RX_DROP_UNUSABLE; /* Invalid MMIE */
+
+	bip_ipn_swap(ipn, mmie->sequence_number);
+
+	if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) {
+		key->u.aes_cmac.replays++;
+		return RX_DROP_UNUSABLE;
+	}
+
+	if (!(rx->status->flag & RX_FLAG_DECRYPTED)) {
+		/* hardware didn't decrypt/verify MIC */
+		bip_aad(skb, aad);
+		ieee80211_aes_cmac(key->u.aes_cmac.tfm,
+				   key->u.aes_cmac.rx_crypto_buf, aad,
+				   skb->data + 24, skb->len - 24, mic);
+		if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+			key->u.aes_cmac.icverrors++;
+			return RX_DROP_UNUSABLE;
+		}
+	}
+
+	memcpy(key->u.aes_cmac.rx_pn, ipn, 6);
+
+	/* Remove MMIE */
+	skb_trim(skb, skb->len - sizeof(*mmie));
+
+	return RX_CONTINUE;
+}
diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h
index d42d221d8a1d..baba0608313e 100644
--- a/net/mac80211/wpa.h
+++ b/net/mac80211/wpa.h
@@ -28,4 +28,9 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx);
 ieee80211_rx_result
 ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx);
 
+ieee80211_tx_result
+ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx);
+ieee80211_rx_result
+ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx);
+
 #endif /* WPA_H */
-- 
cgit v1.2.3


From 3cfcf6ac6d69dc290e96416731eea5c88ac7d426 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 8 Jan 2009 13:32:02 +0200
Subject: mac80211: 802.11w - Use BIP (AES-128-CMAC)

Add mechanism for managing BIP keys (IGTK) and integrate BIP into the
TX/RX paths.

Signed-off-by: Jouni Malinen <j@w1.fi>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/pcu.c |  3 ++
 include/linux/ieee80211.h        |  1 +
 include/linux/nl80211.h          |  6 ++-
 include/net/cfg80211.h           |  5 +++
 include/net/mac80211.h           |  2 +
 net/mac80211/cfg.c               | 31 ++++++++++++++
 net/mac80211/debugfs_key.c       | 79 ++++++++++++++++++++++++++++++++++-
 net/mac80211/debugfs_key.h       | 10 +++++
 net/mac80211/ieee80211_i.h       |  5 ++-
 net/mac80211/key.c               | 62 ++++++++++++++++++++++++++-
 net/mac80211/key.h               |  6 +++
 net/mac80211/rx.c                | 90 ++++++++++++++++++++++++++++++++++++----
 net/mac80211/tx.c                |  9 ++++
 net/wireless/nl80211.c           | 29 +++++++++----
 14 files changed, 317 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath5k/pcu.c b/drivers/net/wireless/ath5k/pcu.c
index 5b416ed65299..e758b70ab7eb 100644
--- a/drivers/net/wireless/ath5k/pcu.c
+++ b/drivers/net/wireless/ath5k/pcu.c
@@ -1026,6 +1026,9 @@ int ath5k_keycache_type(const struct ieee80211_key_conf *key)
 			return AR5K_KEYTABLE_TYPE_40;
 		else if (key->keylen == LEN_WEP104)
 			return AR5K_KEYTABLE_TYPE_104;
+		return -EINVAL;
+	default:
+		return -EINVAL;
 	}
 	return -EINVAL;
 }
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index cceb9e86c744..df98a8a549a2 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1139,6 +1139,7 @@ enum ieee80211_back_parties {
 /* reserved: 				0x000FAC03 */
 #define WLAN_CIPHER_SUITE_CCMP		0x000FAC04
 #define WLAN_CIPHER_SUITE_WEP104	0x000FAC05
+#define WLAN_CIPHER_SUITE_AES_CMAC	0x000FAC06
 
 #define WLAN_MAX_KEY_LEN		32
 
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 218f0e73a7ae..ee742bc9761e 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -72,8 +72,8 @@
  *
  * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified
  *	by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC.
- * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT or
- *	%NL80211_ATTR_KEY_THRESHOLD.
+ * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT,
+ *	%NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD.
  * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA,
  *	%NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC and %NL80211_ATTR_KEY_CIPHER
  *	attributes.
@@ -346,6 +346,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_WIPHY_FREQ,
 	NL80211_ATTR_WIPHY_CHANNEL_TYPE,
 
+	NL80211_ATTR_KEY_DEFAULT_MGMT,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6619ed106134..df78abc496f1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -473,6 +473,8 @@ struct ieee80211_channel;
  *
  * @set_default_key: set the default key on an interface
  *
+ * @set_default_mgmt_key: set the default management frame key on an interface
+ *
  * @add_beacon: Add a beacon with given parameters, @head, @interval
  *	and @dtim_period will be valid, @tail is optional.
  * @set_beacon: Change the beacon parameters for an access point mode
@@ -520,6 +522,9 @@ struct cfg80211_ops {
 	int	(*set_default_key)(struct wiphy *wiphy,
 				   struct net_device *netdev,
 				   u8 key_index);
+	int	(*set_default_mgmt_key)(struct wiphy *wiphy,
+					struct net_device *netdev,
+					u8 key_index);
 
 	int	(*add_beacon)(struct wiphy *wiphy, struct net_device *dev,
 			      struct beacon_parameters *info);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8a305bfdb87b..61f1f37a9e27 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -651,11 +651,13 @@ struct ieee80211_if_conf {
  * @ALG_WEP: WEP40 or WEP104
  * @ALG_TKIP: TKIP
  * @ALG_CCMP: CCMP (AES)
+ * @ALG_AES_CMAC: AES-128-CMAC
  */
 enum ieee80211_key_alg {
 	ALG_WEP,
 	ALG_TKIP,
 	ALG_CCMP,
+	ALG_AES_CMAC,
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 309d9189aa49..72c106915433 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -133,6 +133,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	case WLAN_CIPHER_SUITE_CCMP:
 		alg = ALG_CCMP;
 		break;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		alg = ALG_AES_CMAC;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -275,6 +278,17 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
 		else
 			params.cipher = WLAN_CIPHER_SUITE_WEP104;
 		break;
+	case ALG_AES_CMAC:
+		params.cipher = WLAN_CIPHER_SUITE_AES_CMAC;
+		seq[0] = key->u.aes_cmac.tx_pn[5];
+		seq[1] = key->u.aes_cmac.tx_pn[4];
+		seq[2] = key->u.aes_cmac.tx_pn[3];
+		seq[3] = key->u.aes_cmac.tx_pn[2];
+		seq[4] = key->u.aes_cmac.tx_pn[1];
+		seq[5] = key->u.aes_cmac.tx_pn[0];
+		params.seq = seq;
+		params.seq_len = 6;
+		break;
 	}
 
 	params.key = key->conf.key;
@@ -304,6 +318,22 @@ static int ieee80211_config_default_key(struct wiphy *wiphy,
 	return 0;
 }
 
+static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy,
+					     struct net_device *dev,
+					     u8 key_idx)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	rcu_read_lock();
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	ieee80211_set_default_mgmt_key(sdata, key_idx);
+
+	rcu_read_unlock();
+
+	return 0;
+}
+
 static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -1153,6 +1183,7 @@ struct cfg80211_ops mac80211_config_ops = {
 	.del_key = ieee80211_del_key,
 	.get_key = ieee80211_get_key,
 	.set_default_key = ieee80211_config_default_key,
+	.set_default_mgmt_key = ieee80211_config_default_mgmt_key,
 	.add_beacon = ieee80211_add_beacon,
 	.set_beacon = ieee80211_set_beacon,
 	.del_beacon = ieee80211_del_beacon,
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 6424ac565ae0..99c752588b30 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -76,6 +76,9 @@ static ssize_t key_algorithm_read(struct file *file,
 	case ALG_CCMP:
 		alg = "CCMP\n";
 		break;
+	case ALG_AES_CMAC:
+		alg = "AES-128-CMAC\n";
+		break;
 	default:
 		return 0;
 	}
@@ -105,6 +108,12 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
 		len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
 				tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
 		break;
+	case ALG_AES_CMAC:
+		tpn = key->u.aes_cmac.tx_pn;
+		len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
+				tpn[0], tpn[1], tpn[2], tpn[3], tpn[4],
+				tpn[5]);
+		break;
 	default:
 		return 0;
 	}
@@ -142,6 +151,14 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
 		}
 		len = p - buf;
 		break;
+	case ALG_AES_CMAC:
+		rpn = key->u.aes_cmac.rx_pn;
+		p += scnprintf(p, sizeof(buf)+buf-p,
+			       "%02x%02x%02x%02x%02x%02x\n",
+			       rpn[0], rpn[1], rpn[2],
+			       rpn[3], rpn[4], rpn[5]);
+		len = p - buf;
+		break;
 	default:
 		return 0;
 	}
@@ -156,13 +173,40 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf,
 	char buf[20];
 	int len;
 
-	if (key->conf.alg != ALG_CCMP)
+	switch (key->conf.alg) {
+	case ALG_CCMP:
+		len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays);
+		break;
+	case ALG_AES_CMAC:
+		len = scnprintf(buf, sizeof(buf), "%u\n",
+				key->u.aes_cmac.replays);
+		break;
+	default:
 		return 0;
-	len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays);
+	}
 	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
 }
 KEY_OPS(replays);
 
+static ssize_t key_icverrors_read(struct file *file, char __user *userbuf,
+				  size_t count, loff_t *ppos)
+{
+	struct ieee80211_key *key = file->private_data;
+	char buf[20];
+	int len;
+
+	switch (key->conf.alg) {
+	case ALG_AES_CMAC:
+		len = scnprintf(buf, sizeof(buf), "%u\n",
+				key->u.aes_cmac.icverrors);
+		break;
+	default:
+		return 0;
+	}
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+KEY_OPS(icverrors);
+
 static ssize_t key_key_read(struct file *file, char __user *userbuf,
 			    size_t count, loff_t *ppos)
 {
@@ -222,6 +266,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key)
 	DEBUGFS_ADD(tx_spec);
 	DEBUGFS_ADD(rx_spec);
 	DEBUGFS_ADD(replays);
+	DEBUGFS_ADD(icverrors);
 	DEBUGFS_ADD(key);
 	DEBUGFS_ADD(ifindex);
 };
@@ -243,6 +288,7 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key)
 	DEBUGFS_DEL(tx_spec);
 	DEBUGFS_DEL(rx_spec);
 	DEBUGFS_DEL(replays);
+	DEBUGFS_DEL(icverrors);
 	DEBUGFS_DEL(key);
 	DEBUGFS_DEL(ifindex);
 
@@ -280,6 +326,35 @@ void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata)
 	sdata->common_debugfs.default_key = NULL;
 }
 
+void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
+{
+	char buf[50];
+	struct ieee80211_key *key;
+
+	if (!sdata->debugfsdir)
+		return;
+
+	/* this is running under the key lock */
+
+	key = sdata->default_mgmt_key;
+	if (key) {
+		sprintf(buf, "../keys/%d", key->debugfs.cnt);
+		sdata->common_debugfs.default_mgmt_key =
+			debugfs_create_symlink("default_mgmt_key",
+					       sdata->debugfsdir, buf);
+	} else
+		ieee80211_debugfs_key_remove_mgmt_default(sdata);
+}
+
+void ieee80211_debugfs_key_remove_mgmt_default(struct ieee80211_sub_if_data *sdata)
+{
+	if (!sdata)
+		return;
+
+	debugfs_remove(sdata->common_debugfs.default_mgmt_key);
+	sdata->common_debugfs.default_mgmt_key = NULL;
+}
+
 void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
 				   struct sta_info *sta)
 {
diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h
index b1a3754ee240..54717b4e1371 100644
--- a/net/mac80211/debugfs_key.h
+++ b/net/mac80211/debugfs_key.h
@@ -6,6 +6,10 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key);
 void ieee80211_debugfs_key_remove(struct ieee80211_key *key);
 void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata);
 void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata);
+void ieee80211_debugfs_key_add_mgmt_default(
+	struct ieee80211_sub_if_data *sdata);
+void ieee80211_debugfs_key_remove_mgmt_default(
+	struct ieee80211_sub_if_data *sdata);
 void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
 				   struct sta_info *sta);
 #else
@@ -19,6 +23,12 @@ static inline void ieee80211_debugfs_key_add_default(
 static inline void ieee80211_debugfs_key_remove_default(
 	struct ieee80211_sub_if_data *sdata)
 {}
+static inline void ieee80211_debugfs_key_add_mgmt_default(
+	struct ieee80211_sub_if_data *sdata)
+{}
+static inline void ieee80211_debugfs_key_remove_mgmt_default(
+	struct ieee80211_sub_if_data *sdata)
+{}
 static inline void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
 						 struct sta_info *sta)
 {}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 20af92abd61d..8c3245717c55 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -409,8 +409,10 @@ struct ieee80211_sub_if_data {
 	unsigned int fragment_next;
 
 #define NUM_DEFAULT_KEYS 4
-	struct ieee80211_key *keys[NUM_DEFAULT_KEYS];
+#define NUM_DEFAULT_MGMT_KEYS 2
+	struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
 	struct ieee80211_key *default_key;
+	struct ieee80211_key *default_mgmt_key;
 
 	u16 sequence_number;
 
@@ -482,6 +484,7 @@ struct ieee80211_sub_if_data {
 	} debugfs;
 	struct {
 		struct dentry *default_key;
+		struct dentry *default_mgmt_key;
 	} common_debugfs;
 
 #ifdef CONFIG_MAC80211_MESH
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index b0a025c9b615..19b480de4bbc 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -18,6 +18,7 @@
 #include "ieee80211_i.h"
 #include "debugfs_key.h"
 #include "aes_ccm.h"
+#include "aes_cmac.h"
 
 
 /**
@@ -215,13 +216,38 @@ void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx)
 	spin_unlock_irqrestore(&sdata->local->key_lock, flags);
 }
 
+static void
+__ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx)
+{
+	struct ieee80211_key *key = NULL;
+
+	if (idx >= NUM_DEFAULT_KEYS &&
+	    idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
+		key = sdata->keys[idx];
+
+	rcu_assign_pointer(sdata->default_mgmt_key, key);
+
+	if (key)
+		add_todo(key, KEY_FLAG_TODO_DEFMGMTKEY);
+}
+
+void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
+				    int idx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sdata->local->key_lock, flags);
+	__ieee80211_set_default_mgmt_key(sdata, idx);
+	spin_unlock_irqrestore(&sdata->local->key_lock, flags);
+}
+
 
 static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 				    struct sta_info *sta,
 				    struct ieee80211_key *old,
 				    struct ieee80211_key *new)
 {
-	int idx, defkey;
+	int idx, defkey, defmgmtkey;
 
 	if (new)
 		list_add(&new->list, &sdata->key_list);
@@ -237,13 +263,19 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 			idx = new->conf.keyidx;
 
 		defkey = old && sdata->default_key == old;
+		defmgmtkey = old && sdata->default_mgmt_key == old;
 
 		if (defkey && !new)
 			__ieee80211_set_default_key(sdata, -1);
+		if (defmgmtkey && !new)
+			__ieee80211_set_default_mgmt_key(sdata, -1);
 
 		rcu_assign_pointer(sdata->keys[idx], new);
 		if (defkey && new)
 			__ieee80211_set_default_key(sdata, new->conf.keyidx);
+		if (defmgmtkey && new)
+			__ieee80211_set_default_mgmt_key(sdata,
+							 new->conf.keyidx);
 	}
 
 	if (old) {
@@ -262,7 +294,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 {
 	struct ieee80211_key *key;
 
-	BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS);
+	BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
 
 	key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
 	if (!key)
@@ -291,6 +323,10 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 		key->conf.iv_len = CCMP_HDR_LEN;
 		key->conf.icv_len = CCMP_MIC_LEN;
 		break;
+	case ALG_AES_CMAC:
+		key->conf.iv_len = 0;
+		key->conf.icv_len = sizeof(struct ieee80211_mmie);
+		break;
 	}
 	memcpy(key->conf.key, key_data, key_len);
 	INIT_LIST_HEAD(&key->list);
@@ -308,6 +344,19 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 		}
 	}
 
+	if (alg == ALG_AES_CMAC) {
+		/*
+		 * Initialize AES key state here as an optimization so that
+		 * it does not need to be initialized for every packet.
+		 */
+		key->u.aes_cmac.tfm =
+			ieee80211_aes_cmac_key_setup(key_data);
+		if (!key->u.aes_cmac.tfm) {
+			kfree(key);
+			return NULL;
+		}
+	}
+
 	return key;
 }
 
@@ -461,6 +510,8 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
 
 	if (key->conf.alg == ALG_CCMP)
 		ieee80211_aes_key_free(key->u.ccmp.tfm);
+	if (key->conf.alg == ALG_AES_CMAC)
+		ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
 	ieee80211_debugfs_key_remove(key);
 
 	kfree(key);
@@ -483,6 +534,7 @@ static void __ieee80211_key_todo(void)
 		list_del_init(&key->todo);
 		todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS |
 					  KEY_FLAG_TODO_DEFKEY |
+					  KEY_FLAG_TODO_DEFMGMTKEY |
 					  KEY_FLAG_TODO_HWACCEL_ADD |
 					  KEY_FLAG_TODO_HWACCEL_REMOVE |
 					  KEY_FLAG_TODO_DELETE);
@@ -500,6 +552,11 @@ static void __ieee80211_key_todo(void)
 			ieee80211_debugfs_key_add_default(key->sdata);
 			work_done = true;
 		}
+		if (todoflags & KEY_FLAG_TODO_DEFMGMTKEY) {
+			ieee80211_debugfs_key_remove_mgmt_default(key->sdata);
+			ieee80211_debugfs_key_add_mgmt_default(key->sdata);
+			work_done = true;
+		}
 		if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) {
 			ieee80211_key_enable_hw_accel(key);
 			work_done = true;
@@ -535,6 +592,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
 	ieee80211_key_lock();
 
 	ieee80211_debugfs_key_remove_default(sdata);
+	ieee80211_debugfs_key_remove_mgmt_default(sdata);
 
 	spin_lock_irqsave(&sdata->local->key_lock, flags);
 	list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 73ac28ca2ede..215d3ef42a4f 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -46,6 +46,8 @@ struct sta_info;
  *	acceleration.
  * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated.
  * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs.
+ * @KEY_FLAG_TODO_DEFMGMTKEY: Key is default management key and debugfs needs
+ *	to be updated.
  */
 enum ieee80211_internal_key_flags {
 	KEY_FLAG_UPLOADED_TO_HARDWARE	= BIT(0),
@@ -54,6 +56,7 @@ enum ieee80211_internal_key_flags {
 	KEY_FLAG_TODO_HWACCEL_REMOVE	= BIT(3),
 	KEY_FLAG_TODO_DEFKEY		= BIT(4),
 	KEY_FLAG_TODO_ADD_DEBUGFS	= BIT(5),
+	KEY_FLAG_TODO_DEFMGMTKEY	= BIT(6),
 };
 
 struct tkip_ctx {
@@ -124,6 +127,7 @@ struct ieee80211_key {
 		struct dentry *tx_spec;
 		struct dentry *rx_spec;
 		struct dentry *replays;
+		struct dentry *icverrors;
 		struct dentry *key;
 		struct dentry *ifindex;
 		int cnt;
@@ -150,6 +154,8 @@ void ieee80211_key_link(struct ieee80211_key *key,
 			struct sta_info *sta);
 void ieee80211_key_free(struct ieee80211_key *key);
 void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
+void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
+				    int idx);
 void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
 void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
 void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b68e082e99ce..abc3aa583ca6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -446,6 +446,52 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
 	return RX_CONTINUE;
 }
 
+
+static int ieee80211_is_unicast_robust_mgmt_frame(struct sk_buff *skb)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+
+	if (skb->len < 24 || is_multicast_ether_addr(hdr->addr1))
+		return 0;
+
+	return ieee80211_is_robust_mgmt_frame(hdr);
+}
+
+
+static int ieee80211_is_multicast_robust_mgmt_frame(struct sk_buff *skb)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+
+	if (skb->len < 24 || !is_multicast_ether_addr(hdr->addr1))
+		return 0;
+
+	return ieee80211_is_robust_mgmt_frame(hdr);
+}
+
+
+/* Get the BIP key index from MMIE; return -1 if this is not a BIP frame */
+static int ieee80211_get_mmie_keyidx(struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *hdr = (struct ieee80211_mgmt *) skb->data;
+	struct ieee80211_mmie *mmie;
+
+	if (skb->len < 24 + sizeof(*mmie) ||
+	    !is_multicast_ether_addr(hdr->da))
+		return -1;
+
+	if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) hdr))
+		return -1; /* not a robust management frame */
+
+	mmie = (struct ieee80211_mmie *)
+		(skb->data + skb->len - sizeof(*mmie));
+	if (mmie->element_id != WLAN_EID_MMIE ||
+	    mmie->length != sizeof(*mmie) - 2)
+		return -1;
+
+	return le16_to_cpu(mmie->key_id);
+}
+
+
 static ieee80211_rx_result
 ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 {
@@ -561,21 +607,23 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	int hdrlen;
 	ieee80211_rx_result result = RX_DROP_UNUSABLE;
 	struct ieee80211_key *stakey = NULL;
+	int mmie_keyidx = -1;
 
 	/*
 	 * Key selection 101
 	 *
-	 * There are three types of keys:
+	 * There are four types of keys:
 	 *  - GTK (group keys)
+	 *  - IGTK (group keys for management frames)
 	 *  - PTK (pairwise keys)
 	 *  - STK (station-to-station pairwise keys)
 	 *
 	 * When selecting a key, we have to distinguish between multicast
 	 * (including broadcast) and unicast frames, the latter can only
-	 * use PTKs and STKs while the former always use GTKs. Unless, of
-	 * course, actual WEP keys ("pre-RSNA") are used, then unicast
-	 * frames can also use key indizes like GTKs. Hence, if we don't
-	 * have a PTK/STK we check the key index for a WEP key.
+	 * use PTKs and STKs while the former always use GTKs and IGTKs.
+	 * Unless, of course, actual WEP keys ("pre-RSNA") are used, then
+	 * unicast frames can also use key indices like GTKs. Hence, if we
+	 * don't have a PTK/STK we check the key index for a WEP key.
 	 *
 	 * Note that in a regular BSS, multicast frames are sent by the
 	 * AP only, associated stations unicast the frame to the AP first
@@ -588,8 +636,14 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	 * possible.
 	 */
 
-	if (!ieee80211_has_protected(hdr->frame_control))
-		return RX_CONTINUE;
+	if (!ieee80211_has_protected(hdr->frame_control)) {
+		if (!ieee80211_is_mgmt(hdr->frame_control) ||
+		    rx->sta == NULL || !test_sta_flags(rx->sta, WLAN_STA_MFP))
+			return RX_CONTINUE;
+		mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
+		if (mmie_keyidx < 0)
+			return RX_CONTINUE;
+	}
 
 	/*
 	 * No point in finding a key and decrypting if the frame is neither
@@ -603,6 +657,16 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 
 	if (!is_multicast_ether_addr(hdr->addr1) && stakey) {
 		rx->key = stakey;
+	} else if (mmie_keyidx >= 0) {
+		/* Broadcast/multicast robust management frame / BIP */
+		if ((rx->status->flag & RX_FLAG_DECRYPTED) &&
+		    (rx->status->flag & RX_FLAG_IV_STRIPPED))
+			return RX_CONTINUE;
+
+		if (mmie_keyidx < NUM_DEFAULT_KEYS ||
+		    mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
+			return RX_DROP_MONITOR; /* unexpected BIP keyidx */
+		rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
 	} else {
 		/*
 		 * The device doesn't give us the IV so we won't be
@@ -665,6 +729,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	case ALG_CCMP:
 		result = ieee80211_crypto_ccmp_decrypt(rx);
 		break;
+	case ALG_AES_CMAC:
+		result = ieee80211_crypto_aes_cmac_decrypt(rx);
+		break;
 	}
 
 	/* either the frame has been decrypted or will be dropped */
@@ -1112,6 +1179,15 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
 	/* Drop unencrypted frames if key is set. */
 	if (unlikely(!ieee80211_has_protected(fc) &&
 		     !ieee80211_is_nullfunc(fc) &&
+		     (!ieee80211_is_mgmt(fc) ||
+		      (ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
+		       rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP))) &&
+		     (rx->key || rx->sdata->drop_unencrypted)))
+		return -EACCES;
+	/* BIP does not use Protected field, so need to check MMIE */
+	if (unlikely(rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP) &&
+		     ieee80211_is_multicast_robust_mgmt_frame(rx->skb) &&
+		     ieee80211_get_mmie_keyidx(rx->skb) < 0 &&
 		     (rx->key || rx->sdata->drop_unencrypted)))
 		return -EACCES;
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 50c6c4fabea5..ad53ea9e9c77 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -425,6 +425,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 		tx->key = NULL;
 	else if (tx->sta && (key = rcu_dereference(tx->sta->key)))
 		tx->key = key;
+	else if (ieee80211_is_mgmt(hdr->frame_control) &&
+		 (key = rcu_dereference(tx->sdata->default_mgmt_key)))
+		tx->key = key;
 	else if ((key = rcu_dereference(tx->sdata->default_key)))
 		tx->key = key;
 	else if (tx->sdata->drop_unencrypted &&
@@ -453,6 +456,10 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 					       tx->skb))
 				tx->key = NULL;
 			break;
+		case ALG_AES_CMAC:
+			if (!ieee80211_is_mgmt(hdr->frame_control))
+				tx->key = NULL;
+			break;
 		}
 	}
 
@@ -808,6 +815,8 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
 		return ieee80211_crypto_tkip_encrypt(tx);
 	case ALG_CCMP:
 		return ieee80211_crypto_ccmp_encrypt(tx);
+	case ALG_AES_CMAC:
+		return ieee80211_crypto_aes_cmac_encrypt(tx);
 	}
 
 	/* not reached */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1e728fff474e..123d3b160fad 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -738,7 +738,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_KEY_IDX])
 		key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
 
-	if (key_idx > 3)
+	if (key_idx > 5)
 		return -EINVAL;
 
 	if (info->attrs[NL80211_ATTR_MAC])
@@ -804,30 +804,41 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
 	int err;
 	struct net_device *dev;
 	u8 key_idx;
+	int (*func)(struct wiphy *wiphy, struct net_device *netdev,
+		    u8 key_index);
 
 	if (!info->attrs[NL80211_ATTR_KEY_IDX])
 		return -EINVAL;
 
 	key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
 
-	if (key_idx > 3)
+	if (info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) {
+		if (key_idx < 4 || key_idx > 5)
+			return -EINVAL;
+	} else if (key_idx > 3)
 		return -EINVAL;
 
 	/* currently only support setting default key */
-	if (!info->attrs[NL80211_ATTR_KEY_DEFAULT])
+	if (!info->attrs[NL80211_ATTR_KEY_DEFAULT] &&
+	    !info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT])
 		return -EINVAL;
 
 	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
-	if (!drv->ops->set_default_key) {
+	if (info->attrs[NL80211_ATTR_KEY_DEFAULT])
+		func = drv->ops->set_default_key;
+	else
+		func = drv->ops->set_default_mgmt_key;
+
+	if (!func) {
 		err = -EOPNOTSUPP;
 		goto out;
 	}
 
 	rtnl_lock();
-	err = drv->ops->set_default_key(&drv->wiphy, dev, key_idx);
+	err = func(&drv->wiphy, dev, key_idx);
 	rtnl_unlock();
 
  out:
@@ -863,7 +874,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_MAC])
 		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	if (key_idx > 3)
+	if (key_idx > 5)
 		return -EINVAL;
 
 	/*
@@ -894,6 +905,10 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
 		if (params.key_len != 13)
 			return -EINVAL;
 		break;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		if (params.key_len != 16)
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -928,7 +943,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_KEY_IDX])
 		key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
 
-	if (key_idx > 3)
+	if (key_idx > 5)
 		return -EINVAL;
 
 	if (info->attrs[NL80211_ATTR_MAC])
-- 
cgit v1.2.3


From 54604d3a827b37525ef017adba313c7112e0f484 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 8 Jan 2009 13:32:03 +0200
Subject: mac80211: 802.11w - WEXT parameter for setting mgmt cipher

Add a new IW_AUTH parameter for setting cipher suite for
multicast/broadcast management frames. This is for full-mac drivers
that take care of RSN IE generation for (re)association request frames.

Signed-off-by: Jouni Malinen <j@w1.fi>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/wireless.h | 5 ++++-
 net/mac80211/wext.c      | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index d7958f9b52cb..d426dce47e7c 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -577,18 +577,21 @@
 #define IW_AUTH_RX_UNENCRYPTED_EAPOL	8
 #define IW_AUTH_ROAMING_CONTROL		9
 #define IW_AUTH_PRIVACY_INVOKED		10
+#define IW_AUTH_CIPHER_GROUP_MGMT	11
 
 /* IW_AUTH_WPA_VERSION values (bit field) */
 #define IW_AUTH_WPA_VERSION_DISABLED	0x00000001
 #define IW_AUTH_WPA_VERSION_WPA		0x00000002
 #define IW_AUTH_WPA_VERSION_WPA2	0x00000004
 
-/* IW_AUTH_PAIRWISE_CIPHER and IW_AUTH_GROUP_CIPHER values (bit field) */
+/* IW_AUTH_PAIRWISE_CIPHER, IW_AUTH_GROUP_CIPHER, and IW_AUTH_CIPHER_GROUP_MGMT
+ * values (bit field) */
 #define IW_AUTH_CIPHER_NONE	0x00000001
 #define IW_AUTH_CIPHER_WEP40	0x00000002
 #define IW_AUTH_CIPHER_TKIP	0x00000004
 #define IW_AUTH_CIPHER_CCMP	0x00000008
 #define IW_AUTH_CIPHER_WEP104	0x00000010
+#define IW_AUTH_CIPHER_AES_CMAC	0x00000020
 
 /* IW_AUTH_KEY_MGMT values (bit field) */
 #define IW_AUTH_KEY_MGMT_802_1X	1
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 1e5b29bdb3a7..c3b2dd5706fb 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -927,6 +927,7 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
 	case IW_AUTH_WPA_ENABLED:
 	case IW_AUTH_RX_UNENCRYPTED_EAPOL:
 	case IW_AUTH_KEY_MGMT:
+	case IW_AUTH_CIPHER_GROUP_MGMT:
 		break;
 	case IW_AUTH_CIPHER_PAIRWISE:
 		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
-- 
cgit v1.2.3


From 22787dbaa3b952602542506e0426ea6d5f104042 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 8 Jan 2009 13:32:04 +0200
Subject: mac80211: 802.11w - WEXT configuration for IGTK

Added new SIOCSIWENCODEEXT algorithm for configuring BIP (AES-CMAC)
keys (IGTK).

Signed-off-by: Jouni Malinen <j@w1.fi>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/wireless.h |  1 +
 net/mac80211/wext.c      | 62 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 49 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index d426dce47e7c..5d1f3fbffd77 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -615,6 +615,7 @@
 #define IW_ENCODE_ALG_TKIP	2
 #define IW_ENCODE_ALG_CCMP	3
 #define IW_ENCODE_ALG_PMK	4
+#define IW_ENCODE_ALG_AES_CMAC	5
 /* struct iw_encode_ext ->ext_flags */
 #define IW_ENCODE_EXT_TX_SEQ_VALID	0x00000001
 #define IW_ENCODE_EXT_RX_SEQ_VALID	0x00000002
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index c3b2dd5706fb..7ba1d5ba3afa 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -37,7 +37,14 @@ static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta
 	struct ieee80211_key *key;
 	int err;
 
-	if (idx < 0 || idx >= NUM_DEFAULT_KEYS) {
+	if (alg == ALG_AES_CMAC) {
+		if (idx < NUM_DEFAULT_KEYS ||
+		    idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) {
+			printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d "
+			       "(BIP)\n", sdata->dev->name, idx);
+			return -EINVAL;
+		}
+	} else if (idx < 0 || idx >= NUM_DEFAULT_KEYS) {
 		printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n",
 		       sdata->dev->name, idx);
 		return -EINVAL;
@@ -103,6 +110,9 @@ static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta
 
 		if (set_tx_key || (!sta && !sdata->default_key && key))
 			ieee80211_set_default_key(sdata, idx);
+		if (alg == ALG_AES_CMAC &&
+		    (set_tx_key || (!sta && !sdata->default_mgmt_key && key)))
+			ieee80211_set_default_mgmt_key(sdata, idx);
 	}
 
  out_unlock:
@@ -1048,6 +1058,9 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
 	case IW_ENCODE_ALG_CCMP:
 		alg = ALG_CCMP;
 		break;
+	case IW_ENCODE_ALG_AES_CMAC:
+		alg = ALG_AES_CMAC;
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1056,20 +1069,41 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
 		remove = 1;
 
 	idx = erq->flags & IW_ENCODE_INDEX;
-	if (idx < 1 || idx > 4) {
-		idx = -1;
-		if (!sdata->default_key)
-			idx = 0;
-		else for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
-			if (sdata->default_key == sdata->keys[i]) {
-				idx = i;
-				break;
+	if (alg == ALG_AES_CMAC) {
+		if (idx < NUM_DEFAULT_KEYS + 1 ||
+		    idx > NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) {
+			idx = -1;
+			if (!sdata->default_mgmt_key)
+				idx = 0;
+			else for (i = NUM_DEFAULT_KEYS;
+				  i < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS;
+				  i++) {
+				if (sdata->default_mgmt_key == sdata->keys[i])
+				{
+					idx = i;
+					break;
+				}
 			}
-		}
-		if (idx < 0)
-			return -EINVAL;
-	} else
-		idx--;
+			if (idx < 0)
+				return -EINVAL;
+		} else
+			idx--;
+	} else {
+		if (idx < 1 || idx > 4) {
+			idx = -1;
+			if (!sdata->default_key)
+				idx = 0;
+			else for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+				if (sdata->default_key == sdata->keys[i]) {
+					idx = i;
+					break;
+				}
+			}
+			if (idx < 0)
+				return -EINVAL;
+		} else
+			idx--;
+	}
 
 	return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg,
 					remove,
-- 
cgit v1.2.3


From fdfacf0ae2e8339098b1164d2317b792d7662c0a Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 8 Jan 2009 13:32:05 +0200
Subject: mac80211: 802.11w - Configuration of MFP disabled/optional/required

Add new WEXT IW_AUTH_* parameter for setting MFP
disabled/optional/required.

Signed-off-by: Jouni Malinen <j@w1.fi>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/wireless.h   | 6 ++++++
 net/mac80211/ieee80211_i.h | 6 ++++++
 net/mac80211/mlme.c        | 4 ++++
 net/mac80211/wext.c        | 7 +++++++
 4 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index 5d1f3fbffd77..cb24204851f7 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -578,6 +578,7 @@
 #define IW_AUTH_ROAMING_CONTROL		9
 #define IW_AUTH_PRIVACY_INVOKED		10
 #define IW_AUTH_CIPHER_GROUP_MGMT	11
+#define IW_AUTH_MFP			12
 
 /* IW_AUTH_WPA_VERSION values (bit field) */
 #define IW_AUTH_WPA_VERSION_DISABLED	0x00000001
@@ -607,6 +608,11 @@
 #define IW_AUTH_ROAMING_DISABLE	1	/* user space program used for roaming
 					 * control */
 
+/* IW_AUTH_MFP (management frame protection) values */
+#define IW_AUTH_MFP_DISABLED	0	/* MFP disabled */
+#define IW_AUTH_MFP_OPTIONAL	1	/* MFP optional */
+#define IW_AUTH_MFP_REQUIRED	2	/* MFP required */
+
 /* SIOCSIWENCODEEXT definitions */
 #define IW_ENCODE_SEQ_MAX_SIZE	8
 /* struct iw_encode_ext ->alg */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8c3245717c55..212c732fbba7 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -320,6 +320,12 @@ struct ieee80211_if_sta {
 	int auth_alg; /* currently used IEEE 802.11 authentication algorithm */
 	int auth_transaction;
 
+	enum {
+		IEEE80211_MFP_DISABLED,
+		IEEE80211_MFP_OPTIONAL,
+		IEEE80211_MFP_REQUIRED
+	} mfp; /* management frame protection */
+
 	unsigned long ibss_join_req;
 	struct sk_buff *probe_resp; /* ProbeResp template for IBSS */
 	u32 supp_rates_bits[IEEE80211_NUM_BANDS];
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index bc8a7f1a6a15..42c5f981c715 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2317,6 +2317,10 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 					       selected->ssid_len);
 		ieee80211_sta_set_bssid(sdata, selected->bssid);
 		ieee80211_sta_def_wmm_params(sdata, selected);
+		if (sdata->u.sta.mfp == IEEE80211_MFP_REQUIRED)
+			sdata->u.sta.flags |= IEEE80211_STA_MFP_ENABLED;
+		else
+			sdata->u.sta.flags &= ~IEEE80211_STA_MFP_ENABLED;
 
 		/* Send out direct probe if no probe resp was received or
 		 * the one we have is outdated
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 7ba1d5ba3afa..2dd387495dfe 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -975,6 +975,13 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
 		else
 			ret = -EOPNOTSUPP;
 		break;
+	case IW_AUTH_MFP:
+		if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+		    sdata->vif.type == NL80211_IFTYPE_ADHOC)
+			sdata->u.sta.mfp = data->value;
+		else
+			ret = -EOPNOTSUPP;
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
-- 
cgit v1.2.3


From fea147328908b7e2bfcaf9dc4377909d5507ca35 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 8 Jan 2009 13:32:06 +0200
Subject: mac80211: 802.11w - SA Query processing

Process SA Query Requests for client mode in mac80211. AP side
processing of SA Query Response frames is in user space (hostapd).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 14 ++++++++++
 net/mac80211/rx.c         | 69 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index df98a8a549a2..9fe1948d28d3 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -527,6 +527,8 @@ struct ieee80211_tim_ie {
 	u8 virtual_map[0];
 } __attribute__ ((packed));
 
+#define WLAN_SA_QUERY_TR_ID_LEN 16
+
 struct ieee80211_mgmt {
 	__le16 frame_control;
 	__le16 duration;
@@ -646,6 +648,10 @@ struct ieee80211_mgmt {
 					u8 action_code;
 					u8 variable[0];
 				} __attribute__((packed)) mesh_action;
+				struct {
+					u8 action;
+					u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN];
+				} __attribute__ ((packed)) sa_query;
 			} u;
 		} __attribute__ ((packed)) action;
 	} u;
@@ -1041,6 +1047,7 @@ enum ieee80211_category {
 	WLAN_CATEGORY_DLS = 2,
 	WLAN_CATEGORY_BACK = 3,
 	WLAN_CATEGORY_PUBLIC = 4,
+	WLAN_CATEGORY_SA_QUERY = 8,
 	WLAN_CATEGORY_WMM = 17,
 };
 
@@ -1129,6 +1136,13 @@ enum ieee80211_back_parties {
 	WLAN_BACK_TIMER = 2,
 };
 
+/* SA Query action */
+enum ieee80211_sa_query_action {
+	WLAN_ACTION_SA_QUERY_REQUEST = 0,
+	WLAN_ACTION_SA_QUERY_RESPONSE = 1,
+};
+
+
 /* A-MSDU 802.11n */
 #define IEEE80211_QOS_CONTROL_A_MSDU_PRESENT 0x0080
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index abc3aa583ca6..63db89aef3e4 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1667,6 +1667,57 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
 	return RX_CONTINUE;
 }
 
+void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
+				    struct ieee80211_mgmt *mgmt,
+				    size_t len)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *resp;
+
+	if (compare_ether_addr(mgmt->da, sdata->dev->dev_addr) != 0) {
+		/* Not to own unicast address */
+		return;
+	}
+
+	if (compare_ether_addr(mgmt->sa, sdata->u.sta.bssid) != 0 ||
+	    compare_ether_addr(mgmt->bssid, sdata->u.sta.bssid) != 0) {
+		/* Not from the current AP. */
+		return;
+	}
+
+	if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATE) {
+		/* Association in progress; ignore SA Query */
+		return;
+	}
+
+	if (len < 24 + 1 + sizeof(resp->u.action.u.sa_query)) {
+		/* Too short SA Query request frame */
+		return;
+	}
+
+	skb = dev_alloc_skb(sizeof(*resp) + local->hw.extra_tx_headroom);
+	if (skb == NULL)
+		return;
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	resp = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(resp, 0, 24);
+	memcpy(resp->da, mgmt->sa, ETH_ALEN);
+	memcpy(resp->sa, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(resp->bssid, sdata->u.sta.bssid, ETH_ALEN);
+	resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
+	skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query));
+	resp->u.action.category = WLAN_CATEGORY_SA_QUERY;
+	resp->u.action.u.sa_query.action = WLAN_ACTION_SA_QUERY_RESPONSE;
+	memcpy(resp->u.action.u.sa_query.trans_id,
+	       mgmt->u.action.u.sa_query.trans_id,
+	       WLAN_SA_QUERY_TR_ID_LEN);
+
+	ieee80211_tx_skb(sdata, skb, 1);
+}
+
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 {
@@ -1743,6 +1794,24 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			break;
 		}
 		break;
+	case WLAN_CATEGORY_SA_QUERY:
+		if (len < (IEEE80211_MIN_ACTION_SIZE +
+			   sizeof(mgmt->u.action.u.sa_query)))
+			return RX_DROP_MONITOR;
+		switch (mgmt->u.action.u.sa_query.action) {
+		case WLAN_ACTION_SA_QUERY_REQUEST:
+			if (sdata->vif.type != NL80211_IFTYPE_STATION)
+				return RX_DROP_MONITOR;
+			ieee80211_process_sa_query_req(sdata, mgmt, len);
+			break;
+		case WLAN_ACTION_SA_QUERY_RESPONSE:
+			/*
+			 * SA Query response is currently only used in AP mode
+			 * and it is processed in user space.
+			 */
+			return RX_CONTINUE;
+		}
+		break;
 	default:
 		return RX_CONTINUE;
 	}
-- 
cgit v1.2.3


From 63a5ab82255a4ff5d0783f16427210f1d45d7ec8 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 8 Jan 2009 13:32:09 +0200
Subject: mac80211: 802.11w - Implement Association Comeback processing

When MFP is enabled, the AP does not allow a STA to associate if an
existing security association exists without first going through SA
Query process. When this happens, the association request is denied
with a new status code ("temporarily rejected") ans Association
Comeback IE is used to notify when the association may be tried again
(i.e., when the SA Query procedure has timed out).

Use the comeback time to update the mac80211 client MLME timer for
next association attempt to minimize waiting time if association is
temporarily rejected.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  4 ++++
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/mlme.c        | 20 +++++++++++++++++---
 net/mac80211/util.c        |  4 ++++
 4 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 9fe1948d28d3..7800e20f197f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -914,6 +914,9 @@ enum ieee80211_statuscode {
 	/* 802.11g */
 	WLAN_STATUS_ASSOC_DENIED_NOSHORTTIME = 25,
 	WLAN_STATUS_ASSOC_DENIED_NODSSSOFDM = 26,
+	/* 802.11w */
+	WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY = 30,
+	WLAN_STATUS_ROBUST_MGMT_FRAME_POLICY_VIOLATION = 31,
 	/* 802.11i */
 	WLAN_STATUS_INVALID_IE = 40,
 	WLAN_STATUS_INVALID_GROUP_CIPHER = 41,
@@ -1034,6 +1037,7 @@ enum ieee80211_eid {
 	/* 802.11i */
 	WLAN_EID_RSN = 48,
 	WLAN_EID_MMIE = 76 /* 802.11w */,
+	WLAN_EID_ASSOC_COMEBACK_TIME = 77,
 	WLAN_EID_WPA = 221,
 	WLAN_EID_GENERIC = 221,
 	WLAN_EID_VENDOR_SPECIFIC = 221,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 212c732fbba7..9112c5247c35 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -820,6 +820,7 @@ struct ieee802_11_elems {
 	u8 *country_elem;
 	u8 *pwr_constr_elem;
 	u8 *quiet_elem; 	/* first quite element */
+	u8 *assoc_comeback;
 
 	/* length of them, respectively */
 	u8 ssid_len;
@@ -847,6 +848,7 @@ struct ieee802_11_elems {
 	u8 pwr_constr_elem_len;
 	u8 quiet_elem_len;
 	u8 num_of_quiet_elem;	/* can be more the one */
+	u8 assoc_comeback_len;
 };
 
 static inline struct ieee80211_local *hw_to_local(
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 42c5f981c715..82c598a83687 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1275,6 +1275,23 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	       sdata->dev->name, reassoc ? "Rea" : "A", mgmt->sa,
 	       capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
 
+	pos = mgmt->u.assoc_resp.variable;
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
+
+	if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
+	    elems.assoc_comeback && elems.assoc_comeback_len == 4) {
+		u32 tu, ms;
+		tu = get_unaligned_le32(elems.assoc_comeback);
+		ms = tu * 1024 / 1000;
+		printk(KERN_DEBUG "%s: AP rejected association temporarily; "
+		       "comeback duration %u TU (%u ms)\n",
+		       sdata->dev->name, tu, ms);
+		if (ms > IEEE80211_ASSOC_TIMEOUT)
+			mod_timer(&ifsta->timer,
+				  jiffies + msecs_to_jiffies(ms));
+		return;
+	}
+
 	if (status_code != WLAN_STATUS_SUCCESS) {
 		printk(KERN_DEBUG "%s: AP denied association (code=%d)\n",
 		       sdata->dev->name, status_code);
@@ -1290,9 +1307,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		       "set\n", sdata->dev->name, aid);
 	aid &= ~(BIT(15) | BIT(14));
 
-	pos = mgmt->u.assoc_resp.variable;
-	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
-
 	if (!elems.supp_rates) {
 		printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n",
 		       sdata->dev->name);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5cd430333f08..963e0473205c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -653,6 +653,10 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
 			elems->pwr_constr_elem = pos;
 			elems->pwr_constr_elem_len = elen;
 			break;
+		case WLAN_EID_ASSOC_COMEBACK_TIME:
+			elems->assoc_comeback = pos;
+			elems->assoc_comeback_len = elen;
+			break;
 		default:
 			break;
 		}
-- 
cgit v1.2.3


From d2b21f191753abd12c4063776cb1a3d635397509 Mon Sep 17 00:00:00 2001
From: Colin McCabe <colin@cozybit.com>
Date: Fri, 9 Jan 2009 14:58:09 -0800
Subject: libertas: if_spi, driver for libertas GSPI devices

Add initial support for libertas devices using a GSPI interface.  This has
been tested with the 8686.

GSPI is intended to be used on embedded systems. Board-specific parameters are
required (see libertas_spi.h).

Thanks to everyone who took a look at the earlier versions of the patch.

Signed-off-by: Colin McCabe <colin@cozybit.com>
Signed-off-by: Andrey Yurovsky <andrey@cozybit.com>
Acked-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/Kconfig           |    6 +
 drivers/net/wireless/libertas/Makefile |    2 +
 drivers/net/wireless/libertas/defs.h   |    2 +
 drivers/net/wireless/libertas/if_spi.c | 1203 ++++++++++++++++++++++++++++++++
 drivers/net/wireless/libertas/if_spi.h |  208 ++++++
 include/linux/spi/libertas_spi.h       |   25 +
 6 files changed, 1446 insertions(+)
 create mode 100644 drivers/net/wireless/libertas/if_spi.c
 create mode 100644 drivers/net/wireless/libertas/if_spi.h
 create mode 100644 include/linux/spi/libertas_spi.h

(limited to 'include/linux')

diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index e4f9f747de88..2dddbd012a99 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -151,6 +151,12 @@ config LIBERTAS_SDIO
 	---help---
 	  A driver for Marvell Libertas 8385 and 8686 SDIO devices.
 
+config LIBERTAS_SPI
+	tristate "Marvell Libertas 8686 SPI 802.11b/g cards"
+	depends on LIBERTAS && SPI && GENERIC_GPIO
+	---help---
+	  A driver for Marvell Libertas 8686 SPI devices.
+
 config LIBERTAS_DEBUG
 	bool "Enable full debugging output in the Libertas module."
 	depends on LIBERTAS
diff --git a/drivers/net/wireless/libertas/Makefile b/drivers/net/wireless/libertas/Makefile
index 02080a3682a9..0b6918584503 100644
--- a/drivers/net/wireless/libertas/Makefile
+++ b/drivers/net/wireless/libertas/Makefile
@@ -4,8 +4,10 @@ libertas-objs := main.o wext.o rx.o tx.o cmd.o cmdresp.o scan.o 11d.o	\
 usb8xxx-objs += if_usb.o
 libertas_cs-objs += if_cs.o
 libertas_sdio-objs += if_sdio.o
+libertas_spi-objs += if_spi.o
 
 obj-$(CONFIG_LIBERTAS)     += libertas.o
 obj-$(CONFIG_LIBERTAS_USB) += usb8xxx.o
 obj-$(CONFIG_LIBERTAS_CS)  += libertas_cs.o
 obj-$(CONFIG_LIBERTAS_SDIO) += libertas_sdio.o
+obj-$(CONFIG_LIBERTAS_SPI) += libertas_spi.o
diff --git a/drivers/net/wireless/libertas/defs.h b/drivers/net/wireless/libertas/defs.h
index c364e4c01d1b..6388b05df4fc 100644
--- a/drivers/net/wireless/libertas/defs.h
+++ b/drivers/net/wireless/libertas/defs.h
@@ -41,6 +41,7 @@
 #define LBS_DEB_HEX	0x00200000
 #define LBS_DEB_SDIO	0x00400000
 #define LBS_DEB_SYSFS	0x00800000
+#define LBS_DEB_SPI	0x01000000
 
 extern unsigned int lbs_debug;
 
@@ -84,6 +85,7 @@ do { if ((lbs_debug & (grp)) == (grp)) \
 #define lbs_deb_thread(fmt, args...)    LBS_DEB_LL(LBS_DEB_THREAD, " thread", fmt, ##args)
 #define lbs_deb_sdio(fmt, args...)      LBS_DEB_LL(LBS_DEB_SDIO, " sdio", fmt, ##args)
 #define lbs_deb_sysfs(fmt, args...)     LBS_DEB_LL(LBS_DEB_SYSFS, " sysfs", fmt, ##args)
+#define lbs_deb_spi(fmt, args...)       LBS_DEB_LL(LBS_DEB_SPI, " spi", fmt, ##args)
 
 #define lbs_pr_info(format, args...) \
 	printk(KERN_INFO DRV_NAME": " format, ## args)
diff --git a/drivers/net/wireless/libertas/if_spi.c b/drivers/net/wireless/libertas/if_spi.c
new file mode 100644
index 000000000000..7c02ea314fd1
--- /dev/null
+++ b/drivers/net/wireless/libertas/if_spi.c
@@ -0,0 +1,1203 @@
+/*
+ *	linux/drivers/net/wireless/libertas/if_spi.c
+ *
+ *	Driver for Marvell SPI WLAN cards.
+ *
+ *	Copyright 2008 Analog Devices Inc.
+ *
+ *	Authors:
+ *	Andrey Yurovsky <andrey@cozybit.com>
+ *	Colin McCabe <colin@cozybit.com>
+ *
+ *	Inspired by if_sdio.c, Copyright 2007-2008 Pierre Ossman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/moduleparam.h>
+#include <linux/firmware.h>
+#include <linux/gpio.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/spi/libertas_spi.h>
+#include <linux/spi/spi.h>
+
+#include "host.h"
+#include "decl.h"
+#include "defs.h"
+#include "dev.h"
+#include "if_spi.h"
+
+struct if_spi_packet {
+	struct list_head		list;
+	u16				blen;
+	u8				buffer[0] __attribute__((aligned(4)));
+};
+
+struct if_spi_card {
+	struct spi_device		*spi;
+	struct lbs_private		*priv;
+
+	char				helper_fw_name[FIRMWARE_NAME_MAX];
+	char				main_fw_name[FIRMWARE_NAME_MAX];
+
+	/* The card ID and card revision, as reported by the hardware. */
+	u16				card_id;
+	u8				card_rev;
+
+	/* Pin number for our GPIO chip-select. */
+	/* TODO: Once the generic SPI layer has some additional features, we
+	 * should take this out and use the normal chip select here.
+	 * We need support for chip select delays, and not dropping chipselect
+	 * after each word. */
+	int				gpio_cs;
+
+	/* The last time that we initiated an SPU operation */
+	unsigned long			prev_xfer_time;
+
+	int				use_dummy_writes;
+	unsigned long			spu_port_delay;
+	unsigned long			spu_reg_delay;
+
+	/* Handles all SPI communication (except for FW load) */
+	struct task_struct		*spi_thread;
+	int				run_thread;
+
+	/* Used to wake up the spi_thread */
+	struct semaphore		spi_ready;
+	struct semaphore		spi_thread_terminated;
+
+	u8				cmd_buffer[IF_SPI_CMD_BUF_SIZE];
+
+	/* A buffer of incoming packets from libertas core.
+	 * Since we can't sleep in hw_host_to_card, we have to buffer
+	 * them. */
+	struct list_head		cmd_packet_list;
+	struct list_head		data_packet_list;
+
+	/* Protects cmd_packet_list and data_packet_list */
+	spinlock_t			buffer_lock;
+};
+
+static void free_if_spi_card(struct if_spi_card *card)
+{
+	struct list_head *cursor, *next;
+	struct if_spi_packet *packet;
+
+	BUG_ON(card->run_thread);
+	list_for_each_safe(cursor, next, &card->cmd_packet_list) {
+		packet = container_of(cursor, struct if_spi_packet, list);
+		list_del(&packet->list);
+		kfree(packet);
+	}
+	list_for_each_safe(cursor, next, &card->data_packet_list) {
+		packet = container_of(cursor, struct if_spi_packet, list);
+		list_del(&packet->list);
+		kfree(packet);
+	}
+	spi_set_drvdata(card->spi, NULL);
+	kfree(card);
+}
+
+static struct chip_ident chip_id_to_device_name[] = {
+	{ .chip_id = 0x04, .name = 8385 },
+	{ .chip_id = 0x0b, .name = 8686 },
+};
+
+/*
+ * SPI Interface Unit Routines
+ *
+ * The SPU sits between the host and the WLAN module.
+ * All communication with the firmware is through SPU transactions.
+ *
+ * First we have to put a SPU register name on the bus. Then we can
+ * either read from or write to that register.
+ *
+ * For 16-bit transactions, byte order on the bus is big-endian.
+ * We don't have to worry about that here, though.
+ * The translation takes place in the SPI routines.
+ */
+
+static void spu_transaction_init(struct if_spi_card *card)
+{
+	if (!time_after(jiffies, card->prev_xfer_time + 1)) {
+		/* Unfortunately, the SPU requires a delay between successive
+		 * transactions. If our last transaction was more than a jiffy
+		 * ago, we have obviously already delayed enough.
+		 * If not, we have to busy-wait to be on the safe side. */
+		ndelay(400);
+	}
+	gpio_set_value(card->gpio_cs, 0); /* assert CS */
+}
+
+static void spu_transaction_finish(struct if_spi_card *card)
+{
+	gpio_set_value(card->gpio_cs, 1); /* drop CS */
+	card->prev_xfer_time = jiffies;
+}
+
+/* Write out a byte buffer to an SPI register,
+ * using a series of 16-bit transfers. */
+static int spu_write(struct if_spi_card *card, u16 reg, const u8 *buf, int len)
+{
+	int err = 0;
+	u16 reg_out = reg | IF_SPI_WRITE_OPERATION_MASK;
+
+	/* You must give an even number of bytes to the SPU, even if it
+	 * doesn't care about the last one.  */
+	BUG_ON(len & 0x1);
+
+	spu_transaction_init(card);
+
+	/* write SPU register index */
+	err = spi_write(card->spi, (u8 *)&reg_out, sizeof(u16));
+	if (err)
+		goto out;
+
+	err = spi_write(card->spi, buf, len);
+
+out:
+	spu_transaction_finish(card);
+	return err;
+}
+
+static inline int spu_write_u16(struct if_spi_card *card, u16 reg, u16 val)
+{
+	return spu_write(card, reg, (u8 *)&val, sizeof(u16));
+}
+
+static inline int spu_write_u32(struct if_spi_card *card, u16 reg, u32 val)
+{
+	/* The lower 16 bits are written first. */
+	u16 out[2];
+	out[0] = val & 0xffff;
+	out[1] = (val & 0xffff0000) >> 16;
+	return spu_write(card, reg, (u8 *)&out, sizeof(u32));
+}
+
+static inline int spu_reg_is_port_reg(u16 reg)
+{
+	switch (reg) {
+	case IF_SPI_IO_RDWRPORT_REG:
+	case IF_SPI_CMD_RDWRPORT_REG:
+	case IF_SPI_DATA_RDWRPORT_REG:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static int spu_read(struct if_spi_card *card, u16 reg, u8 *buf, int len)
+{
+	unsigned int i, delay;
+	int err = 0;
+	u16 zero = 0;
+	u16 reg_out = reg | IF_SPI_READ_OPERATION_MASK;
+
+	/* You must take an even number of bytes from the SPU, even if you
+	 * don't care about the last one.  */
+	BUG_ON(len & 0x1);
+
+	spu_transaction_init(card);
+
+	/* write SPU register index */
+	err = spi_write(card->spi, (u8 *)&reg_out, sizeof(u16));
+	if (err)
+		goto out;
+
+	delay = spu_reg_is_port_reg(reg) ? card->spu_port_delay :
+						card->spu_reg_delay;
+	if (card->use_dummy_writes) {
+		/* Clock in dummy cycles while the SPU fills the FIFO */
+		for (i = 0; i < delay / 16; ++i) {
+			err = spi_write(card->spi, (u8 *)&zero, sizeof(u16));
+			if (err)
+				return err;
+		}
+	} else {
+		/* Busy-wait while the SPU fills the FIFO */
+		ndelay(100 + (delay * 10));
+	}
+
+	/* read in data */
+	err = spi_read(card->spi, buf, len);
+
+out:
+	spu_transaction_finish(card);
+	return err;
+}
+
+/* Read 16 bits from an SPI register */
+static inline int spu_read_u16(struct if_spi_card *card, u16 reg, u16 *val)
+{
+	return spu_read(card, reg, (u8 *)val, sizeof(u16));
+}
+
+/* Read 32 bits from an SPI register.
+ * The low 16 bits are read first. */
+static int spu_read_u32(struct if_spi_card *card, u16 reg, u32 *val)
+{
+	u16 buf[2];
+	int err;
+	err = spu_read(card, reg, (u8 *)buf, sizeof(u32));
+	if (!err)
+		*val = buf[0] | (buf[1] << 16);
+	return err;
+}
+
+/* Keep reading 16 bits from an SPI register until you get the correct result.
+ *
+ * If mask = 0, the correct result is any non-zero number.
+ * If mask != 0, the correct result is any number where
+ * number & target_mask == target
+ *
+ * Returns -ETIMEDOUT if a second passes without the correct result. */
+static int spu_wait_for_u16(struct if_spi_card *card, u16 reg,
+			u16 target_mask, u16 target)
+{
+	int err;
+	unsigned long timeout = jiffies + 5*HZ;
+	while (1) {
+		u16 val;
+		err = spu_read_u16(card, reg, &val);
+		if (err)
+			return err;
+		if (target_mask) {
+			if ((val & target_mask) == target)
+				return 0;
+		} else {
+			if (val)
+				return 0;
+		}
+		udelay(100);
+		if (time_after(jiffies, timeout)) {
+			lbs_pr_err("%s: timeout with val=%02x, "
+			       "target_mask=%02x, target=%02x\n",
+			       __func__, val, target_mask, target);
+			return -ETIMEDOUT;
+		}
+	}
+}
+
+/* Read 16 bits from an SPI register until you receive a specific value.
+ * Returns -ETIMEDOUT if a 4 tries pass without success. */
+static int spu_wait_for_u32(struct if_spi_card *card, u32 reg, u32 target)
+{
+	int err, try;
+	for (try = 0; try < 4; ++try) {
+		u32 val = 0;
+		err = spu_read_u32(card, reg, &val);
+		if (err)
+			return err;
+		if (val == target)
+			return 0;
+		mdelay(100);
+	}
+	return -ETIMEDOUT;
+}
+
+static int spu_set_interrupt_mode(struct if_spi_card *card,
+			   int suppress_host_int,
+			   int auto_int)
+{
+	int err = 0;
+
+	/* We can suppress a host interrupt by clearing the appropriate
+	 * bit in the "host interrupt status mask" register */
+	if (suppress_host_int) {
+		err = spu_write_u16(card, IF_SPI_HOST_INT_STATUS_MASK_REG, 0);
+		if (err)
+			return err;
+	} else {
+		err = spu_write_u16(card, IF_SPI_HOST_INT_STATUS_MASK_REG,
+			      IF_SPI_HISM_TX_DOWNLOAD_RDY |
+			      IF_SPI_HISM_RX_UPLOAD_RDY |
+			      IF_SPI_HISM_CMD_DOWNLOAD_RDY |
+			      IF_SPI_HISM_CARDEVENT |
+			      IF_SPI_HISM_CMD_UPLOAD_RDY);
+		if (err)
+			return err;
+	}
+
+	/* If auto-interrupts are on, the completion of certain transactions
+	 * will trigger an interrupt automatically. If auto-interrupts
+	 * are off, we need to set the "Card Interrupt Cause" register to
+	 * trigger a card interrupt. */
+	if (auto_int) {
+		err = spu_write_u16(card, IF_SPI_HOST_INT_CTRL_REG,
+				IF_SPI_HICT_TX_DOWNLOAD_OVER_AUTO |
+				IF_SPI_HICT_RX_UPLOAD_OVER_AUTO |
+				IF_SPI_HICT_CMD_DOWNLOAD_OVER_AUTO |
+				IF_SPI_HICT_CMD_UPLOAD_OVER_AUTO);
+		if (err)
+			return err;
+	} else {
+		err = spu_write_u16(card, IF_SPI_HOST_INT_STATUS_MASK_REG, 0);
+		if (err)
+			return err;
+	}
+	return err;
+}
+
+static int spu_get_chip_revision(struct if_spi_card *card,
+				  u16 *card_id, u8 *card_rev)
+{
+	int err = 0;
+	u32 dev_ctrl;
+	err = spu_read_u32(card, IF_SPI_DEVICEID_CTRL_REG, &dev_ctrl);
+	if (err)
+		return err;
+	*card_id = IF_SPI_DEVICEID_CTRL_REG_TO_CARD_ID(dev_ctrl);
+	*card_rev = IF_SPI_DEVICEID_CTRL_REG_TO_CARD_REV(dev_ctrl);
+	return err;
+}
+
+static int spu_set_bus_mode(struct if_spi_card *card, u16 mode)
+{
+	int err = 0;
+	u16 rval;
+	/* set bus mode */
+	err = spu_write_u16(card, IF_SPI_SPU_BUS_MODE_REG, mode);
+	if (err)
+		return err;
+	/* Check that we were able to read back what we just wrote. */
+	err = spu_read_u16(card, IF_SPI_SPU_BUS_MODE_REG, &rval);
+	if (err)
+		return err;
+	if (rval != mode) {
+		lbs_pr_err("Can't read bus mode register.\n");
+		return -EIO;
+	}
+	return 0;
+}
+
+static int spu_init(struct if_spi_card *card, int use_dummy_writes)
+{
+	int err = 0;
+	u32 delay;
+
+	/* We have to start up in timed delay mode so that we can safely
+	 * read the Delay Read Register. */
+	card->use_dummy_writes = 0;
+	err = spu_set_bus_mode(card,
+				IF_SPI_BUS_MODE_SPI_CLOCK_PHASE_RISING |
+				IF_SPI_BUS_MODE_DELAY_METHOD_TIMED |
+				IF_SPI_BUS_MODE_16_BIT_ADDRESS_16_BIT_DATA);
+	if (err)
+		return err;
+	card->spu_port_delay = 1000;
+	card->spu_reg_delay = 1000;
+	err = spu_read_u32(card, IF_SPI_DELAY_READ_REG, &delay);
+	if (err)
+		return err;
+	card->spu_port_delay = delay & 0x0000ffff;
+	card->spu_reg_delay = (delay & 0xffff0000) >> 16;
+
+	/* If dummy clock delay mode has been requested, switch to it now */
+	if (use_dummy_writes) {
+		card->use_dummy_writes = 1;
+		err = spu_set_bus_mode(card,
+				IF_SPI_BUS_MODE_SPI_CLOCK_PHASE_RISING |
+				IF_SPI_BUS_MODE_DELAY_METHOD_DUMMY_CLOCK |
+				IF_SPI_BUS_MODE_16_BIT_ADDRESS_16_BIT_DATA);
+		if (err)
+			return err;
+	}
+
+	lbs_deb_spi("Initialized SPU unit. "
+		    "spu_port_delay=0x%04lx, spu_reg_delay=0x%04lx\n",
+		    card->spu_port_delay, card->spu_reg_delay);
+	return err;
+}
+
+/*
+ * Firmware Loading
+ */
+
+static int if_spi_prog_helper_firmware(struct if_spi_card *card)
+{
+	int err = 0;
+	const struct firmware *firmware = NULL;
+	int bytes_remaining;
+	const u8 *fw;
+	u8 temp[HELPER_FW_LOAD_CHUNK_SZ];
+	struct spi_device *spi = card->spi;
+
+	lbs_deb_enter(LBS_DEB_SPI);
+
+	err = spu_set_interrupt_mode(card, 1, 0);
+	if (err)
+		goto out;
+	/* Get helper firmware image */
+	err = request_firmware(&firmware, card->helper_fw_name, &spi->dev);
+	if (err) {
+		lbs_pr_err("request_firmware failed with err = %d\n", err);
+		goto out;
+	}
+	bytes_remaining = firmware->size;
+	fw = firmware->data;
+
+	/* Load helper firmware image */
+	while (bytes_remaining > 0) {
+		/* Scratch pad 1 should contain the number of bytes we
+		 * want to download to the firmware */
+		err = spu_write_u16(card, IF_SPI_SCRATCH_1_REG,
+					HELPER_FW_LOAD_CHUNK_SZ);
+		if (err)
+			goto release_firmware;
+
+		err = spu_wait_for_u16(card, IF_SPI_HOST_INT_STATUS_REG,
+					IF_SPI_HIST_CMD_DOWNLOAD_RDY,
+					IF_SPI_HIST_CMD_DOWNLOAD_RDY);
+		if (err)
+			goto release_firmware;
+
+		/* Feed the data into the command read/write port reg
+		 * in chunks of 64 bytes */
+		memset(temp, 0, sizeof(temp));
+		memcpy(temp, fw,
+		       min(bytes_remaining, HELPER_FW_LOAD_CHUNK_SZ));
+		mdelay(10);
+		err = spu_write(card, IF_SPI_CMD_RDWRPORT_REG,
+					temp, HELPER_FW_LOAD_CHUNK_SZ);
+		if (err)
+			goto release_firmware;
+
+		/* Interrupt the boot code */
+		err = spu_write_u16(card, IF_SPI_HOST_INT_STATUS_REG, 0);
+		if (err)
+			goto release_firmware;
+		err = spu_write_u16(card, IF_SPI_CARD_INT_CAUSE_REG,
+				       IF_SPI_CIC_CMD_DOWNLOAD_OVER);
+		if (err)
+			goto release_firmware;
+		bytes_remaining -= HELPER_FW_LOAD_CHUNK_SZ;
+		fw += HELPER_FW_LOAD_CHUNK_SZ;
+	}
+
+	/* Once the helper / single stage firmware download is complete,
+	 * write 0 to scratch pad 1 and interrupt the
+	 * bootloader. This completes the helper download. */
+	err = spu_write_u16(card, IF_SPI_SCRATCH_1_REG, FIRMWARE_DNLD_OK);
+	if (err)
+		goto release_firmware;
+	err = spu_write_u16(card, IF_SPI_HOST_INT_STATUS_REG, 0);
+	if (err)
+		goto release_firmware;
+	err = spu_write_u16(card, IF_SPI_CARD_INT_CAUSE_REG,
+				IF_SPI_CIC_CMD_DOWNLOAD_OVER);
+		goto release_firmware;
+
+	lbs_deb_spi("waiting for helper to boot...\n");
+
+release_firmware:
+	release_firmware(firmware);
+out:
+	if (err)
+		lbs_pr_err("failed to load helper firmware (err=%d)\n", err);
+	lbs_deb_leave_args(LBS_DEB_SPI, "err %d", err);
+	return err;
+}
+
+/* Returns the length of the next packet the firmware expects us to send
+ * Sets crc_err if the previous transfer had a CRC error. */
+static int if_spi_prog_main_firmware_check_len(struct if_spi_card *card,
+						int *crc_err)
+{
+	u16 len;
+	int err = 0;
+
+	/* wait until the host interrupt status register indicates
+	 * that we are ready to download */
+	err = spu_wait_for_u16(card, IF_SPI_HOST_INT_STATUS_REG,
+				IF_SPI_HIST_CMD_DOWNLOAD_RDY,
+				IF_SPI_HIST_CMD_DOWNLOAD_RDY);
+	if (err) {
+		lbs_pr_err("timed out waiting for host_int_status\n");
+		return err;
+	}
+
+	/* Ask the device how many bytes of firmware it wants. */
+	err = spu_read_u16(card, IF_SPI_SCRATCH_1_REG, &len);
+	if (err)
+		return err;
+
+	if (len > IF_SPI_CMD_BUF_SIZE) {
+		lbs_pr_err("firmware load device requested a larger "
+			   "tranfer than we are prepared to "
+			   "handle. (len = %d)\n", len);
+		return -EIO;
+	}
+	if (len & 0x1) {
+		lbs_deb_spi("%s: crc error\n", __func__);
+		len &= ~0x1;
+		*crc_err = 1;
+	} else
+		*crc_err = 0;
+
+	return len;
+}
+
+static int if_spi_prog_main_firmware(struct if_spi_card *card)
+{
+	int len, prev_len;
+	int bytes, crc_err = 0, err = 0;
+	const struct firmware *firmware = NULL;
+	const u8 *fw;
+	struct spi_device *spi = card->spi;
+	u16 num_crc_errs;
+
+	lbs_deb_enter(LBS_DEB_SPI);
+
+	err = spu_set_interrupt_mode(card, 1, 0);
+	if (err)
+		goto out;
+
+	/* Get firmware image */
+	err = request_firmware(&firmware, card->main_fw_name, &spi->dev);
+	if (err) {
+		lbs_pr_err("%s: can't get firmware '%s' from kernel. "
+			"err = %d\n", __func__, card->main_fw_name, err);
+		goto out;
+	}
+
+	err = spu_wait_for_u16(card, IF_SPI_SCRATCH_1_REG, 0, 0);
+	if (err) {
+		lbs_pr_err("%s: timed out waiting for initial "
+			   "scratch reg = 0\n", __func__);
+		goto release_firmware;
+	}
+
+	num_crc_errs = 0;
+	prev_len = 0;
+	bytes = firmware->size;
+	fw = firmware->data;
+	while ((len = if_spi_prog_main_firmware_check_len(card, &crc_err))) {
+		if (len < 0) {
+			err = len;
+			goto release_firmware;
+		}
+		if (bytes < 0) {
+			/* If there are no more bytes left, we would normally
+			 * expect to have terminated with len = 0 */
+			lbs_pr_err("Firmware load wants more bytes "
+				   "than we have to offer.\n");
+			break;
+		}
+		if (crc_err) {
+			/* Previous transfer failed. */
+			if (++num_crc_errs > MAX_MAIN_FW_LOAD_CRC_ERR) {
+				lbs_pr_err("Too many CRC errors encountered "
+					   "in firmware load.\n");
+				err = -EIO;
+				goto release_firmware;
+			}
+		} else {
+			/* Previous transfer succeeded. Advance counters. */
+			bytes -= prev_len;
+			fw += prev_len;
+		}
+		if (bytes < len) {
+			memset(card->cmd_buffer, 0, len);
+			memcpy(card->cmd_buffer, fw, bytes);
+		} else
+			memcpy(card->cmd_buffer, fw, len);
+
+		err = spu_write_u16(card, IF_SPI_HOST_INT_STATUS_REG, 0);
+		if (err)
+			goto release_firmware;
+		err = spu_write(card, IF_SPI_CMD_RDWRPORT_REG,
+				card->cmd_buffer, len);
+		if (err)
+			goto release_firmware;
+		err = spu_write_u16(card, IF_SPI_CARD_INT_CAUSE_REG ,
+					IF_SPI_CIC_CMD_DOWNLOAD_OVER);
+		if (err)
+			goto release_firmware;
+		prev_len = len;
+	}
+	if (bytes > prev_len) {
+		lbs_pr_err("firmware load wants fewer bytes than "
+			   "we have to offer.\n");
+	}
+
+	/* Confirm firmware download */
+	err = spu_wait_for_u32(card, IF_SPI_SCRATCH_4_REG,
+					SUCCESSFUL_FW_DOWNLOAD_MAGIC);
+	if (err) {
+		lbs_pr_err("failed to confirm the firmware download\n");
+		goto release_firmware;
+	}
+
+release_firmware:
+	release_firmware(firmware);
+
+out:
+	if (err)
+		lbs_pr_err("failed to load firmware (err=%d)\n", err);
+	lbs_deb_leave_args(LBS_DEB_SPI, "err %d", err);
+	return err;
+}
+
+/*
+ * SPI Transfer Thread
+ *
+ * The SPI thread handles all SPI transfers, so there is no need for a lock.
+ */
+
+/* Move a command from the card to the host */
+static int if_spi_c2h_cmd(struct if_spi_card *card)
+{
+	struct lbs_private *priv = card->priv;
+	unsigned long flags;
+	int err = 0;
+	u16 len;
+	u8 i;
+
+	/* We need a buffer big enough to handle whatever people send to
+	 * hw_host_to_card */
+	BUILD_BUG_ON(IF_SPI_CMD_BUF_SIZE < LBS_CMD_BUFFER_SIZE);
+	BUILD_BUG_ON(IF_SPI_CMD_BUF_SIZE < LBS_UPLD_SIZE);
+
+	/* It's just annoying if the buffer size isn't a multiple of 4, because
+	 * then we might have len <  IF_SPI_CMD_BUF_SIZE but
+	 * ALIGN(len, 4) > IF_SPI_CMD_BUF_SIZE */
+	BUILD_BUG_ON(IF_SPI_CMD_BUF_SIZE % 4 != 0);
+
+	lbs_deb_enter(LBS_DEB_SPI);
+
+	/* How many bytes are there to read? */
+	err = spu_read_u16(card, IF_SPI_SCRATCH_2_REG, &len);
+	if (err)
+		goto out;
+	if (!len) {
+		lbs_pr_err("%s: error: card has no data for host\n",
+			   __func__);
+		err = -EINVAL;
+		goto out;
+	} else if (len > IF_SPI_CMD_BUF_SIZE) {
+		lbs_pr_err("%s: error: response packet too large: "
+			   "%d bytes, but maximum is %d\n",
+			   __func__, len, IF_SPI_CMD_BUF_SIZE);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Read the data from the WLAN module into our command buffer */
+	err = spu_read(card, IF_SPI_CMD_RDWRPORT_REG,
+				card->cmd_buffer, ALIGN(len, 4));
+	if (err)
+		goto out;
+
+	spin_lock_irqsave(&priv->driver_lock, flags);
+	i = (priv->resp_idx == 0) ? 1 : 0;
+	BUG_ON(priv->resp_len[i]);
+	priv->resp_len[i] = len;
+	memcpy(priv->resp_buf[i], card->cmd_buffer, len);
+	lbs_notify_command_response(priv, i);
+	spin_unlock_irqrestore(&priv->driver_lock, flags);
+
+out:
+	if (err)
+		lbs_pr_err("%s: err=%d\n", __func__, err);
+	lbs_deb_leave(LBS_DEB_SPI);
+	return err;
+}
+
+/* Move data from the card to the host */
+static int if_spi_c2h_data(struct if_spi_card *card)
+{
+	struct sk_buff *skb;
+	char *data;
+	u16 len;
+	int err = 0;
+
+	lbs_deb_enter(LBS_DEB_SPI);
+
+	/* How many bytes are there to read? */
+	err = spu_read_u16(card, IF_SPI_SCRATCH_1_REG, &len);
+	if (err)
+		goto out;
+	if (!len) {
+		lbs_pr_err("%s: error: card has no data for host\n",
+			   __func__);
+		err = -EINVAL;
+		goto out;
+	} else if (len > MRVDRV_ETH_RX_PACKET_BUFFER_SIZE) {
+		lbs_pr_err("%s: error: card has %d bytes of data, but "
+			   "our maximum skb size is %u\n",
+			   __func__, len, MRVDRV_ETH_RX_PACKET_BUFFER_SIZE);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* TODO: should we allocate a smaller skb if we have less data? */
+	skb = dev_alloc_skb(MRVDRV_ETH_RX_PACKET_BUFFER_SIZE);
+	if (!skb) {
+		err = -ENOBUFS;
+		goto out;
+	}
+	skb_reserve(skb, IPFIELD_ALIGN_OFFSET);
+	data = skb_put(skb, len);
+
+	/* Read the data from the WLAN module into our skb... */
+	err = spu_read(card, IF_SPI_DATA_RDWRPORT_REG, data, ALIGN(len, 4));
+	if (err)
+		goto free_skb;
+
+	/* pass the SKB to libertas */
+	err = lbs_process_rxed_packet(card->priv, skb);
+	if (err)
+		goto free_skb;
+
+	/* success */
+	goto out;
+
+free_skb:
+	dev_kfree_skb(skb);
+out:
+	if (err)
+		lbs_pr_err("%s: err=%d\n", __func__, err);
+	lbs_deb_leave(LBS_DEB_SPI);
+	return err;
+}
+
+/* Move data or a command from the host to the card. */
+static void if_spi_h2c(struct if_spi_card *card,
+			struct if_spi_packet *packet, int type)
+{
+	int err = 0;
+	u16 int_type, port_reg;
+
+	switch (type) {
+	case MVMS_DAT:
+		int_type = IF_SPI_CIC_TX_DOWNLOAD_OVER;
+		port_reg = IF_SPI_DATA_RDWRPORT_REG;
+		break;
+	case MVMS_CMD:
+		int_type = IF_SPI_CIC_CMD_DOWNLOAD_OVER;
+		port_reg = IF_SPI_CMD_RDWRPORT_REG;
+		break;
+	default:
+		lbs_pr_err("can't transfer buffer of type %d\n", type);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Write the data to the card */
+	err = spu_write(card, port_reg, packet->buffer, packet->blen);
+	if (err)
+		goto out;
+
+out:
+	kfree(packet);
+
+	if (err)
+		lbs_pr_err("%s: error %d\n", __func__, err);
+}
+
+/* Inform the host about a card event */
+static void if_spi_e2h(struct if_spi_card *card)
+{
+	int err = 0;
+	unsigned long flags;
+	u32 cause;
+	struct lbs_private *priv = card->priv;
+
+	err = spu_read_u32(card, IF_SPI_SCRATCH_3_REG, &cause);
+	if (err)
+		goto out;
+
+	spin_lock_irqsave(&priv->driver_lock, flags);
+	lbs_queue_event(priv, cause & 0xff);
+	spin_unlock_irqrestore(&priv->driver_lock, flags);
+
+out:
+	if (err)
+		lbs_pr_err("%s: error %d\n", __func__, err);
+}
+
+static int lbs_spi_thread(void *data)
+{
+	int err;
+	struct if_spi_card *card = data;
+	u16 hiStatus;
+	unsigned long flags;
+	struct if_spi_packet *packet;
+
+	while (1) {
+		/* Wait to be woken up by one of two things.  First, our ISR
+		 * could tell us that something happened on the WLAN.
+		 * Secondly, libertas could call hw_host_to_card with more
+		 * data, which we might be able to send.
+		 */
+		do {
+			err = down_interruptible(&card->spi_ready);
+			if (!card->run_thread) {
+				up(&card->spi_thread_terminated);
+				do_exit(0);
+			}
+		} while (err == EINTR);
+
+		/* Read the host interrupt status register to see what we
+		 * can do. */
+		err = spu_read_u16(card, IF_SPI_HOST_INT_STATUS_REG,
+					&hiStatus);
+		if (err) {
+			lbs_pr_err("I/O error\n");
+			goto err;
+		}
+
+		if (hiStatus & IF_SPI_HIST_CMD_UPLOAD_RDY)
+			err = if_spi_c2h_cmd(card);
+			if (err)
+				goto err;
+		if (hiStatus & IF_SPI_HIST_RX_UPLOAD_RDY)
+			err = if_spi_c2h_data(card);
+			if (err)
+				goto err;
+		if (hiStatus & IF_SPI_HIST_CMD_DOWNLOAD_RDY) {
+			/* This means two things. First of all,
+			 * if there was a previous command sent, the card has
+			 * successfully received it.
+			 * Secondly, it is now ready to download another
+			 * command.
+			 */
+			lbs_host_to_card_done(card->priv);
+
+			/* Do we have any command packets from the host to
+			 * send? */
+			packet = NULL;
+			spin_lock_irqsave(&card->buffer_lock, flags);
+			if (!list_empty(&card->cmd_packet_list)) {
+				packet = (struct if_spi_packet *)(card->
+						cmd_packet_list.next);
+				list_del(&packet->list);
+			}
+			spin_unlock_irqrestore(&card->buffer_lock, flags);
+
+			if (packet)
+				if_spi_h2c(card, packet, MVMS_CMD);
+		}
+		if (hiStatus & IF_SPI_HIST_TX_DOWNLOAD_RDY) {
+			/* Do we have any data packets from the host to
+			 * send? */
+			packet = NULL;
+			spin_lock_irqsave(&card->buffer_lock, flags);
+			if (!list_empty(&card->data_packet_list)) {
+				packet = (struct if_spi_packet *)(card->
+						data_packet_list.next);
+				list_del(&packet->list);
+			}
+			spin_unlock_irqrestore(&card->buffer_lock, flags);
+
+			if (packet)
+				if_spi_h2c(card, packet, MVMS_DAT);
+		}
+		if (hiStatus & IF_SPI_HIST_CARD_EVENT)
+			if_spi_e2h(card);
+
+err:
+		if (err)
+			lbs_pr_err("%s: got error %d\n", __func__, err);
+	}
+}
+
+/* Block until lbs_spi_thread thread has terminated */
+static void if_spi_terminate_spi_thread(struct if_spi_card *card)
+{
+	/* It would be nice to use kthread_stop here, but that function
+	 * can't wake threads waiting for a semaphore. */
+	card->run_thread = 0;
+	up(&card->spi_ready);
+	down(&card->spi_thread_terminated);
+}
+
+/*
+ * Host to Card
+ *
+ * Called from Libertas to transfer some data to the WLAN device
+ * We can't sleep here. */
+static int if_spi_host_to_card(struct lbs_private *priv,
+				u8 type, u8 *buf, u16 nb)
+{
+	int err = 0;
+	unsigned long flags;
+	struct if_spi_card *card = priv->card;
+	struct if_spi_packet *packet;
+	u16 blen;
+
+	lbs_deb_enter_args(LBS_DEB_SPI, "type %d, bytes %d", type, nb);
+
+	if (nb == 0) {
+		lbs_pr_err("%s: invalid size requested: %d\n", __func__, nb);
+		err = -EINVAL;
+		goto out;
+	}
+	blen = ALIGN(nb, 4);
+	packet = kzalloc(sizeof(struct if_spi_packet) + blen, GFP_ATOMIC);
+	if (!packet) {
+		err = -ENOMEM;
+		goto out;
+	}
+	packet->blen = blen;
+	memcpy(packet->buffer, buf, nb);
+	memset(packet->buffer + nb, 0, blen - nb);
+
+	switch (type) {
+	case MVMS_CMD:
+		priv->dnld_sent = DNLD_CMD_SENT;
+		spin_lock_irqsave(&card->buffer_lock, flags);
+		list_add_tail(&packet->list, &card->cmd_packet_list);
+		spin_unlock_irqrestore(&card->buffer_lock, flags);
+		break;
+	case MVMS_DAT:
+		priv->dnld_sent = DNLD_DATA_SENT;
+		spin_lock_irqsave(&card->buffer_lock, flags);
+		list_add_tail(&packet->list, &card->data_packet_list);
+		spin_unlock_irqrestore(&card->buffer_lock, flags);
+		break;
+	default:
+		lbs_pr_err("can't transfer buffer of type %d", type);
+		err = -EINVAL;
+		break;
+	}
+
+	/* Wake up the spi thread */
+	up(&card->spi_ready);
+out:
+	lbs_deb_leave_args(LBS_DEB_SPI, "err=%d", err);
+	return err;
+}
+
+/*
+ * Host Interrupts
+ *
+ * Service incoming interrupts from the WLAN device. We can't sleep here, so
+ * don't try to talk on the SPI bus, just wake up the SPI thread.
+ */
+static irqreturn_t if_spi_host_interrupt(int irq, void *dev_id)
+{
+	struct if_spi_card *card = dev_id;
+
+	up(&card->spi_ready);
+	return IRQ_HANDLED;
+}
+
+/*
+ * SPI callbacks
+ */
+
+static int if_spi_calculate_fw_names(u16 card_id,
+			      char *helper_fw, char *main_fw)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(chip_id_to_device_name); ++i) {
+		if (card_id == chip_id_to_device_name[i].chip_id)
+			break;
+	}
+	if (i == ARRAY_SIZE(chip_id_to_device_name)) {
+		lbs_pr_err("Unsupported chip_id: 0x%02x\n", card_id);
+		return -EAFNOSUPPORT;
+	}
+	snprintf(helper_fw, FIRMWARE_NAME_MAX, "libertas/gspi%d_hlp.bin",
+		 chip_id_to_device_name[i].name);
+	snprintf(main_fw, FIRMWARE_NAME_MAX, "libertas/gspi%d.bin",
+		 chip_id_to_device_name[i].name);
+	return 0;
+}
+
+static int __devinit if_spi_probe(struct spi_device *spi)
+{
+	struct if_spi_card *card;
+	struct lbs_private *priv = NULL;
+	struct libertas_spi_platform_data *pdata = spi->dev.platform_data;
+	int err = 0;
+	u32 scratch;
+
+	lbs_deb_enter(LBS_DEB_SPI);
+
+	/* Allocate card structure to represent this specific device */
+	card = kzalloc(sizeof(struct if_spi_card), GFP_KERNEL);
+	if (!card) {
+		err = -ENOMEM;
+		goto out;
+	}
+	spi_set_drvdata(spi, card);
+	card->spi = spi;
+	card->gpio_cs = pdata->gpio_cs;
+	card->prev_xfer_time = jiffies;
+
+	sema_init(&card->spi_ready, 0);
+	sema_init(&card->spi_thread_terminated, 0);
+	INIT_LIST_HEAD(&card->cmd_packet_list);
+	INIT_LIST_HEAD(&card->data_packet_list);
+	spin_lock_init(&card->buffer_lock);
+
+	/* set up GPIO CS line. TODO: use  regular CS line */
+	err = gpio_request(card->gpio_cs, "if_spi_gpio_chip_select");
+	if (err)
+		goto free_card;
+	err = gpio_direction_output(card->gpio_cs, 1);
+	if (err)
+		goto free_gpio;
+
+	/* Initialize the SPI Interface Unit */
+	err = spu_init(card, pdata->use_dummy_writes);
+	if (err)
+		goto free_gpio;
+	err = spu_get_chip_revision(card, &card->card_id, &card->card_rev);
+	if (err)
+		goto free_gpio;
+
+	/* Firmware load */
+	err = spu_read_u32(card, IF_SPI_SCRATCH_4_REG, &scratch);
+	if (err)
+		goto free_gpio;
+	if (scratch == SUCCESSFUL_FW_DOWNLOAD_MAGIC)
+		lbs_deb_spi("Firmware is already loaded for "
+			    "Marvell WLAN 802.11 adapter\n");
+	else {
+		err = if_spi_calculate_fw_names(card->card_id,
+				card->helper_fw_name, card->main_fw_name);
+		if (err)
+			goto free_gpio;
+
+		lbs_deb_spi("Initializing FW for Marvell WLAN 802.11 adapter "
+				"(chip_id = 0x%04x, chip_rev = 0x%02x) "
+				"attached to SPI bus_num %d, chip_select %d. "
+				"spi->max_speed_hz=%d\n",
+				card->card_id, card->card_rev,
+				spi->master->bus_num, spi->chip_select,
+				spi->max_speed_hz);
+		err = if_spi_prog_helper_firmware(card);
+		if (err)
+			goto free_gpio;
+		err = if_spi_prog_main_firmware(card);
+		if (err)
+			goto free_gpio;
+		lbs_deb_spi("loaded FW for Marvell WLAN 802.11 adapter\n");
+	}
+
+	err = spu_set_interrupt_mode(card, 0, 1);
+	if (err)
+		goto free_gpio;
+
+	/* Register our card with libertas.
+	 * This will call alloc_etherdev */
+	priv = lbs_add_card(card, &spi->dev);
+	if (!priv) {
+		err = -ENOMEM;
+		goto free_gpio;
+	}
+	card->priv = priv;
+	priv->card = card;
+	priv->hw_host_to_card = if_spi_host_to_card;
+	priv->fw_ready = 1;
+	priv->ps_supported = 1;
+
+	/* Initialize interrupt handling stuff. */
+	card->run_thread = 1;
+	card->spi_thread = kthread_run(lbs_spi_thread, card, "lbs_spi_thread");
+	if (IS_ERR(card->spi_thread)) {
+		card->run_thread = 0;
+		err = PTR_ERR(card->spi_thread);
+		lbs_pr_err("error creating SPI thread: err=%d\n", err);
+		goto remove_card;
+	}
+	err = request_irq(spi->irq, if_spi_host_interrupt,
+			IRQF_TRIGGER_FALLING, "libertas_spi", card);
+	if (err) {
+		lbs_pr_err("can't get host irq line-- request_irq failed\n");
+		goto terminate_thread;
+	}
+
+	/* Start the card.
+	 * This will call register_netdev, and we'll start
+	 * getting interrupts... */
+	err = lbs_start_card(priv);
+	if (err)
+		goto release_irq;
+
+	lbs_deb_spi("Finished initializing WLAN module.\n");
+
+	/* successful exit */
+	goto out;
+
+release_irq:
+	free_irq(spi->irq, card);
+terminate_thread:
+	if_spi_terminate_spi_thread(card);
+remove_card:
+	lbs_remove_card(priv); /* will call free_netdev */
+free_gpio:
+	gpio_free(card->gpio_cs);
+free_card:
+	free_if_spi_card(card);
+out:
+	lbs_deb_leave_args(LBS_DEB_SPI, "err %d\n", err);
+	return err;
+}
+
+static int __devexit libertas_spi_remove(struct spi_device *spi)
+{
+	struct if_spi_card *card = spi_get_drvdata(spi);
+	struct lbs_private *priv = card->priv;
+
+	lbs_deb_spi("libertas_spi_remove\n");
+	lbs_deb_enter(LBS_DEB_SPI);
+	priv->surpriseremoved = 1;
+
+	lbs_stop_card(priv);
+	free_irq(spi->irq, card);
+	if_spi_terminate_spi_thread(card);
+	lbs_remove_card(priv); /* will call free_netdev */
+	gpio_free(card->gpio_cs);
+	free_if_spi_card(card);
+	lbs_deb_leave(LBS_DEB_SPI);
+	return 0;
+}
+
+static struct spi_driver libertas_spi_driver = {
+	.probe	= if_spi_probe,
+	.remove = __devexit_p(libertas_spi_remove),
+	.driver = {
+		.name	= "libertas_spi",
+		.bus	= &spi_bus_type,
+		.owner	= THIS_MODULE,
+	},
+};
+
+/*
+ * Module functions
+ */
+
+static int __init if_spi_init_module(void)
+{
+	int ret = 0;
+	lbs_deb_enter(LBS_DEB_SPI);
+	printk(KERN_INFO "libertas_spi: Libertas SPI driver\n");
+	ret = spi_register_driver(&libertas_spi_driver);
+	lbs_deb_leave(LBS_DEB_SPI);
+	return ret;
+}
+
+static void __exit if_spi_exit_module(void)
+{
+	lbs_deb_enter(LBS_DEB_SPI);
+	spi_unregister_driver(&libertas_spi_driver);
+	lbs_deb_leave(LBS_DEB_SPI);
+}
+
+module_init(if_spi_init_module);
+module_exit(if_spi_exit_module);
+
+MODULE_DESCRIPTION("Libertas SPI WLAN Driver");
+MODULE_AUTHOR("Andrey Yurovsky <andrey@cozybit.com>, "
+	      "Colin McCabe <colin@cozybit.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/wireless/libertas/if_spi.h b/drivers/net/wireless/libertas/if_spi.h
new file mode 100644
index 000000000000..2103869cc5b0
--- /dev/null
+++ b/drivers/net/wireless/libertas/if_spi.h
@@ -0,0 +1,208 @@
+/*
+ *	linux/drivers/net/wireless/libertas/if_spi.c
+ *
+ *	Driver for Marvell SPI WLAN cards.
+ *
+ *	Copyright 2008 Analog Devices Inc.
+ *
+ *	Authors:
+ *	Andrey Yurovsky <andrey@cozybit.com>
+ *	Colin McCabe <colin@cozybit.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef _LBS_IF_SPI_H_
+#define _LBS_IF_SPI_H_
+
+#define IPFIELD_ALIGN_OFFSET 2
+#define IF_SPI_CMD_BUF_SIZE 2400
+
+/***************** Firmware *****************/
+struct chip_ident {
+	u16 chip_id;
+	u16 name;
+};
+
+#define MAX_MAIN_FW_LOAD_CRC_ERR 10
+
+/* Chunk size when loading the helper firmware */
+#define HELPER_FW_LOAD_CHUNK_SZ 64
+
+/* Value to write to indicate end of helper firmware dnld */
+#define FIRMWARE_DNLD_OK 0x0000
+
+/* Value to check once the main firmware is downloaded */
+#define SUCCESSFUL_FW_DOWNLOAD_MAGIC 0x88888888
+
+/***************** SPI Interface Unit *****************/
+/* Masks used in SPI register read/write operations */
+#define IF_SPI_READ_OPERATION_MASK 0x0
+#define IF_SPI_WRITE_OPERATION_MASK 0x8000
+
+/* SPI register offsets. 4-byte aligned. */
+#define IF_SPI_DEVICEID_CTRL_REG 0x00	/* DeviceID controller reg */
+#define IF_SPI_IO_READBASE_REG 0x04 	/* Read I/O base reg */
+#define IF_SPI_IO_WRITEBASE_REG 0x08	/* Write I/O base reg */
+#define IF_SPI_IO_RDWRPORT_REG 0x0C	/* Read/Write I/O port reg */
+
+#define IF_SPI_CMD_READBASE_REG 0x10	/* Read command base reg */
+#define IF_SPI_CMD_WRITEBASE_REG 0x14	/* Write command base reg */
+#define IF_SPI_CMD_RDWRPORT_REG 0x18	/* Read/Write command port reg */
+
+#define IF_SPI_DATA_READBASE_REG 0x1C	/* Read data base reg */
+#define IF_SPI_DATA_WRITEBASE_REG 0x20	/* Write data base reg */
+#define IF_SPI_DATA_RDWRPORT_REG 0x24	/* Read/Write data port reg */
+
+#define IF_SPI_SCRATCH_1_REG 0x28	/* Scratch reg 1 */
+#define IF_SPI_SCRATCH_2_REG 0x2C	/* Scratch reg 2 */
+#define IF_SPI_SCRATCH_3_REG 0x30	/* Scratch reg 3 */
+#define IF_SPI_SCRATCH_4_REG 0x34	/* Scratch reg 4 */
+
+#define IF_SPI_TX_FRAME_SEQ_NUM_REG 0x38 /* Tx frame sequence number reg */
+#define IF_SPI_TX_FRAME_STATUS_REG 0x3C	/* Tx frame status reg */
+
+#define IF_SPI_HOST_INT_CTRL_REG 0x40	/* Host interrupt controller reg */
+
+#define IF_SPI_CARD_INT_CAUSE_REG 0x44	/* Card interrupt cause reg */
+#define IF_SPI_CARD_INT_STATUS_REG 0x48 /* Card interupt status reg */
+#define IF_SPI_CARD_INT_EVENT_MASK_REG 0x4C /* Card interrupt event mask */
+#define IF_SPI_CARD_INT_STATUS_MASK_REG	0x50 /* Card interrupt status mask */
+
+#define IF_SPI_CARD_INT_RESET_SELECT_REG 0x54 /* Card interrupt reset select */
+
+#define IF_SPI_HOST_INT_CAUSE_REG 0x58	/* Host interrupt cause reg */
+#define IF_SPI_HOST_INT_STATUS_REG 0x5C	/* Host interrupt status reg */
+#define IF_SPI_HOST_INT_EVENT_MASK_REG 0x60 /* Host interrupt event mask */
+#define IF_SPI_HOST_INT_STATUS_MASK_REG	0x64 /* Host interrupt status mask */
+#define IF_SPI_HOST_INT_RESET_SELECT_REG 0x68 /* Host interrupt reset select */
+
+#define IF_SPI_DELAY_READ_REG 0x6C	/* Delay read reg */
+#define IF_SPI_SPU_BUS_MODE_REG 0x70	/* SPU BUS mode reg */
+
+/***************** IF_SPI_DEVICEID_CTRL_REG *****************/
+#define IF_SPI_DEVICEID_CTRL_REG_TO_CARD_ID(dc) ((dc & 0xffff0000)>>16)
+#define IF_SPI_DEVICEID_CTRL_REG_TO_CARD_REV(dc) (dc & 0x000000ff)
+
+/***************** IF_SPI_HOST_INT_CTRL_REG *****************/
+/** Host Interrupt Control bit : Wake up */
+#define IF_SPI_HICT_WAKE_UP				(1<<0)
+/** Host Interrupt Control bit : WLAN ready */
+#define IF_SPI_HICT_WLAN_READY				(1<<1)
+/*#define IF_SPI_HICT_FIFO_FIRST_HALF_EMPTY		(1<<2) */
+/*#define IF_SPI_HICT_FIFO_SECOND_HALF_EMPTY		(1<<3) */
+/*#define IF_SPI_HICT_IRQSRC_WLAN			(1<<4) */
+/** Host Interrupt Control bit : Tx auto download */
+#define IF_SPI_HICT_TX_DOWNLOAD_OVER_AUTO		(1<<5)
+/** Host Interrupt Control bit : Rx auto upload */
+#define IF_SPI_HICT_RX_UPLOAD_OVER_AUTO			(1<<6)
+/** Host Interrupt Control bit : Command auto download */
+#define IF_SPI_HICT_CMD_DOWNLOAD_OVER_AUTO		(1<<7)
+/** Host Interrupt Control bit : Command auto upload */
+#define IF_SPI_HICT_CMD_UPLOAD_OVER_AUTO		(1<<8)
+
+/***************** IF_SPI_CARD_INT_CAUSE_REG *****************/
+/** Card Interrupt Case bit : Tx download over */
+#define IF_SPI_CIC_TX_DOWNLOAD_OVER			(1<<0)
+/** Card Interrupt Case bit : Rx upload over */
+#define IF_SPI_CIC_RX_UPLOAD_OVER			(1<<1)
+/** Card Interrupt Case bit : Command download over */
+#define IF_SPI_CIC_CMD_DOWNLOAD_OVER			(1<<2)
+/** Card Interrupt Case bit : Host event */
+#define IF_SPI_CIC_HOST_EVENT				(1<<3)
+/** Card Interrupt Case bit : Command upload over */
+#define IF_SPI_CIC_CMD_UPLOAD_OVER			(1<<4)
+/** Card Interrupt Case bit : Power down */
+#define IF_SPI_CIC_POWER_DOWN				(1<<5)
+
+/***************** IF_SPI_CARD_INT_STATUS_REG *****************/
+#define IF_SPI_CIS_TX_DOWNLOAD_OVER			(1<<0)
+#define IF_SPI_CIS_RX_UPLOAD_OVER			(1<<1)
+#define IF_SPI_CIS_CMD_DOWNLOAD_OVER			(1<<2)
+#define IF_SPI_CIS_HOST_EVENT				(1<<3)
+#define IF_SPI_CIS_CMD_UPLOAD_OVER			(1<<4)
+#define IF_SPI_CIS_POWER_DOWN				(1<<5)
+
+/***************** IF_SPI_HOST_INT_CAUSE_REG *****************/
+#define IF_SPI_HICU_TX_DOWNLOAD_RDY			(1<<0)
+#define IF_SPI_HICU_RX_UPLOAD_RDY			(1<<1)
+#define IF_SPI_HICU_CMD_DOWNLOAD_RDY			(1<<2)
+#define IF_SPI_HICU_CARD_EVENT				(1<<3)
+#define IF_SPI_HICU_CMD_UPLOAD_RDY			(1<<4)
+#define IF_SPI_HICU_IO_WR_FIFO_OVERFLOW			(1<<5)
+#define IF_SPI_HICU_IO_RD_FIFO_UNDERFLOW		(1<<6)
+#define IF_SPI_HICU_DATA_WR_FIFO_OVERFLOW		(1<<7)
+#define IF_SPI_HICU_DATA_RD_FIFO_UNDERFLOW		(1<<8)
+#define IF_SPI_HICU_CMD_WR_FIFO_OVERFLOW		(1<<9)
+#define IF_SPI_HICU_CMD_RD_FIFO_UNDERFLOW		(1<<10)
+
+/***************** IF_SPI_HOST_INT_STATUS_REG *****************/
+/** Host Interrupt Status bit : Tx download ready */
+#define IF_SPI_HIST_TX_DOWNLOAD_RDY			(1<<0)
+/** Host Interrupt Status bit : Rx upload ready */
+#define IF_SPI_HIST_RX_UPLOAD_RDY			(1<<1)
+/** Host Interrupt Status bit : Command download ready */
+#define IF_SPI_HIST_CMD_DOWNLOAD_RDY			(1<<2)
+/** Host Interrupt Status bit : Card event */
+#define IF_SPI_HIST_CARD_EVENT				(1<<3)
+/** Host Interrupt Status bit : Command upload ready */
+#define IF_SPI_HIST_CMD_UPLOAD_RDY			(1<<4)
+/** Host Interrupt Status bit : I/O write FIFO overflow */
+#define IF_SPI_HIST_IO_WR_FIFO_OVERFLOW			(1<<5)
+/** Host Interrupt Status bit : I/O read FIFO underflow */
+#define IF_SPI_HIST_IO_RD_FIFO_UNDRFLOW			(1<<6)
+/** Host Interrupt Status bit : Data write FIFO overflow */
+#define IF_SPI_HIST_DATA_WR_FIFO_OVERFLOW		(1<<7)
+/** Host Interrupt Status bit : Data read FIFO underflow */
+#define IF_SPI_HIST_DATA_RD_FIFO_UNDERFLOW		(1<<8)
+/** Host Interrupt Status bit : Command write FIFO overflow */
+#define IF_SPI_HIST_CMD_WR_FIFO_OVERFLOW		(1<<9)
+/** Host Interrupt Status bit : Command read FIFO underflow */
+#define IF_SPI_HIST_CMD_RD_FIFO_UNDERFLOW		(1<<10)
+
+/***************** IF_SPI_HOST_INT_STATUS_MASK_REG *****************/
+/** Host Interrupt Status Mask bit : Tx download ready */
+#define IF_SPI_HISM_TX_DOWNLOAD_RDY			(1<<0)
+/** Host Interrupt Status Mask bit : Rx upload ready */
+#define IF_SPI_HISM_RX_UPLOAD_RDY			(1<<1)
+/** Host Interrupt Status Mask bit : Command download ready */
+#define IF_SPI_HISM_CMD_DOWNLOAD_RDY			(1<<2)
+/** Host Interrupt Status Mask bit : Card event */
+#define IF_SPI_HISM_CARDEVENT				(1<<3)
+/** Host Interrupt Status Mask bit : Command upload ready */
+#define IF_SPI_HISM_CMD_UPLOAD_RDY			(1<<4)
+/** Host Interrupt Status Mask bit : I/O write FIFO overflow */
+#define IF_SPI_HISM_IO_WR_FIFO_OVERFLOW			(1<<5)
+/** Host Interrupt Status Mask bit : I/O read FIFO underflow */
+#define IF_SPI_HISM_IO_RD_FIFO_UNDERFLOW		(1<<6)
+/** Host Interrupt Status Mask bit : Data write FIFO overflow */
+#define IF_SPI_HISM_DATA_WR_FIFO_OVERFLOW		(1<<7)
+/** Host Interrupt Status Mask bit : Data write FIFO underflow */
+#define IF_SPI_HISM_DATA_RD_FIFO_UNDERFLOW		(1<<8)
+/** Host Interrupt Status Mask bit : Command write FIFO overflow */
+#define IF_SPI_HISM_CMD_WR_FIFO_OVERFLOW		(1<<9)
+/** Host Interrupt Status Mask bit : Command write FIFO underflow */
+#define IF_SPI_HISM_CMD_RD_FIFO_UNDERFLOW		(1<<10)
+
+/***************** IF_SPI_SPU_BUS_MODE_REG *****************/
+/* SCK edge on which the WLAN module outputs data on MISO */
+#define IF_SPI_BUS_MODE_SPI_CLOCK_PHASE_FALLING 0x8
+#define IF_SPI_BUS_MODE_SPI_CLOCK_PHASE_RISING 0x0
+
+/* In a SPU read operation, there is a delay between writing the SPU
+ * register name and getting back data from the WLAN module.
+ * This can be specified in terms of nanoseconds or in terms of dummy
+ * clock cycles which the master must output before receiving a response. */
+#define IF_SPI_BUS_MODE_DELAY_METHOD_DUMMY_CLOCK 0x4
+#define IF_SPI_BUS_MODE_DELAY_METHOD_TIMED 0x0
+
+/* Some different modes of SPI operation */
+#define IF_SPI_BUS_MODE_8_BIT_ADDRESS_16_BIT_DATA 0x00
+#define IF_SPI_BUS_MODE_8_BIT_ADDRESS_32_BIT_DATA 0x01
+#define IF_SPI_BUS_MODE_16_BIT_ADDRESS_16_BIT_DATA 0x02
+#define IF_SPI_BUS_MODE_16_BIT_ADDRESS_32_BIT_DATA 0x03
+
+#endif
diff --git a/include/linux/spi/libertas_spi.h b/include/linux/spi/libertas_spi.h
new file mode 100644
index 000000000000..ada71b4f3788
--- /dev/null
+++ b/include/linux/spi/libertas_spi.h
@@ -0,0 +1,25 @@
+/*
+ * board-specific data for the libertas_spi driver.
+ *
+ * Copyright 2008 Analog Devices Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+#ifndef _LIBERTAS_SPI_H_
+#define _LIBERTAS_SPI_H_
+struct libertas_spi_platform_data {
+	/* There are two ways to read data from the WLAN module's SPI
+	 * interface. Setting 0 or 1 here controls which one is used.
+	 *
+	 * Usually you want to set use_dummy_writes = 1.
+	 * However, if that doesn't work or if you are using a slow SPI clock
+	 * speed, you may want to use 0 here. */
+	u16 use_dummy_writes;
+
+	/* GPIO number to use as chip select */
+	u16 gpio_cs;
+};
+#endif
-- 
cgit v1.2.3


From d03415e6771cd709b2b2ec64d3e6315cc3ebfa74 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 12 Jan 2009 14:24:40 +0200
Subject: nl80211: Fix documentation errors

Couple of '_ATTR's were missing and SEC_CHAN_OFFSET to CHANNEL_TYPE
rename was missed in couple of places.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index ee742bc9761e..4e7a7986a521 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -47,7 +47,7 @@
  * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or
  *	%NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME,
  *	%NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, and/or
- *	%NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET.
+ *	%NL80211_ATTR_WIPHY_CHANNEL_TYPE.
  * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request
  *	or rename notification. Has attributes %NL80211_ATTR_WIPHY and
  *	%NL80211_ATTR_WIPHY_NAME.
@@ -84,7 +84,7 @@
  *	%NL80222_CMD_NEW_BEACON message)
  * @NL80211_CMD_SET_BEACON: set the beacon on an access point interface
  *	using the %NL80211_ATTR_BEACON_INTERVAL, %NL80211_ATTR_DTIM_PERIOD,
- *	%NL80211_BEACON_HEAD and %NL80211_BEACON_TAIL attributes.
+ *	%NL80211_ATTR_BEACON_HEAD and %NL80211_ATTR_BEACON_TAIL attributes.
  * @NL80211_CMD_NEW_BEACON: add a new beacon to an access point interface,
  *	parameters are like for %NL80211_CMD_SET_BEACON.
  * @NL80211_CMD_DEL_BEACON: remove the beacon, stop sending it
@@ -362,7 +362,7 @@ enum nl80211_attrs {
 #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES
 #define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS
 #define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ
-#define NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET
+#define NL80211_ATTR_WIPHY_CHANNEL_TYPE NL80211_ATTR_WIPHY_CHANNEL_TYPE
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
-- 
cgit v1.2.3


From 9dbeb91a8b97e2892c04461e28d2bdd0198b719d Mon Sep 17 00:00:00 2001
From: Gabor Juhos <juhosg@openwrt.org>
Date: Wed, 14 Jan 2009 20:17:08 +0100
Subject: ath9k: get EEPROM contents from platform data on AHB bus

On the AR913x SOCs we have to provide EEPROM contents via platform_data,
because accessing the flash via MMIO is not safe. Additionally different
boards may store the radio calibration data at different locations.

Changes-licensed-under: ISC

Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
Signed-off-by: Imre Kaloz <kaloz@openwrt.org>
Tested-by: Pavel Roskin <proski@gnu.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/ahb.c    | 27 ++++++++++++++++++++
 drivers/net/wireless/ath9k/core.h   |  1 +
 drivers/net/wireless/ath9k/eeprom.c | 51 +++----------------------------------
 drivers/net/wireless/ath9k/pci.c    | 18 +++++++++++++
 include/linux/ath9k_platform.h      | 28 ++++++++++++++++++++
 5 files changed, 77 insertions(+), 48 deletions(-)
 create mode 100644 include/linux/ath9k_platform.h

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath9k/ahb.c b/drivers/net/wireless/ath9k/ahb.c
index 8cbd4c2a7fa0..7f2c3a09bcac 100644
--- a/drivers/net/wireless/ath9k/ahb.c
+++ b/drivers/net/wireless/ath9k/ahb.c
@@ -18,6 +18,7 @@
 
 #include <linux/nl80211.h>
 #include <linux/platform_device.h>
+#include <linux/ath9k_platform.h>
 #include "core.h"
 #include "reg.h"
 #include "hw.h"
@@ -33,9 +34,29 @@ static void ath_ahb_cleanup(struct ath_softc *sc)
 	iounmap(sc->mem);
 }
 
+static bool ath_ahb_eeprom_read(struct ath_hal *ah, u32 off, u16 *data)
+{
+	struct ath_softc *sc = ah->ah_sc;
+	struct platform_device *pdev = to_platform_device(sc->dev);
+	struct ath9k_platform_data *pdata;
+
+	pdata = (struct ath9k_platform_data *) pdev->dev.platform_data;
+	if (off >= (ARRAY_SIZE(pdata->eeprom_data))) {
+		DPRINTF(ah->ah_sc, ATH_DBG_FATAL,
+			"%s: flash read failed, offset %08x is out of range\n",
+				__func__, off);
+		return false;
+	}
+
+	*data = pdata->eeprom_data[off];
+	return true;
+}
+
 static struct ath_bus_ops ath_ahb_bus_ops  = {
 	.read_cachesize = ath_ahb_read_cachesize,
 	.cleanup = ath_ahb_cleanup,
+
+	.eeprom_read = ath_ahb_eeprom_read,
 };
 
 static int ath_ahb_probe(struct platform_device *pdev)
@@ -48,6 +69,12 @@ static int ath_ahb_probe(struct platform_device *pdev)
 	int ret = 0;
 	struct ath_hal *ah;
 
+	if (!pdev->dev.platform_data) {
+		dev_err(&pdev->dev, "no platform data specified\n");
+		ret = -EINVAL;
+		goto err_out;
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
 		dev_err(&pdev->dev, "no memory resource found\n");
diff --git a/drivers/net/wireless/ath9k/core.h b/drivers/net/wireless/ath9k/core.h
index c5dae11d6086..b687ae9c9e17 100644
--- a/drivers/net/wireless/ath9k/core.h
+++ b/drivers/net/wireless/ath9k/core.h
@@ -696,6 +696,7 @@ enum PROT_MODE {
 struct ath_bus_ops {
 	void		(*read_cachesize)(struct ath_softc *sc, int *csz);
 	void		(*cleanup)(struct ath_softc *sc);
+	bool		(*eeprom_read)(struct ath_hal *ah, u32 off, u16 *data);
 };
 
 struct ath_softc {
diff --git a/drivers/net/wireless/ath9k/eeprom.c b/drivers/net/wireless/ath9k/eeprom.c
index 1ef8b5a70e5b..50cb3883416a 100644
--- a/drivers/net/wireless/ath9k/eeprom.c
+++ b/drivers/net/wireless/ath9k/eeprom.c
@@ -91,53 +91,11 @@ static inline bool ath9k_hw_get_lower_upper_index(u8 target, u8 *pList,
 	return false;
 }
 
-static bool ath9k_hw_eeprom_read(struct ath_hal *ah, u32 off, u16 *data)
-{
-	(void)REG_READ(ah, AR5416_EEPROM_OFFSET + (off << AR5416_EEPROM_S));
-
-	if (!ath9k_hw_wait(ah,
-			   AR_EEPROM_STATUS_DATA,
-			   AR_EEPROM_STATUS_DATA_BUSY |
-			   AR_EEPROM_STATUS_DATA_PROT_ACCESS, 0)) {
-		return false;
-	}
-
-	*data = MS(REG_READ(ah, AR_EEPROM_STATUS_DATA),
-		   AR_EEPROM_STATUS_DATA_VAL);
-
-	return true;
-}
-
-static int ath9k_hw_flash_map(struct ath_hal *ah)
-{
-	struct ath_hal_5416 *ahp = AH5416(ah);
-
-	ahp->ah_cal_mem = ioremap(AR5416_EEPROM_START_ADDR, AR5416_EEPROM_MAX);
-
-	if (!ahp->ah_cal_mem) {
-		DPRINTF(ah->ah_sc, ATH_DBG_EEPROM,
-			"cannot remap eeprom region \n");
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static bool ath9k_hw_flash_read(struct ath_hal *ah, u32 off, u16 *data)
-{
-	struct ath_hal_5416 *ahp = AH5416(ah);
-
-	*data = ioread16(ahp->ah_cal_mem + off);
-
-	return true;
-}
-
 static inline bool ath9k_hw_nvram_read(struct ath_hal *ah, u32 off, u16 *data)
 {
-	if (ath9k_hw_use_flash(ah))
-		return ath9k_hw_flash_read(ah, off, data);
-	else
-		return ath9k_hw_eeprom_read(ah, off, data);
+	struct ath_softc *sc = ah->ah_sc;
+
+	return sc->bus_ops->eeprom_read(ah, off, data);
 }
 
 static bool ath9k_hw_fill_4k_eeprom(struct ath_hal *ah)
@@ -2825,9 +2783,6 @@ int ath9k_hw_eeprom_attach(struct ath_hal *ah)
 	int status;
 	struct ath_hal_5416 *ahp = AH5416(ah);
 
-	if (ath9k_hw_use_flash(ah))
-		ath9k_hw_flash_map(ah);
-
 	if (AR_SREV_9285(ah))
 		ahp->ah_eep_map = EEP_MAP_4KBITS;
 	else
diff --git a/drivers/net/wireless/ath9k/pci.c b/drivers/net/wireless/ath9k/pci.c
index 4ff1caa9ba99..05612bf28360 100644
--- a/drivers/net/wireless/ath9k/pci.c
+++ b/drivers/net/wireless/ath9k/pci.c
@@ -58,9 +58,27 @@ static void ath_pci_cleanup(struct ath_softc *sc)
 	pci_disable_device(pdev);
 }
 
+static bool ath_pci_eeprom_read(struct ath_hal *ah, u32 off, u16 *data)
+{
+	(void)REG_READ(ah, AR5416_EEPROM_OFFSET + (off << AR5416_EEPROM_S));
+
+	if (!ath9k_hw_wait(ah,
+			   AR_EEPROM_STATUS_DATA,
+			   AR_EEPROM_STATUS_DATA_BUSY |
+			   AR_EEPROM_STATUS_DATA_PROT_ACCESS, 0)) {
+		return false;
+	}
+
+	*data = MS(REG_READ(ah, AR_EEPROM_STATUS_DATA),
+		   AR_EEPROM_STATUS_DATA_VAL);
+
+	return true;
+}
+
 static struct ath_bus_ops ath_pci_bus_ops = {
 	.read_cachesize = ath_pci_read_cachesize,
 	.cleanup = ath_pci_cleanup,
+	.eeprom_read = ath_pci_eeprom_read,
 };
 
 static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h
new file mode 100644
index 000000000000..b847fc7b93f9
--- /dev/null
+++ b/include/linux/ath9k_platform.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2008 Atheros Communications Inc.
+ * Copyright (c) 2009 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (c) 2009 Imre Kaloz <kaloz@openwrt.org>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _LINUX_ATH9K_PLATFORM_H
+#define _LINUX_ATH9K_PLATFORM_H
+
+#define ATH9K_PLAT_EEP_MAX_WORDS	2048
+
+struct ath9k_platform_data {
+	u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS];
+};
+
+#endif /* _LINUX_ATH9K_PLATFORM_H */
-- 
cgit v1.2.3


From 9aed3cc124343d92be6697e9af3928bdfe8eb03e Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Tue, 13 Jan 2009 16:03:29 +0200
Subject: nl80211: New command for adding extra IE(s) into management frames

A new nl80211 command, NL80211_CMD_SET_MGMT_EXTRA_IE, can be used to
add arbitrary IE data into the end of management frames. The interface
allows extra IEs to be configured for each management frame subtype, but
only some of them (ProbeReq, ProbeResp, Auth, (Re)AssocReq, Deauth,
Disassoc) are currently accepted in mac80211 implementation.

This makes it easier to implement IEEE 802.11 extensions like WPS and
FT that add IE(s) into some management frames. In addition, this can
be useful for testing and experimentation purposes.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    | 22 +++++++++++++
 include/net/cfg80211.h     | 26 +++++++++++++++
 net/mac80211/cfg.c         | 82 ++++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h | 16 +++++++++
 net/mac80211/iface.c       |  7 ++++
 net/mac80211/mlme.c        | 52 +++++++++++++++++++++++++----
 net/wireless/nl80211.c     | 47 ++++++++++++++++++++++++++
 7 files changed, 246 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 4e7a7986a521..76aae3d8e97e 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -133,6 +133,14 @@
  * @NL80211_CMD_SET_MESH_PARAMS: Set mesh networking properties for the
  *      interface identified by %NL80211_ATTR_IFINDEX
  *
+ * @NL80211_CMD_SET_MGMT_EXTRA_IE: Set extra IEs for management frames. The
+ *	interface is identified with %NL80211_ATTR_IFINDEX and the management
+ *	frame subtype with %NL80211_ATTR_MGMT_SUBTYPE. The extra IE data to be
+ *	added to the end of the specified management frame is specified with
+ *	%NL80211_ATTR_IE. If the command succeeds, the requested data will be
+ *	added to all specified management frames generated by
+ *	kernel/firmware/driver.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -178,6 +186,8 @@ enum nl80211_commands {
 	NL80211_CMD_GET_MESH_PARAMS,
 	NL80211_CMD_SET_MESH_PARAMS,
 
+	NL80211_CMD_SET_MGMT_EXTRA_IE,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -190,6 +200,7 @@ enum nl80211_commands {
  * here
  */
 #define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS
+#define NL80211_CMD_SET_MGMT_EXTRA_IE NL80211_CMD_SET_MGMT_EXTRA_IE
 
 /**
  * enum nl80211_attrs - nl80211 netlink attributes
@@ -284,6 +295,12 @@ enum nl80211_commands {
  *	supported interface types, each a flag attribute with the number
  *	of the interface mode.
  *
+ * @NL80211_ATTR_MGMT_SUBTYPE: Management frame subtype for
+ *	%NL80211_CMD_SET_MGMT_EXTRA_IE.
+ *
+ * @NL80211_ATTR_IE: Information element(s) data (used, e.g., with
+ *	%NL80211_CMD_SET_MGMT_EXTRA_IE).
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -348,6 +365,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_KEY_DEFAULT_MGMT,
 
+	NL80211_ATTR_MGMT_SUBTYPE,
+	NL80211_ATTR_IE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -363,6 +383,8 @@ enum nl80211_attrs {
 #define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS
 #define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ
 #define NL80211_ATTR_WIPHY_CHANNEL_TYPE NL80211_ATTR_WIPHY_CHANNEL_TYPE
+#define NL80211_ATTR_MGMT_SUBTYPE NL80211_ATTR_MGMT_SUBTYPE
+#define NL80211_ATTR_IE NL80211_ATTR_IE
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index df78abc496f1..c7da88fb15b7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -433,6 +433,26 @@ struct ieee80211_txq_params {
 	u8 aifs;
 };
 
+/**
+ * struct mgmt_extra_ie_params - Extra management frame IE parameters
+ *
+ * Used to add extra IE(s) into management frames. If the driver cannot add the
+ * requested data into all management frames of the specified subtype that are
+ * generated in kernel or firmware/hardware, it must reject the configuration
+ * call. The IE data buffer is added to the end of the specified management
+ * frame body after all other IEs. This addition is not applied to frames that
+ * are injected through a monitor interface.
+ *
+ * @subtype: Management frame subtype
+ * @ies: IE data buffer or %NULL to remove previous data
+ * @ies_len: Length of @ies in octets
+ */
+struct mgmt_extra_ie_params {
+	u8 subtype;
+	u8 *ies;
+	int ies_len;
+};
+
 /* from net/wireless.h */
 struct wiphy;
 
@@ -501,6 +521,8 @@ struct ieee80211_channel;
  * @set_txq_params: Set TX queue parameters
  *
  * @set_channel: Set channel
+ *
+ * @set_mgmt_extra_ie: Set extra IE data for management frames
  */
 struct cfg80211_ops {
 	int	(*add_virtual_intf)(struct wiphy *wiphy, char *name,
@@ -571,6 +593,10 @@ struct cfg80211_ops {
 	int	(*set_channel)(struct wiphy *wiphy,
 			       struct ieee80211_channel *chan,
 			       enum nl80211_channel_type channel_type);
+
+	int	(*set_mgmt_extra_ie)(struct wiphy *wiphy,
+				     struct net_device *dev,
+				     struct mgmt_extra_ie_params *params);
 };
 
 /* temporary wext handlers */
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 72c106915433..d1ac3ab2c515 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1175,6 +1175,87 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
 	return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 }
 
+static int set_mgmt_extra_ie_sta(struct ieee80211_if_sta *ifsta, u8 subtype,
+				 u8 *ies, size_t ies_len)
+{
+	switch (subtype) {
+	case IEEE80211_STYPE_PROBE_REQ >> 4:
+		kfree(ifsta->ie_probereq);
+		ifsta->ie_probereq = ies;
+		ifsta->ie_probereq_len = ies_len;
+		return 0;
+	case IEEE80211_STYPE_PROBE_RESP >> 4:
+		kfree(ifsta->ie_proberesp);
+		ifsta->ie_proberesp = ies;
+		ifsta->ie_proberesp_len = ies_len;
+		return 0;
+	case IEEE80211_STYPE_AUTH >> 4:
+		kfree(ifsta->ie_auth);
+		ifsta->ie_auth = ies;
+		ifsta->ie_auth_len = ies_len;
+		return 0;
+	case IEEE80211_STYPE_ASSOC_REQ >> 4:
+		kfree(ifsta->ie_assocreq);
+		ifsta->ie_assocreq = ies;
+		ifsta->ie_assocreq_len = ies_len;
+		return 0;
+	case IEEE80211_STYPE_REASSOC_REQ >> 4:
+		kfree(ifsta->ie_reassocreq);
+		ifsta->ie_reassocreq = ies;
+		ifsta->ie_reassocreq_len = ies_len;
+		return 0;
+	case IEEE80211_STYPE_DEAUTH >> 4:
+		kfree(ifsta->ie_deauth);
+		ifsta->ie_deauth = ies;
+		ifsta->ie_deauth_len = ies_len;
+		return 0;
+	case IEEE80211_STYPE_DISASSOC >> 4:
+		kfree(ifsta->ie_disassoc);
+		ifsta->ie_disassoc = ies;
+		ifsta->ie_disassoc_len = ies_len;
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int ieee80211_set_mgmt_extra_ie(struct wiphy *wiphy,
+				       struct net_device *dev,
+				       struct mgmt_extra_ie_params *params)
+{
+	struct ieee80211_sub_if_data *sdata;
+	u8 *ies;
+	size_t ies_len;
+	int ret = -EOPNOTSUPP;
+
+	if (params->ies) {
+		ies = kmemdup(params->ies, params->ies_len, GFP_KERNEL);
+		if (ies == NULL)
+			return -ENOMEM;
+		ies_len = params->ies_len;
+	} else {
+		ies = NULL;
+		ies_len = 0;
+	}
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
+		ret = set_mgmt_extra_ie_sta(&sdata->u.sta, params->subtype,
+					    ies, ies_len);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	if (ret)
+		kfree(ies);
+	return ret;
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1204,4 +1285,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_bss = ieee80211_change_bss,
 	.set_txq_params = ieee80211_set_txq_params,
 	.set_channel = ieee80211_set_channel,
+	.set_mgmt_extra_ie = ieee80211_set_mgmt_extra_ie,
 };
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c9ffadb55d36..5eafd3affe27 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -331,6 +331,22 @@ struct ieee80211_if_sta {
 	u32 supp_rates_bits[IEEE80211_NUM_BANDS];
 
 	int wmm_last_param_set;
+
+	/* Extra IE data for management frames */
+	u8 *ie_probereq;
+	size_t ie_probereq_len;
+	u8 *ie_proberesp;
+	size_t ie_proberesp_len;
+	u8 *ie_auth;
+	size_t ie_auth_len;
+	u8 *ie_assocreq;
+	size_t ie_assocreq_len;
+	u8 *ie_reassocreq;
+	size_t ie_reassocreq_len;
+	u8 *ie_deauth;
+	size_t ie_deauth_len;
+	u8 *ie_disassoc;
+	size_t ie_disassoc_len;
 };
 
 struct ieee80211_if_mesh {
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5d5a029228be..8dc2c2188d92 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -632,6 +632,13 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 		kfree(sdata->u.sta.assocreq_ies);
 		kfree(sdata->u.sta.assocresp_ies);
 		kfree_skb(sdata->u.sta.probe_resp);
+		kfree(sdata->u.sta.ie_probereq);
+		kfree(sdata->u.sta.ie_proberesp);
+		kfree(sdata->u.sta.ie_auth);
+		kfree(sdata->u.sta.ie_assocreq);
+		kfree(sdata->u.sta.ie_reassocreq);
+		kfree(sdata->u.sta.ie_deauth);
+		kfree(sdata->u.sta.ie_disassoc);
 		break;
 	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_AP_VLAN:
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f0d42498c257..43da6227b37c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -131,6 +131,12 @@ u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
 
 /* frame sending functions */
 
+static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len)
+{
+	if (ies)
+		memcpy(skb_put(skb, ies_len), ies, ies_len);
+}
+
 /* also used by scanning code */
 void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 			      u8 *ssid, size_t ssid_len)
@@ -142,7 +148,8 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 	u8 *pos, *supp_rates, *esupp_rates = NULL;
 	int i;
 
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200);
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 +
+			    sdata->u.sta.ie_probereq_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
 		       "request\n", sdata->dev->name);
@@ -189,6 +196,9 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 		*pos = rate->bitrate / 5;
 	}
 
+	add_extra_ies(skb, sdata->u.sta.ie_probereq,
+		      sdata->u.sta.ie_probereq_len);
+
 	ieee80211_tx_skb(sdata, skb, 0);
 }
 
@@ -202,7 +212,8 @@ static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_mgmt *mgmt;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
-			    sizeof(*mgmt) + 6 + extra_len);
+			    sizeof(*mgmt) + 6 + extra_len +
+			    sdata->u.sta.ie_auth_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
 		       "frame\n", sdata->dev->name);
@@ -225,6 +236,7 @@ static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	mgmt->u.auth.status_code = cpu_to_le16(0);
 	if (extra)
 		memcpy(skb_put(skb, extra_len), extra, extra_len);
+	add_extra_ies(skb, sdata->u.sta.ie_auth, sdata->u.sta.ie_auth_len);
 
 	ieee80211_tx_skb(sdata, skb, encrypt);
 }
@@ -235,17 +247,26 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *pos, *ies, *ht_ie;
+	u8 *pos, *ies, *ht_ie, *e_ies;
 	int i, len, count, rates_len, supp_rates_len;
 	u16 capab;
 	struct ieee80211_bss *bss;
 	int wmm = 0;
 	struct ieee80211_supported_band *sband;
 	u64 rates = 0;
+	size_t e_ies_len;
+
+	if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) {
+		e_ies = sdata->u.sta.ie_reassocreq;
+		e_ies_len = sdata->u.sta.ie_reassocreq_len;
+	} else {
+		e_ies = sdata->u.sta.ie_assocreq;
+		e_ies_len = sdata->u.sta.ie_assocreq_len;
+	}
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
 			    sizeof(*mgmt) + 200 + ifsta->extra_ie_len +
-			    ifsta->ssid_len);
+			    ifsta->ssid_len + e_ies_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
 		       "frame\n", sdata->dev->name);
@@ -436,6 +457,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 		memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs));
 	}
 
+	add_extra_ies(skb, e_ies, e_ies_len);
+
 	kfree(ifsta->assocreq_ies);
 	ifsta->assocreq_ies_len = (skb->data + skb->len) - ies;
 	ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL);
@@ -453,8 +476,19 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
+	u8 *ies;
+	size_t ies_len;
 
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
+	if (stype == IEEE80211_STYPE_DEAUTH) {
+		ies = sdata->u.sta.ie_deauth;
+		ies_len = sdata->u.sta.ie_deauth_len;
+	} else {
+		ies = sdata->u.sta.ie_disassoc;
+		ies_len = sdata->u.sta.ie_disassoc_len;
+	}
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) +
+			    ies_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for "
 		       "deauth/disassoc frame\n", sdata->dev->name);
@@ -472,6 +506,8 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	/* u.deauth.reason_code == u.disassoc.reason_code */
 	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
 
+	add_extra_ies(skb, ies, ies_len);
+
 	ieee80211_tx_skb(sdata, skb, ifsta->flags & IEEE80211_STA_MFP_ENABLED);
 }
 
@@ -1473,7 +1509,8 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_supported_band *sband;
 	union iwreq_data wrqu;
 
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 +
+			    sdata->u.sta.ie_proberesp_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
 		       "response\n", sdata->dev->name);
@@ -1556,6 +1593,9 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 		memcpy(pos, &bss->supp_rates[8], rates);
 	}
 
+	add_extra_ies(skb, sdata->u.sta.ie_proberesp,
+		      sdata->u.sta.ie_proberesp_len);
+
 	ifsta->probe_resp = skb;
 
 	ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 123d3b160fad..09a5d0f1d6dc 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -105,6 +105,10 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 
 	[NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY,
 					 .len = NL80211_HT_CAPABILITY_LEN },
+
+	[NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 },
+	[NL80211_ATTR_IE] = { .type = NLA_BINARY,
+			      .len = IEEE80211_MAX_DATA_LEN },
 };
 
 /* message building helper */
@@ -2149,6 +2153,43 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 	return -EINVAL;
 }
 
+static int nl80211_set_mgmt_extra_ie(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	struct mgmt_extra_ie_params params;
+
+	memset(&params, 0, sizeof(params));
+
+	if (!info->attrs[NL80211_ATTR_MGMT_SUBTYPE])
+		return -EINVAL;
+	params.subtype = nla_get_u8(info->attrs[NL80211_ATTR_MGMT_SUBTYPE]);
+	if (params.subtype > 15)
+		return -EINVAL; /* FC Subtype field is 4 bits (0..15) */
+
+	if (info->attrs[NL80211_ATTR_IE]) {
+		params.ies = nla_data(info->attrs[NL80211_ATTR_IE]);
+		params.ies_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+	}
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		return err;
+
+	if (drv->ops->set_mgmt_extra_ie) {
+		rtnl_lock();
+		err = drv->ops->set_mgmt_extra_ie(&drv->wiphy, dev, &params);
+		rtnl_unlock();
+	} else
+		err = -EOPNOTSUPP;
+
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -2310,6 +2351,12 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_SET_MGMT_EXTRA_IE,
+		.doit = nl80211_set_mgmt_extra_ie,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 /* multicast groups */
-- 
cgit v1.2.3


From f797eb7e2903571e9c0e7e5d64113f51209f8dc4 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Mon, 19 Jan 2009 18:48:46 +0200
Subject: mac80211: Fix MFP Association Comeback to use Timeout Interval IE

The separate Association Comeback Time IE was removed from IEEE 802.11w
and the Timeout Interval IE (from IEEE 802.11r) is used instead. The
editing on this is still somewhat incomplete in IEEE 802.11w/D7.0, but
still, the use of Timeout Interval IE is the expected mechanism.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 8 +++++++-
 net/mac80211/ieee80211_i.h | 4 ++--
 net/mac80211/mlme.c        | 5 +++--
 net/mac80211/util.c        | 6 +++---
 4 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7800e20f197f..b1bb817d1427 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1036,8 +1036,8 @@ enum ieee80211_eid {
 	WLAN_EID_HT_INFORMATION = 61,
 	/* 802.11i */
 	WLAN_EID_RSN = 48,
+	WLAN_EID_TIMEOUT_INTERVAL = 56,
 	WLAN_EID_MMIE = 76 /* 802.11w */,
-	WLAN_EID_ASSOC_COMEBACK_TIME = 77,
 	WLAN_EID_WPA = 221,
 	WLAN_EID_GENERIC = 221,
 	WLAN_EID_VENDOR_SPECIFIC = 221,
@@ -1126,6 +1126,12 @@ struct ieee80211_country_ie_triplet {
 	};
 } __attribute__ ((packed));
 
+enum ieee80211_timeout_interval_type {
+	WLAN_TIMEOUT_REASSOC_DEADLINE = 1 /* 802.11r */,
+	WLAN_TIMEOUT_KEY_LIFETIME = 2 /* 802.11r */,
+	WLAN_TIMEOUT_ASSOC_COMEBACK = 3 /* 802.11w */,
+};
+
 /* BACK action code */
 enum ieee80211_back_actioncode {
 	WLAN_ACTION_ADDBA_REQ = 0,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index faa2476a2451..a8c72742a8b1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -837,7 +837,7 @@ struct ieee802_11_elems {
 	u8 *country_elem;
 	u8 *pwr_constr_elem;
 	u8 *quiet_elem; 	/* first quite element */
-	u8 *assoc_comeback;
+	u8 *timeout_int;
 
 	/* length of them, respectively */
 	u8 ssid_len;
@@ -865,7 +865,7 @@ struct ieee802_11_elems {
 	u8 pwr_constr_elem_len;
 	u8 quiet_elem_len;
 	u8 num_of_quiet_elem;	/* can be more the one */
-	u8 assoc_comeback_len;
+	u8 timeout_int_len;
 };
 
 static inline struct ieee80211_local *hw_to_local(
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 43da6227b37c..b9e4b93089c4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1317,9 +1317,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
 
 	if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
-	    elems.assoc_comeback && elems.assoc_comeback_len == 4) {
+	    elems.timeout_int && elems.timeout_int_len == 5 &&
+	    elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) {
 		u32 tu, ms;
-		tu = get_unaligned_le32(elems.assoc_comeback);
+		tu = get_unaligned_le32(elems.timeout_int + 1);
 		ms = tu * 1024 / 1000;
 		printk(KERN_DEBUG "%s: AP rejected association temporarily; "
 		       "comeback duration %u TU (%u ms)\n",
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 963e0473205c..3f559e3d0a7c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -653,9 +653,9 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
 			elems->pwr_constr_elem = pos;
 			elems->pwr_constr_elem_len = elen;
 			break;
-		case WLAN_EID_ASSOC_COMEBACK_TIME:
-			elems->assoc_comeback = pos;
-			elems->assoc_comeback_len = elen;
+		case WLAN_EID_TIMEOUT_INTERVAL:
+			elems->timeout_int = pos;
+			elems->timeout_int_len = elen;
 			break;
 		default:
 			break;
-- 
cgit v1.2.3


From f677d7702d48b7b3dfcce3b2c0db601dbee0aa24 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@gmail.com>
Date: Sun, 25 Jan 2009 23:54:25 +0100
Subject: ath5k: support LED's on emachines E510 notebook

Add vendor ID for AMBIT and use it to set the ath5k LED gpio.

base.c:
Changes-licensed-under: 3-Clause-BSD

Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@gmail.com>
Acked-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/base.c | 8 ++++++--
 include/linux/pci_ids.h           | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index b3f41acb9065..368db944f11f 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -2626,8 +2626,12 @@ ath5k_init_leds(struct ath5k_softc *sc)
 		sc->led_pin = 1;
 		sc->led_on = 1;  /* active high */
 	}
-	/* Pin 3 on Foxconn chips used in Acer Aspire One (0x105b:e008) */
-	if (pdev->subsystem_vendor == PCI_VENDOR_ID_FOXCONN) {
+	/*
+	 * Pin 3 on Foxconn chips used in Acer Aspire One (0x105b:e008) and
+	 * in emachines notebooks with AMBIT subsystem.
+	 */
+	if (pdev->subsystem_vendor == PCI_VENDOR_ID_FOXCONN ||
+	    pdev->subsystem_vendor == PCI_VENDOR_ID_AMBIT) {
 		__set_bit(ATH_STAT_LEDSOFT, sc->status);
 		sc->led_pin = 3;
 		sc->led_on = 0;  /* active low */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 5b7a48c1d616..b7697c934a49 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1967,6 +1967,8 @@
 
 #define PCI_VENDOR_ID_SAMSUNG		0x144d
 
+#define PCI_VENDOR_ID_AMBIT		0x1468
+
 #define PCI_VENDOR_ID_MYRICOM		0x14c1
 
 #define PCI_VENDOR_ID_TITAN		0x14D2
-- 
cgit v1.2.3


From 5d0d9be8ef456afc6c3fb5f8aad06ef19b704b05 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 29 Jan 2009 14:19:48 +0000
Subject: gro: Move common completion code into helpers

Currently VLAN still has a bit of common code handling the aftermath
of GRO that's shared with the common path.  This patch moves them
into shared helpers to reduce code duplication.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 ++
 net/8021q/vlan_core.c     | 39 +++---------------------
 net/core/dev.c            | 76 ++++++++++++++++++++++++++++++++---------------
 3 files changed, 59 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dd8a35b3e8b2..20419508eec1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1375,12 +1375,15 @@ extern int		netif_receive_skb(struct sk_buff *skb);
 extern void		napi_gro_flush(struct napi_struct *napi);
 extern int		dev_gro_receive(struct napi_struct *napi,
 					struct sk_buff *skb);
+extern int		napi_skb_finish(int ret, struct sk_buff *skb);
 extern int		napi_gro_receive(struct napi_struct *napi,
 					 struct sk_buff *skb);
 extern void		napi_reuse_skb(struct napi_struct *napi,
 				       struct sk_buff *skb);
 extern struct sk_buff *	napi_fraginfo_skb(struct napi_struct *napi,
 					  struct napi_gro_fraginfo *info);
+extern int		napi_frags_finish(struct napi_struct *napi,
+					  struct sk_buff *skb, int ret);
 extern int		napi_gro_frags(struct napi_struct *napi,
 				       struct napi_gro_fraginfo *info);
 extern void		netif_nit_deliver(struct sk_buff *skb);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index e9db889d6222..2eb057a74654 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -98,22 +98,7 @@ drop:
 int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 		     unsigned int vlan_tci, struct sk_buff *skb)
 {
-	int err = NET_RX_SUCCESS;
-
-	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
-	case -1:
-		return netif_receive_skb(skb);
-
-	case 2:
-		err = NET_RX_DROP;
-		/* fall through */
-
-	case 1:
-		kfree_skb(skb);
-		break;
-	}
-
-	return err;
+	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
 }
 EXPORT_SYMBOL(vlan_gro_receive);
 
@@ -121,27 +106,11 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 		   unsigned int vlan_tci, struct napi_gro_fraginfo *info)
 {
 	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
-	int err = NET_RX_DROP;
 
 	if (!skb)
-		goto out;
-
-	err = NET_RX_SUCCESS;
-
-	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
-	case -1:
-		return netif_receive_skb(skb);
-
-	case 2:
-		err = NET_RX_DROP;
-		/* fall through */
-
-	case 1:
-		napi_reuse_skb(napi, skb);
-		break;
-	}
+		return NET_RX_DROP;
 
-out:
-	return err;
+	return napi_frags_finish(napi, skb,
+				 vlan_gro_common(napi, grp, vlan_tci, skb));
 }
 EXPORT_SYMBOL(vlan_gro_frags);
diff --git a/net/core/dev.c b/net/core/dev.c
index e61b95c11fc0..cd23ae15a1d5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,14 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
+enum {
+	GRO_MERGED,
+	GRO_MERGED_FREE,
+	GRO_HELD,
+	GRO_NORMAL,
+	GRO_DROP,
+};
+
 /*
  *	The list of packet types we will receive (as opposed to discard)
  *	and the routines to invoke.
@@ -2369,7 +2377,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	int count = 0;
 	int same_flow;
 	int mac_len;
-	int free;
+	int ret;
 
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
@@ -2412,7 +2420,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		goto normal;
 
 	same_flow = NAPI_GRO_CB(skb)->same_flow;
-	free = NAPI_GRO_CB(skb)->free;
+	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
 
 	if (pp) {
 		struct sk_buff *nskb = *pp;
@@ -2435,12 +2443,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	skb_shinfo(skb)->gso_size = skb->len;
 	skb->next = napi->gro_list;
 	napi->gro_list = skb;
+	ret = GRO_HELD;
 
 ok:
-	return free;
+	return ret;
 
 normal:
-	return -1;
+	return GRO_NORMAL;
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
@@ -2456,18 +2465,30 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	return dev_gro_receive(napi, skb);
 }
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+int napi_skb_finish(int ret, struct sk_buff *skb)
 {
-	switch (__napi_gro_receive(napi, skb)) {
-	case -1:
+	int err = NET_RX_SUCCESS;
+
+	switch (ret) {
+	case GRO_NORMAL:
 		return netif_receive_skb(skb);
 
-	case 1:
+	case GRO_DROP:
+		err = NET_RX_DROP;
+		/* fall through */
+
+	case GRO_MERGED_FREE:
 		kfree_skb(skb);
 		break;
 	}
 
-	return NET_RX_SUCCESS;
+	return err;
+}
+EXPORT_SYMBOL(napi_skb_finish);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
@@ -2520,29 +2541,36 @@ out:
 }
 EXPORT_SYMBOL(napi_fraginfo_skb);
 
-int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 {
-	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
-	int err = NET_RX_DROP;
-
-	if (!skb)
-		goto out;
+	int err = NET_RX_SUCCESS;
 
-	err = NET_RX_SUCCESS;
-
-	switch (__napi_gro_receive(napi, skb)) {
-	case -1:
+	switch (ret) {
+	case GRO_NORMAL:
 		return netif_receive_skb(skb);
 
-	case 0:
-		goto out;
-	}
+	case GRO_DROP:
+		err = NET_RX_DROP;
+		/* fall through */
 
-	napi_reuse_skb(napi, skb);
+	case GRO_MERGED_FREE:
+		napi_reuse_skb(napi, skb);
+		break;
+	}
 
-out:
 	return err;
 }
+EXPORT_SYMBOL(napi_frags_finish);
+
+int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+{
+	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+
+	if (!skb)
+		return NET_RX_DROP;
+
+	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+}
 EXPORT_SYMBOL(napi_gro_frags);
 
 static int process_backlog(struct napi_struct *napi, int quota)
-- 
cgit v1.2.3


From 86911732d3996a9da07914b280621450111bb6da Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 29 Jan 2009 14:19:50 +0000
Subject: gro: Avoid copying headers of unmerged packets

Unfortunately simplicity isn't always the best.  The fraginfo
interface turned out to be suboptimal.  The problem was quite
obvious.  For every packet, we have to copy the headers from
the frags structure into skb->head, even though for 99% of the
packets this part is immediately thrown away after the merge.

LRO didn't have this problem because it directly read the headers
from the frags structure.

This patch attempts to address this by creating an interface
that allows GRO to access the headers in the first frag without
having to copy it.  Because all drivers that use frags place the
headers in the first frag this optimisation should be enough.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 26 ++++++++++++++++++
 include/linux/skbuff.h    |  2 --
 net/8021q/vlan_core.c     |  2 ++
 net/core/dev.c            | 70 +++++++++++++++++++++++++++++++++++++++--------
 net/core/skbuff.c         | 23 ++++++++++------
 net/ipv4/af_inet.c        | 10 +++----
 net/ipv4/tcp.c            | 16 +++++------
 net/ipv4/tcp_ipv4.c       |  2 +-
 net/ipv6/af_inet6.c       | 30 +++++++++++++-------
 net/ipv6/tcp_ipv6.c       |  2 +-
 10 files changed, 137 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 20419508eec1..7a5057fbb7cd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -984,6 +984,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 void netif_napi_del(struct napi_struct *napi);
 
 struct napi_gro_cb {
+	/* This indicates where we are processing relative to skb->data. */
+	int data_offset;
+
 	/* This is non-zero if the packet may be of the same flow. */
 	int same_flow;
 
@@ -1087,6 +1090,29 @@ extern int		dev_restart(struct net_device *dev);
 #ifdef CONFIG_NETPOLL_TRAP
 extern int		netpoll_trap(void);
 #endif
+extern void	      *skb_gro_header(struct sk_buff *skb, unsigned int hlen);
+extern int	       skb_gro_receive(struct sk_buff **head,
+				       struct sk_buff *skb);
+
+static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
+{
+	return NAPI_GRO_CB(skb)->data_offset;
+}
+
+static inline unsigned int skb_gro_len(const struct sk_buff *skb)
+{
+	return skb->len - NAPI_GRO_CB(skb)->data_offset;
+}
+
+static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
+{
+	NAPI_GRO_CB(skb)->data_offset += len;
+}
+
+static inline void skb_gro_reset_offset(struct sk_buff *skb)
+{
+	NAPI_GRO_CB(skb)->data_offset = 0;
+}
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a2c2378a9c58..08670d017479 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1687,8 +1687,6 @@ extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
 				 int shiftlen);
 
 extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
-extern int	       skb_gro_receive(struct sk_buff **head,
-				       struct sk_buff *skb);
 
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 2eb057a74654..378fa69d625a 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -98,6 +98,8 @@ drop:
 int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 		     unsigned int vlan_tci, struct sk_buff *skb)
 {
+	skb_gro_reset_offset(skb);
+
 	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
 }
 EXPORT_SYMBOL(vlan_gro_receive);
diff --git a/net/core/dev.c b/net/core/dev.c
index cd23ae15a1d5..df406dcf7482 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -215,6 +215,13 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 }
 
+static inline void *skb_gro_mac_header(struct sk_buff *skb)
+{
+	return skb_headlen(skb) ? skb_mac_header(skb) :
+	       page_address(skb_shinfo(skb)->frags[0].page) +
+	       skb_shinfo(skb)->frags[0].page_offset;
+}
+
 /* Device list insertion */
 static int list_netdevice(struct net_device *dev)
 {
@@ -2350,7 +2357,6 @@ static int napi_gro_complete(struct sk_buff *skb)
 
 out:
 	skb_shinfo(skb)->gso_size = 0;
-	__skb_push(skb, -skb_network_offset(skb));
 	return netif_receive_skb(skb);
 }
 
@@ -2368,6 +2374,25 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
+void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
+{
+	unsigned int offset = skb_gro_offset(skb);
+
+	hlen += offset;
+	if (hlen <= skb_headlen(skb))
+		return skb->data + offset;
+
+	if (unlikely(!skb_shinfo(skb)->nr_frags ||
+		     skb_shinfo(skb)->frags[0].size <=
+		     hlen - skb_headlen(skb) ||
+		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
+		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+
+	return page_address(skb_shinfo(skb)->frags[0].page) +
+	       skb_shinfo(skb)->frags[0].page_offset + offset;
+}
+EXPORT_SYMBOL(skb_gro_header);
+
 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
@@ -2388,11 +2413,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
 		struct sk_buff *p;
+		void *mac;
 
 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
 			continue;
 
-		skb_reset_network_header(skb);
+		skb_set_network_header(skb, skb_gro_offset(skb));
+		mac = skb_gro_mac_header(skb);
 		mac_len = skb->network_header - skb->mac_header;
 		skb->mac_len = mac_len;
 		NAPI_GRO_CB(skb)->same_flow = 0;
@@ -2406,8 +2433,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 				continue;
 
 			if (p->mac_len != mac_len ||
-			    memcmp(skb_mac_header(p), skb_mac_header(skb),
-				   mac_len))
+			    memcmp(skb_mac_header(p), mac, mac_len))
 				NAPI_GRO_CB(p)->same_flow = 0;
 		}
 
@@ -2434,13 +2460,11 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	if (same_flow)
 		goto ok;
 
-	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
-		__skb_push(skb, -skb_network_offset(skb));
+	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS)
 		goto normal;
-	}
 
 	NAPI_GRO_CB(skb)->count = 1;
-	skb_shinfo(skb)->gso_size = skb->len;
+	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 	skb->next = napi->gro_list;
 	napi->gro_list = skb;
 	ret = GRO_HELD;
@@ -2488,6 +2512,8 @@ EXPORT_SYMBOL(napi_skb_finish);
 
 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	skb_gro_reset_offset(skb);
+
 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
@@ -2506,6 +2532,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 {
 	struct net_device *dev = napi->dev;
 	struct sk_buff *skb = napi->skb;
+	struct ethhdr *eth;
 
 	napi->skb = NULL;
 
@@ -2525,13 +2552,23 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 	skb->len += info->len;
 	skb->truesize += info->len;
 
-	if (!pskb_may_pull(skb, ETH_HLEN)) {
+	skb_reset_mac_header(skb);
+	skb_gro_reset_offset(skb);
+
+	eth = skb_gro_header(skb, sizeof(*eth));
+	if (!eth) {
 		napi_reuse_skb(napi, skb);
 		skb = NULL;
 		goto out;
 	}
 
-	skb->protocol = eth_type_trans(skb, dev);
+	skb_gro_pull(skb, sizeof(*eth));
+
+	/*
+	 * This works because the only protocols we care about don't require
+	 * special handling.  We'll fix it up properly at the end.
+	 */
+	skb->protocol = eth->h_proto;
 
 	skb->ip_summed = info->ip_summed;
 	skb->csum = info->csum;
@@ -2544,10 +2581,21 @@ EXPORT_SYMBOL(napi_fraginfo_skb);
 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 {
 	int err = NET_RX_SUCCESS;
+	int may;
 
 	switch (ret) {
 	case GRO_NORMAL:
-		return netif_receive_skb(skb);
+	case GRO_HELD:
+		may = pskb_may_pull(skb, skb_gro_offset(skb));
+		BUG_ON(!may);
+
+		skb->protocol = eth_type_trans(skb, napi->dev);
+
+		if (ret == GRO_NORMAL)
+			return netif_receive_skb(skb);
+
+		skb_gro_pull(skb, -ETH_HLEN);
+		break;
 
 	case GRO_DROP:
 		err = NET_RX_DROP;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2e5f2ca3bdcd..f9f4065a7e9b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2584,17 +2584,21 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	struct sk_buff *p = *head;
 	struct sk_buff *nskb;
 	unsigned int headroom;
-	unsigned int hlen = p->data - skb_mac_header(p);
-	unsigned int len = skb->len;
+	unsigned int len = skb_gro_len(skb);
 
-	if (hlen + p->len + len >= 65536)
+	if (p->len + len >= 65536)
 		return -E2BIG;
 
 	if (skb_shinfo(p)->frag_list)
 		goto merge;
-	else if (!skb_headlen(p) && !skb_headlen(skb) &&
-		 skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <
+	else if (skb_headlen(skb) <= skb_gro_offset(skb) &&
+		 skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <=
 		 MAX_SKB_FRAGS) {
+		skb_shinfo(skb)->frags[0].page_offset +=
+			skb_gro_offset(skb) - skb_headlen(skb);
+		skb_shinfo(skb)->frags[0].size -=
+			skb_gro_offset(skb) - skb_headlen(skb);
+
 		memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
 		       skb_shinfo(skb)->frags,
 		       skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -2611,7 +2615,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	}
 
 	headroom = skb_headroom(p);
-	nskb = netdev_alloc_skb(p->dev, headroom);
+	nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
 	if (unlikely(!nskb))
 		return -ENOMEM;
 
@@ -2619,12 +2623,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	nskb->mac_len = p->mac_len;
 
 	skb_reserve(nskb, headroom);
+	__skb_put(nskb, skb_gro_offset(p));
 
-	skb_set_mac_header(nskb, -hlen);
+	skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
 	skb_set_network_header(nskb, skb_network_offset(p));
 	skb_set_transport_header(nskb, skb_transport_offset(p));
 
-	memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen);
+	__skb_pull(p, skb_gro_offset(p));
+	memcpy(skb_mac_header(nskb), skb_mac_header(p),
+	       p->data - skb_mac_header(p));
 
 	*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
 	skb_shinfo(nskb)->frag_list = p;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 743f5542d65a..d6770f295d5b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1253,10 +1253,10 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 	int proto;
 	int id;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+	iph = skb_gro_header(skb, sizeof(*iph));
+	if (unlikely(!iph))
 		goto out;
 
-	iph = ip_hdr(skb);
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
 
 	rcu_read_lock();
@@ -1270,7 +1270,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
 		goto out_unlock;
 
-	flush = ntohs(iph->tot_len) != skb->len ||
+	flush = ntohs(iph->tot_len) != skb_gro_len(skb) ||
 		iph->frag_off != htons(IP_DF);
 	id = ntohs(iph->id);
 
@@ -1298,8 +1298,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 	}
 
 	NAPI_GRO_CB(skb)->flush |= flush;
-	__skb_pull(skb, sizeof(*iph));
-	skb_reset_transport_header(skb);
+	skb_gro_pull(skb, sizeof(*iph));
+	skb_set_transport_header(skb, skb_gro_offset(skb));
 
 	pp = ops->gro_receive(head, skb);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0cd71b84e483..1cd608253940 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2481,19 +2481,19 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	unsigned int mss = 1;
 	int flush = 1;
 
-	if (!pskb_may_pull(skb, sizeof(*th)))
+	th = skb_gro_header(skb, sizeof(*th));
+	if (unlikely(!th))
 		goto out;
 
-	th = tcp_hdr(skb);
 	thlen = th->doff * 4;
 	if (thlen < sizeof(*th))
 		goto out;
 
-	if (!pskb_may_pull(skb, thlen))
+	th = skb_gro_header(skb, thlen);
+	if (unlikely(!th))
 		goto out;
 
-	th = tcp_hdr(skb);
-	__skb_pull(skb, thlen);
+	skb_gro_pull(skb, thlen);
 
 	flags = tcp_flag_word(th);
 
@@ -2521,10 +2521,10 @@ found:
 	flush |= th->ack_seq != th2->ack_seq || th->window != th2->window;
 	flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th));
 
-	total = p->len;
+	total = skb_gro_len(p);
 	mss = skb_shinfo(p)->gso_size;
 
-	flush |= skb->len > mss || skb->len <= 0;
+	flush |= skb_gro_len(skb) > mss || !skb_gro_len(skb);
 	flush |= ntohl(th2->seq) + total != ntohl(th->seq);
 
 	if (flush || skb_gro_receive(head, skb)) {
@@ -2537,7 +2537,7 @@ found:
 	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
 
 out_check_final:
-	flush = skb->len < mss;
+	flush = skb_gro_len(skb) < mss;
 	flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
 			  TCP_FLAG_SYN | TCP_FLAG_FIN);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 19d7b429a262..f6b962f56ab4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2355,7 +2355,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
-		if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr,
+		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
 				  skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			break;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c802bc1658a8..bd91eadcbe3f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -799,24 +799,34 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	int proto;
 	__wsum csum;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+	iph = skb_gro_header(skb, sizeof(*iph));
+	if (unlikely(!iph))
 		goto out;
 
-	iph = ipv6_hdr(skb);
-	__skb_pull(skb, sizeof(*iph));
+	skb_gro_pull(skb, sizeof(*iph));
+	skb_set_transport_header(skb, skb_gro_offset(skb));
 
-	flush += ntohs(iph->payload_len) != skb->len;
+	flush += ntohs(iph->payload_len) != skb_gro_len(skb);
 
 	rcu_read_lock();
-	proto = ipv6_gso_pull_exthdrs(skb, iph->nexthdr);
-	iph = ipv6_hdr(skb);
-	IPV6_GRO_CB(skb)->proto = proto;
+	proto = iph->nexthdr;
 	ops = rcu_dereference(inet6_protos[proto]);
-	if (!ops || !ops->gro_receive)
-		goto out_unlock;
+	if (!ops || !ops->gro_receive) {
+		__pskb_pull(skb, skb_gro_offset(skb));
+		proto = ipv6_gso_pull_exthdrs(skb, proto);
+		skb_gro_pull(skb, -skb_transport_offset(skb));
+		skb_reset_transport_header(skb);
+		__skb_push(skb, skb_gro_offset(skb));
+
+		if (!ops || !ops->gro_receive)
+			goto out_unlock;
+
+		iph = ipv6_hdr(skb);
+	}
+
+	IPV6_GRO_CB(skb)->proto = proto;
 
 	flush--;
-	skb_reset_transport_header(skb);
 	nlen = skb_network_header_len(skb);
 
 	for (p = *head; p; p = p->next) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e5b85d45bee8..00f1269e11e9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -948,7 +948,7 @@ struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
-		if (!tcp_v6_check(skb->len, &iph->saddr, &iph->daddr,
+		if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
 				  skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			break;
-- 
cgit v1.2.3


From d23f028a4ddce8b783c212bfe911d1d307ff3617 Mon Sep 17 00:00:00 2001
From: Steve Glendinning <steve.glendinning@smsc.com>
Date: Tue, 27 Jan 2009 06:51:11 +0000
Subject: smsc911x: add external phy detection overrides

On LAN9115/LAN9117/LAN9215/LAN9217, external phys are supported.  These
are usually indicated by a hardware strap which sets an "external PHY
detected" bit in the HW_CFG register.

In some cases it is desirable to override this hardware strap and force
use of either the internal phy or an external PHY.  This patch adds
SMSC911X_FORCE_INTERNAL_PHY and SMSC911X_FORCE_EXTERNAL_PHY flags so a
platform can indicate this preference via its platform_data.

Signed-off-by: Steve Glendinning <steve.glendinning@smsc.com>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Tested-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/smsc911x.c   | 75 ++++++++++++++++++++++++------------------------
 include/linux/smsc911x.h |  2 ++
 2 files changed, 40 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 6aa2b165de6a..27d2d7c45519 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -368,48 +368,53 @@ out:
 	return reg;
 }
 
-/* Autodetects and initialises external phy for SMSC9115 and SMSC9117 flavors.
- * If something goes wrong, returns -ENODEV to revert back to internal phy.
- * Performed at initialisation only, so interrupts are enabled */
-static int smsc911x_phy_initialise_external(struct smsc911x_data *pdata)
+/* Switch to external phy. Assumes tx and rx are stopped. */
+static void smsc911x_phy_enable_external(struct smsc911x_data *pdata)
 {
 	unsigned int hwcfg = smsc911x_reg_read(pdata, HW_CFG);
 
-	/* External phy is requested, supported, and detected */
-	if (hwcfg & HW_CFG_EXT_PHY_DET_) {
+	/* Disable phy clocks to the MAC */
+	hwcfg &= (~HW_CFG_PHY_CLK_SEL_);
+	hwcfg |= HW_CFG_PHY_CLK_SEL_CLK_DIS_;
+	smsc911x_reg_write(pdata, HW_CFG, hwcfg);
+	udelay(10);	/* Enough time for clocks to stop */
 
-		/* Switch to external phy. Assuming tx and rx are stopped
-		 * because smsc911x_phy_initialise is called before
-		 * smsc911x_rx_initialise and tx_initialise. */
+	/* Switch to external phy */
+	hwcfg |= HW_CFG_EXT_PHY_EN_;
+	smsc911x_reg_write(pdata, HW_CFG, hwcfg);
 
-		/* Disable phy clocks to the MAC */
-		hwcfg &= (~HW_CFG_PHY_CLK_SEL_);
-		hwcfg |= HW_CFG_PHY_CLK_SEL_CLK_DIS_;
-		smsc911x_reg_write(pdata, HW_CFG, hwcfg);
-		udelay(10);	/* Enough time for clocks to stop */
+	/* Enable phy clocks to the MAC */
+	hwcfg &= (~HW_CFG_PHY_CLK_SEL_);
+	hwcfg |= HW_CFG_PHY_CLK_SEL_EXT_PHY_;
+	smsc911x_reg_write(pdata, HW_CFG, hwcfg);
+	udelay(10);	/* Enough time for clocks to restart */
 
-		/* Switch to external phy */
-		hwcfg |= HW_CFG_EXT_PHY_EN_;
-		smsc911x_reg_write(pdata, HW_CFG, hwcfg);
-
-		/* Enable phy clocks to the MAC */
-		hwcfg &= (~HW_CFG_PHY_CLK_SEL_);
-		hwcfg |= HW_CFG_PHY_CLK_SEL_EXT_PHY_;
-		smsc911x_reg_write(pdata, HW_CFG, hwcfg);
-		udelay(10);	/* Enough time for clocks to restart */
+	hwcfg |= HW_CFG_SMI_SEL_;
+	smsc911x_reg_write(pdata, HW_CFG, hwcfg);
+}
 
-		hwcfg |= HW_CFG_SMI_SEL_;
-		smsc911x_reg_write(pdata, HW_CFG, hwcfg);
+/* Autodetects and enables external phy if present on supported chips.
+ * autodetection can be overridden by specifying SMSC911X_FORCE_INTERNAL_PHY
+ * or SMSC911X_FORCE_EXTERNAL_PHY in the platform_data flags. */
+static void smsc911x_phy_initialise_external(struct smsc911x_data *pdata)
+{
+	unsigned int hwcfg = smsc911x_reg_read(pdata, HW_CFG);
 
-		SMSC_TRACE(HW, "Successfully switched to external PHY");
+	if (pdata->config.flags & SMSC911X_FORCE_INTERNAL_PHY) {
+		SMSC_TRACE(HW, "Forcing internal PHY");
+		pdata->using_extphy = 0;
+	} else if (pdata->config.flags & SMSC911X_FORCE_EXTERNAL_PHY) {
+		SMSC_TRACE(HW, "Forcing external PHY");
+		smsc911x_phy_enable_external(pdata);
+		pdata->using_extphy = 1;
+	} else if (hwcfg & HW_CFG_EXT_PHY_DET_) {
+		SMSC_TRACE(HW, "HW_CFG EXT_PHY_DET set, using external PHY");
+		smsc911x_phy_enable_external(pdata);
 		pdata->using_extphy = 1;
 	} else {
-		SMSC_WARNING(HW, "No external PHY detected, "
-			"Using internal PHY instead.");
-		/* Use internal phy */
-		return -ENODEV;
+		SMSC_TRACE(HW, "HW_CFG EXT_PHY_DET clear, using internal PHY");
+		pdata->using_extphy = 0;
 	}
-	return 0;
 }
 
 /* Fetches a tx status out of the status fifo */
@@ -825,22 +830,18 @@ static int __devinit smsc911x_mii_init(struct platform_device *pdev,
 
 	pdata->mii_bus->parent = &pdev->dev;
 
-	pdata->using_extphy = 0;
-
 	switch (pdata->idrev & 0xFFFF0000) {
 	case 0x01170000:
 	case 0x01150000:
 	case 0x117A0000:
 	case 0x115A0000:
 		/* External PHY supported, try to autodetect */
-		if (smsc911x_phy_initialise_external(pdata) < 0) {
-			SMSC_TRACE(HW, "No external PHY detected, "
-				"using internal PHY");
-		}
+		smsc911x_phy_initialise_external(pdata);
 		break;
 	default:
 		SMSC_TRACE(HW, "External PHY is not supported, "
 			"using internal PHY");
+		pdata->using_extphy = 0;
 		break;
 	}
 
diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h
index 1cbf0313adde..170c76b8f7a6 100644
--- a/include/linux/smsc911x.h
+++ b/include/linux/smsc911x.h
@@ -43,5 +43,7 @@ struct smsc911x_platform_config {
 /* Constants for flags */
 #define SMSC911X_USE_16BIT 			(BIT(0))
 #define SMSC911X_USE_32BIT 			(BIT(1))
+#define SMSC911X_FORCE_INTERNAL_PHY		(BIT(2))
+#define SMSC911X_FORCE_EXTERNAL_PHY 		(BIT(3))
 
 #endif /* __LINUX_SMSC911X_H__ */
-- 
cgit v1.2.3


From 31f4574774e98aa275aeeee94f41ce042285ed8e Mon Sep 17 00:00:00 2001
From: Steve Glendinning <steve.glendinning@smsc.com>
Date: Tue, 27 Jan 2009 06:51:12 +0000
Subject: smsc911x: allow mac address to be saved before device reset

Some platforms (for example pcm037) do not have an EEPROM fitted,
instead storing their mac address somewhere else.  The bootloader
fetches this and configures the ethernet adapter before the kernel is
started.

This patch allows a platform to indicate to the driver via the
SMSC911X_SAVE_MAC_ADDRESS flag that the mac address has already been
configured via such a mechanism, and should be saved before resetting
the chip.

Signed-off-by: Steve Glendinning <steve.glendinning@smsc.com>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Tested-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/smsc911x.c   | 30 ++++++++++++++++++++++--------
 include/linux/smsc911x.h |  1 +
 2 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 27d2d7c45519..6e175e5555a1 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1742,6 +1742,21 @@ static const struct net_device_ops smsc911x_netdev_ops = {
 #endif
 };
 
+/* copies the current mac address from hardware to dev->dev_addr */
+static void __devinit smsc911x_read_mac_address(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	u32 mac_high16 = smsc911x_mac_read(pdata, ADDRH);
+	u32 mac_low32 = smsc911x_mac_read(pdata, ADDRL);
+
+	dev->dev_addr[0] = (u8)(mac_low32);
+	dev->dev_addr[1] = (u8)(mac_low32 >> 8);
+	dev->dev_addr[2] = (u8)(mac_low32 >> 16);
+	dev->dev_addr[3] = (u8)(mac_low32 >> 24);
+	dev->dev_addr[4] = (u8)(mac_high16);
+	dev->dev_addr[5] = (u8)(mac_high16 >> 8);
+}
+
 /* Initializing private device structures, only called from probe */
 static int __devinit smsc911x_init(struct net_device *dev)
 {
@@ -1829,6 +1844,12 @@ static int __devinit smsc911x_init(struct net_device *dev)
 		SMSC_WARNING(PROBE,
 			"This driver is not intended for this chip revision");
 
+	/* workaround for platforms without an eeprom, where the mac address
+	 * is stored elsewhere and set by the bootloader.  This saves the
+	 * mac address before resetting the device */
+	if (pdata->config.flags & SMSC911X_SAVE_MAC_ADDRESS)
+		smsc911x_read_mac_address(dev);
+
 	/* Reset the LAN911x */
 	if (smsc911x_soft_reset(pdata))
 		return -ENODEV;
@@ -2009,14 +2030,7 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	} else {
 		/* Try reading mac address from device. if EEPROM is present
 		 * it will already have been set */
-		u32 mac_high16 = smsc911x_mac_read(pdata, ADDRH);
-		u32 mac_low32 = smsc911x_mac_read(pdata, ADDRL);
-		dev->dev_addr[0] = (u8)(mac_low32);
-		dev->dev_addr[1] = (u8)(mac_low32 >> 8);
-		dev->dev_addr[2] = (u8)(mac_low32 >> 16);
-		dev->dev_addr[3] = (u8)(mac_low32 >> 24);
-		dev->dev_addr[4] = (u8)(mac_high16);
-		dev->dev_addr[5] = (u8)(mac_high16 >> 8);
+		smsc911x_read_mac_address(dev);
 
 		if (is_valid_ether_addr(dev->dev_addr)) {
 			/* eeprom values are valid  so use them */
diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h
index 170c76b8f7a6..b32725075d71 100644
--- a/include/linux/smsc911x.h
+++ b/include/linux/smsc911x.h
@@ -45,5 +45,6 @@ struct smsc911x_platform_config {
 #define SMSC911X_USE_32BIT 			(BIT(1))
 #define SMSC911X_FORCE_INTERNAL_PHY		(BIT(2))
 #define SMSC911X_FORCE_EXTERNAL_PHY 		(BIT(3))
+#define SMSC911X_SAVE_MAC_ADDRESS		(BIT(4))
 
 #endif /* __LINUX_SMSC911X_H__ */
-- 
cgit v1.2.3


From eefef1cf7653cd4e0aaf743c00ae8345086cdc01 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sun, 1 Feb 2009 01:04:33 -0800
Subject: net: add ARP notify option for devices

This adds another inet device option to enable gratuitous ARP
when device is brought up or address change. This is handy for
clusters or virtualization.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 6 ++++++
 include/linux/inetdevice.h             | 1 +
 include/linux/sysctl.h                 | 1 +
 kernel/sysctl_check.c                  | 1 +
 net/ipv4/devinet.c                     | 9 +++++++++
 5 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index c7712787933c..ff3f219ee4d7 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -782,6 +782,12 @@ arp_ignore - INTEGER
 	The max value from conf/{all,interface}/arp_ignore is used
 	when ARP request is received on the {interface}
 
+arp_notify - BOOLEAN
+	Define mode for notification of address and device changes.
+	0 - (default): do nothing
+	1 - Generate gratuitous arp replies when device is brought up
+	    or hardware address changes.
+
 arp_accept - BOOLEAN
 	Define behavior when gratuitous arp replies are received:
 	0 - drop gratuitous arp frames
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 06fcdb45106b..acef2a770b6b 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -108,6 +108,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 #define IN_DEV_ARPFILTER(in_dev)	IN_DEV_ORCONF((in_dev), ARPFILTER)
 #define IN_DEV_ARP_ANNOUNCE(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE)
 #define IN_DEV_ARP_IGNORE(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_IGNORE)
+#define IN_DEV_ARP_NOTIFY(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_NOTIFY)
 
 struct in_ifaddr
 {
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 39d471d1163b..e76d3b22a466 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -490,6 +490,7 @@ enum
 	NET_IPV4_CONF_ARP_IGNORE=19,
 	NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
 	NET_IPV4_CONF_ARP_ACCEPT=21,
+	NET_IPV4_CONF_ARP_NOTIFY=22,
 	__NET_IPV4_CONF_MAX
 };
 
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index fafeb48f27c0..b38423ca711a 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -219,6 +219,7 @@ static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = {
 	{ NET_IPV4_CONF_ARP_IGNORE,		"arp_ignore" },
 	{ NET_IPV4_CONF_PROMOTE_SECONDARIES,	"promote_secondaries" },
 	{ NET_IPV4_CONF_ARP_ACCEPT,		"arp_accept" },
+	{ NET_IPV4_CONF_ARP_NOTIFY,		"arp_notify" },
 	{}
 };
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 309997edc8a5..d519a6a66726 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1075,6 +1075,14 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 			}
 		}
 		ip_mc_up(in_dev);
+		/* fall through */
+	case NETDEV_CHANGEADDR:
+		if (IN_DEV_ARP_NOTIFY(in_dev))
+			arp_send(ARPOP_REQUEST, ETH_P_ARP,
+				 in_dev->ifa_list->ifa_address,
+				 dev,
+				 in_dev->ifa_list->ifa_address,
+				 NULL, dev->dev_addr, NULL);
 		break;
 	case NETDEV_DOWN:
 		ip_mc_down(in_dev);
@@ -1439,6 +1447,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
+		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
 
 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
-- 
cgit v1.2.3


From 7e7f4eae28711fbb7f4d5e4b0aa3195776194bc1 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Mon, 2 Feb 2009 21:40:10 +0530
Subject: headers_check fix: linux/coda_psdev.h

fix the following 'make headers_check' warning:

  usr/include/linux/coda_psdev.h:90: extern's make no sense in userspace

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 include/linux/coda_psdev.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index 07ae8f846055..6f06352cf55e 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -24,7 +24,7 @@ static inline struct venus_comm *coda_vcp(struct super_block *sb)
 	return (struct venus_comm *)((sb)->s_fs_info);
 }
 
-
+#ifdef __KERNEL__
 /* upcalls */
 int venus_rootfid(struct super_block *sb, struct CodaFid *fidp);
 int venus_getattr(struct super_block *sb, struct CodaFid *fid,
@@ -64,6 +64,12 @@ int coda_downcall(int opcode, union outputArgs *out, struct super_block *sb);
 int venus_fsync(struct super_block *sb, struct CodaFid *fid);
 int venus_statfs(struct dentry *dentry, struct kstatfs *sfs);
 
+/*
+ * Statistics
+ */
+
+extern struct venus_comm coda_comms[];
+#endif /* __KERNEL__ */
 
 /* messages between coda filesystem in kernel and Venus */
 struct upc_req {
@@ -82,11 +88,4 @@ struct upc_req {
 #define REQ_WRITE  0x4
 #define REQ_ABORT  0x8
 
-
-/*
- * Statistics
- */
-
-extern struct venus_comm coda_comms[];
-
 #endif
-- 
cgit v1.2.3


From 25d00fddf8d23234da2d45c051a14450939496d6 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Mon, 2 Feb 2009 21:40:58 +0530
Subject: headers_check fix: linux/in6.h

fix the following 'make headers_check' warnings:

  usr/include/linux/in6.h:47: extern's make no sense in userspace
  usr/include/linux/in6.h:49: extern's make no sense in userspace

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 include/linux/in6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index bc492048c349..718bf21c5754 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -44,11 +44,11 @@ struct in6_addr
  * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined
  * in network byte order, not in host byte order as are the IPv4 equivalents
  */
+#ifdef __KERNEL__
 extern const struct in6_addr in6addr_any;
 #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } }
 extern const struct in6_addr in6addr_loopback;
 #define IN6ADDR_LOOPBACK_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } }
-#ifdef __KERNEL__
 extern const struct in6_addr in6addr_linklocal_allnodes;
 #define IN6ADDR_LINKLOCAL_ALLNODES_INIT	\
 		{ { { 0xff,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } }
-- 
cgit v1.2.3


From 9fe03bc3139503fbad66016bf714f4575babf651 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Mon, 2 Feb 2009 21:41:41 +0530
Subject: headers_check fix: linux/nubus.h

fix the following 'make headers_check' warnings:

  usr/include/linux/nubus.h:297: extern's make no sense in userspace
  usr/include/linux/nubus.h:299: extern's make no sense in userspace
  usr/include/linux/nubus.h:303: extern's make no sense in userspace

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 include/linux/nubus.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nubus.h b/include/linux/nubus.h
index 7382af374731..9be57d992695 100644
--- a/include/linux/nubus.h
+++ b/include/linux/nubus.h
@@ -296,6 +296,7 @@ struct nubus_dev {
 	struct nubus_board* board;
 };
 
+#ifdef __KERNEL__
 /* This is all NuBus devices (used to find devices later on) */
 extern struct nubus_dev* nubus_devices;
 /* This is all NuBus cards */
@@ -351,6 +352,7 @@ void nubus_get_rsrc_mem(void* dest,
 void nubus_get_rsrc_str(void* dest,
 			const struct nubus_dirent *dirent,
 			int maxlen);
+#endif /* __KERNEL__ */
 
 /* We'd like to get rid of this eventually.  Only daynaport.c uses it now. */
 static inline void *nubus_slot_addr(int slot)
-- 
cgit v1.2.3


From 7d7dc0d6b0565484e0623cb08b5dcdd56424697b Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Mon, 2 Feb 2009 21:44:09 +0530
Subject: headers_check fix: linux/socket.h

fix the following 'make headers_check' warning:

  usr/include/linux/socket.h:29: extern's make no sense in userspace

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 include/linux/socket.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 20fc4bbfca42..afc01909a428 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -24,10 +24,12 @@ struct __kernel_sockaddr_storage {
 #include <linux/types.h>		/* pid_t			*/
 #include <linux/compiler.h>		/* __user			*/
 
-#ifdef CONFIG_PROC_FS
+#ifdef __KERNEL__
+# ifdef CONFIG_PROC_FS
 struct seq_file;
 extern void socket_seq_show(struct seq_file *seq);
-#endif
+# endif
+#endif /* __KERNEL__ */
 
 typedef unsigned short	sa_family_t;
 
-- 
cgit v1.2.3


From 11d9f653aff1d445b4300ae1d2e2d675a0e9172f Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Mon, 2 Feb 2009 21:45:41 +0530
Subject: headers_check fix: linux/reinserfs_fs.h

fix the following 'make headers_check' warnings:

  usr/include/linux/reiserfs_fs.h:687: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:995: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:997: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1467: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1760: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1764: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1766: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1769: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1771: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1805: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1948: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1949: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1950: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1951: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1962: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1963: extern's make no sense in userspace
  usr/include/linux/reiserfs_fs.h:1964: extern's make no sense in userspace

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 include/linux/reiserfs_fs.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index bc5114d35e99..a4db55fd1f65 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -698,7 +698,9 @@ static inline void cpu_key_k_offset_dec(struct cpu_key *key)
 /* object identifier for root dir */
 #define REISERFS_ROOT_OBJECTID 2
 #define REISERFS_ROOT_PARENT_OBJECTID 1
+#ifdef __KERNEL__
 extern struct reiserfs_key root_key;
+#endif /* __KERNEL__ */
 
 /* 
  * Picture represents a leaf of the S+tree
@@ -1006,10 +1008,12 @@ struct reiserfs_de_head {
 #define de_visible(deh)	    	    test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
 #define de_hidden(deh)	    	    !test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
 
+#ifdef __KERNEL__
 extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
 				   __le32 par_dirid, __le32 par_objid);
 extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
 				__le32 par_dirid, __le32 par_objid);
+#endif /* __KERNEL__ */
 
 /* array of the entry headers */
  /* get item body */
@@ -1478,7 +1482,9 @@ struct item_operations {
 	void (*print_vi) (struct virtual_item * vi);
 };
 
+#ifdef __KERNEL__
 extern struct item_operations *item_ops[TYPE_ANY + 1];
+#endif /* __KERNEL__ */
 
 #define op_bytes_number(ih,bsize)                    item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize)
 #define op_is_left_mergeable(key,bsize)              item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize)
@@ -1679,6 +1685,7 @@ struct reiserfs_transaction_handle {
 	struct list_head t_list;
 };
 
+#ifdef __KERNEL__
 /* used to keep track of ordered and tail writes, attached to the buffer
  * head through b_journal_head.
  */
@@ -2203,4 +2210,5 @@ int reiserfs_unpack(struct inode *inode, struct file *filp);
 /* xattr stuff */
 #define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem)
 
+#endif /* __KERNEL__ */
 #endif				/* _LINUX_REISER_FS_H */
-- 
cgit v1.2.3


From 1a5645bc901aea6f3f446888061b2b084bbf1ba6 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 2 Feb 2009 23:22:04 -0800
Subject: connector: create connector workqueue only while needed once

The netlink connector uses its own workqueue to relay the datas sent
from userspace to the appropriate callback.  If you launch the test
from Documentation/connector and change it a bit to send a high flow
of data, you will see thousands of events coming to the "cqueue"
workqueue by looking at the workqueue tracer.

This flow of events can be sent very quickly. So, to not encumber the
kevent workqueue and delay other jobs, the "cqueue" workqueue should
remain.

But this workqueue is pointless most of the time, it will always be
created (assuming you have built it of course) although only
developpers with specific needs will use it.

So avoid this "most of the time useless task", this patch proposes to
create this workqueue only when needed once.  The first jobs to be
sent to connector callbacks will be sent to kevent while the "cqueue"
thread creation will be scheduled to kevent too.

The following jobs will continue to be scheduled to keventd until the
cqueue workqueue is created, and then the rest of the jobs will
continue to perform as usual, through this dedicated workqueue.

Each time I tested this patch, only the first event was sent to
keventd, the rest has been sent to cqueue which have been created
quickly.

Also, this patch fixes some trailing whitespaces on the connector files.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/cn_queue.c  | 80 +++++++++++++++++++++++++++++++++++++------
 drivers/connector/connector.c | 19 +++++-----
 include/linux/connector.h     |  8 +++++
 3 files changed, 87 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c
index b6fe7e7a2c2f..c769ef269fb5 100644
--- a/drivers/connector/cn_queue.c
+++ b/drivers/connector/cn_queue.c
@@ -1,9 +1,9 @@
 /*
  * 	cn_queue.c
- * 
+ *
  * 2004-2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
  * All rights reserved.
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -31,6 +31,48 @@
 #include <linux/connector.h>
 #include <linux/delay.h>
 
+
+/*
+ * This job is sent to the kevent workqueue.
+ * While no event is once sent to any callback, the connector workqueue
+ * is not created to avoid a useless waiting kernel task.
+ * Once the first event is received, we create this dedicated workqueue which
+ * is necessary because the flow of data can be high and we don't want
+ * to encumber keventd with that.
+ */
+static void cn_queue_create(struct work_struct *work)
+{
+	struct cn_queue_dev *dev;
+
+	dev = container_of(work, struct cn_queue_dev, wq_creation);
+
+	dev->cn_queue = create_singlethread_workqueue(dev->name);
+	/* If we fail, we will use keventd for all following connector jobs */
+	WARN_ON(!dev->cn_queue);
+}
+
+/*
+ * Queue a data sent to a callback.
+ * If the connector workqueue is already created, we queue the job on it.
+ * Otherwise, we queue the job to kevent and queue the connector workqueue
+ * creation too.
+ */
+int queue_cn_work(struct cn_callback_entry *cbq, struct work_struct *work)
+{
+	struct cn_queue_dev *pdev = cbq->pdev;
+
+	if (likely(pdev->cn_queue))
+		return queue_work(pdev->cn_queue, work);
+
+	/* Don't create the connector workqueue twice */
+	if (atomic_inc_return(&pdev->wq_requested) == 1)
+		schedule_work(&pdev->wq_creation);
+	else
+		atomic_dec(&pdev->wq_requested);
+
+	return schedule_work(work);
+}
+
 void cn_queue_wrapper(struct work_struct *work)
 {
 	struct cn_callback_entry *cbq =
@@ -58,14 +100,17 @@ static struct cn_callback_entry *cn_queue_alloc_callback_entry(char *name, struc
 	snprintf(cbq->id.name, sizeof(cbq->id.name), "%s", name);
 	memcpy(&cbq->id.id, id, sizeof(struct cb_id));
 	cbq->data.callback = callback;
-	
+
 	INIT_WORK(&cbq->work, &cn_queue_wrapper);
 	return cbq;
 }
 
 static void cn_queue_free_callback(struct cn_callback_entry *cbq)
 {
-	flush_workqueue(cbq->pdev->cn_queue);
+	/* The first jobs have been sent to kevent, flush them too */
+	flush_scheduled_work();
+	if (cbq->pdev->cn_queue)
+		flush_workqueue(cbq->pdev->cn_queue);
 
 	kfree(cbq);
 }
@@ -143,14 +188,11 @@ struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *nls)
 	atomic_set(&dev->refcnt, 0);
 	INIT_LIST_HEAD(&dev->queue_list);
 	spin_lock_init(&dev->queue_lock);
+	init_waitqueue_head(&dev->wq_created);
 
 	dev->nls = nls;
 
-	dev->cn_queue = create_singlethread_workqueue(dev->name);
-	if (!dev->cn_queue) {
-		kfree(dev);
-		return NULL;
-	}
+	INIT_WORK(&dev->wq_creation, cn_queue_create);
 
 	return dev;
 }
@@ -158,9 +200,25 @@ struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *nls)
 void cn_queue_free_dev(struct cn_queue_dev *dev)
 {
 	struct cn_callback_entry *cbq, *n;
+	long timeout;
+	DEFINE_WAIT(wait);
+
+	/* Flush the first pending jobs queued on kevent */
+	flush_scheduled_work();
+
+	/* If the connector workqueue creation is still pending, wait for it */
+	prepare_to_wait(&dev->wq_created, &wait, TASK_UNINTERRUPTIBLE);
+	if (atomic_read(&dev->wq_requested) && !dev->cn_queue) {
+		timeout = schedule_timeout(HZ * 2);
+		if (!timeout && !dev->cn_queue)
+			WARN_ON(1);
+	}
+	finish_wait(&dev->wq_created, &wait);
 
-	flush_workqueue(dev->cn_queue);
-	destroy_workqueue(dev->cn_queue);
+	if (dev->cn_queue) {
+		flush_workqueue(dev->cn_queue);
+		destroy_workqueue(dev->cn_queue);
+	}
 
 	spin_lock_bh(&dev->queue_lock);
 	list_for_each_entry_safe(cbq, n, &dev->queue_list, callback_entry)
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index bf4830082a13..fd336c5a9057 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -1,9 +1,9 @@
 /*
  * 	connector.c
- * 
+ *
  * 2004-2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
  * All rights reserved.
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -145,14 +145,13 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v
 				__cbq->data.ddata = data;
 				__cbq->data.destruct_data = destruct_data;
 
-				if (queue_work(dev->cbdev->cn_queue,
-							&__cbq->work))
+				if (queue_cn_work(__cbq, &__cbq->work))
 					err = 0;
 				else
 					err = -EINVAL;
 			} else {
 				struct cn_callback_data *d;
-				
+
 				err = -ENOMEM;
 				__new_cbq = kzalloc(sizeof(struct cn_callback_entry), GFP_ATOMIC);
 				if (__new_cbq) {
@@ -163,10 +162,12 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v
 					d->destruct_data = destruct_data;
 					d->free = __new_cbq;
 
+					__new_cbq->pdev = __cbq->pdev;
+
 					INIT_WORK(&__new_cbq->work,
 							&cn_queue_wrapper);
 
-					if (queue_work(dev->cbdev->cn_queue,
+					if (queue_cn_work(__new_cbq,
 						    &__new_cbq->work))
 						err = 0;
 					else {
@@ -237,7 +238,7 @@ static void cn_notify(struct cb_id *id, u32 notify_event)
 
 		req = (struct cn_notify_req *)ctl->data;
 		for (i = 0; i < ctl->idx_notify_num; ++i, ++req) {
-			if (id->idx >= req->first && 
+			if (id->idx >= req->first &&
 					id->idx < req->first + req->range) {
 				idx_found = 1;
 				break;
@@ -245,7 +246,7 @@ static void cn_notify(struct cb_id *id, u32 notify_event)
 		}
 
 		for (i = 0; i < ctl->val_notify_num; ++i, ++req) {
-			if (id->val >= req->first && 
+			if (id->val >= req->first &&
 					id->val < req->first + req->range) {
 				val_found = 1;
 				break;
@@ -459,7 +460,7 @@ static int __devinit cn_init(void)
 		netlink_kernel_release(dev->nls);
 		return -EINVAL;
 	}
-	
+
 	cn_already_initialized = 1;
 
 	err = cn_add_callback(&dev->id, "connector", &cn_callback);
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 34f2789d9b9b..fc65d219d88c 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -109,6 +109,12 @@ struct cn_queue_dev {
 	unsigned char name[CN_CBQ_NAMELEN];
 
 	struct workqueue_struct *cn_queue;
+	/* Sent to kevent to create cn_queue only when needed */
+	struct work_struct wq_creation;
+	/* Tell if the wq_creation job is pending/completed */
+	atomic_t wq_requested;
+	/* Wait for cn_queue to be created */
+	wait_queue_head_t wq_created;
 
 	struct list_head queue_list;
 	spinlock_t queue_lock;
@@ -164,6 +170,8 @@ int cn_netlink_send(struct cn_msg *, u32, gfp_t);
 int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *));
 void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id);
 
+int queue_cn_work(struct cn_callback_entry *cbq, struct work_struct *work);
+
 struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *);
 void cn_queue_free_dev(struct cn_queue_dev *dev);
 
-- 
cgit v1.2.3


From f2cddb29ebfc02dfd2c4b439aa0433393ad15575 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Tue, 3 Feb 2009 19:21:38 +0530
Subject: headers_check fix cleanup: linux/coda_psdev.h

These are only for kernel internals as pointed by Arnd Bergmann:
  struct kstatfs
  struct venus_comm
  coda_vcp()

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 include/linux/coda_psdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index 6f06352cf55e..5b5d4731f956 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -6,6 +6,7 @@
 #define CODA_PSDEV_MAJOR 67
 #define MAX_CODADEVS  5	   /* how many do we allow */
 
+#ifdef __KERNEL__
 struct kstatfs;
 
 /* communication pending/processing queues */
@@ -24,7 +25,6 @@ static inline struct venus_comm *coda_vcp(struct super_block *sb)
 	return (struct venus_comm *)((sb)->s_fs_info);
 }
 
-#ifdef __KERNEL__
 /* upcalls */
 int venus_rootfid(struct super_block *sb, struct CodaFid *fidp);
 int venus_getattr(struct super_block *sb, struct CodaFid *fid,
-- 
cgit v1.2.3


From 5007b1fc4ef2c1b496536b2f026353c1d44d92ef Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Tue, 3 Feb 2009 19:28:24 +0530
Subject: headers_check fix cleanup: linux/nubus.h

These are only for kernel internals as pointed by Arnd Bergmann:
   struct nubus_board
   struct nubus_dev

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 include/linux/nubus.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nubus.h b/include/linux/nubus.h
index 9be57d992695..e137b3c486a7 100644
--- a/include/linux/nubus.h
+++ b/include/linux/nubus.h
@@ -237,6 +237,7 @@ struct nubus_dirent
 	int mask;
 };
 
+#ifdef __KERNEL__
 struct nubus_board {
 	struct nubus_board* next;
 	struct nubus_dev* first_dev;
@@ -296,7 +297,6 @@ struct nubus_dev {
 	struct nubus_board* board;
 };
 
-#ifdef __KERNEL__
 /* This is all NuBus devices (used to find devices later on) */
 extern struct nubus_dev* nubus_devices;
 /* This is all NuBus cards */
-- 
cgit v1.2.3


From 750e1c18251345e662bb7e7062b5fd5c1ade36de Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Tue, 3 Feb 2009 19:40:03 +0530
Subject: headers_check fix cleanup: linux/reiserfs_fs.h

Only REISERFS_IOC_* definitions are required for user space
rest should be in #ifdef __KERNEL__ as pointed by Arnd Bergmann.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 include/linux/reiserfs_fs.h | 62 ++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index a4db55fd1f65..e356c99f0659 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -28,8 +28,6 @@
 #include <linux/reiserfs_fs_sb.h>
 #endif
 
-struct fid;
-
 /*
  *  include/linux/reiser_fs.h
  *
@@ -37,6 +35,33 @@ struct fid;
  *
  */
 
+/* ioctl's command */
+#define REISERFS_IOC_UNPACK		_IOW(0xCD,1,long)
+/* define following flags to be the same as in ext2, so that chattr(1),
+   lsattr(1) will work with us. */
+#define REISERFS_IOC_GETFLAGS		FS_IOC_GETFLAGS
+#define REISERFS_IOC_SETFLAGS		FS_IOC_SETFLAGS
+#define REISERFS_IOC_GETVERSION		FS_IOC_GETVERSION
+#define REISERFS_IOC_SETVERSION		FS_IOC_SETVERSION
+
+#ifdef __KERNEL__
+/* the 32 bit compat definitions with int argument */
+#define REISERFS_IOC32_UNPACK		_IOW(0xCD, 1, int)
+#define REISERFS_IOC32_GETFLAGS		FS_IOC32_GETFLAGS
+#define REISERFS_IOC32_SETFLAGS		FS_IOC32_SETFLAGS
+#define REISERFS_IOC32_GETVERSION	FS_IOC32_GETVERSION
+#define REISERFS_IOC32_SETVERSION	FS_IOC32_SETVERSION
+
+/* Locking primitives */
+/* Right now we are still falling back to (un)lock_kernel, but eventually that
+   would evolve into real per-fs locks */
+#define reiserfs_write_lock( sb ) lock_kernel()
+#define reiserfs_write_unlock( sb ) unlock_kernel()
+
+/* xattr stuff */
+#define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem)
+struct fid;
+
 /* in reading the #defines, it may help to understand that they employ
    the following abbreviations:
 
@@ -698,9 +723,8 @@ static inline void cpu_key_k_offset_dec(struct cpu_key *key)
 /* object identifier for root dir */
 #define REISERFS_ROOT_OBJECTID 2
 #define REISERFS_ROOT_PARENT_OBJECTID 1
-#ifdef __KERNEL__
+
 extern struct reiserfs_key root_key;
-#endif /* __KERNEL__ */
 
 /* 
  * Picture represents a leaf of the S+tree
@@ -1008,12 +1032,10 @@ struct reiserfs_de_head {
 #define de_visible(deh)	    	    test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
 #define de_hidden(deh)	    	    !test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
 
-#ifdef __KERNEL__
 extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
 				   __le32 par_dirid, __le32 par_objid);
 extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
 				__le32 par_dirid, __le32 par_objid);
-#endif /* __KERNEL__ */
 
 /* array of the entry headers */
  /* get item body */
@@ -1482,9 +1504,7 @@ struct item_operations {
 	void (*print_vi) (struct virtual_item * vi);
 };
 
-#ifdef __KERNEL__
 extern struct item_operations *item_ops[TYPE_ANY + 1];
-#endif /* __KERNEL__ */
 
 #define op_bytes_number(ih,bsize)                    item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize)
 #define op_is_left_mergeable(key,bsize)              item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize)
@@ -1546,7 +1566,6 @@ struct reiserfs_iget_args {
 /*                    FUNCTION DECLARATIONS                                */
 /***************************************************************************/
 
-/*#ifdef __KERNEL__*/
 #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
 
 #define journal_trans_half(blocksize) \
@@ -1685,7 +1704,6 @@ struct reiserfs_transaction_handle {
 	struct list_head t_list;
 };
 
-#ifdef __KERNEL__
 /* used to keep track of ordered and tail writes, attached to the buffer
  * head through b_journal_head.
  */
@@ -2185,30 +2203,6 @@ long reiserfs_compat_ioctl(struct file *filp,
 		   unsigned int cmd, unsigned long arg);
 int reiserfs_unpack(struct inode *inode, struct file *filp);
 
-/* ioctl's command */
-#define REISERFS_IOC_UNPACK		_IOW(0xCD,1,long)
-/* define following flags to be the same as in ext2, so that chattr(1),
-   lsattr(1) will work with us. */
-#define REISERFS_IOC_GETFLAGS		FS_IOC_GETFLAGS
-#define REISERFS_IOC_SETFLAGS		FS_IOC_SETFLAGS
-#define REISERFS_IOC_GETVERSION		FS_IOC_GETVERSION
-#define REISERFS_IOC_SETVERSION		FS_IOC_SETVERSION
-
-/* the 32 bit compat definitions with int argument */
-#define REISERFS_IOC32_UNPACK		_IOW(0xCD, 1, int)
-#define REISERFS_IOC32_GETFLAGS		FS_IOC32_GETFLAGS
-#define REISERFS_IOC32_SETFLAGS		FS_IOC32_SETFLAGS
-#define REISERFS_IOC32_GETVERSION	FS_IOC32_GETVERSION
-#define REISERFS_IOC32_SETVERSION	FS_IOC32_SETVERSION
-
-/* Locking primitives */
-/* Right now we are still falling back to (un)lock_kernel, but eventually that
-   would evolve into real per-fs locks */
-#define reiserfs_write_lock( sb ) lock_kernel()
-#define reiserfs_write_unlock( sb ) unlock_kernel()
-
-/* xattr stuff */
-#define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem)
 
 #endif /* __KERNEL__ */
 #endif				/* _LINUX_REISER_FS_H */
-- 
cgit v1.2.3


From 2a41f71d3bd97dde3305b4e1c43ab0eca46e7c71 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@hp.com>
Date: Wed, 4 Feb 2009 09:02:34 +0000
Subject: virtio_net: Add a virtqueue for outbound control commands

This will be used for RX mode, MAC filter table, VLAN filtering, etc...

The control transaction consists of one or more "out" sg entries and
one or more "in" sg entries.  The first out entry contains a header
defining the class and command.  Additional out entries may provide
data for the command.  The last in entry provides a status response
back from the command.

Virtqueues typically run asynchronous, running a callback function
when there's data in the channel.  We can't readily make use of this
in the command paths where we need to use this.  Instead, we kick
the virtqueue and spin.  The kick causes an I/O write, triggering an
immediate trap into the hypervisor.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c   | 68 ++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/virtio_net.h | 18 ++++++++++++
 2 files changed, 83 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fe576e75a538..67bb583b7fc9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -37,10 +37,12 @@ module_param(gso, bool, 0444);
 #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
 #define GOOD_COPY_LEN	128
 
+#define VIRTNET_SEND_COMMAND_SG_MAX    0
+
 struct virtnet_info
 {
 	struct virtio_device *vdev;
-	struct virtqueue *rvq, *svq;
+	struct virtqueue *rvq, *svq, *cvq;
 	struct net_device *dev;
 	struct napi_struct napi;
 	unsigned int status;
@@ -589,6 +591,53 @@ static int virtnet_open(struct net_device *dev)
 	return 0;
 }
 
+/*
+ * Send command via the control virtqueue and check status.  Commands
+ * supported by the hypervisor, as indicated by feature bits, should
+ * never fail unless improperly formated.
+ */
+static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
+				 struct scatterlist *data, int out, int in)
+{
+	struct scatterlist sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
+	struct virtio_net_ctrl_hdr ctrl;
+	virtio_net_ctrl_ack status = ~0;
+	unsigned int tmp;
+
+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
+		BUG();  /* Caller should know better */
+		return false;
+	}
+
+	BUG_ON(out + in > VIRTNET_SEND_COMMAND_SG_MAX);
+
+	out++; /* Add header */
+	in++; /* Add return status */
+
+	ctrl.class = class;
+	ctrl.cmd = cmd;
+
+	sg_init_table(sg, out + in);
+
+	sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
+	memcpy(&sg[1], data, sizeof(struct scatterlist) * (out + in - 2));
+	sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
+
+	if (vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi) != 0)
+		BUG();
+
+	vi->cvq->vq_ops->kick(vi->cvq);
+
+	/*
+	 * Spin for a response, the kick causes an ioport write, trapping
+	 * into the hypervisor, so the request should be handled immediately.
+	 */
+	while (!vi->cvq->vq_ops->get_buf(vi->cvq, &tmp))
+		cpu_relax();
+
+	return status == VIRTIO_NET_OK;
+}
+
 static int virtnet_close(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
@@ -752,6 +801,14 @@ static int virtnet_probe(struct virtio_device *vdev)
 		goto free_recv;
 	}
 
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
+		vi->cvq = vdev->config->find_vq(vdev, 2, NULL);
+		if (IS_ERR(vi->cvq)) {
+			err = PTR_ERR(vi->svq);
+			goto free_send;
+		}
+	}
+
 	/* Initialize our empty receive and send queues. */
 	skb_queue_head_init(&vi->recv);
 	skb_queue_head_init(&vi->send);
@@ -764,7 +821,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
-		goto free_send;
+		goto free_ctrl;
 	}
 
 	/* Last of all, set up some receive buffers. */
@@ -784,6 +841,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 unregister:
 	unregister_netdev(dev);
+free_ctrl:
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
+		vdev->config->del_vq(vi->cvq);
 free_send:
 	vdev->config->del_vq(vi->svq);
 free_recv:
@@ -815,6 +875,8 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	vdev->config->del_vq(vi->svq);
 	vdev->config->del_vq(vi->rvq);
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
+		vdev->config->del_vq(vi->cvq);
 	unregister_netdev(vi->dev);
 
 	while (vi->pages)
@@ -834,7 +896,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
 	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
 	VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
-	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS,
+	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
 	VIRTIO_F_NOTIFY_ON_EMPTY,
 };
 
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index d8e362d52fd8..245eda829aa8 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -23,6 +23,7 @@
 #define VIRTIO_NET_F_HOST_UFO	14	/* Host can handle UFO in. */
 #define VIRTIO_NET_F_MRG_RXBUF	15	/* Host can merge receive buffers. */
 #define VIRTIO_NET_F_STATUS	16	/* virtio_net_config.status available */
+#define VIRTIO_NET_F_CTRL_VQ	17	/* Control channel available */
 
 #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
 
@@ -59,4 +60,21 @@ struct virtio_net_hdr_mrg_rxbuf {
 	__u16 num_buffers;	/* Number of merged rx buffers */
 };
 
+/*
+ * Control virtqueue data structures
+ *
+ * The control virtqueue expects a header in the first sg entry
+ * and an ack/status response in the last entry.  Data for the
+ * command goes in between.
+ */
+struct virtio_net_ctrl_hdr {
+	__u8 class;
+	__u8 cmd;
+} __attribute__((packed));
+
+typedef __u8 virtio_net_ctrl_ack;
+
+#define VIRTIO_NET_OK     0
+#define VIRTIO_NET_ERR    1
+
 #endif /* _LINUX_VIRTIO_NET_H */
-- 
cgit v1.2.3


From 2af7698e2dd698d452ab9d63a9ca5956bbe8fc3b Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@hp.com>
Date: Wed, 4 Feb 2009 09:02:40 +0000
Subject: virtio_net: Add a set_rx_mode interface

Make use of the RX_MODE control virtqueue class to enable the
set_rx_mode netdev interface.  This allows us to selectively
enable/disable promiscuous and allmulti mode so we don't see
packets we don't want.  For now, we automatically enable these
as needed if additional unicast or multicast addresses are
requested.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c   | 34 +++++++++++++++++++++++++++++++++-
 include/linux/virtio_net.h | 11 +++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 67bb583b7fc9..1abea9dc6f0f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -37,7 +37,7 @@ module_param(gso, bool, 0444);
 #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
 #define GOOD_COPY_LEN	128
 
-#define VIRTNET_SEND_COMMAND_SG_MAX    0
+#define VIRTNET_SEND_COMMAND_SG_MAX    1
 
 struct virtnet_info
 {
@@ -658,6 +658,36 @@ static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
 	return ethtool_op_set_tx_hw_csum(dev, data);
 }
 
+static void virtnet_set_rx_mode(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct scatterlist sg;
+	u8 promisc, allmulti;
+
+	/* We can't dynamicaly set ndo_set_rx_mode, so return gracefully */
+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
+		return;
+
+	promisc = ((dev->flags & IFF_PROMISC) != 0 || dev->uc_count > 0);
+	allmulti = ((dev->flags & IFF_ALLMULTI) != 0 || dev->mc_count > 0);
+
+	sg_set_buf(&sg, &promisc, sizeof(promisc));
+
+	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
+				  VIRTIO_NET_CTRL_RX_PROMISC,
+				  &sg, 1, 0))
+		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
+			 promisc ? "en" : "dis");
+
+	sg_set_buf(&sg, &allmulti, sizeof(allmulti));
+
+	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
+				  VIRTIO_NET_CTRL_RX_ALLMULTI,
+				  &sg, 1, 0))
+		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
+			 allmulti ? "en" : "dis");
+}
+
 static struct ethtool_ops virtnet_ethtool_ops = {
 	.set_tx_csum = virtnet_set_tx_csum,
 	.set_sg = ethtool_op_set_sg,
@@ -682,6 +712,7 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_start_xmit      = start_xmit,
 	.ndo_validate_addr   = eth_validate_addr,
 	.ndo_set_mac_address = eth_mac_addr,
+	.ndo_set_rx_mode     = virtnet_set_rx_mode,
 	.ndo_change_mtu	     = virtnet_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = virtnet_netpoll,
@@ -897,6 +928,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
 	VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
 	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
+	VIRTIO_NET_F_CTRL_RX,
 	VIRTIO_F_NOTIFY_ON_EMPTY,
 };
 
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 245eda829aa8..63b2461a40f1 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -24,6 +24,7 @@
 #define VIRTIO_NET_F_MRG_RXBUF	15	/* Host can merge receive buffers. */
 #define VIRTIO_NET_F_STATUS	16	/* virtio_net_config.status available */
 #define VIRTIO_NET_F_CTRL_VQ	17	/* Control channel available */
+#define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
 
 #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
 
@@ -77,4 +78,14 @@ typedef __u8 virtio_net_ctrl_ack;
 #define VIRTIO_NET_OK     0
 #define VIRTIO_NET_ERR    1
 
+/*
+ * Control the RX mode, ie. promisucous and allmulti.  PROMISC and
+ * ALLMULTI commands require an "out" sg entry containing a 1 byte
+ * state value, zero = disable, non-zero = enable.  These commands
+ * are supported with the VIRTIO_NET_F_CTRL_RX feature.
+ */
+#define VIRTIO_NET_CTRL_RX    0
+ #define VIRTIO_NET_CTRL_RX_PROMISC      0
+ #define VIRTIO_NET_CTRL_RX_ALLMULTI     1
+
 #endif /* _LINUX_VIRTIO_NET_H */
-- 
cgit v1.2.3


From f565a7c259d71cc186753653d978c646d2354b36 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@hp.com>
Date: Wed, 4 Feb 2009 09:02:45 +0000
Subject: virtio_net: Add a MAC filter table

Make use of the MAC control virtqueue class to support a MAC
filter table.  The filter table is managed by the hypervisor.
We consider the table to be available if the CTRL_RX feature
bit is set.  We leave it to the hypervisor to manage the table
and enable promiscuous or all-multi mode as necessary depending
on the resources available to it.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c   | 55 +++++++++++++++++++++++++++++++++++++++-------
 include/linux/virtio_net.h | 23 +++++++++++++++++++
 2 files changed, 70 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1abea9dc6f0f..daab9c9b0a40 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -37,7 +37,7 @@ module_param(gso, bool, 0444);
 #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
 #define GOOD_COPY_LEN	128
 
-#define VIRTNET_SEND_COMMAND_SG_MAX    1
+#define VIRTNET_SEND_COMMAND_SG_MAX    2
 
 struct virtnet_info
 {
@@ -661,31 +661,70 @@ static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
 static void virtnet_set_rx_mode(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	struct scatterlist sg;
+	struct scatterlist sg[2];
 	u8 promisc, allmulti;
+	struct virtio_net_ctrl_mac *mac_data;
+	struct dev_addr_list *addr;
+	void *buf;
+	int i;
 
 	/* We can't dynamicaly set ndo_set_rx_mode, so return gracefully */
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
-	promisc = ((dev->flags & IFF_PROMISC) != 0 || dev->uc_count > 0);
-	allmulti = ((dev->flags & IFF_ALLMULTI) != 0 || dev->mc_count > 0);
+	promisc = ((dev->flags & IFF_PROMISC) != 0);
+	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
 
-	sg_set_buf(&sg, &promisc, sizeof(promisc));
+	sg_set_buf(sg, &promisc, sizeof(promisc));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_PROMISC,
-				  &sg, 1, 0))
+				  sg, 1, 0))
 		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
 			 promisc ? "en" : "dis");
 
-	sg_set_buf(&sg, &allmulti, sizeof(allmulti));
+	sg_set_buf(sg, &allmulti, sizeof(allmulti));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_ALLMULTI,
-				  &sg, 1, 0))
+				  sg, 1, 0))
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 			 allmulti ? "en" : "dis");
+
+	/* MAC filter - use one buffer for both lists */
+	mac_data = buf = kzalloc(((dev->uc_count + dev->mc_count) * ETH_ALEN) +
+				 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
+	if (!buf) {
+		dev_warn(&dev->dev, "No memory for MAC address buffer\n");
+		return;
+	}
+
+	/* Store the unicast list and count in the front of the buffer */
+	mac_data->entries = dev->uc_count;
+	addr = dev->uc_list;
+	for (i = 0; i < dev->uc_count; i++, addr = addr->next)
+		memcpy(&mac_data->macs[i][0], addr->da_addr, ETH_ALEN);
+
+	sg_set_buf(&sg[0], mac_data,
+		   sizeof(mac_data->entries) + (dev->uc_count * ETH_ALEN));
+
+	/* multicast list and count fill the end */
+	mac_data = (void *)&mac_data->macs[dev->uc_count][0];
+
+	mac_data->entries = dev->mc_count;
+	addr = dev->mc_list;
+	for (i = 0; i < dev->mc_count; i++, addr = addr->next)
+		memcpy(&mac_data->macs[i][0], addr->da_addr, ETH_ALEN);
+
+	sg_set_buf(&sg[1], mac_data,
+		   sizeof(mac_data->entries) + (dev->mc_count * ETH_ALEN));
+
+	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
+				  VIRTIO_NET_CTRL_MAC_TABLE_SET,
+				  sg, 2, 0))
+		dev_warn(&dev->dev, "Failed to set MAC fitler table.\n");
+
+	kfree(buf);
 }
 
 static struct ethtool_ops virtnet_ethtool_ops = {
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 63b2461a40f1..ba82b653cace 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -88,4 +88,27 @@ typedef __u8 virtio_net_ctrl_ack;
  #define VIRTIO_NET_CTRL_RX_PROMISC      0
  #define VIRTIO_NET_CTRL_RX_ALLMULTI     1
 
+/*
+ * Control the MAC filter table.
+ *
+ * The MAC filter table is managed by the hypervisor, the guest should
+ * assume the size is infinite.  Filtering should be considered
+ * non-perfect, ie. based on hypervisor resources, the guest may
+ * received packets from sources not specified in the filter list.
+ *
+ * In addition to the class/cmd header, the TABLE_SET command requires
+ * two out scatterlists.  Each contains a 4 byte count of entries followed
+ * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
+ * first sg list contains unicast addresses, the second is for multicast.
+ * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
+ * is available.
+ */
+struct virtio_net_ctrl_mac {
+	__u32 entries;
+	__u8 macs[][ETH_ALEN];
+} __attribute__((packed));
+
+#define VIRTIO_NET_CTRL_MAC    1
+ #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
+
 #endif /* _LINUX_VIRTIO_NET_H */
-- 
cgit v1.2.3


From 0bde95690d65653e420d04856c5d5783155c747c Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@hp.com>
Date: Wed, 4 Feb 2009 09:02:50 +0000
Subject: virtio_net: Add support for VLAN filtering in the hypervisor

VLAN filtering allows the hypervisor to drop packets from VLANs
that we're not a part of, further reducing the number of extraneous
packets recieved.  This makes use of the VLAN virtqueue command class.
The CTRL_VLAN feature bit tells us whether the backend supports VLAN
filtering.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c   | 31 ++++++++++++++++++++++++++++++-
 include/linux/virtio_net.h | 14 ++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index daab9c9b0a40..e68813a246db 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -727,6 +727,30 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	kfree(buf);
 }
 
+static void virnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct scatterlist sg;
+
+	sg_set_buf(&sg, &vid, sizeof(vid));
+
+	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
+				  VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
+		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
+}
+
+static void virnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct scatterlist sg;
+
+	sg_set_buf(&sg, &vid, sizeof(vid));
+
+	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
+				  VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
+		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
+}
+
 static struct ethtool_ops virtnet_ethtool_ops = {
 	.set_tx_csum = virtnet_set_tx_csum,
 	.set_sg = ethtool_op_set_sg,
@@ -753,6 +777,8 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_set_rx_mode     = virtnet_set_rx_mode,
 	.ndo_change_mtu	     = virtnet_change_mtu,
+	.ndo_vlan_rx_add_vid = virnet_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = virnet_vlan_rx_kill_vid,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
@@ -877,6 +903,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 			err = PTR_ERR(vi->svq);
 			goto free_send;
 		}
+
+		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
+			dev->features |= NETIF_F_HW_VLAN_FILTER;
 	}
 
 	/* Initialize our empty receive and send queues. */
@@ -967,7 +996,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
 	VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
 	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
-	VIRTIO_NET_F_CTRL_RX,
+	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
 	VIRTIO_F_NOTIFY_ON_EMPTY,
 };
 
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index ba82b653cace..242348bb3766 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -25,6 +25,7 @@
 #define VIRTIO_NET_F_STATUS	16	/* virtio_net_config.status available */
 #define VIRTIO_NET_F_CTRL_VQ	17	/* Control channel available */
 #define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
+#define VIRTIO_NET_F_CTRL_VLAN	19	/* Control channel VLAN filtering */
 
 #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
 
@@ -111,4 +112,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_MAC    1
  #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
 
+/*
+ * Control VLAN filtering
+ *
+ * The VLAN filter table is controlled via a simple ADD/DEL interface.
+ * VLAN IDs not added may be filterd by the hypervisor.  Del is the
+ * opposite of add.  Both commands expect an out entry containing a 2
+ * byte VLAN ID.  VLAN filterting is available with the
+ * VIRTIO_NET_F_CTRL_VLAN feature bit.
+ */
+#define VIRTIO_NET_CTRL_VLAN       2
+ #define VIRTIO_NET_CTRL_VLAN_ADD             0
+ #define VIRTIO_NET_CTRL_VLAN_DEL             1
+
 #endif /* _LINUX_VIRTIO_NET_H */
-- 
cgit v1.2.3


From 33dccbb050bbe35b88ca8cf1228dcf3e4d4b3554 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 5 Feb 2009 21:25:32 -0800
Subject: tun: Limit amount of queued packets per device

Unlike a normal socket path, the tuntap device send path does
not have any accounting.  This means that the user-space sender
may be able to pin down arbitrary amounts of kernel memory by
continuing to send data to an end-point that is congested.

Even when this isn't an issue because of limited queueing at
most end points, this can also be a problem because its only
response to congestion is packet loss.  That is, when those
local queues at the end-point fills up, the tuntap device will
start wasting system time because it will continue to send
data there which simply gets dropped straight away.

Of course one could argue that everybody should do congestion
control end-to-end, unfortunately there are people in this world
still hooked on UDP, and they don't appear to be going away
anywhere fast.  In fact, we've always helped them by performing
accounting in our UDP code, the sole purpose of which is to
provide congestion feedback other than through packet loss.

This patch attempts to apply the same bandaid to the tuntap device.
It creates a pseudo-socket object which is used to account our
packets just as a normal socket does for UDP.  Of course things
are a little complex because we're actually reinjecting traffic
back into the stack rather than out of the stack.

The stack complexities however should have been resolved by preceding
patches.  So this one can simply start using skb_set_owner_w.

For now the accounting is essentially disabled by default for
backwards compatibility.  In particular, we set the cap to INT_MAX.
This is so that existing applications don't get confused by the
sudden arrival EAGAIN errors.

In future we may wish (or be forced to) do this by default.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c      | 167 +++++++++++++++++++++++++++++++++----------------
 fs/compat_ioctl.c      |   2 +
 include/linux/if_tun.h |   2 +
 3 files changed, 118 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 15d67635bb10..0476549841ac 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -64,6 +64,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
+#include <net/sock.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -95,6 +96,8 @@ struct tun_file {
 	wait_queue_head_t	read_wait;
 };
 
+struct tun_sock;
+
 struct tun_struct {
 	struct tun_file		*tfile;
 	unsigned int 		flags;
@@ -107,12 +110,24 @@ struct tun_struct {
 	struct fasync_struct	*fasync;
 
 	struct tap_filter       txflt;
+	struct sock		*sk;
+	struct socket		socket;
 
 #ifdef TUN_DEBUG
 	int debug;
 #endif
 };
 
+struct tun_sock {
+	struct sock		sk;
+	struct tun_struct	*tun;
+};
+
+static inline struct tun_sock *tun_sk(struct sock *sk)
+{
+	return container_of(sk, struct tun_sock, sk);
+}
+
 static int tun_attach(struct tun_struct *tun, struct file *file)
 {
 	struct tun_file *tfile = file->private_data;
@@ -461,7 +476,8 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 {
 	struct tun_file *tfile = file->private_data;
 	struct tun_struct *tun = __tun_get(tfile);
-	unsigned int mask = POLLOUT | POLLWRNORM;
+	struct sock *sk = tun->sk;
+	unsigned int mask = 0;
 
 	if (!tun)
 		return POLLERR;
@@ -473,6 +489,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 	if (!skb_queue_empty(&tun->readq))
 		mask |= POLLIN | POLLRDNORM;
 
+	if (sock_writeable(sk) ||
+	    (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+	     sock_writeable(sk)))
+		mask |= POLLOUT | POLLWRNORM;
+
 	if (tun->dev->reg_state != NETREG_REGISTERED)
 		mask = POLLERR;
 
@@ -482,66 +503,35 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 
 /* prepad is the amount to reserve at front.  len is length after that.
  * linear is a hint as to how much to copy (usually headers). */
-static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear,
-				     gfp_t gfp)
+static inline struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
+					    size_t prepad, size_t len,
+					    size_t linear, int noblock)
 {
+	struct sock *sk = tun->sk;
 	struct sk_buff *skb;
-	unsigned int i;
-
-	skb = alloc_skb(prepad + len, gfp|__GFP_NOWARN);
-	if (skb) {
-		skb_reserve(skb, prepad);
-		skb_put(skb, len);
-		return skb;
-	}
+	int err;
 
 	/* Under a page?  Don't bother with paged skb. */
 	if (prepad + len < PAGE_SIZE)
-		return NULL;
+		linear = len;
 
-	/* Start with a normal skb, and add pages. */
-	skb = alloc_skb(prepad + linear, gfp);
+	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
+				   &err);
 	if (!skb)
-		return NULL;
+		return ERR_PTR(err);
 
 	skb_reserve(skb, prepad);
 	skb_put(skb, linear);
-
-	len -= linear;
-
-	for (i = 0; i < MAX_SKB_FRAGS; i++) {
-		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
-
-		f->page = alloc_page(gfp|__GFP_ZERO);
-		if (!f->page)
-			break;
-
-		f->page_offset = 0;
-		f->size = PAGE_SIZE;
-
-		skb->data_len += PAGE_SIZE;
-		skb->len += PAGE_SIZE;
-		skb->truesize += PAGE_SIZE;
-		skb_shinfo(skb)->nr_frags++;
-
-		if (len < PAGE_SIZE) {
-			len = 0;
-			break;
-		}
-		len -= PAGE_SIZE;
-	}
-
-	/* Too large, or alloc fail? */
-	if (unlikely(len)) {
-		kfree_skb(skb);
-		skb = NULL;
-	}
+	skb->data_len = len - linear;
+	skb->len += len - linear;
 
 	return skb;
 }
 
 /* Get packet from user space buffer */
-static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
+static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
+				       struct iovec *iv, size_t count,
+				       int noblock)
 {
 	struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
 	struct sk_buff *skb;
@@ -573,9 +563,11 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
 			return -EINVAL;
 	}
 
-	if (!(skb = tun_alloc_skb(align, len, gso.hdr_len, GFP_KERNEL))) {
-		tun->dev->stats.rx_dropped++;
-		return -ENOMEM;
+	skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
+	if (IS_ERR(skb)) {
+		if (PTR_ERR(skb) != -EAGAIN)
+			tun->dev->stats.rx_dropped++;
+		return PTR_ERR(skb);
 	}
 
 	if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
@@ -661,7 +653,8 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
 static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 			      unsigned long count, loff_t pos)
 {
-	struct tun_struct *tun = tun_get(iocb->ki_filp);
+	struct file *file = iocb->ki_filp;
+	struct tun_struct *tun = file->private_data;
 	ssize_t result;
 
 	if (!tun)
@@ -669,7 +662,8 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 
 	DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
 
-	result = tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count));
+	result = tun_get_user(tun, (struct iovec *)iv, iov_length(iv, count),
+			      file->f_flags & O_NONBLOCK);
 
 	tun_put(tun);
 	return result;
@@ -828,11 +822,40 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = {
 	.validate	= tun_validate,
 };
 
+static void tun_sock_write_space(struct sock *sk)
+{
+	struct tun_struct *tun;
+
+	if (!sock_writeable(sk))
+		return;
+
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible_sync(sk->sk_sleep);
+
+	if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
+		return;
+
+	tun = container_of(sk, struct tun_sock, sk)->tun;
+	kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
+}
+
+static void tun_sock_destruct(struct sock *sk)
+{
+	dev_put(container_of(sk, struct tun_sock, sk)->tun->dev);
+}
+
+static struct proto tun_proto = {
+	.name		= "tun",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct tun_sock),
+};
 
 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 {
+	struct sock *sk;
 	struct tun_struct *tun;
 	struct net_device *dev;
+	struct tun_file *tfile = file->private_data;
 	int err;
 
 	dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -885,14 +908,31 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		tun->flags = flags;
 		tun->txflt.count = 0;
 
+		err = -ENOMEM;
+		sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
+		if (!sk)
+			goto err_free_dev;
+
+		/* This ref count is for tun->sk. */
+		dev_hold(dev);
+		sock_init_data(&tun->socket, sk);
+		sk->sk_write_space = tun_sock_write_space;
+		sk->sk_destruct = tun_sock_destruct;
+		sk->sk_sndbuf = INT_MAX;
+		sk->sk_sleep = &tfile->read_wait;
+
+		tun->sk = sk;
+		container_of(sk, struct tun_sock, sk)->tun = tun;
+
 		tun_net_init(dev);
 
 		if (strchr(dev->name, '%')) {
 			err = dev_alloc_name(dev, dev->name);
 			if (err < 0)
-				goto err_free_dev;
+				goto err_free_sk;
 		}
 
+		err = -EINVAL;
 		err = register_netdevice(tun->dev);
 		if (err < 0)
 			goto err_free_dev;
@@ -928,6 +968,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 	strcpy(ifr->ifr_name, tun->dev->name);
 	return 0;
 
+ err_free_sk:
+	sock_put(sk);
  err_free_dev:
 	free_netdev(dev);
  failed:
@@ -1012,6 +1054,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 	struct tun_struct *tun;
 	void __user* argp = (void __user*)arg;
 	struct ifreq ifr;
+	int sndbuf;
 	int ret;
 
 	if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
@@ -1151,6 +1194,22 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
 		rtnl_unlock();
 		break;
+
+	case TUNGETSNDBUF:
+		sndbuf = tun->sk->sk_sndbuf;
+		if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
+			ret = -EFAULT;
+		break;
+
+	case TUNSETSNDBUF:
+		if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		tun->sk->sk_sndbuf = sndbuf;
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
@@ -1218,8 +1277,10 @@ static int tun_chr_close(struct inode *inode, struct file *file)
 		__tun_detach(tun);
 
 		/* If desireable, unregister the netdevice. */
-		if (!(tun->flags & TUN_PERSIST))
+		if (!(tun->flags & TUN_PERSIST)) {
+			sock_put(tun->sk);
 			unregister_netdevice(tun->dev);
+		}
 
 		rtnl_unlock();
 	}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c8f8d5904f5e..c03c10d7fb6b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1988,6 +1988,8 @@ COMPATIBLE_IOCTL(TUNSETGROUP)
 COMPATIBLE_IOCTL(TUNGETFEATURES)
 COMPATIBLE_IOCTL(TUNSETOFFLOAD)
 COMPATIBLE_IOCTL(TUNSETTXFILTER)
+COMPATIBLE_IOCTL(TUNGETSNDBUF)
+COMPATIBLE_IOCTL(TUNSETSNDBUF)
 /* Big V */
 COMPATIBLE_IOCTL(VT_SETMODE)
 COMPATIBLE_IOCTL(VT_GETMODE)
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 8529f57ba263..049d6c9428db 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -46,6 +46,8 @@
 #define TUNSETOFFLOAD  _IOW('T', 208, unsigned int)
 #define TUNSETTXFILTER _IOW('T', 209, unsigned int)
 #define TUNGETIFF      _IOR('T', 210, unsigned int)
+#define TUNGETSNDBUF   _IOR('T', 211, int)
+#define TUNSETSNDBUF   _IOW('T', 212, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
cgit v1.2.3


From fe2918b098cdbf55b69ba8762bd3de0ae64f33ff Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Thu, 5 Feb 2009 21:26:19 -0800
Subject: net: fix some trailing whitespaces

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h  |  8 ++++----
 include/linux/netdevice.h | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 7f3c735f422b..0216e1bdbc56 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -17,7 +17,7 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  */
- 
+
 #ifndef _LINUX_IF_ETHER_H
 #define _LINUX_IF_ETHER_H
 
@@ -25,7 +25,7 @@
 
 /*
  *	IEEE 802.3 Ethernet magic constants.  The frame sizes omit the preamble
- *	and FCS/CRC (frame check sequence). 
+ *	and FCS/CRC (frame check sequence).
  */
 
 #define ETH_ALEN	6		/* Octets in one ethernet addr	 */
@@ -83,7 +83,7 @@
 /*
  *	Non DIX types. Won't clash for 1500 types.
  */
- 
+
 #define ETH_P_802_3	0x0001		/* Dummy type for 802.3 frames  */
 #define ETH_P_AX25	0x0002		/* Dummy protocol id for AX.25  */
 #define ETH_P_ALL	0x0003		/* Every packet (be careful!!!) */
@@ -109,7 +109,7 @@
 /*
  *	This is an Ethernet frame header.
  */
- 
+
 struct ethhdr {
 	unsigned char	h_dest[ETH_ALEN];	/* destination eth addr	*/
 	unsigned char	h_source[ETH_ALEN];	/* source ether addr	*/
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7a5057fbb7cd..864519e585fc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -96,7 +96,7 @@ struct wireless_dev;
  *	Compute the worst case header length according to the protocols
  *	used.
  */
- 
+
 #if defined(CONFIG_WLAN_80211) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
 # if defined(CONFIG_MAC80211_MESH)
 #  define LL_MAX_HEADER 128
@@ -124,7 +124,7 @@ struct wireless_dev;
  *	Network device statistics. Akin to the 2.0 ether stats but
  *	with byte counters.
  */
- 
+
 struct net_device_stats
 {
 	unsigned long	rx_packets;		/* total packets received	*/
@@ -285,7 +285,7 @@ enum netdev_state_t
 
 /*
  * This structure holds at boot time configured netdevice settings. They
- * are then used in the device probing. 
+ * are then used in the device probing.
  */
 struct netdev_boot_setup {
 	char name[IFNAMSIZ];
@@ -740,7 +740,7 @@ struct net_device
 	void			*dsa_ptr;	/* dsa specific data */
 #endif
 	void 			*atalk_ptr;	/* AppleTalk link 	*/
-	void			*ip_ptr;	/* IPv4 specific data	*/  
+	void			*ip_ptr;	/* IPv4 specific data	*/
 	void                    *dn_ptr;        /* DECnet specific data */
 	void                    *ip6_ptr;       /* IPv6 specific data */
 	void			*ec_ptr;	/* Econet specific data	*/
@@ -753,7 +753,7 @@ struct net_device
  */
 	unsigned long		last_rx;	/* Time of last Rx	*/
 	/* Interface address info used in eth_type_trans() */
-	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address, (before bcast 
+	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address, (before bcast
 							   because most packets are unicast) */
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
-- 
cgit v1.2.3


From 527bdfee18ac6a4c026060c2c2b1144df9a5bf1f Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Fri, 6 Feb 2009 20:47:58 +0530
Subject: make linux/types.h as assembly safe

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 include/linux/types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/types.h b/include/linux/types.h
index 712ca53bc348..c30973ace890 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_TYPES_H
 #define _LINUX_TYPES_H
 
+#ifndef __ASSEMBLY__
 #ifdef	__KERNEL__
 
 #define DECLARE_BITMAP(name,bits) \
@@ -212,5 +213,5 @@ struct ustat {
 };
 
 #endif	/* __KERNEL__ */
-
+#endif /*  __ASSEMBLY__ */
 #endif /* _LINUX_TYPES_H */
-- 
cgit v1.2.3


From d6301d3dd1c287b32132dda15272a50c11e92a14 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 8 Feb 2009 19:24:13 -0800
Subject: net: Increase default NET_SKB_PAD to 32.

Several devices need to insert some "pre headers" in front of the
main packet data when they transmit a packet.

Currently we allocate only 16 bytes of pad room and this ends up not
being enough for some types of hardware (NIU, usb-net, s390 qeth,
etc.)

So increase this to 32.

Note that drivers still need to check in their transmit routine
whether enough headroom exists, and if not use skb_realloc_headroom().
Tunneling, IPSEC, and other encapsulation methods can cause the
padding area to be used up.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 08670d017479..5eba4007e07f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1287,7 +1287,7 @@ static inline int skb_network_offset(const struct sk_buff *skb)
  * The networking layer reserves some headroom in skb data (via
  * dev_alloc_skb). This is used to avoid having to reallocate skb data when
  * the header has to grow. In the default case, if the header has to grow
- * 16 bytes or less we avoid the reallocation.
+ * 32 bytes or less we avoid the reallocation.
  *
  * Unfortunately this headroom changes the DMA alignment of the resulting
  * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
@@ -1295,11 +1295,11 @@ static inline int skb_network_offset(const struct sk_buff *skb)
  * perhaps setting it to a cacheline in size (since that will maintain
  * cacheline alignment of the DMA). It must be a power of 2.
  *
- * Various parts of the networking layer expect at least 16 bytes of
+ * Various parts of the networking layer expect at least 32 bytes of
  * headroom, you should not reduce this.
  */
 #ifndef NET_SKB_PAD
-#define NET_SKB_PAD	16
+#define NET_SKB_PAD	32
 #endif
 
 extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
-- 
cgit v1.2.3


From 4ae5544f9a33e4ae306e337f96951eb3ff2df6d9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 8 Feb 2009 18:00:36 +0000
Subject: gro: Remember number of held packets instead of counting every time

This patch prepares for the move of the same_flow checks out of
dev_gro_receive.  As such we need to remember the number of held
packets since doing a loop just to count them every time is silly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +++
 net/core/dev.c            | 12 +++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 864519e585fc..9ee344bc6c13 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -314,6 +314,9 @@ struct napi_struct {
 	spinlock_t		poll_lock;
 	int			poll_owner;
 #endif
+
+	unsigned int		gro_count;
+
 	struct net_device	*dev;
 	struct list_head	dev_list;
 	struct sk_buff		*gro_list;
diff --git a/net/core/dev.c b/net/core/dev.c
index 709a9a922258..ae0b66936abe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2372,6 +2372,7 @@ void napi_gro_flush(struct napi_struct *napi)
 		napi_gro_complete(skb);
 	}
 
+	napi->gro_count = 0;
 	napi->gro_list = NULL;
 }
 EXPORT_SYMBOL(napi_gro_flush);
@@ -2402,7 +2403,6 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	struct packet_type *ptype;
 	__be16 type = skb->protocol;
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
-	int count = 0;
 	int same_flow;
 	int mac_len;
 	int ret;
@@ -2430,8 +2430,6 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		NAPI_GRO_CB(skb)->free = 0;
 
 		for (p = napi->gro_list; p; p = p->next) {
-			count++;
-
 			if (!NAPI_GRO_CB(p)->same_flow)
 				continue;
 
@@ -2457,15 +2455,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		*pp = nskb->next;
 		nskb->next = NULL;
 		napi_gro_complete(nskb);
-		count--;
+		napi->gro_count--;
 	}
 
 	if (same_flow)
 		goto ok;
 
-	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS)
+	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
 		goto normal;
 
+	napi->gro_count++;
 	NAPI_GRO_CB(skb)->count = 1;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 	skb->next = napi->gro_list;
@@ -2713,6 +2712,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
 	INIT_LIST_HEAD(&napi->poll_list);
+	napi->gro_count = 0;
 	napi->gro_list = NULL;
 	napi->skb = NULL;
 	napi->poll = poll;
@@ -2741,6 +2741,7 @@ void netif_napi_del(struct napi_struct *napi)
 	}
 
 	napi->gro_list = NULL;
+	napi->gro_count = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
 
@@ -5246,6 +5247,7 @@ static int __init net_dev_init(void)
 		queue->backlog.poll = process_backlog;
 		queue->backlog.weight = weight_p;
 		queue->backlog.gro_list = NULL;
+		queue->backlog.gro_count = 0;
 	}
 
 	dev_boot_phase = 0;
-- 
cgit v1.2.3


From aa4b9f533ed5a22952e038b9fac2447ccc682124 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 8 Feb 2009 18:00:37 +0000
Subject: gro: Optimise Ethernet header comparison

This patch optimises the Ethernet header comparison to use 2-byte
and 4-byte xors instead of memcmp.  In order to facilitate this,
the actual comparison is now carried out by the callers of the
shared dev_gro_receive function.

This has a significant impact when receiving 1500B packets through
10GbE.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 21 +++++++++++++++++++++
 include/linux/netdevice.h   |  7 +++++++
 net/8021q/vlan_core.c       |  4 +++-
 net/core/dev.c              | 23 ++---------------------
 4 files changed, 33 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 1cb0f0b90926..a1f17abba7dc 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -184,4 +184,25 @@ static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2],
 }
 #endif	/* __KERNEL__ */
 
+/**
+ * compare_ether_header - Compare two Ethernet headers
+ * @a: Pointer to Ethernet header
+ * @b: Pointer to Ethernet header
+ *
+ * Compare two ethernet headers, returns 0 if equal.
+ * This assumes that the network header (i.e., IP header) is 4-byte
+ * aligned OR the platform can handle unaligned access.  This is the
+ * case for all packets coming into netif_receive_skb or similar
+ * entry points.
+ */
+
+static inline int compare_ether_header(const void *a, const void *b)
+{
+	u32 *a32 = (u32 *)((u8 *)a + 2);
+	u32 *b32 = (u32 *)((u8 *)b + 2);
+
+	return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) |
+	       (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]);
+}
+
 #endif	/* _LINUX_ETHERDEVICE_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9ee344bc6c13..355662aac940 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1117,6 +1117,13 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb)
 	NAPI_GRO_CB(skb)->data_offset = 0;
 }
 
+static inline void *skb_gro_mac_header(struct sk_buff *skb)
+{
+	return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) :
+	       page_address(skb_shinfo(skb)->frags[0].page) +
+	       skb_shinfo(skb)->frags[0].page_offset;
+}
+
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
 				  const void *daddr, const void *saddr,
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 378fa69d625a..70435af153f2 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -85,7 +85,9 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 		goto drop;
 
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow = p->dev == skb->dev;
+		NAPI_GRO_CB(p)->same_flow =
+			p->dev == skb->dev && !compare_ether_header(
+				skb_mac_header(p), skb_gro_mac_header(skb));
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
diff --git a/net/core/dev.c b/net/core/dev.c
index ae0b66936abe..1e27a67df242 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -215,13 +215,6 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 }
 
-static inline void *skb_gro_mac_header(struct sk_buff *skb)
-{
-	return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) :
-	       page_address(skb_shinfo(skb)->frags[0].page) +
-	       skb_shinfo(skb)->frags[0].page_offset;
-}
-
 /* Device list insertion */
 static int list_netdevice(struct net_device *dev)
 {
@@ -2415,29 +2408,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		struct sk_buff *p;
-		void *mac;
-
 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
-		mac = skb_gro_mac_header(skb);
 		mac_len = skb->network_header - skb->mac_header;
 		skb->mac_len = mac_len;
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 
-		for (p = napi->gro_list; p; p = p->next) {
-			if (!NAPI_GRO_CB(p)->same_flow)
-				continue;
-
-			if (p->mac_len != mac_len ||
-			    memcmp(skb_mac_header(p), mac, mac_len))
-				NAPI_GRO_CB(p)->same_flow = 0;
-		}
-
 		pp = ptype->gro_receive(&napi->gro_list, skb);
 		break;
 	}
@@ -2492,7 +2472,8 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	struct sk_buff *p;
 
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow = 1;
+		NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
+			skb_mac_header(p), skb_gro_mac_header(skb));
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
-- 
cgit v1.2.3


From f130347c2dd8e7ce0757cd3cf80bedbc6ed63c4c Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Fri, 30 Jan 2009 09:26:42 -0800
Subject: cfg80211: add get reg command

This lets userspace request to get the currently set
regulatory domain.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  4 +++
 net/wireless/nl80211.c  | 81 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/reg.c      |  2 +-
 net/wireless/reg.h      |  2 ++
 4 files changed, 88 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 76aae3d8e97e..4bc27049f4e5 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -113,6 +113,8 @@
  * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by
  *	%NL80211_ATTR_IFINDEX.
  *
+ * @NL80211_CMD_GET_REG: ask the wireless core to send us its currently set
+ * 	regulatory domain.
  * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command
  *	after being queried by the kernel. CRDA replies by sending a regulatory
  *	domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our
@@ -188,6 +190,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_MGMT_EXTRA_IE,
 
+	NL80211_CMD_GET_REG,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e69da8d20474..d452396006ee 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2093,6 +2093,81 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
 
 #undef FILL_IN_MESH_PARAM_IF_SET
 
+static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *hdr = NULL;
+	struct nlattr *nl_reg_rules;
+	unsigned int i;
+	int err = -EINVAL;
+
+	mutex_lock(&cfg80211_drv_mutex);
+
+	if (!cfg80211_regdomain)
+		goto out;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOBUFS;
+		goto out;
+	}
+
+	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+			     NL80211_CMD_GET_REG);
+	if (!hdr)
+		goto nla_put_failure;
+
+	NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2,
+		cfg80211_regdomain->alpha2);
+
+	nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES);
+	if (!nl_reg_rules)
+		goto nla_put_failure;
+
+	for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) {
+		struct nlattr *nl_reg_rule;
+		const struct ieee80211_reg_rule *reg_rule;
+		const struct ieee80211_freq_range *freq_range;
+		const struct ieee80211_power_rule *power_rule;
+
+		reg_rule = &cfg80211_regdomain->reg_rules[i];
+		freq_range = &reg_rule->freq_range;
+		power_rule = &reg_rule->power_rule;
+
+		nl_reg_rule = nla_nest_start(msg, i);
+		if (!nl_reg_rule)
+			goto nla_put_failure;
+
+		NLA_PUT_U32(msg, NL80211_ATTR_REG_RULE_FLAGS,
+			reg_rule->flags);
+		NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_START,
+			freq_range->start_freq_khz);
+		NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_END,
+			freq_range->end_freq_khz);
+		NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_MAX_BW,
+			freq_range->max_bandwidth_khz);
+		NLA_PUT_U32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
+			power_rule->max_antenna_gain);
+		NLA_PUT_U32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP,
+			power_rule->max_eirp);
+
+		nla_nest_end(msg, nl_reg_rule);
+	}
+
+	nla_nest_end(msg, nl_reg_rules);
+
+	genlmsg_end(msg, hdr);
+	err = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	err = -EMSGSIZE;
+out:
+	mutex_unlock(&cfg80211_drv_mutex);
+	return err;
+}
+
 static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1];
@@ -2332,6 +2407,12 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_GET_REG,
+		.doit = nl80211_get_reg,
+		.policy = nl80211_policy,
+		/* can be retrieved by unprivileged users */
+	},
 	{
 		.cmd = NL80211_CMD_SET_REG,
 		.doit = nl80211_set_reg,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index f643d3981102..2323644330cd 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -57,7 +57,7 @@ static u32 supported_bandwidths[] = {
 /* Central wireless core regulatory domains, we only need two,
  * the current one and a world regulatory domain in case we have no
  * information to give us an alpha2 */
-static const struct ieee80211_regdomain *cfg80211_regdomain;
+const struct ieee80211_regdomain *cfg80211_regdomain;
 
 /* We use this as a place for the rd structure built from the
  * last parsed country IE to rest until CRDA gets back to us with
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index eb1dd5bc9b27..fe8c83f34fb7 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,6 +1,8 @@
 #ifndef __NET_WIRELESS_REG_H
 #define __NET_WIRELESS_REG_H
 
+extern const struct ieee80211_regdomain *cfg80211_regdomain;
+
 bool is_world_regdom(const char *alpha2);
 bool reg_is_valid_request(const char *alpha2);
 
-- 
cgit v1.2.3


From 0c2bec96945ccfc4a58a88d73531e392972ba6c5 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 3 Feb 2009 09:04:20 +0200
Subject: libertas: if_spi: add ability to call board specific setup/teardown
 methods

In certain cases it is required to perform board specific actions
before activating libertas G-SPI interface. These actions may include
power up of the chip, GPIOs setup, proper pin-strapping and SPI
controller config.
This patch adds ability to call board specific setup/teardown methods

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Acked-by: Andrey Yurovsky <andrey@cozybit.com>
Acked-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/libertas/if_spi.c | 15 +++++++++++++++
 include/linux/spi/libertas_spi.h       |  7 +++++++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/libertas/if_spi.c b/drivers/net/wireless/libertas/if_spi.c
index 7c02ea314fd1..07311e71af92 100644
--- a/drivers/net/wireless/libertas/if_spi.c
+++ b/drivers/net/wireless/libertas/if_spi.c
@@ -42,6 +42,7 @@ struct if_spi_packet {
 struct if_spi_card {
 	struct spi_device		*spi;
 	struct lbs_private		*priv;
+	struct libertas_spi_platform_data *pdata;
 
 	char				helper_fw_name[FIRMWARE_NAME_MAX];
 	char				main_fw_name[FIRMWARE_NAME_MAX];
@@ -1022,6 +1023,17 @@ static int __devinit if_spi_probe(struct spi_device *spi)
 
 	lbs_deb_enter(LBS_DEB_SPI);
 
+	if (!pdata) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (pdata->setup) {
+		err = pdata->setup(spi);
+		if (err)
+			goto out;
+	}
+
 	/* Allocate card structure to represent this specific device */
 	card = kzalloc(sizeof(struct if_spi_card), GFP_KERNEL);
 	if (!card) {
@@ -1029,6 +1041,7 @@ static int __devinit if_spi_probe(struct spi_device *spi)
 		goto out;
 	}
 	spi_set_drvdata(spi, card);
+	card->pdata = pdata;
 	card->spi = spi;
 	card->gpio_cs = pdata->gpio_cs;
 	card->prev_xfer_time = jiffies;
@@ -1158,6 +1171,8 @@ static int __devexit libertas_spi_remove(struct spi_device *spi)
 	if_spi_terminate_spi_thread(card);
 	lbs_remove_card(priv); /* will call free_netdev */
 	gpio_free(card->gpio_cs);
+	if (card->pdata->teardown)
+		card->pdata->teardown(spi);
 	free_if_spi_card(card);
 	lbs_deb_leave(LBS_DEB_SPI);
 	return 0;
diff --git a/include/linux/spi/libertas_spi.h b/include/linux/spi/libertas_spi.h
index ada71b4f3788..79506f5f9e67 100644
--- a/include/linux/spi/libertas_spi.h
+++ b/include/linux/spi/libertas_spi.h
@@ -10,6 +10,9 @@
  */
 #ifndef _LIBERTAS_SPI_H_
 #define _LIBERTAS_SPI_H_
+
+struct spi_device;
+
 struct libertas_spi_platform_data {
 	/* There are two ways to read data from the WLAN module's SPI
 	 * interface. Setting 0 or 1 here controls which one is used.
@@ -21,5 +24,9 @@ struct libertas_spi_platform_data {
 
 	/* GPIO number to use as chip select */
 	u16 gpio_cs;
+
+	/* Board specific setup/teardown */
+	int (*setup)(struct spi_device *spi);
+	int (*teardown)(struct spi_device *spi);
 };
 #endif
-- 
cgit v1.2.3


From c9703146158c0415a60799570397e488bc982af5 Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Tue, 3 Feb 2009 19:23:18 +0100
Subject: ssb: Add PMU support

This adds support for the SSB PMU.
A PMU is found on Low-Power devices.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/Makefile                      |   1 +
 drivers/ssb/driver_chipcommon.c           |  14 +-
 drivers/ssb/driver_chipcommon_pmu.c       | 508 ++++++++++++++++++++++++++++++
 include/linux/ssb/ssb_driver_chipcommon.h | 224 +++++++++++++
 4 files changed, 734 insertions(+), 13 deletions(-)
 create mode 100644 drivers/ssb/driver_chipcommon_pmu.c

(limited to 'include/linux')

diff --git a/drivers/ssb/Makefile b/drivers/ssb/Makefile
index 6f255e9c5af9..cfbb74f2982e 100644
--- a/drivers/ssb/Makefile
+++ b/drivers/ssb/Makefile
@@ -9,6 +9,7 @@ ssb-$(CONFIG_SSB_PCMCIAHOST)		+= pcmcia.o
 
 # built-in drivers
 ssb-y					+= driver_chipcommon.o
+ssb-y					+= driver_chipcommon_pmu.o
 ssb-$(CONFIG_SSB_DRIVER_MIPS)		+= driver_mipscore.o
 ssb-$(CONFIG_SSB_DRIVER_EXTIF)		+= driver_extif.o
 ssb-$(CONFIG_SSB_DRIVER_PCICORE)	+= driver_pcicore.o
diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c
index 571f4fd55236..9681536163ca 100644
--- a/drivers/ssb/driver_chipcommon.c
+++ b/drivers/ssb/driver_chipcommon.c
@@ -26,19 +26,6 @@ enum ssb_clksrc {
 };
 
 
-static inline u32 chipco_read32(struct ssb_chipcommon *cc,
-				u16 offset)
-{
-	return ssb_read32(cc->dev, offset);
-}
-
-static inline void chipco_write32(struct ssb_chipcommon *cc,
-				  u16 offset,
-				  u32 value)
-{
-	ssb_write32(cc->dev, offset, value);
-}
-
 static inline u32 chipco_write32_masked(struct ssb_chipcommon *cc, u16 offset,
 					u32 mask, u32 value)
 {
@@ -246,6 +233,7 @@ void ssb_chipcommon_init(struct ssb_chipcommon *cc)
 {
 	if (!cc->dev)
 		return; /* We don't have a ChipCommon */
+	ssb_pmu_init(cc);
 	chipco_powercontrol_init(cc);
 	ssb_chipco_set_clockmode(cc, SSB_CLKMODE_FAST);
 	calc_fast_powerup_delay(cc);
diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c
new file mode 100644
index 000000000000..4aaddeec55a2
--- /dev/null
+++ b/drivers/ssb/driver_chipcommon_pmu.c
@@ -0,0 +1,508 @@
+/*
+ * Sonics Silicon Backplane
+ * Broadcom ChipCommon Power Management Unit driver
+ *
+ * Copyright 2009, Michael Buesch <mb@bu3sch.de>
+ * Copyright 2007, Broadcom Corporation
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include <linux/ssb/ssb.h>
+#include <linux/ssb/ssb_regs.h>
+#include <linux/ssb/ssb_driver_chipcommon.h>
+#include <linux/delay.h>
+
+#include "ssb_private.h"
+
+static u32 ssb_chipco_pll_read(struct ssb_chipcommon *cc, u32 offset)
+{
+	chipco_write32(cc, SSB_CHIPCO_PLLCTL_ADDR, offset);
+	return chipco_read32(cc, SSB_CHIPCO_PLLCTL_DATA);
+}
+
+static void ssb_chipco_pll_write(struct ssb_chipcommon *cc,
+				 u32 offset, u32 value)
+{
+	chipco_write32(cc, SSB_CHIPCO_PLLCTL_ADDR, offset);
+	chipco_write32(cc, SSB_CHIPCO_PLLCTL_DATA, value);
+}
+
+struct pmu0_plltab_entry {
+	u16 freq;	/* Crystal frequency in kHz.*/
+	u8 xf;		/* Crystal frequency value for PMU control */
+	u8 wb_int;
+	u32 wb_frac;
+};
+
+static const struct pmu0_plltab_entry pmu0_plltab[] = {
+	{ .freq = 12000, .xf =  1, .wb_int = 73, .wb_frac = 349525, },
+	{ .freq = 13000, .xf =  2, .wb_int = 67, .wb_frac = 725937, },
+	{ .freq = 14400, .xf =  3, .wb_int = 61, .wb_frac = 116508, },
+	{ .freq = 15360, .xf =  4, .wb_int = 57, .wb_frac = 305834, },
+	{ .freq = 16200, .xf =  5, .wb_int = 54, .wb_frac = 336579, },
+	{ .freq = 16800, .xf =  6, .wb_int = 52, .wb_frac = 399457, },
+	{ .freq = 19200, .xf =  7, .wb_int = 45, .wb_frac = 873813, },
+	{ .freq = 19800, .xf =  8, .wb_int = 44, .wb_frac = 466033, },
+	{ .freq = 20000, .xf =  9, .wb_int = 44, .wb_frac = 0,      },
+	{ .freq = 25000, .xf = 10, .wb_int = 70, .wb_frac = 419430, },
+	{ .freq = 26000, .xf = 11, .wb_int = 67, .wb_frac = 725937, },
+	{ .freq = 30000, .xf = 12, .wb_int = 58, .wb_frac = 699050, },
+	{ .freq = 38400, .xf = 13, .wb_int = 45, .wb_frac = 873813, },
+	{ .freq = 40000, .xf = 14, .wb_int = 45, .wb_frac = 0,      },
+};
+#define SSB_PMU0_DEFAULT_XTALFREQ	20000
+
+static const struct pmu0_plltab_entry * pmu0_plltab_find_entry(u32 crystalfreq)
+{
+	const struct pmu0_plltab_entry *e;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(pmu0_plltab); i++) {
+		e = &pmu0_plltab[i];
+		if (e->freq == crystalfreq)
+			return e;
+	}
+
+	return NULL;
+}
+
+/* Tune the PLL to the crystal speed. crystalfreq is in kHz. */
+static void ssb_pmu0_pllinit_r0(struct ssb_chipcommon *cc,
+				u32 crystalfreq)
+{
+	struct ssb_bus *bus = cc->dev->bus;
+	const struct pmu0_plltab_entry *e = NULL;
+	u32 pmuctl, tmp, pllctl;
+	unsigned int i;
+
+	if ((bus->chip_id == 0x5354) && !crystalfreq) {
+		/* The 5354 crystal freq is 25MHz */
+		crystalfreq = 25000;
+	}
+	if (crystalfreq)
+		e = pmu0_plltab_find_entry(crystalfreq);
+	if (!e)
+		e = pmu0_plltab_find_entry(SSB_PMU0_DEFAULT_XTALFREQ);
+	BUG_ON(!e);
+	crystalfreq = e->freq;
+	cc->pmu.crystalfreq = e->freq;
+
+	/* Check if the PLL already is programmed to this frequency. */
+	pmuctl = chipco_read32(cc, SSB_CHIPCO_PMU_CTL);
+	if (((pmuctl & SSB_CHIPCO_PMU_CTL_XTALFREQ) >> SSB_CHIPCO_PMU_CTL_XTALFREQ_SHIFT) == e->xf) {
+		/* We're already there... */
+		return;
+	}
+
+	ssb_printk(KERN_INFO PFX "Programming PLL to %u.%03u MHz\n",
+		   (crystalfreq / 1000), (crystalfreq % 1000));
+
+	/* First turn the PLL off. */
+	switch (bus->chip_id) {
+	case 0x4328:
+		chipco_mask32(cc, SSB_CHIPCO_PMU_MINRES_MSK,
+			      ~(1 << SSB_PMURES_4328_BB_PLL_PU));
+		chipco_mask32(cc, SSB_CHIPCO_PMU_MAXRES_MSK,
+			      ~(1 << SSB_PMURES_4328_BB_PLL_PU));
+		break;
+	case 0x5354:
+		chipco_mask32(cc, SSB_CHIPCO_PMU_MINRES_MSK,
+			      ~(1 << SSB_PMURES_5354_BB_PLL_PU));
+		chipco_mask32(cc, SSB_CHIPCO_PMU_MAXRES_MSK,
+			      ~(1 << SSB_PMURES_5354_BB_PLL_PU));
+		break;
+	default:
+		SSB_WARN_ON(1);
+	}
+	for (i = 1500; i; i--) {
+		tmp = chipco_read32(cc, SSB_CHIPCO_CLKCTLST);
+		if (!(tmp & SSB_CHIPCO_CLKCTLST_HAVEHT))
+			break;
+		udelay(10);
+	}
+	tmp = chipco_read32(cc, SSB_CHIPCO_CLKCTLST);
+	if (tmp & SSB_CHIPCO_CLKCTLST_HAVEHT)
+		ssb_printk(KERN_EMERG PFX "Failed to turn the PLL off!\n");
+
+	/* Set PDIV in PLL control 0. */
+	pllctl = ssb_chipco_pll_read(cc, SSB_PMU0_PLLCTL0);
+	if (crystalfreq >= SSB_PMU0_PLLCTL0_PDIV_FREQ)
+		pllctl |= SSB_PMU0_PLLCTL0_PDIV_MSK;
+	else
+		pllctl &= ~SSB_PMU0_PLLCTL0_PDIV_MSK;
+	ssb_chipco_pll_write(cc, SSB_PMU0_PLLCTL0, pllctl);
+
+	/* Set WILD in PLL control 1. */
+	pllctl = ssb_chipco_pll_read(cc, SSB_PMU0_PLLCTL1);
+	pllctl &= ~SSB_PMU0_PLLCTL1_STOPMOD;
+	pllctl &= ~(SSB_PMU0_PLLCTL1_WILD_IMSK | SSB_PMU0_PLLCTL1_WILD_FMSK);
+	pllctl |= ((u32)e->wb_int << SSB_PMU0_PLLCTL1_WILD_IMSK_SHIFT) & SSB_PMU0_PLLCTL1_WILD_IMSK;
+	pllctl |= ((u32)e->wb_frac << SSB_PMU0_PLLCTL1_WILD_FMSK_SHIFT) & SSB_PMU0_PLLCTL1_WILD_FMSK;
+	if (e->wb_frac == 0)
+		pllctl |= SSB_PMU0_PLLCTL1_STOPMOD;
+	ssb_chipco_pll_write(cc, SSB_PMU0_PLLCTL1, pllctl);
+
+	/* Set WILD in PLL control 2. */
+	pllctl = ssb_chipco_pll_read(cc, SSB_PMU0_PLLCTL2);
+	pllctl &= ~SSB_PMU0_PLLCTL2_WILD_IMSKHI;
+	pllctl |= (((u32)e->wb_int >> 4) << SSB_PMU0_PLLCTL2_WILD_IMSKHI_SHIFT) & SSB_PMU0_PLLCTL2_WILD_IMSKHI;
+	ssb_chipco_pll_write(cc, SSB_PMU0_PLLCTL2, pllctl);
+
+	/* Set the crystalfrequency and the divisor. */
+	pmuctl = chipco_read32(cc, SSB_CHIPCO_PMU_CTL);
+	pmuctl &= ~SSB_CHIPCO_PMU_CTL_ILP_DIV;
+	pmuctl |= (((crystalfreq + 127) / 128 - 1) << SSB_CHIPCO_PMU_CTL_ILP_DIV_SHIFT)
+			& SSB_CHIPCO_PMU_CTL_ILP_DIV;
+	pmuctl &= ~SSB_CHIPCO_PMU_CTL_XTALFREQ;
+	pmuctl |= ((u32)e->xf << SSB_CHIPCO_PMU_CTL_XTALFREQ_SHIFT) & SSB_CHIPCO_PMU_CTL_XTALFREQ;
+	chipco_write32(cc, SSB_CHIPCO_PMU_CTL, pmuctl);
+}
+
+struct pmu1_plltab_entry {
+	u16 freq;	/* Crystal frequency in kHz.*/
+	u8 xf;		/* Crystal frequency value for PMU control */
+	u8 ndiv_int;
+	u32 ndiv_frac;
+	u8 p1div;
+	u8 p2div;
+};
+
+static const struct pmu1_plltab_entry pmu1_plltab[] = {
+	{ .freq = 12000, .xf =  1, .p1div = 3, .p2div = 22, .ndiv_int =  0x9, .ndiv_frac = 0xFFFFEF, },
+	{ .freq = 13000, .xf =  2, .p1div = 1, .p2div =  6, .ndiv_int =  0xb, .ndiv_frac = 0x483483, },
+	{ .freq = 14400, .xf =  3, .p1div = 1, .p2div = 10, .ndiv_int =  0xa, .ndiv_frac = 0x1C71C7, },
+	{ .freq = 15360, .xf =  4, .p1div = 1, .p2div =  5, .ndiv_int =  0xb, .ndiv_frac = 0x755555, },
+	{ .freq = 16200, .xf =  5, .p1div = 1, .p2div = 10, .ndiv_int =  0x5, .ndiv_frac = 0x6E9E06, },
+	{ .freq = 16800, .xf =  6, .p1div = 1, .p2div = 10, .ndiv_int =  0x5, .ndiv_frac = 0x3CF3CF, },
+	{ .freq = 19200, .xf =  7, .p1div = 1, .p2div =  9, .ndiv_int =  0x5, .ndiv_frac = 0x17B425, },
+	{ .freq = 19800, .xf =  8, .p1div = 1, .p2div = 11, .ndiv_int =  0x4, .ndiv_frac = 0xA57EB,  },
+	{ .freq = 20000, .xf =  9, .p1div = 1, .p2div = 11, .ndiv_int =  0x4, .ndiv_frac = 0,        },
+	{ .freq = 24000, .xf = 10, .p1div = 3, .p2div = 11, .ndiv_int =  0xa, .ndiv_frac = 0,        },
+	{ .freq = 25000, .xf = 11, .p1div = 5, .p2div = 16, .ndiv_int =  0xb, .ndiv_frac = 0,        },
+	{ .freq = 26000, .xf = 12, .p1div = 1, .p2div =  2, .ndiv_int = 0x10, .ndiv_frac = 0xEC4EC4, },
+	{ .freq = 30000, .xf = 13, .p1div = 3, .p2div =  8, .ndiv_int =  0xb, .ndiv_frac = 0,        },
+	{ .freq = 38400, .xf = 14, .p1div = 1, .p2div =  5, .ndiv_int =  0x4, .ndiv_frac = 0x955555, },
+	{ .freq = 40000, .xf = 15, .p1div = 1, .p2div =  2, .ndiv_int =  0xb, .ndiv_frac = 0,        },
+};
+
+#define SSB_PMU1_DEFAULT_XTALFREQ	15360
+
+static const struct pmu1_plltab_entry * pmu1_plltab_find_entry(u32 crystalfreq)
+{
+	const struct pmu1_plltab_entry *e;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(pmu1_plltab); i++) {
+		e = &pmu1_plltab[i];
+		if (e->freq == crystalfreq)
+			return e;
+	}
+
+	return NULL;
+}
+
+/* Tune the PLL to the crystal speed. crystalfreq is in kHz. */
+static void ssb_pmu1_pllinit_r0(struct ssb_chipcommon *cc,
+				u32 crystalfreq)
+{
+	struct ssb_bus *bus = cc->dev->bus;
+	const struct pmu1_plltab_entry *e = NULL;
+	u32 buffer_strength = 0;
+	u32 tmp, pllctl, pmuctl;
+	unsigned int i;
+
+	if (bus->chip_id == 0x4312) {
+		/* We do not touch the BCM4312 PLL and assume
+		 * the default crystal settings work out-of-the-box. */
+		cc->pmu.crystalfreq = 20000;
+		return;
+	}
+
+	if (crystalfreq)
+		e = pmu1_plltab_find_entry(crystalfreq);
+	if (!e)
+		e = pmu1_plltab_find_entry(SSB_PMU1_DEFAULT_XTALFREQ);
+	BUG_ON(!e);
+	crystalfreq = e->freq;
+	cc->pmu.crystalfreq = e->freq;
+
+	/* Check if the PLL already is programmed to this frequency. */
+	pmuctl = chipco_read32(cc, SSB_CHIPCO_PMU_CTL);
+	if (((pmuctl & SSB_CHIPCO_PMU_CTL_XTALFREQ) >> SSB_CHIPCO_PMU_CTL_XTALFREQ_SHIFT) == e->xf) {
+		/* We're already there... */
+		return;
+	}
+
+	ssb_printk(KERN_INFO PFX "Programming PLL to %u.%03u MHz\n",
+		   (crystalfreq / 1000), (crystalfreq % 1000));
+
+	/* First turn the PLL off. */
+	switch (bus->chip_id) {
+	case 0x4325:
+		chipco_mask32(cc, SSB_CHIPCO_PMU_MINRES_MSK,
+			      ~((1 << SSB_PMURES_4325_BBPLL_PWRSW_PU) |
+				(1 << SSB_PMURES_4325_HT_AVAIL)));
+		chipco_mask32(cc, SSB_CHIPCO_PMU_MAXRES_MSK,
+			      ~((1 << SSB_PMURES_4325_BBPLL_PWRSW_PU) |
+				(1 << SSB_PMURES_4325_HT_AVAIL)));
+		/* Adjust the BBPLL to 2 on all channels later. */
+		buffer_strength = 0x222222;
+		break;
+	default:
+		SSB_WARN_ON(1);
+	}
+	for (i = 1500; i; i--) {
+		tmp = chipco_read32(cc, SSB_CHIPCO_CLKCTLST);
+		if (!(tmp & SSB_CHIPCO_CLKCTLST_HAVEHT))
+			break;
+		udelay(10);
+	}
+	tmp = chipco_read32(cc, SSB_CHIPCO_CLKCTLST);
+	if (tmp & SSB_CHIPCO_CLKCTLST_HAVEHT)
+		ssb_printk(KERN_EMERG PFX "Failed to turn the PLL off!\n");
+
+	/* Set p1div and p2div. */
+	pllctl = ssb_chipco_pll_read(cc, SSB_PMU1_PLLCTL0);
+	pllctl &= ~(SSB_PMU1_PLLCTL0_P1DIV | SSB_PMU1_PLLCTL0_P2DIV);
+	pllctl |= ((u32)e->p1div << SSB_PMU1_PLLCTL0_P1DIV_SHIFT) & SSB_PMU1_PLLCTL0_P1DIV;
+	pllctl |= ((u32)e->p2div << SSB_PMU1_PLLCTL0_P2DIV_SHIFT) & SSB_PMU1_PLLCTL0_P2DIV;
+	ssb_chipco_pll_write(cc, SSB_PMU1_PLLCTL0, pllctl);
+
+	/* Set ndiv int and ndiv mode */
+	pllctl = ssb_chipco_pll_read(cc, SSB_PMU1_PLLCTL2);
+	pllctl &= ~(SSB_PMU1_PLLCTL2_NDIVINT | SSB_PMU1_PLLCTL2_NDIVMODE);
+	pllctl |= ((u32)e->ndiv_int << SSB_PMU1_PLLCTL2_NDIVINT_SHIFT) & SSB_PMU1_PLLCTL2_NDIVINT;
+	pllctl |= (1 << SSB_PMU1_PLLCTL2_NDIVMODE_SHIFT) & SSB_PMU1_PLLCTL2_NDIVMODE;
+	ssb_chipco_pll_write(cc, SSB_PMU1_PLLCTL2, pllctl);
+
+	/* Set ndiv frac */
+	pllctl = ssb_chipco_pll_read(cc, SSB_PMU1_PLLCTL3);
+	pllctl &= ~SSB_PMU1_PLLCTL3_NDIVFRAC;
+	pllctl |= ((u32)e->ndiv_frac << SSB_PMU1_PLLCTL3_NDIVFRAC_SHIFT) & SSB_PMU1_PLLCTL3_NDIVFRAC;
+	ssb_chipco_pll_write(cc, SSB_PMU1_PLLCTL3, pllctl);
+
+	/* Change the drive strength, if required. */
+	if (buffer_strength) {
+		pllctl = ssb_chipco_pll_read(cc, SSB_PMU1_PLLCTL5);
+		pllctl &= ~SSB_PMU1_PLLCTL5_CLKDRV;
+		pllctl |= (buffer_strength << SSB_PMU1_PLLCTL5_CLKDRV_SHIFT) & SSB_PMU1_PLLCTL5_CLKDRV;
+		ssb_chipco_pll_write(cc, SSB_PMU1_PLLCTL5, pllctl);
+	}
+
+	/* Tune the crystalfreq and the divisor. */
+	pmuctl = chipco_read32(cc, SSB_CHIPCO_PMU_CTL);
+	pmuctl &= ~(SSB_CHIPCO_PMU_CTL_ILP_DIV | SSB_CHIPCO_PMU_CTL_XTALFREQ);
+	pmuctl |= ((((u32)e->freq + 127) / 128 - 1) << SSB_CHIPCO_PMU_CTL_ILP_DIV_SHIFT)
+			& SSB_CHIPCO_PMU_CTL_ILP_DIV;
+	pmuctl |= ((u32)e->xf << SSB_CHIPCO_PMU_CTL_XTALFREQ_SHIFT) & SSB_CHIPCO_PMU_CTL_XTALFREQ;
+	chipco_write32(cc, SSB_CHIPCO_PMU_CTL, pmuctl);
+}
+
+static void ssb_pmu_pll_init(struct ssb_chipcommon *cc)
+{
+	struct ssb_bus *bus = cc->dev->bus;
+	u32 crystalfreq = 0; /* in kHz. 0 = keep default freq. */
+
+	if (bus->bustype == SSB_BUSTYPE_SSB) {
+		/* TODO: The user may override the crystal frequency. */
+	}
+
+	switch (bus->chip_id) {
+	case 0x4312:
+	case 0x4325:
+		ssb_pmu1_pllinit_r0(cc, crystalfreq);
+		break;
+	case 0x4328:
+	case 0x5354:
+		ssb_pmu0_pllinit_r0(cc, crystalfreq);
+		break;
+	default:
+		ssb_printk(KERN_ERR PFX
+			   "ERROR: PLL init unknown for device %04X\n",
+			   bus->chip_id);
+	}
+}
+
+struct pmu_res_updown_tab_entry {
+	u8 resource;	/* The resource number */
+	u16 updown;	/* The updown value */
+};
+
+enum pmu_res_depend_tab_task {
+	PMU_RES_DEP_SET = 1,
+	PMU_RES_DEP_ADD,
+	PMU_RES_DEP_REMOVE,
+};
+
+struct pmu_res_depend_tab_entry {
+	u8 resource;	/* The resource number */
+	u8 task;	/* SET | ADD | REMOVE */
+	u32 depend;	/* The depend mask */
+};
+
+static const struct pmu_res_updown_tab_entry pmu_res_updown_tab_4328a0[] = {
+	{ .resource = SSB_PMURES_4328_EXT_SWITCHER_PWM,		.updown = 0x0101, },
+	{ .resource = SSB_PMURES_4328_BB_SWITCHER_PWM,		.updown = 0x1F01, },
+	{ .resource = SSB_PMURES_4328_BB_SWITCHER_BURST,	.updown = 0x010F, },
+	{ .resource = SSB_PMURES_4328_BB_EXT_SWITCHER_BURST,	.updown = 0x0101, },
+	{ .resource = SSB_PMURES_4328_ILP_REQUEST,		.updown = 0x0202, },
+	{ .resource = SSB_PMURES_4328_RADIO_SWITCHER_PWM,	.updown = 0x0F01, },
+	{ .resource = SSB_PMURES_4328_RADIO_SWITCHER_BURST,	.updown = 0x0F01, },
+	{ .resource = SSB_PMURES_4328_ROM_SWITCH,		.updown = 0x0101, },
+	{ .resource = SSB_PMURES_4328_PA_REF_LDO,		.updown = 0x0F01, },
+	{ .resource = SSB_PMURES_4328_RADIO_LDO,		.updown = 0x0F01, },
+	{ .resource = SSB_PMURES_4328_AFE_LDO,			.updown = 0x0F01, },
+	{ .resource = SSB_PMURES_4328_PLL_LDO,			.updown = 0x0F01, },
+	{ .resource = SSB_PMURES_4328_BG_FILTBYP,		.updown = 0x0101, },
+	{ .resource = SSB_PMURES_4328_TX_FILTBYP,		.updown = 0x0101, },
+	{ .resource = SSB_PMURES_4328_RX_FILTBYP,		.updown = 0x0101, },
+	{ .resource = SSB_PMURES_4328_XTAL_PU,			.updown = 0x0101, },
+	{ .resource = SSB_PMURES_4328_XTAL_EN,			.updown = 0xA001, },
+	{ .resource = SSB_PMURES_4328_BB_PLL_FILTBYP,		.updown = 0x0101, },
+	{ .resource = SSB_PMURES_4328_RF_PLL_FILTBYP,		.updown = 0x0101, },
+	{ .resource = SSB_PMURES_4328_BB_PLL_PU,		.updown = 0x0701, },
+};
+
+static const struct pmu_res_depend_tab_entry pmu_res_depend_tab_4328a0[] = {
+	{
+		/* Adjust ILP Request to avoid forcing EXT/BB into burst mode. */
+		.resource = SSB_PMURES_4328_ILP_REQUEST,
+		.task = PMU_RES_DEP_SET,
+		.depend = ((1 << SSB_PMURES_4328_EXT_SWITCHER_PWM) |
+			   (1 << SSB_PMURES_4328_BB_SWITCHER_PWM)),
+	},
+};
+
+static const struct pmu_res_updown_tab_entry pmu_res_updown_tab_4325a0[] = {
+	{ .resource = SSB_PMURES_4325_XTAL_PU,			.updown = 0x1501, },
+};
+
+static const struct pmu_res_depend_tab_entry pmu_res_depend_tab_4325a0[] = {
+	{
+		/* Adjust HT-Available dependencies. */
+		.resource = SSB_PMURES_4325_HT_AVAIL,
+		.task = PMU_RES_DEP_ADD,
+		.depend = ((1 << SSB_PMURES_4325_RX_PWRSW_PU) |
+			   (1 << SSB_PMURES_4325_TX_PWRSW_PU) |
+			   (1 << SSB_PMURES_4325_LOGEN_PWRSW_PU) |
+			   (1 << SSB_PMURES_4325_AFE_PWRSW_PU)),
+	},
+};
+
+static void ssb_pmu_resources_init(struct ssb_chipcommon *cc)
+{
+	struct ssb_bus *bus = cc->dev->bus;
+	u32 min_msk = 0, max_msk = 0;
+	unsigned int i;
+	const struct pmu_res_updown_tab_entry *updown_tab = NULL;
+	unsigned int updown_tab_size;
+	const struct pmu_res_depend_tab_entry *depend_tab = NULL;
+	unsigned int depend_tab_size;
+
+	switch (bus->chip_id) {
+	case 0x4312:
+		/* We keep the default settings:
+		 * min_msk = 0xCBB
+		 * max_msk = 0x7FFFF
+		 */
+		break;
+	case 0x4325:
+		/* Power OTP down later. */
+		min_msk = (1 << SSB_PMURES_4325_CBUCK_BURST) |
+			  (1 << SSB_PMURES_4325_LNLDO2_PU);
+		if (chipco_read32(cc, SSB_CHIPCO_CHIPSTAT) &
+		    SSB_CHIPCO_CHST_4325_PMUTOP_2B)
+			min_msk |= (1 << SSB_PMURES_4325_CLDO_CBUCK_BURST);
+		/* The PLL may turn on, if it decides so. */
+		max_msk = 0xFFFFF;
+		updown_tab = pmu_res_updown_tab_4325a0;
+		updown_tab_size = ARRAY_SIZE(pmu_res_updown_tab_4325a0);
+		depend_tab = pmu_res_depend_tab_4325a0;
+		depend_tab_size = ARRAY_SIZE(pmu_res_depend_tab_4325a0);
+		break;
+	case 0x4328:
+		min_msk = (1 << SSB_PMURES_4328_EXT_SWITCHER_PWM) |
+			  (1 << SSB_PMURES_4328_BB_SWITCHER_PWM) |
+			  (1 << SSB_PMURES_4328_XTAL_EN);
+		/* The PLL may turn on, if it decides so. */
+		max_msk = 0xFFFFF;
+		updown_tab = pmu_res_updown_tab_4328a0;
+		updown_tab_size = ARRAY_SIZE(pmu_res_updown_tab_4328a0);
+		depend_tab = pmu_res_depend_tab_4328a0;
+		depend_tab_size = ARRAY_SIZE(pmu_res_depend_tab_4328a0);
+		break;
+	case 0x5354:
+		/* The PLL may turn on, if it decides so. */
+		max_msk = 0xFFFFF;
+		break;
+	default:
+		ssb_printk(KERN_ERR PFX
+			   "ERROR: PMU resource config unknown for device %04X\n",
+			   bus->chip_id);
+	}
+
+	if (updown_tab) {
+		for (i = 0; i < updown_tab_size; i++) {
+			chipco_write32(cc, SSB_CHIPCO_PMU_RES_TABSEL,
+				       updown_tab[i].resource);
+			chipco_write32(cc, SSB_CHIPCO_PMU_RES_UPDNTM,
+				       updown_tab[i].updown);
+		}
+	}
+	if (depend_tab) {
+		for (i = 0; i < depend_tab_size; i++) {
+			chipco_write32(cc, SSB_CHIPCO_PMU_RES_TABSEL,
+				       depend_tab[i].resource);
+			switch (depend_tab[i].task) {
+			case PMU_RES_DEP_SET:
+				chipco_write32(cc, SSB_CHIPCO_PMU_RES_DEPMSK,
+					       depend_tab[i].depend);
+				break;
+			case PMU_RES_DEP_ADD:
+				chipco_set32(cc, SSB_CHIPCO_PMU_RES_DEPMSK,
+					     depend_tab[i].depend);
+				break;
+			case PMU_RES_DEP_REMOVE:
+				chipco_mask32(cc, SSB_CHIPCO_PMU_RES_DEPMSK,
+					      ~(depend_tab[i].depend));
+				break;
+			default:
+				SSB_WARN_ON(1);
+			}
+		}
+	}
+
+	/* Set the resource masks. */
+	if (min_msk)
+		chipco_write32(cc, SSB_CHIPCO_PMU_MINRES_MSK, min_msk);
+	if (max_msk)
+		chipco_write32(cc, SSB_CHIPCO_PMU_MAXRES_MSK, max_msk);
+}
+
+void ssb_pmu_init(struct ssb_chipcommon *cc)
+{
+	struct ssb_bus *bus = cc->dev->bus;
+	u32 pmucap;
+
+	if (!(cc->capabilities & SSB_CHIPCO_CAP_PMU))
+		return;
+
+	pmucap = chipco_read32(cc, SSB_CHIPCO_PMU_CAP);
+	cc->pmu.rev = (pmucap & SSB_CHIPCO_PMU_CAP_REVISION);
+
+	ssb_dprintk(KERN_DEBUG PFX "Found rev %u PMU (capabilities 0x%08X)\n",
+		    cc->pmu.rev, pmucap);
+
+	if (cc->pmu.rev >= 1) {
+		if ((bus->chip_id == 0x4325) && (bus->chip_rev < 2)) {
+			chipco_mask32(cc, SSB_CHIPCO_PMU_CTL,
+				      ~SSB_CHIPCO_PMU_CTL_NOILPONW);
+		} else {
+			chipco_set32(cc, SSB_CHIPCO_PMU_CTL,
+				     SSB_CHIPCO_PMU_CTL_NOILPONW);
+		}
+	}
+	ssb_pmu_pll_init(cc);
+	ssb_pmu_resources_init(cc);
+}
diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h
index 7d7e03dcf77c..d3b1d18922f2 100644
--- a/include/linux/ssb/ssb_driver_chipcommon.h
+++ b/include/linux/ssb/ssb_driver_chipcommon.h
@@ -181,6 +181,16 @@
 #define SSB_CHIPCO_PROG_WAITCNT		0x0124
 #define SSB_CHIPCO_FLASH_CFG		0x0128
 #define SSB_CHIPCO_FLASH_WAITCNT	0x012C
+#define SSB_CHIPCO_CLKCTLST		0x01E0 /* Clock control and status (rev >= 20) */
+#define  SSB_CHIPCO_CLKCTLST_FORCEALP	0x00000001 /* Force ALP request */
+#define  SSB_CHIPCO_CLKCTLST_FORCEHT	0x00000002 /* Force HT request */
+#define  SSB_CHIPCO_CLKCTLST_FORCEILP	0x00000004 /* Force ILP request */
+#define  SSB_CHIPCO_CLKCTLST_HAVEALPREQ	0x00000008 /* ALP available request */
+#define  SSB_CHIPCO_CLKCTLST_HAVEHTREQ	0x00000010 /* HT available request */
+#define  SSB_CHIPCO_CLKCTLST_HWCROFF	0x00000020 /* Force HW clock request off */
+#define  SSB_CHIPCO_CLKCTLST_HAVEHT	0x00010000 /* HT available */
+#define  SSB_CHIPCO_CLKCTLST_HAVEALP	0x00020000 /* APL available */
+#define SSB_CHIPCO_HW_WORKAROUND	0x01E4 /* Hardware workaround (rev >= 20) */
 #define SSB_CHIPCO_UART0_DATA		0x0300
 #define SSB_CHIPCO_UART0_IMR		0x0304
 #define SSB_CHIPCO_UART0_FCR		0x0308
@@ -197,6 +207,196 @@
 #define SSB_CHIPCO_UART1_LSR		0x0414
 #define SSB_CHIPCO_UART1_MSR		0x0418
 #define SSB_CHIPCO_UART1_SCRATCH	0x041C
+/* PMU registers (rev >= 20) */
+#define SSB_CHIPCO_PMU_CTL			0x0600 /* PMU control */
+#define  SSB_CHIPCO_PMU_CTL_ILP_DIV		0xFFFF0000 /* ILP div mask */
+#define  SSB_CHIPCO_PMU_CTL_ILP_DIV_SHIFT	16
+#define  SSB_CHIPCO_PMU_CTL_NOILPONW		0x00000200 /* No ILP on wait */
+#define  SSB_CHIPCO_PMU_CTL_HTREQEN		0x00000100 /* HT req enable */
+#define  SSB_CHIPCO_PMU_CTL_ALPREQEN		0x00000080 /* ALP req enable */
+#define  SSB_CHIPCO_PMU_CTL_XTALFREQ		0x0000007C /* Crystal freq */
+#define  SSB_CHIPCO_PMU_CTL_XTALFREQ_SHIFT	2
+#define  SSB_CHIPCO_PMU_CTL_ILPDIVEN		0x00000002 /* ILP div enable */
+#define  SSB_CHIPCO_PMU_CTL_LPOSEL		0x00000001 /* LPO sel */
+#define SSB_CHIPCO_PMU_CAP			0x0604 /* PMU capabilities */
+#define  SSB_CHIPCO_PMU_CAP_REVISION		0x000000FF /* Revision mask */
+#define SSB_CHIPCO_PMU_STAT			0x0608 /* PMU status */
+#define  SSB_CHIPCO_PMU_STAT_INTPEND		0x00000040 /* Interrupt pending */
+#define  SSB_CHIPCO_PMU_STAT_SBCLKST		0x00000030 /* Backplane clock status? */
+#define  SSB_CHIPCO_PMU_STAT_HAVEALP		0x00000008 /* ALP available */
+#define  SSB_CHIPCO_PMU_STAT_HAVEHT		0x00000004 /* HT available */
+#define  SSB_CHIPCO_PMU_STAT_RESINIT		0x00000003 /* Res init */
+#define SSB_CHIPCO_PMU_RES_STAT			0x060C /* PMU res status */
+#define SSB_CHIPCO_PMU_RES_PEND			0x0610 /* PMU res pending */
+#define SSB_CHIPCO_PMU_TIMER			0x0614 /* PMU timer */
+#define SSB_CHIPCO_PMU_MINRES_MSK		0x0618 /* PMU min res mask */
+#define SSB_CHIPCO_PMU_MAXRES_MSK		0x061C /* PMU max res mask */
+#define SSB_CHIPCO_PMU_RES_TABSEL		0x0620 /* PMU res table sel */
+#define SSB_CHIPCO_PMU_RES_DEPMSK		0x0624 /* PMU res dep mask */
+#define SSB_CHIPCO_PMU_RES_UPDNTM		0x0628 /* PMU res updown timer */
+#define SSB_CHIPCO_PMU_RES_TIMER		0x062C /* PMU res timer */
+#define SSB_CHIPCO_PMU_CLKSTRETCH		0x0630 /* PMU clockstretch */
+#define SSB_CHIPCO_PMU_WATCHDOG			0x0634 /* PMU watchdog */
+#define SSB_CHIPCO_PMU_RES_REQTS		0x0640 /* PMU res req timer sel */
+#define SSB_CHIPCO_PMU_RES_REQT			0x0644 /* PMU res req timer */
+#define SSB_CHIPCO_PMU_RES_REQM			0x0648 /* PMU res req mask */
+#define SSB_CHIPCO_CHIPCTL_ADDR			0x0650
+#define SSB_CHIPCO_CHIPCTL_DATA			0x0654
+#define SSB_CHIPCO_REGCTL_ADDR			0x0658
+#define SSB_CHIPCO_REGCTL_DATA			0x065C
+#define SSB_CHIPCO_PLLCTL_ADDR			0x0660
+#define SSB_CHIPCO_PLLCTL_DATA			0x0664
+
+
+
+/** PMU PLL registers */
+
+/* PMU rev 0 PLL registers */
+#define SSB_PMU0_PLLCTL0			0
+#define  SSB_PMU0_PLLCTL0_PDIV_MSK		0x00000001
+#define  SSB_PMU0_PLLCTL0_PDIV_FREQ		25000 /* kHz */
+#define SSB_PMU0_PLLCTL1			1
+#define  SSB_PMU0_PLLCTL1_WILD_IMSK		0xF0000000 /* Wild int mask (low nibble) */
+#define  SSB_PMU0_PLLCTL1_WILD_IMSK_SHIFT	28
+#define  SSB_PMU0_PLLCTL1_WILD_FMSK		0x0FFFFF00 /* Wild frac mask */
+#define  SSB_PMU0_PLLCTL1_WILD_FMSK_SHIFT	8
+#define  SSB_PMU0_PLLCTL1_STOPMOD		0x00000040 /* Stop mod */
+#define SSB_PMU0_PLLCTL2			2
+#define  SSB_PMU0_PLLCTL2_WILD_IMSKHI		0x0000000F /* Wild int mask (high nibble) */
+#define  SSB_PMU0_PLLCTL2_WILD_IMSKHI_SHIFT	0
+
+/* PMU rev 1 PLL registers */
+#define SSB_PMU1_PLLCTL0			0
+#define  SSB_PMU1_PLLCTL0_P1DIV			0x00F00000 /* P1 div */
+#define  SSB_PMU1_PLLCTL0_P1DIV_SHIFT		20
+#define  SSB_PMU1_PLLCTL0_P2DIV			0x0F000000 /* P2 div */
+#define  SSB_PMU1_PLLCTL0_P2DIV_SHIFT		24
+#define SSB_PMU1_PLLCTL1			1
+#define  SSB_PMU1_PLLCTL1_M1DIV			0x000000FF /* M1 div */
+#define  SSB_PMU1_PLLCTL1_M1DIV_SHIFT		0
+#define  SSB_PMU1_PLLCTL1_M2DIV			0x0000FF00 /* M2 div */
+#define  SSB_PMU1_PLLCTL1_M2DIV_SHIFT		8
+#define  SSB_PMU1_PLLCTL1_M3DIV			0x00FF0000 /* M3 div */
+#define  SSB_PMU1_PLLCTL1_M3DIV_SHIFT		16
+#define  SSB_PMU1_PLLCTL1_M4DIV			0xFF000000 /* M4 div */
+#define  SSB_PMU1_PLLCTL1_M4DIV_SHIFT		24
+#define SSB_PMU1_PLLCTL2			2
+#define  SSB_PMU1_PLLCTL2_M5DIV			0x000000FF /* M5 div */
+#define  SSB_PMU1_PLLCTL2_M5DIV_SHIFT		0
+#define  SSB_PMU1_PLLCTL2_M6DIV			0x0000FF00 /* M6 div */
+#define  SSB_PMU1_PLLCTL2_M6DIV_SHIFT		8
+#define  SSB_PMU1_PLLCTL2_NDIVMODE		0x000E0000 /* NDIV mode */
+#define  SSB_PMU1_PLLCTL2_NDIVMODE_SHIFT	17
+#define  SSB_PMU1_PLLCTL2_NDIVINT		0x1FF00000 /* NDIV int */
+#define  SSB_PMU1_PLLCTL2_NDIVINT_SHIFT		20
+#define SSB_PMU1_PLLCTL3			3
+#define  SSB_PMU1_PLLCTL3_NDIVFRAC		0x00FFFFFF /* NDIV frac */
+#define  SSB_PMU1_PLLCTL3_NDIVFRAC_SHIFT	0
+#define SSB_PMU1_PLLCTL4			4
+#define SSB_PMU1_PLLCTL5			5
+#define  SSB_PMU1_PLLCTL5_CLKDRV		0xFFFFFF00 /* clk drv */
+#define  SSB_PMU1_PLLCTL5_CLKDRV_SHIFT		8
+
+/* BCM4312 PLL resource numbers. */
+#define SSB_PMURES_4312_SWITCHER_BURST		0
+#define SSB_PMURES_4312_SWITCHER_PWM    	1
+#define SSB_PMURES_4312_PA_REF_LDO		2
+#define SSB_PMURES_4312_CORE_LDO_BURST		3
+#define SSB_PMURES_4312_CORE_LDO_PWM		4
+#define SSB_PMURES_4312_RADIO_LDO		5
+#define SSB_PMURES_4312_ILP_REQUEST		6
+#define SSB_PMURES_4312_BG_FILTBYP		7
+#define SSB_PMURES_4312_TX_FILTBYP		8
+#define SSB_PMURES_4312_RX_FILTBYP		9
+#define SSB_PMURES_4312_XTAL_PU			10
+#define SSB_PMURES_4312_ALP_AVAIL		11
+#define SSB_PMURES_4312_BB_PLL_FILTBYP		12
+#define SSB_PMURES_4312_RF_PLL_FILTBYP		13
+#define SSB_PMURES_4312_HT_AVAIL		14
+
+/* BCM4325 PLL resource numbers. */
+#define SSB_PMURES_4325_BUCK_BOOST_BURST	0
+#define SSB_PMURES_4325_CBUCK_BURST		1
+#define SSB_PMURES_4325_CBUCK_PWM		2
+#define SSB_PMURES_4325_CLDO_CBUCK_BURST	3
+#define SSB_PMURES_4325_CLDO_CBUCK_PWM		4
+#define SSB_PMURES_4325_BUCK_BOOST_PWM		5
+#define SSB_PMURES_4325_ILP_REQUEST		6
+#define SSB_PMURES_4325_ABUCK_BURST		7
+#define SSB_PMURES_4325_ABUCK_PWM		8
+#define SSB_PMURES_4325_LNLDO1_PU		9
+#define SSB_PMURES_4325_LNLDO2_PU		10
+#define SSB_PMURES_4325_LNLDO3_PU		11
+#define SSB_PMURES_4325_LNLDO4_PU		12
+#define SSB_PMURES_4325_XTAL_PU			13
+#define SSB_PMURES_4325_ALP_AVAIL		14
+#define SSB_PMURES_4325_RX_PWRSW_PU		15
+#define SSB_PMURES_4325_TX_PWRSW_PU		16
+#define SSB_PMURES_4325_RFPLL_PWRSW_PU		17
+#define SSB_PMURES_4325_LOGEN_PWRSW_PU		18
+#define SSB_PMURES_4325_AFE_PWRSW_PU		19
+#define SSB_PMURES_4325_BBPLL_PWRSW_PU		20
+#define SSB_PMURES_4325_HT_AVAIL		21
+
+/* BCM4328 PLL resource numbers. */
+#define SSB_PMURES_4328_EXT_SWITCHER_PWM	0
+#define SSB_PMURES_4328_BB_SWITCHER_PWM		1
+#define SSB_PMURES_4328_BB_SWITCHER_BURST	2
+#define SSB_PMURES_4328_BB_EXT_SWITCHER_BURST	3
+#define SSB_PMURES_4328_ILP_REQUEST		4
+#define SSB_PMURES_4328_RADIO_SWITCHER_PWM	5
+#define SSB_PMURES_4328_RADIO_SWITCHER_BURST	6
+#define SSB_PMURES_4328_ROM_SWITCH		7
+#define SSB_PMURES_4328_PA_REF_LDO		8
+#define SSB_PMURES_4328_RADIO_LDO		9
+#define SSB_PMURES_4328_AFE_LDO			10
+#define SSB_PMURES_4328_PLL_LDO			11
+#define SSB_PMURES_4328_BG_FILTBYP		12
+#define SSB_PMURES_4328_TX_FILTBYP		13
+#define SSB_PMURES_4328_RX_FILTBYP		14
+#define SSB_PMURES_4328_XTAL_PU			15
+#define SSB_PMURES_4328_XTAL_EN			16
+#define SSB_PMURES_4328_BB_PLL_FILTBYP		17
+#define SSB_PMURES_4328_RF_PLL_FILTBYP		18
+#define SSB_PMURES_4328_BB_PLL_PU		19
+
+/* BCM5354 PLL resource numbers. */
+#define SSB_PMURES_5354_EXT_SWITCHER_PWM	0
+#define SSB_PMURES_5354_BB_SWITCHER_PWM		1
+#define SSB_PMURES_5354_BB_SWITCHER_BURST	2
+#define SSB_PMURES_5354_BB_EXT_SWITCHER_BURST	3
+#define SSB_PMURES_5354_ILP_REQUEST		4
+#define SSB_PMURES_5354_RADIO_SWITCHER_PWM	5
+#define SSB_PMURES_5354_RADIO_SWITCHER_BURST	6
+#define SSB_PMURES_5354_ROM_SWITCH		7
+#define SSB_PMURES_5354_PA_REF_LDO		8
+#define SSB_PMURES_5354_RADIO_LDO		9
+#define SSB_PMURES_5354_AFE_LDO			10
+#define SSB_PMURES_5354_PLL_LDO			11
+#define SSB_PMURES_5354_BG_FILTBYP		12
+#define SSB_PMURES_5354_TX_FILTBYP		13
+#define SSB_PMURES_5354_RX_FILTBYP		14
+#define SSB_PMURES_5354_XTAL_PU			15
+#define SSB_PMURES_5354_XTAL_EN			16
+#define SSB_PMURES_5354_BB_PLL_FILTBYP		17
+#define SSB_PMURES_5354_RF_PLL_FILTBYP		18
+#define SSB_PMURES_5354_BB_PLL_PU		19
+
+
+
+/** Chip specific Chip-Status register contents. */
+#define SSB_CHIPCO_CHST_4325_SPROM_OTP_SEL	0x00000003
+#define SSB_CHIPCO_CHST_4325_DEFCIS_SEL		0 /* OTP is powered up, use def. CIS, no SPROM */
+#define SSB_CHIPCO_CHST_4325_SPROM_SEL		1 /* OTP is powered up, SPROM is present */
+#define SSB_CHIPCO_CHST_4325_OTP_SEL		2 /* OTP is powered up, no SPROM */
+#define SSB_CHIPCO_CHST_4325_OTP_PWRDN		3 /* OTP is powered down, SPROM is present */
+#define SSB_CHIPCO_CHST_4325_SDIO_USB_MODE	0x00000004
+#define SSB_CHIPCO_CHST_4325_SDIO_USB_MODE_SHIFT  2
+#define SSB_CHIPCO_CHST_4325_RCAL_VALID		0x00000008
+#define SSB_CHIPCO_CHST_4325_RCAL_VALID_SHIFT	3
+#define SSB_CHIPCO_CHST_4325_RCAL_VALUE		0x000001F0
+#define SSB_CHIPCO_CHST_4325_RCAL_VALUE_SHIFT	4
+#define SSB_CHIPCO_CHST_4325_PMUTOP_2B 		0x00000200 /* 1 for 2b, 0 for to 2a */
 
 
@@ -353,11 +553,20 @@
 struct ssb_device;
 struct ssb_serial_port;
 
+/* Data for the PMU, if available.
+ * Check availability with ((struct ssb_chipcommon)->capabilities & SSB_CHIPCO_CAP_PMU)
+ */
+struct ssb_chipcommon_pmu {
+	u8 rev;			/* PMU revision */
+	u32 crystalfreq;	/* The active crystal frequency (in kHz) */
+};
+
 struct ssb_chipcommon {
 	struct ssb_device *dev;
 	u32 capabilities;
 	/* Fast Powerup Delay constant */
 	u16 fast_pwrup_delay;
+	struct ssb_chipcommon_pmu pmu;
 };
 
 static inline bool ssb_chipco_available(struct ssb_chipcommon *cc)
@@ -365,6 +574,17 @@ static inline bool ssb_chipco_available(struct ssb_chipcommon *cc)
 	return (cc->dev != NULL);
 }
 
+/* Register access */
+#define chipco_read32(cc, offset)	ssb_read32((cc)->dev, offset)
+#define chipco_write32(cc, offset, val)	ssb_write32((cc)->dev, offset, val)
+
+#define chipco_mask32(cc, offset, mask) \
+		chipco_write32(cc, offset, chipco_read32(cc, offset) & (mask))
+#define chipco_set32(cc, offset, set) \
+		chipco_write32(cc, offset, chipco_read32(cc, offset) | (set))
+#define chipco_maskset32(cc, offset, mask, set) \
+		chipco_write32(cc, offset, (chipco_read32(cc, offset) & (mask)) | (set))
+
 extern void ssb_chipcommon_init(struct ssb_chipcommon *cc);
 
 extern void ssb_chipco_suspend(struct ssb_chipcommon *cc);
@@ -406,4 +626,8 @@ extern int ssb_chipco_serial_init(struct ssb_chipcommon *cc,
 				  struct ssb_serial_port *ports);
 #endif /* CONFIG_SSB_SERIAL */
 
+/* PMU support */
+extern void ssb_pmu_init(struct ssb_chipcommon *cc);
+
+
 #endif /* LINUX_SSB_CHIPCO_H_ */
-- 
cgit v1.2.3


From d54e6d872767ae6512978f86a35d623a8ed948c5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 9 Feb 2009 23:45:29 -0800
Subject: net: Kill skbuff macros from the stone ages.

This kills of HAVE_ALLOC_SKB and HAVE_ALIGNABLE_SKB.

Nothing in-tree uses them and nothing in-tree has used them
since 2.0.x times.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5eba4007e07f..924700844580 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -29,9 +29,6 @@
 #include <linux/dmaengine.h>
 #include <linux/hrtimer.h>
 
-#define HAVE_ALLOC_SKB		/* For the drivers to know */
-#define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
-
 /* Don't change this without changing skb_csum_unnecessary! */
 #define CHECKSUM_NONE 0
 #define CHECKSUM_UNNECESSARY 1
-- 
cgit v1.2.3


From ad0b0fd554dfc126b5750d14908dccc3bbf602be Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Tue, 10 Feb 2009 11:42:26 -0800
Subject: sched, latencytop: incorporate review feedback from Andrew Morton

Andrew had some suggestions for the latencytop file; this patch takes care
of most of these:

* Add documentation
* Turn account_scheduler_latency into an inline function
* Don't report negative values to userspace
* Make the file operations struct const
* Fix a few checkpatch.pl warnings

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/latencytop.h | 10 +++++-
 kernel/latencytop.c        | 83 +++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 80 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
index 901c2d6377a8..b0e99898527c 100644
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@@ -9,6 +9,7 @@
 #ifndef _INCLUDE_GUARD_LATENCYTOP_H_
 #define _INCLUDE_GUARD_LATENCYTOP_H_
 
+#include <linux/compiler.h>
 #ifdef CONFIG_LATENCYTOP
 
 #define LT_SAVECOUNT		32
@@ -24,7 +25,14 @@ struct latency_record {
 
 struct task_struct;
 
-void account_scheduler_latency(struct task_struct *task, int usecs, int inter);
+extern int latencytop_enabled;
+void __account_scheduler_latency(struct task_struct *task, int usecs, int inter);
+static inline void
+account_scheduler_latency(struct task_struct *task, int usecs, int inter)
+{
+	if (unlikely(latencytop_enabled))
+		__account_scheduler_latency(task, usecs, inter);
+}
 
 void clear_all_latency_tracing(struct task_struct *p);
 
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 449db466bdbc..ca07c5c0c914 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -9,6 +9,44 @@
  * as published by the Free Software Foundation; version 2
  * of the License.
  */
+
+/*
+ * CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
+ * used by the "latencytop" userspace tool. The latency that is tracked is not
+ * the 'traditional' interrupt latency (which is primarily caused by something
+ * else consuming CPU), but instead, it is the latency an application encounters
+ * because the kernel sleeps on its behalf for various reasons.
+ *
+ * This code tracks 2 levels of statistics:
+ * 1) System level latency
+ * 2) Per process latency
+ *
+ * The latency is stored in fixed sized data structures in an accumulated form;
+ * if the "same" latency cause is hit twice, this will be tracked as one entry
+ * in the data structure. Both the count, total accumulated latency and maximum
+ * latency are tracked in this data structure. When the fixed size structure is
+ * full, no new causes are tracked until the buffer is flushed by writing to
+ * the /proc file; the userspace tool does this on a regular basis.
+ *
+ * A latency cause is identified by a stringified backtrace at the point that
+ * the scheduler gets invoked. The userland tool will use this string to
+ * identify the cause of the latency in human readable form.
+ *
+ * The information is exported via /proc/latency_stats and /proc/<pid>/latency.
+ * These files look like this:
+ *
+ * Latency Top version : v0.1
+ * 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
+ * |    |    |    |
+ * |    |    |    +----> the stringified backtrace
+ * |    |    +---------> The maximum latency for this entry in microseconds
+ * |    +--------------> The accumulated latency for this entry (microseconds)
+ * +-------------------> The number of times this entry is hit
+ *
+ * (note: the average latency is the accumulated latency divided by the number
+ * of times)
+ */
+
 #include <linux/latencytop.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
@@ -72,7 +110,7 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
 				firstnonnull = i;
 			continue;
 		}
-		for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
+		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
 			unsigned long record = lat->backtrace[q];
 
 			if (latency_record[i].backtrace[q] != record) {
@@ -101,31 +139,52 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
 	memcpy(&latency_record[i], lat, sizeof(struct latency_record));
 }
 
-static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat)
+/*
+ * Iterator to store a backtrace into a latency record entry
+ */
+static inline void store_stacktrace(struct task_struct *tsk,
+					struct latency_record *lat)
 {
 	struct stack_trace trace;
 
 	memset(&trace, 0, sizeof(trace));
 	trace.max_entries = LT_BACKTRACEDEPTH;
 	trace.entries = &lat->backtrace[0];
-	trace.skip = 0;
 	save_stack_trace_tsk(tsk, &trace);
 }
 
+/**
+ * __account_scheduler_latency - record an occured latency
+ * @tsk - the task struct of the task hitting the latency
+ * @usecs - the duration of the latency in microseconds
+ * @inter - 1 if the sleep was interruptible, 0 if uninterruptible
+ *
+ * This function is the main entry point for recording latency entries
+ * as called by the scheduler.
+ *
+ * This function has a few special cases to deal with normal 'non-latency'
+ * sleeps: specifically, interruptible sleep longer than 5 msec is skipped
+ * since this usually is caused by waiting for events via select() and co.
+ *
+ * Negative latencies (caused by time going backwards) are also explicitly
+ * skipped.
+ */
 void __sched
-account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
+__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 {
 	unsigned long flags;
 	int i, q;
 	struct latency_record lat;
 
-	if (!latencytop_enabled)
-		return;
-
 	/* Long interruptible waits are generally user requested... */
 	if (inter && usecs > 5000)
 		return;
 
+	/* Negative sleeps are time going backwards */
+	/* Zero-time sleeps are non-interesting */
+	if (usecs <= 0)
+		return;
+
 	memset(&lat, 0, sizeof(lat));
 	lat.count = 1;
 	lat.time = usecs;
@@ -143,12 +202,12 @@ account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 	if (tsk->latency_record_count >= LT_SAVECOUNT)
 		goto out_unlock;
 
-	for (i = 0; i < LT_SAVECOUNT ; i++) {
+	for (i = 0; i < LT_SAVECOUNT; i++) {
 		struct latency_record *mylat;
 		int same = 1;
 
 		mylat = &tsk->latency_record[i];
-		for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
+		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
 			unsigned long record = lat.backtrace[q];
 
 			if (mylat->backtrace[q] != record) {
@@ -186,7 +245,7 @@ static int lstats_show(struct seq_file *m, void *v)
 	for (i = 0; i < MAXLR; i++) {
 		if (latency_record[i].backtrace[0]) {
 			int q;
-			seq_printf(m, "%i %li %li ",
+			seq_printf(m, "%i %lu %lu ",
 				latency_record[i].count,
 				latency_record[i].time,
 				latency_record[i].max);
@@ -223,7 +282,7 @@ static int lstats_open(struct inode *inode, struct file *filp)
 	return single_open(filp, lstats_show, NULL);
 }
 
-static struct file_operations lstats_fops = {
+static const struct file_operations lstats_fops = {
 	.open		= lstats_open,
 	.read		= seq_read,
 	.write		= lstats_write,
@@ -236,4 +295,4 @@ static int __init init_lstats_procfs(void)
 	proc_create("latency_stats", 0644, NULL, &lstats_fops);
 	return 0;
 }
-__initcall(init_lstats_procfs);
+device_initcall(init_lstats_procfs);
-- 
cgit v1.2.3


From 2a5193119269062608582418deba7af82844159a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 10 Feb 2009 21:25:55 +0100
Subject: cfg80211/nl80211: scanning (and mac80211 update to use it)

This patch adds basic scan capability to cfg80211/nl80211 and
changes mac80211 to use it. The BSS list that cfg80211 maintains
is made driver-accessible with a private area in each BSS struct,
but mac80211 doesn't yet use it. That's another large project.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-agn.c      |  12 +-
 drivers/net/wireless/iwlwifi/iwl-core.c     |   1 +
 drivers/net/wireless/iwlwifi/iwl-scan.c     |   2 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  17 +-
 include/linux/nl80211.h                     |  65 +++
 include/net/cfg80211.h                      | 131 +++++
 include/net/mac80211.h                      |   6 +-
 include/net/wireless.h                      |   3 +
 net/mac80211/cfg.c                          |  20 +
 net/mac80211/ieee80211_i.h                  |  18 +-
 net/mac80211/iface.c                        |   2 +-
 net/mac80211/main.c                         |  32 +-
 net/mac80211/mlme.c                         |  37 +-
 net/mac80211/scan.c                         | 356 +++---------
 net/mac80211/wext.c                         |  59 +-
 net/wireless/Makefile                       |   2 +-
 net/wireless/core.c                         |   8 +
 net/wireless/core.h                         |  20 +
 net/wireless/nl80211.c                      | 323 +++++++++++
 net/wireless/nl80211.h                      |   8 +
 net/wireless/scan.c                         | 807 ++++++++++++++++++++++++++++
 21 files changed, 1546 insertions(+), 383 deletions(-)
 create mode 100644 net/wireless/scan.c

(limited to 'include/linux')

diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index c196abc6db7a..539960da7e13 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -2678,11 +2678,19 @@ static void iwl_bss_info_changed(struct ieee80211_hw *hw,
 
 }
 
-static int iwl_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t ssid_len)
+static int iwl_mac_hw_scan(struct ieee80211_hw *hw,
+			   struct cfg80211_scan_request *req)
 {
 	unsigned long flags;
 	struct iwl_priv *priv = hw->priv;
 	int ret;
+	u8 *ssid = NULL;
+	size_t ssid_len = 0;
+
+	if (req->n_ssids) {
+		ssid = req->ssids[0].ssid;
+		ssid_len = req->ssids[0].ssid_len;
+	}
 
 	IWL_DEBUG_MAC80211(priv, "enter\n");
 
@@ -2718,7 +2726,7 @@ static int iwl_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t ssid_len)
 
 	if (ssid_len) {
 		priv->one_direct_scan = 1;
-		priv->direct_ssid_len =  min_t(u8, ssid_len, IW_ESSID_MAX_SIZE);
+		priv->direct_ssid_len = ssid_len;
 		memcpy(priv->direct_ssid, ssid, priv->direct_ssid_len);
 	} else {
 		priv->one_direct_scan = 0;
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index e18c3f326f71..260bf903cb71 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -1271,6 +1271,7 @@ int iwl_setup_mac(struct iwl_priv *priv)
 		BIT(NL80211_IFTYPE_ADHOC);
 
 	hw->wiphy->custom_regulatory = true;
+	hw->wiphy->max_scan_ssids = 1;
 
 	/* Default value; 4 EDCA QOS priorities */
 	hw->queues = 4;
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index 22bad3ce7d6a..1ec2b20eb37c 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -860,7 +860,7 @@ void iwl_bg_scan_completed(struct work_struct *work)
 	if (test_bit(STATUS_EXIT_PENDING, &priv->status))
 		return;
 
-	ieee80211_scan_completed(priv->hw);
+	ieee80211_scan_completed(priv->hw, false);
 
 	/* Since setting the TXPOWER may have been deferred while
 	 * performing the scan, fire one off */
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 42cc2884971c..0cd8cb96a5ef 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -4442,15 +4442,23 @@ static void iwl3945_bss_info_changed(struct ieee80211_hw *hw,
 
 }
 
-static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len)
+static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw,
+			       struct cfg80211_scan_request *req)
 {
 	int rc = 0;
 	unsigned long flags;
 	struct iwl_priv *priv = hw->priv;
+	size_t len = 0;
+	u8 *ssid = NULL;
 	DECLARE_SSID_BUF(ssid_buf);
 
 	IWL_DEBUG_MAC80211(priv, "enter\n");
 
+	if (req->n_ssids) {
+		ssid = req->ssids[0].ssid;
+		len = req->ssids[0].ssid_len;
+	}
+
 	mutex_lock(&priv->mutex);
 	spin_lock_irqsave(&priv->lock, flags);
 
@@ -4478,9 +4486,8 @@ static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len)
 			       print_ssid(ssid_buf, ssid, len), len);
 
 		priv->one_direct_scan = 1;
-		priv->direct_ssid_len = (u8)
-		    min((u8) len, (u8) IW_ESSID_MAX_SIZE);
-		memcpy(priv->direct_ssid, ssid, priv->direct_ssid_len);
+		priv->direct_ssid_len = len;
+		memcpy(priv->direct_ssid, ssid, len);
 	} else
 		priv->one_direct_scan = 0;
 
@@ -5412,6 +5419,8 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e
 
 	hw->wiphy->custom_regulatory = true;
 
+	hw->wiphy->max_scan_ssids = 1;
+
 	/* 4 EDCA QOS priorities */
 	hw->queues = 4;
 
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 4bc27049f4e5..8802d1bda382 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -143,6 +143,13 @@
  *	added to all specified management frames generated by
  *	kernel/firmware/driver.
  *
+ * @NL80211_CMD_GET_SCAN: get scan results
+ * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters
+ * @NL80211_CMD_NEW_SCAN_RESULTS: scan notification (as a reply to
+ *	NL80211_CMD_GET_SCAN and on the "scan" multicast group)
+ * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons,
+ *	partial scan results may be available
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -192,6 +199,11 @@ enum nl80211_commands {
 
 	NL80211_CMD_GET_REG,
 
+	NL80211_CMD_GET_SCAN,
+	NL80211_CMD_TRIGGER_SCAN,
+	NL80211_CMD_NEW_SCAN_RESULTS,
+	NL80211_CMD_SCAN_ABORTED,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -305,6 +317,18 @@ enum nl80211_commands {
  * @NL80211_ATTR_IE: Information element(s) data (used, e.g., with
  *	%NL80211_CMD_SET_MGMT_EXTRA_IE).
  *
+ * @NL80211_ATTR_MAX_NUM_SCAN_SSIDS: number of SSIDs you can scan with
+ *	a single scan request, a wiphy attribute.
+ *
+ * @NL80211_ATTR_SCAN_FREQUENCIES: nested attribute with frequencies (in MHz)
+ * @NL80211_ATTR_SCAN_SSIDS: nested attribute with SSIDs, leave out for passive
+ *	scanning and include a zero-length SSID (wildcard) for wildcard scan
+ * @NL80211_ATTR_SCAN_GENERATION: the scan generation increases whenever the
+ *	scan result list changes (BSS expired or added) so that applications
+ *	can verify that they got a single, consistent snapshot (when all dump
+ *	messages carried the same generation number)
+ * @NL80211_ATTR_BSS: scan result BSS
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -372,6 +396,13 @@ enum nl80211_attrs {
 	NL80211_ATTR_MGMT_SUBTYPE,
 	NL80211_ATTR_IE,
 
+	NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
+
+	NL80211_ATTR_SCAN_FREQUENCIES,
+	NL80211_ATTR_SCAN_SSIDS,
+	NL80211_ATTR_SCAN_GENERATION,
+	NL80211_ATTR_BSS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -841,4 +872,38 @@ enum nl80211_channel_type {
 	NL80211_CHAN_HT40MINUS,
 	NL80211_CHAN_HT40PLUS
 };
+
+/**
+ * enum nl80211_bss - netlink attributes for a BSS
+ *
+ * @__NL80211_BSS_INVALID: invalid
+ * @NL80211_BSS_FREQUENCY: frequency in MHz (u32)
+ * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64)
+ * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16)
+ * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16)
+ * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the
+ *	raw information elements from the probe response/beacon (bin)
+ * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon
+ *	in mBm (100 * dBm) (s32)
+ * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon
+ *	in unspecified units, scaled to 0..100 (u8)
+ * @__NL80211_BSS_AFTER_LAST: internal
+ * @NL80211_BSS_MAX: highest BSS attribute
+ */
+enum nl80211_bss {
+	__NL80211_BSS_INVALID,
+	NL80211_BSS_BSSID,
+	NL80211_BSS_FREQUENCY,
+	NL80211_BSS_TSF,
+	NL80211_BSS_BEACON_INTERVAL,
+	NL80211_BSS_CAPABILITY,
+	NL80211_BSS_INFORMATION_ELEMENTS,
+	NL80211_BSS_SIGNAL_MBM,
+	NL80211_BSS_SIGNAL_UNSPEC,
+
+	/* keep last */
+	__NL80211_BSS_AFTER_LAST,
+	NL80211_BSS_MAX = __NL80211_BSS_AFTER_LAST - 1
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index dd1fd51638fc..09a0b268e5cf 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4,6 +4,10 @@
 #include <linux/netlink.h>
 #include <linux/skbuff.h>
 #include <linux/nl80211.h>
+#include <linux/if_ether.h>
+#include <linux/ieee80211.h>
+#include <linux/wireless.h>
+#include <net/iw_handler.h>
 #include <net/genetlink.h>
 /* remove once we remove the wext stuff */
 #include <net/iw_handler.h>
@@ -504,6 +508,83 @@ struct wiphy;
 /* from net/ieee80211.h */
 struct ieee80211_channel;
 
+/**
+ * struct cfg80211_ssid - SSID description
+ * @ssid: the SSID
+ * @ssid_len: length of the ssid
+ */
+struct cfg80211_ssid {
+	u8 ssid[IEEE80211_MAX_SSID_LEN];
+	u8 ssid_len;
+};
+
+/**
+ * struct cfg80211_scan_request - scan request description
+ *
+ * @ssids: SSIDs to scan for (active scan only)
+ * @n_ssids: number of SSIDs
+ * @channels: channels to scan on.
+ * @n_channels: number of channels for each band
+ * @wiphy: the wiphy this was for
+ * @ifidx: the interface index
+ */
+struct cfg80211_scan_request {
+	struct cfg80211_ssid *ssids;
+	int n_ssids;
+	struct ieee80211_channel **channels;
+	u32 n_channels;
+
+	/* internal */
+	struct wiphy *wiphy;
+	int ifidx;
+};
+
+/**
+ * enum cfg80211_signal_type - signal type
+ *
+ * @CFG80211_SIGNAL_TYPE_NONE: no signal strength information available
+ * @CFG80211_SIGNAL_TYPE_MBM: signal strength in mBm (100*dBm)
+ * @CFG80211_SIGNAL_TYPE_UNSPEC: signal strength, increasing from 0 through 100
+ */
+enum cfg80211_signal_type {
+	CFG80211_SIGNAL_TYPE_NONE,
+	CFG80211_SIGNAL_TYPE_MBM,
+	CFG80211_SIGNAL_TYPE_UNSPEC,
+};
+
+/**
+ * struct cfg80211_bss - BSS description
+ *
+ * This structure describes a BSS (which may also be a mesh network)
+ * for use in scan results and similar.
+ *
+ * @bssid: BSSID of the BSS
+ * @tsf: timestamp of last received update
+ * @beacon_interval: the beacon interval as from the frame
+ * @capability: the capability field in host byte order
+ * @information_elements: the information elements (Note that there
+ *	is no guarantee that these are well-formed!)
+ * @len_information_elements: total length of the information elements
+ * @signal: signal strength value
+ * @signal_type: signal type
+ * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes
+ */
+struct cfg80211_bss {
+	struct ieee80211_channel *channel;
+
+	u8 bssid[ETH_ALEN];
+	u64 tsf;
+	u16 beacon_interval;
+	u16 capability;
+	u8 *information_elements;
+	size_t len_information_elements;
+
+	s32 signal;
+	enum cfg80211_signal_type signal_type;
+
+	u8 priv[0] __attribute__((__aligned__(sizeof(void *))));
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -571,6 +652,11 @@ struct ieee80211_channel;
  * @set_channel: Set channel
  *
  * @set_mgmt_extra_ie: Set extra IE data for management frames
+ *
+ * @scan: Request to do a scan. If returning zero, the scan request is given
+ *	the driver, and will be valid until passed to cfg80211_scan_done().
+ *	For scan results, call cfg80211_inform_bss(); you can call this outside
+ *	the scan/scan_done bracket too.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy);
@@ -648,6 +734,9 @@ struct cfg80211_ops {
 	int	(*set_mgmt_extra_ie)(struct wiphy *wiphy,
 				     struct net_device *dev,
 				     struct mgmt_extra_ie_params *params);
+
+	int	(*scan)(struct wiphy *wiphy, struct net_device *dev,
+			struct cfg80211_scan_request *request);
 };
 
 /* temporary wext handlers */
@@ -658,5 +747,47 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
 			  u32 *mode, char *extra);
 int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
 			  u32 *mode, char *extra);
+int cfg80211_wext_siwscan(struct net_device *dev,
+			  struct iw_request_info *info,
+			  union iwreq_data *wrqu, char *extra);
+int cfg80211_wext_giwscan(struct net_device *dev,
+			  struct iw_request_info *info,
+			  struct iw_point *data, char *extra);
+
+/**
+ * cfg80211_scan_done - notify that scan finished
+ *
+ * @request: the corresponding scan request
+ * @aborted: set to true if the scan was aborted for any reason,
+ *	userspace will be notified of that
+ */
+void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted);
+
+/**
+ * cfg80211_inform_bss - inform cfg80211 of a new BSS
+ *
+ * @wiphy: the wiphy reporting the BSS
+ * @bss: the found BSS
+ * @gfp: context flags
+ *
+ * This informs cfg80211 that BSS information was found and
+ * the BSS should be updated/added.
+ */
+struct cfg80211_bss*
+cfg80211_inform_bss_frame(struct wiphy *wiphy,
+			  struct ieee80211_channel *channel,
+			  struct ieee80211_mgmt *mgmt, size_t len,
+			  s32 signal, enum cfg80211_signal_type sigtype,
+			  gfp_t gfp);
+
+struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
+				      struct ieee80211_channel *channel,
+				      const u8 *bssid,
+				      const u8 *ssid, size_t ssid_len);
+struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy,
+				       struct ieee80211_channel *channel,
+				       const u8 *meshid, size_t meshidlen,
+				       const u8 *meshcfg);
+void cfg80211_put_bss(struct cfg80211_bss *bss);
 
 #endif /* __NET_CFG80211_H */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 341f3e595ebd..88fa3e03e3e9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1406,7 +1406,8 @@ struct ieee80211_ops {
 	void (*update_tkip_key)(struct ieee80211_hw *hw,
 			struct ieee80211_key_conf *conf, const u8 *address,
 			u32 iv32, u16 *phase1key);
-	int (*hw_scan)(struct ieee80211_hw *hw, u8 *ssid, size_t len);
+	int (*hw_scan)(struct ieee80211_hw *hw,
+		       struct cfg80211_scan_request *req);
 	int (*get_stats)(struct ieee80211_hw *hw,
 			 struct ieee80211_low_level_stats *stats);
 	void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx,
@@ -1844,8 +1845,9 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw);
  * mac80211 that the scan finished.
  *
  * @hw: the hardware that finished the scan
+ * @aborted: set to true if scan was aborted
  */
-void ieee80211_scan_completed(struct ieee80211_hw *hw);
+void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted);
 
 /**
  * ieee80211_iterate_active_interfaces - iterate active interfaces
diff --git a/include/net/wireless.h b/include/net/wireless.h
index a42c1562d52b..1c6285eb1666 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -213,6 +213,9 @@ struct wiphy {
 	bool custom_regulatory;
 	bool strict_regulatory;
 
+	int bss_priv_size;
+	u8 max_scan_ssids;
+
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
 	 * know whether it points to a wiphy your driver has registered
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 42d692fd9bec..c8d969be440b 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1277,6 +1277,25 @@ static int ieee80211_resume(struct wiphy *wiphy)
 #define ieee80211_resume NULL
 #endif
 
+static int ieee80211_scan(struct wiphy *wiphy,
+			  struct net_device *dev,
+			  struct cfg80211_scan_request *req)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+	    sdata->vif.type != NL80211_IFTYPE_ADHOC &&
+	    sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
+	return ieee80211_request_scan(sdata, req);
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1309,4 +1328,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.set_mgmt_extra_ie = ieee80211_set_mgmt_extra_ie,
 	.suspend = ieee80211_suspend,
 	.resume = ieee80211_resume,
+	.scan = ieee80211_scan,
 };
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9122416fd6af..cbc0b7d647f9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -294,8 +294,6 @@ struct ieee80211_if_sta {
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
 	enum ieee80211_sta_mlme_state state;
 	size_t ssid_len;
-	u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
-	size_t scan_ssid_len;
 	u16 aid;
 	u16 ap_capab, capab;
 	u8 *extra_ie; /* to be added to the end of AssocReq */
@@ -658,17 +656,18 @@ struct ieee80211_local {
 
 	/* Scanning and BSS list */
 	bool sw_scanning, hw_scanning;
+	struct cfg80211_ssid scan_ssid;
+	struct cfg80211_scan_request int_scan_req;
+	struct cfg80211_scan_request *scan_req;
+	struct ieee80211_channel *scan_channel;
 	int scan_channel_idx;
-	enum ieee80211_band scan_band;
 
 	enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state;
 	unsigned long last_scan_completed;
 	struct delayed_work scan_work;
 	struct ieee80211_sub_if_data *scan_sdata;
-	struct ieee80211_channel *oper_channel, *scan_channel, *csa_channel;
 	enum nl80211_channel_type oper_channel_type;
-	u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
-	size_t scan_ssid_len;
+	struct ieee80211_channel *oper_channel, *csa_channel;
 	struct list_head bss_list;
 	struct ieee80211_bss *bss_hash[STA_HASH_SIZE];
 	spinlock_t bss_lock;
@@ -929,7 +928,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local,
 
 /* scan/BSS handling */
 int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
-			   u8 *ssid, size_t ssid_len);
+			   struct cfg80211_scan_request *req);
 int ieee80211_scan_results(struct ieee80211_local *local,
 			   struct iw_request_info *info,
 			   char *buf, size_t len);
@@ -944,14 +943,15 @@ int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata,
 
 void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local);
 int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
-			 u8 *ssid, size_t ssid_len);
+			 struct cfg80211_scan_request *req);
 struct ieee80211_bss *
 ieee80211_bss_info_update(struct ieee80211_local *local,
 			  struct ieee80211_rx_status *rx_status,
 			  struct ieee80211_mgmt *mgmt,
 			  size_t len,
 			  struct ieee802_11_elems *elems,
-			  int freq, bool beacon);
+			  struct ieee80211_channel *channel,
+			  bool beacon);
 struct ieee80211_bss *
 ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 1c17fb8e4058..df94b9365264 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -522,7 +522,7 @@ static int ieee80211_stop(struct net_device *dev)
 			 * scan event to userspace -- the scan is incomplete.
 			 */
 			if (local->sw_scanning)
-				ieee80211_scan_completed(&local->hw);
+				ieee80211_scan_completed(&local->hw, true);
 		}
 
 		conf.vif = &sdata->vif;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 956afea4214d..954edfbb6b6f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -733,6 +733,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 		return NULL;
 
 	wiphy->privid = mac80211_wiphy_privid;
+	wiphy->max_scan_ssids = 4;
 
 	local = wiphy_priv(wiphy);
 	local->hw.wiphy = wiphy;
@@ -817,25 +818,33 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	enum ieee80211_band band;
 	struct net_device *mdev;
 	struct ieee80211_master_priv *mpriv;
+	int channels, i, j;
 
 	/*
 	 * generic code guarantees at least one band,
 	 * set this very early because much code assumes
 	 * that hw.conf.channel is assigned
 	 */
+	channels = 0;
 	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 		struct ieee80211_supported_band *sband;
 
 		sband = local->hw.wiphy->bands[band];
-		if (sband) {
+		if (sband && !local->oper_channel) {
 			/* init channel we're on */
 			local->hw.conf.channel =
 			local->oper_channel =
 			local->scan_channel = &sband->channels[0];
-			break;
 		}
+		if (sband)
+			channels += sband->n_channels;
 	}
 
+	local->int_scan_req.n_channels = channels;
+	local->int_scan_req.channels = kzalloc(sizeof(void *) * channels, GFP_KERNEL);
+	if (!local->int_scan_req.channels)
+		return -ENOMEM;
+
 	/* if low-level driver supports AP, we also support VLAN */
 	if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP))
 		local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN);
@@ -845,7 +854,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	result = wiphy_register(local->hw.wiphy);
 	if (result < 0)
-		return result;
+		goto fail_wiphy_register;
 
 	/*
 	 * We use the number of queues for feature tests (QoS, HT) internally
@@ -948,6 +957,20 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	ieee80211_led_init(local);
 
+	/* alloc internal scan request */
+	i = 0;
+	local->int_scan_req.ssids = &local->scan_ssid;
+	local->int_scan_req.n_ssids = 1;
+	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+		if (!hw->wiphy->bands[band])
+			continue;
+		for (j = 0; j < hw->wiphy->bands[band]->n_channels; j++) {
+			local->int_scan_req.channels[i] =
+				&hw->wiphy->bands[band]->channels[j];
+			i++;
+		}
+	}
+
 	return 0;
 
 fail_wep:
@@ -966,6 +989,8 @@ fail_workqueue:
 		free_netdev(local->mdev);
 fail_mdev_alloc:
 	wiphy_unregister(local->hw.wiphy);
+fail_wiphy_register:
+	kfree(local->int_scan_req.channels);
 	return result;
 }
 EXPORT_SYMBOL(ieee80211_register_hw);
@@ -1011,6 +1036,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	ieee80211_wep_free(local);
 	ieee80211_led_exit(local);
 	free_netdev(local->mdev);
+	kfree(local->int_scan_req.channels);
 }
 EXPORT_SYMBOL(ieee80211_unregister_hw);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index bfc47b330687..46b4817cdea9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1743,7 +1743,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	}
 
 	bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
-					freq, beacon);
+					channel, beacon);
 	if (!bss)
 		return;
 
@@ -2162,7 +2162,15 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata,
 
 	printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
 	       "IBSS networks with same SSID (merge)\n", sdata->dev->name);
-	ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len);
+
+	/* XXX maybe racy? */
+	if (sdata->local->scan_req)
+		return;
+
+	memcpy(sdata->local->int_scan_req.ssids[0].ssid,
+	       ifsta->ssid, IEEE80211_MAX_SSID_LEN);
+	sdata->local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len;
+	ieee80211_request_scan(sdata, &sdata->local->int_scan_req);
 }
 
 
@@ -2378,8 +2386,15 @@ dont_join:
 			      IEEE80211_SCAN_INTERVAL)) {
 		printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
 		       "join\n", sdata->dev->name);
-		return ieee80211_request_scan(sdata, ifsta->ssid,
-					      ifsta->ssid_len);
+
+		/* XXX maybe racy? */
+		if (local->scan_req)
+			return -EBUSY;
+
+		memcpy(local->int_scan_req.ssids[0].ssid,
+		       ifsta->ssid, IEEE80211_MAX_SSID_LEN);
+		local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len;
+		return ieee80211_request_scan(sdata, &local->int_scan_req);
 	} else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) {
 		int interval = IEEE80211_SCAN_INTERVAL;
 
@@ -2478,11 +2493,16 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 	} else {
 		if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) {
 			ifsta->assoc_scan_tries++;
+			/* XXX maybe racy? */
+			if (local->scan_req)
+				return -1;
+			memcpy(local->int_scan_req.ssids[0].ssid,
+			       ifsta->ssid, IEEE80211_MAX_SSID_LEN);
 			if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL)
-				ieee80211_start_scan(sdata, NULL, 0);
+				local->int_scan_req.ssids[0].ssid_len = 0;
 			else
-				ieee80211_start_scan(sdata, ifsta->ssid,
-							 ifsta->ssid_len);
+				local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len;
+			ieee80211_start_scan(sdata, &local->int_scan_req);
 			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
 			set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
 		} else {
@@ -2520,8 +2540,7 @@ static void ieee80211_sta_work(struct work_struct *work)
 	    ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
 	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
 	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
-		ieee80211_start_scan(sdata, ifsta->scan_ssid,
-				     ifsta->scan_ssid_len);
+		ieee80211_start_scan(sdata, local->scan_req);
 		return;
 	}
 
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index eddca4e1e13c..c6b275b10cf9 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -13,6 +13,9 @@
  */
 
 /* TODO:
+ * figure out how to avoid that the "current BSS" expires
+ * clean up IBSS code (in MLME), see why it adds a BSS to the list
+ * use cfg80211's BSS handling (depends on IBSS TODO above)
  * order BSS list by RSSI(?) ("quality of AP")
  * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE,
  *    SSID)
@@ -225,10 +228,26 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 			  struct ieee80211_mgmt *mgmt,
 			  size_t len,
 			  struct ieee802_11_elems *elems,
-			  int freq, bool beacon)
+			  struct ieee80211_channel *channel,
+			  bool beacon)
 {
 	struct ieee80211_bss *bss;
-	int clen;
+	int clen, freq = channel->center_freq;
+	enum cfg80211_signal_type sigtype = CFG80211_SIGNAL_TYPE_NONE;
+	s32 signal = 0;
+
+	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) {
+		sigtype = CFG80211_SIGNAL_TYPE_MBM;
+		signal = rx_status->signal * 100;
+	} else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) {
+		sigtype = CFG80211_SIGNAL_TYPE_UNSPEC;
+		signal = (rx_status->signal * 100) / local->hw.max_signal;
+	}
+
+	cfg80211_put_bss(
+		cfg80211_inform_bss_frame(local->hw.wiphy, channel,
+					  mgmt, len, signal, sigtype,
+					  GFP_ATOMIC));
 
 #ifdef CONFIG_MAC80211_MESH
 	if (elems->mesh_config)
@@ -401,7 +420,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 
 	bss = ieee80211_bss_info_update(sdata->local, rx_status,
 					mgmt, skb->len, &elems,
-					freq, beacon);
+					channel, beacon);
 	if (bss)
 		ieee80211_rx_bss_put(sdata->local, bss);
 
@@ -439,26 +458,22 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 	ieee80211_tx_skb(sdata, skb, 0);
 }
 
-void ieee80211_scan_completed(struct ieee80211_hw *hw)
+void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
-	union iwreq_data wrqu;
 
 	if (WARN_ON(!local->hw_scanning && !local->sw_scanning))
 		return;
 
-	local->last_scan_completed = jiffies;
-	memset(&wrqu, 0, sizeof(wrqu));
+	if (WARN_ON(!local->scan_req))
+		return;
 
-	/*
-	 * local->scan_sdata could have been NULLed by the interface
-	 * down code in case we were scanning on an interface that is
-	 * being taken down.
-	 */
-	sdata = local->scan_sdata;
-	if (sdata)
-		wireless_send_event(sdata->dev, SIOCGIWSCAN, &wrqu, NULL);
+	if (local->scan_req != &local->int_scan_req)
+		cfg80211_scan_done(local->scan_req, aborted);
+	local->scan_req = NULL;
+
+	local->last_scan_completed = jiffies;
 
 	if (local->hw_scanning) {
 		local->hw_scanning = false;
@@ -520,9 +535,8 @@ void ieee80211_scan_work(struct work_struct *work)
 	struct ieee80211_local *local =
 		container_of(work, struct ieee80211_local, scan_work.work);
 	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
-	struct ieee80211_supported_band *sband;
 	struct ieee80211_channel *chan;
-	int skip;
+	int skip, i;
 	unsigned long next_delay = 0;
 
 	/*
@@ -533,33 +547,13 @@ void ieee80211_scan_work(struct work_struct *work)
 
 	switch (local->scan_state) {
 	case SCAN_SET_CHANNEL:
-		/*
-		 * Get current scan band. scan_band may be IEEE80211_NUM_BANDS
-		 * after we successfully scanned the last channel of the last
-		 * band (and the last band is supported by the hw)
-		 */
-		if (local->scan_band < IEEE80211_NUM_BANDS)
-			sband = local->hw.wiphy->bands[local->scan_band];
-		else
-			sband = NULL;
-
-		/*
-		 * If we are at an unsupported band and have more bands
-		 * left to scan, advance to the next supported one.
-		 */
-		while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) {
-			local->scan_band++;
-			sband = local->hw.wiphy->bands[local->scan_band];
-			local->scan_channel_idx = 0;
-		}
-
 		/* if no more bands/channels left, complete scan */
-		if (!sband || local->scan_channel_idx >= sband->n_channels) {
-			ieee80211_scan_completed(local_to_hw(local));
+		if (local->scan_channel_idx >= local->scan_req->n_channels) {
+			ieee80211_scan_completed(local_to_hw(local), false);
 			return;
 		}
 		skip = 0;
-		chan = &sband->channels[local->scan_channel_idx];
+		chan = local->scan_req->channels[local->scan_channel_idx];
 
 		if (chan->flags & IEEE80211_CHAN_DISABLED ||
 		    (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
@@ -575,15 +569,6 @@ void ieee80211_scan_work(struct work_struct *work)
 
 		/* advance state machine to next channel/band */
 		local->scan_channel_idx++;
-		if (local->scan_channel_idx >= sband->n_channels) {
-			/*
-			 * scan_band may end up == IEEE80211_NUM_BANDS, but
-			 * we'll catch that case above and complete the scan
-			 * if that is the case.
-			 */
-			local->scan_band++;
-			local->scan_channel_idx = 0;
-		}
 
 		if (skip)
 			break;
@@ -596,10 +581,14 @@ void ieee80211_scan_work(struct work_struct *work)
 		next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
 		local->scan_state = SCAN_SET_CHANNEL;
 
-		if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN)
+		if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN ||
+		    !local->scan_req->n_ssids)
 			break;
-		ieee80211_send_probe_req(sdata, NULL, local->scan_ssid,
-					 local->scan_ssid_len);
+		for (i = 0; i < local->scan_req->n_ssids; i++)
+			ieee80211_send_probe_req(
+				sdata, NULL,
+				local->scan_req->ssids[i].ssid,
+				local->scan_req->ssids[i].ssid_len);
 		next_delay = IEEE80211_CHANNEL_TIME;
 		break;
 	}
@@ -610,14 +599,19 @@ void ieee80211_scan_work(struct work_struct *work)
 
 
 int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
-			 u8 *ssid, size_t ssid_len)
+			 struct cfg80211_scan_request *req)
 {
 	struct ieee80211_local *local = scan_sdata->local;
 	struct ieee80211_sub_if_data *sdata;
 
-	if (ssid_len > IEEE80211_MAX_SSID_LEN)
+	if (!req)
 		return -EINVAL;
 
+	if (local->scan_req && local->scan_req != req)
+		return -EBUSY;
+
+	local->scan_req = req;
+
 	/* MLME-SCAN.request (page 118)  page 144 (11.1.3.1)
 	 * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS
 	 * BSSID: MACAddress
@@ -645,7 +639,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 		int rc;
 
 		local->hw_scanning = true;
-		rc = local->ops->hw_scan(local_to_hw(local), ssid, ssid_len);
+		rc = local->ops->hw_scan(local_to_hw(local), req);
 		if (rc) {
 			local->hw_scanning = false;
 			return rc;
@@ -678,15 +672,10 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 	}
 	mutex_unlock(&local->iflist_mtx);
 
-	if (ssid) {
-		local->scan_ssid_len = ssid_len;
-		memcpy(local->scan_ssid, ssid, ssid_len);
-	} else
-		local->scan_ssid_len = 0;
 	local->scan_state = SCAN_SET_CHANNEL;
 	local->scan_channel_idx = 0;
-	local->scan_band = IEEE80211_BAND_2GHZ;
 	local->scan_sdata = scan_sdata;
+	local->scan_req = req;
 
 	netif_addr_lock_bh(local->mdev);
 	local->filter_flags |= FIF_BCN_PRBRESP_PROMISC;
@@ -706,13 +695,21 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 
 
 int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
-			   u8 *ssid, size_t ssid_len)
+			   struct cfg80211_scan_request *req)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_sta *ifsta;
 
+	if (!req)
+		return -EINVAL;
+
+	if (local->scan_req && local->scan_req != req)
+		return -EBUSY;
+
+	local->scan_req = req;
+
 	if (sdata->vif.type != NL80211_IFTYPE_STATION)
-		return ieee80211_start_scan(sdata, ssid, ssid_len);
+		return ieee80211_start_scan(sdata, req);
 
 	/*
 	 * STA has a state machine that might need to defer scanning
@@ -727,241 +724,8 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
 	}
 
 	ifsta = &sdata->u.sta;
-
-	ifsta->scan_ssid_len = ssid_len;
-	if (ssid_len)
-		memcpy(ifsta->scan_ssid, ssid, ssid_len);
 	set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request);
 	queue_work(local->hw.workqueue, &ifsta->work);
 
 	return 0;
 }
-
-
-static void ieee80211_scan_add_ies(struct iw_request_info *info,
-				   struct ieee80211_bss *bss,
-				   char **current_ev, char *end_buf)
-{
-	u8 *pos, *end, *next;
-	struct iw_event iwe;
-
-	if (bss == NULL || bss->ies == NULL)
-		return;
-
-	/*
-	 * If needed, fragment the IEs buffer (at IE boundaries) into short
-	 * enough fragments to fit into IW_GENERIC_IE_MAX octet messages.
-	 */
-	pos = bss->ies;
-	end = pos + bss->ies_len;
-
-	while (end - pos > IW_GENERIC_IE_MAX) {
-		next = pos + 2 + pos[1];
-		while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX)
-			next = next + 2 + next[1];
-
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVGENIE;
-		iwe.u.data.length = next - pos;
-		*current_ev = iwe_stream_add_point(info, *current_ev,
-						   end_buf, &iwe, pos);
-
-		pos = next;
-	}
-
-	if (end > pos) {
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVGENIE;
-		iwe.u.data.length = end - pos;
-		*current_ev = iwe_stream_add_point(info, *current_ev,
-						   end_buf, &iwe, pos);
-	}
-}
-
-
-static char *
-ieee80211_scan_result(struct ieee80211_local *local,
-		      struct iw_request_info *info,
-		      struct ieee80211_bss *bss,
-		      char *current_ev, char *end_buf)
-{
-	struct iw_event iwe;
-	char *buf;
-
-	if (time_after(jiffies,
-		       bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE))
-		return current_ev;
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWAP;
-	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
-	memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN);
-	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
-					  IW_EV_ADDR_LEN);
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWESSID;
-	if (bss_mesh_cfg(bss)) {
-		iwe.u.data.length = bss_mesh_id_len(bss);
-		iwe.u.data.flags = 1;
-		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-						  &iwe, bss_mesh_id(bss));
-	} else {
-		iwe.u.data.length = bss->ssid_len;
-		iwe.u.data.flags = 1;
-		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-						  &iwe, bss->ssid);
-	}
-
-	if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)
-	    || bss_mesh_cfg(bss)) {
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = SIOCGIWMODE;
-		if (bss_mesh_cfg(bss))
-			iwe.u.mode = IW_MODE_MESH;
-		else if (bss->capability & WLAN_CAPABILITY_ESS)
-			iwe.u.mode = IW_MODE_MASTER;
-		else
-			iwe.u.mode = IW_MODE_ADHOC;
-		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
-						  &iwe, IW_EV_UINT_LEN);
-	}
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWFREQ;
-	iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq);
-	iwe.u.freq.e = 0;
-	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
-					  IW_EV_FREQ_LEN);
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWFREQ;
-	iwe.u.freq.m = bss->freq;
-	iwe.u.freq.e = 6;
-	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
-					  IW_EV_FREQ_LEN);
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = IWEVQUAL;
-	iwe.u.qual.qual = bss->qual;
-	iwe.u.qual.level = bss->signal;
-	iwe.u.qual.noise = bss->noise;
-	iwe.u.qual.updated = local->wstats_flags;
-	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
-					  IW_EV_QUAL_LEN);
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWENCODE;
-	if (bss->capability & WLAN_CAPABILITY_PRIVACY)
-		iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
-	else
-		iwe.u.data.flags = IW_ENCODE_DISABLED;
-	iwe.u.data.length = 0;
-	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-					  &iwe, "");
-
-	ieee80211_scan_add_ies(info, bss, &current_ev, end_buf);
-
-	if (bss->supp_rates_len > 0) {
-		/* display all supported rates in readable format */
-		char *p = current_ev + iwe_stream_lcp_len(info);
-		int i;
-
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = SIOCGIWRATE;
-		/* Those two flags are ignored... */
-		iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
-
-		for (i = 0; i < bss->supp_rates_len; i++) {
-			iwe.u.bitrate.value = ((bss->supp_rates[i] &
-							0x7f) * 500000);
-			p = iwe_stream_add_value(info, current_ev, p,
-					end_buf, &iwe, IW_EV_PARAM_LEN);
-		}
-		current_ev = p;
-	}
-
-	buf = kmalloc(30, GFP_ATOMIC);
-	if (buf) {
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVCUSTOM;
-		sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp));
-		iwe.u.data.length = strlen(buf);
-		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-						  &iwe, buf);
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVCUSTOM;
-		sprintf(buf, " Last beacon: %dms ago",
-			jiffies_to_msecs(jiffies - bss->last_update));
-		iwe.u.data.length = strlen(buf);
-		current_ev = iwe_stream_add_point(info, current_ev,
-						  end_buf, &iwe, buf);
-		kfree(buf);
-	}
-
-	if (bss_mesh_cfg(bss)) {
-		u8 *cfg = bss_mesh_cfg(bss);
-		buf = kmalloc(50, GFP_ATOMIC);
-		if (buf) {
-			memset(&iwe, 0, sizeof(iwe));
-			iwe.cmd = IWEVCUSTOM;
-			sprintf(buf, "Mesh network (version %d)", cfg[0]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			sprintf(buf, "Path Selection Protocol ID: "
-				"0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3],
-							cfg[4]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			sprintf(buf, "Path Selection Metric ID: "
-				"0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7],
-							cfg[8]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			sprintf(buf, "Congestion Control Mode ID: "
-				"0x%02X%02X%02X%02X", cfg[9], cfg[10],
-							cfg[11], cfg[12]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			sprintf(buf, "Channel Precedence: "
-				"0x%02X%02X%02X%02X", cfg[13], cfg[14],
-							cfg[15], cfg[16]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			kfree(buf);
-		}
-	}
-
-	return current_ev;
-}
-
-
-int ieee80211_scan_results(struct ieee80211_local *local,
-			   struct iw_request_info *info,
-			   char *buf, size_t len)
-{
-	char *current_ev = buf;
-	char *end_buf = buf + len;
-	struct ieee80211_bss *bss;
-
-	spin_lock_bh(&local->bss_lock);
-	list_for_each_entry(bss, &local->bss_list, list) {
-		if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
-			spin_unlock_bh(&local->bss_lock);
-			return -E2BIG;
-		}
-		current_ev = ieee80211_scan_result(local, info, bss,
-						       current_ev, end_buf);
-	}
-	spin_unlock_bh(&local->bss_lock);
-	return current_ev - buf;
-}
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index acd5808b87f4..b337d7d5edb3 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -173,8 +173,9 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev,
 	range->num_encoding_sizes = 2;
 	range->max_encoding_tokens = NUM_DEFAULT_KEYS;
 
+	/* cfg80211 requires this, and enforces 0..100 */
 	if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
-		range->max_qual.level = local->hw.max_signal;
+		range->max_qual.level = 100;
 	else if  (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
 		range->max_qual.level = -110;
 	else
@@ -415,58 +416,6 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
 }
 
 
-static int ieee80211_ioctl_siwscan(struct net_device *dev,
-				   struct iw_request_info *info,
-				   union iwreq_data *wrqu, char *extra)
-{
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct iw_scan_req *req = NULL;
-	u8 *ssid = NULL;
-	size_t ssid_len = 0;
-
-	if (!netif_running(dev))
-		return -ENETDOWN;
-
-	if (sdata->vif.type != NL80211_IFTYPE_STATION &&
-	    sdata->vif.type != NL80211_IFTYPE_ADHOC &&
-	    sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
-		return -EOPNOTSUPP;
-
-	/* if SSID was specified explicitly then use that */
-	if (wrqu->data.length == sizeof(struct iw_scan_req) &&
-	    wrqu->data.flags & IW_SCAN_THIS_ESSID) {
-		req = (struct iw_scan_req *)extra;
-		ssid = req->essid;
-		ssid_len = req->essid_len;
-	}
-
-	return ieee80211_request_scan(sdata, ssid, ssid_len);
-}
-
-
-static int ieee80211_ioctl_giwscan(struct net_device *dev,
-				   struct iw_request_info *info,
-				   struct iw_point *data, char *extra)
-{
-	int res;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	if (local->sw_scanning || local->hw_scanning)
-		return -EAGAIN;
-
-	res = ieee80211_scan_results(local, info, extra, data->length);
-	if (res >= 0) {
-		data->length = res;
-		return 0;
-	}
-	data->length = 0;
-	return res;
-}
-
-
 static int ieee80211_ioctl_siwrate(struct net_device *dev,
 				  struct iw_request_info *info,
 				  struct iw_param *rate, char *extra)
@@ -1165,8 +1114,8 @@ static const iw_handler ieee80211_handler[] =
 	(iw_handler) ieee80211_ioctl_giwap,		/* SIOCGIWAP */
 	(iw_handler) ieee80211_ioctl_siwmlme,		/* SIOCSIWMLME */
 	(iw_handler) NULL,				/* SIOCGIWAPLIST */
-	(iw_handler) ieee80211_ioctl_siwscan,		/* SIOCSIWSCAN */
-	(iw_handler) ieee80211_ioctl_giwscan,		/* SIOCGIWSCAN */
+	(iw_handler) cfg80211_wext_siwscan,		/* SIOCSIWSCAN */
+	(iw_handler) cfg80211_wext_giwscan,		/* SIOCGIWSCAN */
 	(iw_handler) ieee80211_ioctl_siwessid,		/* SIOCSIWESSID */
 	(iw_handler) ieee80211_ioctl_giwessid,		/* SIOCGIWESSID */
 	(iw_handler) NULL,				/* SIOCSIWNICKN */
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 938a334c8dbc..dad43c24f695 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
 obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
 obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
 
-cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o
+cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o
 cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o
 cfg80211-$(CONFIG_NL80211) += nl80211.o
 
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 125226476089..3cccd1390cea 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -240,6 +240,8 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
 	mutex_init(&drv->mtx);
 	mutex_init(&drv->devlist_mtx);
 	INIT_LIST_HEAD(&drv->netdev_list);
+	spin_lock_init(&drv->bss_lock);
+	INIT_LIST_HEAD(&drv->bss_list);
 
 	device_initialize(&drv->wiphy.dev);
 	drv->wiphy.dev.class = &ieee80211_class;
@@ -259,6 +261,9 @@ int wiphy_register(struct wiphy *wiphy)
 	int i;
 	u16 ifmodes = wiphy->interface_modes;
 
+	if (WARN_ON(wiphy->max_scan_ssids < 1))
+		return -EINVAL;
+
 	/* sanity check ifmodes */
 	WARN_ON(!ifmodes);
 	ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1;
@@ -367,8 +372,11 @@ EXPORT_SYMBOL(wiphy_unregister);
 
 void cfg80211_dev_free(struct cfg80211_registered_device *drv)
 {
+	struct cfg80211_internal_bss *scan, *tmp;
 	mutex_destroy(&drv->mtx);
 	mutex_destroy(&drv->devlist_mtx);
+	list_for_each_entry_safe(scan, tmp, &drv->bss_list, list)
+		kfree(scan);
 	kfree(drv);
 }
 
diff --git a/net/wireless/core.h b/net/wireless/core.h
index f7fb9f413028..e29ad4cd464f 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -8,6 +8,8 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
+#include <linux/kref.h>
+#include <linux/rbtree.h>
 #include <net/genetlink.h>
 #include <net/wireless.h>
 #include <net/cfg80211.h>
@@ -41,6 +43,13 @@ struct cfg80211_registered_device {
 	struct mutex devlist_mtx;
 	struct list_head netdev_list;
 
+	/* BSSes/scanning */
+	spinlock_t bss_lock;
+	struct list_head bss_list;
+	struct rb_root bss_tree;
+	u32 bss_generation;
+	struct cfg80211_scan_request *scan_req; /* protected by RTNL */
+
 	/* must be last because of the way we do wiphy_priv(),
 	 * and it should at least be aligned to NETDEV_ALIGN */
 	struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
@@ -56,6 +65,15 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
 extern struct mutex cfg80211_drv_mutex;
 extern struct list_head cfg80211_drv_list;
 
+struct cfg80211_internal_bss {
+	struct list_head list;
+	struct rb_node rbn;
+	unsigned long ts;
+	struct kref ref;
+	/* must be last because of priv member */
+	struct cfg80211_bss pub;
+};
+
 /*
  * This function returns a pointer to the driver
  * that the genl_info item that is passed refers to.
@@ -94,4 +112,6 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv,
 void ieee80211_set_bitrate_flags(struct wiphy *wiphy);
 void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby);
 
+void cfg80211_bss_expire(struct cfg80211_registered_device *dev);
+
 #endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d452396006ee..298a4de59948 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -14,6 +14,7 @@
 #include <linux/nl80211.h>
 #include <linux/rtnetlink.h>
 #include <linux/netlink.h>
+#include <linux/etherdevice.h>
 #include <net/genetlink.h>
 #include <net/cfg80211.h>
 #include "core.h"
@@ -109,6 +110,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 },
 	[NL80211_ATTR_IE] = { .type = NLA_BINARY,
 			      .len = IEEE80211_MAX_DATA_LEN },
+	[NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED },
+	[NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED },
 };
 
 /* message building helper */
@@ -141,6 +144,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx);
 	NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy));
+	NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
+		   dev->wiphy.max_scan_ssids);
 
 	nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
 	if (!nl_modes)
@@ -2270,6 +2275,246 @@ static int nl80211_set_mgmt_extra_ie(struct sk_buff *skb,
 	return err;
 }
 
+static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	struct net_device *dev;
+	struct cfg80211_scan_request *request;
+	struct cfg80211_ssid *ssid;
+	struct ieee80211_channel *channel;
+	struct nlattr *attr;
+	struct wiphy *wiphy;
+	int err, tmp, n_ssids = 0, n_channels = 0, i;
+	enum ieee80211_band band;
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		return err;
+
+	wiphy = &drv->wiphy;
+
+	if (!drv->ops->scan) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+
+	if (drv->scan_req) {
+		err = -EBUSY;
+		goto out_unlock;
+	}
+
+	if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
+		nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp)
+			n_channels++;
+		if (!n_channels) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+	} else {
+		for (band = 0; band < IEEE80211_NUM_BANDS; band++)
+			if (wiphy->bands[band])
+				n_channels += wiphy->bands[band]->n_channels;
+	}
+
+	if (info->attrs[NL80211_ATTR_SCAN_SSIDS])
+		nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp)
+			n_ssids++;
+
+	if (n_ssids > wiphy->max_scan_ssids) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	request = kzalloc(sizeof(*request)
+			+ sizeof(*ssid) * n_ssids
+			+ sizeof(channel) * n_channels, GFP_KERNEL);
+	if (!request) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
+	request->channels = (void *)((char *)request + sizeof(*request));
+	request->n_channels = n_channels;
+	if (n_ssids)
+		request->ssids = (void *)(request->channels + n_channels);
+	request->n_ssids = n_ssids;
+
+	if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
+		/* user specified, bail out if channel not found */
+		request->n_channels = n_channels;
+		i = 0;
+		nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) {
+			request->channels[i] = ieee80211_get_channel(wiphy, nla_get_u32(attr));
+			if (!request->channels[i]) {
+				err = -EINVAL;
+				goto out_free;
+			}
+			i++;
+		}
+	} else {
+		/* all channels */
+		i = 0;
+		for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+			int j;
+			if (!wiphy->bands[band])
+				continue;
+			for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
+				request->channels[i] = &wiphy->bands[band]->channels[j];
+				i++;
+			}
+		}
+	}
+
+	i = 0;
+	if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) {
+		nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) {
+			if (request->ssids[i].ssid_len > IEEE80211_MAX_SSID_LEN) {
+				err = -EINVAL;
+				goto out_free;
+			}
+			memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr));
+			request->ssids[i].ssid_len = nla_len(attr);
+			i++;
+		}
+	}
+
+	request->ifidx = dev->ifindex;
+	request->wiphy = &drv->wiphy;
+
+	drv->scan_req = request;
+	err = drv->ops->scan(&drv->wiphy, dev, request);
+
+ out_free:
+	if (err) {
+		drv->scan_req = NULL;
+		kfree(request);
+	}
+ out_unlock:
+	rtnl_unlock();
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
+static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
+			    struct cfg80211_registered_device *rdev,
+			    struct net_device *dev,
+			    struct cfg80211_bss *res)
+{
+	void *hdr;
+	struct nlattr *bss;
+
+	hdr = nl80211hdr_put(msg, pid, seq, flags,
+			     NL80211_CMD_NEW_SCAN_RESULTS);
+	if (!hdr)
+		return -1;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_SCAN_GENERATION,
+		    rdev->bss_generation);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
+
+	bss = nla_nest_start(msg, NL80211_ATTR_BSS);
+	if (!bss)
+		goto nla_put_failure;
+	if (!is_zero_ether_addr(res->bssid))
+		NLA_PUT(msg, NL80211_BSS_BSSID, ETH_ALEN, res->bssid);
+	if (res->information_elements && res->len_information_elements)
+		NLA_PUT(msg, NL80211_BSS_INFORMATION_ELEMENTS,
+			res->len_information_elements,
+			res->information_elements);
+	if (res->tsf)
+		NLA_PUT_U64(msg, NL80211_BSS_TSF, res->tsf);
+	if (res->beacon_interval)
+		NLA_PUT_U16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval);
+	NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability);
+	NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq);
+
+	switch (res->signal_type) {
+	case CFG80211_SIGNAL_TYPE_MBM:
+		NLA_PUT_U32(msg, NL80211_BSS_SIGNAL_MBM, res->signal);
+		break;
+	case CFG80211_SIGNAL_TYPE_UNSPEC:
+		NLA_PUT_U8(msg, NL80211_BSS_SIGNAL_UNSPEC, res->signal);
+		break;
+	default:
+		break;
+	}
+
+	nla_nest_end(msg, bss);
+
+	return genlmsg_end(msg, hdr);
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int nl80211_dump_scan(struct sk_buff *skb,
+			     struct netlink_callback *cb)
+{
+	struct cfg80211_registered_device *dev;
+	struct net_device *netdev;
+	struct cfg80211_internal_bss *scan;
+	int ifidx = cb->args[0];
+	int start = cb->args[1], idx = 0;
+	int err;
+
+	if (!ifidx) {
+		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+				  nl80211_fam.attrbuf, nl80211_fam.maxattr,
+				  nl80211_policy);
+		if (err)
+			return err;
+
+		if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX])
+			return -EINVAL;
+
+		ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]);
+		if (!ifidx)
+			return -EINVAL;
+		cb->args[0] = ifidx;
+	}
+
+	netdev = dev_get_by_index(&init_net, ifidx);
+	if (!netdev)
+		return -ENODEV;
+
+	dev = cfg80211_get_dev_from_ifindex(ifidx);
+	if (IS_ERR(dev)) {
+		err = PTR_ERR(dev);
+		goto out_put_netdev;
+	}
+
+	spin_lock_bh(&dev->bss_lock);
+	cfg80211_bss_expire(dev);
+
+	list_for_each_entry(scan, &dev->bss_list, list) {
+		if (++idx <= start)
+			continue;
+		if (nl80211_send_bss(skb,
+				NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				dev, netdev, &scan->pub) < 0) {
+			idx--;
+			goto out;
+		}
+	}
+
+ out:
+	spin_unlock_bh(&dev->bss_lock);
+
+	cb->args[1] = idx;
+	err = skb->len;
+	cfg80211_put_dev(dev);
+ out_put_netdev:
+	dev_put(netdev);
+
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -2443,12 +2688,26 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_TRIGGER_SCAN,
+		.doit = nl80211_trigger_scan,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_GET_SCAN,
+		.policy = nl80211_policy,
+		.dumpit = nl80211_dump_scan,
+	},
 };
 
 /* multicast groups */
 static struct genl_multicast_group nl80211_config_mcgrp = {
 	.name = "config",
 };
+static struct genl_multicast_group nl80211_scan_mcgrp = {
+	.name = "scan",
+};
 
 /* notification functions */
 
@@ -2468,6 +2727,66 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
 	genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL);
 }
 
+static int nl80211_send_scan_donemsg(struct sk_buff *msg,
+				    struct cfg80211_registered_device *rdev,
+				    struct net_device *netdev,
+				    u32 pid, u32 seq, int flags,
+				    u32 cmd)
+{
+	void *hdr;
+
+	hdr = nl80211hdr_put(msg, pid, seq, flags, cmd);
+	if (!hdr)
+		return -1;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+
+	/* XXX: we should probably bounce back the request? */
+
+	return genlmsg_end(msg, hdr);
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
+			    struct net_device *netdev)
+{
+	struct sk_buff *msg;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	if (nl80211_send_scan_donemsg(msg, rdev, netdev, 0, 0, 0,
+				      NL80211_CMD_NEW_SCAN_RESULTS) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL);
+}
+
+void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
+			       struct net_device *netdev)
+{
+	struct sk_buff *msg;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	if (nl80211_send_scan_donemsg(msg, rdev, netdev, 0, 0, 0,
+				      NL80211_CMD_SCAN_ABORTED) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL);
+}
+
 /* initialisation/exit functions */
 
 int nl80211_init(void)
@@ -2488,6 +2807,10 @@ int nl80211_init(void)
 	if (err)
 		goto err_out;
 
+	err = genl_register_mc_group(&nl80211_fam, &nl80211_scan_mcgrp);
+	if (err)
+		goto err_out;
+
 	return 0;
  err_out:
 	genl_unregister_family(&nl80211_fam);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index f3ea5c029aee..b565a5f84e97 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -7,6 +7,10 @@
 extern int nl80211_init(void);
 extern void nl80211_exit(void);
 extern void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev);
+extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
+				   struct net_device *netdev);
+extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
+				      struct net_device *netdev);
 #else
 static inline int nl80211_init(void)
 {
@@ -19,6 +23,10 @@ static inline void nl80211_notify_dev_rename(
 	struct cfg80211_registered_device *rdev)
 {
 }
+static inline void
+nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
+		       struct net_device *netdev)
+{}
 #endif /* CONFIG_NL80211 */
 
 #endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
new file mode 100644
index 000000000000..009d12810c55
--- /dev/null
+++ b/net/wireless/scan.c
@@ -0,0 +1,807 @@
+/*
+ * cfg80211 scan result handling
+ *
+ * Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/wireless.h>
+#include <linux/nl80211.h>
+#include <linux/etherdevice.h>
+#include <net/arp.h>
+#include <net/cfg80211.h>
+#include <net/iw_handler.h>
+#include "core.h"
+#include "nl80211.h"
+
+#define IEEE80211_SCAN_RESULT_EXPIRE	(10 * HZ)
+
+void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
+{
+	struct net_device *dev;
+#ifdef CONFIG_WIRELESS_EXT
+	union iwreq_data wrqu;
+#endif
+
+	dev = dev_get_by_index(&init_net, request->ifidx);
+	if (!dev)
+		goto out;
+
+	WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req);
+	wiphy_to_dev(request->wiphy)->scan_req = NULL;
+
+	if (aborted)
+		nl80211_send_scan_aborted(wiphy_to_dev(request->wiphy), dev);
+	else
+		nl80211_send_scan_done(wiphy_to_dev(request->wiphy), dev);
+
+#ifdef CONFIG_WIRELESS_EXT
+	if (!aborted) {
+		memset(&wrqu, 0, sizeof(wrqu));
+
+		wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
+	}
+#endif
+
+	dev_put(dev);
+
+ out:
+	kfree(request);
+}
+EXPORT_SYMBOL(cfg80211_scan_done);
+
+static void bss_release(struct kref *ref)
+{
+	struct cfg80211_internal_bss *bss;
+
+	bss = container_of(ref, struct cfg80211_internal_bss, ref);
+	kfree(bss);
+}
+
+/* must hold dev->bss_lock! */
+void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
+{
+	struct cfg80211_internal_bss *bss, *tmp;
+	bool expired = false;
+
+	list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) {
+		if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE))
+			continue;
+		list_del(&bss->list);
+		rb_erase(&bss->rbn, &dev->bss_tree);
+		kref_put(&bss->ref, bss_release);
+		expired = true;
+	}
+
+	if (expired)
+		dev->bss_generation++;
+}
+
+static u8 *find_ie(u8 num, u8 *ies, size_t len)
+{
+	while (len > 2 && ies[0] != num) {
+		len -= ies[1] + 2;
+		ies += ies[1] + 2;
+	}
+	if (len < 2)
+		return NULL;
+	if (len < 2 + ies[1])
+		return NULL;
+	return ies;
+}
+
+static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2)
+{
+	const u8 *ie1 = find_ie(num, ies1, len1);
+	const u8 *ie2 = find_ie(num, ies2, len2);
+	int r;
+
+	if (!ie1 && !ie2)
+		return 0;
+	if (!ie1)
+		return -1;
+
+	r = memcmp(ie1 + 2, ie2 + 2, min(ie1[1], ie2[1]));
+	if (r == 0 && ie1[1] != ie2[1])
+		return ie2[1] - ie1[1];
+	return r;
+}
+
+static bool is_bss(struct cfg80211_bss *a,
+		   const u8 *bssid,
+		   const u8 *ssid, size_t ssid_len)
+{
+	const u8 *ssidie;
+
+	if (compare_ether_addr(a->bssid, bssid))
+		return false;
+
+	ssidie = find_ie(WLAN_EID_SSID,
+			 a->information_elements,
+			 a->len_information_elements);
+	if (!ssidie)
+		return false;
+	if (ssidie[1] != ssid_len)
+		return false;
+	return memcmp(ssidie + 2, ssid, ssid_len) == 0;
+}
+
+static bool is_mesh(struct cfg80211_bss *a,
+		    const u8 *meshid, size_t meshidlen,
+		    const u8 *meshcfg)
+{
+	const u8 *ie;
+
+	if (!is_zero_ether_addr(a->bssid))
+		return false;
+
+	ie = find_ie(WLAN_EID_MESH_ID,
+		     a->information_elements,
+		     a->len_information_elements);
+	if (!ie)
+		return false;
+	if (ie[1] != meshidlen)
+		return false;
+	if (memcmp(ie + 2, meshid, meshidlen))
+		return false;
+
+	ie = find_ie(WLAN_EID_MESH_CONFIG,
+		     a->information_elements,
+		     a->len_information_elements);
+	if (ie[1] != IEEE80211_MESH_CONFIG_LEN)
+		return false;
+
+	/*
+	 * Ignore mesh capability (last two bytes of the IE) when
+	 * comparing since that may differ between stations taking
+	 * part in the same mesh.
+	 */
+	return memcmp(ie + 2, meshcfg, IEEE80211_MESH_CONFIG_LEN - 2) == 0;
+}
+
+static int cmp_bss(struct cfg80211_bss *a,
+		   struct cfg80211_bss *b)
+{
+	int r;
+
+	if (a->channel != b->channel)
+		return b->channel->center_freq - a->channel->center_freq;
+
+	r = memcmp(a->bssid, b->bssid, ETH_ALEN);
+	if (r)
+		return r;
+
+	if (is_zero_ether_addr(a->bssid)) {
+		r = cmp_ies(WLAN_EID_MESH_ID,
+			    a->information_elements,
+			    a->len_information_elements,
+			    b->information_elements,
+			    b->len_information_elements);
+		if (r)
+			return r;
+		return cmp_ies(WLAN_EID_MESH_CONFIG,
+			       a->information_elements,
+			       a->len_information_elements,
+			       b->information_elements,
+			       b->len_information_elements);
+	}
+
+	return cmp_ies(WLAN_EID_SSID,
+		       a->information_elements,
+		       a->len_information_elements,
+		       b->information_elements,
+		       b->len_information_elements);
+}
+
+struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
+				      struct ieee80211_channel *channel,
+				      const u8 *bssid,
+				      const u8 *ssid, size_t ssid_len)
+{
+	struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+	struct cfg80211_internal_bss *bss, *res = NULL;
+
+	spin_lock_bh(&dev->bss_lock);
+
+	list_for_each_entry(bss, &dev->bss_list, list) {
+		if (channel && bss->pub.channel != channel)
+			continue;
+		if (is_bss(&bss->pub, bssid, ssid, ssid_len)) {
+			res = bss;
+			kref_get(&res->ref);
+			break;
+		}
+	}
+
+	spin_unlock_bh(&dev->bss_lock);
+	if (!res)
+		return NULL;
+	return &res->pub;
+}
+EXPORT_SYMBOL(cfg80211_get_bss);
+
+struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy,
+				       struct ieee80211_channel *channel,
+				       const u8 *meshid, size_t meshidlen,
+				       const u8 *meshcfg)
+{
+	struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+	struct cfg80211_internal_bss *bss, *res = NULL;
+
+	spin_lock_bh(&dev->bss_lock);
+
+	list_for_each_entry(bss, &dev->bss_list, list) {
+		if (channel && bss->pub.channel != channel)
+			continue;
+		if (is_mesh(&bss->pub, meshid, meshidlen, meshcfg)) {
+			res = bss;
+			kref_get(&res->ref);
+			break;
+		}
+	}
+
+	spin_unlock_bh(&dev->bss_lock);
+	if (!res)
+		return NULL;
+	return &res->pub;
+}
+EXPORT_SYMBOL(cfg80211_get_mesh);
+
+
+static void rb_insert_bss(struct cfg80211_registered_device *dev,
+			  struct cfg80211_internal_bss *bss)
+{
+	struct rb_node **p = &dev->bss_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct cfg80211_internal_bss *tbss;
+	int cmp;
+
+	while (*p) {
+		parent = *p;
+		tbss = rb_entry(parent, struct cfg80211_internal_bss, rbn);
+
+		cmp = cmp_bss(&bss->pub, &tbss->pub);
+
+		if (WARN_ON(!cmp)) {
+			/* will sort of leak this BSS */
+			return;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&bss->rbn, parent, p);
+	rb_insert_color(&bss->rbn, &dev->bss_tree);
+}
+
+static struct cfg80211_internal_bss *
+rb_find_bss(struct cfg80211_registered_device *dev,
+	    struct cfg80211_internal_bss *res)
+{
+	struct rb_node *n = dev->bss_tree.rb_node;
+	struct cfg80211_internal_bss *bss;
+	int r;
+
+	while (n) {
+		bss = rb_entry(n, struct cfg80211_internal_bss, rbn);
+		r = cmp_bss(&res->pub, &bss->pub);
+
+		if (r == 0)
+			return bss;
+		else if (r < 0)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+
+	return NULL;
+}
+
+static struct cfg80211_internal_bss *
+cfg80211_bss_update(struct cfg80211_registered_device *dev,
+		    struct cfg80211_internal_bss *res,
+		    bool overwrite)
+{
+	struct cfg80211_internal_bss *found = NULL;
+	const u8 *meshid, *meshcfg;
+
+	/*
+	 * The reference to "res" is donated to this function.
+	 */
+
+	if (WARN_ON(!res->pub.channel)) {
+		kref_put(&res->ref, bss_release);
+		return NULL;
+	}
+
+	res->ts = jiffies;
+
+	if (is_zero_ether_addr(res->pub.bssid)) {
+		/* must be mesh, verify */
+		meshid = find_ie(WLAN_EID_MESH_ID, res->pub.information_elements,
+				 res->pub.len_information_elements);
+		meshcfg = find_ie(WLAN_EID_MESH_CONFIG,
+				  res->pub.information_elements,
+				  res->pub.len_information_elements);
+		if (!meshid || !meshcfg ||
+		    meshcfg[1] != IEEE80211_MESH_CONFIG_LEN) {
+			/* bogus mesh */
+			kref_put(&res->ref, bss_release);
+			return NULL;
+		}
+	}
+
+	spin_lock_bh(&dev->bss_lock);
+
+	found = rb_find_bss(dev, res);
+
+	if (found && overwrite) {
+		list_replace(&found->list, &res->list);
+		rb_replace_node(&found->rbn, &res->rbn,
+				&dev->bss_tree);
+		kref_put(&found->ref, bss_release);
+		found = res;
+	} else if (found) {
+		kref_get(&found->ref);
+		found->pub.beacon_interval = res->pub.beacon_interval;
+		found->pub.tsf = res->pub.tsf;
+		found->pub.signal = res->pub.signal;
+		found->pub.signal_type = res->pub.signal_type;
+		found->pub.capability = res->pub.capability;
+		found->ts = res->ts;
+		kref_put(&res->ref, bss_release);
+	} else {
+		/* this "consumes" the reference */
+		list_add_tail(&res->list, &dev->bss_list);
+		rb_insert_bss(dev, res);
+		found = res;
+	}
+
+	dev->bss_generation++;
+	spin_unlock_bh(&dev->bss_lock);
+
+	kref_get(&found->ref);
+	return found;
+}
+
+struct cfg80211_bss *
+cfg80211_inform_bss_frame(struct wiphy *wiphy,
+			  struct ieee80211_channel *channel,
+			  struct ieee80211_mgmt *mgmt, size_t len,
+			  s32 signal, enum cfg80211_signal_type sigtype,
+			  gfp_t gfp)
+{
+	struct cfg80211_internal_bss *res;
+	size_t ielen = len - offsetof(struct ieee80211_mgmt,
+				      u.probe_resp.variable);
+	bool overwrite;
+	size_t privsz = wiphy->bss_priv_size;
+
+	if (WARN_ON(sigtype == NL80211_BSS_SIGNAL_UNSPEC &&
+	            (signal < 0 || signal > 100)))
+		return NULL;
+
+	if (WARN_ON(!mgmt || !wiphy ||
+		    len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
+		return NULL;
+
+	res = kzalloc(sizeof(*res) + privsz + ielen, gfp);
+	if (!res)
+		return NULL;
+
+	memcpy(res->pub.bssid, mgmt->bssid, ETH_ALEN);
+	res->pub.channel = channel;
+	res->pub.signal_type = sigtype;
+	res->pub.signal = signal;
+	res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
+	res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
+	res->pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
+	/* point to after the private area */
+	res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz;
+	memcpy(res->pub.information_elements, mgmt->u.probe_resp.variable, ielen);
+	res->pub.len_information_elements = ielen;
+
+	kref_init(&res->ref);
+
+	overwrite = ieee80211_is_probe_resp(mgmt->frame_control);
+
+	res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, overwrite);
+	if (!res)
+		return NULL;
+
+	/* cfg80211_bss_update gives us a referenced result */
+	return &res->pub;
+}
+EXPORT_SYMBOL(cfg80211_inform_bss_frame);
+
+void cfg80211_put_bss(struct cfg80211_bss *pub)
+{
+	struct cfg80211_internal_bss *bss;
+
+	if (!pub)
+		return;
+
+	bss = container_of(pub, struct cfg80211_internal_bss, pub);
+	kref_put(&bss->ref, bss_release);
+}
+EXPORT_SYMBOL(cfg80211_put_bss);
+
+#ifdef CONFIG_WIRELESS_EXT
+int cfg80211_wext_siwscan(struct net_device *dev,
+			  struct iw_request_info *info,
+			  union iwreq_data *wrqu, char *extra)
+{
+	struct cfg80211_registered_device *rdev;
+	struct wiphy *wiphy;
+	struct iw_scan_req *wreq = NULL;
+	struct cfg80211_scan_request *creq;
+	int i, err, n_channels = 0;
+	enum ieee80211_band band;
+
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
+	rdev = cfg80211_get_dev_from_ifindex(dev->ifindex);
+
+	if (IS_ERR(rdev))
+		return PTR_ERR(rdev);
+
+	if (rdev->scan_req) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	wiphy = &rdev->wiphy;
+
+	for (band = 0; band < IEEE80211_NUM_BANDS; band++)
+		if (wiphy->bands[band])
+			n_channels += wiphy->bands[band]->n_channels;
+
+	creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) +
+		       n_channels * sizeof(void *),
+		       GFP_ATOMIC);
+	if (!creq) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	creq->wiphy = wiphy;
+	creq->ifidx = dev->ifindex;
+	creq->ssids = (void *)(creq + 1);
+	creq->channels = (void *)(creq->ssids + 1);
+	creq->n_channels = n_channels;
+	creq->n_ssids = 1;
+
+	/* all channels */
+	i = 0;
+	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+		int j;
+		if (!wiphy->bands[band])
+			continue;
+		for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
+			creq->channels[i] = &wiphy->bands[band]->channels[j];
+			i++;
+		}
+	}
+
+	/* translate scan request */
+	if (wrqu->data.length == sizeof(struct iw_scan_req)) {
+		wreq = (struct iw_scan_req *)extra;
+
+		if (wrqu->data.flags & IW_SCAN_THIS_ESSID) {
+			if (wreq->essid_len > IEEE80211_MAX_SSID_LEN)
+				return -EINVAL;
+			memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len);
+			creq->ssids[0].ssid_len = wreq->essid_len;
+		}
+		if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE)
+			creq->n_ssids = 0;
+	}
+
+	rdev->scan_req = creq;
+	err = rdev->ops->scan(wiphy, dev, creq);
+	if (err) {
+		rdev->scan_req = NULL;
+		kfree(creq);
+	}
+ out:
+	cfg80211_put_dev(rdev);
+	return err;
+}
+EXPORT_SYMBOL(cfg80211_wext_siwscan);
+
+static void ieee80211_scan_add_ies(struct iw_request_info *info,
+				   struct cfg80211_bss *bss,
+				   char **current_ev, char *end_buf)
+{
+	u8 *pos, *end, *next;
+	struct iw_event iwe;
+
+	if (!bss->information_elements ||
+	    !bss->len_information_elements)
+		return;
+
+	/*
+	 * If needed, fragment the IEs buffer (at IE boundaries) into short
+	 * enough fragments to fit into IW_GENERIC_IE_MAX octet messages.
+	 */
+	pos = bss->information_elements;
+	end = pos + bss->len_information_elements;
+
+	while (end - pos > IW_GENERIC_IE_MAX) {
+		next = pos + 2 + pos[1];
+		while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX)
+			next = next + 2 + next[1];
+
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVGENIE;
+		iwe.u.data.length = next - pos;
+		*current_ev = iwe_stream_add_point(info, *current_ev,
+						   end_buf, &iwe, pos);
+
+		pos = next;
+	}
+
+	if (end > pos) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVGENIE;
+		iwe.u.data.length = end - pos;
+		*current_ev = iwe_stream_add_point(info, *current_ev,
+						   end_buf, &iwe, pos);
+	}
+}
+
+
+static char *
+ieee80211_bss(struct iw_request_info *info,
+		      struct cfg80211_internal_bss *bss,
+		      char *current_ev, char *end_buf)
+{
+	struct iw_event iwe;
+	u8 *buf, *cfg, *p;
+	u8 *ie = bss->pub.information_elements;
+	int rem = bss->pub.len_information_elements, i;
+	bool ismesh = false;
+
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = SIOCGIWAP;
+	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
+	memcpy(iwe.u.ap_addr.sa_data, bss->pub.bssid, ETH_ALEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
+					  IW_EV_ADDR_LEN);
+
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = SIOCGIWFREQ;
+	iwe.u.freq.m = ieee80211_frequency_to_channel(bss->pub.channel->center_freq);
+	iwe.u.freq.e = 0;
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
+					  IW_EV_FREQ_LEN);
+
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = SIOCGIWFREQ;
+	iwe.u.freq.m = bss->pub.channel->center_freq;
+	iwe.u.freq.e = 6;
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
+					  IW_EV_FREQ_LEN);
+
+	if (bss->pub.signal_type != CFG80211_SIGNAL_TYPE_NONE) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVQUAL;
+		iwe.u.qual.updated = IW_QUAL_LEVEL_UPDATED |
+				     IW_QUAL_NOISE_INVALID |
+				     IW_QUAL_QUAL_INVALID;
+		switch (bss->pub.signal_type) {
+		case CFG80211_SIGNAL_TYPE_MBM:
+			iwe.u.qual.level = bss->pub.signal / 100;
+			iwe.u.qual.updated |= IW_QUAL_DBM;
+			break;
+		case CFG80211_SIGNAL_TYPE_UNSPEC:
+			iwe.u.qual.level = bss->pub.signal;
+			break;
+		default:
+			/* not reached */
+			break;
+		}
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_QUAL_LEN);
+	}
+
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = SIOCGIWENCODE;
+	if (bss->pub.capability & WLAN_CAPABILITY_PRIVACY)
+		iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
+	else
+		iwe.u.data.flags = IW_ENCODE_DISABLED;
+	iwe.u.data.length = 0;
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+					  &iwe, "");
+
+	while (rem >= 2) {
+		/* invalid data */
+		if (ie[1] > rem - 2)
+			break;
+
+		switch (ie[0]) {
+		case WLAN_EID_SSID:
+			memset(&iwe, 0, sizeof(iwe));
+			iwe.cmd = SIOCGIWESSID;
+			iwe.u.data.length = ie[1];
+			iwe.u.data.flags = 1;
+			current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+							  &iwe, ie + 2);
+			break;
+		case WLAN_EID_MESH_ID:
+			memset(&iwe, 0, sizeof(iwe));
+			iwe.cmd = SIOCGIWESSID;
+			iwe.u.data.length = ie[1];
+			iwe.u.data.flags = 1;
+			current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+							  &iwe, ie + 2);
+			break;
+		case WLAN_EID_MESH_CONFIG:
+			ismesh = true;
+			if (ie[1] != IEEE80211_MESH_CONFIG_LEN)
+				break;
+			buf = kmalloc(50, GFP_ATOMIC);
+			if (!buf)
+				break;
+			cfg = ie + 2;
+			memset(&iwe, 0, sizeof(iwe));
+			iwe.cmd = IWEVCUSTOM;
+			sprintf(buf, "Mesh network (version %d)", cfg[0]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			sprintf(buf, "Path Selection Protocol ID: "
+				"0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3],
+							cfg[4]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			sprintf(buf, "Path Selection Metric ID: "
+				"0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7],
+							cfg[8]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			sprintf(buf, "Congestion Control Mode ID: "
+				"0x%02X%02X%02X%02X", cfg[9], cfg[10],
+							cfg[11], cfg[12]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			sprintf(buf, "Channel Precedence: "
+				"0x%02X%02X%02X%02X", cfg[13], cfg[14],
+							cfg[15], cfg[16]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			kfree(buf);
+			break;
+		case WLAN_EID_SUPP_RATES:
+		case WLAN_EID_EXT_SUPP_RATES:
+			/* display all supported rates in readable format */
+			p = current_ev + iwe_stream_lcp_len(info);
+
+			memset(&iwe, 0, sizeof(iwe));
+			iwe.cmd = SIOCGIWRATE;
+			/* Those two flags are ignored... */
+			iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
+
+			for (i = 0; i < ie[1]; i++) {
+				iwe.u.bitrate.value =
+					((ie[i + 2] & 0x7f) * 500000);
+				p = iwe_stream_add_value(info, current_ev, p,
+						end_buf, &iwe, IW_EV_PARAM_LEN);
+			}
+			current_ev = p;
+			break;
+		}
+		rem -= ie[1] + 2;
+		ie += ie[1] + 2;
+	}
+
+	if (bss->pub.capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)
+	    || ismesh) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = SIOCGIWMODE;
+		if (ismesh)
+			iwe.u.mode = IW_MODE_MESH;
+		else if (bss->pub.capability & WLAN_CAPABILITY_ESS)
+			iwe.u.mode = IW_MODE_MASTER;
+		else
+			iwe.u.mode = IW_MODE_ADHOC;
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_UINT_LEN);
+	}
+
+	buf = kmalloc(30, GFP_ATOMIC);
+	if (buf) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVCUSTOM;
+		sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->pub.tsf));
+		iwe.u.data.length = strlen(buf);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, buf);
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVCUSTOM;
+		sprintf(buf, " Last beacon: %dms ago",
+			jiffies_to_msecs(jiffies - bss->ts));
+		iwe.u.data.length = strlen(buf);
+		current_ev = iwe_stream_add_point(info, current_ev,
+						  end_buf, &iwe, buf);
+		kfree(buf);
+	}
+
+	ieee80211_scan_add_ies(info, &bss->pub, &current_ev, end_buf);
+
+	return current_ev;
+}
+
+
+static int ieee80211_scan_results(struct cfg80211_registered_device *dev,
+				  struct iw_request_info *info,
+				  char *buf, size_t len)
+{
+	char *current_ev = buf;
+	char *end_buf = buf + len;
+	struct cfg80211_internal_bss *bss;
+
+	spin_lock_bh(&dev->bss_lock);
+	cfg80211_bss_expire(dev);
+
+	list_for_each_entry(bss, &dev->bss_list, list) {
+		if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
+			spin_unlock_bh(&dev->bss_lock);
+			return -E2BIG;
+		}
+		current_ev = ieee80211_bss(info, bss,
+						   current_ev, end_buf);
+	}
+	spin_unlock_bh(&dev->bss_lock);
+	return current_ev - buf;
+}
+
+
+int cfg80211_wext_giwscan(struct net_device *dev,
+			  struct iw_request_info *info,
+			  struct iw_point *data, char *extra)
+{
+	struct cfg80211_registered_device *rdev;
+	int res;
+
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
+	rdev = cfg80211_get_dev_from_ifindex(dev->ifindex);
+
+	if (IS_ERR(rdev))
+		return PTR_ERR(rdev);
+
+	if (rdev->scan_req) {
+		res = -EAGAIN;
+		goto out;
+	}
+
+	res = ieee80211_scan_results(rdev, info, extra, data->length);
+	data->length = 0;
+	if (res >= 0) {
+		data->length = res;
+		res = 0;
+	}
+
+ out:
+	cfg80211_put_dev(rdev);
+	return res;
+}
+EXPORT_SYMBOL(cfg80211_wext_giwscan);
+#endif
-- 
cgit v1.2.3


From 35c26c2cf6a6a2d1c48add732d8ba002bd90784c Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sat, 14 Feb 2009 22:56:56 -0800
Subject: rndis: remove private wrapper of __constant_cpu_to_le32

Use cpu_to_le32 directly as it handles constant folding now, replace direct
uses of __constant_cpu_to_{endian} as well.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/rndis_host.c      | 25 +++++------
 drivers/net/wireless/rndis_wlan.c | 90 +++++++++++++++++++-------------------
 drivers/usb/gadget/rndis.c        | 92 +++++++++++++++++++--------------------
 include/linux/usb/rndis_host.h    | 85 +++++++++++++++++-------------------
 4 files changed, 143 insertions(+), 149 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
index bcd858c567e0..b7f763e1298c 100644
--- a/drivers/net/usb/rndis_host.c
+++ b/drivers/net/usb/rndis_host.c
@@ -169,7 +169,7 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen)
 				struct rndis_keepalive_c *msg = (void *)buf;
 
 				msg->msg_type = RNDIS_MSG_KEEPALIVE_C;
-				msg->msg_len = ccpu2(sizeof *msg);
+				msg->msg_len = cpu_to_le32(sizeof *msg);
 				msg->status = RNDIS_STATUS_SUCCESS;
 				retval = usb_control_msg(dev->udev,
 					usb_sndctrlpipe(dev->udev, 0),
@@ -237,7 +237,7 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf,
 	u.get->msg_len = cpu_to_le32(sizeof *u.get + in_len);
 	u.get->oid = oid;
 	u.get->len = cpu_to_le32(in_len);
-	u.get->offset = ccpu2(20);
+	u.get->offset = cpu_to_le32(20);
 
 	retval = rndis_command(dev, u.header, CONTROL_BUFFER_SIZE);
 	if (unlikely(retval < 0)) {
@@ -297,9 +297,9 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags)
 		goto fail;
 
 	u.init->msg_type = RNDIS_MSG_INIT;
-	u.init->msg_len = ccpu2(sizeof *u.init);
-	u.init->major_version = ccpu2(1);
-	u.init->minor_version = ccpu2(0);
+	u.init->msg_len = cpu_to_le32(sizeof *u.init);
+	u.init->major_version = cpu_to_le32(1);
+	u.init->minor_version = cpu_to_le32(0);
 
 	/* max transfer (in spec) is 0x4000 at full speed, but for
 	 * TX we'll stick to one Ethernet packet plus RNDIS framing.
@@ -403,10 +403,10 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags)
 	/* set a nonzero filter to enable data transfers */
 	memset(u.set, 0, sizeof *u.set);
 	u.set->msg_type = RNDIS_MSG_SET;
-	u.set->msg_len = ccpu2(4 + sizeof *u.set);
+	u.set->msg_len = cpu_to_le32(4 + sizeof *u.set);
 	u.set->oid = OID_GEN_CURRENT_PACKET_FILTER;
-	u.set->len = ccpu2(4);
-	u.set->offset = ccpu2((sizeof *u.set) - 8);
+	u.set->len = cpu_to_le32(4);
+	u.set->offset = cpu_to_le32((sizeof *u.set) - 8);
 	*(__le32 *)(u.buf + sizeof *u.set) = RNDIS_DEFAULT_FILTER;
 
 	retval = rndis_command(dev, u.header, CONTROL_BUFFER_SIZE);
@@ -423,7 +423,7 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags)
 halt_fail_and_release:
 	memset(u.halt, 0, sizeof *u.halt);
 	u.halt->msg_type = RNDIS_MSG_HALT;
-	u.halt->msg_len = ccpu2(sizeof *u.halt);
+	u.halt->msg_len = cpu_to_le32(sizeof *u.halt);
 	(void) rndis_command(dev, (void *)u.halt, CONTROL_BUFFER_SIZE);
 fail_and_release:
 	usb_set_intfdata(info->data, NULL);
@@ -448,7 +448,7 @@ void rndis_unbind(struct usbnet *dev, struct usb_interface *intf)
 	halt = kzalloc(CONTROL_BUFFER_SIZE, GFP_KERNEL);
 	if (halt) {
 		halt->msg_type = RNDIS_MSG_HALT;
-		halt->msg_len = ccpu2(sizeof *halt);
+		halt->msg_len = cpu_to_le32(sizeof *halt);
 		(void) rndis_command(dev, (void *)halt, CONTROL_BUFFER_SIZE);
 		kfree(halt);
 	}
@@ -543,7 +543,7 @@ fill:
 	memset(hdr, 0, sizeof *hdr);
 	hdr->msg_type = RNDIS_MSG_PACKET;
 	hdr->msg_len = cpu_to_le32(skb->len);
-	hdr->data_offset = ccpu2(sizeof(*hdr) - 8);
+	hdr->data_offset = cpu_to_le32(sizeof(*hdr) - 8);
 	hdr->data_len = cpu_to_le32(len);
 
 	/* FIXME make the last packet always be short ... */
@@ -562,9 +562,6 @@ static const struct driver_info	rndis_info = {
 	.tx_fixup =	rndis_tx_fixup,
 };
 
-#undef ccpu2
-
-
 /*-------------------------------------------------------------------------*/
 
 static const struct usb_device_id	products [] = {
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 105f214e21f4..82af21eeb592 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -90,44 +90,44 @@ MODULE_PARM_DESC(workaround_interval,
 
 
 /* various RNDIS OID defs */
-#define OID_GEN_LINK_SPEED			ccpu2(0x00010107)
-#define OID_GEN_RNDIS_CONFIG_PARAMETER		ccpu2(0x0001021b)
-
-#define OID_GEN_XMIT_OK				ccpu2(0x00020101)
-#define OID_GEN_RCV_OK				ccpu2(0x00020102)
-#define OID_GEN_XMIT_ERROR			ccpu2(0x00020103)
-#define OID_GEN_RCV_ERROR			ccpu2(0x00020104)
-#define OID_GEN_RCV_NO_BUFFER			ccpu2(0x00020105)
-
-#define OID_802_3_PERMANENT_ADDRESS		ccpu2(0x01010101)
-#define OID_802_3_CURRENT_ADDRESS		ccpu2(0x01010102)
-#define OID_802_3_MULTICAST_LIST		ccpu2(0x01010103)
-#define OID_802_3_MAXIMUM_LIST_SIZE		ccpu2(0x01010104)
-
-#define OID_802_11_BSSID			ccpu2(0x0d010101)
-#define OID_802_11_SSID				ccpu2(0x0d010102)
-#define OID_802_11_INFRASTRUCTURE_MODE		ccpu2(0x0d010108)
-#define OID_802_11_ADD_WEP			ccpu2(0x0d010113)
-#define OID_802_11_REMOVE_WEP			ccpu2(0x0d010114)
-#define OID_802_11_DISASSOCIATE			ccpu2(0x0d010115)
-#define OID_802_11_AUTHENTICATION_MODE		ccpu2(0x0d010118)
-#define OID_802_11_PRIVACY_FILTER		ccpu2(0x0d010119)
-#define OID_802_11_BSSID_LIST_SCAN		ccpu2(0x0d01011a)
-#define OID_802_11_ENCRYPTION_STATUS		ccpu2(0x0d01011b)
-#define OID_802_11_ADD_KEY			ccpu2(0x0d01011d)
-#define OID_802_11_REMOVE_KEY			ccpu2(0x0d01011e)
-#define OID_802_11_ASSOCIATION_INFORMATION	ccpu2(0x0d01011f)
-#define OID_802_11_PMKID			ccpu2(0x0d010123)
-#define OID_802_11_NETWORK_TYPES_SUPPORTED	ccpu2(0x0d010203)
-#define OID_802_11_NETWORK_TYPE_IN_USE		ccpu2(0x0d010204)
-#define OID_802_11_TX_POWER_LEVEL		ccpu2(0x0d010205)
-#define OID_802_11_RSSI				ccpu2(0x0d010206)
-#define OID_802_11_RSSI_TRIGGER			ccpu2(0x0d010207)
-#define OID_802_11_FRAGMENTATION_THRESHOLD	ccpu2(0x0d010209)
-#define OID_802_11_RTS_THRESHOLD		ccpu2(0x0d01020a)
-#define OID_802_11_SUPPORTED_RATES		ccpu2(0x0d01020e)
-#define OID_802_11_CONFIGURATION		ccpu2(0x0d010211)
-#define OID_802_11_BSSID_LIST			ccpu2(0x0d010217)
+#define OID_GEN_LINK_SPEED			cpu_to_le32(0x00010107)
+#define OID_GEN_RNDIS_CONFIG_PARAMETER		cpu_to_le32(0x0001021b)
+
+#define OID_GEN_XMIT_OK				cpu_to_le32(0x00020101)
+#define OID_GEN_RCV_OK				cpu_to_le32(0x00020102)
+#define OID_GEN_XMIT_ERROR			cpu_to_le32(0x00020103)
+#define OID_GEN_RCV_ERROR			cpu_to_le32(0x00020104)
+#define OID_GEN_RCV_NO_BUFFER			cpu_to_le32(0x00020105)
+
+#define OID_802_3_PERMANENT_ADDRESS		cpu_to_le32(0x01010101)
+#define OID_802_3_CURRENT_ADDRESS		cpu_to_le32(0x01010102)
+#define OID_802_3_MULTICAST_LIST		cpu_to_le32(0x01010103)
+#define OID_802_3_MAXIMUM_LIST_SIZE		cpu_to_le32(0x01010104)
+
+#define OID_802_11_BSSID			cpu_to_le32(0x0d010101)
+#define OID_802_11_SSID				cpu_to_le32(0x0d010102)
+#define OID_802_11_INFRASTRUCTURE_MODE		cpu_to_le32(0x0d010108)
+#define OID_802_11_ADD_WEP			cpu_to_le32(0x0d010113)
+#define OID_802_11_REMOVE_WEP			cpu_to_le32(0x0d010114)
+#define OID_802_11_DISASSOCIATE			cpu_to_le32(0x0d010115)
+#define OID_802_11_AUTHENTICATION_MODE		cpu_to_le32(0x0d010118)
+#define OID_802_11_PRIVACY_FILTER		cpu_to_le32(0x0d010119)
+#define OID_802_11_BSSID_LIST_SCAN		cpu_to_le32(0x0d01011a)
+#define OID_802_11_ENCRYPTION_STATUS		cpu_to_le32(0x0d01011b)
+#define OID_802_11_ADD_KEY			cpu_to_le32(0x0d01011d)
+#define OID_802_11_REMOVE_KEY			cpu_to_le32(0x0d01011e)
+#define OID_802_11_ASSOCIATION_INFORMATION	cpu_to_le32(0x0d01011f)
+#define OID_802_11_PMKID			cpu_to_le32(0x0d010123)
+#define OID_802_11_NETWORK_TYPES_SUPPORTED	cpu_to_le32(0x0d010203)
+#define OID_802_11_NETWORK_TYPE_IN_USE		cpu_to_le32(0x0d010204)
+#define OID_802_11_TX_POWER_LEVEL		cpu_to_le32(0x0d010205)
+#define OID_802_11_RSSI				cpu_to_le32(0x0d010206)
+#define OID_802_11_RSSI_TRIGGER			cpu_to_le32(0x0d010207)
+#define OID_802_11_FRAGMENTATION_THRESHOLD	cpu_to_le32(0x0d010209)
+#define OID_802_11_RTS_THRESHOLD		cpu_to_le32(0x0d01020a)
+#define OID_802_11_SUPPORTED_RATES		cpu_to_le32(0x0d01020e)
+#define OID_802_11_CONFIGURATION		cpu_to_le32(0x0d010211)
+#define OID_802_11_BSSID_LIST			cpu_to_le32(0x0d010217)
 
 
 /* Typical noise/maximum signal level values taken from ndiswrapper iw_ndis.h */
@@ -144,8 +144,8 @@ MODULE_PARM_DESC(workaround_interval,
 
 
 /* codes for "status" field of completion messages */
-#define RNDIS_STATUS_ADAPTER_NOT_READY		ccpu2(0xc0010011)
-#define RNDIS_STATUS_ADAPTER_NOT_OPEN		ccpu2(0xc0010012)
+#define RNDIS_STATUS_ADAPTER_NOT_READY		cpu_to_le32(0xc0010011)
+#define RNDIS_STATUS_ADAPTER_NOT_OPEN		cpu_to_le32(0xc0010012)
 
 
 /* NDIS data structures. Taken from wpa_supplicant driver_ndis.c
@@ -442,7 +442,7 @@ static int rndis_query_oid(struct usbnet *dev, __le32 oid, void *data, int *len)
 
 	memset(u.get, 0, sizeof *u.get);
 	u.get->msg_type = RNDIS_MSG_QUERY;
-	u.get->msg_len = ccpu2(sizeof *u.get);
+	u.get->msg_len = cpu_to_le32(sizeof *u.get);
 	u.get->oid = oid;
 
 	ret = rndis_command(dev, u.header, buflen);
@@ -491,8 +491,8 @@ static int rndis_set_oid(struct usbnet *dev, __le32 oid, void *data, int len)
 	u.set->msg_len = cpu_to_le32(sizeof(*u.set) + len);
 	u.set->oid = oid;
 	u.set->len = cpu_to_le32(len);
-	u.set->offset = ccpu2(sizeof(*u.set) - 8);
-	u.set->handle = ccpu2(0);
+	u.set->offset = cpu_to_le32(sizeof(*u.set) - 8);
+	u.set->handle = cpu_to_le32(0);
 	memcpy(u.buf + sizeof(*u.set), data, len);
 
 	ret = rndis_command(dev, u.header, buflen);
@@ -1630,7 +1630,7 @@ static int rndis_iw_set_scan(struct net_device *dev,
 	devdbg(usbdev, "SIOCSIWSCAN");
 
 	if (wrqu->data.flags == 0) {
-		tmp = ccpu2(1);
+		tmp = cpu_to_le32(1);
 		ret = rndis_set_oid(usbdev, OID_802_11_BSSID_LIST_SCAN, &tmp,
 								sizeof(tmp));
 		evt.data.flags = 0;
@@ -2428,7 +2428,7 @@ static void rndis_update_wireless_stats(struct work_struct *work)
 		/* Send scan OID. Use of both OIDs is required to get device
 		 * working.
 		 */
-		tmp = ccpu2(1);
+		tmp = cpu_to_le32(1);
 		rndis_set_oid(usbdev, OID_802_11_BSSID_LIST_SCAN, &tmp,
 								sizeof(tmp));
 
diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
index 8c26f5ea2b83..d2860a823680 100644
--- a/drivers/usb/gadget/rndis.c
+++ b/drivers/usb/gadget/rndis.c
@@ -63,7 +63,7 @@ MODULE_PARM_DESC (rndis_debug, "enable debugging");
 static rndis_params rndis_per_dev_params [RNDIS_MAX_CONFIGS];
 
 /* Driver Version */
-static const __le32 rndis_driver_version = __constant_cpu_to_le32 (1);
+static const __le32 rndis_driver_version = cpu_to_le32 (1);
 
 /* Function Prototypes */
 static rndis_resp_t *rndis_add_response (int configNr, u32 length);
@@ -190,7 +190,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 
 	/* response goes here, right after the header */
 	outbuf = (__le32 *) &resp[1];
-	resp->InformationBufferOffset = __constant_cpu_to_le32 (16);
+	resp->InformationBufferOffset = cpu_to_le32 (16);
 
 	net = rndis_per_dev_params[configNr].dev;
 	if (net->get_stats)
@@ -221,7 +221,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 		 * reddite ergo quae sunt Caesaris Caesari
 		 * et quae sunt Dei Deo!
 		 */
-		*outbuf = __constant_cpu_to_le32 (0);
+		*outbuf = cpu_to_le32 (0);
 		retval = 0;
 		break;
 
@@ -256,7 +256,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 			pr_debug("%s: OID_GEN_LINK_SPEED\n", __func__);
 		if (rndis_per_dev_params [configNr].media_state
 				== NDIS_MEDIA_STATE_DISCONNECTED)
-			*outbuf = __constant_cpu_to_le32 (0);
+			*outbuf = cpu_to_le32 (0);
 		else
 			*outbuf = cpu_to_le32 (
 				rndis_per_dev_params [configNr].speed);
@@ -317,7 +317,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 	/* mandatory */
 	case OID_GEN_MAXIMUM_TOTAL_SIZE:
 		pr_debug("%s: OID_GEN_MAXIMUM_TOTAL_SIZE\n", __func__);
-		*outbuf = __constant_cpu_to_le32(RNDIS_MAX_TOTAL_SIZE);
+		*outbuf = cpu_to_le32(RNDIS_MAX_TOTAL_SIZE);
 		retval = 0;
 		break;
 
@@ -332,7 +332,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 
 	case OID_GEN_PHYSICAL_MEDIUM:
 		pr_debug("%s: OID_GEN_PHYSICAL_MEDIUM\n", __func__);
-		*outbuf = __constant_cpu_to_le32 (0);
+		*outbuf = cpu_to_le32 (0);
 		retval = 0;
 		break;
 
@@ -342,7 +342,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 	 */
 	case OID_GEN_MAC_OPTIONS:		/* from WinME */
 		pr_debug("%s: OID_GEN_MAC_OPTIONS\n", __func__);
-		*outbuf = __constant_cpu_to_le32(
+		*outbuf = cpu_to_le32(
 			  NDIS_MAC_OPTION_RECEIVE_SERIALIZED
 			| NDIS_MAC_OPTION_FULL_DUPLEX);
 		retval = 0;
@@ -431,7 +431,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 	case OID_802_3_MULTICAST_LIST:
 		pr_debug("%s: OID_802_3_MULTICAST_LIST\n", __func__);
 		/* Multicast base address only */
-		*outbuf = __constant_cpu_to_le32 (0xE0000000);
+		*outbuf = cpu_to_le32 (0xE0000000);
 		retval = 0;
 		break;
 
@@ -439,7 +439,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 	case OID_802_3_MAXIMUM_LIST_SIZE:
 		pr_debug("%s: OID_802_3_MAXIMUM_LIST_SIZE\n", __func__);
 		/* Multicast base address only */
-		*outbuf = __constant_cpu_to_le32 (1);
+		*outbuf = cpu_to_le32 (1);
 		retval = 0;
 		break;
 
@@ -461,14 +461,14 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 	/* mandatory */
 	case OID_802_3_XMIT_ONE_COLLISION:
 		pr_debug("%s: OID_802_3_XMIT_ONE_COLLISION\n", __func__);
-		*outbuf = __constant_cpu_to_le32 (0);
+		*outbuf = cpu_to_le32 (0);
 		retval = 0;
 		break;
 
 	/* mandatory */
 	case OID_802_3_XMIT_MORE_COLLISIONS:
 		pr_debug("%s: OID_802_3_XMIT_MORE_COLLISIONS\n", __func__);
-		*outbuf = __constant_cpu_to_le32 (0);
+		*outbuf = cpu_to_le32 (0);
 		retval = 0;
 		break;
 
@@ -572,24 +572,24 @@ static int rndis_init_response (int configNr, rndis_init_msg_type *buf)
 		return -ENOMEM;
 	resp = (rndis_init_cmplt_type *) r->buf;
 
-	resp->MessageType = __constant_cpu_to_le32 (
+	resp->MessageType = cpu_to_le32 (
 			REMOTE_NDIS_INITIALIZE_CMPLT);
-	resp->MessageLength = __constant_cpu_to_le32 (52);
+	resp->MessageLength = cpu_to_le32 (52);
 	resp->RequestID = buf->RequestID; /* Still LE in msg buffer */
-	resp->Status = __constant_cpu_to_le32 (RNDIS_STATUS_SUCCESS);
-	resp->MajorVersion = __constant_cpu_to_le32 (RNDIS_MAJOR_VERSION);
-	resp->MinorVersion = __constant_cpu_to_le32 (RNDIS_MINOR_VERSION);
-	resp->DeviceFlags = __constant_cpu_to_le32 (RNDIS_DF_CONNECTIONLESS);
-	resp->Medium = __constant_cpu_to_le32 (RNDIS_MEDIUM_802_3);
-	resp->MaxPacketsPerTransfer = __constant_cpu_to_le32 (1);
+	resp->Status = cpu_to_le32 (RNDIS_STATUS_SUCCESS);
+	resp->MajorVersion = cpu_to_le32 (RNDIS_MAJOR_VERSION);
+	resp->MinorVersion = cpu_to_le32 (RNDIS_MINOR_VERSION);
+	resp->DeviceFlags = cpu_to_le32 (RNDIS_DF_CONNECTIONLESS);
+	resp->Medium = cpu_to_le32 (RNDIS_MEDIUM_802_3);
+	resp->MaxPacketsPerTransfer = cpu_to_le32 (1);
 	resp->MaxTransferSize = cpu_to_le32 (
 		  params->dev->mtu
 		+ sizeof (struct ethhdr)
 		+ sizeof (struct rndis_packet_msg_type)
 		+ 22);
-	resp->PacketAlignmentFactor = __constant_cpu_to_le32 (0);
-	resp->AFListOffset = __constant_cpu_to_le32 (0);
-	resp->AFListSize = __constant_cpu_to_le32 (0);
+	resp->PacketAlignmentFactor = cpu_to_le32 (0);
+	resp->AFListOffset = cpu_to_le32 (0);
+	resp->AFListSize = cpu_to_le32 (0);
 
 	params->resp_avail(params->v);
 	return 0;
@@ -617,7 +617,7 @@ static int rndis_query_response (int configNr, rndis_query_msg_type *buf)
 		return -ENOMEM;
 	resp = (rndis_query_cmplt_type *) r->buf;
 
-	resp->MessageType = __constant_cpu_to_le32 (REMOTE_NDIS_QUERY_CMPLT);
+	resp->MessageType = cpu_to_le32 (REMOTE_NDIS_QUERY_CMPLT);
 	resp->RequestID = buf->RequestID; /* Still LE in msg buffer */
 
 	if (gen_ndis_query_resp (configNr, le32_to_cpu (buf->OID),
@@ -626,13 +626,13 @@ static int rndis_query_response (int configNr, rndis_query_msg_type *buf)
 			le32_to_cpu(buf->InformationBufferLength),
 			r)) {
 		/* OID not supported */
-		resp->Status = __constant_cpu_to_le32 (
+		resp->Status = cpu_to_le32 (
 				RNDIS_STATUS_NOT_SUPPORTED);
-		resp->MessageLength = __constant_cpu_to_le32 (sizeof *resp);
-		resp->InformationBufferLength = __constant_cpu_to_le32 (0);
-		resp->InformationBufferOffset = __constant_cpu_to_le32 (0);
+		resp->MessageLength = cpu_to_le32 (sizeof *resp);
+		resp->InformationBufferLength = cpu_to_le32 (0);
+		resp->InformationBufferOffset = cpu_to_le32 (0);
 	} else
-		resp->Status = __constant_cpu_to_le32 (RNDIS_STATUS_SUCCESS);
+		resp->Status = cpu_to_le32 (RNDIS_STATUS_SUCCESS);
 
 	params->resp_avail(params->v);
 	return 0;
@@ -665,14 +665,14 @@ static int rndis_set_response (int configNr, rndis_set_msg_type *buf)
 	pr_debug("\n");
 #endif
 
-	resp->MessageType = __constant_cpu_to_le32 (REMOTE_NDIS_SET_CMPLT);
-	resp->MessageLength = __constant_cpu_to_le32 (16);
+	resp->MessageType = cpu_to_le32 (REMOTE_NDIS_SET_CMPLT);
+	resp->MessageLength = cpu_to_le32 (16);
 	resp->RequestID = buf->RequestID; /* Still LE in msg buffer */
 	if (gen_ndis_set_resp (configNr, le32_to_cpu (buf->OID),
 			((u8 *) buf) + 8 + BufOffset, BufLength, r))
-		resp->Status = __constant_cpu_to_le32 (RNDIS_STATUS_NOT_SUPPORTED);
+		resp->Status = cpu_to_le32 (RNDIS_STATUS_NOT_SUPPORTED);
 	else
-		resp->Status = __constant_cpu_to_le32 (RNDIS_STATUS_SUCCESS);
+		resp->Status = cpu_to_le32 (RNDIS_STATUS_SUCCESS);
 
 	params->resp_avail(params->v);
 	return 0;
@@ -689,11 +689,11 @@ static int rndis_reset_response (int configNr, rndis_reset_msg_type *buf)
 		return -ENOMEM;
 	resp = (rndis_reset_cmplt_type *) r->buf;
 
-	resp->MessageType = __constant_cpu_to_le32 (REMOTE_NDIS_RESET_CMPLT);
-	resp->MessageLength = __constant_cpu_to_le32 (16);
-	resp->Status = __constant_cpu_to_le32 (RNDIS_STATUS_SUCCESS);
+	resp->MessageType = cpu_to_le32 (REMOTE_NDIS_RESET_CMPLT);
+	resp->MessageLength = cpu_to_le32 (16);
+	resp->Status = cpu_to_le32 (RNDIS_STATUS_SUCCESS);
 	/* resent information */
-	resp->AddressingReset = __constant_cpu_to_le32 (1);
+	resp->AddressingReset = cpu_to_le32 (1);
 
 	params->resp_avail(params->v);
 	return 0;
@@ -713,11 +713,11 @@ static int rndis_keepalive_response (int configNr,
 		return -ENOMEM;
 	resp = (rndis_keepalive_cmplt_type *) r->buf;
 
-	resp->MessageType = __constant_cpu_to_le32 (
+	resp->MessageType = cpu_to_le32 (
 			REMOTE_NDIS_KEEPALIVE_CMPLT);
-	resp->MessageLength = __constant_cpu_to_le32 (16);
+	resp->MessageLength = cpu_to_le32 (16);
 	resp->RequestID = buf->RequestID; /* Still LE in msg buffer */
-	resp->Status = __constant_cpu_to_le32 (RNDIS_STATUS_SUCCESS);
+	resp->Status = cpu_to_le32 (RNDIS_STATUS_SUCCESS);
 
 	params->resp_avail(params->v);
 	return 0;
@@ -742,12 +742,12 @@ static int rndis_indicate_status_msg (int configNr, u32 status)
 		return -ENOMEM;
 	resp = (rndis_indicate_status_msg_type *) r->buf;
 
-	resp->MessageType = __constant_cpu_to_le32 (
+	resp->MessageType = cpu_to_le32 (
 			REMOTE_NDIS_INDICATE_STATUS_MSG);
-	resp->MessageLength = __constant_cpu_to_le32 (20);
+	resp->MessageLength = cpu_to_le32 (20);
 	resp->Status = cpu_to_le32 (status);
-	resp->StatusBufferLength = __constant_cpu_to_le32 (0);
-	resp->StatusBufferOffset = __constant_cpu_to_le32 (0);
+	resp->StatusBufferLength = cpu_to_le32 (0);
+	resp->StatusBufferOffset = cpu_to_le32 (0);
 
 	params->resp_avail(params->v);
 	return 0;
@@ -963,9 +963,9 @@ void rndis_add_hdr (struct sk_buff *skb)
 		return;
 	header = (void *) skb_push (skb, sizeof *header);
 	memset (header, 0, sizeof *header);
-	header->MessageType = __constant_cpu_to_le32(REMOTE_NDIS_PACKET_MSG);
+	header->MessageType = cpu_to_le32(REMOTE_NDIS_PACKET_MSG);
 	header->MessageLength = cpu_to_le32(skb->len);
-	header->DataOffset = __constant_cpu_to_le32 (36);
+	header->DataOffset = cpu_to_le32 (36);
 	header->DataLength = cpu_to_le32(skb->len - sizeof *header);
 }
 
@@ -1029,7 +1029,7 @@ int rndis_rm_hdr(struct sk_buff *skb)
 	__le32		*tmp = (void *) skb->data;
 
 	/* MessageType, MessageLength */
-	if (__constant_cpu_to_le32(REMOTE_NDIS_PACKET_MSG)
+	if (cpu_to_le32(REMOTE_NDIS_PACKET_MSG)
 			!= get_unaligned(tmp++))
 		return -EINVAL;
 	tmp++;
diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h
index 0a6e6d4b929a..37836b937d97 100644
--- a/include/linux/usb/rndis_host.h
+++ b/include/linux/usb/rndis_host.h
@@ -49,48 +49,45 @@ struct rndis_msg_hdr {
  */
 #define	RNDIS_CONTROL_TIMEOUT_MS	(5 * 1000)
 
-
-#define ccpu2 __constant_cpu_to_le32
-
-#define RNDIS_MSG_COMPLETION	ccpu2(0x80000000)
+#define RNDIS_MSG_COMPLETION	cpu_to_le32(0x80000000)
 
 /* codes for "msg_type" field of rndis messages;
  * only the data channel uses packet messages (maybe batched);
  * everything else goes on the control channel.
  */
-#define RNDIS_MSG_PACKET	ccpu2(0x00000001)	/* 1-N packets */
-#define RNDIS_MSG_INIT		ccpu2(0x00000002)
+#define RNDIS_MSG_PACKET	cpu_to_le32(0x00000001)	/* 1-N packets */
+#define RNDIS_MSG_INIT		cpu_to_le32(0x00000002)
 #define RNDIS_MSG_INIT_C	(RNDIS_MSG_INIT|RNDIS_MSG_COMPLETION)
-#define RNDIS_MSG_HALT		ccpu2(0x00000003)
-#define RNDIS_MSG_QUERY		ccpu2(0x00000004)
+#define RNDIS_MSG_HALT		cpu_to_le32(0x00000003)
+#define RNDIS_MSG_QUERY		cpu_to_le32(0x00000004)
 #define RNDIS_MSG_QUERY_C	(RNDIS_MSG_QUERY|RNDIS_MSG_COMPLETION)
-#define RNDIS_MSG_SET		ccpu2(0x00000005)
+#define RNDIS_MSG_SET		cpu_to_le32(0x00000005)
 #define RNDIS_MSG_SET_C		(RNDIS_MSG_SET|RNDIS_MSG_COMPLETION)
-#define RNDIS_MSG_RESET		ccpu2(0x00000006)
+#define RNDIS_MSG_RESET		cpu_to_le32(0x00000006)
 #define RNDIS_MSG_RESET_C	(RNDIS_MSG_RESET|RNDIS_MSG_COMPLETION)
-#define RNDIS_MSG_INDICATE	ccpu2(0x00000007)
-#define RNDIS_MSG_KEEPALIVE	ccpu2(0x00000008)
+#define RNDIS_MSG_INDICATE	cpu_to_le32(0x00000007)
+#define RNDIS_MSG_KEEPALIVE	cpu_to_le32(0x00000008)
 #define RNDIS_MSG_KEEPALIVE_C	(RNDIS_MSG_KEEPALIVE|RNDIS_MSG_COMPLETION)
 
 /* codes for "status" field of completion messages */
-#define	RNDIS_STATUS_SUCCESS		ccpu2(0x00000000)
-#define	RNDIS_STATUS_FAILURE		ccpu2(0xc0000001)
-#define	RNDIS_STATUS_INVALID_DATA	ccpu2(0xc0010015)
-#define	RNDIS_STATUS_NOT_SUPPORTED	ccpu2(0xc00000bb)
-#define	RNDIS_STATUS_MEDIA_CONNECT	ccpu2(0x4001000b)
-#define	RNDIS_STATUS_MEDIA_DISCONNECT	ccpu2(0x4001000c)
+#define	RNDIS_STATUS_SUCCESS		cpu_to_le32(0x00000000)
+#define	RNDIS_STATUS_FAILURE		cpu_to_le32(0xc0000001)
+#define	RNDIS_STATUS_INVALID_DATA	cpu_to_le32(0xc0010015)
+#define	RNDIS_STATUS_NOT_SUPPORTED	cpu_to_le32(0xc00000bb)
+#define	RNDIS_STATUS_MEDIA_CONNECT	cpu_to_le32(0x4001000b)
+#define	RNDIS_STATUS_MEDIA_DISCONNECT	cpu_to_le32(0x4001000c)
 
 /* codes for OID_GEN_PHYSICAL_MEDIUM */
-#define	RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED	ccpu2(0x00000000)
-#define	RNDIS_PHYSICAL_MEDIUM_WIRELESS_LAN	ccpu2(0x00000001)
-#define	RNDIS_PHYSICAL_MEDIUM_CABLE_MODEM	ccpu2(0x00000002)
-#define	RNDIS_PHYSICAL_MEDIUM_PHONE_LINE	ccpu2(0x00000003)
-#define	RNDIS_PHYSICAL_MEDIUM_POWER_LINE	ccpu2(0x00000004)
-#define	RNDIS_PHYSICAL_MEDIUM_DSL		ccpu2(0x00000005)
-#define	RNDIS_PHYSICAL_MEDIUM_FIBRE_CHANNEL	ccpu2(0x00000006)
-#define	RNDIS_PHYSICAL_MEDIUM_1394		ccpu2(0x00000007)
-#define	RNDIS_PHYSICAL_MEDIUM_WIRELESS_WAN	ccpu2(0x00000008)
-#define	RNDIS_PHYSICAL_MEDIUM_MAX		ccpu2(0x00000009)
+#define	RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED	cpu_to_le32(0x00000000)
+#define	RNDIS_PHYSICAL_MEDIUM_WIRELESS_LAN	cpu_to_le32(0x00000001)
+#define	RNDIS_PHYSICAL_MEDIUM_CABLE_MODEM	cpu_to_le32(0x00000002)
+#define	RNDIS_PHYSICAL_MEDIUM_PHONE_LINE	cpu_to_le32(0x00000003)
+#define	RNDIS_PHYSICAL_MEDIUM_POWER_LINE	cpu_to_le32(0x00000004)
+#define	RNDIS_PHYSICAL_MEDIUM_DSL		cpu_to_le32(0x00000005)
+#define	RNDIS_PHYSICAL_MEDIUM_FIBRE_CHANNEL	cpu_to_le32(0x00000006)
+#define	RNDIS_PHYSICAL_MEDIUM_1394		cpu_to_le32(0x00000007)
+#define	RNDIS_PHYSICAL_MEDIUM_WIRELESS_WAN	cpu_to_le32(0x00000008)
+#define	RNDIS_PHYSICAL_MEDIUM_MAX		cpu_to_le32(0x00000009)
 
 struct rndis_data_hdr {
 	__le32	msg_type;		/* RNDIS_MSG_PACKET */
@@ -228,24 +225,24 @@ struct rndis_keepalive_c {	/* IN (optionally OUT) */
  * there are gobs more that may optionally be supported.  We'll avoid as much
  * of that mess as possible.
  */
-#define OID_802_3_PERMANENT_ADDRESS	ccpu2(0x01010101)
-#define OID_GEN_MAXIMUM_FRAME_SIZE	ccpu2(0x00010106)
-#define OID_GEN_CURRENT_PACKET_FILTER	ccpu2(0x0001010e)
-#define OID_GEN_PHYSICAL_MEDIUM		ccpu2(0x00010202)
+#define OID_802_3_PERMANENT_ADDRESS	cpu_to_le32(0x01010101)
+#define OID_GEN_MAXIMUM_FRAME_SIZE	cpu_to_le32(0x00010106)
+#define OID_GEN_CURRENT_PACKET_FILTER	cpu_to_le32(0x0001010e)
+#define OID_GEN_PHYSICAL_MEDIUM		cpu_to_le32(0x00010202)
 
 /* packet filter bits used by OID_GEN_CURRENT_PACKET_FILTER */
-#define RNDIS_PACKET_TYPE_DIRECTED		ccpu2(0x00000001)
-#define RNDIS_PACKET_TYPE_MULTICAST		ccpu2(0x00000002)
-#define RNDIS_PACKET_TYPE_ALL_MULTICAST		ccpu2(0x00000004)
-#define RNDIS_PACKET_TYPE_BROADCAST		ccpu2(0x00000008)
-#define RNDIS_PACKET_TYPE_SOURCE_ROUTING	ccpu2(0x00000010)
-#define RNDIS_PACKET_TYPE_PROMISCUOUS		ccpu2(0x00000020)
-#define RNDIS_PACKET_TYPE_SMT			ccpu2(0x00000040)
-#define RNDIS_PACKET_TYPE_ALL_LOCAL		ccpu2(0x00000080)
-#define RNDIS_PACKET_TYPE_GROUP			ccpu2(0x00001000)
-#define RNDIS_PACKET_TYPE_ALL_FUNCTIONAL	ccpu2(0x00002000)
-#define RNDIS_PACKET_TYPE_FUNCTIONAL		ccpu2(0x00004000)
-#define RNDIS_PACKET_TYPE_MAC_FRAME		ccpu2(0x00008000)
+#define RNDIS_PACKET_TYPE_DIRECTED		cpu_to_le32(0x00000001)
+#define RNDIS_PACKET_TYPE_MULTICAST		cpu_to_le32(0x00000002)
+#define RNDIS_PACKET_TYPE_ALL_MULTICAST		cpu_to_le32(0x00000004)
+#define RNDIS_PACKET_TYPE_BROADCAST		cpu_to_le32(0x00000008)
+#define RNDIS_PACKET_TYPE_SOURCE_ROUTING	cpu_to_le32(0x00000010)
+#define RNDIS_PACKET_TYPE_PROMISCUOUS		cpu_to_le32(0x00000020)
+#define RNDIS_PACKET_TYPE_SMT			cpu_to_le32(0x00000040)
+#define RNDIS_PACKET_TYPE_ALL_LOCAL		cpu_to_le32(0x00000080)
+#define RNDIS_PACKET_TYPE_GROUP			cpu_to_le32(0x00001000)
+#define RNDIS_PACKET_TYPE_ALL_FUNCTIONAL	cpu_to_le32(0x00002000)
+#define RNDIS_PACKET_TYPE_FUNCTIONAL		cpu_to_le32(0x00004000)
+#define RNDIS_PACKET_TYPE_MAC_FRAME		cpu_to_le32(0x00008000)
 
 /* default filter used with RNDIS devices */
 #define RNDIS_DEFAULT_FILTER ( \
-- 
cgit v1.2.3


From f3a7c66b5ce0b75a9774a50b5dcce93e5ba28370 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sat, 14 Feb 2009 22:58:35 -0800
Subject: net: replace __constant_{endian} uses in net headers

Base versions handle constant folding now.  For headers exposed to
userspace, we must only expose the __ prefixed versions.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_pppox.h         | 20 ++++-----
 include/linux/if_tunnel.h        | 16 +++----
 include/linux/ncp_no.h           | 26 ++++++------
 include/linux/netdevice.h        |  4 +-
 include/linux/netfilter_bridge.h |  4 +-
 include/linux/pim.h              |  4 +-
 include/linux/sctp.h             | 90 ++++++++++++++++++++--------------------
 include/linux/tcp.h              | 20 ++++-----
 include/net/inet_ecn.h           |  4 +-
 include/net/ip_vs.h              |  4 +-
 include/net/ipv6.h               |  4 +-
 include/net/ipx.h                |  2 +-
 include/net/transp_v6.h          |  2 +-
 include/rdma/ib_verbs.h          |  2 +-
 14 files changed, 101 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 30c88b2245ff..90b5fae5d714 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -95,16 +95,16 @@ struct pppoe_tag {
 } __attribute ((packed));
 
 /* Tag identifiers */
-#define PTT_EOL		__constant_htons(0x0000)
-#define PTT_SRV_NAME	__constant_htons(0x0101)
-#define PTT_AC_NAME	__constant_htons(0x0102)
-#define PTT_HOST_UNIQ	__constant_htons(0x0103)
-#define PTT_AC_COOKIE	__constant_htons(0x0104)
-#define PTT_VENDOR 	__constant_htons(0x0105)
-#define PTT_RELAY_SID	__constant_htons(0x0110)
-#define PTT_SRV_ERR     __constant_htons(0x0201)
-#define PTT_SYS_ERR  	__constant_htons(0x0202)
-#define PTT_GEN_ERR  	__constant_htons(0x0203)
+#define PTT_EOL		__cpu_to_be16(0x0000)
+#define PTT_SRV_NAME	__cpu_to_be16(0x0101)
+#define PTT_AC_NAME	__cpu_to_be16(0x0102)
+#define PTT_HOST_UNIQ	__cpu_to_be16(0x0103)
+#define PTT_AC_COOKIE	__cpu_to_be16(0x0104)
+#define PTT_VENDOR 	__cpu_to_be16(0x0105)
+#define PTT_RELAY_SID	__cpu_to_be16(0x0110)
+#define PTT_SRV_ERR     __cpu_to_be16(0x0201)
+#define PTT_SYS_ERR  	__cpu_to_be16(0x0202)
+#define PTT_GEN_ERR  	__cpu_to_be16(0x0203)
 
 struct pppoe_hdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 82c43624c067..5a9aae4adb44 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -16,14 +16,14 @@
 #define SIOCDELPRL      (SIOCDEVPRIVATE + 6)
 #define SIOCCHGPRL      (SIOCDEVPRIVATE + 7)
 
-#define GRE_CSUM	__constant_htons(0x8000)
-#define GRE_ROUTING	__constant_htons(0x4000)
-#define GRE_KEY		__constant_htons(0x2000)
-#define GRE_SEQ		__constant_htons(0x1000)
-#define GRE_STRICT	__constant_htons(0x0800)
-#define GRE_REC		__constant_htons(0x0700)
-#define GRE_FLAGS	__constant_htons(0x00F8)
-#define GRE_VERSION	__constant_htons(0x0007)
+#define GRE_CSUM	__cpu_to_be16(0x8000)
+#define GRE_ROUTING	__cpu_to_be16(0x4000)
+#define GRE_KEY		__cpu_to_be16(0x2000)
+#define GRE_SEQ		__cpu_to_be16(0x1000)
+#define GRE_STRICT	__cpu_to_be16(0x0800)
+#define GRE_REC		__cpu_to_be16(0x0700)
+#define GRE_FLAGS	__cpu_to_be16(0x00F8)
+#define GRE_VERSION	__cpu_to_be16(0x0007)
 
 struct ip_tunnel_parm
 {
diff --git a/include/linux/ncp_no.h b/include/linux/ncp_no.h
index f56a696a7cc6..cddaa48fb182 100644
--- a/include/linux/ncp_no.h
+++ b/include/linux/ncp_no.h
@@ -2,18 +2,18 @@
 #define _NCP_NO
 
 /* these define the attribute byte as seen by NCP */
-#define aRONLY			(__constant_cpu_to_le32(1))
-#define aHIDDEN			(__constant_cpu_to_le32(2))
-#define aSYSTEM			(__constant_cpu_to_le32(4))
-#define aEXECUTE		(__constant_cpu_to_le32(8))
-#define aDIR			(__constant_cpu_to_le32(0x10))
-#define aARCH			(__constant_cpu_to_le32(0x20))
-#define aSHARED			(__constant_cpu_to_le32(0x80))
-#define aDONTSUBALLOCATE	(__constant_cpu_to_le32(1L<<11))
-#define aTRANSACTIONAL		(__constant_cpu_to_le32(1L<<12))
-#define aPURGE			(__constant_cpu_to_le32(1L<<16))
-#define aRENAMEINHIBIT		(__constant_cpu_to_le32(1L<<17))
-#define aDELETEINHIBIT		(__constant_cpu_to_le32(1L<<18))
-#define aDONTCOMPRESS		(__constant_cpu_to_le32(1L<<27))
+#define aRONLY			(__cpu_to_le32(1))
+#define aHIDDEN			(__cpu_to_le32(2))
+#define aSYSTEM			(__cpu_to_le32(4))
+#define aEXECUTE		(__cpu_to_le32(8))
+#define aDIR			(__cpu_to_le32(0x10))
+#define aARCH			(__cpu_to_le32(0x20))
+#define aSHARED			(__cpu_to_le32(0x80))
+#define aDONTSUBALLOCATE	(__cpu_to_le32(1L<<11))
+#define aTRANSACTIONAL		(__cpu_to_le32(1L<<12))
+#define aPURGE			(__cpu_to_le32(1L<<16))
+#define aRENAMEINHIBIT		(__cpu_to_le32(1L<<17))
+#define aDELETEINHIBIT		(__cpu_to_le32(1L<<18))
+#define aDONTCOMPRESS		(__cpu_to_le32(1L<<27))
 
 #endif /* _NCP_NO */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 355662aac940..bd8b4ca85a2a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1863,7 +1863,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 
 		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
 			if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-			    skb->protocol == __constant_htons(ETH_P_ARP))
+			    skb->protocol == __cpu_to_be16(ETH_P_ARP))
 				return 0;
 
 			if (master->priv_flags & IFF_MASTER_ALB) {
@@ -1872,7 +1872,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 					return 0;
 			}
 			if (master->priv_flags & IFF_MASTER_8023AD &&
-			    skb->protocol == __constant_htons(ETH_P_SLOW))
+			    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
 				return 0;
 
 			return 1;
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 499aa9375901..f8105e54716a 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -59,9 +59,9 @@ static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
 static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_8021Q):
+	case __cpu_to_be16(ETH_P_8021Q):
 		return VLAN_HLEN;
-	case __constant_htons(ETH_P_PPP_SES):
+	case __cpu_to_be16(ETH_P_PPP_SES):
 		return PPPOE_SES_HLEN;
 	default:
 		return 0;
diff --git a/include/linux/pim.h b/include/linux/pim.h
index 1ba0661561a4..252bf6644c51 100644
--- a/include/linux/pim.h
+++ b/include/linux/pim.h
@@ -4,14 +4,14 @@
 #include <asm/byteorder.h>
 
 /* Message types - V1 */
-#define PIM_V1_VERSION		__constant_htonl(0x10000000)
+#define PIM_V1_VERSION		cpu_to_be32(0x10000000)
 #define PIM_V1_REGISTER		1
 
 /* Message types - V2 */
 #define PIM_VERSION		2
 #define PIM_REGISTER		1
 
-#define PIM_NULL_REGISTER	__constant_htonl(0x40000000)
+#define PIM_NULL_REGISTER	cpu_to_be32(0x40000000)
 
 /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */
 struct pimreghdr
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 8ba1c320f975..bd50b371ffaa 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -172,35 +172,35 @@ typedef struct sctp_paramhdr {
 typedef enum {
 
 	/* RFC 2960 Section 3.3.5 */
-	SCTP_PARAM_HEARTBEAT_INFO		= __constant_htons(1),
+	SCTP_PARAM_HEARTBEAT_INFO		= cpu_to_be16(1),
 	/* RFC 2960 Section 3.3.2.1 */
-	SCTP_PARAM_IPV4_ADDRESS			= __constant_htons(5),
-	SCTP_PARAM_IPV6_ADDRESS			= __constant_htons(6),
-	SCTP_PARAM_STATE_COOKIE			= __constant_htons(7),
-	SCTP_PARAM_UNRECOGNIZED_PARAMETERS	= __constant_htons(8),
-	SCTP_PARAM_COOKIE_PRESERVATIVE		= __constant_htons(9),
-	SCTP_PARAM_HOST_NAME_ADDRESS		= __constant_htons(11),
-	SCTP_PARAM_SUPPORTED_ADDRESS_TYPES	= __constant_htons(12),
-	SCTP_PARAM_ECN_CAPABLE			= __constant_htons(0x8000),
+	SCTP_PARAM_IPV4_ADDRESS			= cpu_to_be16(5),
+	SCTP_PARAM_IPV6_ADDRESS			= cpu_to_be16(6),
+	SCTP_PARAM_STATE_COOKIE			= cpu_to_be16(7),
+	SCTP_PARAM_UNRECOGNIZED_PARAMETERS	= cpu_to_be16(8),
+	SCTP_PARAM_COOKIE_PRESERVATIVE		= cpu_to_be16(9),
+	SCTP_PARAM_HOST_NAME_ADDRESS		= cpu_to_be16(11),
+	SCTP_PARAM_SUPPORTED_ADDRESS_TYPES	= cpu_to_be16(12),
+	SCTP_PARAM_ECN_CAPABLE			= cpu_to_be16(0x8000),
 
 	/* AUTH Extension Section 3 */
-	SCTP_PARAM_RANDOM			= __constant_htons(0x8002),
-	SCTP_PARAM_CHUNKS			= __constant_htons(0x8003),
-	SCTP_PARAM_HMAC_ALGO			= __constant_htons(0x8004),
+	SCTP_PARAM_RANDOM			= cpu_to_be16(0x8002),
+	SCTP_PARAM_CHUNKS			= cpu_to_be16(0x8003),
+	SCTP_PARAM_HMAC_ALGO			= cpu_to_be16(0x8004),
 
 	/* Add-IP: Supported Extensions, Section 4.2 */
-	SCTP_PARAM_SUPPORTED_EXT	= __constant_htons(0x8008),
+	SCTP_PARAM_SUPPORTED_EXT	= cpu_to_be16(0x8008),
 
 	/* PR-SCTP Sec 3.1 */
-	SCTP_PARAM_FWD_TSN_SUPPORT	= __constant_htons(0xc000),
+	SCTP_PARAM_FWD_TSN_SUPPORT	= cpu_to_be16(0xc000),
 
 	/* Add-IP Extension. Section 3.2 */
-	SCTP_PARAM_ADD_IP		= __constant_htons(0xc001),
-	SCTP_PARAM_DEL_IP		= __constant_htons(0xc002),
-	SCTP_PARAM_ERR_CAUSE		= __constant_htons(0xc003),
-	SCTP_PARAM_SET_PRIMARY		= __constant_htons(0xc004),
-	SCTP_PARAM_SUCCESS_REPORT	= __constant_htons(0xc005),
-	SCTP_PARAM_ADAPTATION_LAYER_IND = __constant_htons(0xc006),
+	SCTP_PARAM_ADD_IP		= cpu_to_be16(0xc001),
+	SCTP_PARAM_DEL_IP		= cpu_to_be16(0xc002),
+	SCTP_PARAM_ERR_CAUSE		= cpu_to_be16(0xc003),
+	SCTP_PARAM_SET_PRIMARY		= cpu_to_be16(0xc004),
+	SCTP_PARAM_SUCCESS_REPORT	= cpu_to_be16(0xc005),
+	SCTP_PARAM_ADAPTATION_LAYER_IND = cpu_to_be16(0xc006),
 
 } sctp_param_t; /* enum */
 
@@ -212,13 +212,13 @@ typedef enum {
  *
  */
 typedef enum {
-	SCTP_PARAM_ACTION_DISCARD     = __constant_htons(0x0000),
-	SCTP_PARAM_ACTION_DISCARD_ERR = __constant_htons(0x4000),
-	SCTP_PARAM_ACTION_SKIP        = __constant_htons(0x8000),
-	SCTP_PARAM_ACTION_SKIP_ERR    = __constant_htons(0xc000),
+	SCTP_PARAM_ACTION_DISCARD     = cpu_to_be16(0x0000),
+	SCTP_PARAM_ACTION_DISCARD_ERR = cpu_to_be16(0x4000),
+	SCTP_PARAM_ACTION_SKIP        = cpu_to_be16(0x8000),
+	SCTP_PARAM_ACTION_SKIP_ERR    = cpu_to_be16(0xc000),
 } sctp_param_action_t;
 
-enum { SCTP_PARAM_ACTION_MASK = __constant_htons(0xc000), };
+enum { SCTP_PARAM_ACTION_MASK = cpu_to_be16(0xc000), };
 
 /* RFC 2960 Section 3.3.1 Payload Data (DATA) (0) */
 
@@ -457,17 +457,17 @@ typedef struct sctp_operr_chunk {
  */
 typedef enum {
 
-	SCTP_ERROR_NO_ERROR	   = __constant_htons(0x00),
-	SCTP_ERROR_INV_STRM	   = __constant_htons(0x01),
-	SCTP_ERROR_MISS_PARAM 	   = __constant_htons(0x02),
-	SCTP_ERROR_STALE_COOKIE	   = __constant_htons(0x03),
-	SCTP_ERROR_NO_RESOURCE 	   = __constant_htons(0x04),
-	SCTP_ERROR_DNS_FAILED      = __constant_htons(0x05),
-	SCTP_ERROR_UNKNOWN_CHUNK   = __constant_htons(0x06),
-	SCTP_ERROR_INV_PARAM       = __constant_htons(0x07),
-	SCTP_ERROR_UNKNOWN_PARAM   = __constant_htons(0x08),
-	SCTP_ERROR_NO_DATA         = __constant_htons(0x09),
-	SCTP_ERROR_COOKIE_IN_SHUTDOWN = __constant_htons(0x0a),
+	SCTP_ERROR_NO_ERROR	   = cpu_to_be16(0x00),
+	SCTP_ERROR_INV_STRM	   = cpu_to_be16(0x01),
+	SCTP_ERROR_MISS_PARAM 	   = cpu_to_be16(0x02),
+	SCTP_ERROR_STALE_COOKIE	   = cpu_to_be16(0x03),
+	SCTP_ERROR_NO_RESOURCE 	   = cpu_to_be16(0x04),
+	SCTP_ERROR_DNS_FAILED      = cpu_to_be16(0x05),
+	SCTP_ERROR_UNKNOWN_CHUNK   = cpu_to_be16(0x06),
+	SCTP_ERROR_INV_PARAM       = cpu_to_be16(0x07),
+	SCTP_ERROR_UNKNOWN_PARAM   = cpu_to_be16(0x08),
+	SCTP_ERROR_NO_DATA         = cpu_to_be16(0x09),
+	SCTP_ERROR_COOKIE_IN_SHUTDOWN = cpu_to_be16(0x0a),
 
 
 	/* SCTP Implementation Guide:
@@ -476,9 +476,9 @@ typedef enum {
 	 *  13  Protocol Violation
 	 */
 
-	SCTP_ERROR_RESTART         = __constant_htons(0x0b),
-	SCTP_ERROR_USER_ABORT      = __constant_htons(0x0c),
-	SCTP_ERROR_PROTO_VIOLATION = __constant_htons(0x0d),
+	SCTP_ERROR_RESTART         = cpu_to_be16(0x0b),
+	SCTP_ERROR_USER_ABORT      = cpu_to_be16(0x0c),
+	SCTP_ERROR_PROTO_VIOLATION = cpu_to_be16(0x0d),
 
 	/* ADDIP Section 3.3  New Error Causes
 	 *
@@ -493,11 +493,11 @@ typedef enum {
 	 * 0x0103          Association Aborted due to illegal ASCONF-ACK
 	 * 0x0104          Request refused - no authorization.
 	 */
-	SCTP_ERROR_DEL_LAST_IP	= __constant_htons(0x0100),
-	SCTP_ERROR_RSRC_LOW	= __constant_htons(0x0101),
-	SCTP_ERROR_DEL_SRC_IP	= __constant_htons(0x0102),
-	SCTP_ERROR_ASCONF_ACK   = __constant_htons(0x0103),
-	SCTP_ERROR_REQ_REFUSED	= __constant_htons(0x0104),
+	SCTP_ERROR_DEL_LAST_IP	= cpu_to_be16(0x0100),
+	SCTP_ERROR_RSRC_LOW	= cpu_to_be16(0x0101),
+	SCTP_ERROR_DEL_SRC_IP	= cpu_to_be16(0x0102),
+	SCTP_ERROR_ASCONF_ACK   = cpu_to_be16(0x0103),
+	SCTP_ERROR_REQ_REFUSED	= cpu_to_be16(0x0104),
 
 	/* AUTH Section 4.  New Error Cause
 	 *
@@ -509,7 +509,7 @@ typedef enum {
 	 * --------------------------------------------------------------
 	 * 0x0105          Unsupported HMAC Identifier
 	 */
-	 SCTP_ERROR_UNSUP_HMAC	= __constant_htons(0x0105)
+	 SCTP_ERROR_UNSUP_HMAC	= cpu_to_be16(0x0105)
 } sctp_error_t;
 
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fe77e1499ab7..0cd99e6baca5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -69,16 +69,16 @@ union tcp_word_hdr {
 #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) 
 
 enum { 
-	TCP_FLAG_CWR = __constant_htonl(0x00800000), 
-	TCP_FLAG_ECE = __constant_htonl(0x00400000), 
-	TCP_FLAG_URG = __constant_htonl(0x00200000), 
-	TCP_FLAG_ACK = __constant_htonl(0x00100000), 
-	TCP_FLAG_PSH = __constant_htonl(0x00080000), 
-	TCP_FLAG_RST = __constant_htonl(0x00040000), 
-	TCP_FLAG_SYN = __constant_htonl(0x00020000), 
-	TCP_FLAG_FIN = __constant_htonl(0x00010000),
-	TCP_RESERVED_BITS = __constant_htonl(0x0F000000),
-	TCP_DATA_OFFSET = __constant_htonl(0xF0000000)
+	TCP_FLAG_CWR = __cpu_to_be32(0x00800000),
+	TCP_FLAG_ECE = __cpu_to_be32(0x00400000),
+	TCP_FLAG_URG = __cpu_to_be32(0x00200000),
+	TCP_FLAG_ACK = __cpu_to_be32(0x00100000),
+	TCP_FLAG_PSH = __cpu_to_be32(0x00080000),
+	TCP_FLAG_RST = __cpu_to_be32(0x00040000),
+	TCP_FLAG_SYN = __cpu_to_be32(0x00020000),
+	TCP_FLAG_FIN = __cpu_to_be32(0x00010000),
+	TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000),
+	TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000)
 }; 
 
 /* TCP socket options */
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 7040a782c656..9b5d08f4f6e8 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -113,12 +113,12 @@ static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
 static inline int INET_ECN_set_ce(struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case cpu_to_be16(ETH_P_IP):
 		if (skb->network_header + sizeof(struct iphdr) <= skb->tail)
 			return IP_ECN_set_ce(ip_hdr(skb));
 		break;
 
-	case __constant_htons(ETH_P_IPV6):
+	case cpu_to_be16(ETH_P_IPV6):
 		if (skb->network_header + sizeof(struct ipv6hdr) <= skb->tail)
 			return IP6_ECN_set_ce(ipv6_hdr(skb));
 		break;
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index ab9b003ab671..bbae1e87efcd 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -184,8 +184,8 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
 /*
  *      The port number of FTP service (in network order).
  */
-#define FTPPORT  __constant_htons(21)
-#define FTPDATA  __constant_htons(20)
+#define FTPPORT  cpu_to_be16(21)
+#define FTPDATA  cpu_to_be16(20)
 
 /*
  *      TCP State Values
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6d5b58a1c743..c1f16fc49ade 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -196,8 +196,8 @@ struct ip6_flowlabel
 	struct net		*fl_net;
 };
 
-#define IPV6_FLOWINFO_MASK	__constant_htonl(0x0FFFFFFF)
-#define IPV6_FLOWLABEL_MASK	__constant_htonl(0x000FFFFF)
+#define IPV6_FLOWINFO_MASK	cpu_to_be32(0x0FFFFFFF)
+#define IPV6_FLOWLABEL_MASK	cpu_to_be32(0x000FFFFF)
 
 struct ipv6_fl_socklist
 {
diff --git a/include/net/ipx.h b/include/net/ipx.h
index 4cc0b4eca948..a14121dd1932 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -27,7 +27,7 @@ struct ipx_address {
 
 struct ipxhdr {
 	__be16			ipx_checksum __attribute__ ((packed));
-#define IPX_NO_CHECKSUM	__constant_htons(0xFFFF)
+#define IPX_NO_CHECKSUM	cpu_to_be16(0xFFFF)
 	__be16			ipx_pktsize __attribute__ ((packed));
 	__u8			ipx_tctrl;
 	__u8			ipx_type;
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index 876b6f2bb4fd..bfb240c6cf79 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -46,7 +46,7 @@ extern int			datagram_send_ctl(struct net *net,
 						  struct ipv6_txoptions *opt,
 						  int *hlimit, int *tclass);
 
-#define		LOOPBACK4_IPV6		__constant_htonl(0x7f000006)
+#define		LOOPBACK4_IPV6		cpu_to_be32(0x7f000006)
 
 /*
  *	address family specific functions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 936e333e7ce5..c179318edd92 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -388,7 +388,7 @@ enum {
 	IB_MULTICAST_QPN = 0xffffff
 };
 
-#define IB_LID_PERMISSIVE	__constant_htons(0xFFFF)
+#define IB_LID_PERMISSIVE	cpu_to_be16(0xFFFF)
 
 enum ib_ah_flags {
 	IB_AH_GRH	= 1
-- 
cgit v1.2.3


From a038a353c3de4040d8445ec568acebdac144436f Mon Sep 17 00:00:00 2001
From: Patrick Ohly <patrick.ohly@intel.com>
Date: Thu, 12 Feb 2009 05:03:34 +0000
Subject: clocksource: allow usage independent of timekeeping.c

So far struct clocksource acted as the interface between time/timekeeping.c
and hardware. This patch generalizes the concept so that a similar
interface can also be used in other contexts. For that it introduces
new structures and related functions *without* touching the existing
struct clocksource.

The reasons for adding these new structures to clocksource.[ch] are
* the APIs are clearly related
* struct clocksource could be cleaned up to use the new structs
* avoids proliferation of files with similar names (timesource.h?
  timecounter.h?)

As outlined in the discussion with John Stultz, this patch adds
* struct cyclecounter: stateless API to hardware which counts clock cycles
* struct timecounter: stateful utility code built on a cyclecounter which
  provides a nanosecond counter
* only the function to read the nanosecond counter; deltas are used internally
  and not exposed to users of timecounter

The code does no locking of the shared state. It must be called at least
as often as the cycle counter wraps around to detect these wrap arounds.
Both is the responsibility of the timecounter user.

Acked-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/clocksource.h | 101 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/time/clocksource.c   |  76 +++++++++++++++++++++++++++++++++
 2 files changed, 177 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f88d32f8ff7c..573819ef4cc0 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -21,9 +21,110 @@
 typedef u64 cycle_t;
 struct clocksource;
 
+/**
+ * struct cyclecounter - hardware abstraction for a free running counter
+ *	Provides completely state-free accessors to the underlying hardware.
+ *	Depending on which hardware it reads, the cycle counter may wrap
+ *	around quickly. Locking rules (if necessary) have to be defined
+ *	by the implementor and user of specific instances of this API.
+ *
+ * @read:		returns the current cycle value
+ * @mask:		bitmask for two's complement
+ *			subtraction of non 64 bit counters,
+ *			see CLOCKSOURCE_MASK() helper macro
+ * @mult:		cycle to nanosecond multiplier
+ * @shift:		cycle to nanosecond divisor (power of two)
+ */
+struct cyclecounter {
+	cycle_t (*read)(const struct cyclecounter *cc);
+	cycle_t mask;
+	u32 mult;
+	u32 shift;
+};
+
+/**
+ * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds
+ *	Contains the state needed by timecounter_read() to detect
+ *	cycle counter wrap around. Initialize with
+ *	timecounter_init(). Also used to convert cycle counts into the
+ *	corresponding nanosecond counts with timecounter_cyc2time(). Users
+ *	of this code are responsible for initializing the underlying
+ *	cycle counter hardware, locking issues and reading the time
+ *	more often than the cycle counter wraps around. The nanosecond
+ *	counter will only wrap around after ~585 years.
+ *
+ * @cc:			the cycle counter used by this instance
+ * @cycle_last:		most recent cycle counter value seen by
+ *			timecounter_read()
+ * @nsec:		continuously increasing count
+ */
+struct timecounter {
+	const struct cyclecounter *cc;
+	cycle_t cycle_last;
+	u64 nsec;
+};
+
+/**
+ * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds
+ * @tc:		Pointer to cycle counter.
+ * @cycles:	Cycles
+ *
+ * XXX - This could use some mult_lxl_ll() asm optimization. Same code
+ * as in cyc2ns, but with unsigned result.
+ */
+static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
+				      cycle_t cycles)
+{
+	u64 ret = (u64)cycles;
+	ret = (ret * cc->mult) >> cc->shift;
+	return ret;
+}
+
+/**
+ * timecounter_init - initialize a time counter
+ * @tc:			Pointer to time counter which is to be initialized/reset
+ * @cc:			A cycle counter, ready to be used.
+ * @start_tstamp:	Arbitrary initial time stamp.
+ *
+ * After this call the current cycle register (roughly) corresponds to
+ * the initial time stamp. Every call to timecounter_read() increments
+ * the time stamp counter by the number of elapsed nanoseconds.
+ */
+extern void timecounter_init(struct timecounter *tc,
+			     const struct cyclecounter *cc,
+			     u64 start_tstamp);
+
+/**
+ * timecounter_read - return nanoseconds elapsed since timecounter_init()
+ *                    plus the initial time stamp
+ * @tc:          Pointer to time counter.
+ *
+ * In other words, keeps track of time since the same epoch as
+ * the function which generated the initial time stamp.
+ */
+extern u64 timecounter_read(struct timecounter *tc);
+
+/**
+ * timecounter_cyc2time - convert a cycle counter to same
+ *                        time base as values returned by
+ *                        timecounter_read()
+ * @tc:		Pointer to time counter.
+ * @cycle:	a value returned by tc->cc->read()
+ *
+ * Cycle counts that are converted correctly as long as they
+ * fall into the interval [-1/2 max cycle count, +1/2 max cycle count],
+ * with "max cycle count" == cs->mask+1.
+ *
+ * This allows conversion of cycle counter values which were generated
+ * in the past.
+ */
+extern u64 timecounter_cyc2time(struct timecounter *tc,
+				cycle_t cycle_tstamp);
+
 /**
  * struct clocksource - hardware abstraction for a free running counter
  *	Provides mostly state-free accessors to the underlying hardware.
+ *	This is the structure used for system time.
  *
  * @name:		ptr to clocksource name
  * @list:		list head for registration
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index ca89e1593f08..c46c931a7fe7 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -31,6 +31,82 @@
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 #include <linux/tick.h>
 
+void timecounter_init(struct timecounter *tc,
+		      const struct cyclecounter *cc,
+		      u64 start_tstamp)
+{
+	tc->cc = cc;
+	tc->cycle_last = cc->read(cc);
+	tc->nsec = start_tstamp;
+}
+EXPORT_SYMBOL(timecounter_init);
+
+/**
+ * timecounter_read_delta - get nanoseconds since last call of this function
+ * @tc:         Pointer to time counter
+ *
+ * When the underlying cycle counter runs over, this will be handled
+ * correctly as long as it does not run over more than once between
+ * calls.
+ *
+ * The first call to this function for a new time counter initializes
+ * the time tracking and returns an undefined result.
+ */
+static u64 timecounter_read_delta(struct timecounter *tc)
+{
+	cycle_t cycle_now, cycle_delta;
+	u64 ns_offset;
+
+	/* read cycle counter: */
+	cycle_now = tc->cc->read(tc->cc);
+
+	/* calculate the delta since the last timecounter_read_delta(): */
+	cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
+
+	/* convert to nanoseconds: */
+	ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);
+
+	/* update time stamp of timecounter_read_delta() call: */
+	tc->cycle_last = cycle_now;
+
+	return ns_offset;
+}
+
+u64 timecounter_read(struct timecounter *tc)
+{
+	u64 nsec;
+
+	/* increment time by nanoseconds since last call */
+	nsec = timecounter_read_delta(tc);
+	nsec += tc->nsec;
+	tc->nsec = nsec;
+
+	return nsec;
+}
+EXPORT_SYMBOL(timecounter_read);
+
+u64 timecounter_cyc2time(struct timecounter *tc,
+			 cycle_t cycle_tstamp)
+{
+	u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
+	u64 nsec;
+
+	/*
+	 * Instead of always treating cycle_tstamp as more recent
+	 * than tc->cycle_last, detect when it is too far in the
+	 * future and treat it as old time stamp instead.
+	 */
+	if (cycle_delta > tc->cc->mask / 2) {
+		cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
+		nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
+	} else {
+		nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
+	}
+
+	return nsec;
+}
+EXPORT_SYMBOL(timecounter_cyc2time);
+
 /* XXX - Would like a better way for initializing curr_clocksource */
 extern struct clocksource clocksource_jiffies;
 
-- 
cgit v1.2.3


From a75244c3d519fcb490ca2bf3f123c98017f1e8d0 Mon Sep 17 00:00:00 2001
From: Patrick Ohly <patrick.ohly@intel.com>
Date: Thu, 12 Feb 2009 05:03:35 +0000
Subject: timecompare: generic infrastructure to map between two time bases

Mapping from a struct timecounter to a time returned by functions like
ktime_get_real() is implemented. This is sufficient to use this code
in a network device driver which wants to support hardware time
stamping and transformation of hardware time stamps to system time.

The interface could have been made more versatile by not depending on
a time counter, but this wasn't done to avoid writing glue code
elsewhere.

The method implemented here is the one used and analyzed under the name
"assisted PTP" in the LCI PTP paper:
http://www.linuxclustersinstitute.org/conferences/archive/2008/PDF/Ohly_92221.pdf

Acked-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/timecompare.h | 125 +++++++++++++++++++++++++++++
 kernel/time/Makefile        |   2 +-
 kernel/time/timecompare.c   | 191 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 317 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/timecompare.h
 create mode 100644 kernel/time/timecompare.c

(limited to 'include/linux')

diff --git a/include/linux/timecompare.h b/include/linux/timecompare.h
new file mode 100644
index 000000000000..546e2234e4b3
--- /dev/null
+++ b/include/linux/timecompare.h
@@ -0,0 +1,125 @@
+/*
+ * Utility code which helps transforming between two different time
+ * bases, called "source" and "target" time in this code.
+ *
+ * Source time has to be provided via the timecounter API while target
+ * time is accessed via a function callback whose prototype
+ * intentionally matches ktime_get() and ktime_get_real(). These
+ * interfaces where chosen like this so that the code serves its
+ * initial purpose without additional glue code.
+ *
+ * This purpose is synchronizing a hardware clock in a NIC with system
+ * time, in order to implement the Precision Time Protocol (PTP,
+ * IEEE1588) with more accurate hardware assisted time stamping.  In
+ * that context only synchronization against system time (=
+ * ktime_get_real()) is currently needed. But this utility code might
+ * become useful in other situations, which is why it was written as
+ * general purpose utility code.
+ *
+ * The source timecounter is assumed to return monotonically
+ * increasing time (but this code does its best to compensate if that
+ * is not the case) whereas target time may jump.
+ *
+ * The target time corresponding to a source time is determined by
+ * reading target time, reading source time, reading target time
+ * again, then assuming that average target time corresponds to source
+ * time. In other words, the assumption is that reading the source
+ * time is slow and involves equal time for sending the request and
+ * receiving the reply, whereas reading target time is assumed to be
+ * fast.
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _LINUX_TIMECOMPARE_H
+#define _LINUX_TIMECOMPARE_H
+
+#include <linux/clocksource.h>
+#include <linux/ktime.h>
+
+/**
+ * struct timecompare - stores state and configuration for the two clocks
+ *
+ * Initialize to zero, then set source/target/num_samples.
+ *
+ * Transformation between source time and target time is done with:
+ * target_time = source_time + offset +
+ *               (source_time - last_update) * skew /
+ *               TIMECOMPARE_SKEW_RESOLUTION
+ *
+ * @source:          used to get source time stamps via timecounter_read()
+ * @target:          function returning target time (for example, ktime_get
+ *                   for monotonic time, or ktime_get_real for wall clock)
+ * @num_samples:     number of times that source time and target time are to
+ *                   be compared when determining their offset
+ * @offset:          (target time - source time) at the time of the last update
+ * @skew:            average (target time - source time) / delta source time *
+ *                   TIMECOMPARE_SKEW_RESOLUTION
+ * @last_update:     last source time stamp when time offset was measured
+ */
+struct timecompare {
+	struct timecounter *source;
+	ktime_t (*target)(void);
+	int num_samples;
+
+	s64 offset;
+	s64 skew;
+	u64 last_update;
+};
+
+/**
+ * timecompare_transform - transform source time stamp into target time base
+ * @sync:            context for time sync
+ * @source_tstamp:   the result of timecounter_read() or
+ *                   timecounter_cyc2time()
+ */
+extern ktime_t timecompare_transform(struct timecompare *sync,
+				     u64 source_tstamp);
+
+/**
+ * timecompare_offset - measure current (target time - source time) offset
+ * @sync:            context for time sync
+ * @offset:          average offset during sample period returned here
+ * @source_tstamp:   average source time during sample period returned here
+ *
+ * Returns number of samples used. Might be zero (= no result) in the
+ * unlikely case that target time was monotonically decreasing for all
+ * samples (= broken).
+ */
+extern int timecompare_offset(struct timecompare *sync,
+			      s64 *offset,
+			      u64 *source_tstamp);
+
+extern void __timecompare_update(struct timecompare *sync,
+				 u64 source_tstamp);
+
+/**
+ * timecompare_update - update offset and skew by measuring current offset
+ * @sync:            context for time sync
+ * @source_tstamp:   the result of timecounter_read() or
+ *                   timecounter_cyc2time(), pass zero to force update
+ *
+ * Updates are only done at most once per second.
+ */
+static inline void timecompare_update(struct timecompare *sync,
+				      u64 source_tstamp)
+{
+	if (!source_tstamp ||
+	    (s64)(source_tstamp - sync->last_update) >= NSEC_PER_SEC)
+		__timecompare_update(sync, source_tstamp);
+}
+
+#endif /* _LINUX_TIMECOMPARE_H */
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 905b0b50792d..0b0a6366c9d4 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,4 @@
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
new file mode 100644
index 000000000000..71e7f1a19156
--- /dev/null
+++ b/kernel/time/timecompare.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2009 Intel Corporation.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/timecompare.h>
+#include <linux/module.h>
+#include <linux/math64.h>
+
+/*
+ * fixed point arithmetic scale factor for skew
+ *
+ * Usually one would measure skew in ppb (parts per billion, 1e9), but
+ * using a factor of 2 simplifies the math.
+ */
+#define TIMECOMPARE_SKEW_RESOLUTION (((s64)1)<<30)
+
+ktime_t timecompare_transform(struct timecompare *sync,
+			      u64 source_tstamp)
+{
+	u64 nsec;
+
+	nsec = source_tstamp + sync->offset;
+	nsec += (s64)(source_tstamp - sync->last_update) * sync->skew /
+		TIMECOMPARE_SKEW_RESOLUTION;
+
+	return ns_to_ktime(nsec);
+}
+EXPORT_SYMBOL(timecompare_transform);
+
+int timecompare_offset(struct timecompare *sync,
+		       s64 *offset,
+		       u64 *source_tstamp)
+{
+	u64 start_source = 0, end_source = 0;
+	struct {
+		s64 offset;
+		s64 duration_target;
+	} buffer[10], sample, *samples;
+	int counter = 0, i;
+	int used;
+	int index;
+	int num_samples = sync->num_samples;
+
+	if (num_samples > sizeof(buffer)/sizeof(buffer[0])) {
+		samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC);
+		if (!samples) {
+			samples = buffer;
+			num_samples = sizeof(buffer)/sizeof(buffer[0]);
+		}
+	} else {
+		samples = buffer;
+	}
+
+	/* run until we have enough valid samples, but do not try forever */
+	i = 0;
+	counter = 0;
+	while (1) {
+		u64 ts;
+		ktime_t start, end;
+
+		start = sync->target();
+		ts = timecounter_read(sync->source);
+		end = sync->target();
+
+		if (!i)
+			start_source = ts;
+
+		/* ignore negative durations */
+		sample.duration_target = ktime_to_ns(ktime_sub(end, start));
+		if (sample.duration_target >= 0) {
+			/*
+			 * assume symetric delay to and from source:
+			 * average target time corresponds to measured
+			 * source time
+			 */
+			sample.offset =
+				ktime_to_ns(ktime_add(end, start)) / 2 -
+				ts;
+
+			/* simple insertion sort based on duration */
+			index = counter - 1;
+			while (index >= 0) {
+				if (samples[index].duration_target <
+				    sample.duration_target)
+					break;
+				samples[index + 1] = samples[index];
+				index--;
+			}
+			samples[index + 1] = sample;
+			counter++;
+		}
+
+		i++;
+		if (counter >= num_samples || i >= 100000) {
+			end_source = ts;
+			break;
+		}
+	}
+
+	*source_tstamp = (end_source + start_source) / 2;
+
+	/* remove outliers by only using 75% of the samples */
+	used = counter * 3 / 4;
+	if (!used)
+		used = counter;
+	if (used) {
+		/* calculate average */
+		s64 off = 0;
+		for (index = 0; index < used; index++)
+			off += samples[index].offset;
+		*offset = div_s64(off, used);
+	}
+
+	if (samples && samples != buffer)
+		kfree(samples);
+
+	return used;
+}
+EXPORT_SYMBOL(timecompare_offset);
+
+void __timecompare_update(struct timecompare *sync,
+			  u64 source_tstamp)
+{
+	s64 offset;
+	u64 average_time;
+
+	if (!timecompare_offset(sync, &offset, &average_time))
+		return;
+
+	if (!sync->last_update) {
+		sync->last_update = average_time;
+		sync->offset = offset;
+		sync->skew = 0;
+	} else {
+		s64 delta_nsec = average_time - sync->last_update;
+
+		/* avoid division by negative or small deltas */
+		if (delta_nsec >= 10000) {
+			s64 delta_offset_nsec = offset - sync->offset;
+			s64 skew; /* delta_offset_nsec *
+				     TIMECOMPARE_SKEW_RESOLUTION /
+				     delta_nsec */
+			u64 divisor;
+
+			/* div_s64() is limited to 32 bit divisor */
+			skew = delta_offset_nsec * TIMECOMPARE_SKEW_RESOLUTION;
+			divisor = delta_nsec;
+			while (unlikely(divisor >= ((s64)1) << 32)) {
+				/* divide both by 2; beware, right shift
+				   of negative value has undefined
+				   behavior and can only be used for
+				   the positive divisor */
+				skew = div_s64(skew, 2);
+				divisor >>= 1;
+			}
+			skew = div_s64(skew, divisor);
+
+			/*
+			 * Calculate new overall skew as 4/16 the
+			 * old value and 12/16 the new one. This is
+			 * a rather arbitrary tradeoff between
+			 * only using the latest measurement (0/16 and
+			 * 16/16) and even more weight on past measurements.
+			 */
+#define TIMECOMPARE_NEW_SKEW_PER_16 12
+			sync->skew =
+				div_s64((16 - TIMECOMPARE_NEW_SKEW_PER_16) *
+					sync->skew +
+					TIMECOMPARE_NEW_SKEW_PER_16 * skew,
+					16);
+			sync->last_update = average_time;
+			sync->offset = offset;
+		}
+	}
+}
+EXPORT_SYMBOL(__timecompare_update);
-- 
cgit v1.2.3


From cb9eff097831007afb30d64373f29d99825d0068 Mon Sep 17 00:00:00 2001
From: Patrick Ohly <patrick.ohly@intel.com>
Date: Thu, 12 Feb 2009 05:03:36 +0000
Subject: net: new user space API for time stamping of incoming and outgoing
 packets

User space can request hardware and/or software time stamping.
Reporting of the result(s) via a new control message is enabled
separately for each field in the message because some of the
fields may require additional computation and thus cause overhead.
User space can tell the different kinds of time stamps apart
and choose what suits its needs.

When a TX timestamp operation is requested, the TX skb will be cloned
and the clone will be time stamped (in hardware or software) and added
to the socket error queue of the skb, if the skb has a socket
associated with it.

The actual TX timestamp will reach userspace as a RX timestamp on the
cloned packet. If timestamping is requested and no timestamping is
done in the device driver (potentially this may use hardware
timestamping), it will be done in software after the device's
start_hard_xmit routine.

Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/timestamping.txt          | 178 +++++++
 Documentation/networking/timestamping/.gitignore   |   1 +
 Documentation/networking/timestamping/Makefile     |   6 +
 .../networking/timestamping/timestamping.c         | 533 +++++++++++++++++++++
 arch/alpha/include/asm/socket.h                    |   3 +
 arch/arm/include/asm/socket.h                      |   3 +
 arch/avr32/include/asm/socket.h                    |   3 +
 arch/blackfin/include/asm/socket.h                 |   3 +
 arch/cris/include/asm/socket.h                     |   3 +
 arch/h8300/include/asm/socket.h                    |   3 +
 arch/ia64/include/asm/socket.h                     |   3 +
 arch/m68k/include/asm/socket.h                     |   3 +
 arch/mips/include/asm/socket.h                     |   3 +
 arch/parisc/include/asm/socket.h                   |   3 +
 arch/powerpc/include/asm/socket.h                  |   3 +
 arch/s390/include/asm/socket.h                     |   3 +
 arch/sh/include/asm/socket.h                       |   3 +
 arch/sparc/include/asm/socket.h                    |   3 +
 arch/x86/include/asm/socket.h                      |   3 +
 arch/xtensa/include/asm/socket.h                   |   3 +
 include/asm-frv/socket.h                           |   3 +
 include/asm-m32r/socket.h                          |   3 +
 include/asm-mn10300/socket.h                       |   3 +
 include/linux/errqueue.h                           |   1 +
 include/linux/net_tstamp.h                         | 104 ++++
 include/linux/sockios.h                            |   3 +
 26 files changed, 883 insertions(+)
 create mode 100644 Documentation/networking/timestamping.txt
 create mode 100644 Documentation/networking/timestamping/.gitignore
 create mode 100644 Documentation/networking/timestamping/Makefile
 create mode 100644 Documentation/networking/timestamping/timestamping.c
 create mode 100644 include/linux/net_tstamp.h

(limited to 'include/linux')

diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
new file mode 100644
index 000000000000..a681a65b5bc7
--- /dev/null
+++ b/Documentation/networking/timestamping.txt
@@ -0,0 +1,178 @@
+The existing interfaces for getting network packages time stamped are:
+
+* SO_TIMESTAMP
+  Generate time stamp for each incoming packet using the (not necessarily
+  monotonous!) system time. Result is returned via recv_msg() in a
+  control message as timeval (usec resolution).
+
+* SO_TIMESTAMPNS
+  Same time stamping mechanism as SO_TIMESTAMP, but returns result as
+  timespec (nsec resolution).
+
+* IP_MULTICAST_LOOP + SO_TIMESTAMP[NS]
+  Only for multicasts: approximate send time stamp by receiving the looped
+  packet and using its receive time stamp.
+
+The following interface complements the existing ones: receive time
+stamps can be generated and returned for arbitrary packets and much
+closer to the point where the packet is really sent. Time stamps can
+be generated in software (as before) or in hardware (if the hardware
+has such a feature).
+
+SO_TIMESTAMPING:
+
+Instructs the socket layer which kind of information is wanted. The
+parameter is an integer with some of the following bits set. Setting
+other bits is an error and doesn't change the current state.
+
+SOF_TIMESTAMPING_TX_HARDWARE:  try to obtain send time stamp in hardware
+SOF_TIMESTAMPING_TX_SOFTWARE:  if SOF_TIMESTAMPING_TX_HARDWARE is off or
+                               fails, then do it in software
+SOF_TIMESTAMPING_RX_HARDWARE:  return the original, unmodified time stamp
+                               as generated by the hardware
+SOF_TIMESTAMPING_RX_SOFTWARE:  if SOF_TIMESTAMPING_RX_HARDWARE is off or
+                               fails, then do it in software
+SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp
+SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to
+                               the system time base
+SOF_TIMESTAMPING_SOFTWARE:     return system time stamp generated in
+                               software
+
+SOF_TIMESTAMPING_TX/RX determine how time stamps are generated.
+SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the
+following control message:
+    struct scm_timestamping {
+           struct timespec systime;
+           struct timespec hwtimetrans;
+           struct timespec hwtimeraw;
+    };
+
+recvmsg() can be used to get this control message for regular incoming
+packets. For send time stamps the outgoing packet is looped back to
+the socket's error queue with the send time stamp(s) attached. It can
+be received with recvmsg(flags=MSG_ERRQUEUE). The call returns the
+original outgoing packet data including all headers preprended down to
+and including the link layer, the scm_timestamping control message and
+a sock_extended_err control message with ee_errno==ENOMSG and
+ee_origin==SO_EE_ORIGIN_TIMESTAMPING. A socket with such a pending
+bounced packet is ready for reading as far as select() is concerned.
+
+All three values correspond to the same event in time, but were
+generated in different ways. Each of these values may be empty (= all
+zero), in which case no such value was available. If the application
+is not interested in some of these values, they can be left blank to
+avoid the potential overhead of calculating them.
+
+systime is the value of the system time at that moment. This
+corresponds to the value also returned via SO_TIMESTAMP[NS]. If the
+time stamp was generated by hardware, then this field is
+empty. Otherwise it is filled in if SOF_TIMESTAMPING_SOFTWARE is
+set.
+
+hwtimeraw is the original hardware time stamp. Filled in if
+SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its
+relation to system time should be made.
+
+hwtimetrans is the hardware time stamp transformed so that it
+corresponds as good as possible to system time. This correlation is
+not perfect; as a consequence, sorting packets received via different
+NICs by their hwtimetrans may differ from the order in which they were
+received. hwtimetrans may be non-monotonic even for the same NIC.
+Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support
+by the network device and will be empty without that support.
+
+
+SIOCSHWTSTAMP:
+
+Hardware time stamping must also be initialized for each device driver
+that is expected to do hardware time stamping. The parameter is:
+
+struct hwtstamp_config {
+    int flags;           /* no flags defined right now, must be zero */
+    int tx_type;         /* HWTSTAMP_TX_* */
+    int rx_filter;       /* HWTSTAMP_FILTER_* */
+};
+
+Desired behavior is passed into the kernel and to a specific device by
+calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose
+ifr_data points to a struct hwtstamp_config. The tx_type and
+rx_filter are hints to the driver what it is expected to do. If
+the requested fine-grained filtering for incoming packets is not
+supported, the driver may time stamp more than just the requested types
+of packets.
+
+A driver which supports hardware time stamping shall update the struct
+with the actual, possibly more permissive configuration. If the
+requested packets cannot be time stamped, then nothing should be
+changed and ERANGE shall be returned (in contrast to EINVAL, which
+indicates that SIOCSHWTSTAMP is not supported at all).
+
+Only a processes with admin rights may change the configuration. User
+space is responsible to ensure that multiple processes don't interfere
+with each other and that the settings are reset.
+
+/* possible values for hwtstamp_config->tx_type */
+enum {
+	/*
+	 * no outgoing packet will need hardware time stamping;
+	 * should a packet arrive which asks for it, no hardware
+	 * time stamping will be done
+	 */
+	HWTSTAMP_TX_OFF,
+
+	/*
+	 * enables hardware time stamping for outgoing packets;
+	 * the sender of the packet decides which are to be
+	 * time stamped by setting SOF_TIMESTAMPING_TX_SOFTWARE
+	 * before sending the packet
+	 */
+	HWTSTAMP_TX_ON,
+};
+
+/* possible values for hwtstamp_config->rx_filter */
+enum {
+	/* time stamp no incoming packet at all */
+	HWTSTAMP_FILTER_NONE,
+
+	/* time stamp any incoming packet */
+	HWTSTAMP_FILTER_ALL,
+
+        /* return value: time stamp all packets requested plus some others */
+        HWTSTAMP_FILTER_SOME,
+
+	/* PTP v1, UDP, any kind of event packet */
+	HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
+
+        ...
+};
+
+
+DEVICE IMPLEMENTATION
+
+A driver which supports hardware time stamping must support the
+SIOCSHWTSTAMP ioctl. Time stamps for received packets must be stored
+in the skb with skb_hwtstamp_set().
+
+Time stamps for outgoing packets are to be generated as follows:
+- In hard_start_xmit(), check if skb_hwtstamp_check_tx_hardware()
+  returns non-zero. If yes, then the driver is expected
+  to do hardware time stamping.
+- If this is possible for the skb and requested, then declare
+  that the driver is doing the time stamping by calling
+  skb_hwtstamp_tx_in_progress(). A driver not supporting
+  hardware time stamping doesn't do that. A driver must never
+  touch sk_buff::tstamp! It is used to store how time stamping
+  for an outgoing packets is to be done.
+- As soon as the driver has sent the packet and/or obtained a
+  hardware time stamp for it, it passes the time stamp back by
+  calling skb_hwtstamp_tx() with the original skb, the raw
+  hardware time stamp and a handle to the device (necessary
+  to convert the hardware time stamp to system time). If obtaining
+  the hardware time stamp somehow fails, then the driver should
+  not fall back to software time stamping. The rationale is that
+  this would occur at a later time in the processing pipeline
+  than other software time stamping and therefore could lead
+  to unexpected deltas between time stamps.
+- If the driver did not call skb_hwtstamp_tx_in_progress(), then
+  dev_hard_start_xmit() checks whether software time stamping
+  is wanted as fallback and potentially generates the time stamp.
diff --git a/Documentation/networking/timestamping/.gitignore b/Documentation/networking/timestamping/.gitignore
new file mode 100644
index 000000000000..71e81eb2e22f
--- /dev/null
+++ b/Documentation/networking/timestamping/.gitignore
@@ -0,0 +1 @@
+timestamping
diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile
new file mode 100644
index 000000000000..2a1489fdc036
--- /dev/null
+++ b/Documentation/networking/timestamping/Makefile
@@ -0,0 +1,6 @@
+CPPFLAGS = -I../../../include
+
+timestamping: timestamping.c
+
+clean:
+	rm -f timestamping
diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c
new file mode 100644
index 000000000000..43d143104210
--- /dev/null
+++ b/Documentation/networking/timestamping/timestamping.c
@@ -0,0 +1,533 @@
+/*
+ * This program demonstrates how the various time stamping features in
+ * the Linux kernel work. It emulates the behavior of a PTP
+ * implementation in stand-alone master mode by sending PTPv1 Sync
+ * multicasts once every second. It looks for similar packets, but
+ * beyond that doesn't actually implement PTP.
+ *
+ * Outgoing packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support.
+ *
+ * Incoming packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and
+ * SO_TIMESTAMP[NS].
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "asm/types.h"
+#include "linux/net_tstamp.h"
+#include "linux/errqueue.h"
+
+#ifndef SO_TIMESTAMPING
+# define SO_TIMESTAMPING         37
+# define SCM_TIMESTAMPING        SO_TIMESTAMPING
+#endif
+
+#ifndef SO_TIMESTAMPNS
+# define SO_TIMESTAMPNS 35
+#endif
+
+#ifndef SIOCGSTAMPNS
+# define SIOCGSTAMPNS 0x8907
+#endif
+
+#ifndef SIOCSHWTSTAMP
+# define SIOCSHWTSTAMP 0x89b0
+#endif
+
+static void usage(const char *error)
+{
+	if (error)
+		printf("invalid option: %s\n", error);
+	printf("timestamping interface option*\n\n"
+	       "Options:\n"
+	       "  IP_MULTICAST_LOOP - looping outgoing multicasts\n"
+	       "  SO_TIMESTAMP - normal software time stamping, ms resolution\n"
+	       "  SO_TIMESTAMPNS - more accurate software time stamping\n"
+	       "  SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n"
+	       "  SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n"
+	       "  SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n"
+	       "  SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
+	       "  SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
+	       "  SOF_TIMESTAMPING_SYS_HARDWARE - request reporting of transformed HW time stamps\n"
+	       "  SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
+	       "  SIOCGSTAMP - check last socket time stamp\n"
+	       "  SIOCGSTAMPNS - more accurate socket time stamp\n");
+	exit(1);
+}
+
+static void bail(const char *error)
+{
+	printf("%s: %s\n", error, strerror(errno));
+	exit(1);
+}
+
+static const unsigned char sync[] = {
+	0x00, 0x01, 0x00, 0x01,
+	0x5f, 0x44, 0x46, 0x4c,
+	0x54, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x01, 0x01,
+
+	/* fake uuid */
+	0x00, 0x01,
+	0x02, 0x03, 0x04, 0x05,
+
+	0x00, 0x01, 0x00, 0x37,
+	0x00, 0x00, 0x00, 0x08,
+	0x00, 0x00, 0x00, 0x00,
+	0x49, 0x05, 0xcd, 0x01,
+	0x29, 0xb1, 0x8d, 0xb0,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x01,
+
+	/* fake uuid */
+	0x00, 0x01,
+	0x02, 0x03, 0x04, 0x05,
+
+	0x00, 0x00, 0x00, 0x37,
+	0x00, 0x00, 0x00, 0x04,
+	0x44, 0x46, 0x4c, 0x54,
+	0x00, 0x00, 0xf0, 0x60,
+	0x00, 0x01, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x01,
+	0x00, 0x00, 0xf0, 0x60,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x04,
+	0x44, 0x46, 0x4c, 0x54,
+	0x00, 0x01,
+
+	/* fake uuid */
+	0x00, 0x01,
+	0x02, 0x03, 0x04, 0x05,
+
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+{
+	struct timeval now;
+	int res;
+
+	res = sendto(sock, sync, sizeof(sync), 0,
+		addr, addr_len);
+	gettimeofday(&now, 0);
+	if (res < 0)
+		printf("%s: %s\n", "send", strerror(errno));
+	else
+		printf("%ld.%06ld: sent %d bytes\n",
+		       (long)now.tv_sec, (long)now.tv_usec,
+		       res);
+}
+
+static void printpacket(struct msghdr *msg, int res,
+			char *data,
+			int sock, int recvmsg_flags,
+			int siocgstamp, int siocgstampns)
+{
+	struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+	struct cmsghdr *cmsg;
+	struct timeval tv;
+	struct timespec ts;
+	struct timeval now;
+
+	gettimeofday(&now, 0);
+
+	printf("%ld.%06ld: received %s data, %d bytes from %s, %d bytes control messages\n",
+	       (long)now.tv_sec, (long)now.tv_usec,
+	       (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+	       res,
+	       inet_ntoa(from_addr->sin_addr),
+	       msg->msg_controllen);
+	for (cmsg = CMSG_FIRSTHDR(msg);
+	     cmsg;
+	     cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		printf("   cmsg len %d: ", cmsg->cmsg_len);
+		switch (cmsg->cmsg_level) {
+		case SOL_SOCKET:
+			printf("SOL_SOCKET ");
+			switch (cmsg->cmsg_type) {
+			case SO_TIMESTAMP: {
+				struct timeval *stamp =
+					(struct timeval *)CMSG_DATA(cmsg);
+				printf("SO_TIMESTAMP %ld.%06ld",
+				       (long)stamp->tv_sec,
+				       (long)stamp->tv_usec);
+				break;
+			}
+			case SO_TIMESTAMPNS: {
+				struct timespec *stamp =
+					(struct timespec *)CMSG_DATA(cmsg);
+				printf("SO_TIMESTAMPNS %ld.%09ld",
+				       (long)stamp->tv_sec,
+				       (long)stamp->tv_nsec);
+				break;
+			}
+			case SO_TIMESTAMPING: {
+				struct timespec *stamp =
+					(struct timespec *)CMSG_DATA(cmsg);
+				printf("SO_TIMESTAMPING ");
+				printf("SW %ld.%09ld ",
+				       (long)stamp->tv_sec,
+				       (long)stamp->tv_nsec);
+				stamp++;
+				printf("HW transformed %ld.%09ld ",
+				       (long)stamp->tv_sec,
+				       (long)stamp->tv_nsec);
+				stamp++;
+				printf("HW raw %ld.%09ld",
+				       (long)stamp->tv_sec,
+				       (long)stamp->tv_nsec);
+				break;
+			}
+			default:
+				printf("type %d", cmsg->cmsg_type);
+				break;
+			}
+			break;
+		case IPPROTO_IP:
+			printf("IPPROTO_IP ");
+			switch (cmsg->cmsg_type) {
+			case IP_RECVERR: {
+				struct sock_extended_err *err =
+					(struct sock_extended_err *)CMSG_DATA(cmsg);
+				printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s",
+					strerror(err->ee_errno),
+					err->ee_origin,
+#ifdef SO_EE_ORIGIN_TIMESTAMPING
+					err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ?
+					"bounced packet" : "unexpected origin"
+#else
+					"probably SO_EE_ORIGIN_TIMESTAMPING"
+#endif
+					);
+				if (res < sizeof(sync))
+					printf(" => truncated data?!");
+				else if (!memcmp(sync, data + res - sizeof(sync),
+							sizeof(sync)))
+					printf(" => GOT OUR DATA BACK (HURRAY!)");
+				break;
+			}
+			case IP_PKTINFO: {
+				struct in_pktinfo *pktinfo =
+					(struct in_pktinfo *)CMSG_DATA(cmsg);
+				printf("IP_PKTINFO interface index %u",
+					pktinfo->ipi_ifindex);
+				break;
+			}
+			default:
+				printf("type %d", cmsg->cmsg_type);
+				break;
+			}
+			break;
+		default:
+			printf("level %d type %d",
+				cmsg->cmsg_level,
+				cmsg->cmsg_type);
+			break;
+		}
+		printf("\n");
+	}
+
+	if (siocgstamp) {
+		if (ioctl(sock, SIOCGSTAMP, &tv))
+			printf("   %s: %s\n", "SIOCGSTAMP", strerror(errno));
+		else
+			printf("SIOCGSTAMP %ld.%06ld\n",
+			       (long)tv.tv_sec,
+			       (long)tv.tv_usec);
+	}
+	if (siocgstampns) {
+		if (ioctl(sock, SIOCGSTAMPNS, &ts))
+			printf("   %s: %s\n", "SIOCGSTAMPNS", strerror(errno));
+		else
+			printf("SIOCGSTAMPNS %ld.%09ld\n",
+			       (long)ts.tv_sec,
+			       (long)ts.tv_nsec);
+	}
+}
+
+static void recvpacket(int sock, int recvmsg_flags,
+		       int siocgstamp, int siocgstampns)
+{
+	char data[256];
+	struct msghdr msg;
+	struct iovec entry;
+	struct sockaddr_in from_addr;
+	struct {
+		struct cmsghdr cm;
+		char control[512];
+	} control;
+	int res;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_iov = &entry;
+	msg.msg_iovlen = 1;
+	entry.iov_base = data;
+	entry.iov_len = sizeof(data);
+	msg.msg_name = (caddr_t)&from_addr;
+	msg.msg_namelen = sizeof(from_addr);
+	msg.msg_control = &control;
+	msg.msg_controllen = sizeof(control);
+
+	res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT);
+	if (res < 0) {
+		printf("%s %s: %s\n",
+		       "recvmsg",
+		       (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+		       strerror(errno));
+	} else {
+		printpacket(&msg, res, data,
+			    sock, recvmsg_flags,
+			    siocgstamp, siocgstampns);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int so_timestamping_flags = 0;
+	int so_timestamp = 0;
+	int so_timestampns = 0;
+	int siocgstamp = 0;
+	int siocgstampns = 0;
+	int ip_multicast_loop = 0;
+	char *interface;
+	int i;
+	int enabled = 1;
+	int sock;
+	struct ifreq device;
+	struct ifreq hwtstamp;
+	struct hwtstamp_config hwconfig, hwconfig_requested;
+	struct sockaddr_in addr;
+	struct ip_mreq imr;
+	struct in_addr iaddr;
+	int val;
+	socklen_t len;
+	struct timeval next;
+
+	if (argc < 2)
+		usage(0);
+	interface = argv[1];
+
+	for (i = 2; i < argc; i++) {
+		if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
+			so_timestamp = 1;
+		else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
+			so_timestampns = 1;
+		else if (!strcasecmp(argv[i], "SIOCGSTAMP"))
+			siocgstamp = 1;
+		else if (!strcasecmp(argv[i], "SIOCGSTAMPNS"))
+			siocgstampns = 1;
+		else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
+			ip_multicast_loop = 1;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SYS_HARDWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_SYS_HARDWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+		else
+			usage(argv[i]);
+	}
+
+	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	if (socket < 0)
+		bail("socket");
+
+	memset(&device, 0, sizeof(device));
+	strncpy(device.ifr_name, interface, sizeof(device.ifr_name));
+	if (ioctl(sock, SIOCGIFADDR, &device) < 0)
+		bail("getting interface IP address");
+
+	memset(&hwtstamp, 0, sizeof(hwtstamp));
+	strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name));
+	hwtstamp.ifr_data = (void *)&hwconfig;
+	memset(&hwconfig, 0, sizeof(&hwconfig));
+	hwconfig.tx_type =
+		(so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
+		HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+	hwconfig.rx_filter =
+		(so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+		HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
+	hwconfig_requested = hwconfig;
+	if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
+		if ((errno == EINVAL || errno == ENOTSUP) &&
+		    hwconfig_requested.tx_type == HWTSTAMP_TX_OFF &&
+		    hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE)
+			printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n");
+		else
+			bail("SIOCSHWTSTAMP");
+	}
+	printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n",
+	       hwconfig_requested.tx_type, hwconfig.tx_type,
+	       hwconfig_requested.rx_filter, hwconfig.rx_filter);
+
+	/* bind to PTP port */
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(INADDR_ANY);
+	addr.sin_port = htons(319 /* PTP event port */);
+	if (bind(sock,
+		 (struct sockaddr *)&addr,
+		 sizeof(struct sockaddr_in)) < 0)
+		bail("bind");
+
+	/* set multicast group for outgoing packets */
+	inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
+	addr.sin_addr = iaddr;
+	imr.imr_multiaddr.s_addr = iaddr.s_addr;
+	imr.imr_interface.s_addr =
+		((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr;
+	if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
+		       &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0)
+		bail("set multicast");
+
+	/* join multicast group, loop our own packet */
+	if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
+		       &imr, sizeof(struct ip_mreq)) < 0)
+		bail("join multicast group");
+
+	if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP,
+		       &ip_multicast_loop, sizeof(enabled)) < 0) {
+		bail("loop multicast");
+	}
+
+	/* set socket options for time stamping */
+	if (so_timestamp &&
+		setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP,
+			   &enabled, sizeof(enabled)) < 0)
+		bail("setsockopt SO_TIMESTAMP");
+
+	if (so_timestampns &&
+		setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS,
+			   &enabled, sizeof(enabled)) < 0)
+		bail("setsockopt SO_TIMESTAMPNS");
+
+	if (so_timestamping_flags &&
+		setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
+			   &so_timestamping_flags,
+			   sizeof(so_timestamping_flags)) < 0)
+		bail("setsockopt SO_TIMESTAMPING");
+
+	/* request IP_PKTINFO for debugging purposes */
+	if (setsockopt(sock, SOL_IP, IP_PKTINFO,
+		       &enabled, sizeof(enabled)) < 0)
+		printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno));
+
+	/* verify socket options */
+	len = sizeof(val);
+	if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0)
+		printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno));
+	else
+		printf("SO_TIMESTAMP %d\n", val);
+
+	if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0)
+		printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS",
+		       strerror(errno));
+	else
+		printf("SO_TIMESTAMPNS %d\n", val);
+
+	if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
+		printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
+		       strerror(errno));
+	} else {
+		printf("SO_TIMESTAMPING %d\n", val);
+		if (val != so_timestamping_flags)
+			printf("   not the expected value %d\n",
+			       so_timestamping_flags);
+	}
+
+	/* send packets forever every five seconds */
+	gettimeofday(&next, 0);
+	next.tv_sec = (next.tv_sec + 1) / 5 * 5;
+	next.tv_usec = 0;
+	while (1) {
+		struct timeval now;
+		struct timeval delta;
+		long delta_us;
+		int res;
+		fd_set readfs, errorfs;
+
+		gettimeofday(&now, 0);
+		delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 +
+			(long)(next.tv_usec - now.tv_usec);
+		if (delta_us > 0) {
+			/* continue waiting for timeout or data */
+			delta.tv_sec = delta_us / 1000000;
+			delta.tv_usec = delta_us % 1000000;
+
+			FD_ZERO(&readfs);
+			FD_ZERO(&errorfs);
+			FD_SET(sock, &readfs);
+			FD_SET(sock, &errorfs);
+			printf("%ld.%06ld: select %ldus\n",
+			       (long)now.tv_sec, (long)now.tv_usec,
+			       delta_us);
+			res = select(sock + 1, &readfs, 0, &errorfs, &delta);
+			gettimeofday(&now, 0);
+			printf("%ld.%06ld: select returned: %d, %s\n",
+			       (long)now.tv_sec, (long)now.tv_usec,
+			       res,
+			       res < 0 ? strerror(errno) : "success");
+			if (res > 0) {
+				if (FD_ISSET(sock, &readfs))
+					printf("ready for reading\n");
+				if (FD_ISSET(sock, &errorfs))
+					printf("has error\n");
+				recvpacket(sock, 0,
+					   siocgstamp,
+					   siocgstampns);
+				recvpacket(sock, MSG_ERRQUEUE,
+					   siocgstamp,
+					   siocgstampns);
+			}
+		} else {
+			/* write one packet */
+			sendpacket(sock,
+				   (struct sockaddr *)&addr,
+				   sizeof(addr));
+			next.tv_sec += 5;
+			continue;
+		}
+	}
+
+	return 0;
+}
diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index a1057c2d95e7..3641ec1452f4 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -62,6 +62,9 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h
index 6817be9573a6..537de4e0ef50 100644
--- a/arch/arm/include/asm/socket.h
+++ b/arch/arm/include/asm/socket.h
@@ -54,4 +54,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h
index 35863f260929..04c860619700 100644
--- a/arch/avr32/include/asm/socket.h
+++ b/arch/avr32/include/asm/socket.h
@@ -54,4 +54,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/blackfin/include/asm/socket.h b/arch/blackfin/include/asm/socket.h
index 2ca702e44d47..fac7fe9e1f8a 100644
--- a/arch/blackfin/include/asm/socket.h
+++ b/arch/blackfin/include/asm/socket.h
@@ -53,4 +53,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif				/* _ASM_SOCKET_H */
diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h
index 9df0ca82f5de..d5cf74005408 100644
--- a/arch/cris/include/asm/socket.h
+++ b/arch/cris/include/asm/socket.h
@@ -56,6 +56,9 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h
index da2520dbf254..602518a70a1a 100644
--- a/arch/h8300/include/asm/socket.h
+++ b/arch/h8300/include/asm/socket.h
@@ -54,4 +54,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h
index d5ef0aa3e312..745421225ec6 100644
--- a/arch/ia64/include/asm/socket.h
+++ b/arch/ia64/include/asm/socket.h
@@ -63,4 +63,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h
index dbc64e92c41a..ca87f938b03f 100644
--- a/arch/m68k/include/asm/socket.h
+++ b/arch/m68k/include/asm/socket.h
@@ -54,4 +54,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h
index facc2d7a87ca..2abca1780169 100644
--- a/arch/mips/include/asm/socket.h
+++ b/arch/mips/include/asm/socket.h
@@ -75,6 +75,9 @@ To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #ifdef __KERNEL__
 
 /** sock_type - Socket types
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
index fba402c95ac2..885472bf7b78 100644
--- a/arch/parisc/include/asm/socket.h
+++ b/arch/parisc/include/asm/socket.h
@@ -54,6 +54,9 @@
 
 #define SO_MARK			0x401f
 
+#define SO_TIMESTAMPING		0x4020
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h
index f5a4e168e498..1e5cfad0e3f7 100644
--- a/arch/powerpc/include/asm/socket.h
+++ b/arch/powerpc/include/asm/socket.h
@@ -61,4 +61,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h
index c786ab623b2d..02330c50241b 100644
--- a/arch/s390/include/asm/socket.h
+++ b/arch/s390/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sh/include/asm/socket.h b/arch/sh/include/asm/socket.h
index 6d4bf6512959..345653b96826 100644
--- a/arch/sh/include/asm/socket.h
+++ b/arch/sh/include/asm/socket.h
@@ -54,4 +54,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* __ASM_SH_SOCKET_H */
diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h
index bf50d0c2d583..982a12f959f4 100644
--- a/arch/sparc/include/asm/socket.h
+++ b/arch/sparc/include/asm/socket.h
@@ -50,6 +50,9 @@
 
 #define SO_MARK			0x0022
 
+#define SO_TIMESTAMPING		0x0023
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h
index 8ab9cc8b2ecc..ca8bf2cd0ba9 100644
--- a/arch/x86/include/asm/socket.h
+++ b/arch/x86/include/asm/socket.h
@@ -54,4 +54,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_X86_SOCKET_H */
diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h
index 6100682b1da2..dd1a7a4a1cea 100644
--- a/arch/xtensa/include/asm/socket.h
+++ b/arch/xtensa/include/asm/socket.h
@@ -65,4 +65,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h
index e51ca67b9356..57c3d4054e8b 100644
--- a/include/asm-frv/socket.h
+++ b/include/asm-frv/socket.h
@@ -54,5 +54,8 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h
index 9a0e20012224..be7ed589af5c 100644
--- a/include/asm-m32r/socket.h
+++ b/include/asm-m32r/socket.h
@@ -54,4 +54,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/include/asm-mn10300/socket.h b/include/asm-mn10300/socket.h
index 80af9c4ccad7..fb5daf438ec9 100644
--- a/include/asm-mn10300/socket.h
+++ b/include/asm-mn10300/socket.h
@@ -54,4 +54,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
 #endif /* _ASM_SOCKET_H */
diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index ceb1454b6977..ec12cc74366f 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -18,6 +18,7 @@ struct sock_extended_err
 #define SO_EE_ORIGIN_LOCAL	1
 #define SO_EE_ORIGIN_ICMP	2
 #define SO_EE_ORIGIN_ICMP6	3
+#define SO_EE_ORIGIN_TIMESTAMPING 4
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h
new file mode 100644
index 000000000000..a3b8546354ac
--- /dev/null
+++ b/include/linux/net_tstamp.h
@@ -0,0 +1,104 @@
+/*
+ * Userspace API for hardware time stamping of network packets
+ *
+ * Copyright (C) 2008,2009 Intel Corporation
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ */
+
+#ifndef _NET_TIMESTAMPING_H
+#define _NET_TIMESTAMPING_H
+
+#include <linux/socket.h>   /* for SO_TIMESTAMPING */
+
+/* SO_TIMESTAMPING gets an integer bit field comprised of these values */
+enum {
+	SOF_TIMESTAMPING_TX_HARDWARE = (1<<0),
+	SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1),
+	SOF_TIMESTAMPING_RX_HARDWARE = (1<<2),
+	SOF_TIMESTAMPING_RX_SOFTWARE = (1<<3),
+	SOF_TIMESTAMPING_SOFTWARE = (1<<4),
+	SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5),
+	SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6),
+	SOF_TIMESTAMPING_MASK =
+	(SOF_TIMESTAMPING_RAW_HARDWARE - 1) |
+	SOF_TIMESTAMPING_RAW_HARDWARE
+};
+
+/**
+ * struct hwtstamp_config - %SIOCSHWTSTAMP parameter
+ *
+ * @flags:	no flags defined right now, must be zero
+ * @tx_type:	one of HWTSTAMP_TX_*
+ * @rx_type:	one of one of HWTSTAMP_FILTER_*
+ *
+ * %SIOCSHWTSTAMP expects a &struct ifreq with a ifr_data pointer to
+ * this structure. dev_ifsioc() in the kernel takes care of the
+ * translation between 32 bit userspace and 64 bit kernel. The
+ * structure is intentionally chosen so that it has the same layout on
+ * 32 and 64 bit systems, don't break this!
+ */
+struct hwtstamp_config {
+	int flags;
+	int tx_type;
+	int rx_filter;
+};
+
+/* possible values for hwtstamp_config->tx_type */
+enum {
+	/*
+	 * No outgoing packet will need hardware time stamping;
+	 * should a packet arrive which asks for it, no hardware
+	 * time stamping will be done.
+	 */
+	HWTSTAMP_TX_OFF,
+
+	/*
+	 * Enables hardware time stamping for outgoing packets;
+	 * the sender of the packet decides which are to be
+	 * time stamped by setting %SOF_TIMESTAMPING_TX_SOFTWARE
+	 * before sending the packet.
+	 */
+	HWTSTAMP_TX_ON,
+};
+
+/* possible values for hwtstamp_config->rx_filter */
+enum {
+	/* time stamp no incoming packet at all */
+	HWTSTAMP_FILTER_NONE,
+
+	/* time stamp any incoming packet */
+	HWTSTAMP_FILTER_ALL,
+
+	/* return value: time stamp all packets requested plus some others */
+	HWTSTAMP_FILTER_SOME,
+
+	/* PTP v1, UDP, any kind of event packet */
+	HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
+	/* PTP v1, UDP, Sync packet */
+	HWTSTAMP_FILTER_PTP_V1_L4_SYNC,
+	/* PTP v1, UDP, Delay_req packet */
+	HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ,
+	/* PTP v2, UDP, any kind of event packet */
+	HWTSTAMP_FILTER_PTP_V2_L4_EVENT,
+	/* PTP v2, UDP, Sync packet */
+	HWTSTAMP_FILTER_PTP_V2_L4_SYNC,
+	/* PTP v2, UDP, Delay_req packet */
+	HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ,
+
+	/* 802.AS1, Ethernet, any kind of event packet */
+	HWTSTAMP_FILTER_PTP_V2_L2_EVENT,
+	/* 802.AS1, Ethernet, Sync packet */
+	HWTSTAMP_FILTER_PTP_V2_L2_SYNC,
+	/* 802.AS1, Ethernet, Delay_req packet */
+	HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ,
+
+	/* PTP v2/802.AS1, any layer, any kind of event packet */
+	HWTSTAMP_FILTER_PTP_V2_EVENT,
+	/* PTP v2/802.AS1, any layer, Sync packet */
+	HWTSTAMP_FILTER_PTP_V2_SYNC,
+	/* PTP v2/802.AS1, any layer, Delay_req packet */
+	HWTSTAMP_FILTER_PTP_V2_DELAY_REQ,
+};
+
+#endif /* _NET_TIMESTAMPING_H */
diff --git a/include/linux/sockios.h b/include/linux/sockios.h
index abef7596655a..241f179347d9 100644
--- a/include/linux/sockios.h
+++ b/include/linux/sockios.h
@@ -122,6 +122,9 @@
 #define SIOCBRADDIF	0x89a2		/* add interface to bridge      */
 #define SIOCBRDELIF	0x89a3		/* remove interface from bridge */
 
+/* hardware time stamping: parameters in linux/net_tstamp.h */
+#define SIOCSHWTSTAMP   0x89b0
+
 /* Device private ioctl calls */
 
 /*
-- 
cgit v1.2.3


From ac45f602ee3d1b6f326f68bc0c2591ceebf05ba4 Mon Sep 17 00:00:00 2001
From: Patrick Ohly <patrick.ohly@intel.com>
Date: Thu, 12 Feb 2009 05:03:37 +0000
Subject: net: infrastructure for hardware time stamping

The additional per-packet information (16 bytes for time stamps, 1
byte for flags) is stored for all packets in the skb_shared_info
struct. This implementation detail is hidden from users of that
information via skb_* accessor functions. A separate struct resp.
union is used for the additional information so that it can be
stored/copied easily outside of skb_shared_info.

Compared to previous implementations (reusing the tstamp field
depending on the context, optional additional structures) this
is the simplest solution. It does not extend sk_buff itself.

TX time stamping is implemented in software if the device driver
doesn't support hardware time stamping.

The new semantic for hardware/software time stamping around
ndo_start_xmit() is based on two assumptions about existing
network device drivers which don't support hardware time
stamping and know nothing about it:
 - they leave the new skb_shared_tx unmodified
 - the keep the connection to the originating socket in skb->sk
   alive, i.e., don't call skb_orphan()

Given that skb_shared_tx is new, the first assumption is safe.
The second is only true for some drivers. As a result, software
TX time stamping currently works with the bnx2 driver, but not
with the unmodified igb driver (the two drivers this patch series
was tested with).

Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++++-
 net/core/dev.c         | 32 ++++++++++++++++--
 net/core/skbuff.c      | 41 +++++++++++++++++++++++
 3 files changed, 161 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 924700844580..f96bc91bf0a3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -132,6 +132,57 @@ struct skb_frag_struct {
 	__u32 size;
 };
 
+#define HAVE_HW_TIME_STAMP
+
+/**
+ * skb_shared_hwtstamps - hardware time stamps
+ *
+ * @hwtstamp:	hardware time stamp transformed into duration
+ *		since arbitrary point in time
+ * @syststamp:	hwtstamp transformed to system time base
+ *
+ * Software time stamps generated by ktime_get_real() are stored in
+ * skb->tstamp. The relation between the different kinds of time
+ * stamps is as follows:
+ *
+ * syststamp and tstamp can be compared against each other in
+ * arbitrary combinations.  The accuracy of a
+ * syststamp/tstamp/"syststamp from other device" comparison is
+ * limited by the accuracy of the transformation into system time
+ * base. This depends on the device driver and its underlying
+ * hardware.
+ *
+ * hwtstamps can only be compared against other hwtstamps from
+ * the same device.
+ *
+ * This structure is attached to packets as part of the
+ * &skb_shared_info. Use skb_hwtstamps() to get a pointer.
+ */
+struct skb_shared_hwtstamps {
+	ktime_t	hwtstamp;
+	ktime_t	syststamp;
+};
+
+/**
+ * skb_shared_tx - instructions for time stamping of outgoing packets
+ *
+ * @hardware:		generate hardware time stamp
+ * @software:		generate software time stamp
+ * @in_progress:	device driver is going to provide
+ *			hardware time stamp
+ *
+ * These flags are attached to packets as part of the
+ * &skb_shared_info. Use skb_tx() to get a pointer.
+ */
+union skb_shared_tx {
+	struct {
+		__u8	hardware:1,
+			software:1,
+			in_progress:1;
+	};
+	__u8 flags;
+};
+
 /* This data is invariant across clones and lives at
  * the end of the header data, ie. at skb->end.
  */
@@ -143,10 +194,12 @@ struct skb_shared_info {
 	unsigned short	gso_segs;
 	unsigned short  gso_type;
 	__be32          ip6_frag_id;
+	union skb_shared_tx tx_flags;
 #ifdef CONFIG_HAS_DMA
 	unsigned int	num_dma_maps;
 #endif
 	struct sk_buff	*frag_list;
+	struct skb_shared_hwtstamps hwtstamps;
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 #ifdef CONFIG_HAS_DMA
 	dma_addr_t	dma_maps[MAX_SKB_FRAGS + 1];
@@ -465,6 +518,16 @@ static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 /* Internal */
 #define skb_shinfo(SKB)	((struct skb_shared_info *)(skb_end_pointer(SKB)))
 
+static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
+{
+	return &skb_shinfo(skb)->hwtstamps;
+}
+
+static inline union skb_shared_tx *skb_tx(struct sk_buff *skb)
+{
+	return &skb_shinfo(skb)->tx_flags;
+}
+
 /**
  *	skb_queue_empty - check if a queue is empty
  *	@list: queue head
@@ -1730,6 +1793,11 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
 
 extern void skb_init(void);
 
+static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
+{
+	return skb->tstamp;
+}
+
 /**
  *	skb_get_timestamp - get timestamp from a skb
  *	@skb: skb to get stamp from
@@ -1739,11 +1807,18 @@ extern void skb_init(void);
  *	This function converts the offset back to a struct timeval and stores
  *	it in stamp.
  */
-static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp)
+static inline void skb_get_timestamp(const struct sk_buff *skb,
+				     struct timeval *stamp)
 {
 	*stamp = ktime_to_timeval(skb->tstamp);
 }
 
+static inline void skb_get_timestampns(const struct sk_buff *skb,
+				       struct timespec *stamp)
+{
+	*stamp = ktime_to_timespec(skb->tstamp);
+}
+
 static inline void __net_timestamp(struct sk_buff *skb)
 {
 	skb->tstamp = ktime_get_real();
@@ -1759,6 +1834,20 @@ static inline ktime_t net_invalid_timestamp(void)
 	return ktime_set(0, 0);
 }
 
+/**
+ * skb_tstamp_tx - queue clone of skb with send time stamps
+ * @orig_skb:	the original outgoing packet
+ * @hwtstamps:	hardware time stamps, may be NULL if not available
+ *
+ * If the skb has a socket associated, then this function clones the
+ * skb (thus sharing the actual data and optional structures), stores
+ * the optional hardware time stamping information (if non NULL) or
+ * generates a software time stamp (otherwise), then queues the clone
+ * to the error queue of the socket.  Errors are silently ignored.
+ */
+extern void skb_tstamp_tx(struct sk_buff *orig_skb,
+			struct skb_shared_hwtstamps *hwtstamps);
+
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e27a67df242..d20c28e839d3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1672,10 +1672,21 @@ static int dev_gso_segment(struct sk_buff *skb)
 	return 0;
 }
 
+static void tstamp_tx(struct sk_buff *skb)
+{
+	union skb_shared_tx *shtx =
+		skb_tx(skb);
+	if (unlikely(shtx->software &&
+			!shtx->in_progress)) {
+		skb_tstamp_tx(skb, NULL);
+	}
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int rc;
 
 	prefetch(&dev->netdev_ops->ndo_start_xmit);
 	if (likely(!skb->next)) {
@@ -1689,13 +1700,29 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				goto gso;
 		}
 
-		return ops->ndo_start_xmit(skb, dev);
+		rc = ops->ndo_start_xmit(skb, dev);
+		/*
+		 * TODO: if skb_orphan() was called by
+		 * dev->hard_start_xmit() (for example, the unmodified
+		 * igb driver does that; bnx2 doesn't), then
+		 * skb_tx_software_timestamp() will be unable to send
+		 * back the time stamp.
+		 *
+		 * How can this be prevented? Always create another
+		 * reference to the socket before calling
+		 * dev->hard_start_xmit()? Prevent that skb_orphan()
+		 * does anything in dev->hard_start_xmit() by clearing
+		 * the skb destructor before the call and restoring it
+		 * afterwards, then doing the skb_orphan() ourselves?
+		 */
+		if (likely(!rc))
+			tstamp_tx(skb);
+		return rc;
 	}
 
 gso:
 	do {
 		struct sk_buff *nskb = skb->next;
-		int rc;
 
 		skb->next = nskb->next;
 		nskb->next = NULL;
@@ -1705,6 +1732,7 @@ gso:
 			skb->next = nskb;
 			return rc;
 		}
+		tstamp_tx(skb);
 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ab7d2e9f02fa..e5a8351ff12d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/scatterlist.h>
+#include <linux/errqueue.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
@@ -215,7 +216,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	shinfo->gso_segs = 0;
 	shinfo->gso_type = 0;
 	shinfo->ip6_frag_id = 0;
+	shinfo->tx_flags.flags = 0;
 	shinfo->frag_list = NULL;
+	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -2945,6 +2948,44 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 }
 EXPORT_SYMBOL_GPL(skb_cow_data);
 
+void skb_tstamp_tx(struct sk_buff *orig_skb,
+		struct skb_shared_hwtstamps *hwtstamps)
+{
+	struct sock *sk = orig_skb->sk;
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb;
+	int err;
+
+	if (!sk)
+		return;
+
+	skb = skb_clone(orig_skb, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	if (hwtstamps) {
+		*skb_hwtstamps(skb) =
+			*hwtstamps;
+	} else {
+		/*
+		 * no hardware time stamps available,
+		 * so keep the skb_shared_tx and only
+		 * store software time stamp
+		 */
+		skb->tstamp = ktime_get_real();
+	}
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+	err = sock_queue_err_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_tstamp_tx);
+
+
 /**
  * skb_partial_csum_set - set up and verify partial csum values for packet
  * @skb: the skb to set
-- 
cgit v1.2.3


From 4458f04c02a46c679a90ef71f866a415c192deb4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 13 Feb 2009 08:33:42 +0000
Subject: sctp: Clean up sctp checksumming code

The sctp crc32c checksum is always generated in little endian.
So, we clean up the code to treat it as little endian and remove
all the __force casts.

Suggested by Herbert Xu.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h        |  2 +-
 include/net/sctp/checksum.h | 14 +++++++-------
 net/sctp/input.c            | 11 ++++++-----
 net/sctp/output.c           | 14 ++++++--------
 4 files changed, 20 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index bd50b371ffaa..c2731bfe04d8 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -60,7 +60,7 @@ typedef struct sctphdr {
 	__be16 source;
 	__be16 dest;
 	__be32 vtag;
-	__be32 checksum;
+	__le32 checksum;
 } __attribute__((packed)) sctp_sctphdr_t;
 
 #ifdef __KERNEL__
diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index 2fec3c366e81..befc8d2a1b9f 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -46,14 +46,14 @@
 #include <net/sctp/sctp.h>
 #include <linux/crc32c.h>
 
-static inline __be32 sctp_crc32c(__be32 crc, u8 *buffer, u16 length)
+static inline __u32 sctp_crc32c(__u32 crc, u8 *buffer, u16 length)
 {
-	return (__force __be32)crc32c((__force u32)crc, buffer, length);
+	return crc32c(crc, buffer, length);
 }
 
-static inline __be32 sctp_start_cksum(__u8 *buffer, __u16 length)
+static inline __u32 sctp_start_cksum(__u8 *buffer, __u16 length)
 {
-	__be32 crc = ~cpu_to_be32(0);
+	__u32 crc = ~(__u32)0;
 	__u8  zero[sizeof(__u32)] = {0};
 
 	/* Optimize this routine to be SCTP specific, knowing how
@@ -72,12 +72,12 @@ static inline __be32 sctp_start_cksum(__u8 *buffer, __u16 length)
 	return crc;
 }
 
-static inline __be32 sctp_update_cksum(__u8 *buffer, __u16 length, __be32 crc32)
+static inline __u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32)
 {
 	return sctp_crc32c(crc32, buffer, length);
 }
 
-static inline __be32 sctp_end_cksum(__be32 crc32)
+static inline __le32 sctp_end_cksum(__be32 crc32)
 {
-	return (__force __be32)~cpu_to_le32((__force u32)crc32);
+	return cpu_to_le32(~crc32);
 }
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 693fd0804810..d2e98803ffe3 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -83,14 +83,15 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
 {
 	struct sk_buff *list = skb_shinfo(skb)->frag_list;
 	struct sctphdr *sh = sctp_hdr(skb);
-	__be32 cmp = sh->checksum;
-	__be32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
+	__le32 cmp = sh->checksum;
+	__le32 val;
+	__u32 tmp = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
 
 	for (; list; list = list->next)
-		val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
-					val);
+		tmp = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
+					tmp);
 
-	val = sctp_end_cksum(val);
+	val = sctp_end_cksum(tmp);
 
 	if (val != cmp) {
 		/* CRC failure, dump it. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 2d65b7a7330b..07d58903a746 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -367,7 +367,6 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	struct sctp_transport *tp = packet->transport;
 	struct sctp_association *asoc = tp->asoc;
 	struct sctphdr *sh;
-	__be32 crc32 = cpu_to_be32(0);
 	struct sk_buff *nskb;
 	struct sctp_chunk *chunk, *tmp;
 	struct sock *sk;
@@ -532,16 +531,15 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
 	 */
 	if (!sctp_checksum_disable && !(dst->dev->features & NETIF_F_NO_CSUM)) {
-		crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
-		crc32 = sctp_end_cksum(crc32);
+		__u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
+
+		/* 3) Put the resultant value into the checksum field in the
+		 *    common header, and leave the rest of the bits unchanged.
+		 */
+		sh->checksum = sctp_end_cksum(crc32);
 	} else
 		nskb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	/* 3) Put the resultant value into the checksum field in the
-	 *    common header, and leave the rest of the bits unchanged.
-	 */
-	sh->checksum = crc32;
-
 	/* IP layer ECN support
 	 * From RFC 2481
 	 *  "The ECN-Capable Transport (ECT) bit would be set by the
-- 
cgit v1.2.3


From 886a63e865eb346ab20572e802fc3118cb9aee14 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Sat, 14 Feb 2009 11:36:20 +0000
Subject: drivers/net/hamradio: fix sparse warnings: fix signedness

Fix this sparse warnings:
  drivers/net/hamradio/hdlcdrv.c:274:34: warning: incorrect type in argument 2 (different signedness)
  drivers/net/hamradio/hdlcdrv.c:279:47: warning: incorrect type in argument 2 (different signedness)
  drivers/net/hamradio/hdlcdrv.c:288:39: warning: incorrect type in argument 2 (different signedness)
  drivers/net/hamradio/hdlcdrv.c:300:47: warning: incorrect type in argument 2 (different signedness)

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/hdlcdrv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hdlcdrv.h b/include/linux/hdlcdrv.h
index 0821bac62b83..c010b4a785b8 100644
--- a/include/linux/hdlcdrv.h
+++ b/include/linux/hdlcdrv.h
@@ -215,7 +215,7 @@ struct hdlcdrv_state {
 
 	struct hdlcdrv_hdlctx {
 		struct hdlcdrv_hdlcbuffer hbuf;
-		long in_hdlc_tx;
+		unsigned long in_hdlc_tx;
 		/*
 		 * 0 = send flags
 		 * 1 = send txtail (flags)
-- 
cgit v1.2.3


From 4a2f965ca5a4e2593744bf75425d85e0e8ff814a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 18 Feb 2009 16:29:44 +0100
Subject: netfilter: x_tables: change elements in x_tables

Change to proper type on private pointer rather than anonymous void.
Keep active elements on same cache line.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c7ee8744d26b..9fac88fc0e72 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -349,9 +349,6 @@ struct xt_table
 {
 	struct list_head list;
 
-	/* A unique name... */
-	const char name[XT_TABLE_MAXNAMELEN];
-
 	/* What hooks you will enter on */
 	unsigned int valid_hooks;
 
@@ -359,13 +356,15 @@ struct xt_table
 	rwlock_t lock;
 
 	/* Man behind the curtain... */
-	//struct ip6t_table_info *private;
-	void *private;
+	struct xt_table_info *private;
 
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
 	u_int8_t af;		/* address/protocol family */
+
+	/* A unique name... */
+	const char name[XT_TABLE_MAXNAMELEN];
 };
 
 #include <linux/netfilter_ipv4.h>
-- 
cgit v1.2.3


From 74019224ac34b044b44a31dd89a54e3477db4896 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 18 Feb 2009 12:23:29 +0100
Subject: timers: add mod_timer_pending()

Impact: new timer API

Based on an idea from Martin Josefsson with the help of
Patrick McHardy and Stephen Hemminger:

introduce the mod_timer_pending() API which is a mod_timer()
offspring that is an invariant on already removed timers.

(regular mod_timer() re-activates non-pending timers.)

This is useful for the networking code in that it can
allow unserialized mod_timer_pending() timer-forwarding
calls, but a single del_timer*() will stop the timer
from being reactivated again.

Also while at it:

- optimize the regular mod_timer() path some more, the
  timer-stat and a debug check was needlessly duplicated
  in __mod_timer().

- make the exports come straight after the function, as
  most other exports in timer.c already did.

- eliminate __mod_timer() as an external API, change the
  users to mod_timer().

The regular mod_timer() code path is not impacted
significantly, due to inlining optimizations and due to
the simplifications.

Based-on-patch-from: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Patrick McHardy <kaber@trash.net>
Cc: netdev@vger.kernel.org
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/platforms/cell/spufs/sched.c  |   2 +-
 drivers/infiniband/hw/ipath/ipath_driver.c |   6 +-
 include/linux/timer.h                      |  22 +-----
 kernel/relay.c                             |   2 +-
 kernel/timer.c                             | 110 +++++++++++++++++++----------
 5 files changed, 80 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 6a0ad196aeb3..f085369301b1 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -508,7 +508,7 @@ static void __spu_add_to_rq(struct spu_context *ctx)
 		list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
 		set_bit(ctx->prio, spu_prio->bitmap);
 		if (!spu_prio->nr_waiting++)
-			__mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+			mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
 	}
 }
 
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 69c0ce321b4e..cb9daa6ac029 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -2715,7 +2715,7 @@ static void ipath_hol_signal_up(struct ipath_devdata *dd)
  * to prevent HoL blocking, then start the HoL timer that
  * periodically continues, then stop procs, so they can detect
  * link down if they want, and do something about it.
- * Timer may already be running, so use __mod_timer, not add_timer.
+ * Timer may already be running, so use mod_timer, not add_timer.
  */
 void ipath_hol_down(struct ipath_devdata *dd)
 {
@@ -2724,7 +2724,7 @@ void ipath_hol_down(struct ipath_devdata *dd)
 	dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
 	dd->ipath_hol_timer.expires = jiffies +
 		msecs_to_jiffies(ipath_hol_timeout_ms);
-	__mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
+	mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
 }
 
 /*
@@ -2763,7 +2763,7 @@ void ipath_hol_event(unsigned long opaque)
 	else {
 		dd->ipath_hol_timer.expires = jiffies +
 			msecs_to_jiffies(ipath_hol_timeout_ms);
-		__mod_timer(&dd->ipath_hol_timer,
+		mod_timer(&dd->ipath_hol_timer,
 			dd->ipath_hol_timer.expires);
 	}
 }
diff --git a/include/linux/timer.h b/include/linux/timer.h
index daf9685b861c..e2d662e3416e 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -86,8 +86,8 @@ static inline int timer_pending(const struct timer_list * timer)
 
 extern void add_timer_on(struct timer_list *timer, int cpu);
 extern int del_timer(struct timer_list * timer);
-extern int __mod_timer(struct timer_list *timer, unsigned long expires);
 extern int mod_timer(struct timer_list *timer, unsigned long expires);
+extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
 
 /*
  * The jiffies value which is added to now, when there is no timer
@@ -146,25 +146,7 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
 }
 #endif
 
-/**
- * add_timer - start a timer
- * @timer: the timer to be added
- *
- * The kernel will do a ->function(->data) callback from the
- * timer interrupt at the ->expires point in the future. The
- * current time is 'jiffies'.
- *
- * The timer's ->expires, ->function (and if the handler uses it, ->data)
- * fields must be set prior calling this function.
- *
- * Timers with an ->expires field in the past will be executed in the next
- * timer tick.
- */
-static inline void add_timer(struct timer_list *timer)
-{
-	BUG_ON(timer_pending(timer));
-	__mod_timer(timer, timer->expires);
-}
+extern void add_timer(struct timer_list *timer);
 
 #ifdef CONFIG_SMP
   extern int try_to_del_timer_sync(struct timer_list *timer);
diff --git a/kernel/relay.c b/kernel/relay.c
index 9d79b7854fa6..8f2179c8056f 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -750,7 +750,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
 			 * from the scheduler (trying to re-grab
 			 * rq->lock), so defer it.
 			 */
-			__mod_timer(&buf->timer, jiffies + 1);
+			mod_timer(&buf->timer, jiffies + 1);
 	}
 
 	old = buf->data;
diff --git a/kernel/timer.c b/kernel/timer.c
index 13dd64fe143d..9b77fc9a9ac8 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -589,11 +589,14 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
 	}
 }
 
-int __mod_timer(struct timer_list *timer, unsigned long expires)
+static inline int
+__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
-	int ret = 0;
+	int ret;
+
+	ret = 0;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
@@ -603,6 +606,9 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer_pending(timer)) {
 		detach_timer(timer, 0);
 		ret = 1;
+	} else {
+		if (pending_only)
+			goto out_unlock;
 	}
 
 	debug_timer_activate(timer);
@@ -629,42 +635,28 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
 
 	timer->expires = expires;
 	internal_add_timer(base, timer);
+
+out_unlock:
 	spin_unlock_irqrestore(&base->lock, flags);
 
 	return ret;
 }
 
-EXPORT_SYMBOL(__mod_timer);
-
 /**
- * add_timer_on - start a timer on a particular CPU
- * @timer: the timer to be added
- * @cpu: the CPU to start it on
+ * mod_timer_pending - modify a pending timer's timeout
+ * @timer: the pending timer to be modified
+ * @expires: new timeout in jiffies
  *
- * This is not very scalable on SMP. Double adds are not possible.
+ * mod_timer_pending() is the same for pending timers as mod_timer(),
+ * but will not re-activate and modify already deleted timers.
+ *
+ * It is useful for unserialized use of timers.
  */
-void add_timer_on(struct timer_list *timer, int cpu)
+int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 {
-	struct tvec_base *base = per_cpu(tvec_bases, cpu);
-	unsigned long flags;
-
-	timer_stats_timer_set_start_info(timer);
-	BUG_ON(timer_pending(timer) || !timer->function);
-	spin_lock_irqsave(&base->lock, flags);
-	timer_set_base(timer, base);
-	debug_timer_activate(timer);
-	internal_add_timer(base, timer);
-	/*
-	 * Check whether the other CPU is idle and needs to be
-	 * triggered to reevaluate the timer wheel when nohz is
-	 * active. We are protected against the other CPU fiddling
-	 * with the timer by holding the timer base lock. This also
-	 * makes sure that a CPU on the way to idle can not evaluate
-	 * the timer wheel.
-	 */
-	wake_up_idle_cpu(cpu);
-	spin_unlock_irqrestore(&base->lock, flags);
+	return __mod_timer(timer, expires, true);
 }
+EXPORT_SYMBOL(mod_timer_pending);
 
 /**
  * mod_timer - modify a timer's timeout
@@ -688,9 +680,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
  */
 int mod_timer(struct timer_list *timer, unsigned long expires)
 {
-	BUG_ON(!timer->function);
-
-	timer_stats_timer_set_start_info(timer);
 	/*
 	 * This is a common optimization triggered by the
 	 * networking code - if the timer is re-modified
@@ -699,11 +688,61 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer->expires == expires && timer_pending(timer))
 		return 1;
 
-	return __mod_timer(timer, expires);
+	return __mod_timer(timer, expires, false);
 }
-
 EXPORT_SYMBOL(mod_timer);
 
+/**
+ * add_timer - start a timer
+ * @timer: the timer to be added
+ *
+ * The kernel will do a ->function(->data) callback from the
+ * timer interrupt at the ->expires point in the future. The
+ * current time is 'jiffies'.
+ *
+ * The timer's ->expires, ->function (and if the handler uses it, ->data)
+ * fields must be set prior calling this function.
+ *
+ * Timers with an ->expires field in the past will be executed in the next
+ * timer tick.
+ */
+void add_timer(struct timer_list *timer)
+{
+	BUG_ON(timer_pending(timer));
+	mod_timer(timer, timer->expires);
+}
+EXPORT_SYMBOL(add_timer);
+
+/**
+ * add_timer_on - start a timer on a particular CPU
+ * @timer: the timer to be added
+ * @cpu: the CPU to start it on
+ *
+ * This is not very scalable on SMP. Double adds are not possible.
+ */
+void add_timer_on(struct timer_list *timer, int cpu)
+{
+	struct tvec_base *base = per_cpu(tvec_bases, cpu);
+	unsigned long flags;
+
+	timer_stats_timer_set_start_info(timer);
+	BUG_ON(timer_pending(timer) || !timer->function);
+	spin_lock_irqsave(&base->lock, flags);
+	timer_set_base(timer, base);
+	debug_timer_activate(timer);
+	internal_add_timer(base, timer);
+	/*
+	 * Check whether the other CPU is idle and needs to be
+	 * triggered to reevaluate the timer wheel when nohz is
+	 * active. We are protected against the other CPU fiddling
+	 * with the timer by holding the timer base lock. This also
+	 * makes sure that a CPU on the way to idle can not evaluate
+	 * the timer wheel.
+	 */
+	wake_up_idle_cpu(cpu);
+	spin_unlock_irqrestore(&base->lock, flags);
+}
+
 /**
  * del_timer - deactive a timer.
  * @timer: the timer to be deactivated
@@ -733,7 +772,6 @@ int del_timer(struct timer_list *timer)
 
 	return ret;
 }
-
 EXPORT_SYMBOL(del_timer);
 
 #ifdef CONFIG_SMP
@@ -767,7 +805,6 @@ out:
 
 	return ret;
 }
-
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 /**
@@ -796,7 +833,6 @@ int del_timer_sync(struct timer_list *timer)
 		cpu_relax();
 	}
 }
-
 EXPORT_SYMBOL(del_timer_sync);
 #endif
 
@@ -1268,7 +1304,7 @@ signed long __sched schedule_timeout(signed long timeout)
 	expire = timeout + jiffies;
 
 	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-	__mod_timer(&timer, expire);
+	__mod_timer(&timer, expire, false);
 	schedule();
 	del_singleshot_timer_sync(&timer);
 
-- 
cgit v1.2.3


From 59089d8d162ddcb5c434672e915331964d38a754 Mon Sep 17 00:00:00 2001
From: Santwona Behera <santwona.behera@sun.com>
Date: Fri, 20 Feb 2009 00:58:13 -0800
Subject: ethtool: Add RX pkt classification interface

Signed-off-by: Santwona Behera <santwona.behera@sun.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 89 +++++++++++++++++++++++++++++++++++++++++++++----
 net/core/ethtool.c      | 58 ++++++++++++++++++++++++++------
 2 files changed, 131 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 27c67a542235..131b127b70f8 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -7,6 +7,7 @@
  * Portions Copyright 2002 Intel (eli.kupermann@intel.com,
  *                                christopher.leech@intel.com,
  *                                scott.feldman@intel.com)
+ * Portions Copyright (C) Sun Microsystems 2008
  */
 
 #ifndef _LINUX_ETHTOOL_H
@@ -287,10 +288,75 @@ enum ethtool_flags {
 	ETH_FLAG_LRO		= (1 << 15),	/* LRO is enabled */
 };
 
-struct ethtool_rxnfc {
-	__u32		cmd;
+/* The following structures are for supporting RX network flow
+ * classification configuration. Note, all multibyte fields, e.g.,
+ * ip4src, ip4dst, psrc, pdst, spi, etc. are expected to be in network
+ * byte order.
+ */
+struct ethtool_tcpip4_spec {
+	__be32	ip4src;
+	__be32	ip4dst;
+	__be16	psrc;
+	__be16	pdst;
+	__u8    tos;
+};
+
+struct ethtool_ah_espip4_spec {
+	__be32	ip4src;
+	__be32	ip4dst;
+	__be32	spi;
+	__u8    tos;
+};
+
+struct ethtool_rawip4_spec {
+	__be32	ip4src;
+	__be32	ip4dst;
+	__u8	hdata[64];
+};
+
+struct ethtool_ether_spec {
+	__be16	ether_type;
+	__u8	frame_size;
+	__u8	eframe[16];
+};
+
+#define	ETH_RX_NFC_IP4	1
+#define	ETH_RX_NFC_IP6	2
+
+struct ethtool_usrip4_spec {
+	__be32	ip4src;
+	__be32	ip4dst;
+	__be32	l4_4_bytes;
+	__u8    tos;
+	__u8    ip_ver;
+	__u8    proto;
+};
+
+struct ethtool_rx_flow_spec {
 	__u32		flow_type;
-	__u64		data;
+	union {
+		struct ethtool_tcpip4_spec		tcp_ip4_spec;
+		struct ethtool_tcpip4_spec		udp_ip4_spec;
+		struct ethtool_tcpip4_spec		sctp_ip4_spec;
+		struct ethtool_ah_espip4_spec		ah_ip4_spec;
+		struct ethtool_ah_espip4_spec		esp_ip4_spec;
+		struct ethtool_rawip4_spec		raw_ip4_spec;
+		struct ethtool_ether_spec		ether_spec;
+		struct ethtool_usrip4_spec		usr_ip4_spec;
+		__u8					hdata[64];
+	} h_u, m_u; /* entry, mask */
+	__u64		ring_cookie;
+	__u32		location;
+};
+
+struct ethtool_rxnfc {
+	__u32				cmd;
+	__u32				flow_type;
+	/* The rx flow hash value or the rule DB size */
+	__u64				data;
+	struct ethtool_rx_flow_spec	fs;
+	__u32				rule_cnt;
+	__u32				rule_locs[0];
 };
 
 #ifdef __KERNEL__
@@ -417,8 +483,8 @@ struct ethtool_ops {
 	/* the following hooks are obsolete */
 	int	(*self_test_count)(struct net_device *);/* use get_sset_count */
 	int	(*get_stats_count)(struct net_device *);/* use get_sset_count */
-	int	(*get_rxhash)(struct net_device *, struct ethtool_rxnfc *);
-	int	(*set_rxhash)(struct net_device *, struct ethtool_rxnfc *);
+	int	(*get_rxnfc)(struct net_device *, struct ethtool_rxnfc *, void *);
+	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 };
 #endif /* __KERNEL__ */
 
@@ -469,6 +535,12 @@ struct ethtool_ops {
 #define	ETHTOOL_SRXFH		0x0000002a /* Set RX flow hash configuration */
 #define ETHTOOL_GGRO		0x0000002b /* Get GRO enable (ethtool_value) */
 #define ETHTOOL_SGRO		0x0000002c /* Set GRO enable (ethtool_value) */
+#define	ETHTOOL_GRXRINGS	0x0000002d /* Get RX rings available for LB */
+#define	ETHTOOL_GRXCLSRLCNT	0x0000002e /* Get RX class rule count */
+#define	ETHTOOL_GRXCLSRULE	0x0000002f /* Get RX classification rule */
+#define	ETHTOOL_GRXCLSRLALL	0x00000030 /* Get all RX classification rule */
+#define	ETHTOOL_SRXCLSRLDEL	0x00000031 /* Delete RX classification rule */
+#define	ETHTOOL_SRXCLSRLINS	0x00000032 /* Insert RX classification rule */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
@@ -565,9 +637,13 @@ struct ethtool_ops {
 #define	UDP_V6_FLOW	0x06
 #define	SCTP_V6_FLOW	0x07
 #define	AH_ESP_V6_FLOW	0x08
+#define	AH_V4_FLOW	0x09
+#define	ESP_V4_FLOW	0x0a
+#define	AH_V6_FLOW	0x0b
+#define	ESP_V6_FLOW	0x0c
+#define	IP_USER_FLOW	0x0d
 
 /* L3-L4 network traffic flow hash options */
-#define	RXH_DEV_PORT	(1 << 0)
 #define	RXH_L2DA	(1 << 1)
 #define	RXH_VLAN	(1 << 2)
 #define	RXH_L3_PROTO	(1 << 3)
@@ -577,5 +653,6 @@ struct ethtool_ops {
 #define	RXH_L4_B_2_3	(1 << 7) /* dst port in case of TCP/UDP/SCTP */
 #define	RXH_DISCARD	(1 << 31)
 
+#define	RX_CLS_FLOW_DISC	0xffffffffffffffffULL
 
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 947710a36ced..244ca56dffac 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -209,34 +209,62 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
 	return 0;
 }
 
-static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr)
+static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_rxnfc cmd;
 
-	if (!dev->ethtool_ops->set_rxhash)
+	if (!dev->ethtool_ops->set_rxnfc)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
 		return -EFAULT;
 
-	return dev->ethtool_ops->set_rxhash(dev, &cmd);
+	return dev->ethtool_ops->set_rxnfc(dev, &cmd);
 }
 
-static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr)
+static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_rxnfc info;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	int ret;
+	void *rule_buf = NULL;
 
-	if (!dev->ethtool_ops->get_rxhash)
+	if (!ops->get_rxnfc)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&info, useraddr, sizeof(info)))
 		return -EFAULT;
 
-	dev->ethtool_ops->get_rxhash(dev, &info);
+	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
+		if (info.rule_cnt > 0) {
+			rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
+					   GFP_USER);
+			if (!rule_buf)
+				return -ENOMEM;
+		}
+	}
 
+	ret = ops->get_rxnfc(dev, &info, rule_buf);
+	if (ret < 0)
+		goto err_out;
+
+	ret = -EFAULT;
 	if (copy_to_user(useraddr, &info, sizeof(info)))
-		return -EFAULT;
-	return 0;
+		goto err_out;
+
+	if (rule_buf) {
+		useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
+		if (copy_to_user(useraddr, rule_buf,
+				 info.rule_cnt * sizeof(u32)))
+			goto err_out;
+	}
+	ret = 0;
+
+err_out:
+	if (rule_buf)
+		kfree(rule_buf);
+
+	return ret;
 }
 
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -901,6 +929,10 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GFLAGS:
 	case ETHTOOL_GPFLAGS:
 	case ETHTOOL_GRXFH:
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_GRXCLSRLALL:
 		break;
 	default:
 		if (!capable(CAP_NET_ADMIN))
@@ -1052,10 +1084,16 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 				       dev->ethtool_ops->set_priv_flags);
 		break;
 	case ETHTOOL_GRXFH:
-		rc = ethtool_get_rxhash(dev, useraddr);
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_GRXCLSRLALL:
+		rc = ethtool_get_rxnfc(dev, useraddr);
 		break;
 	case ETHTOOL_SRXFH:
-		rc = ethtool_set_rxhash(dev, useraddr);
+	case ETHTOOL_SRXCLSRLDEL:
+	case ETHTOOL_SRXCLSRLINS:
+		rc = ethtool_set_rxnfc(dev, useraddr);
 		break;
 	case ETHTOOL_GGRO:
 		rc = ethtool_get_gro(dev, useraddr);
-- 
cgit v1.2.3


From be0c22a46cfb79ab2342bb28fde99afa94ef868e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 18 Feb 2009 01:40:43 +0000
Subject: netlink: add NETLINK_BROADCAST_ERROR socket option

This patch adds NETLINK_BROADCAST_ERROR which is a netlink
socket option that the listener can set to make netlink_broadcast()
return errors in the delivery to the caller. This option is useful
if the caller of netlink_broadcast() do something with the result
of the message delivery, like in ctnetlink where it drops a network
packet if the event delivery failed, this is used to enable reliable
logging and state-synchronization. If this socket option is not set,
netlink_broadcast() only reports ESRCH errors and silently ignore
ENOBUFS errors, which is what most netlink_broadcast() callers
should do.

This socket option is based on a suggestion from Patrick McHardy.
Patrick McHardy can exchange this patch for a beer from me ;).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  1 +
 net/netlink/af_netlink.c | 25 +++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 51b09a1f46c3..1e6bf995435c 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -103,6 +103,7 @@ struct nlmsgerr
 #define NETLINK_ADD_MEMBERSHIP	1
 #define NETLINK_DROP_MEMBERSHIP	2
 #define NETLINK_PKTINFO		3
+#define NETLINK_BROADCAST_ERROR	4
 
 struct nl_pktinfo
 {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6ee69c27f806..ed587be1e1c2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -85,6 +85,7 @@ struct netlink_sock {
 
 #define NETLINK_KERNEL_SOCKET	0x1
 #define NETLINK_RECV_PKTINFO	0x2
+#define NETLINK_BROADCAST_SEND_ERROR	0x4
 
 static inline struct netlink_sock *nlk_sk(struct sock *sk)
 {
@@ -995,12 +996,15 @@ static inline int do_one_broadcast(struct sock *sk,
 		netlink_overrun(sk);
 		/* Clone failed. Notify ALL listeners. */
 		p->failure = 1;
+		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+			p->delivery_failure = 1;
 	} else if (sk_filter(sk, p->skb2)) {
 		kfree_skb(p->skb2);
 		p->skb2 = NULL;
 	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
 		netlink_overrun(sk);
-		p->delivery_failure = 1;
+		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+			p->delivery_failure = 1;
 	} else {
 		p->congested |= val;
 		p->delivered = 1;
@@ -1048,7 +1052,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
 	if (info.skb2)
 		kfree_skb(info.skb2);
 
-	if (info.delivery_failure || info.failure)
+	if (info.delivery_failure)
 		return -ENOBUFS;
 
 	if (info.delivered) {
@@ -1163,6 +1167,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 		err = 0;
 		break;
 	}
+	case NETLINK_BROADCAST_ERROR:
+		if (val)
+			nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
+		else
+			nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -1195,6 +1206,16 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
 			return -EFAULT;
 		err = 0;
 		break;
+	case NETLINK_BROADCAST_ERROR:
+		if (len < sizeof(int))
+			return -EINVAL;
+		len = sizeof(int);
+		val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
+		if (put_user(len, optlen) ||
+		    put_user(val, optval))
+			return -EFAULT;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3


From 784544739a25c30637397ace5489eeb6e15d7d49 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 20 Feb 2009 10:35:32 +0100
Subject: netfilter: iptables: lock free counters

The reader/writer lock in ip_tables is acquired in the critical path of
processing packets and is one of the reasons just loading iptables can cause
a 20% performance loss. The rwlock serves two functions:

1) it prevents changes to table state (xt_replace) while table is in use.
   This is now handled by doing rcu on the xt_table. When table is
   replaced, the new table(s) are put in and the old one table(s) are freed
   after RCU period.

2) it provides synchronization when accesing the counter values.
   This is now handled by swapping in new table_info entries for each cpu
   then summing the old values, and putting the result back onto one
   cpu.  On a busy system it may cause sampling to occur at different
   times on each cpu, but no packet/byte counts are lost in the process.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

Sucessfully tested on my dual quad core machine too, but iptables only (no ipv6 here)
BTW, my new "tbench 8" result is 2450 MB/s, (it was 2150 MB/s not so long ago)

Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h |   6 +-
 net/ipv4/netfilter/arp_tables.c    | 115 ++++++++++++++++++++++++++---------
 net/ipv4/netfilter/ip_tables.c     | 120 +++++++++++++++++++++++++++----------
 net/ipv6/netfilter/ip6_tables.c    | 119 +++++++++++++++++++++++++-----------
 net/netfilter/x_tables.c           |  26 ++++++--
 5 files changed, 284 insertions(+), 102 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 9fac88fc0e72..e8e08d036752 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -353,7 +353,7 @@ struct xt_table
 	unsigned int valid_hooks;
 
 	/* Lock for the curtain */
-	rwlock_t lock;
+	struct mutex lock;
 
 	/* Man behind the curtain... */
 	struct xt_table_info *private;
@@ -385,7 +385,7 @@ struct xt_table_info
 
 	/* ipt_entry tables: one per CPU */
 	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
-	char *entries[1];
+	void *entries[1];
 };
 
 #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
@@ -432,6 +432,8 @@ extern void xt_proto_fini(struct net *net, u_int8_t af);
 
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
+extern void xt_table_entry_swap_rcu(struct xt_table_info *old,
+				    struct xt_table_info *new);
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b5db46342614..64a7c6ce0b98 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -261,9 +261,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
 
-	read_lock_bh(&table->lock);
-	private = table->private;
-	table_base = (void *)private->entries[smp_processor_id()];
+	rcu_read_lock();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
@@ -335,7 +336,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			e = (void *)e + e->next_offset;
 		}
 	} while (!hotdrop);
-	read_unlock_bh(&table->lock);
+
+	rcu_read_unlock();
 
 	if (hotdrop)
 		return NF_DROP;
@@ -738,11 +740,65 @@ static void get_counters(const struct xt_table_info *t,
 	}
 }
 
-static inline struct xt_counters *alloc_counters(struct xt_table *table)
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	ARPT_ENTRY_ITERATE(t->entries[cpu],
+			  t->size,
+			  add_counter_to_entry,
+			  counters,
+			  &i);
+	local_bh_enable();
+}
+
+static inline int
+zero_entry_counter(struct arpt_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+				  zero_entry_counter, NULL);
+	}
+}
+
+static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
-	const struct xt_table_info *private = table->private;
+	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
@@ -752,14 +808,30 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
+
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
 
-	/* First, sum counters... */
-	write_lock_bh(&table->lock);
-	get_counters(private, counters);
-	write_unlock_bh(&table->lock);
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
+
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int copy_entries_to_user(unsigned int total_size,
@@ -1099,20 +1171,6 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK.
- */
-static inline int add_counter_to_entry(struct arpt_entry *e,
-				       const struct xt_counters addme[],
-				       unsigned int *i)
-{
-
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
 static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   int compat)
 {
@@ -1172,13 +1230,14 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	write_lock_bh(&t->lock);
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
 	loc_cpu_entry = private->entries[smp_processor_id()];
@@ -1187,8 +1246,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   add_counter_to_entry,
 			   paddc,
 			   &i);
+	preempt_enable();
  unlock_up_free:
-	write_unlock_bh(&t->lock);
+	mutex_unlock(&t->lock);
+
 	xt_table_unlock(t);
 	module_put(t->me);
  free:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ef8b6ca068b2..08cde5bd70a5 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -347,10 +347,12 @@ ipt_do_table(struct sk_buff *skb,
 	mtpar.family  = tgpar.family = NFPROTO_IPV4;
 	tgpar.hooknum = hook;
 
-	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	private = table->private;
-	table_base = (void *)private->entries[smp_processor_id()];
+
+	rcu_read_lock();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
 	e = get_entry(table_base, private->hook_entry[hook]);
 
 	/* For return from builtin chain */
@@ -445,7 +447,7 @@ ipt_do_table(struct sk_buff *skb,
 		}
 	} while (!hotdrop);
 
-	read_unlock_bh(&table->lock);
+	rcu_read_unlock();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -924,13 +926,68 @@ get_counters(const struct xt_table_info *t,
 				  counters,
 				  &i);
 	}
+
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	IPT_ENTRY_ITERATE(t->entries[cpu],
+			  t->size,
+			  add_counter_to_entry,
+			  counters,
+			  &i);
+	local_bh_enable();
+}
+
+
+static inline int
+zero_entry_counter(struct ipt_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+				  zero_entry_counter, NULL);
+	}
 }
 
 static struct xt_counters * alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
-	const struct xt_table_info *private = table->private;
+	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -939,14 +996,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
 
-	/* First, sum counters... */
-	write_lock_bh(&table->lock);
-	get_counters(private, counters);
-	write_unlock_bh(&table->lock);
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
+
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
+
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1312,27 +1385,6 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-#if 0
-	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
-		 *i,
-		 (long unsigned int)e->counters.pcnt,
-		 (long unsigned int)e->counters.bcnt,
-		 (long unsigned int)addme[*i].pcnt,
-		 (long unsigned int)addme[*i].bcnt);
-#endif
-
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
 
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
@@ -1393,13 +1445,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
 		goto free;
 	}
 
-	write_lock_bh(&t->lock);
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1408,8 +1461,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
 			  add_counter_to_entry,
 			  paddc,
 			  &i);
+	preempt_enable();
  unlock_up_free:
-	write_unlock_bh(&t->lock);
+	mutex_unlock(&t->lock);
 	xt_table_unlock(t);
 	module_put(t->me);
  free:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d64594b6c061..34af7bb8df5f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -382,10 +382,12 @@ ip6t_do_table(struct sk_buff *skb,
 	mtpar.family  = tgpar.family = NFPROTO_IPV6;
 	tgpar.hooknum = hook;
 
-	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	private = table->private;
-	table_base = (void *)private->entries[smp_processor_id()];
+
+	rcu_read_lock();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
 	e = get_entry(table_base, private->hook_entry[hook]);
 
 	/* For return from builtin chain */
@@ -483,7 +485,7 @@ ip6t_do_table(struct sk_buff *skb,
 #ifdef CONFIG_NETFILTER_DEBUG
 	((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
 #endif
-	read_unlock_bh(&table->lock);
+	rcu_read_unlock();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -964,11 +966,64 @@ get_counters(const struct xt_table_info *t,
 	}
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ip6t_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	IP6T_ENTRY_ITERATE(t->entries[cpu],
+			   t->size,
+			   add_counter_to_entry,
+			   counters,
+			   &i);
+	local_bh_enable();
+}
+
+static inline int
+zero_entry_counter(struct ip6t_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+				   zero_entry_counter, NULL);
+	}
+}
+
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
-	const struct xt_table_info *private = table->private;
+	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -977,14 +1032,28 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
+
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
+
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
 
-	/* First, sum counters... */
-	write_lock_bh(&table->lock);
-	get_counters(private, counters);
-	write_unlock_bh(&table->lock);
+	xt_free_table_info(info);
 
-	return counters;
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1351,28 +1420,6 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static inline int
-add_counter_to_entry(struct ip6t_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-#if 0
-	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
-		 *i,
-		 (long unsigned int)e->counters.pcnt,
-		 (long unsigned int)e->counters.bcnt,
-		 (long unsigned int)addme[*i].pcnt,
-		 (long unsigned int)addme[*i].bcnt);
-#endif
-
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len,
 		int compat)
@@ -1433,13 +1480,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	write_lock_bh(&t->lock);
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1448,8 +1496,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
 			  add_counter_to_entry,
 			  paddc,
 			  &i);
+	preempt_enable();
  unlock_up_free:
-	write_unlock_bh(&t->lock);
+	mutex_unlock(&t->lock);
 	xt_table_unlock(t);
 	module_put(t->me);
  free:
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index bfbf521f6ea5..bfcac92d5563 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -625,6 +625,20 @@ void xt_free_table_info(struct xt_table_info *info)
 }
 EXPORT_SYMBOL(xt_free_table_info);
 
+void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
+			     struct xt_table_info *newinfo)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *p = oldinfo->entries[cpu];
+		rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
+		newinfo->entries[cpu] = p;
+	}
+
+}
+EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
+
 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name)
@@ -671,21 +685,22 @@ xt_replace_table(struct xt_table *table,
 	struct xt_table_info *oldinfo, *private;
 
 	/* Do the substitution. */
-	write_lock_bh(&table->lock);
+	mutex_lock(&table->lock);
 	private = table->private;
 	/* Check inside lock: is the old number correct? */
 	if (num_counters != private->number) {
 		duprintf("num_counters != table->private->number (%u/%u)\n",
 			 num_counters, private->number);
-		write_unlock_bh(&table->lock);
+		mutex_unlock(&table->lock);
 		*error = -EAGAIN;
 		return NULL;
 	}
 	oldinfo = private;
-	table->private = newinfo;
+	rcu_assign_pointer(table->private, newinfo);
 	newinfo->initial_entries = oldinfo->initial_entries;
-	write_unlock_bh(&table->lock);
+	mutex_unlock(&table->lock);
 
+	synchronize_net();
 	return oldinfo;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -719,7 +734,8 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
 
 	/* Simplifies replace_table code. */
 	table->private = bootstrap;
-	rwlock_init(&table->lock);
+	mutex_init(&table->lock);
+
 	if (!xt_replace_table(table, 0, newinfo, &ret))
 		goto unlock;
 
-- 
cgit v1.2.3


From 268cb38e1802db560c73167e643f14a3dcb4b07c Mon Sep 17 00:00:00 2001
From: Adam Nielsen <a.nielsen@shikadi.net>
Date: Fri, 20 Feb 2009 10:55:14 +0100
Subject: netfilter: x_tables: add LED trigger target

Kernel module providing implementation of LED netfilter target.  Each
instance of the target appears as a led-trigger device, which can be
associated with one or more LEDs in /sys/class/leds/

Signed-off-by: Adam Nielsen <a.nielsen@shikadi.net>
Acked-by: Richard Purdie <rpurdie@linux.intel.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 drivers/leds/Kconfig             |   3 +
 include/linux/netfilter/Kbuild   |   1 +
 include/linux/netfilter/xt_LED.h |  13 ++++
 net/netfilter/Kconfig            |  24 ++++++
 net/netfilter/Makefile           |   1 +
 net/netfilter/xt_LED.c           | 161 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 203 insertions(+)
 create mode 100644 include/linux/netfilter/xt_LED.h
 create mode 100644 net/netfilter/xt_LED.c

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 742713611bc5..556aeca0d860 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -223,4 +223,7 @@ config LEDS_TRIGGER_DEFAULT_ON
 	  This allows LEDs to be initialised in the ON state.
 	  If unsure, say Y.
 
+comment "iptables trigger is under Netfilter config (LED target)"
+	depends on LEDS_TRIGGERS
+
 endif # NEW_LEDS
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 5a8af875bce2..deeaee5c83f2 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -7,6 +7,7 @@ header-y += xt_CLASSIFY.h
 header-y += xt_CONNMARK.h
 header-y += xt_CONNSECMARK.h
 header-y += xt_DSCP.h
+header-y += xt_LED.h
 header-y += xt_MARK.h
 header-y += xt_NFLOG.h
 header-y += xt_NFQUEUE.h
diff --git a/include/linux/netfilter/xt_LED.h b/include/linux/netfilter/xt_LED.h
new file mode 100644
index 000000000000..4c91a0d770d0
--- /dev/null
+++ b/include/linux/netfilter/xt_LED.h
@@ -0,0 +1,13 @@
+#ifndef _XT_LED_H
+#define _XT_LED_H
+
+struct xt_led_info {
+	char id[27];        /* Unique ID for this trigger in the LED class */
+	__u8 always_blink;  /* Blink even if the LED is already on */
+	__u32 delay;        /* Delay until LED is switched off after trigger */
+
+	/* Kernel data used in the module */
+	void *internal_data __attribute__((aligned(8)));
+};
+
+#endif /* _XT_LED_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0eb98b4fbf44..cdbaaff6d0d6 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -372,6 +372,30 @@ config NETFILTER_XT_TARGET_HL
 	since you can easily create immortal packets that loop
 	forever on the network.
 
+config NETFILTER_XT_TARGET_LED
+	tristate '"LED" target support'
+	depends on LEDS_CLASS
+	depends on NETFILTER_ADVANCED
+	help
+	  This option adds a `LED' target, which allows you to blink LEDs in
+	  response to particular packets passing through your machine.
+
+	  This can be used to turn a spare LED into a network activity LED,
+	  which only flashes in response to FTP transfers, for example.  Or
+	  you could have an LED which lights up for a minute or two every time
+	  somebody connects to your machine via SSH.
+
+	  You will need support for the "led" class to make this work.
+
+	  To create an LED trigger for incoming SSH traffic:
+	    iptables -A INPUT -p tcp --dport 22 -j LED --led-trigger-id ssh --led-delay 1000
+
+	  Then attach the new trigger to an LED on your system:
+	    echo netfilter-ssh > /sys/class/leds/<ledname>/trigger
+
+	  For more information on the LEDs available on your system, see
+	  Documentation/leds-class.txt
+
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index da73ed25701c..7a9b8397573a 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
new file mode 100644
index 000000000000..8ff7843bb921
--- /dev/null
+++ b/net/netfilter/xt_LED.c
@@ -0,0 +1,161 @@
+/*
+ * xt_LED.c - netfilter target to make LEDs blink upon packet matches
+ *
+ * Copyright (C) 2008 Adam Nielsen <a.nielsen@shikadi.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/leds.h>
+#include <linux/mutex.h>
+
+#include <linux/netfilter/xt_LED.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
+MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
+
+/*
+ * This is declared in here (the kernel module) only, to avoid having these
+ * dependencies in userspace code.  This is what xt_led_info.internal_data
+ * points to.
+ */
+struct xt_led_info_internal {
+	struct led_trigger netfilter_led_trigger;
+	struct timer_list timer;
+};
+
+static unsigned int
+led_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_led_info *ledinfo = par->targinfo;
+	struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
+
+	/*
+	 * If "always blink" is enabled, and there's still some time until the
+	 * LED will switch off, briefly switch it off now.
+	 */
+	if ((ledinfo->delay > 0) && ledinfo->always_blink &&
+	    timer_pending(&ledinternal->timer))
+		led_trigger_event(&ledinternal->netfilter_led_trigger,LED_OFF);
+
+	led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
+
+	/* If there's a positive delay, start/update the timer */
+	if (ledinfo->delay > 0) {
+		mod_timer(&ledinternal->timer,
+			  jiffies + msecs_to_jiffies(ledinfo->delay));
+
+	/* Otherwise if there was no delay given, blink as fast as possible */
+	} else if (ledinfo->delay == 0) {
+		led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
+	}
+
+	/* else the delay is negative, which means switch on and stay on */
+
+	return XT_CONTINUE;
+}
+
+static void led_timeout_callback(unsigned long data)
+{
+	struct xt_led_info *ledinfo = (struct xt_led_info *)data;
+	struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
+
+	led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
+}
+
+static bool led_tg_check(const struct xt_tgchk_param *par)
+{
+	struct xt_led_info *ledinfo = par->targinfo;
+	struct xt_led_info_internal *ledinternal;
+	int err;
+
+	if (ledinfo->id[0] == '\0') {
+		printk(KERN_ERR KBUILD_MODNAME ": No 'id' parameter given.\n");
+		return false;
+	}
+
+	ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL);
+	if (!ledinternal) {
+		printk(KERN_CRIT KBUILD_MODNAME ": out of memory\n");
+		return false;
+	}
+
+	ledinternal->netfilter_led_trigger.name = ledinfo->id;
+
+	err = led_trigger_register(&ledinternal->netfilter_led_trigger);
+	if (err) {
+		printk(KERN_CRIT KBUILD_MODNAME
+			": led_trigger_register() failed\n");
+		if (err == -EEXIST)
+			printk(KERN_ERR KBUILD_MODNAME
+				": Trigger name is already in use.\n");
+		goto exit_alloc;
+	}
+
+	/* See if we need to set up a timer */
+	if (ledinfo->delay > 0)
+		setup_timer(&ledinternal->timer, led_timeout_callback,
+			    (unsigned long)ledinfo);
+
+	ledinfo->internal_data = ledinternal;
+
+	return true;
+
+exit_alloc:
+	kfree(ledinternal);
+
+	return false;
+}
+
+static void led_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct xt_led_info *ledinfo = par->targinfo;
+	struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
+
+	if (ledinfo->delay > 0)
+		del_timer_sync(&ledinternal->timer);
+
+	led_trigger_unregister(&ledinternal->netfilter_led_trigger);
+	kfree(ledinternal);
+}
+
+static struct xt_target led_tg_reg __read_mostly = {
+	.name		= "LED",
+	.revision	= 0,
+	.family		= NFPROTO_UNSPEC,
+	.target		= led_tg,
+	.targetsize	= XT_ALIGN(sizeof(struct xt_led_info)),
+	.checkentry	= led_tg_check,
+	.destroy	= led_tg_destroy,
+	.me		= THIS_MODULE,
+};
+
+static int __init led_tg_init(void)
+{
+	return xt_register_target(&led_tg_reg);
+}
+
+static void __exit led_tg_exit(void)
+{
+	xt_unregister_target(&led_tg_reg);
+}
+
+module_init(led_tg_init);
+module_exit(led_tg_exit);
-- 
cgit v1.2.3


From ffadd4d0feb5376c82dc3a4104731b7ce2794edc Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Tue, 17 Feb 2009 12:05:07 -0500
Subject: SLUB: Introduce and use SLUB_MAX_SIZE and SLUB_PAGE_SHIFT constants

As a preparational patch to bump up page allocator pass-through threshold,
introduce two new constants SLUB_MAX_SIZE and SLUB_PAGE_SHIFT and convert
mm/slub.c to use them.

Reported-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Tested-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slub_def.h | 19 ++++++++++++++++---
 mm/slub.c                | 16 ++++++++--------
 2 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b1aacd..986e09dcfd8f 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -120,11 +120,24 @@ struct kmem_cache {
 
 #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
 
+/*
+ * Maximum kmalloc object size handled by SLUB. Larger object allocations
+ * are passed through to the page allocator. The page allocator "fastpath"
+ * is relatively slow so we need this value sufficiently high so that
+ * performance critical objects are allocated through the SLUB fastpath.
+ *
+ * This should be dropped to PAGE_SIZE / 2 once the page allocator
+ * "fastpath" becomes competitive with the slab allocator fastpaths.
+ */
+#define SLUB_MAX_SIZE (PAGE_SIZE)
+
+#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 1)
+
 /*
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1];
+extern struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -212,7 +225,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
-		if (size > PAGE_SIZE)
+		if (size > SLUB_MAX_SIZE)
 			return kmalloc_large(size, flags);
 
 		if (!(flags & SLUB_DMA)) {
@@ -234,7 +247,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size) &&
-		size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
+		size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
 
 		if (!s)
diff --git a/mm/slub.c b/mm/slub.c
index bdc9abb08a23..5a5e7f5bf799 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2475,7 +2475,7 @@ EXPORT_SYMBOL(kmem_cache_destroy);
  *		Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 static int __init setup_slub_min_order(char *str)
@@ -2537,7 +2537,7 @@ panic:
 }
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
+static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];
 
 static void sysfs_add_func(struct work_struct *w)
 {
@@ -2658,7 +2658,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE))
+	if (unlikely(size > SLUB_MAX_SIZE))
 		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
@@ -2686,7 +2686,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE))
+	if (unlikely(size > SLUB_MAX_SIZE))
 		return kmalloc_large_node(size, flags, node);
 
 	s = get_slab(size, flags);
@@ -2985,7 +2985,7 @@ void __init kmem_cache_init(void)
 		caches++;
 	}
 
-	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
+	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
 			"kmalloc", 1 << i, GFP_KERNEL);
 		caches++;
@@ -3022,7 +3022,7 @@ void __init kmem_cache_init(void)
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
-	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
+	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
 			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
@@ -3222,7 +3222,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE))
+	if (unlikely(size > SLUB_MAX_SIZE))
 		return kmalloc_large(size, gfpflags);
 
 	s = get_slab(size, gfpflags);
@@ -3238,7 +3238,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE))
+	if (unlikely(size > SLUB_MAX_SIZE))
 		return kmalloc_large_node(size, gfpflags, node);
 
 	s = get_slab(size, gfpflags);
-- 
cgit v1.2.3


From 51735a7ca67531267a27b57e5fe20f7815192f9c Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Fri, 20 Feb 2009 12:21:33 +0200
Subject: SLUB: Do not pass 8k objects through to the page allocator

Increase the maximum object size in SLUB so that 8k objects are not
passed through to the page allocator anymore. The network stack uses 8k
objects for performance critical operations.

The patch is motivated by a SLAB vs. SLUB regression in the netperf
benchmark. The problem is that the kfree(skb->head) call in
skb_release_data() that is subject to page allocator pass-through as the
size passed to __alloc_skb() is larger than 4 KB in this test.

As explained by Yanmin Zhang:

  I use 2.6.29-rc2 kernel to run netperf UDP-U-4k CPU_NUM client/server
  pair loopback testing on x86-64 machines. Comparing with SLUB, SLAB's
  result is about 2.3 times of SLUB's. After applying the reverting patch,
  the result difference between SLUB and SLAB becomes 1% which we might
  consider as fluctuation.

[ penberg@cs.helsinki.fi: fix oops in kmalloc() ]
Reported-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Tested-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slub_def.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 986e09dcfd8f..e217a7a68ea7 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -129,9 +129,9 @@ struct kmem_cache {
  * This should be dropped to PAGE_SIZE / 2 once the page allocator
  * "fastpath" becomes competitive with the slab allocator fastpaths.
  */
-#define SLUB_MAX_SIZE (PAGE_SIZE)
+#define SLUB_MAX_SIZE (2 * PAGE_SIZE)
 
-#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 1)
+#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2)
 
 /*
  * We keep the general caches in an array of slab caches that are used for
-- 
cgit v1.2.3


From 3b89d7d881a1dbb4da158f7eb5d6b3ceefc72810 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Sun, 22 Feb 2009 17:40:07 -0800
Subject: slub: move min_partial to struct kmem_cache

Although it allows for better cacheline use, it is unnecessary to save a
copy of the cache's min_partial value in each kmem_cache_node.

Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slub_def.h |  2 +-
 mm/slub.c                | 29 ++++++++++++++++-------------
 2 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b1aacd..f20a89e4d52c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -46,7 +46,6 @@ struct kmem_cache_cpu {
 struct kmem_cache_node {
 	spinlock_t list_lock;	/* Protect partial list and nr_partial */
 	unsigned long nr_partial;
-	unsigned long min_partial;
 	struct list_head partial;
 #ifdef CONFIG_SLUB_DEBUG
 	atomic_long_t nr_slabs;
@@ -89,6 +88,7 @@ struct kmem_cache {
 	void (*ctor)(void *);
 	int inuse;		/* Offset to metadata */
 	int align;		/* Alignment */
+	unsigned long min_partial;
 	const char *name;	/* Name (only for display!) */
 	struct list_head list;	/* List of slab caches */
 #ifdef CONFIG_SLUB_DEBUG
diff --git a/mm/slub.c b/mm/slub.c
index bdc9abb08a23..4fff385b17a3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1335,7 +1335,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 		n = get_node(s, zone_to_nid(zone));
 
 		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
-				n->nr_partial > n->min_partial) {
+				n->nr_partial > s->min_partial) {
 			page = get_partial_node(n);
 			if (page)
 				return page;
@@ -1387,7 +1387,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 		slab_unlock(page);
 	} else {
 		stat(c, DEACTIVATE_EMPTY);
-		if (n->nr_partial < n->min_partial) {
+		if (n->nr_partial < s->min_partial) {
 			/*
 			 * Adding an empty slab to the partial slabs in order
 			 * to avoid page allocator overhead. This slab needs
@@ -1928,17 +1928,6 @@ static void
 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 {
 	n->nr_partial = 0;
-
-	/*
-	 * The larger the object size is, the more pages we want on the partial
-	 * list to avoid pounding the page allocator excessively.
-	 */
-	n->min_partial = ilog2(s->size);
-	if (n->min_partial < MIN_PARTIAL)
-		n->min_partial = MIN_PARTIAL;
-	else if (n->min_partial > MAX_PARTIAL)
-		n->min_partial = MAX_PARTIAL;
-
 	spin_lock_init(&n->list_lock);
 	INIT_LIST_HEAD(&n->partial);
 #ifdef CONFIG_SLUB_DEBUG
@@ -2181,6 +2170,15 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 }
 #endif
 
+static void calculate_min_partial(struct kmem_cache *s, unsigned long min)
+{
+	if (min < MIN_PARTIAL)
+		min = MIN_PARTIAL;
+	else if (min > MAX_PARTIAL)
+		min = MAX_PARTIAL;
+	s->min_partial = min;
+}
+
 /*
  * calculate_sizes() determines the order and the distribution of data within
  * a slab object.
@@ -2319,6 +2317,11 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 	if (!calculate_sizes(s, -1))
 		goto error;
 
+	/*
+	 * The larger the object size is, the more pages we want on the partial
+	 * list to avoid pounding the page allocator excessively.
+	 */
+	calculate_min_partial(s, ilog2(s->size));
 	s->refcount = 1;
 #ifdef CONFIG_NUMA
 	s->remote_node_defrag_ratio = 1000;
-- 
cgit v1.2.3


From d060ffc1840e37100628f520e66600c5ae483b44 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 24 Feb 2009 15:23:58 +0100
Subject: netfilter: install missing headers

iptables imports headers from (the unifdefed headers of a)
kernel tree, but some headers happened to not be installed.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild      | 6 ++++++
 include/linux/netfilter_ipv6/Kbuild | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index deeaee5c83f2..947b47d7f6c0 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -14,8 +14,11 @@ header-y += xt_NFQUEUE.h
 header-y += xt_RATEEST.h
 header-y += xt_SECMARK.h
 header-y += xt_TCPMSS.h
+header-y += xt_TCPOPTSTRIP.h
+header-y += xt_TPROXY.h
 header-y += xt_comment.h
 header-y += xt_connbytes.h
+header-y += xt_connlimit.h
 header-y += xt_connmark.h
 header-y += xt_conntrack.h
 header-y += xt_dccp.h
@@ -31,6 +34,7 @@ header-y += xt_mark.h
 header-y += xt_multiport.h
 header-y += xt_owner.h
 header-y += xt_pkttype.h
+header-y += xt_quota.h
 header-y += xt_rateest.h
 header-y += xt_realm.h
 header-y += xt_recent.h
@@ -40,6 +44,8 @@ header-y += xt_statistic.h
 header-y += xt_string.h
 header-y += xt_tcpmss.h
 header-y += xt_tcpudp.h
+header-y += xt_time.h
+header-y += xt_u32.h
 
 unifdef-y += nf_conntrack_common.h
 unifdef-y += nf_conntrack_ftp.h
diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild
index 8887a5fcd1d0..aca4bd1f6d7c 100644
--- a/include/linux/netfilter_ipv6/Kbuild
+++ b/include/linux/netfilter_ipv6/Kbuild
@@ -11,6 +11,7 @@ header-y += ip6t_length.h
 header-y += ip6t_limit.h
 header-y += ip6t_mac.h
 header-y += ip6t_mark.h
+header-y += ip6t_mh.h
 header-y += ip6t_multiport.h
 header-y += ip6t_opts.h
 header-y += ip6t_owner.h
-- 
cgit v1.2.3


From 1ce85fe402137824246bad03ff85f3913d565c17 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 24 Feb 2009 23:18:28 -0800
Subject: netlink: change nlmsg_notify() return value logic

This patch changes the return value of nlmsg_notify() as follows:

If NETLINK_BROADCAST_ERROR is set by any of the listeners and
an error in the delivery happened, return the broadcast error;
else if there are no listeners apart from the socket that
requested a change with the echo flag, return the result of the
unicast notification. Thus, with this patch, the unicast
notification is handled in the same way of a broadcast listener
that has set the NETLINK_BROADCAST_ERROR socket flag.

This patch is useful in case that the caller of nlmsg_notify()
wants to know the result of the delivery of a netlink notification
(including the broadcast delivery) and take any action in case
that the delivery failed. For example, ctnetlink can drop packets
if the event delivery failed to provide reliable logging and
state-synchronization at the cost of dropping packets.

This patch also modifies the rtnetlink code to ignore the return
value of rtnl_notify() in all callers. The function rtnl_notify()
(before this patch) returned the error of the unicast notification
which makes rtnl_set_sk_err() reports errors to all listeners. This
is not of any help since the origin of the change (the socket that
requested the echoing) notices the ENOBUFS error if the notification
fails and should resync itself.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  4 ++--
 net/bridge/br_netlink.c   |  3 ++-
 net/core/fib_rules.c      |  3 ++-
 net/core/neighbour.c      |  3 ++-
 net/core/rtnetlink.c      |  9 +++++----
 net/decnet/dn_dev.c       |  3 ++-
 net/decnet/dn_table.c     |  3 ++-
 net/ipv4/devinet.c        |  3 ++-
 net/ipv4/fib_semantics.c  |  5 +++--
 net/ipv6/addrconf.c       |  9 ++++++---
 net/ipv6/ndisc.c          |  6 +-----
 net/ipv6/route.c          |  5 +++--
 net/netlink/af_netlink.c  | 14 ++++++++++----
 net/phonet/pn_netlink.c   |  5 +++--
 14 files changed, 45 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1e5f6730ff31..35a07c830f79 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -622,8 +622,8 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str)
 
 extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
 extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid);
-extern int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
-		       struct nlmsghdr *nlh, gfp_t flags);
+extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid,
+			u32 group, struct nlmsghdr *nlh, gfp_t flags);
 extern void rtnl_set_sk_err(struct net *net, u32 group, int error);
 extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ba7be195803c..fcffb3fb1177 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -98,7 +98,8 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 32b3a0152d7a..98691e1466b8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule,
 		goto errout;
 	}
 
-	err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+	rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, ops->nlgroup, err);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 278a142d1047..e1144cb94b99 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2534,7 +2534,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 790dd205bb5d..d78030f88bd0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -455,8 +455,8 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
 	return nlmsg_unicast(rtnl, skb, pid);
 }
 
-int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
-		struct nlmsghdr *nlh, gfp_t flags)
+void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+		 struct nlmsghdr *nlh, gfp_t flags)
 {
 	struct sock *rtnl = net->rtnl;
 	int report = 0;
@@ -464,7 +464,7 @@ int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
 	if (nlh)
 		report = nlmsg_report(nlh);
 
-	return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+	nlmsg_notify(rtnl, skb, pid, group, report, flags);
 }
 
 void rtnl_set_sk_err(struct net *net, u32 group, int error)
@@ -1246,7 +1246,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index daf2b98b15fe..e457769bf7a7 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -769,7 +769,8 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+	rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 69ad9280c693..67054b0d550f 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -375,7 +375,8 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+	rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d519a6a66726..126bb911880f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1216,7 +1216,8 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4817dea3bc73..f831df500907 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -322,8 +322,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
-			  info->nlh, GFP_KERNEL);
+	rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
+		    info->nlh, GFP_KERNEL);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 03e2a1ad71e9..f8f76d6e21cb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3638,7 +3638,8 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
@@ -3849,7 +3850,8 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
@@ -3919,7 +3921,8 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3cd83b85e9ef..9f061d1adbc2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1095,11 +1095,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
 		&ipv6_hdr(ra)->saddr);
 	nlmsg_end(skb, nlh);
 
-	err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL,
-			  GFP_ATOMIC);
-	if (err < 0)
-		goto errout;
-
+	rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
 	return;
 
 nla_put_failure:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c3d486a3edad..1394ddb6e35c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2400,8 +2400,9 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
-			  info->nlh, gfp_any());
+	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
+		    info->nlh, gfp_any());
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ed587be1e1c2..2760b62dc2c1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1760,12 +1760,18 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
 			exclude_pid = pid;
 		}
 
-		/* errors reported via destination sk->sk_err */
-		nlmsg_multicast(sk, skb, exclude_pid, group, flags);
+		/* errors reported via destination sk->sk_err, but propagate
+		 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
+		err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
 	}
 
-	if (report)
-		err = nlmsg_unicast(sk, skb, pid);
+	if (report) {
+		int err2;
+
+		err2 = nlmsg_unicast(sk, skb, pid);
+		if (!err || err == -ESRCH)
+			err = err2;
+	}
 
 	return err;
 }
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 1ceea1f92413..cec4e5951681 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -47,8 +47,9 @@ static void rtmsg_notify(int event, struct net_device *dev, u8 addr)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, dev_net(dev), 0,
-			  RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
+	rtnl_notify(skb, dev_net(dev), 0,
+		    RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
+	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err);
-- 
cgit v1.2.3


From 2b9d1496e7835a603c340e8f0dd81f4b74d5f248 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 22 Feb 2009 15:48:43 +0100
Subject: time: ntp: make 64-bit constants more robust

Impact: cleanup, no functionality changed

 - make PPM_SCALE an explicit s64 constant, to
   remove (s64) casts from usage sites.

kernel/time/ntp.o:

   text	   data	    bss	    dec	    hex	filename
   2536	    114	    136	   2786	    ae2	ntp.o.before
   2536	    114	    136	   2786	    ae2	ntp.o.after

md5:
   40a7728d1188aa18e83e21a81fa7b150  ntp.o.before.asm
   40a7728d1188aa18e83e21a81fa7b150  ntp.o.after.asm

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/timex.h | 2 +-
 kernel/time/ntp.c     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 998a55d80acf..aa3475fcff64 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -190,7 +190,7 @@ struct timex {
  * offset and maximum frequency tolerance.
  */
 #define SHIFT_USEC 16		/* frequency offset scale (shift) */
-#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
+#define PPM_SCALE ((s64)NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
 #define PPM_SCALE_INV_SHIFT 19
 #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
 		       PPM_SCALE + 1)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 4346ed6e623f..7447d57e021a 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -408,7 +408,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
 		time_status &= ~STA_NANO;
 
 	if (txc->modes & ADJ_FREQUENCY) {
-		time_freq = (s64)txc->freq * PPM_SCALE;
+		time_freq = txc->freq * PPM_SCALE;
 		time_freq = min(time_freq, MAXFREQ_SCALED);
 		time_freq = max(time_freq, -MAXFREQ_SCALED);
 	}
@@ -505,7 +505,7 @@ int do_adjtimex(struct timex *txc)
 		result = TIME_ERROR;
 
 	txc->freq	   = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
-					 (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT);
+					 PPM_SCALE_INV, NTP_SCALE_SHIFT);
 	txc->maxerror	   = time_maxerror;
 	txc->esterror	   = time_esterror;
 	txc->status	   = time_status;
-- 
cgit v1.2.3


From b342501cd31e5546d0c9ca8ceff5ded1832f9e5b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 26 Feb 2009 20:20:29 +0100
Subject: sched: allow architectures to specify sched_clock_stable

Allow CONFIG_HAVE_UNSTABLE_SCHED_CLOCK architectures to still specify
that their sched_clock() implementation is reliable.

This will be used by x86 to switch on a faster sched_clock_cpu()
implementation on certain CPU types.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 10 ++++++++++
 kernel/sched_clock.c  | 45 ++++++++++++++++++++-------------------------
 2 files changed, 30 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8981e52c714f..a063d19b7a7d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1670,6 +1670,16 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 	return set_cpus_allowed_ptr(p, &new_mask);
 }
 
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+extern int sched_clock_stable;
+#endif
+
 extern unsigned long long sched_clock(void);
 
 extern void sched_clock_init(void);
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..a755d023805a 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -24,11 +24,11 @@
  * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
  * consistent between cpus (never more than 2 jiffies difference).
  */
-#include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/spinlock.h>
-#include <linux/ktime.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -43,6 +43,10 @@ unsigned long long __attribute__((weak)) sched_clock(void)
 static __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__read_mostly int sched_clock_stable;
+#else
+static const int sched_clock_stable = 1;
+#endif
 
 struct sched_clock_data {
 	/*
@@ -87,7 +91,7 @@ void sched_clock_init(void)
 }
 
 /*
- * min,max except they take wrapping into account
+ * min, max except they take wrapping into account
  */
 
 static inline u64 wrap_min(u64 x, u64 y)
@@ -116,10 +120,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
 	if (unlikely(delta < 0))
 		delta = 0;
 
+	if (unlikely(!sched_clock_running))
+		return 0ull;
+
 	/*
 	 * scd->clock = clamp(scd->tick_gtod + delta,
-	 * 		      max(scd->tick_gtod, scd->clock),
-	 * 		      scd->tick_gtod + TICK_NSEC);
+	 *		      max(scd->tick_gtod, scd->clock),
+	 *		      scd->tick_gtod + TICK_NSEC);
 	 */
 
 	clock = scd->tick_gtod + delta;
@@ -148,12 +155,13 @@ static void lock_double_clock(struct sched_clock_data *data1,
 
 u64 sched_clock_cpu(int cpu)
 {
-	struct sched_clock_data *scd = cpu_sdc(cpu);
 	u64 now, clock, this_clock, remote_clock;
+	struct sched_clock_data *scd;
 
-	if (unlikely(!sched_clock_running))
-		return 0ull;
+	if (sched_clock_stable)
+		return sched_clock();
 
+	scd = cpu_sdc(cpu);
 	WARN_ON_ONCE(!irqs_disabled());
 	now = sched_clock();
 
@@ -193,6 +201,8 @@ u64 sched_clock_cpu(int cpu)
 	return clock;
 }
 
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+
 void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd = this_scd();
@@ -235,22 +245,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
-
-void sched_clock_init(void)
-{
-	sched_clock_running = 1;
-}
-
-u64 sched_clock_cpu(int cpu)
-{
-	if (unlikely(!sched_clock_running))
-		return 0;
-
-	return sched_clock();
-}
-
-#endif
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 unsigned long long cpu_clock(int cpu)
 {
-- 
cgit v1.2.3


From 8a7c4c77267b1c77296cd03e6704813cb70706d1 Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 26 Feb 2009 23:41:38 -0800
Subject: RDS: Add AF and PF #defines for RDS sockets

RDS is a reliable datagram protocol used for IPC on Oracle
database clusters. This adds address and protocol family numbers
for it.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 20fc4bbfca42..9c90dc403efc 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -179,6 +179,7 @@ struct ucred {
 #define AF_ASH		18	/* Ash				*/
 #define AF_ECONET	19	/* Acorn Econet			*/
 #define AF_ATMSVC	20	/* ATM SVCs			*/
+#define AF_RDS		21	/* RDS sockets 			*/
 #define AF_SNA		22	/* Linux SNA Project (nutters!) */
 #define AF_IRDA		23	/* IRDA sockets			*/
 #define AF_PPPOX	24	/* PPPoX sockets		*/
@@ -217,6 +218,7 @@ struct ucred {
 #define PF_ASH		AF_ASH
 #define PF_ECONET	AF_ECONET
 #define PF_ATMSVC	AF_ATMSVC
+#define PF_RDS		AF_RDS
 #define PF_SNA		AF_SNA
 #define PF_IRDA		AF_IRDA
 #define PF_PPPOX	AF_PPPOX
@@ -298,6 +300,7 @@ struct ucred {
 #define SOL_PPPOL2TP	273
 #define SOL_BLUETOOTH	274
 #define SOL_PNPIPE	275
+#define SOL_RDS		276
 
 /* IPX options */
 #define IPX_TYPE	1
-- 
cgit v1.2.3


From db49b9d26c1966c683efced9e1c37f391d8f8182 Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Tue, 24 Feb 2009 15:30:42 +0000
Subject: RDS: Add userspace header

Applications include this header in order to use RDS sockets.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rds.h | 250 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 250 insertions(+)
 create mode 100644 include/linux/rds.h

(limited to 'include/linux')

diff --git a/include/linux/rds.h b/include/linux/rds.h
new file mode 100644
index 000000000000..d91dc91f5443
--- /dev/null
+++ b/include/linux/rds.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2008 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _LINUX_RDS_H
+#define _LINUX_RDS_H
+
+#include <linux/types.h>
+
+/* These sparse annotated types shouldn't be in any user
+ * visible header file. We should clean this up rather
+ * than kludging around them. */
+#ifndef __KERNEL__
+#define __be16	u_int16_t
+#define __be32	u_int32_t
+#define __be64	u_int64_t
+#endif
+
+#define RDS_IB_ABI_VERSION		0x301
+
+/*
+ * setsockopt/getsockopt for SOL_RDS
+ */
+#define RDS_CANCEL_SENT_TO      	1
+#define RDS_GET_MR			2
+#define RDS_FREE_MR			3
+/* deprecated: RDS_BARRIER 4 */
+#define RDS_RECVERR			5
+#define RDS_CONG_MONITOR		6
+
+/*
+ * Control message types for SOL_RDS.
+ *
+ * CMSG_RDMA_ARGS (sendmsg)
+ *	Request a RDMA transfer to/from the specified
+ *	memory ranges.
+ *	The cmsg_data is a struct rds_rdma_args.
+ * RDS_CMSG_RDMA_DEST (recvmsg, sendmsg)
+ *	Kernel informs application about intended
+ *	source/destination of a RDMA transfer
+ * RDS_CMSG_RDMA_MAP (sendmsg)
+ *	Application asks kernel to map the given
+ *	memory range into a IB MR, and send the
+ *	R_Key along in an RDS extension header.
+ *	The cmsg_data is a struct rds_get_mr_args,
+ *	the same as for the GET_MR setsockopt.
+ * RDS_CMSG_RDMA_STATUS (recvmsg)
+ *	Returns the status of a completed RDMA operation.
+ */
+#define RDS_CMSG_RDMA_ARGS		1
+#define RDS_CMSG_RDMA_DEST		2
+#define RDS_CMSG_RDMA_MAP		3
+#define RDS_CMSG_RDMA_STATUS		4
+#define RDS_CMSG_CONG_UPDATE		5
+
+#define RDS_INFO_FIRST			10000
+#define RDS_INFO_COUNTERS		10000
+#define RDS_INFO_CONNECTIONS		10001
+/* 10002 aka RDS_INFO_FLOWS is deprecated */
+#define RDS_INFO_SEND_MESSAGES		10003
+#define RDS_INFO_RETRANS_MESSAGES       10004
+#define RDS_INFO_RECV_MESSAGES          10005
+#define RDS_INFO_SOCKETS                10006
+#define RDS_INFO_TCP_SOCKETS            10007
+#define RDS_INFO_IB_CONNECTIONS		10008
+#define RDS_INFO_CONNECTION_STATS	10009
+#define RDS_INFO_IWARP_CONNECTIONS	10010
+#define RDS_INFO_LAST			10010
+
+struct rds_info_counter {
+	u_int8_t	name[32];
+	u_int64_t	value;
+} __attribute__((packed));
+
+#define RDS_INFO_CONNECTION_FLAG_SENDING	0x01
+#define RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
+#define RDS_INFO_CONNECTION_FLAG_CONNECTED	0x04
+
+#define TRANSNAMSIZ	16
+
+struct rds_info_connection {
+	u_int64_t	next_tx_seq;
+	u_int64_t	next_rx_seq;
+	__be32		laddr;
+	__be32		faddr;
+	u_int8_t	transport[TRANSNAMSIZ];		/* null term ascii */
+	u_int8_t	flags;
+} __attribute__((packed));
+
+struct rds_info_flow {
+	__be32		laddr;
+	__be32		faddr;
+	u_int32_t	bytes;
+	__be16		lport;
+	__be16		fport;
+} __attribute__((packed));
+
+#define RDS_INFO_MESSAGE_FLAG_ACK               0x01
+#define RDS_INFO_MESSAGE_FLAG_FAST_ACK          0x02
+
+struct rds_info_message {
+	u_int64_t	seq;
+	u_int32_t	len;
+	__be32		laddr;
+	__be32		faddr;
+	__be16		lport;
+	__be16		fport;
+	u_int8_t	flags;
+} __attribute__((packed));
+
+struct rds_info_socket {
+	u_int32_t	sndbuf;
+	__be32		bound_addr;
+	__be32		connected_addr;
+	__be16		bound_port;
+	__be16		connected_port;
+	u_int32_t	rcvbuf;
+	u_int64_t	inum;
+} __attribute__((packed));
+
+#define RDS_IB_GID_LEN	16
+struct rds_info_rdma_connection {
+	__be32		src_addr;
+	__be32		dst_addr;
+	uint8_t		src_gid[RDS_IB_GID_LEN];
+	uint8_t		dst_gid[RDS_IB_GID_LEN];
+
+	uint32_t	max_send_wr;
+	uint32_t	max_recv_wr;
+	uint32_t	max_send_sge;
+	uint32_t	rdma_mr_max;
+	uint32_t	rdma_mr_size;
+};
+
+/*
+ * Congestion monitoring.
+ * Congestion control in RDS happens at the host connection
+ * level by exchanging a bitmap marking congested ports.
+ * By default, a process sleeping in poll() is always woken
+ * up when the congestion map is updated.
+ * With explicit monitoring, an application can have more
+ * fine-grained control.
+ * The application installs a 64bit mask value in the socket,
+ * where each bit corresponds to a group of ports.
+ * When a congestion update arrives, RDS checks the set of
+ * ports that are now uncongested against the list bit mask
+ * installed in the socket, and if they overlap, we queue a
+ * cong_notification on the socket.
+ *
+ * To install the congestion monitor bitmask, use RDS_CONG_MONITOR
+ * with the 64bit mask.
+ * Congestion updates are received via RDS_CMSG_CONG_UPDATE
+ * control messages.
+ *
+ * The correspondence between bits and ports is
+ *	1 << (portnum % 64)
+ */
+#define RDS_CONG_MONITOR_SIZE	64
+#define RDS_CONG_MONITOR_BIT(port)  (((unsigned int) port) % RDS_CONG_MONITOR_SIZE)
+#define RDS_CONG_MONITOR_MASK(port) (1ULL << RDS_CONG_MONITOR_BIT(port))
+
+/*
+ * RDMA related types
+ */
+
+/*
+ * This encapsulates a remote memory location.
+ * In the current implementation, it contains the R_Key
+ * of the remote memory region, and the offset into it
+ * (so that the application does not have to worry about
+ * alignment).
+ */
+typedef u_int64_t	rds_rdma_cookie_t;
+
+struct rds_iovec {
+	u_int64_t	addr;
+	u_int64_t	bytes;
+};
+
+struct rds_get_mr_args {
+	struct rds_iovec vec;
+	u_int64_t	cookie_addr;
+	uint64_t	flags;
+};
+
+struct rds_free_mr_args {
+	rds_rdma_cookie_t cookie;
+	u_int64_t	flags;
+};
+
+struct rds_rdma_args {
+	rds_rdma_cookie_t cookie;
+	struct rds_iovec remote_vec;
+	u_int64_t	local_vec_addr;
+	u_int64_t	nr_local;
+	u_int64_t	flags;
+	u_int64_t	user_token;
+};
+
+struct rds_rdma_notify {
+	u_int64_t	user_token;
+	int32_t		status;
+};
+
+#define RDS_RDMA_SUCCESS	0
+#define RDS_RDMA_REMOTE_ERROR	1
+#define RDS_RDMA_CANCELED	2
+#define RDS_RDMA_DROPPED	3
+#define RDS_RDMA_OTHER_ERROR	4
+
+/*
+ * Common set of flags for all RDMA related structs
+ */
+#define RDS_RDMA_READWRITE	0x0001
+#define RDS_RDMA_FENCE		0x0002	/* use FENCE for immediate send */
+#define RDS_RDMA_INVALIDATE	0x0004	/* invalidate R_Key after freeing MR */
+#define RDS_RDMA_USE_ONCE	0x0008	/* free MR after use */
+#define RDS_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
+#define RDS_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */
+
+#endif /* IB_RDS_H */
-- 
cgit v1.2.3


From 98c8a60a04316e94ccea8221cf16768ce91bd214 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Tue, 17 Feb 2009 13:24:57 +0200
Subject: nl80211: Provide access to STA TX/RX packet counters

The TX/RX packet counters are needed to fill in RADIUS Accounting
attributes Acct-Output-Packets and Acct-Input-Packets. We already
collect the needed information, but only the TX/RX bytes were
previously exposed through nl80211. Allow applications to fetch the
packet counters, too, to provide more complete support for accounting.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 5 +++++
 include/net/cfg80211.h  | 8 ++++++++
 net/mac80211/cfg.c      | 4 ++++
 net/wireless/nl80211.c  | 6 ++++++
 4 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 8802d1bda382..f6e56370ea65 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -526,6 +526,9 @@ enum nl80211_rate_info {
  * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
  * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
  * 	containing info as possible, see &enum nl80211_sta_info_txrate.
+ * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station)
+ * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (u32, to this
+ *	station)
  */
 enum nl80211_sta_info {
 	__NL80211_STA_INFO_INVALID,
@@ -537,6 +540,8 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_PLINK_STATE,
 	NL80211_STA_INFO_SIGNAL,
 	NL80211_STA_INFO_TX_BITRATE,
+	NL80211_STA_INFO_RX_PACKETS,
+	NL80211_STA_INFO_TX_PACKETS,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 33f43b0d08fb..8dcc46444037 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -178,6 +178,8 @@ struct station_parameters {
  * @STATION_INFO_SIGNAL: @signal filled
  * @STATION_INFO_TX_BITRATE: @tx_bitrate fields are filled
  *  (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs)
+ * @STATION_INFO_RX_PACKETS: @rx_packets filled
+ * @STATION_INFO_TX_PACKETS: @tx_packets filled
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -188,6 +190,8 @@ enum station_info_flags {
 	STATION_INFO_PLINK_STATE	= 1<<5,
 	STATION_INFO_SIGNAL		= 1<<6,
 	STATION_INFO_TX_BITRATE		= 1<<7,
+	STATION_INFO_RX_PACKETS		= 1<<8,
+	STATION_INFO_TX_PACKETS		= 1<<9,
 };
 
 /**
@@ -235,6 +239,8 @@ struct rate_info {
  * @plink_state: mesh peer link state
  * @signal: signal strength of last received packet in dBm
  * @txrate: current unicast bitrate to this station
+ * @rx_packets: packets received from this station
+ * @tx_packets: packets transmitted to this station
  */
 struct station_info {
 	u32 filled;
@@ -246,6 +252,8 @@ struct station_info {
 	u8 plink_state;
 	s8 signal;
 	struct rate_info txrate;
+	u32 rx_packets;
+	u32 tx_packets;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f453bb7c564b..c43129efc3bf 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -341,11 +341,15 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 	sinfo->filled = STATION_INFO_INACTIVE_TIME |
 			STATION_INFO_RX_BYTES |
 			STATION_INFO_TX_BYTES |
+			STATION_INFO_RX_PACKETS |
+			STATION_INFO_TX_PACKETS |
 			STATION_INFO_TX_BITRATE;
 
 	sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
 	sinfo->rx_bytes = sta->rx_bytes;
 	sinfo->tx_bytes = sta->tx_bytes;
+	sinfo->rx_packets = sta->rx_packets;
+	sinfo->tx_packets = sta->tx_packets;
 
 	if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) {
 		sinfo->filled |= STATION_INFO_SIGNAL;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 67b18b3a93a0..badccf98074e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1206,6 +1206,12 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 
 		nla_nest_end(msg, txrate);
 	}
+	if (sinfo->filled & STATION_INFO_RX_PACKETS)
+		NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS,
+			    sinfo->rx_packets);
+	if (sinfo->filled & STATION_INFO_TX_PACKETS)
+		NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS,
+			    sinfo->tx_packets);
 	nla_nest_end(msg, sinfoattr);
 
 	return genlmsg_end(msg, hdr);
-- 
cgit v1.2.3


From cabeccbd172cc305f4383f5a4808ae254745275f Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 28 Feb 2009 04:44:38 +0000
Subject: tcp: kill eff_sacks "cache", the sole user can calculate itself
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also fixes insignificant bug that would cause sending of stale
SACK block (would occur in some corner cases).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |  1 -
 include/net/tcp.h        |  1 -
 net/ipv4/tcp_input.c     | 15 ++-------------
 net/ipv4/tcp_minisocks.c |  3 +--
 net/ipv4/tcp_output.c    | 12 ++++++------
 5 files changed, 9 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 0cd99e6baca5..4b86ad71e054 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -218,7 +218,6 @@ struct tcp_options_received {
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
 /*	SACKs data	*/
-	u8	eff_sacks;	/* Size of SACK array to send with next packet */
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;  	/* mss requested by user in ioctl */
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0366a559afec..055e4946d4c8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -926,7 +926,6 @@ extern void tcp_done(struct sock *sk);
 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
 	rx_opt->dsack = 0;
-	rx_opt->eff_sacks = 0;
 	rx_opt->num_sacks = 0;
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 03f5ede87224..e4442a293eb0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4099,7 +4099,6 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 		tp->rx_opt.dsack = 1;
 		tp->duplicate_sack[0].start_seq = seq;
 		tp->duplicate_sack[0].end_seq = end_seq;
-		tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1;
 	}
 }
 
@@ -4154,8 +4153,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 			 * Decrease num_sacks.
 			 */
 			tp->rx_opt.num_sacks--;
-			tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
-					       tp->rx_opt.dsack;
 			for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
 				sp[i] = sp[i + 1];
 			continue;
@@ -4218,7 +4215,6 @@ new_sack:
 	sp->start_seq = seq;
 	sp->end_seq = end_seq;
 	tp->rx_opt.num_sacks++;
-	tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
 }
 
 /* RCV.NXT advances, some SACKs should be eaten. */
@@ -4232,7 +4228,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
 	if (skb_queue_empty(&tp->out_of_order_queue)) {
 		tp->rx_opt.num_sacks = 0;
-		tp->rx_opt.eff_sacks = tp->rx_opt.dsack;
 		return;
 	}
 
@@ -4253,11 +4248,8 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 		this_sack++;
 		sp++;
 	}
-	if (num_sacks != tp->rx_opt.num_sacks) {
+	if (num_sacks != tp->rx_opt.num_sacks)
 		tp->rx_opt.num_sacks = num_sacks;
-		tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
-				       tp->rx_opt.dsack;
-	}
 }
 
 /* This one checks to see if we can put data from the
@@ -4333,10 +4325,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	TCP_ECN_accept_cwr(tp, skb);
 
-	if (tp->rx_opt.dsack) {
+	if (tp->rx_opt.dsack)
 		tp->rx_opt.dsack = 0;
-		tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
-	}
 
 	/*  Queue data for delivery to the user.
 	 *  Packets in sequence go to the receive queue.
@@ -4456,7 +4446,6 @@ drop:
 		if (tcp_is_sack(tp)) {
 			tp->rx_opt.num_sacks = 1;
 			tp->rx_opt.dsack     = 0;
-			tp->rx_opt.eff_sacks = 1;
 			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
 			tp->selective_acks[0].end_seq =
 						TCP_SKB_CB(skb)->end_seq;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f67effbb102b..bb3d8b35f19a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -434,9 +434,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->rx_opt.saw_tstamp = 0;
 
 		newtp->rx_opt.dsack = 0;
-		newtp->rx_opt.eff_sacks = 0;
-
 		newtp->rx_opt.num_sacks = 0;
+
 		newtp->urg_data = 0;
 
 		if (sock_flag(newsk, SOCK_KEEPOPEN))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 61445b57610c..1555bb73b638 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -441,10 +441,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
 
-		if (tp->rx_opt.dsack) {
+		if (tp->rx_opt.dsack)
 			tp->rx_opt.dsack = 0;
-			tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
-		}
 	}
 }
 
@@ -550,6 +548,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned size = 0;
+	unsigned int eff_sacks;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -568,10 +567,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 		size += TCPOLEN_TSTAMP_ALIGNED;
 	}
 
-	if (unlikely(tp->rx_opt.eff_sacks)) {
+	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
+	if (unlikely(eff_sacks)) {
 		const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
 		opts->num_sack_blocks =
-			min_t(unsigned, tp->rx_opt.eff_sacks,
+			min_t(unsigned, eff_sacks,
 			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
 			      TCPOLEN_SACK_PERBLOCK);
 		size += TCPOLEN_SACK_BASE_ALIGNED +
@@ -1418,7 +1418,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	    icsk->icsk_mtup.probe_size ||
 	    inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
 	    tp->snd_cwnd < 11 ||
-	    tp->rx_opt.eff_sacks)
+	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
 		return -1;
 
 	/* Very simple search strategy: just double the MSS. */
-- 
cgit v1.2.3


From 8987691a4aa6622a1b58bb12c56abaf3d2098fad Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 28 Feb 2009 23:42:50 +0000
Subject: wimax/i2400m: allow control of the base-station idle mode timeout

For power saving reasons, WiMAX links can be put in idle mode while
connected after a certain time of the link not being used for tx or
rx. In this mode, the device pages the base-station regularly and when
data is ready to be transmitted, the link is revived.

This patch allows the user to control the time the device has to be
idle before it decides to go to idle mode from a sysfs
interace.

It also updates the initialization code to acknowledge the module
variable 'idle_mode_disabled' when the firmware is a newer version
(upcoming 1.4 vs 2.6.29's v1.3).

The method for setting the idle mode timeout in the older firmwares is
much more limited and can be only done at initialization time. Thus,
the sysfs file will return -ENOSYS on older ones.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wimax/i2400m/Makefile       |  1 +
 drivers/net/wimax/i2400m/control.c      | 98 ++++++++++++++++++++++++++++++---
 drivers/net/wimax/i2400m/debug-levels.h |  1 +
 drivers/net/wimax/i2400m/driver.c       | 10 ++++
 drivers/net/wimax/i2400m/i2400m.h       | 29 ++++++++++
 drivers/net/wimax/i2400m/sysfs.c        | 80 +++++++++++++++++++++++++++
 include/linux/wimax/i2400m.h            | 10 ++++
 7 files changed, 221 insertions(+), 8 deletions(-)
 create mode 100644 drivers/net/wimax/i2400m/sysfs.c

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/Makefile b/drivers/net/wimax/i2400m/Makefile
index 1696e936cf5a..5d9e018d31af 100644
--- a/drivers/net/wimax/i2400m/Makefile
+++ b/drivers/net/wimax/i2400m/Makefile
@@ -8,6 +8,7 @@ i2400m-y :=		\
 	driver.o	\
 	fw.o		\
 	op-rfkill.o	\
+	sysfs.o		\
 	netdev.o	\
 	tx.o		\
 	rx.o
diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c
index c8b3a68b72b8..c3968b240d69 100644
--- a/drivers/net/wimax/i2400m/control.c
+++ b/drivers/net/wimax/i2400m/control.c
@@ -1221,6 +1221,77 @@ none:
 EXPORT_SYMBOL_GPL(i2400m_set_init_config);
 
 
+/**
+ * i2400m_set_idle_timeout - Set the device's idle mode timeout
+ *
+ * @i2400m: i2400m device descriptor
+ *
+ * @msecs: milliseconds for the timeout to enter idle mode. Between
+ *     100 to 300000 (5m); 0 to disable. In increments of 100.
+ *
+ * After this @msecs of the link being idle (no data being sent or
+ * received), the device will negotiate with the basestation entering
+ * idle mode for saving power. The connection is maintained, but
+ * getting out of it (done in tx.c) will require some negotiation,
+ * possible crypto re-handshake and a possible DHCP re-lease.
+ *
+ * Only available if fw_version >= 0x00090002.
+ *
+ * Returns: 0 if ok, < 0 errno code on error.
+ */
+int i2400m_set_idle_timeout(struct i2400m *i2400m, unsigned msecs)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *ack_skb;
+	struct {
+		struct i2400m_l3l4_hdr hdr;
+		struct i2400m_tlv_config_idle_timeout cit;
+	} *cmd;
+	const struct i2400m_l3l4_hdr *ack;
+	size_t ack_len;
+	char strerr[32];
+
+	result = -ENOSYS;
+	if (i2400m_le_v1_3(i2400m))
+		goto error_alloc;
+	result = -ENOMEM;
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		goto error_alloc;
+	cmd->hdr.type = cpu_to_le16(I2400M_MT_GET_STATE);
+	cmd->hdr.length = cpu_to_le16(sizeof(*cmd) - sizeof(cmd->hdr));
+	cmd->hdr.version = cpu_to_le16(I2400M_L3L4_VERSION);
+
+	cmd->cit.hdr.type =
+		cpu_to_le16(I2400M_TLV_CONFIG_IDLE_TIMEOUT);
+	cmd->cit.hdr.length = cpu_to_le16(sizeof(cmd->cit.timeout));
+	cmd->cit.timeout = cpu_to_le32(msecs);
+
+	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
+	if (IS_ERR(ack_skb)) {
+		dev_err(dev, "Failed to issue 'set idle timeout' command: "
+			"%ld\n", PTR_ERR(ack_skb));
+		result = PTR_ERR(ack_skb);
+		goto error_msg_to_dev;
+	}
+	ack = wimax_msg_data_len(ack_skb, &ack_len);
+	result = i2400m_msg_check_status(ack, strerr, sizeof(strerr));
+	if (result < 0) {
+		dev_err(dev, "'set idle timeout' (0x%04x) command failed: "
+			"%d - %s\n", I2400M_MT_GET_STATE, result, strerr);
+		goto error_cmd_failed;
+	}
+	result = 0;
+	kfree_skb(ack_skb);
+error_cmd_failed:
+error_msg_to_dev:
+	kfree(cmd);
+error_alloc:
+	return result;
+}
+
+
 /**
  * i2400m_dev_initialize - Initialize the device once communications are ready
  *
@@ -1239,19 +1310,28 @@ int i2400m_dev_initialize(struct i2400m *i2400m)
 	int result;
 	struct device *dev = i2400m_dev(i2400m);
 	struct i2400m_tlv_config_idle_parameters idle_params;
+	struct i2400m_tlv_config_idle_timeout idle_timeout;
 	const struct i2400m_tlv_hdr *args[9];
 	unsigned argc = 0;
 
 	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	/* Useless for now...might change */
 	if (i2400m_idle_mode_disabled) {
-		idle_params.hdr.type =
-			cpu_to_le16(I2400M_TLV_CONFIG_IDLE_PARAMETERS);
-		idle_params.hdr.length = cpu_to_le16(
-			sizeof(idle_params) - sizeof(idle_params.hdr));
-		idle_params.idle_timeout = 0;
-		idle_params.idle_paging_interval = 0;
-		args[argc++] = &idle_params.hdr;
+		if (i2400m_le_v1_3(i2400m)) {
+			idle_params.hdr.type =
+				cpu_to_le16(I2400M_TLV_CONFIG_IDLE_PARAMETERS);
+			idle_params.hdr.length = cpu_to_le16(
+				sizeof(idle_params) - sizeof(idle_params.hdr));
+			idle_params.idle_timeout = 0;
+			idle_params.idle_paging_interval = 0;
+			args[argc++] = &idle_params.hdr;
+		} else {
+			idle_timeout.hdr.type =
+				cpu_to_le16(I2400M_TLV_CONFIG_IDLE_TIMEOUT);
+			idle_timeout.hdr.length = cpu_to_le16(
+				sizeof(idle_timeout) - sizeof(idle_timeout.hdr));
+			idle_timeout.timeout = 0;
+			args[argc++] = &idle_timeout.hdr;
+		}
 	}
 	result = i2400m_set_init_config(i2400m, args, argc);
 	if (result < 0)
@@ -1264,6 +1344,8 @@ int i2400m_dev_initialize(struct i2400m *i2400m)
 	 */
 	result = i2400m_cmd_get_state(i2400m);
 error:
+	if (result < 0)
+		dev_err(dev, "failed to initialize the device: %d\n", result);
 	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
 	return result;
 }
diff --git a/drivers/net/wimax/i2400m/debug-levels.h b/drivers/net/wimax/i2400m/debug-levels.h
index 3183baa16a52..48fbfaa0d403 100644
--- a/drivers/net/wimax/i2400m/debug-levels.h
+++ b/drivers/net/wimax/i2400m/debug-levels.h
@@ -38,6 +38,7 @@ enum d_module {
 	D_SUBMODULE_DECLARE(netdev),
 	D_SUBMODULE_DECLARE(rfkill),
 	D_SUBMODULE_DECLARE(rx),
+	D_SUBMODULE_DECLARE(sysfs),
 	D_SUBMODULE_DECLARE(tx),
 };
 
diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
index 69a816e7c5db..f988771bfae0 100644
--- a/drivers/net/wimax/i2400m/driver.c
+++ b/drivers/net/wimax/i2400m/driver.c
@@ -662,6 +662,11 @@ int i2400m_setup(struct i2400m *i2400m, enum i2400m_bri bm_flags)
 	wimax_state_change(wimax_dev, WIMAX_ST_UNINITIALIZED);
 
 	/* Now setup all that requires a registered net and wimax device. */
+	result = sysfs_create_group(&net_dev->dev.kobj, &i2400m_dev_attr_group);
+	if (result < 0) {
+		dev_err(dev, "cannot setup i2400m's sysfs: %d\n", result);
+		goto error_sysfs_setup;
+	}
 	result = i2400m_debugfs_add(i2400m);
 	if (result < 0) {
 		dev_err(dev, "cannot setup i2400m's debugfs: %d\n", result);
@@ -671,6 +676,9 @@ int i2400m_setup(struct i2400m *i2400m, enum i2400m_bri bm_flags)
 	return result;
 
 error_debugfs_setup:
+	sysfs_remove_group(&i2400m->wimax_dev.net_dev->dev.kobj,
+			   &i2400m_dev_attr_group);
+error_sysfs_setup:
 	wimax_dev_rm(&i2400m->wimax_dev);
 error_wimax_dev_add:
 	i2400m_dev_stop(i2400m);
@@ -702,6 +710,8 @@ void i2400m_release(struct i2400m *i2400m)
 	netif_stop_queue(i2400m->wimax_dev.net_dev);
 
 	i2400m_debugfs_rm(i2400m);
+	sysfs_remove_group(&i2400m->wimax_dev.net_dev->dev.kobj,
+			   &i2400m_dev_attr_group);
 	wimax_dev_rm(&i2400m->wimax_dev);
 	i2400m_dev_stop(i2400m);
 	unregister_netdev(i2400m->wimax_dev.net_dev);
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
index 5008cdb12b42..0c60d5c43007 100644
--- a/drivers/net/wimax/i2400m/i2400m.h
+++ b/drivers/net/wimax/i2400m/i2400m.h
@@ -585,6 +585,8 @@ unsigned i2400m_brh_get_signature(const struct i2400m_bootrom_header *hdr)
  * Driver / device setup and internal functions
  */
 extern void i2400m_netdev_setup(struct net_device *net_dev);
+extern int i2400m_sysfs_setup(struct device_driver *);
+extern void i2400m_sysfs_release(struct device_driver *);
 extern int i2400m_tx_setup(struct i2400m *);
 extern void i2400m_wake_tx_work(struct work_struct *);
 extern void i2400m_tx_release(struct i2400m *);
@@ -728,6 +730,7 @@ extern struct sk_buff *i2400m_get_device_info(struct i2400m *);
 extern int i2400m_firmware_check(struct i2400m *);
 extern int i2400m_set_init_config(struct i2400m *,
 				  const struct i2400m_tlv_hdr **, size_t);
+extern int i2400m_set_idle_timeout(struct i2400m *, unsigned);
 
 static inline
 struct usb_endpoint_descriptor *usb_get_epd(struct usb_interface *iface, int ep)
@@ -740,6 +743,32 @@ extern int i2400m_op_rfkill_sw_toggle(struct wimax_dev *,
 extern void i2400m_report_tlv_rf_switches_status(
 	struct i2400m *, const struct i2400m_tlv_rf_switches_status *);
 
+/*
+ * Helpers for firmware backwards compability
+ *
+ * As we aim to support at least the firmware version that was
+ * released with the previous kernel/driver release, some code will be
+ * conditionally executed depending on the firmware version. On each
+ * release, the code to support fw releases past the last two ones
+ * will be purged.
+ *
+ * By making it depend on this macros, it is easier to keep it a tab
+ * on what has to go and what not.
+ */
+static inline
+unsigned i2400m_le_v1_3(struct i2400m *i2400m)
+{
+	/* running fw is lower or v1.3 */
+	return i2400m->fw_version <= 0x00090001;
+}
+
+static inline
+unsigned i2400m_ge_v1_4(struct i2400m *i2400m)
+{
+	/* running fw is higher or v1.4 */
+	return i2400m->fw_version >= 0x00090002;
+}
+
 
 /*
  * Do a millisecond-sleep for allowing wireshark to dump all the data
diff --git a/drivers/net/wimax/i2400m/sysfs.c b/drivers/net/wimax/i2400m/sysfs.c
new file mode 100644
index 000000000000..1237109f251a
--- /dev/null
+++ b/drivers/net/wimax/i2400m/sysfs.c
@@ -0,0 +1,80 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Sysfs interfaces to show driver and device information
+ *
+ *
+ * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include "i2400m.h"
+
+
+#define D_SUBMODULE sysfs
+#include "debug-levels.h"
+
+
+/*
+ * Set the idle timeout (msecs)
+ *
+ * FIXME: eventually this should be a common WiMAX stack method, but
+ * would like to wait to see how other devices manage it.
+ */
+static
+ssize_t i2400m_idle_timeout_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	ssize_t result;
+	struct i2400m *i2400m = net_dev_to_i2400m(to_net_dev(dev));
+	unsigned val;
+
+	result = -EINVAL;
+	if (sscanf(buf, "%u\n", &val) != 1)
+		goto error_no_unsigned;
+	if (val != 0 && (val < 100 || val > 300000 || val % 100 != 0)) {
+		dev_err(dev, "idle_timeout: %u: invalid msecs specification; "
+			"valid values are 0, 100-300000 in 100 increments\n",
+			val);
+		goto error_bad_value;
+	}
+	result = i2400m_set_idle_timeout(i2400m, val);
+	if (result >= 0)
+		result = size;
+error_no_unsigned:
+error_bad_value:
+	return result;
+}
+
+static
+DEVICE_ATTR(i2400m_idle_timeout, S_IWUSR,
+	    NULL, i2400m_idle_timeout_store);
+
+static
+struct attribute *i2400m_dev_attrs[] = {
+	&dev_attr_i2400m_idle_timeout.attr,
+	NULL,
+};
+
+struct attribute_group i2400m_dev_attr_group = {
+	.name = NULL,		/* we want them in the same directory */
+	.attrs = i2400m_dev_attrs,
+};
diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h
index 74198f5bb4dc..686eeb2b9704 100644
--- a/include/linux/wimax/i2400m.h
+++ b/include/linux/wimax/i2400m.h
@@ -381,6 +381,7 @@ enum i2400m_tlv {
 	I2400M_TLV_RF_STATUS = 163,
 	I2400M_TLV_DEVICE_RESET_TYPE = 132,
 	I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601,
+	I2400M_TLV_CONFIG_IDLE_TIMEOUT = 611,
 };
 
 
@@ -509,4 +510,13 @@ struct i2400m_tlv_media_status {
 	__le32 media_status;
 } __attribute__((packed));
 
+
+/* New in v1.4 */
+struct i2400m_tlv_config_idle_timeout {
+	struct i2400m_tlv_hdr hdr;
+	__le32 timeout;	/* 100 to 300000 ms [5min], 100 increments
+			 * 0 disabled */
+} __attribute__((packed));
+
+
 #endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */
-- 
cgit v1.2.3


From 347707baa77d273d79258303e00200d40cf3b323 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Sat, 28 Feb 2009 23:42:51 +0000
Subject: wimax: struct device - replace bus_id with dev_name(), dev_set_name()

Cc: inaky.perez-gonzalez@intel.com
Cc: linux-wimax@intel.com
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wimax/i2400m/driver.c    | 2 +-
 drivers/net/wimax/i2400m/usb-notif.c | 2 +-
 include/linux/wimax/debug.h          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
index f988771bfae0..e4f1ce5bc294 100644
--- a/drivers/net/wimax/i2400m/driver.c
+++ b/drivers/net/wimax/i2400m/driver.c
@@ -618,7 +618,7 @@ int i2400m_setup(struct i2400m *i2400m, enum i2400m_bri bm_flags)
 	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
 
 	snprintf(wimax_dev->name, sizeof(wimax_dev->name),
-		 "i2400m-%s:%s", dev->bus->name, dev->bus_id);
+		 "i2400m-%s:%s", dev->bus->name, dev_name(dev));
 
 	i2400m->bm_cmd_buf = kzalloc(I2400M_BM_CMD_BUF_SIZE, GFP_KERNEL);
 	if (i2400m->bm_cmd_buf == NULL) {
diff --git a/drivers/net/wimax/i2400m/usb-notif.c b/drivers/net/wimax/i2400m/usb-notif.c
index 9702c22b2497..6add27c3f35c 100644
--- a/drivers/net/wimax/i2400m/usb-notif.c
+++ b/drivers/net/wimax/i2400m/usb-notif.c
@@ -102,7 +102,7 @@ int i2400mu_notification_grok(struct i2400mu *i2400mu, const void *buf,
 		dev_err(dev, "HW BUG? Unknown/unexpected data in notification "
 			"message (%zu bytes)\n", buf_len);
 		snprintf(prefix, sizeof(prefix), "%s %s: ",
-			 dev_driver_string(dev) , dev->bus_id);
+			 dev_driver_string(dev), dev_name(dev));
 		if (buf_len > 64) {
 			print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET,
 				       8, 4, buf, 64, 0);
diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h
index ba0c49399a83..c703e0340423 100644
--- a/include/linux/wimax/debug.h
+++ b/include/linux/wimax/debug.h
@@ -178,7 +178,7 @@ void __d_head(char *head, size_t head_size,
 		WARN_ON(1);
 	} else
 		snprintf(head, head_size, "%s %s: ",
-			 dev_driver_string(dev), dev->bus_id);
+			 dev_driver_string(dev), dev_name(dev));
 }
 
 
-- 
cgit v1.2.3


From fd5c565c0c04d2716cfdac3f1de3c2261d6a457d Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 28 Feb 2009 23:42:52 +0000
Subject: wimax/i2400m: support extended data RX protocol (no need to
 reallocate skbs)

Newer i2400m firmwares (>= v1.4) extend the data RX protocol so that
each packet has a 16 byte header. This header is mainly used to
implement host reordeing (which is addressed in later commits).

However, this header also allows us to overwrite it (once data has
been extracted) with an Ethernet header and deliver to the networking
stack without having to reallocate the skb (as it happened in fw <=
v1.3) to make room for it.

- control.c: indicate the device [dev_initialize()] that the driver
  wants to use the extended data RX protocol. Also involves adding the
  definition of the needed data types in include/linux/wimax/i2400m.h.

- rx.c: handle the new payload type for the extended RX data
  protocol. Prepares the skb for delivery to
  netdev.c:i2400m_net_erx().

- netdev.c: Introduce i2400m_net_erx() that adds the fake ethernet
  address to a prepared skb and delivers it to the networking
  stack.

- cleanup: in most instances in rx.c, the variable 'single' was
  renamed to 'single_last' for it better conveys its meaning.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wimax/i2400m/control.c |   9 +++
 drivers/net/wimax/i2400m/i2400m.h  |   2 +
 drivers/net/wimax/i2400m/netdev.c  | 104 +++++++++++++++++++++++++--------
 drivers/net/wimax/i2400m/rx.c      | 117 ++++++++++++++++++++++++++++++++++---
 include/linux/wimax/i2400m.h       |  35 +++++++++++
 5 files changed, 234 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c
index c3968b240d69..4073c3e93bd4 100644
--- a/drivers/net/wimax/i2400m/control.c
+++ b/drivers/net/wimax/i2400m/control.c
@@ -1311,6 +1311,7 @@ int i2400m_dev_initialize(struct i2400m *i2400m)
 	struct device *dev = i2400m_dev(i2400m);
 	struct i2400m_tlv_config_idle_parameters idle_params;
 	struct i2400m_tlv_config_idle_timeout idle_timeout;
+	struct i2400m_tlv_config_d2h_data_format df;
 	const struct i2400m_tlv_hdr *args[9];
 	unsigned argc = 0;
 
@@ -1333,6 +1334,14 @@ int i2400m_dev_initialize(struct i2400m *i2400m)
 			args[argc++] = &idle_timeout.hdr;
 		}
 	}
+	if (i2400m_ge_v1_4(i2400m)) {
+		df.hdr.type =
+			cpu_to_le16(I2400M_TLV_CONFIG_D2H_DATA_FORMAT);
+		df.hdr.length = cpu_to_le16(
+			sizeof(df) - sizeof(df.hdr));
+		df.format = 1;
+		args[argc++] = &df.hdr;
+	}
 	result = i2400m_set_init_config(i2400m, args, argc);
 	if (result < 0)
 		goto error;
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
index 0c60d5c43007..125c30594e63 100644
--- a/drivers/net/wimax/i2400m/i2400m.h
+++ b/drivers/net/wimax/i2400m/i2400m.h
@@ -593,6 +593,8 @@ extern void i2400m_tx_release(struct i2400m *);
 
 extern void i2400m_net_rx(struct i2400m *, struct sk_buff *, unsigned,
 			  const void *, int);
+extern void i2400m_net_erx(struct i2400m *, struct sk_buff *,
+			   enum i2400m_cs);
 enum i2400m_pt;
 extern int i2400m_tx(struct i2400m *, const void *, size_t, enum i2400m_pt);
 
diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c
index be8be4d0709c..2bdd0cdbb319 100644
--- a/drivers/net/wimax/i2400m/netdev.c
+++ b/drivers/net/wimax/i2400m/netdev.c
@@ -28,13 +28,12 @@
  * space and from the other side. The world is (sadly) configured to
  * take in only Ethernet devices...
  *
- * Because of this, currently there is an copy-each-rxed-packet
- * overhead on the RX path. Each IP packet has to be reallocated to
- * add an ethernet header (as there is no space in what we get from
- * the device). This is a known drawback and coming versions of the
- * device's firmware are being changed to add header space that can be
- * used to insert the ethernet header without having to reallocate and
- * copy.
+ * Because of this, when using firmwares <= v1.3, there is an
+ * copy-each-rxed-packet overhead on the RX path. Each IP packet has
+ * to be reallocated to add an ethernet header (as there is no space
+ * in what we get from the device). This is a known drawback and
+ * firmwares >= 1.4 add header space that can be used to insert the
+ * ethernet header without having to reallocate and copy.
  *
  * TX error handling is tricky; because we have to FIFO/queue the
  * buffers for transmission (as the hardware likes it aggregated), we
@@ -67,7 +66,9 @@
  * i2400m_tx_timeout      Called when the device times out
  *
  * i2400m_net_rx          Called by the RX code when a data frame is
- *                        available.
+ *                        available (firmware <= 1.3)
+ * i2400m_net_erx         Called by the RX code when a data frame is
+ *                        available (firmware >= 1.4).
  * i2400m_netdev_setup    Called to setup all the netdev stuff from
  *                        alloc_netdev.
  */
@@ -396,30 +397,18 @@ void i2400m_tx_timeout(struct net_device *net_dev)
  * Create a fake ethernet header
  *
  * For emulating an ethernet device, every received IP header has to
- * be prefixed with an ethernet header.
- *
- * What we receive has (potentially) many IP packets concatenated with
- * no ETH_HLEN bytes prefixed. Thus there is no space for an eth
- * header.
- *
- * We would have to reallocate or do ugly fragment tricks in order to
- * add it.
- *
- * But what we do is use the header space of the RX transaction
- * (*msg_hdr) as we don't need it anymore; then we'll point all the
- * data skbs there, as they share the same backing store.
- *
- * We only support IPv4 for v3 firmware.
+ * be prefixed with an ethernet header. Fake it with the given
+ * protocol.
  */
 static
 void i2400m_rx_fake_eth_header(struct net_device *net_dev,
-			       void *_eth_hdr)
+			       void *_eth_hdr, int protocol)
 {
 	struct ethhdr *eth_hdr = _eth_hdr;
 
 	memcpy(eth_hdr->h_dest, net_dev->dev_addr, sizeof(eth_hdr->h_dest));
 	memset(eth_hdr->h_source, 0, sizeof(eth_hdr->h_dest));
-	eth_hdr->h_proto = cpu_to_be16(ETH_P_IP);
+	eth_hdr->h_proto = cpu_to_be16(protocol);
 }
 
 
@@ -432,6 +421,13 @@ void i2400m_rx_fake_eth_header(struct net_device *net_dev,
  * @buf: pointer to the buffer containing the data
  * @len: buffer's length
  *
+ * This is only used now for the v1.3 firmware. It will be deprecated
+ * in >= 2.6.31.
+ *
+ * Note that due to firmware limitations, we don't have space to add
+ * an ethernet header, so we need to copy each packet. Firmware
+ * versions >= v1.4 fix this [see i2400m_net_erx()].
+ *
  * We just clone the skb and set it up so that it's skb->data pointer
  * points to "buf" and it's length.
  *
@@ -478,7 +474,7 @@ void i2400m_net_rx(struct i2400m *i2400m, struct sk_buff *skb_rx,
 		memcpy(skb_put(skb, buf_len), buf, buf_len);
 	}
 	i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
-				  skb->data - ETH_HLEN);
+				  skb->data - ETH_HLEN, ETH_P_IP);
 	skb_set_mac_header(skb, -ETH_HLEN);
 	skb->dev = i2400m->wimax_dev.net_dev;
 	skb->protocol = htons(ETH_P_IP);
@@ -493,6 +489,64 @@ error_skb_realloc:
 		i2400m, buf, buf_len);
 }
 
+
+/*
+ * i2400m_net_erx - pass a network packet to the stack (extended version)
+ *
+ * @i2400m: device descriptor
+ * @skb: the skb where the packet is - the skb should be set to point
+ *     at the IP packet; this function will add ethernet headers if
+ *     needed.
+ * @cs: packet type
+ *
+ * This is only used now for firmware >= v1.4. Note it is quite
+ * similar to i2400m_net_rx() (used only for v1.3 firmware).
+ *
+ * This function is normally run from a thread context. However, we
+ * still use netif_rx() instead of netif_receive_skb() as was
+ * recommended in the mailing list. Reason is in some stress tests
+ * when sending/receiving a lot of data we seem to hit a softlock in
+ * the kernel's TCP implementation [aroudn tcp_delay_timer()]. Using
+ * netif_rx() took care of the issue.
+ *
+ * This is, of course, still open to do more research on why running
+ * with netif_receive_skb() hits this softlock. FIXME.
+ */
+void i2400m_net_erx(struct i2400m *i2400m, struct sk_buff *skb,
+		    enum i2400m_cs cs)
+{
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	struct device *dev = i2400m_dev(i2400m);
+	int protocol;
+
+	d_fnstart(2, dev, "(i2400m %p skb %p [%zu] cs %d)\n",
+		  i2400m, skb, skb->len, cs);
+	switch(cs) {
+	case I2400M_CS_IPV4_0:
+	case I2400M_CS_IPV4:
+		protocol = ETH_P_IP;
+		i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
+					  skb->data - ETH_HLEN, ETH_P_IP);
+		skb_set_mac_header(skb, -ETH_HLEN);
+		skb->dev = i2400m->wimax_dev.net_dev;
+		skb->protocol = htons(ETH_P_IP);
+		net_dev->stats.rx_packets++;
+		net_dev->stats.rx_bytes += skb->len;
+		break;
+	default:
+		dev_err(dev, "ERX: BUG? CS type %u unsupported\n", cs);
+		goto error;
+
+	}
+	d_printf(3, dev, "ERX: receiving %d bytes to the network stack\n",
+		 skb->len);
+	d_dump(4, dev, skb->data, skb->len);
+	netif_rx_ni(skb);	/* see notes in function header */
+error:
+	d_fnend(2, dev, "(i2400m %p skb %p [%zu] cs %d) = void\n",
+		i2400m, skb, skb->len, cs);
+}
+
 static const struct net_device_ops i2400m_netdev_ops = {
 	.ndo_open = i2400m_open,
 	.ndo_stop = i2400m_stop,
diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c
index c62b8c564161..cd525066d4b7 100644
--- a/drivers/net/wimax/i2400m/rx.c
+++ b/drivers/net/wimax/i2400m/rx.c
@@ -69,6 +69,22 @@
  * See tx.c for a deeper description on alignment requirements and
  * other fun facts of it.
  *
+ * DATA PACKETS
+ *
+ * In firmwares <= v1.3, data packets have no header for RX, but they
+ * do for TX (currently unused).
+ *
+ * In firmware >= 1.4, RX packets have an extended header (16
+ * bytes). This header conveys information for management of host
+ * reordering of packets (the device offloads storage of the packets
+ * for reordering to the host).
+ *
+ * Currently this information is not used as the current code doesn't
+ * enable host reordering.
+ *
+ * The header is used as dummy space to emulate an ethernet header and
+ * thus be able to act as an ethernet device without having to reallocate.
+ *
  * ROADMAP
  *
  * i2400m_rx
@@ -76,6 +92,8 @@
  *   i2400m_rx_pl_descr_check
  *   i2400m_rx_payload
  *     i2400m_net_rx
+ *     i2400m_rx_edata
+ *       i2400m_net_erx
  *     i2400m_rx_ctl
  *       i2400m_msg_size_check
  *       i2400m_report_hook_work    [in a workqueue]
@@ -264,8 +282,6 @@ error_check:
 }
 
 
-
-
 /*
  * Receive and send up a trace
  *
@@ -314,32 +330,112 @@ error_check:
 	return;
 }
 
+/*
+ * Receive and send up an extended data packet
+ *
+ * @i2400m: device descriptor
+ * @skb_rx: skb that contains the extended data packet
+ * @single_last: 1 if the payload is the only one or the last one of
+ *     the skb.
+ * @payload: pointer to the packet's data inside the skb
+ * @size: size of the payload
+ *
+ * Starting in v1.4 of the i2400m's firmware, the device can send data
+ * packets to the host in an extended format that; this incudes a 16
+ * byte header (struct i2400m_pl_edata_hdr). Using this header's space
+ * we can fake ethernet headers for ethernet device emulation without
+ * having to copy packets around.
+ *
+ * This function handles said path.
+ */
+static
+void i2400m_rx_edata(struct i2400m *i2400m, struct sk_buff *skb_rx,
+		     unsigned single_last, const void *payload, size_t size)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	const struct i2400m_pl_edata_hdr *hdr = payload;
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	struct sk_buff *skb;
+	enum i2400m_cs cs;
+	unsigned reorder_needed;
+
+	d_fnstart(4, dev, "(i2400m %p skb_rx %p single %u payload %p "
+		  "size %zu)\n", i2400m, skb_rx, single_last, payload, size);
+	if (size < sizeof(*hdr)) {
+		dev_err(dev, "ERX: HW BUG? message with short header (%zu "
+			"vs %zu bytes expected)\n", size, sizeof(*hdr));
+		goto error;
+	}
+	reorder_needed = le32_to_cpu(hdr->reorder & I2400M_REORDER_NEEDED);
+	cs = hdr->cs;
+	if (reorder_needed) {
+		dev_err(dev, "ERX: HW BUG? reorder needed, it was disabled\n");
+		goto error;
+	}
+	/* ok, so now decide if we want to clone or reuse the skb,
+	 * pull and trim it so the beginning is the space for the eth
+	 * header and pass it to i2400m_net_erx() for the stack */
+	if (single_last) {
+		skb = skb_get(skb_rx);
+		d_printf(3, dev, "ERX: reusing single payload skb %p\n", skb);
+	} else {
+		skb = skb_clone(skb_rx, GFP_KERNEL);
+		d_printf(3, dev, "ERX: cloning %p\n", skb);
+		if (skb == NULL) {
+			dev_err(dev, "ERX: no memory to clone skb\n");
+			net_dev->stats.rx_dropped++;
+			goto error_skb_clone;
+		}
+	}
+	/* now we have to pull and trim so that the skb points to the
+	 * beginning of the IP packet; the netdev part will add the
+	 * ethernet header as needed. */
+	BUILD_BUG_ON(ETH_HLEN > sizeof(*hdr));
+	skb_pull(skb, payload + sizeof(*hdr) - (void *) skb->data);
+	skb_trim(skb, (void *) skb_end_pointer(skb) - payload + sizeof(*hdr));
+	i2400m_net_erx(i2400m, skb, cs);
+error_skb_clone:
+error:
+	d_fnend(4, dev, "(i2400m %p skb_rx %p single %u payload %p "
+		"size %zu) = void\n", i2400m, skb_rx, single_last, payload, size);
+	return;
+}
+
+
+
 
 /*
  * Act on a received payload
  *
  * @i2400m: device instance
  * @skb_rx: skb where the transaction was received
- * @single: 1 if there is only one payload, 0 otherwise
+ * @single_last: 1 this is the only payload or the last one (so the
+ *     skb can be reused instead of cloned).
  * @pld: payload descriptor
  * @payload: payload data
  *
  * Upon reception of a payload, look at its guts in the payload
- * descriptor and decide what to do with it.
+ * descriptor and decide what to do with it. If it is a single payload
+ * skb or if the last skb is a data packet, the skb will be referenced
+ * and modified (so it doesn't have to be cloned).
  */
 static
 void i2400m_rx_payload(struct i2400m *i2400m, struct sk_buff *skb_rx,
-		       unsigned single, const struct i2400m_pld *pld,
+		       unsigned single_last, const struct i2400m_pld *pld,
 		       const void *payload)
 {
 	struct device *dev = i2400m_dev(i2400m);
 	size_t pl_size = i2400m_pld_size(pld);
 	enum i2400m_pt pl_type = i2400m_pld_type(pld);
 
+	d_printf(7, dev, "RX: received payload type %u, %zu bytes\n",
+		 pl_type, pl_size);
+	d_dump(8, dev, payload, pl_size);
+
 	switch (pl_type) {
 	case I2400M_PT_DATA:
 		d_printf(3, dev, "RX: data payload %zu bytes\n", pl_size);
-		i2400m_net_rx(i2400m, skb_rx, single, payload, pl_size);
+		i2400m_net_rx(i2400m, skb_rx, single_last, payload, pl_size);
 		break;
 	case I2400M_PT_CTRL:
 		i2400m_rx_ctl(i2400m, skb_rx, payload, pl_size);
@@ -347,6 +443,10 @@ void i2400m_rx_payload(struct i2400m *i2400m, struct sk_buff *skb_rx,
 	case I2400M_PT_TRACE:
 		i2400m_rx_trace(i2400m, payload, pl_size);
 		break;
+	case I2400M_PT_EDATA:
+		d_printf(3, dev, "ERX: data payload %zu bytes\n", pl_size);
+		i2400m_rx_edata(i2400m, skb_rx, single_last, payload, pl_size);
+		break;
 	default:	/* Anything else shouldn't come to the host */
 		if (printk_ratelimit())
 			dev_err(dev, "RX: HW BUG? unexpected payload type %u\n",
@@ -474,7 +574,7 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb)
 	const struct i2400m_msg_hdr *msg_hdr;
 	size_t pl_itr, pl_size, skb_len;
 	unsigned long flags;
-	unsigned num_pls;
+	unsigned num_pls, single_last;
 
 	skb_len = skb->len;
 	d_fnstart(4, dev, "(i2400m %p skb %p [size %zu])\n",
@@ -503,7 +603,8 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb)
 						  pl_itr, skb->len);
 		if (result < 0)
 			goto error_pl_descr_check;
-		i2400m_rx_payload(i2400m, skb, num_pls == 1, &msg_hdr->pld[i],
+		single_last = num_pls == 1 || i == num_pls - 1;
+		i2400m_rx_payload(i2400m, skb, single_last, &msg_hdr->pld[i],
 				  skb->data + pl_itr);
 		pl_itr += ALIGN(pl_size, I2400M_PL_PAD);
 		cond_resched();		/* Don't monopolize */
diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h
index 686eeb2b9704..ad36e073a70c 100644
--- a/include/linux/wimax/i2400m.h
+++ b/include/linux/wimax/i2400m.h
@@ -207,6 +207,7 @@ enum i2400m_pt {
 	I2400M_PT_TRACE,	/* For device debug */
 	I2400M_PT_RESET_WARM,	/* device reset */
 	I2400M_PT_RESET_COLD,	/* USB[transport] reset, like reconnect */
+	I2400M_PT_EDATA,	/* Extended RX data */
 	I2400M_PT_ILLEGAL
 };
 
@@ -221,6 +222,32 @@ struct i2400m_pl_data_hdr {
 } __attribute__((packed));
 
 
+/*
+ * Payload for an extended data packet
+ *
+ * New in v1.4
+ *
+ * @cs: the type of data in the packet, as defined per (802.16e
+ *     T11.13.19.1). Currently only 2 (IPv4 packet) supported.
+ *
+ * This is prefixed to each and every INCOMING DATA packet.
+ */
+struct i2400m_pl_edata_hdr {
+	__le32 reorder;
+	__u8 cs;
+	__u8 reserved[11];
+} __attribute__((packed));
+
+enum i2400m_cs {
+	I2400M_CS_IPV4_0 = 0,
+	I2400M_CS_IPV4 = 2,
+};
+
+enum i2400m_reorder {
+	I2400M_REORDER_NEEDED     = 0x01,
+};
+
+
 /* Misc constants */
 enum {
 	I2400M_PL_PAD = 16,	/* Payload data size alignment */
@@ -382,6 +409,7 @@ enum i2400m_tlv {
 	I2400M_TLV_DEVICE_RESET_TYPE = 132,
 	I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601,
 	I2400M_TLV_CONFIG_IDLE_TIMEOUT = 611,
+	I2400M_TLV_CONFIG_D2H_DATA_FORMAT = 614,
 };
 
 
@@ -518,5 +546,12 @@ struct i2400m_tlv_config_idle_timeout {
 			 * 0 disabled */
 } __attribute__((packed));
 
+/* New in v1.4 -- for backward compat, will be removed */
+struct i2400m_tlv_config_d2h_data_format {
+	struct i2400m_tlv_hdr hdr;
+	__u8 format; 		/* 0 old format, 1 enhanced */
+	__u8 reserved[3];
+} __attribute__((packed));
+
 
 #endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */
-- 
cgit v1.2.3


From c747583d19d5d5147a9f0eae480c1fdbc84c4252 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 28 Feb 2009 23:42:54 +0000
Subject: wimax/i2400m: implement RX reorder support

Allow the device to give the driver RX data with reorder information.

When that is done, the device will indicate the driver if a packet has
to be held in a (sorted) queue. It will also tell the driver when held
packets have to be released to the OS.

This is done to improve the WiMAX-protocol level retransmission
support when missing frames are detected.

The code docs provide details about the implementation.

In general, this just hooks into the RX path in rx.c; if a packet with
the reorder bit in the RX header is detected, the reorder information
in the header is extracted and one of the four main reorder operations
are executed. In one case (queue) no packet will be delivered to the
networking stack, just queued, whereas in the others (reset, update_ws
and queue_update_ws), queued packet might be delivered depending on
the window start for the specific queue.

The modifications to files other than rx.c are:

- control.c: during device initialization, enable reordering support
  if the rx_reorder_disabled module parameter is not enabled

- driver.c: expose a rx_reorder_disable module parameter and call
  i2400m_rx_setup/release() to initialize/shutdown RX reorder
  support.

- i2400m.h: introduce members in 'struct i2400m' needed for
  implementing reorder support.

- linux/i2400m.h: introduce TLVs, commands and constant definitions
  related to RX reorder

Last but not least, the rx reorder code includes an small circular log
where the last N reorder operations are recorded to be displayed in
case of inconsistency. Otherwise diagnosing issues would be almost
impossible.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wimax/i2400m/control.c |  14 +
 drivers/net/wimax/i2400m/driver.c  |  11 +
 drivers/net/wimax/i2400m/i2400m.h  |  19 +-
 drivers/net/wimax/i2400m/rx.c      | 677 +++++++++++++++++++++++++++++++++++--
 include/linux/wimax/i2400m.h       |  32 +-
 5 files changed, 723 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c
index 4073c3e93bd4..b3cadb626fe0 100644
--- a/drivers/net/wimax/i2400m/control.c
+++ b/drivers/net/wimax/i2400m/control.c
@@ -1312,10 +1312,12 @@ int i2400m_dev_initialize(struct i2400m *i2400m)
 	struct i2400m_tlv_config_idle_parameters idle_params;
 	struct i2400m_tlv_config_idle_timeout idle_timeout;
 	struct i2400m_tlv_config_d2h_data_format df;
+	struct i2400m_tlv_config_dl_host_reorder dlhr;
 	const struct i2400m_tlv_hdr *args[9];
 	unsigned argc = 0;
 
 	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	/* Disable idle mode? (enabled by default) */
 	if (i2400m_idle_mode_disabled) {
 		if (i2400m_le_v1_3(i2400m)) {
 			idle_params.hdr.type =
@@ -1335,12 +1337,24 @@ int i2400m_dev_initialize(struct i2400m *i2400m)
 		}
 	}
 	if (i2400m_ge_v1_4(i2400m)) {
+		/* Enable extended RX data format? */
 		df.hdr.type =
 			cpu_to_le16(I2400M_TLV_CONFIG_D2H_DATA_FORMAT);
 		df.hdr.length = cpu_to_le16(
 			sizeof(df) - sizeof(df.hdr));
 		df.format = 1;
 		args[argc++] = &df.hdr;
+
+		/* Enable RX data reordering?
+		 * (switch flipped in rx.c:i2400m_rx_setup() after fw upload) */
+		if (i2400m->rx_reorder) {
+			dlhr.hdr.type =
+				cpu_to_le16(I2400M_TLV_CONFIG_DL_HOST_REORDER);
+			dlhr.hdr.length = cpu_to_le16(
+				sizeof(dlhr) - sizeof(dlhr.hdr));
+			dlhr.reorder = 1;
+			args[argc++] = &dlhr.hdr;
+		}
 	}
 	result = i2400m_set_init_config(i2400m, args, argc);
 	if (result < 0)
diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
index e4f1ce5bc294..07a54bad237b 100644
--- a/drivers/net/wimax/i2400m/driver.c
+++ b/drivers/net/wimax/i2400m/driver.c
@@ -76,6 +76,11 @@ MODULE_PARM_DESC(idle_mode_disabled,
 		 "If true, the device will not enable idle mode negotiation "
 		 "with the base station (when connected) to save power.");
 
+int i2400m_rx_reorder_disabled;	/* 0 (rx reorder enabled) by default */
+module_param_named(rx_reorder_disabled, i2400m_rx_reorder_disabled, int, 0644);
+MODULE_PARM_DESC(rx_reorder_disabled,
+		 "If true, RX reordering will be disabled.");
+
 /**
  * i2400m_queue_work - schedule work on a i2400m's queue
  *
@@ -396,6 +401,9 @@ retry:
 	result = i2400m_tx_setup(i2400m);
 	if (result < 0)
 		goto error_tx_setup;
+	result = i2400m_rx_setup(i2400m);
+	if (result < 0)
+		goto error_rx_setup;
 	result = i2400m->bus_dev_start(i2400m);
 	if (result < 0)
 		goto error_bus_dev_start;
@@ -430,6 +438,8 @@ error_fw_check:
 error_create_workqueue:
 	i2400m->bus_dev_stop(i2400m);
 error_bus_dev_start:
+	i2400m_rx_release(i2400m);
+error_rx_setup:
 	i2400m_tx_release(i2400m);
 error_tx_setup:
 error_bootstrap:
@@ -477,6 +487,7 @@ void __i2400m_dev_stop(struct i2400m *i2400m)
 	i2400m->ready = 0;
 	destroy_workqueue(i2400m->work_queue);
 	i2400m->bus_dev_stop(i2400m);
+	i2400m_rx_release(i2400m);
 	i2400m_tx_release(i2400m);
 	wimax_state_change(wimax_dev, WIMAX_ST_DOWN);
 	d_fnend(3, dev, "(i2400m %p) = 0\n", i2400m);
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
index 125c30594e63..3ae2df38b59a 100644
--- a/drivers/net/wimax/i2400m/i2400m.h
+++ b/drivers/net/wimax/i2400m/i2400m.h
@@ -174,6 +174,7 @@ enum i2400m_reset_type {
 };
 
 struct i2400m_reset_ctx;
+struct i2400m_roq;
 
 /**
  * struct i2400m - descriptor for an Intel 2400m
@@ -257,6 +258,9 @@ struct i2400m_reset_ctx;
  *     force this to be the first field so that we can get from
  *     netdev_priv() the right pointer.
  *
+ * @rx_reorder: 1 if RX reordering is enabled; this can only be
+ *     set at probe time.
+ *
  * @state: device's state (as reported by it)
  *
  * @state_wq: waitqueue that is woken up whenever the state changes
@@ -313,6 +317,12 @@ struct i2400m_reset_ctx;
  *
  * @rx_size_max: buggest RX message received.
  *
+ * @rx_roq: RX ReOrder queues. (fw >= v1.4) When packets are received
+ *     out of order, the device will ask the driver to hold certain
+ *     packets until the ones that are received out of order can be
+ *     delivered. Then the driver can release them to the host. See
+ *     drivers/net/i2400m/rx.c for details.
+ *
  * @init_mutex: Mutex used for serializing the device bringup
  *     sequence; this way if the device reboots in the middle, we
  *     don't try to do a bringup again while we are tearing down the
@@ -377,6 +387,7 @@ struct i2400m {
 	unsigned boot_mode:1;		/* is the device in boot mode? */
 	unsigned sboot:1;		/* signed or unsigned fw boot */
 	unsigned ready:1;		/* all probing steps done */
+	unsigned rx_reorder:1;		/* RX reorder is enabled */
 	u8 trace_msg_from_user;		/* echo rx msgs to 'trace' pipe */
 					/* typed u8 so debugfs/u8 can tweak */
 	enum i2400m_system_state state;
@@ -405,10 +416,11 @@ struct i2400m {
 	unsigned tx_pl_num, tx_pl_max, tx_pl_min,
 		tx_num, tx_size_acc, tx_size_min, tx_size_max;
 
-	/* RX stats */
+	/* RX stuff */
 	spinlock_t rx_lock;		/* protect RX state */
 	unsigned rx_pl_num, rx_pl_max, rx_pl_min,
 		rx_num, rx_size_acc, rx_size_min, rx_size_max;
+	struct i2400m_roq *rx_roq;	/* not under rx_lock! */
 
 	struct mutex msg_mutex;		/* serialize command execution */
 	struct completion msg_completion;
@@ -442,6 +454,7 @@ void i2400m_init(struct i2400m *i2400m)
 	wimax_dev_init(&i2400m->wimax_dev);
 
 	i2400m->boot_mode = 1;
+	i2400m->rx_reorder = 1;
 	init_waitqueue_head(&i2400m->state_wq);
 
 	spin_lock_init(&i2400m->tx_lock);
@@ -591,6 +604,9 @@ extern int i2400m_tx_setup(struct i2400m *);
 extern void i2400m_wake_tx_work(struct work_struct *);
 extern void i2400m_tx_release(struct i2400m *);
 
+extern int i2400m_rx_setup(struct i2400m *);
+extern void i2400m_rx_release(struct i2400m *);
+
 extern void i2400m_net_rx(struct i2400m *, struct sk_buff *, unsigned,
 			  const void *, int);
 extern void i2400m_net_erx(struct i2400m *, struct sk_buff *,
@@ -788,6 +804,7 @@ void __i2400m_msleep(unsigned ms)
 /* Module parameters */
 
 extern int i2400m_idle_mode_disabled;
+extern int i2400m_rx_reorder_disabled;
 
 
 #endif /* #ifndef __I2400M_H__ */
diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c
index cd525066d4b7..02419bfd64b5 100644
--- a/drivers/net/wimax/i2400m/rx.c
+++ b/drivers/net/wimax/i2400m/rx.c
@@ -39,7 +39,7 @@
  *  - Use skb_clone(), break up processing in chunks
  *  - Split transport/device specific
  *  - Make buffer size dynamic to exert less memory pressure
- *
+ *  - RX reorder support
  *
  * This handles the RX path.
  *
@@ -77,14 +77,42 @@
  * In firmware >= 1.4, RX packets have an extended header (16
  * bytes). This header conveys information for management of host
  * reordering of packets (the device offloads storage of the packets
- * for reordering to the host).
- *
- * Currently this information is not used as the current code doesn't
- * enable host reordering.
+ * for reordering to the host). Read below for more information.
  *
  * The header is used as dummy space to emulate an ethernet header and
  * thus be able to act as an ethernet device without having to reallocate.
  *
+ * DATA RX REORDERING
+ *
+ * Starting in firmware v1.4, the device can deliver packets for
+ * delivery with special reordering information; this allows it to
+ * more effectively do packet management when some frames were lost in
+ * the radio traffic.
+ *
+ * Thus, for RX packets that come out of order, the device gives the
+ * driver enough information to queue them properly and then at some
+ * point, the signal to deliver the whole (or part) of the queued
+ * packets to the networking stack. There are 16 such queues.
+ *
+ * This only happens when a packet comes in with the "need reorder"
+ * flag set in the RX header. When such bit is set, the following
+ * operations might be indicated:
+ *
+ *  - reset queue: send all queued packets to the OS
+ *
+ *  - queue: queue a packet
+ *
+ *  - update ws: update the queue's window start and deliver queued
+ *    packets that meet the criteria
+ *
+ *  - queue & update ws: queue a packet, update the window start and
+ *    deliver queued packets that meet the criteria
+ *
+ * (delivery criteria: the packet's [normalized] sequence number is
+ * lower than the new [normalized] window start).
+ *
+ * See the i2400m_roq_*() functions for details.
+ *
  * ROADMAP
  *
  * i2400m_rx
@@ -94,6 +122,17 @@
  *     i2400m_net_rx
  *     i2400m_rx_edata
  *       i2400m_net_erx
+ *       i2400m_roq_reset
+ *         i2400m_net_erx
+ *       i2400m_roq_queue
+ *         __i2400m_roq_queue
+ *       i2400m_roq_update_ws
+ *         __i2400m_roq_update_ws
+ *           i2400m_net_erx
+ *       i2400m_roq_queue_update_ws
+ *         __i2400m_roq_queue
+ *         __i2400m_roq_update_ws
+ *           i2400m_net_erx
  *     i2400m_rx_ctl
  *       i2400m_msg_size_check
  *       i2400m_report_hook_work    [in a workqueue]
@@ -330,6 +369,469 @@ error_check:
 	return;
 }
 
+
+/*
+ * Reorder queue data stored on skb->cb while the skb is queued in the
+ * reorder queues.
+ */
+struct i2400m_roq_data {
+	unsigned sn;		/* Serial number for the skb */
+	enum i2400m_cs cs;	/* packet type for the skb */
+};
+
+
+/*
+ * ReOrder Queue
+ *
+ * @ws: Window Start; sequence number where the current window start
+ *     is for this queue
+ * @queue: the skb queue itself
+ * @log: circular ring buffer used to log information about the
+ *     reorder process in this queue that can be displayed in case of
+ *     error to help diagnose it.
+ *
+ * This is the head for a list of skbs. In the skb->cb member of the
+ * skb when queued here contains a 'struct i2400m_roq_data' were we
+ * store the sequence number (sn) and the cs (packet type) coming from
+ * the RX payload header from the device.
+ */
+struct i2400m_roq
+{
+	unsigned ws;
+	struct sk_buff_head queue;
+	struct i2400m_roq_log *log;
+};
+
+
+static
+void __i2400m_roq_init(struct i2400m_roq *roq)
+{
+	roq->ws = 0;
+	skb_queue_head_init(&roq->queue);
+}
+
+
+static
+unsigned __i2400m_roq_index(struct i2400m *i2400m, struct i2400m_roq *roq)
+{
+	return ((unsigned long) roq - (unsigned long) i2400m->rx_roq)
+		/ sizeof(*roq);
+}
+
+
+/*
+ * Normalize a sequence number based on the queue's window start
+ *
+ * nsn = (sn - ws) % 2048
+ *
+ * Note that if @sn < @roq->ws, we still need a positive number; %'s
+ * sign is implementation specific, so we normalize it by adding 2048
+ * to bring it to be positive.
+ */
+static
+unsigned __i2400m_roq_nsn(struct i2400m_roq *roq, unsigned sn)
+{
+	int r;
+	r =  ((int) sn - (int) roq->ws) % 2048;
+	if (r < 0)
+		r += 2048;
+	return r;
+}
+
+
+/*
+ * Circular buffer to keep the last N reorder operations
+ *
+ * In case something fails, dumb then to try to come up with what
+ * happened.
+ */
+enum {
+	I2400M_ROQ_LOG_LENGTH = 32,
+};
+
+struct i2400m_roq_log {
+	struct i2400m_roq_log_entry {
+		enum i2400m_ro_type type;
+		unsigned ws, count, sn, nsn, new_ws;
+	} entry[I2400M_ROQ_LOG_LENGTH];
+	unsigned in, out;
+};
+
+
+/* Print a log entry */
+static
+void i2400m_roq_log_entry_print(struct i2400m *i2400m, unsigned index,
+				unsigned e_index,
+				struct i2400m_roq_log_entry *e)
+{
+	struct device *dev = i2400m_dev(i2400m);
+
+	switch(e->type) {
+	case I2400M_RO_TYPE_RESET:
+		dev_err(dev, "q#%d reset           ws %u cnt %u sn %u/%u"
+			" - new nws %u\n",
+			index, e->ws, e->count, e->sn, e->nsn, e->new_ws);
+		break;
+	case I2400M_RO_TYPE_PACKET:
+		dev_err(dev, "q#%d queue           ws %u cnt %u sn %u/%u\n",
+			index, e->ws, e->count, e->sn, e->nsn);
+		break;
+	case I2400M_RO_TYPE_WS:
+		dev_err(dev, "q#%d update_ws       ws %u cnt %u sn %u/%u"
+			" - new nws %u\n",
+			index, e->ws, e->count, e->sn, e->nsn, e->new_ws);
+		break;
+	case I2400M_RO_TYPE_PACKET_WS:
+		dev_err(dev, "q#%d queue_update_ws ws %u cnt %u sn %u/%u"
+			" - new nws %u\n",
+			index, e->ws, e->count, e->sn, e->nsn, e->new_ws);
+		break;
+	default:
+		dev_err(dev, "q#%d BUG? entry %u - unknown type %u\n",
+			index, e_index, e->type);
+		break;
+	}
+}
+
+
+static
+void i2400m_roq_log_add(struct i2400m *i2400m,
+			struct i2400m_roq *roq, enum i2400m_ro_type type,
+			unsigned ws, unsigned count, unsigned sn,
+			unsigned nsn, unsigned new_ws)
+{
+	struct i2400m_roq_log_entry *e;
+	unsigned cnt_idx;
+	int index = __i2400m_roq_index(i2400m, roq);
+
+	/* if we run out of space, we eat from the end */
+	if (roq->log->in - roq->log->out == I2400M_ROQ_LOG_LENGTH)
+		roq->log->out++;
+	cnt_idx = roq->log->in++ % I2400M_ROQ_LOG_LENGTH;
+	e = &roq->log->entry[cnt_idx];
+
+	e->type = type;
+	e->ws = ws;
+	e->count = count;
+	e->sn = sn;
+	e->nsn = nsn;
+	e->new_ws = new_ws;
+
+	if (d_test(1))
+		i2400m_roq_log_entry_print(i2400m, index, cnt_idx, e);
+}
+
+
+/* Dump all the entries in the FIFO and reinitialize it */
+static
+void i2400m_roq_log_dump(struct i2400m *i2400m, struct i2400m_roq *roq)
+{
+	unsigned cnt, cnt_idx;
+	struct i2400m_roq_log_entry *e;
+	int index = __i2400m_roq_index(i2400m, roq);
+
+	BUG_ON(roq->log->out > roq->log->in);
+	for (cnt = roq->log->out; cnt < roq->log->in; cnt++) {
+		cnt_idx = cnt % I2400M_ROQ_LOG_LENGTH;
+		e = &roq->log->entry[cnt_idx];
+		i2400m_roq_log_entry_print(i2400m, index, cnt_idx, e);
+		memset(e, 0, sizeof(*e));
+	}
+	roq->log->in = roq->log->out = 0;
+}
+
+
+/*
+ * Backbone for the queuing of an skb (by normalized sequence number)
+ *
+ * @i2400m: device descriptor
+ * @roq: reorder queue where to add
+ * @skb: the skb to add
+ * @sn: the sequence number of the skb
+ * @nsn: the normalized sequence number of the skb (pre-computed by the
+ *     caller from the @sn and @roq->ws).
+ *
+ * We try first a couple of quick cases:
+ *
+ *   - the queue is empty
+ *   - the skb would be appended to the queue
+ *
+ * These will be the most common operations.
+ *
+ * If these fail, then we have to do a sorted insertion in the queue,
+ * which is the slowest path.
+ *
+ * We don't have to acquire a reference count as we are going to own it.
+ */
+static
+void __i2400m_roq_queue(struct i2400m *i2400m, struct i2400m_roq *roq,
+			struct sk_buff *skb, unsigned sn, unsigned nsn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *skb_itr;
+	struct i2400m_roq_data *roq_data_itr, *roq_data;
+	unsigned nsn_itr;
+
+	d_fnstart(4, dev, "(i2400m %p roq %p skb %p sn %u nsn %u)\n",
+		  i2400m, roq, skb, sn, nsn);
+
+	roq_data = (struct i2400m_roq_data *) &skb->cb;
+	BUILD_BUG_ON(sizeof(*roq_data) > sizeof(skb->cb));
+	roq_data->sn = sn;
+	d_printf(3, dev, "ERX: roq %p [ws %u] nsn %d sn %u\n",
+		 roq, roq->ws, nsn, roq_data->sn);
+
+	/* Queues will be empty on not-so-bad environments, so try
+	 * that first */
+	if (skb_queue_empty(&roq->queue)) {
+		d_printf(2, dev, "ERX: roq %p - first one\n", roq);
+		__skb_queue_head(&roq->queue, skb);
+		goto out;
+	}
+	/* Now try append, as most of the operations will be that */
+	skb_itr = skb_peek_tail(&roq->queue);
+	roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+	nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
+	/* NSN bounds assumed correct (checked when it was queued) */
+	if (nsn >= nsn_itr) {
+		d_printf(2, dev, "ERX: roq %p - appended after %p (nsn %d sn %u)\n",
+			 roq, skb_itr, nsn_itr, roq_data_itr->sn);
+		__skb_queue_tail(&roq->queue, skb);
+		goto out;
+	}
+	/* None of the fast paths option worked. Iterate to find the
+	 * right spot where to insert the packet; we know the queue is
+	 * not empty, so we are not the first ones; we also know we
+	 * are not going to be the last ones. The list is sorted, so
+	 * we have to insert before the the first guy with an nsn_itr
+	 * greater that our nsn. */
+	skb_queue_walk(&roq->queue, skb_itr) {
+		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+		nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
+		/* NSN bounds assumed correct (checked when it was queued) */
+		if (nsn_itr > nsn) {
+			d_printf(2, dev, "ERX: roq %p - queued before %p "
+				 "(nsn %d sn %u)\n", roq, skb_itr, nsn_itr,
+				 roq_data_itr->sn);
+			__skb_queue_before(&roq->queue, skb_itr, skb);
+			goto out;
+		}
+	}
+	/* If we get here, that is VERY bad -- print info to help
+	 * diagnose and crash it */
+	dev_err(dev, "SW BUG? failed to insert packet\n");
+	dev_err(dev, "ERX: roq %p [ws %u] skb %p nsn %d sn %u\n",
+		roq, roq->ws, skb, nsn, roq_data->sn);
+	skb_queue_walk(&roq->queue, skb_itr) {
+		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+		nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
+		/* NSN bounds assumed correct (checked when it was queued) */
+		dev_err(dev, "ERX: roq %p skb_itr %p nsn %d sn %u\n",
+			roq, skb_itr, nsn_itr, roq_data_itr->sn);
+	}
+	BUG();
+out:
+	d_fnend(4, dev, "(i2400m %p roq %p skb %p sn %u nsn %d) = void\n",
+		i2400m, roq, skb, sn, nsn);
+	return;
+}
+
+
+/*
+ * Backbone for the update window start operation
+ *
+ * @i2400m: device descriptor
+ * @roq: Reorder queue
+ * @sn: New sequence number
+ *
+ * Updates the window start of a queue; when doing so, it must deliver
+ * to the networking stack all the queued skb's whose normalized
+ * sequence number is lower than the new normalized window start.
+ */
+static
+unsigned __i2400m_roq_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq,
+				unsigned sn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *skb_itr, *tmp_itr;
+	struct i2400m_roq_data *roq_data_itr;
+	unsigned new_nws, nsn_itr;
+
+	new_nws = __i2400m_roq_nsn(roq, sn);
+	if (unlikely(new_nws >= 1024) && d_test(1)) {
+		dev_err(dev, "SW BUG? __update_ws new_nws %u (sn %u ws %u)\n",
+			new_nws, sn, roq->ws);
+		WARN_ON(1);
+		i2400m_roq_log_dump(i2400m, roq);
+	}
+	skb_queue_walk_safe(&roq->queue, skb_itr, tmp_itr) {
+		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+		nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
+		/* NSN bounds assumed correct (checked when it was queued) */
+		if (nsn_itr < new_nws) {
+			d_printf(2, dev, "ERX: roq %p - release skb %p "
+				 "(nsn %u/%u new nws %u)\n",
+				 roq, skb_itr, nsn_itr, roq_data_itr->sn,
+				 new_nws);
+			__skb_unlink(skb_itr, &roq->queue);
+			i2400m_net_erx(i2400m, skb_itr, roq_data_itr->cs);
+		}
+		else
+			break;	/* rest of packets all nsn_itr > nws */
+	}
+	roq->ws = sn;
+	return new_nws;
+}
+
+
+/*
+ * Reset a queue
+ *
+ * @i2400m: device descriptor
+ * @cin: Queue Index
+ *
+ * Deliver all the packets and reset the window-start to zero. Name is
+ * kind of misleading.
+ */
+static
+void i2400m_roq_reset(struct i2400m *i2400m, struct i2400m_roq *roq)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *skb_itr, *tmp_itr;
+	struct i2400m_roq_data *roq_data_itr;
+
+	d_fnstart(2, dev, "(i2400m %p roq %p)\n", i2400m, roq);
+	i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_RESET,
+			     roq->ws, skb_queue_len(&roq->queue),
+			     ~0, ~0, 0);
+	skb_queue_walk_safe(&roq->queue, skb_itr, tmp_itr) {
+		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+		d_printf(2, dev, "ERX: roq %p - release skb %p (sn %u)\n",
+			 roq, skb_itr, roq_data_itr->sn);
+		__skb_unlink(skb_itr, &roq->queue);
+		i2400m_net_erx(i2400m, skb_itr, roq_data_itr->cs);
+	}
+	roq->ws = 0;
+	d_fnend(2, dev, "(i2400m %p roq %p) = void\n", i2400m, roq);
+	return;
+}
+
+
+/*
+ * Queue a packet
+ *
+ * @i2400m: device descriptor
+ * @cin: Queue Index
+ * @skb: containing the packet data
+ * @fbn: First block number of the packet in @skb
+ * @lbn: Last block number of the packet in @skb
+ *
+ * The hardware is asking the driver to queue a packet for later
+ * delivery to the networking stack.
+ */
+static
+void i2400m_roq_queue(struct i2400m *i2400m, struct i2400m_roq *roq,
+		      struct sk_buff * skb, unsigned lbn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned nsn, len;
+
+	d_fnstart(2, dev, "(i2400m %p roq %p skb %p lbn %u) = void\n",
+		  i2400m, roq, skb, lbn);
+	len = skb_queue_len(&roq->queue);
+	nsn = __i2400m_roq_nsn(roq, lbn);
+	if (unlikely(nsn >= 1024)) {
+		dev_err(dev, "SW BUG? queue nsn %d (lbn %u ws %u)\n",
+			nsn, lbn, roq->ws);
+		i2400m_roq_log_dump(i2400m, roq);
+		i2400m->bus_reset(i2400m, I2400M_RT_WARM);
+	} else {
+		__i2400m_roq_queue(i2400m, roq, skb, lbn, nsn);
+		i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_PACKET,
+				     roq->ws, len, lbn, nsn, ~0);
+	}
+	d_fnend(2, dev, "(i2400m %p roq %p skb %p lbn %u) = void\n",
+		i2400m, roq, skb, lbn);
+	return;
+}
+
+
+/*
+ * Update the window start in a reorder queue and deliver all skbs
+ * with a lower window start
+ *
+ * @i2400m: device descriptor
+ * @roq: Reorder queue
+ * @sn: New sequence number
+ */
+static
+void i2400m_roq_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq,
+			  unsigned sn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned old_ws, nsn, len;
+
+	d_fnstart(2, dev, "(i2400m %p roq %p sn %u)\n", i2400m, roq, sn);
+	old_ws = roq->ws;
+	len = skb_queue_len(&roq->queue);
+	nsn = __i2400m_roq_update_ws(i2400m, roq, sn);
+	i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_WS,
+			     old_ws, len, sn, nsn, roq->ws);
+	d_fnstart(2, dev, "(i2400m %p roq %p sn %u) = void\n", i2400m, roq, sn);
+	return;
+}
+
+
+/*
+ * Queue a packet and update the window start
+ *
+ * @i2400m: device descriptor
+ * @cin: Queue Index
+ * @skb: containing the packet data
+ * @fbn: First block number of the packet in @skb
+ * @sn: Last block number of the packet in @skb
+ *
+ * Note that unlike i2400m_roq_update_ws(), which sets the new window
+ * start to @sn, in here we'll set it to @sn + 1.
+ */
+static
+void i2400m_roq_queue_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq,
+				struct sk_buff * skb, unsigned sn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned nsn, old_ws, len;
+
+	d_fnstart(2, dev, "(i2400m %p roq %p skb %p sn %u)\n",
+		  i2400m, roq, skb, sn);
+	len = skb_queue_len(&roq->queue);
+	nsn = __i2400m_roq_nsn(roq, sn);
+	old_ws = roq->ws;
+	if (unlikely(nsn >= 1024)) {
+		dev_err(dev, "SW BUG? queue_update_ws nsn %u (sn %u ws %u)\n",
+			nsn, sn, roq->ws);
+		i2400m_roq_log_dump(i2400m, roq);
+		i2400m->bus_reset(i2400m, I2400M_RT_WARM);
+	} else {
+		/* if the queue is empty, don't bother as we'd queue
+		 * it and inmediately unqueue it -- just deliver it */
+		if (len == 0) {
+			struct i2400m_roq_data *roq_data;
+			roq_data = (struct i2400m_roq_data *) &skb->cb;
+			i2400m_net_erx(i2400m, skb, roq_data->cs);
+		}
+		else {
+			__i2400m_roq_queue(i2400m, roq, skb, sn, nsn);
+			__i2400m_roq_update_ws(i2400m, roq, sn + 1);
+		}
+		i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_PACKET_WS,
+				   old_ws, len, sn, nsn, roq->ws);
+	}
+	d_fnend(2, dev, "(i2400m %p roq %p skb %p sn %u) = void\n",
+		i2400m, roq, skb, sn);
+	return;
+}
+
+
 /*
  * Receive and send up an extended data packet
  *
@@ -347,6 +849,28 @@ error_check:
  * having to copy packets around.
  *
  * This function handles said path.
+ *
+ *
+ * Receive and send up an extended data packet that requires no reordering
+ *
+ * @i2400m: device descriptor
+ * @skb_rx: skb that contains the extended data packet
+ * @single_last: 1 if the payload is the only one or the last one of
+ *     the skb.
+ * @payload: pointer to the packet's data (past the actual extended
+ *     data payload header).
+ * @size: size of the payload
+ *
+ * Pass over to the networking stack a data packet that might have
+ * reordering requirements.
+ *
+ * This needs to the decide if the skb in which the packet is
+ * contained can be reused or if it needs to be cloned. Then it has to
+ * be trimmed in the edges so that the beginning is the space for eth
+ * header and then pass it to i2400m_net_erx() for the stack
+ *
+ * Assumes the caller has verified the sanity of the payload (size,
+ * etc) already.
  */
 static
 void i2400m_rx_edata(struct i2400m *i2400m, struct sk_buff *skb_rx,
@@ -357,53 +881,86 @@ void i2400m_rx_edata(struct i2400m *i2400m, struct sk_buff *skb_rx,
 	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
 	struct sk_buff *skb;
 	enum i2400m_cs cs;
-	unsigned reorder_needed;
+	u32 reorder;
+	unsigned ro_needed, ro_type, ro_cin, ro_sn;
+	struct i2400m_roq *roq;
+	struct i2400m_roq_data *roq_data;
 
-	d_fnstart(4, dev, "(i2400m %p skb_rx %p single %u payload %p "
+	BUILD_BUG_ON(ETH_HLEN > sizeof(*hdr));
+
+	d_fnstart(2, dev, "(i2400m %p skb_rx %p single %u payload %p "
 		  "size %zu)\n", i2400m, skb_rx, single_last, payload, size);
 	if (size < sizeof(*hdr)) {
 		dev_err(dev, "ERX: HW BUG? message with short header (%zu "
 			"vs %zu bytes expected)\n", size, sizeof(*hdr));
 		goto error;
 	}
-	reorder_needed = le32_to_cpu(hdr->reorder & I2400M_REORDER_NEEDED);
-	cs = hdr->cs;
-	if (reorder_needed) {
-		dev_err(dev, "ERX: HW BUG? reorder needed, it was disabled\n");
-		goto error;
-	}
-	/* ok, so now decide if we want to clone or reuse the skb,
-	 * pull and trim it so the beginning is the space for the eth
-	 * header and pass it to i2400m_net_erx() for the stack */
+
 	if (single_last) {
 		skb = skb_get(skb_rx);
-		d_printf(3, dev, "ERX: reusing single payload skb %p\n", skb);
+		d_printf(3, dev, "ERX: skb %p reusing\n", skb);
 	} else {
 		skb = skb_clone(skb_rx, GFP_KERNEL);
-		d_printf(3, dev, "ERX: cloning %p\n", skb);
 		if (skb == NULL) {
 			dev_err(dev, "ERX: no memory to clone skb\n");
 			net_dev->stats.rx_dropped++;
 			goto error_skb_clone;
 		}
+		d_printf(3, dev, "ERX: skb %p cloned from %p\n", skb, skb_rx);
 	}
 	/* now we have to pull and trim so that the skb points to the
 	 * beginning of the IP packet; the netdev part will add the
-	 * ethernet header as needed. */
-	BUILD_BUG_ON(ETH_HLEN > sizeof(*hdr));
+	 * ethernet header as needed - we know there is enough space
+	 * because we checked in i2400m_rx_edata(). */
 	skb_pull(skb, payload + sizeof(*hdr) - (void *) skb->data);
-	skb_trim(skb, (void *) skb_end_pointer(skb) - payload + sizeof(*hdr));
-	i2400m_net_erx(i2400m, skb, cs);
+	skb_trim(skb, (void *) skb_end_pointer(skb) - payload - sizeof(*hdr));
+
+	reorder = le32_to_cpu(hdr->reorder);
+	ro_needed = reorder & I2400M_RO_NEEDED;
+	cs = hdr->cs;
+	if (ro_needed) {
+		ro_type = (reorder >> I2400M_RO_TYPE_SHIFT) & I2400M_RO_TYPE;
+		ro_cin = (reorder >> I2400M_RO_CIN_SHIFT) & I2400M_RO_CIN;
+		ro_sn = (reorder >> I2400M_RO_SN_SHIFT) & I2400M_RO_SN;
+
+		roq = &i2400m->rx_roq[ro_cin];
+		roq_data = (struct i2400m_roq_data *) &skb->cb;
+		roq_data->sn = ro_sn;
+		roq_data->cs = cs;
+		d_printf(2, dev, "ERX: reorder needed: "
+			 "type %u cin %u [ws %u] sn %u/%u len %zuB\n",
+			 ro_type, ro_cin, roq->ws, ro_sn,
+			 __i2400m_roq_nsn(roq, ro_sn), size);
+		d_dump(2, dev, payload, size);
+		switch(ro_type) {
+		case I2400M_RO_TYPE_RESET:
+			i2400m_roq_reset(i2400m, roq);
+			kfree_skb(skb);	/* no data here */
+			break;
+		case I2400M_RO_TYPE_PACKET:
+			i2400m_roq_queue(i2400m, roq, skb, ro_sn);
+			break;
+		case I2400M_RO_TYPE_WS:
+			i2400m_roq_update_ws(i2400m, roq, ro_sn);
+			kfree_skb(skb);	/* no data here */
+			break;
+		case I2400M_RO_TYPE_PACKET_WS:
+			i2400m_roq_queue_update_ws(i2400m, roq, skb, ro_sn);
+			break;
+		default:
+			dev_err(dev, "HW BUG? unknown reorder type %u\n", ro_type);
+		}
+	}
+	else
+		i2400m_net_erx(i2400m, skb, cs);
 error_skb_clone:
 error:
-	d_fnend(4, dev, "(i2400m %p skb_rx %p single %u payload %p "
+	d_fnend(2, dev, "(i2400m %p skb_rx %p single %u payload %p "
 		"size %zu) = void\n", i2400m, skb_rx, single_last, payload, size);
 	return;
 }
 
 
-
-
 /*
  * Act on a received payload
  *
@@ -632,3 +1189,73 @@ error_msg_hdr_check:
 	return result;
 }
 EXPORT_SYMBOL_GPL(i2400m_rx);
+
+
+/*
+ * Initialize the RX queue and infrastructure
+ *
+ * This sets up all the RX reordering infrastructures, which will not
+ * be used if reordering is not enabled or if the firmware does not
+ * support it. The device is told to do reordering in
+ * i2400m_dev_initialize(), where it also looks at the value of the
+ * i2400m->rx_reorder switch before taking a decission.
+ *
+ * Note we allocate the roq queues in one chunk and the actual logging
+ * support for it (logging) in another one and then we setup the
+ * pointers from the first to the last.
+ */
+int i2400m_rx_setup(struct i2400m *i2400m)
+{
+	int result = 0;
+	struct device *dev = i2400m_dev(i2400m);
+
+	i2400m->rx_reorder = i2400m_rx_reorder_disabled? 0 : 1;
+	if (i2400m->rx_reorder) {
+		unsigned itr;
+		size_t size;
+		struct i2400m_roq_log *rd;
+
+		result = -ENOMEM;
+
+		size = sizeof(i2400m->rx_roq[0]) * (I2400M_RO_CIN + 1);
+		i2400m->rx_roq = kzalloc(size, GFP_KERNEL);
+		if (i2400m->rx_roq == NULL) {
+			dev_err(dev, "RX: cannot allocate %zu bytes for "
+				"reorder queues\n", size);
+			goto error_roq_alloc;
+		}
+
+		size = sizeof(*i2400m->rx_roq[0].log) * (I2400M_RO_CIN + 1);
+		rd = kzalloc(size, GFP_KERNEL);
+		if (rd == NULL) {
+			dev_err(dev, "RX: cannot allocate %zu bytes for "
+				"reorder queues log areas\n", size);
+			result = -ENOMEM;
+			goto error_roq_log_alloc;
+		}
+
+		for(itr = 0; itr < I2400M_RO_CIN + 1; itr++) {
+			__i2400m_roq_init(&i2400m->rx_roq[itr]);
+			i2400m->rx_roq[itr].log = &rd[itr];
+		}
+	}
+	return 0;
+
+error_roq_log_alloc:
+	kfree(i2400m->rx_roq);
+error_roq_alloc:
+	return result;
+}
+
+
+/* Tear down the RX queue and infrastructure */
+void i2400m_rx_release(struct i2400m *i2400m)
+{
+	if (i2400m->rx_reorder) {
+		unsigned itr;
+		for(itr = 0; itr < I2400M_RO_CIN + 1; itr++)
+			__skb_queue_purge(&i2400m->rx_roq[itr].queue);
+		kfree(i2400m->rx_roq[0].log);
+		kfree(i2400m->rx_roq);
+	}
+}
diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h
index ad36e073a70c..d5148a7889a6 100644
--- a/include/linux/wimax/i2400m.h
+++ b/include/linux/wimax/i2400m.h
@@ -225,15 +225,16 @@ struct i2400m_pl_data_hdr {
 /*
  * Payload for an extended data packet
  *
- * New in v1.4
+ * New in fw v1.4
  *
+ * @reorder: if this payload has to be reorder or not (and how)
  * @cs: the type of data in the packet, as defined per (802.16e
  *     T11.13.19.1). Currently only 2 (IPv4 packet) supported.
  *
  * This is prefixed to each and every INCOMING DATA packet.
  */
 struct i2400m_pl_edata_hdr {
-	__le32 reorder;
+	__le32 reorder;		/* bits defined in i2400m_ro */
 	__u8 cs;
 	__u8 reserved[11];
 } __attribute__((packed));
@@ -243,8 +244,23 @@ enum i2400m_cs {
 	I2400M_CS_IPV4 = 2,
 };
 
-enum i2400m_reorder {
-	I2400M_REORDER_NEEDED     = 0x01,
+enum i2400m_ro {
+	I2400M_RO_NEEDED     = 0x01,
+	I2400M_RO_TYPE       = 0x03,
+	I2400M_RO_TYPE_SHIFT = 1,
+	I2400M_RO_CIN        = 0x0f,
+	I2400M_RO_CIN_SHIFT  = 4,
+	I2400M_RO_FBN        = 0x07ff,
+	I2400M_RO_FBN_SHIFT  = 8,
+	I2400M_RO_SN         = 0x07ff,
+	I2400M_RO_SN_SHIFT   = 21,
+};
+
+enum i2400m_ro_type {
+	I2400M_RO_TYPE_RESET = 0,
+	I2400M_RO_TYPE_PACKET,
+	I2400M_RO_TYPE_WS,
+	I2400M_RO_TYPE_PACKET_WS,
 };
 
 
@@ -410,6 +426,7 @@ enum i2400m_tlv {
 	I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601,
 	I2400M_TLV_CONFIG_IDLE_TIMEOUT = 611,
 	I2400M_TLV_CONFIG_D2H_DATA_FORMAT = 614,
+	I2400M_TLV_CONFIG_DL_HOST_REORDER = 615,
 };
 
 
@@ -553,5 +570,12 @@ struct i2400m_tlv_config_d2h_data_format {
 	__u8 reserved[3];
 } __attribute__((packed));
 
+/* New in v1.4 */
+struct i2400m_tlv_config_dl_host_reorder {
+	struct i2400m_tlv_hdr hdr;
+	__u8 reorder; 		/* 0 disabled, 1 enabled */
+	__u8 reserved[3];
+} __attribute__((packed));
+
 
 #endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */
-- 
cgit v1.2.3


From d3a21be86c178964167aa54c39a01260d33e7509 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 2 Mar 2009 03:15:58 -0800
Subject: skbuff.h: fix timestamps kernel-doc

Fix skbuff.h kernel-doc for timestamps: must include "struct" keyword,
otherwise there are kernel-doc errors:

Error(linux-next-20090227//include/linux/skbuff.h:161): cannot understand prototype: 'struct skb_shared_hwtstamps '
Error(linux-next-20090227//include/linux/skbuff.h:177): cannot understand prototype: 'union skb_shared_tx '

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 61ce97a8b868..1f659e8c2b88 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -135,8 +135,7 @@ struct skb_frag_struct {
 #define HAVE_HW_TIME_STAMP
 
 /**
- * skb_shared_hwtstamps - hardware time stamps
- *
+ * struct skb_shared_hwtstamps - hardware time stamps
  * @hwtstamp:	hardware time stamp transformed into duration
  *		since arbitrary point in time
  * @syststamp:	hwtstamp transformed to system time base
@@ -164,8 +163,7 @@ struct skb_shared_hwtstamps {
 };
 
 /**
- * skb_shared_tx - instructions for time stamping of outgoing packets
- *
+ * struct skb_shared_tx - instructions for time stamping of outgoing packets
  * @hardware:		generate hardware time stamp
  * @software:		generate software time stamp
  * @in_progress:	device driver is going to provide
-- 
cgit v1.2.3


From 0c5c2d3089068d4aa378f7a40d2b5ad9d4f52ce8 Mon Sep 17 00:00:00 2001
From: Eric Biederman <ebiederm@aristanetworks.com>
Date: Wed, 4 Mar 2009 00:03:08 -0800
Subject: neigh: Allow for user space users of the neighbour table

Currently it is possible to do just about everything with the arp table
from user space except treat an entry like you are using it.  To that end
implement and a flag NTF_USE that when set in a netwlink update request
treats the neighbour table entry like the kernel does on the output path.

This allows user space applications to share the kernel's arp cache.

Signed-off-by: Eric Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/neighbour.h | 1 +
 net/core/neighbour.c      | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
index 8730d5dae1bc..12c9de138451 100644
--- a/include/linux/neighbour.h
+++ b/include/linux/neighbour.h
@@ -31,6 +31,7 @@ enum
  *	Neighbor Cache Entry Flags
  */
 
+#define NTF_USE		0x01
 #define NTF_PROXY	0x08	/* == ATF_PUBL */
 #define NTF_ROUTER	0x80
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 417b6d739fb7..a1cbce7fdae5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1654,7 +1654,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
 		}
 
-		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+		if (ndm->ndm_flags & NTF_USE) {
+			neigh_event_send(neigh, NULL);
+			err = 0;
+		} else
+			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
 		neigh_release(neigh);
 		goto out_dev_put;
 	}
-- 
cgit v1.2.3


From e79c1ba84c68de9161d541bd2bcc8ea65c89955c Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Fri, 27 Feb 2009 16:59:05 +0100
Subject: ssb: Add SPROM fallback support

This adds SSB functionality to register a fallback SPROM image from the
architecture setup code.

Weird architectures exist that have half-assed SSB devices without SPROM attached to
their PCI busses. The architecture can register a fallback SPROM image that is
used if no SPROM is found on the SSB device.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Cc: Florian Fainelli <florian@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/pci.c         | 14 +++++++++++++-
 drivers/ssb/sprom.c       | 36 ++++++++++++++++++++++++++++++++++++
 drivers/ssb/ssb_private.h |  1 +
 include/linux/ssb/ssb.h   |  4 ++++
 4 files changed, 54 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index c958ac16423c..40ea41762247 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -564,6 +564,7 @@ static int sprom_extract(struct ssb_bus *bus, struct ssb_sprom *out,
 static int ssb_pci_sprom_get(struct ssb_bus *bus,
 			     struct ssb_sprom *sprom)
 {
+	const struct ssb_sprom *fallback;
 	int err = -ENOMEM;
 	u16 *buf;
 
@@ -583,12 +584,23 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus,
 		bus->sprom_size = SSB_SPROMSIZE_WORDS_R4;
 		sprom_do_read(bus, buf);
 		err = sprom_check_crc(buf, bus->sprom_size);
-		if (err)
+		if (err) {
+			/* All CRC attempts failed.
+			 * Maybe there is no SPROM on the device?
+			 * If we have a fallback, use that. */
+			fallback = ssb_get_fallback_sprom();
+			if (fallback) {
+				memcpy(sprom, fallback, sizeof(*sprom));
+				err = 0;
+				goto out_free;
+			}
 			ssb_printk(KERN_WARNING PFX "WARNING: Invalid"
 				   " SPROM CRC (corrupt SPROM)\n");
+		}
 	}
 	err = sprom_extract(bus, sprom, buf, bus->sprom_size);
 
+out_free:
 	kfree(buf);
 out:
 	return err;
diff --git a/drivers/ssb/sprom.c b/drivers/ssb/sprom.c
index 3668edb39315..8943015a3eef 100644
--- a/drivers/ssb/sprom.c
+++ b/drivers/ssb/sprom.c
@@ -14,6 +14,9 @@
 #include "ssb_private.h"
 
 
+static const struct ssb_sprom *fallback_sprom;
+
+
 static int sprom2hex(const u16 *sprom, char *buf, size_t buf_len,
 		     size_t sprom_size_words)
 {
@@ -131,3 +134,36 @@ out:
 		return res;
 	return err ? err : count;
 }
+
+/**
+ * ssb_arch_set_fallback_sprom - Set a fallback SPROM for use if no SPROM is found.
+ *
+ * @sprom: The SPROM data structure to register.
+ *
+ * With this function the architecture implementation may register a fallback
+ * SPROM data structure. The fallback is only used for PCI based SSB devices,
+ * where no valid SPROM can be found in the shadow registers.
+ *
+ * This function is useful for weird architectures that have a half-assed SSB device
+ * hardwired to their PCI bus.
+ *
+ * Note that it does only work with PCI attached SSB devices. PCMCIA devices currently
+ * don't use this fallback.
+ * Architectures must provide the SPROM for native SSB devices anyway,
+ * so the fallback also isn't used for native devices.
+ *
+ * This function is available for architecture code, only. So it is not exported.
+ */
+int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom)
+{
+	if (fallback_sprom)
+		return -EEXIST;
+	fallback_sprom = sprom;
+
+	return 0;
+}
+
+const struct ssb_sprom *ssb_get_fallback_sprom(void)
+{
+	return fallback_sprom;
+}
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index ebc32d8fe15f..57fa482abb94 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -131,6 +131,7 @@ ssize_t ssb_attr_sprom_store(struct ssb_bus *bus,
 			     const char *buf, size_t count,
 			     int (*sprom_check_crc)(const u16 *sprom, size_t size),
 			     int (*sprom_write)(struct ssb_bus *bus, const u16 *sprom));
+extern const struct ssb_sprom *ssb_get_fallback_sprom(void);
 
 
 /* core.c */
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 17d9b58f6379..5ae8fa22d331 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -339,6 +339,10 @@ extern int ssb_bus_pcmciabus_register(struct ssb_bus *bus,
 
 extern void ssb_bus_unregister(struct ssb_bus *bus);
 
+/* Set a fallback SPROM.
+ * See kdoc at the function definition for complete documentation. */
+extern int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom);
+
 /* Suspend a SSB bus.
  * Call this from the parent bus suspend routine. */
 extern int ssb_bus_suspend(struct ssb_bus *bus);
-- 
cgit v1.2.3


From f21cfb258df6dd3ea0b3e56d75c7e994edb81b35 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 12 Mar 2009 21:05:42 +0900
Subject: irq: add remove_irq() for freeing of setup_irq() irqs

Impact: add new API

This patch adds a remove_irq() function for releasing
interrupts requested with setup_irq().

Without this patch we have no way of releasing such
interrupts since free_irq() today tries to kfree()
the irqaction passed with setup_irq().

Signed-off-by: Magnus Damm <damm@igel.co.jp>
LKML-Reference: <20090312120542.2926.56609.sendpatchset@rx1.opensource.se>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h |  1 +
 kernel/irq/manage.c | 39 ++++++++++++++++++++++++++-------------
 2 files changed, 27 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index f899b502f186..56f9988362ec 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -236,6 +236,7 @@ typedef struct irq_desc		irq_desc_t;
 #include <asm/hw_irq.h>
 
 extern int setup_irq(unsigned int irq, struct irqaction *new);
+extern struct irqaction *remove_irq(unsigned int irq, void *dev_id);
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 52ee17135092..8b069a7046e9 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -551,20 +551,14 @@ int setup_irq(unsigned int irq, struct irqaction *act)
 }
 
 /**
- *	free_irq - free an interrupt
+ *	remove_irq - free an interrupt
  *	@irq: Interrupt line to free
  *	@dev_id: Device identity to free
  *
- *	Remove an interrupt handler. The handler is removed and if the
- *	interrupt line is no longer in use by any driver it is disabled.
- *	On a shared IRQ the caller must ensure the interrupt is disabled
- *	on the card it drives before calling this function. The function
- *	does not return until any executing interrupts for this IRQ
- *	have completed.
- *
- *	This function must not be called from interrupt context.
+ * Used to remove interrupts statically setup by the early boot process.
  */
-void free_irq(unsigned int irq, void *dev_id)
+
+struct irqaction *remove_irq(unsigned int irq, void *dev_id)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action, **action_ptr;
@@ -573,7 +567,7 @@ void free_irq(unsigned int irq, void *dev_id)
 	WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
 
 	if (!desc)
-		return;
+		return NULL;
 
 	spin_lock_irqsave(&desc->lock, flags);
 
@@ -589,7 +583,7 @@ void free_irq(unsigned int irq, void *dev_id)
 			WARN(1, "Trying to free already-free IRQ %d\n", irq);
 			spin_unlock_irqrestore(&desc->lock, flags);
 
-			return;
+			return NULL;
 		}
 
 		if (action->dev_id == dev_id)
@@ -636,7 +630,26 @@ void free_irq(unsigned int irq, void *dev_id)
 		local_irq_restore(flags);
 	}
 #endif
-	kfree(action);
+	return action;
+}
+
+/**
+ *	free_irq - free an interrupt allocated with request_irq
+ *	@irq: Interrupt line to free
+ *	@dev_id: Device identity to free
+ *
+ *	Remove an interrupt handler. The handler is removed and if the
+ *	interrupt line is no longer in use by any driver it is disabled.
+ *	On a shared IRQ the caller must ensure the interrupt is disabled
+ *	on the card it drives before calling this function. The function
+ *	does not return until any executing interrupts for this IRQ
+ *	have completed.
+ *
+ *	This function must not be called from interrupt context.
+ */
+void free_irq(unsigned int irq, void *dev_id)
+{
+	kfree(remove_irq(irq, dev_id));
 }
 EXPORT_SYMBOL(free_irq);
 
-- 
cgit v1.2.3


From cbf94f06824780183e4bba165c7c29d5c7bd9a51 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 12 Mar 2009 21:05:51 +0900
Subject: irq: match remove_irq() args with setup_irq()

Modify remove_irq() to match setup_irq().

Signed-off-by: Magnus Damm <damm@igel.co.jp>
LKML-Reference: <20090312120551.2926.43942.sendpatchset@rx1.opensource.se>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h |  2 +-
 kernel/irq/manage.c | 26 +++++++++++++++++---------
 2 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 56f9988362ec..737eafbc1f3d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -236,7 +236,7 @@ typedef struct irq_desc		irq_desc_t;
 #include <asm/hw_irq.h>
 
 extern int setup_irq(unsigned int irq, struct irqaction *new);
-extern struct irqaction *remove_irq(unsigned int irq, void *dev_id);
+extern void remove_irq(unsigned int irq, struct irqaction *act);
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 8b069a7046e9..fc16570c9b46 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -550,15 +550,11 @@ int setup_irq(unsigned int irq, struct irqaction *act)
 	return __setup_irq(irq, desc, act);
 }
 
-/**
- *	remove_irq - free an interrupt
- *	@irq: Interrupt line to free
- *	@dev_id: Device identity to free
- *
- * Used to remove interrupts statically setup by the early boot process.
+ /*
+ * Internal function to unregister an irqaction - used to free
+ * regular and special interrupts that are part of the architecture.
  */
-
-struct irqaction *remove_irq(unsigned int irq, void *dev_id)
+static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action, **action_ptr;
@@ -633,6 +629,18 @@ struct irqaction *remove_irq(unsigned int irq, void *dev_id)
 	return action;
 }
 
+/**
+ *	remove_irq - free an interrupt
+ *	@irq: Interrupt line to free
+ *	@act: irqaction for the interrupt
+ *
+ * Used to remove interrupts statically setup by the early boot process.
+ */
+void remove_irq(unsigned int irq, struct irqaction *act)
+{
+	__free_irq(irq, act->dev_id);
+}
+
 /**
  *	free_irq - free an interrupt allocated with request_irq
  *	@irq: Interrupt line to free
@@ -649,7 +657,7 @@ struct irqaction *remove_irq(unsigned int irq, void *dev_id)
  */
 void free_irq(unsigned int irq, void *dev_id)
 {
-	kfree(remove_irq(irq, dev_id));
+	kfree(__free_irq(irq, dev_id));
 }
 EXPORT_SYMBOL(free_irq);
 
-- 
cgit v1.2.3


From 3dd3d46b78c22503957230ca5981849b7bb29b9a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 9 Mar 2009 21:48:32 +0100
Subject: genirq: remove unused hw_irq_controller typedef

hw_irq_controller is unused. Remove the typedef

Impact: cleanup

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 737eafbc1f3d..7c07a09931db 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -226,7 +226,6 @@ irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
  * Migration helpers for obsolete names, they will go away:
  */
 #define hw_interrupt_type	irq_chip
-typedef struct irq_chip		hw_irq_controller;
 #define no_irq_type		no_irq_chip
 typedef struct irq_desc		irq_desc_t;
 
-- 
cgit v1.2.3


From bedd30d986a05e32dc3eab874e4b9ed8a38058bb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 Sep 2008 23:14:27 +0200
Subject: genirq: make irqreturn_t an enum

Impact: cleanup

Remove the 2.4 compabiliy cruft

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/irq.h       |  4 ++--
 include/linux/irqreturn.h | 28 ++++++++++------------------
 2 files changed, 12 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7c07a09931db..19770923bcb0 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -280,7 +280,7 @@ static inline int irq_balancing_disabled(unsigned int irq)
 }
 
 /* Handle irq action chains: */
-extern int handle_IRQ_event(unsigned int irq, struct irqaction *action);
+extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action);
 
 /*
  * Built-in IRQ handlers for various IRQ types,
@@ -325,7 +325,7 @@ static inline void generic_handle_irq(unsigned int irq)
 
 /* Handling of unhandled and spurious interrupts: */
 extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
-			   int action_ret);
+			   irqreturn_t action_ret);
 
 /* Resending of interrupts :*/
 void check_irq_resend(struct irq_desc *desc, unsigned int irq);
diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h
index 881883c2009d..c5584ca5b8c9 100644
--- a/include/linux/irqreturn.h
+++ b/include/linux/irqreturn.h
@@ -1,25 +1,17 @@
-/* irqreturn.h */
 #ifndef _LINUX_IRQRETURN_H
 #define _LINUX_IRQRETURN_H
 
-/*
- * For 2.4.x compatibility, 2.4.x can use
- *
- *	typedef void irqreturn_t;
- *	#define IRQ_NONE
- *	#define IRQ_HANDLED
- *	#define IRQ_RETVAL(x)
- *
- * To mix old-style and new-style irq handler returns.
- *
- * IRQ_NONE means we didn't handle it.
- * IRQ_HANDLED means that we did have a valid interrupt and handled it.
- * IRQ_RETVAL(x) selects on the two depending on x being non-zero (for handled)
+/**
+ * enum irqreturn
+ * @IRQ_NONE		interrupt was not from this device
+ * @IRQ_HANDLED		interrupt was handled by this device
  */
-typedef int irqreturn_t;
+enum irqreturn {
+	IRQ_NONE,
+	IRQ_HANDLED,
+};
 
-#define IRQ_NONE	(0)
-#define IRQ_HANDLED	(1)
-#define IRQ_RETVAL(x)	((x) != 0)
+typedef enum irqreturn irqreturn_t;
+#define IRQ_RETVAL(x)	((x) != IRQ_NONE)
 
 #endif
-- 
cgit v1.2.3


From a9d0a1a38352c4fb8946e73b3e42ba4ada29e733 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 3 Mar 2009 16:58:16 +0100
Subject: genirq: add doc to struct irqaction

Impact: documentation

struct irqaction is not documented. Add kernel doc comments and add
interrupt.h to the genirq docbook.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/DocBook/genericirq.tmpl |  1 +
 include/linux/interrupt.h             | 11 +++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl
index 3a882d9a90a9..c671a0168096 100644
--- a/Documentation/DocBook/genericirq.tmpl
+++ b/Documentation/DocBook/genericirq.tmpl
@@ -440,6 +440,7 @@ desc->chip->end();
      used in the generic IRQ layer.
      </para>
 !Iinclude/linux/irq.h
+!Iinclude/linux/interrupt.h
   </chapter>
 
   <chapter id="pubfunctions">
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 468e3a25a4a1..91658d076598 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -61,6 +61,17 @@
 
 typedef irqreturn_t (*irq_handler_t)(int, void *);
 
+/**
+ * struct irqaction - per interrupt action descriptor
+ * @handler:	interrupt handler function
+ * @flags:	flags (see IRQF_* above)
+ * @mask:	no comment as it is useless and about to be removed
+ * @name:	name of the device
+ * @dev_id:	cookie to identify the device
+ * @next:	pointer to the next irqaction for shared interrupts
+ * @irq:	interrupt number
+ * @dir:	pointer to the proc/irq/NN/name entry
+ */
 struct irqaction {
 	irq_handler_t handler;
 	unsigned long flags;
-- 
cgit v1.2.3


From ead2ceb0ec9f85cff19c43b5cdb2f8a054484431 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 11 Mar 2009 09:49:55 +0000
Subject: Network Drop Monitor: Adding kfree_skb_clean for non-drops and
 modifying end-of-line points for skbs

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

 include/linux/skbuff.h |    4 +++-
 net/core/datagram.c    |    2 +-
 net/core/skbuff.c      |   22 ++++++++++++++++++++++
 net/ipv4/arp.c         |    2 +-
 net/ipv4/udp.c         |    2 +-
 net/packet/af_packet.c |    2 +-
 6 files changed, 29 insertions(+), 5 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  4 +++-
 net/core/datagram.c    |  2 +-
 net/core/skbuff.c      | 22 ++++++++++++++++++++++
 net/ipv4/arp.c         |  2 +-
 net/ipv4/udp.c         |  2 +-
 net/packet/af_packet.c |  2 +-
 6 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1f659e8c2b88..1fbab2ae613c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -421,6 +421,7 @@ extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
 #endif
 
 extern void kfree_skb(struct sk_buff *skb);
+extern void consume_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *__alloc_skb(unsigned int size,
 				   gfp_t priority, int fclone, int node);
@@ -459,7 +460,8 @@ extern int	       skb_to_sgvec(struct sk_buff *skb,
 extern int	       skb_cow_data(struct sk_buff *skb, int tailbits,
 				    struct sk_buff **trailer);
 extern int	       skb_pad(struct sk_buff *skb, int pad);
-#define dev_kfree_skb(a)	kfree_skb(a)
+#define dev_kfree_skb(a)	consume_skb(a)
+#define dev_consume_skb(a)	kfree_skb_clean(a)
 extern void	      skb_over_panic(struct sk_buff *skb, int len,
 				     void *here);
 extern void	      skb_under_panic(struct sk_buff *skb, int len,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 5e2ac0c4b07c..d0de644b378d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -208,7 +208,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
-	kfree_skb(skb);
+	consume_skb(skb);
 	sk_mem_reclaim_partial(sk);
 }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e5e2111a397d..6acbf9e79eb1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -65,6 +65,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
+#include <trace/skb.h>
 
 #include "kmap_skb.h"
 
@@ -442,10 +443,31 @@ void kfree_skb(struct sk_buff *skb)
 		smp_rmb();
 	else if (likely(!atomic_dec_and_test(&skb->users)))
 		return;
+	trace_kfree_skb(skb, __builtin_return_address(0));
 	__kfree_skb(skb);
 }
 EXPORT_SYMBOL(kfree_skb);
 
+/**
+ *	consume_skb - free an skbuff
+ *	@skb: buffer to free
+ *
+ *	Drop a ref to the buffer and free it if the usage count has hit zero
+ *	Functions identically to kfree_skb, but kfree_skb assumes that the frame
+ *	is being dropped after a failure and notes that
+ */
+void consume_skb(struct sk_buff *skb)
+{
+	if (unlikely(!skb))
+		return;
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+	__kfree_skb(skb);
+}
+EXPORT_SYMBOL(consume_skb);
+
 /**
  *	skb_recycle_check - check if skb can be reused for receive
  *	@skb: buffer
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 3d67d1ffed77..9c220323f353 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -892,7 +892,7 @@ static int arp_process(struct sk_buff *skb)
 out:
 	if (in_dev)
 		in_dev_put(in_dev);
-	kfree_skb(skb);
+	consume_skb(skb);
 	return 0;
 }
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4bd178a111d5..05b7abb99f69 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1184,7 +1184,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 			sk = sknext;
 		} while (sknext);
 	} else
-		kfree_skb(skb);
+		consume_skb(skb);
 	spin_unlock(&hslot->lock);
 	return 0;
 }
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d8cc006fac45..74776de523ec 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -584,7 +584,7 @@ drop_n_restore:
 		skb->len = skb_len;
 	}
 drop:
-	kfree_skb(skb);
+	consume_skb(skb);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 9a8afc8d3962f3ed26fd6b56db34133860ed1e72 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 11 Mar 2009 09:51:26 +0000
Subject: Network Drop Monitor: Adding drop monitor implementation & Netlink
 protocol

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

 include/linux/net_dropmon.h |   56 +++++++++
 net/core/drop_monitor.c     |  263 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 319 insertions(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net_dropmon.h |  56 ++++++++++
 net/core/drop_monitor.c     | 263 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 319 insertions(+)
 create mode 100644 include/linux/net_dropmon.h
 create mode 100644 net/core/drop_monitor.c

(limited to 'include/linux')

diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h
new file mode 100644
index 000000000000..0217fb81a630
--- /dev/null
+++ b/include/linux/net_dropmon.h
@@ -0,0 +1,56 @@
+#ifndef __NET_DROPMON_H
+#define __NET_DROPMON_H
+
+#include <linux/netlink.h>
+
+struct net_dm_drop_point {
+	__u8 pc[8];
+	__u32 count;
+};
+
+#define NET_DM_CFG_VERSION  0
+#define NET_DM_CFG_ALERT_COUNT  1
+#define NET_DM_CFG_ALERT_DELAY 2
+#define NET_DM_CFG_MAX 3
+
+struct net_dm_config_entry {
+	__u32 type;
+	__u64 data __attribute__((aligned(8)));
+};
+
+struct net_dm_config_msg {
+	__u32 entries;
+	struct net_dm_config_entry options[0];
+};
+
+struct net_dm_alert_msg {
+	__u32 entries;
+	struct net_dm_drop_point points[0];
+};
+
+struct net_dm_user_msg {
+	union {
+		struct net_dm_config_msg user;
+		struct net_dm_alert_msg alert;
+	} u;
+};
+
+
+/* These are the netlink message types for this protocol */
+
+enum {
+	NET_DM_CMD_UNSPEC = 0,
+	NET_DM_CMD_ALERT,
+	NET_DM_CMD_CONFIG,
+	NET_DM_CMD_START,
+	NET_DM_CMD_STOP,
+	_NET_DM_CMD_MAX,
+};
+
+#define NET_DM_CMD_MAX (_NET_DM_CMD_MAX - 1)
+
+/*
+ * Our group identifiers
+ */
+#define NET_DM_GRP_ALERT 1
+#endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
new file mode 100644
index 000000000000..9fd0dc3cca99
--- /dev/null
+++ b/net/core/drop_monitor.c
@@ -0,0 +1,263 @@
+/*
+ * Monitoring code for network dropped packet alerts
+ *
+ * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/netlink.h>
+#include <linux/net_dropmon.h>
+#include <linux/percpu.h>
+#include <linux/timer.h>
+#include <linux/bitops.h>
+#include <net/genetlink.h>
+
+#include <trace/skb.h>
+
+#include <asm/unaligned.h>
+
+#define TRACE_ON 1
+#define TRACE_OFF 0
+
+static void send_dm_alert(struct work_struct *unused);
+
+
+/*
+ * Globals, our netlink socket pointer
+ * and the work handle that will send up
+ * netlink alerts
+ */
+struct sock *dm_sock;
+
+struct per_cpu_dm_data {
+	struct work_struct dm_alert_work;
+	struct sk_buff *skb;
+	atomic_t dm_hit_count;
+	struct timer_list send_timer;
+};
+
+static struct genl_family net_drop_monitor_family = {
+	.id             = GENL_ID_GENERATE,
+	.hdrsize        = 0,
+	.name           = "NET_DM",
+	.version        = 1,
+	.maxattr        = NET_DM_CMD_MAX,
+};
+
+static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
+
+static int dm_hit_limit = 64;
+static int dm_delay = 1;
+
+
+static void reset_per_cpu_data(struct per_cpu_dm_data *data)
+{
+	size_t al;
+	struct net_dm_alert_msg *msg;
+
+	al = sizeof(struct net_dm_alert_msg);
+	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
+	data->skb = genlmsg_new(al, GFP_KERNEL);
+	genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
+			0, NET_DM_CMD_ALERT);
+	msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg));
+	memset(msg, 0, al);
+	atomic_set(&data->dm_hit_count, dm_hit_limit);
+}
+
+static void send_dm_alert(struct work_struct *unused)
+{
+	struct sk_buff *skb;
+	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+
+	/*
+	 * Grab the skb we're about to send
+	 */
+	skb = data->skb;
+
+	/*
+	 * Replace it with a new one
+	 */
+	reset_per_cpu_data(data);
+
+	/*
+	 * Ship it!
+	 */
+	genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+
+}
+
+/*
+ * This is the timer function to delay the sending of an alert
+ * in the event that more drops will arrive during the
+ * hysteresis period.  Note that it operates under the timer interrupt
+ * so we don't need to disable preemption here
+ */
+static void sched_send_work(unsigned long unused)
+{
+	struct per_cpu_dm_data *data =  &__get_cpu_var(dm_cpu_data);
+
+	schedule_work(&data->dm_alert_work);
+}
+
+static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+{
+	struct net_dm_alert_msg *msg;
+	struct nlmsghdr *nlh;
+	int i;
+	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+
+
+	if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
+		/*
+		 * we're already at zero, discard this hit
+		 */
+		goto out;
+	}
+
+	nlh = (struct nlmsghdr *)data->skb->data;
+	msg = genlmsg_data(nlmsg_data(nlh));
+	for (i = 0; i < msg->entries; i++) {
+		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
+			msg->points[i].count++;
+			goto out;
+		}
+	}
+
+	/*
+	 * We need to create a new entry
+	 */
+	__nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
+	memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
+	msg->points[msg->entries].count = 1;
+	msg->entries++;
+
+	if (!timer_pending(&data->send_timer)) {
+		data->send_timer.expires = jiffies + dm_delay * HZ;
+		add_timer_on(&data->send_timer, smp_processor_id());
+	}
+
+out:
+	return;
+}
+
+static int set_all_monitor_traces(int state)
+{
+	int rc = 0;
+
+	switch (state) {
+	case TRACE_ON:
+		rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
+		break;
+	case TRACE_OFF:
+		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
+
+		tracepoint_synchronize_unregister();
+		break;
+	default:
+		rc = 1;
+		break;
+	}
+
+	if (rc)
+		return -EINPROGRESS;
+	return rc;
+}
+
+
+static int net_dm_cmd_config(struct sk_buff *skb,
+			struct genl_info *info)
+{
+	return -ENOTSUPP;
+}
+
+static int net_dm_cmd_trace(struct sk_buff *skb,
+			struct genl_info *info)
+{
+	switch (info->genlhdr->cmd) {
+	case NET_DM_CMD_START:
+		return set_all_monitor_traces(TRACE_ON);
+		break;
+	case NET_DM_CMD_STOP:
+		return set_all_monitor_traces(TRACE_OFF);
+		break;
+	}
+
+	return -ENOTSUPP;
+}
+
+
+static struct genl_ops dropmon_ops[] = {
+	{
+		.cmd = NET_DM_CMD_CONFIG,
+		.doit = net_dm_cmd_config,
+	},
+	{
+		.cmd = NET_DM_CMD_START,
+		.doit = net_dm_cmd_trace,
+	},
+	{
+		.cmd = NET_DM_CMD_STOP,
+		.doit = net_dm_cmd_trace,
+	},
+};
+
+static int __init init_net_drop_monitor(void)
+{
+	int cpu;
+	int rc, i, ret;
+	struct per_cpu_dm_data *data;
+	printk(KERN_INFO "Initalizing network drop monitor service\n");
+
+	if (sizeof(void *) > 8) {
+		printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
+		return -ENOSPC;
+	}
+
+	if (genl_register_family(&net_drop_monitor_family) < 0) {
+		printk(KERN_ERR "Could not create drop monitor netlink family\n");
+		return -EFAULT;
+	}
+
+	rc = -EFAULT;
+
+	for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
+		ret = genl_register_ops(&net_drop_monitor_family,
+					&dropmon_ops[i]);
+		if (ret) {
+			printk(KERN_CRIT "failed to register operation %d\n",
+				dropmon_ops[i].cmd);
+			goto out_unreg;
+		}
+	}
+
+	rc = 0;
+
+	for_each_present_cpu(cpu) {
+		data = &per_cpu(dm_cpu_data, cpu);
+		reset_per_cpu_data(data);
+		INIT_WORK(&data->dm_alert_work, send_dm_alert);
+		init_timer(&data->send_timer);
+		data->send_timer.data = cpu;
+		data->send_timer.function = sched_send_work;
+	}
+	goto out;
+
+out_unreg:
+	genl_unregister_family(&net_drop_monitor_family);
+out:
+	return rc;
+}
+
+late_initcall(init_net_drop_monitor);
-- 
cgit v1.2.3


From 273ae44b9cb9443e0b5265cdc99f127ddb95c8db Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 11 Mar 2009 09:53:16 +0000
Subject: Network Drop Monitor: Adding Build changes to enable drop monitor

Network Drop Monitor: Adding Build changes to enable drop monitor

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

 include/linux/Kbuild |    1 +
 net/Kconfig          |   11 +++++++++++
 net/core/Makefile    |    1 +
 3 files changed, 13 insertions(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild |  1 +
 net/Kconfig          | 11 +++++++++++
 net/core/Makefile    |  1 +
 3 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 106c3ba50844..e9581fd9fb66 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -115,6 +115,7 @@ header-y += mqueue.h
 header-y += mtio.h
 header-y += ncp_no.h
 header-y += neighbour.h
+header-y += net_dropmon.h
 header-y += netfilter_arp.h
 header-y += netrom.h
 header-y += nfs2.h
diff --git a/net/Kconfig b/net/Kconfig
index 6b39ede3b1b1..c9fdcd7e71ea 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -222,6 +222,17 @@ config NET_TCPPROBE
 	To compile this code as a module, choose M here: the
 	module will be called tcp_probe.
 
+config NET_DROP_MONITOR
+	boolean "Network packet drop alerting service"
+	depends on INET && EXPERIMENTAL && TRACEPOINTS
+	---help---
+	This feature provides an alerting service to userspace in the
+	event that packets are discarded in the network stack.  Alerts
+	are broadcast via netlink socket to any listening user space
+	process.  If you don't need network drop alerts, or if you are ok
+	just checking the various proc files and other utilities for
+	drop statistics, say N here.
+
 endmenu
 
 endmenu
diff --git a/net/core/Makefile b/net/core/Makefile
index d47092bc525c..796f46eece5f 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,4 +18,5 @@ obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
+obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
 
-- 
cgit v1.2.3


From a390d1f379cf821248b735f43d2e1147ebb8241d Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Fri, 13 Mar 2009 15:41:19 -0700
Subject: phylib: convert state_queue work to delayed_work

It closes a race in phy_stop_machine when reprogramming of phy_timer
(from phy_state_machine) happens between del_timer_sync and cancel_work_sync.

Without this change it could lead to crash if phy_device would be freed after
phy_stop_machine (timer would fire and schedule freed work).

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 41 +++++++++++------------------------------
 include/linux/phy.h   |  3 +--
 2 files changed, 12 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index e4ede6080c9d..58b73b08dde0 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -414,7 +414,6 @@ EXPORT_SYMBOL(phy_start_aneg);
 
 static void phy_change(struct work_struct *work);
 static void phy_state_machine(struct work_struct *work);
-static void phy_timer(unsigned long data);
 
 /**
  * phy_start_machine - start PHY state machine tracking
@@ -434,11 +433,8 @@ void phy_start_machine(struct phy_device *phydev,
 {
 	phydev->adjust_state = handler;
 
-	INIT_WORK(&phydev->state_queue, phy_state_machine);
-	init_timer(&phydev->phy_timer);
-	phydev->phy_timer.function = &phy_timer;
-	phydev->phy_timer.data = (unsigned long) phydev;
-	mod_timer(&phydev->phy_timer, jiffies + HZ);
+	INIT_DELAYED_WORK(&phydev->state_queue, phy_state_machine);
+	schedule_delayed_work(&phydev->state_queue, jiffies + HZ);
 }
 
 /**
@@ -451,8 +447,7 @@ void phy_start_machine(struct phy_device *phydev,
  */
 void phy_stop_machine(struct phy_device *phydev)
 {
-	del_timer_sync(&phydev->phy_timer);
-	cancel_work_sync(&phydev->state_queue);
+	cancel_delayed_work_sync(&phydev->state_queue);
 
 	mutex_lock(&phydev->lock);
 	if (phydev->state > PHY_UP)
@@ -680,11 +675,9 @@ static void phy_change(struct work_struct *work)
 	if (err)
 		goto irq_enable_err;
 
-	/* Stop timer and run the state queue now.  The work function for
-	 * state_queue will start the timer up again.
-	 */
-	del_timer(&phydev->phy_timer);
-	schedule_work(&phydev->state_queue);
+	/* reschedule state queue work to run as soon as possible */
+	cancel_delayed_work_sync(&phydev->state_queue);
+	schedule_delayed_work(&phydev->state_queue, 0);
 
 	return;
 
@@ -761,14 +754,13 @@ EXPORT_SYMBOL(phy_start);
 /**
  * phy_state_machine - Handle the state machine
  * @work: work_struct that describes the work to be done
- *
- * Description: Scheduled by the state_queue workqueue each time
- *   phy_timer is triggered.
  */
 static void phy_state_machine(struct work_struct *work)
 {
+	struct delayed_work *dwork =
+			container_of(work, struct delayed_work, work);
 	struct phy_device *phydev =
-			container_of(work, struct phy_device, state_queue);
+			container_of(dwork, struct phy_device, state_queue);
 	int needs_aneg = 0;
 	int err = 0;
 
@@ -946,17 +938,6 @@ static void phy_state_machine(struct work_struct *work)
 	if (err < 0)
 		phy_error(phydev);
 
-	mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ);
-}
-
-/* PHY timer which schedules the state machine work */
-static void phy_timer(unsigned long data)
-{
-	struct phy_device *phydev = (struct phy_device *)data;
-
-	/*
-	 * PHY I/O operations can potentially sleep so we ensure that
-	 * it's done from a process context
-	 */
-	schedule_work(&phydev->state_queue);
+	schedule_delayed_work(&phydev->state_queue,
+				jiffies + PHY_STATE_TIME * HZ);
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index d7e54d98869f..32cf14a4b034 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -315,8 +315,7 @@ struct phy_device {
 
 	/* Interrupt and Polling infrastructure */
 	struct work_struct phy_queue;
-	struct work_struct state_queue;
-	struct timer_list phy_timer;
+	struct delayed_work state_queue;
 	atomic_t irq_disable;
 
 	struct mutex lock;
-- 
cgit v1.2.3


From 9c705260feea6ae329bc6b6d5f6d2ef0227eda0a Mon Sep 17 00:00:00 2001
From: Gabriele Paoloni <gabriele.paoloni@intel.com>
Date: Fri, 13 Mar 2009 16:09:12 -0700
Subject: ppp: ppp_mp_explode() redesign

I found the PPP subsystem to not work properly when connecting channels
with different speeds to the same bundle.

Problem Description:

As the "ppp_mp_explode" function fragments the sk_buff buffer evenly
among the PPP channels that are connected to a certain PPP unit to
make up a bundle, if we are transmitting using an upper layer protocol
that requires an Ack before sending the next packet (like TCP/IP for
example), we will have a bandwidth bottleneck on the slowest channel
of the bundle.

Let's clarify by an example. Let's consider a scenario where we have
two PPP links making up a bundle: a slow link (10KB/sec) and a fast
link (1000KB/sec) working at the best (full bandwidth). On the top we
have a TCP/IP stack sending a 1000 Bytes sk_buff buffer down to the
PPP subsystem. The "ppp_mp_explode" function will divide the buffer in
two fragments of 500B each (we are neglecting all the headers, crc,
flags etc?.). Before the TCP/IP stack sends out the next buffer, it
will have to wait for the ACK response from the remote peer, so it
will have to wait for both fragments to have been sent over the two
PPP links, received by the remote peer and reconstructed. The
resulting behaviour is that, rather than having a bundle working
@1010KB/sec (the sum of the channels bandwidths), we'll have a bundle
working @20KB/sec (the double of the slowest channels bandwidth).


Problem Solution:

The problem has been solved by redesigning the "ppp_mp_explode"
function in such a way to make it split the sk_buff buffer according
to the speeds of the underlying PPP channels (the speeds of the serial
interfaces respectively attached to the PPP channels). Referring to
the above example, the redesigned "ppp_mp_explode" function will now
divide the 1000 Bytes buffer into two fragments whose sizes are set
according to the speeds of the channels where they are going to be
sent on (e.g .  10 Byets on 10KB/sec channel and 990 Bytes on
1000KB/sec channel).  The reworked function grants the same
performances of the original one in optimal working conditions (i.e. a
bundle made up of PPP links all working at the same speed), while
greatly improving performances on the bundles made up of channels
working at different speeds.

Signed-off-by: Gabriele Paoloni <gabriele.paoloni@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp_async.c     |   3 +
 drivers/net/ppp_generic.c   | 211 +++++++++++++++++++++++++-------------------
 drivers/net/ppp_synctty.c   |   3 +
 include/linux/ppp_channel.h |   2 +-
 4 files changed, 127 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c
index 5de6fedd1d76..6de8399d6dd9 100644
--- a/drivers/net/ppp_async.c
+++ b/drivers/net/ppp_async.c
@@ -157,6 +157,7 @@ ppp_asynctty_open(struct tty_struct *tty)
 {
 	struct asyncppp *ap;
 	int err;
+	int speed;
 
 	if (tty->ops->write == NULL)
 		return -EOPNOTSUPP;
@@ -187,6 +188,8 @@ ppp_asynctty_open(struct tty_struct *tty)
 	ap->chan.private = ap;
 	ap->chan.ops = &async_ops;
 	ap->chan.mtu = PPP_MRU;
+	speed = tty_get_baud_rate(tty);
+	ap->chan.speed = speed;
 	err = ppp_register_channel(&ap->chan);
 	if (err)
 		goto out_free;
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 42d455578453..8ee91421db12 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -167,6 +167,7 @@ struct channel {
 	u8		avail;		/* flag used in multilink stuff */
 	u8		had_frag;	/* >= 1 fragments have been sent */
 	u32		lastseq;	/* MP: last sequence # received */
+	int     speed;		/* speed of the corresponding ppp channel*/
 #endif /* CONFIG_PPP_MULTILINK */
 };
 
@@ -1307,138 +1308,181 @@ ppp_push(struct ppp *ppp)
  */
 static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 {
-	int len, fragsize;
-	int i, bits, hdrlen, mtu;
-	int flen;
-	int navail, nfree;
-	int nbigger;
+	int	len, totlen;
+	int	i, bits, hdrlen, mtu;
+	int	flen;
+	int	navail,	nfree, nzero;
+	int	nbigger;
+	int	totspeed;
+	int	totfree;
 	unsigned char *p, *q;
 	struct list_head *list;
 	struct channel *pch;
 	struct sk_buff *frag;
 	struct ppp_channel *chan;
 
-	nfree = 0;	/* # channels which have no packet already queued */
+	totspeed = 0; /*total bitrate of the bundle*/
+	nfree =	0;	/* # channels which	have no	packet already queued */
 	navail = 0;	/* total # of usable channels (not deregistered) */
+	nzero =	0; /* number of	channels with zero speed associated*/
+	totfree	= 0; /*total # of channels available and
+				  *having no queued packets before
+				  *starting the fragmentation*/
+
 	hdrlen = (ppp->flags & SC_MP_XSHORTSEQ)? MPHDRLEN_SSN: MPHDRLEN;
-	i = 0;
-	list_for_each_entry(pch, &ppp->channels, clist) {
+	i =	0;
+	list_for_each_entry(pch, &ppp->channels, clist)	{
 		navail += pch->avail = (pch->chan != NULL);
-		if (pch->avail) {
+		pch->speed = pch->chan->speed;
+		if (pch->avail)	{
 			if (skb_queue_empty(&pch->file.xq) ||
-			    !pch->had_frag) {
-				pch->avail = 2;
-				++nfree;
-			}
-			if (!pch->had_frag && i < ppp->nxchan)
-				ppp->nxchan = i;
+				!pch->had_frag)	{
+					if (pch->speed == 0)
+						nzero++;
+					else
+						totspeed += pch->speed;
+
+					pch->avail = 2;
+					++nfree;
+					++totfree;
+				}
+			if (!pch->had_frag && i	< ppp->nxchan)
+				ppp->nxchan	= i;
 		}
 		++i;
 	}
-
 	/*
-	 * Don't start sending this packet unless at least half of
-	 * the channels are free.  This gives much better TCP
-	 * performance if we have a lot of channels.
+	 * Don't start sending this	packet unless at least half	of
+	 * the channels	are	free.  This	gives much better TCP
+	 * performance if we have a	lot	of channels.
 	 */
-	if (nfree == 0 || nfree < navail / 2)
-		return 0;	/* can't take now, leave it in xmit_pending */
+	if (nfree == 0 || nfree	< navail / 2)
+		return 0; /* can't take now, leave it in xmit_pending	*/
 
 	/* Do protocol field compression (XXX this should be optional) */
-	p = skb->data;
-	len = skb->len;
+	p =	skb->data;
+	len	= skb->len;
 	if (*p == 0) {
 		++p;
 		--len;
 	}
 
-	/*
-	 * Decide on fragment size.
-	 * We create a fragment for each free channel regardless of
-	 * how small they are (i.e. even 0 length) in order to minimize
-	 * the time that it will take to detect when a channel drops
-	 * a fragment.
-	 */
-	fragsize = len;
-	if (nfree > 1)
-		fragsize = DIV_ROUND_UP(fragsize, nfree);
-	/* nbigger channels get fragsize bytes, the rest get fragsize-1,
-	   except if nbigger==0, then they all get fragsize. */
-	nbigger = len % nfree;
-
-	/* skip to the channel after the one we last used
-	   and start at that one */
+	totlen = len;
+	nbigger	= len %	nfree;
+
+	/* skip	to the channel after the one we	last used
+	   and start at	that one */
 	list = &ppp->channels;
-	for (i = 0; i < ppp->nxchan; ++i) {
+	for	(i = 0;	i <	ppp->nxchan; ++i) {
 		list = list->next;
-		if (list == &ppp->channels) {
-			i = 0;
+		if (list ==	&ppp->channels)	{
+			i =	0;
 			break;
 		}
 	}
 
-	/* create a fragment for each channel */
+	/* create a	fragment for each channel */
 	bits = B;
-	while (nfree > 0 || len > 0) {
+	while (nfree > 0 &&	len	> 0) {
 		list = list->next;
-		if (list == &ppp->channels) {
-			i = 0;
+		if (list ==	&ppp->channels)	{
+			i =	0;
 			continue;
 		}
-		pch = list_entry(list, struct channel, clist);
+		pch	= list_entry(list, struct channel, clist);
 		++i;
 		if (!pch->avail)
 			continue;
 
 		/*
-		 * Skip this channel if it has a fragment pending already and
-		 * we haven't given a fragment to all of the free channels.
+		 * Skip	this channel if	it has a fragment pending already and
+		 * we haven't given	a fragment to all of the free channels.
 		 */
 		if (pch->avail == 1) {
-			if (nfree > 0)
+			if (nfree >	0)
 				continue;
 		} else {
-			--nfree;
 			pch->avail = 1;
 		}
 
 		/* check the channel's mtu and whether it is still attached. */
 		spin_lock_bh(&pch->downl);
 		if (pch->chan == NULL) {
-			/* can't use this channel, it's being deregistered */
+			/* can't use this channel, it's	being deregistered */
+			if (pch->speed == 0)
+				nzero--;
+			else
+				totspeed -=	pch->speed;
+
 			spin_unlock_bh(&pch->downl);
 			pch->avail = 0;
-			if (--navail == 0)
+			totlen = len;
+			totfree--;
+			nfree--;
+			if (--navail ==	0)
 				break;
 			continue;
 		}
 
 		/*
-		 * Create a fragment for this channel of
-		 * min(max(mtu+2-hdrlen, 4), fragsize, len) bytes.
-		 * If mtu+2-hdrlen < 4, that is a ridiculously small
-		 * MTU, so we use mtu = 2 + hdrlen.
+		*if the channel speed is not set divide
+		*the packet	evenly among the free channels;
+		*otherwise divide it according to the speed
+		*of the channel we are going to transmit on
+		*/
+		if (pch->speed == 0) {
+			flen = totlen/nfree	;
+			if (nbigger > 0) {
+				flen++;
+				nbigger--;
+			}
+		} else {
+			flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) /
+				((totspeed*totfree)/pch->speed)) - hdrlen;
+			if (nbigger > 0) {
+				flen += ((totfree - nzero)*pch->speed)/totspeed;
+				nbigger -= ((totfree - nzero)*pch->speed)/
+							totspeed;
+			}
+		}
+		nfree--;
+
+		/*
+		 *check	if we are on the last channel or
+		 *we exceded the lenght	of the data	to
+		 *fragment
+		 */
+		if ((nfree == 0) || (flen > len))
+			flen = len;
+		/*
+		 *it is not worth to tx on slow channels:
+		 *in that case from the resulting flen according to the
+		 *above formula will be equal or less than zero.
+		 *Skip the channel in this case
 		 */
-		if (fragsize > len)
-			fragsize = len;
-		flen = fragsize;
-		mtu = pch->chan->mtu + 2 - hdrlen;
-		if (mtu < 4)
-			mtu = 4;
+		if (flen <=	0) {
+			pch->avail = 2;
+			spin_unlock_bh(&pch->downl);
+			continue;
+		}
+
+		mtu	= pch->chan->mtu + 2 - hdrlen;
+		if (mtu	< 4)
+			mtu	= 4;
 		if (flen > mtu)
 			flen = mtu;
-		if (flen == len && nfree == 0)
-			bits |= E;
-		frag = alloc_skb(flen + hdrlen + (flen == 0), GFP_ATOMIC);
+		if (flen ==	len)
+			bits |=	E;
+		frag = alloc_skb(flen +	hdrlen + (flen == 0), GFP_ATOMIC);
 		if (!frag)
 			goto noskb;
-		q = skb_put(frag, flen + hdrlen);
+		q =	skb_put(frag, flen + hdrlen);
 
-		/* make the MP header */
+		/* make	the	MP header */
 		q[0] = PPP_MP >> 8;
 		q[1] = PPP_MP;
 		if (ppp->flags & SC_MP_XSHORTSEQ) {
-			q[2] = bits + ((ppp->nxseq >> 8) & 0xf);
+			q[2] = bits	+ ((ppp->nxseq >> 8) & 0xf);
 			q[3] = ppp->nxseq;
 		} else {
 			q[2] = bits;
@@ -1447,43 +1491,28 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 			q[5] = ppp->nxseq;
 		}
 
-		/*
-		 * Copy the data in.
-		 * Unfortunately there is a bug in older versions of
-		 * the Linux PPP multilink reconstruction code where it
-		 * drops 0-length fragments.  Therefore we make sure the
-		 * fragment has at least one byte of data.  Any bytes
-		 * we add in this situation will end up as padding on the
-		 * end of the reconstructed packet.
-		 */
-		if (flen == 0)
-			*skb_put(frag, 1) = 0;
-		else
-			memcpy(q + hdrlen, p, flen);
+		memcpy(q + hdrlen, p, flen);
 
 		/* try to send it down the channel */
 		chan = pch->chan;
-		if (!skb_queue_empty(&pch->file.xq) ||
-		    !chan->ops->start_xmit(chan, frag))
+		if (!skb_queue_empty(&pch->file.xq)	||
+			!chan->ops->start_xmit(chan, frag))
 			skb_queue_tail(&pch->file.xq, frag);
-		pch->had_frag = 1;
+		pch->had_frag =	1;
 		p += flen;
-		len -= flen;
+		len	-= flen;
 		++ppp->nxseq;
 		bits = 0;
 		spin_unlock_bh(&pch->downl);
-
-		if (--nbigger == 0 && fragsize > 0)
-			--fragsize;
 	}
-	ppp->nxchan = i;
+	ppp->nxchan	= i;
 
 	return 1;
 
  noskb:
 	spin_unlock_bh(&pch->downl);
 	if (ppp->debug & 1)
-		printk(KERN_ERR "PPP: no memory (fragment)\n");
+		printk(KERN_ERR	"PPP: no memory	(fragment)\n");
 	++ppp->dev->stats.tx_errors;
 	++ppp->nxseq;
 	return 1;	/* abandon the frame */
diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c
index 3ea791d16b00..d2fa2db13586 100644
--- a/drivers/net/ppp_synctty.c
+++ b/drivers/net/ppp_synctty.c
@@ -206,6 +206,7 @@ ppp_sync_open(struct tty_struct *tty)
 {
 	struct syncppp *ap;
 	int err;
+	int speed;
 
 	if (tty->ops->write == NULL)
 		return -EOPNOTSUPP;
@@ -234,6 +235,8 @@ ppp_sync_open(struct tty_struct *tty)
 	ap->chan.ops = &sync_ops;
 	ap->chan.mtu = PPP_MRU;
 	ap->chan.hdrlen = 2;	/* for A/C bytes */
+	speed = tty_get_baud_rate(tty);
+	ap->chan.speed = speed;
 	err = ppp_register_channel(&ap->chan);
 	if (err)
 		goto out_free;
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index 9d64bdf14770..0d3fa63e90ea 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -40,8 +40,8 @@ struct ppp_channel {
 	int		mtu;		/* max transmit packet size */
 	int		hdrlen;		/* amount of headroom channel needs */
 	void		*ppp;		/* opaque to channel */
-	/* the following are not used at present */
 	int		speed;		/* transfer rate (bytes/second) */
+	/* the following is not used at present */
 	int		latency;	/* overhead time in milliseconds */
 };
 
-- 
cgit v1.2.3


From 8bdd663aba341c15cd2fa9dbd7061b8b387964dc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 15 Mar 2009 19:59:13 -0700
Subject: net: reorder fields of struct socket

On x86_64, its rather unfortunate that "wait_queue_head_t wait"
field of "struct socket" spans two cache lines (assuming a 64
bytes cache line in current cpus)

offsetof(struct socket, wait)=0x30
sizeof(wait_queue_head_t)=0x18

This might explain why Kenny Chang noticed that his multicast workload
was performing bad with 64 bit kernels, since more cache lines ping pongs
were involved.

This litle patch moves "wait" field next "fasync_list" so that both
fields share a single cache line, to speedup sock_def_readable()

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 4515efae4c39..4fc2ffd527f9 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -129,11 +129,15 @@ struct socket {
 	socket_state		state;
 	short			type;
 	unsigned long		flags;
-	const struct proto_ops	*ops;
+	/*
+	 * Please keep fasync_list & wait fields in the same cache line
+	 */
 	struct fasync_struct	*fasync_list;
+	wait_queue_head_t	wait;
+
 	struct file		*file;
 	struct sock		*sk;
-	wait_queue_head_t	wait;
+	const struct proto_ops	*ops;
 };
 
 struct vm_area_struct;
-- 
cgit v1.2.3


From 0c54b85f2828128274f319a1eb3ce7f604fe2a53 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 14 Mar 2009 14:23:05 +0000
Subject: tcp: simplify tcp_current_mss
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's very little need for most of the callsites to get
tp->xmit_goal_size updated. That will cost us divide as is,
so slice the function in two. Also, the only users of the
tp->xmit_goal_size are directly behind tcp_current_mss(),
so there's no need to store that variable into tcp_sock
at all! The drop of xmit_goal_size currently leaves 16-bit
hole and some reorganization would again be necessary to
change that (but I'm aiming to fill that hole with u16
xmit_goal_size_segs to cache the results of the remaining
divide to get that tso on regression).

Bring xmit_goal_size parts into tcp.c

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Cc: Evgeniy Polyakov <zbr@ioremap.net>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 -
 include/net/tcp.h     | 13 +++++++++++--
 net/ipv4/tcp.c        | 43 +++++++++++++++++++++++++++++++++++--------
 net/ipv4/tcp_input.c  |  2 +-
 net/ipv4/tcp_output.c | 41 +++++++----------------------------------
 5 files changed, 54 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4b86ad71e054..ad2021ccc55a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -248,7 +248,6 @@ struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
-	u16	xmit_size_goal;	/* Goal for segmenting output packets	*/
 
 /*
  *	Header prediction flags
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 255ca35bea05..e54c76d75495 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -481,7 +481,16 @@ static inline void tcp_clear_xmit_timers(struct sock *sk)
 }
 
 extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
-extern unsigned int tcp_current_mss(struct sock *sk, int large);
+extern unsigned int tcp_current_mss(struct sock *sk);
+
+/* Bound MSS / TSO packet size with the half of the window */
+static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
+{
+	if (tp->max_window && pktsize > (tp->max_window >> 1))
+		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+	else
+		return pktsize;
+}
 
 /* tcp.c */
 extern void tcp_get_info(struct sock *, struct tcp_info *);
@@ -822,7 +831,7 @@ static inline void tcp_push_pending_frames(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	__tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle);
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
 }
 
 static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d3f9beee74c0..886596ff0aae 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -661,6 +661,37 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 	return NULL;
 }
 
+static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
+				       int large_allowed)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 xmit_size_goal;
+
+	xmit_size_goal = mss_now;
+
+	if (large_allowed && sk_can_gso(sk)) {
+		xmit_size_goal = ((sk->sk_gso_max_size - 1) -
+				  inet_csk(sk)->icsk_af_ops->net_header_len -
+				  inet_csk(sk)->icsk_ext_hdr_len -
+				  tp->tcp_header_len);
+
+		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
+		xmit_size_goal -= (xmit_size_goal % mss_now);
+	}
+
+	return xmit_size_goal;
+}
+
+static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
+{
+	int mss_now;
+
+	mss_now = tcp_current_mss(sk);
+	*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
+
+	return mss_now;
+}
+
 static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
 			 size_t psize, int flags)
 {
@@ -677,8 +708,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
-	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-	size_goal = tp->xmit_size_goal;
+	mss_now = tcp_send_mss(sk, &size_goal, flags);
 	copied = 0;
 
 	err = -EPIPE;
@@ -761,8 +791,7 @@ wait_for_memory:
 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
 
-		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-		size_goal = tp->xmit_size_goal;
+		mss_now = tcp_send_mss(sk, &size_goal, flags);
 	}
 
 out:
@@ -844,8 +873,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	/* This should be in poll */
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
-	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-	size_goal = tp->xmit_size_goal;
+	mss_now = tcp_send_mss(sk, &size_goal, flags);
 
 	/* Ok commence sending. */
 	iovlen = msg->msg_iovlen;
@@ -1007,8 +1035,7 @@ wait_for_memory:
 			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 				goto do_error;
 
-			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-			size_goal = tp->xmit_size_goal;
+			mss_now = tcp_send_mss(sk, &size_goal, flags);
 		}
 	}
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 311c30f73ee4..fae78e3eccc4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2864,7 +2864,7 @@ void tcp_simple_retransmit(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int mss = tcp_current_mss(sk, 0);
+	unsigned int mss = tcp_current_mss(sk);
 	u32 prior_lost = tp->lost_out;
 
 	tcp_for_write_queue(skb, sk) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 325658039139..c1f259d2d33b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -921,7 +921,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	 * factor and mss.
 	 */
 	if (tcp_skb_pcount(skb) > 1)
-		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
+		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
 
 	return 0;
 }
@@ -982,15 +982,6 @@ void tcp_mtup_init(struct sock *sk)
 	icsk->icsk_mtup.probe_size = 0;
 }
 
-/* Bound MSS / TSO packet size with the half of the window */
-static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
-{
-	if (tp->max_window && pktsize > (tp->max_window >> 1))
-		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
-	else
-		return pktsize;
-}
-
 /* This function synchronize snd mss to current pmtu/exthdr set.
 
    tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
@@ -1037,22 +1028,17 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 /* Compute the current effective MSS, taking SACKs and IP options,
  * and even PMTU discovery events into account.
  */
-unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
+unsigned int tcp_current_mss(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
 	u32 mss_now;
-	u16 xmit_size_goal;
-	int doing_tso = 0;
 	unsigned header_len;
 	struct tcp_out_options opts;
 	struct tcp_md5sig_key *md5;
 
 	mss_now = tp->mss_cache;
 
-	if (large_allowed && sk_can_gso(sk))
-		doing_tso = 1;
-
 	if (dst) {
 		u32 mtu = dst_mtu(dst);
 		if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
@@ -1070,19 +1056,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 		mss_now -= delta;
 	}
 
-	xmit_size_goal = mss_now;
-
-	if (doing_tso) {
-		xmit_size_goal = ((sk->sk_gso_max_size - 1) -
-				  inet_csk(sk)->icsk_af_ops->net_header_len -
-				  inet_csk(sk)->icsk_ext_hdr_len -
-				  tp->tcp_header_len);
-
-		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-		xmit_size_goal -= (xmit_size_goal % mss_now);
-	}
-	tp->xmit_size_goal = xmit_size_goal;
-
 	return mss_now;
 }
 
@@ -1264,7 +1237,7 @@ int tcp_may_send_now(struct sock *sk)
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	return (skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
+		tcp_snd_test(sk, skb, tcp_current_mss(sk),
 			     (tcp_skb_is_last(sk, skb) ?
 			      tp->nonagle : TCP_NAGLE_PUSH)));
 }
@@ -1421,7 +1394,7 @@ static int tcp_mtu_probe(struct sock *sk)
 		return -1;
 
 	/* Very simple search strategy: just double the MSS. */
-	mss_now = tcp_current_mss(sk, 0);
+	mss_now = tcp_current_mss(sk);
 	probe_size = 2 * tp->mss_cache;
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
@@ -1903,7 +1876,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
 		return -EHOSTUNREACH; /* Routing failure or similar. */
 
-	cur_mss = tcp_current_mss(sk, 0);
+	cur_mss = tcp_current_mss(sk);
 
 	/* If receiver has shrunk his window, and skb is out of
 	 * new window, do not retransmit it. The exception is the
@@ -2111,7 +2084,7 @@ void tcp_send_fin(struct sock *sk)
 	 * unsent frames.  But be careful about outgoing SACKS
 	 * and IP options.
 	 */
-	mss_now = tcp_current_mss(sk, 1);
+	mss_now = tcp_current_mss(sk);
 
 	if (tcp_send_head(sk) != NULL) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
@@ -2523,7 +2496,7 @@ int tcp_write_wakeup(struct sock *sk)
 	if ((skb = tcp_send_head(sk)) != NULL &&
 	    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
 		int err;
-		unsigned int mss = tcp_current_mss(sk, 0);
+		unsigned int mss = tcp_current_mss(sk);
 		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 
 		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
-- 
cgit v1.2.3


From 2a3a041c4e2c1685e668b280c121a5a40a029a03 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 14 Mar 2009 22:45:16 +0000
Subject: tcp: cache result of earlier divides when mss-aligning things
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The results is very unlikely change every so often so we
hardly need to divide again after doing that once for a
connection. Yet, if divide still becomes necessary we
detect that and do the right thing and again settle for
non-divide state. Takes the u16 space which was previously
taken by the plain xmit_size_goal.

This should take care part of the tso vs non-tso difference
we found earlier.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h |  1 +
 net/ipv4/tcp.c      | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index ad2021ccc55a..9d5078bd23a3 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -248,6 +248,7 @@ struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
+	u16	xmit_size_goal_segs; /* Goal for segmenting output packets */
 
 /*
  *	Header prediction flags
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 886596ff0aae..0db9f3b984f7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -665,7 +665,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 				       int large_allowed)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 xmit_size_goal;
+	u32 xmit_size_goal, old_size_goal;
 
 	xmit_size_goal = mss_now;
 
@@ -676,7 +676,17 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 				  tp->tcp_header_len);
 
 		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-		xmit_size_goal -= (xmit_size_goal % mss_now);
+
+		/* We try hard to avoid divides here */
+		old_size_goal = tp->xmit_size_goal_segs * mss_now;
+
+		if (likely(old_size_goal <= xmit_size_goal &&
+			   old_size_goal + mss_now > xmit_size_goal)) {
+			xmit_size_goal = old_size_goal;
+		} else {
+			tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+			xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
+		}
 	}
 
 	return xmit_size_goal;
-- 
cgit v1.2.3


From 684999149002dd046269666a390458e0acb38280 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 6 Feb 2009 13:52:43 -0700
Subject: Rename struct file->f_ep_lock

This lock moves out of the CONFIG_EPOLL ifdef and becomes f_lock.  For now,
epoll remains the only user, but a future patch will use it to protect
f_flags as well.

Cc: Davide Libenzi <davidel@xmailserver.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 fs/eventpoll.c            | 12 +++++++-----
 fs/file_table.c           |  1 +
 include/linux/eventpoll.h |  1 -
 include/linux/fs.h        |  2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 011b9b8c90c6..c5c424f23fd5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -417,10 +417,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 	ep_unregister_pollwait(ep, epi);
 
 	/* Remove the current item from the list of epoll hooks */
-	spin_lock(&file->f_ep_lock);
+	spin_lock(&file->f_lock);
 	if (ep_is_linked(&epi->fllink))
 		list_del_init(&epi->fllink);
-	spin_unlock(&file->f_ep_lock);
+	spin_unlock(&file->f_lock);
 
 	rb_erase(&epi->rbn, &ep->rbr);
 
@@ -538,7 +538,7 @@ void eventpoll_release_file(struct file *file)
 	struct epitem *epi;
 
 	/*
-	 * We don't want to get "file->f_ep_lock" because it is not
+	 * We don't want to get "file->f_lock" because it is not
 	 * necessary. It is not necessary because we're in the "struct file"
 	 * cleanup path, and this means that noone is using this file anymore.
 	 * So, for example, epoll_ctl() cannot hit here sicne if we reach this
@@ -547,6 +547,8 @@ void eventpoll_release_file(struct file *file)
 	 * will correctly serialize the operation. We do need to acquire
 	 * "ep->mtx" after "epmutex" because ep_remove() requires it when called
 	 * from anywhere but ep_free().
+	 *
+	 * Besides, ep_remove() acquires the lock, so we can't hold it here.
 	 */
 	mutex_lock(&epmutex);
 
@@ -785,9 +787,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 		goto error_unregister;
 
 	/* Add the current item to the list of active epoll hook for this file */
-	spin_lock(&tfile->f_ep_lock);
+	spin_lock(&tfile->f_lock);
 	list_add_tail(&epi->fllink, &tfile->f_ep_links);
-	spin_unlock(&tfile->f_ep_lock);
+	spin_unlock(&tfile->f_lock);
 
 	/*
 	 * Add the current item to the RB tree. All RB tree operations are
diff --git a/fs/file_table.c b/fs/file_table.c
index bbeeac6efa1a..aa1e18050282 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -127,6 +127,7 @@ struct file *get_empty_filp(void)
 	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
 	f->f_cred = get_cred(cred);
+	spin_lock_init(&f->f_lock);
 	eventpoll_init_file(f);
 	/* f->f_version: 0 */
 	return f;
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index f1e1d3c47125..f6856a5a1d4b 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -61,7 +61,6 @@ struct file;
 static inline void eventpoll_init_file(struct file *file)
 {
 	INIT_LIST_HEAD(&file->f_ep_links);
-	spin_lock_init(&file->f_ep_lock);
 }
 
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 92734c0012e6..2011600d12c7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -848,6 +848,7 @@ struct file {
 #define f_dentry	f_path.dentry
 #define f_vfsmnt	f_path.mnt
 	const struct file_operations	*f_op;
+	spinlock_t		f_lock;  /* f_ep_links */
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -866,7 +867,6 @@ struct file {
 #ifdef CONFIG_EPOLL
 	/* Used by fs/eventpoll.c to link all the hooks to this file */
 	struct list_head	f_ep_links;
-	spinlock_t		f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
 #ifdef CONFIG_DEBUG_WRITECOUNT
-- 
cgit v1.2.3


From db1dd4d376134eba0e08af523b61cc566a4ea1cd Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 6 Feb 2009 15:25:24 -0700
Subject: Use f_lock to protect f_flags

Traditionally, changes to struct file->f_flags have been done under BKL
protection, or with no protection at all.  This patch causes all f_flags
changes after file open/creation time to be done under protection of
f_lock.  This allows the removal of some BKL usage and fixes a number of
longstanding (if microscopic) races.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 drivers/char/tty_io.c             | 5 ++---
 drivers/usb/gadget/file_storage.c | 7 ++++++-
 fs/fcntl.c                        | 2 ++
 fs/ioctl.c                        | 7 ++++---
 fs/nfsd/vfs.c                     | 5 ++++-
 include/linux/fs.h                | 2 +-
 ipc/mqueue.c                      | 2 ++
 sound/core/oss/pcm_oss.c          | 2 ++
 sound/oss/au1550_ac97.c           | 2 ++
 sound/oss/audio.c                 | 2 ++
 sound/oss/sh_dac_audio.c          | 2 ++
 sound/oss/swarm_cs4297a.c         | 2 ++
 sound/oss/vwsnd.c                 | 2 ++
 13 files changed, 33 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index bc84e125c6bc..224f271d8cbe 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2162,13 +2162,12 @@ static int fionbio(struct file *file, int __user *p)
 	if (get_user(nonblock, p))
 		return -EFAULT;
 
-	/* file->f_flags is still BKL protected in the fs layer - vomit */
-	lock_kernel();
+	spin_lock(&file->f_lock);
 	if (nonblock)
 		file->f_flags |= O_NONBLOCK;
 	else
 		file->f_flags &= ~O_NONBLOCK;
-	unlock_kernel();
+	spin_unlock(&file->f_lock);
 	return 0;
 }
 
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 1ab9dac7e12d..33bb76cef33c 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -1711,7 +1711,9 @@ static int do_write(struct fsg_dev *fsg)
 		curlun->sense_data = SS_WRITE_PROTECTED;
 		return -EINVAL;
 	}
+	spin_lock(&curlun->filp->f_lock);
 	curlun->filp->f_flags &= ~O_SYNC;	// Default is not to wait
+	spin_unlock(&curlun->filp->f_lock);
 
 	/* Get the starting Logical Block Address and check that it's
 	 * not too big */
@@ -1728,8 +1730,11 @@ static int do_write(struct fsg_dev *fsg)
 			curlun->sense_data = SS_INVALID_FIELD_IN_CDB;
 			return -EINVAL;
 		}
-		if (fsg->cmnd[1] & 0x08)	// FUA
+		if (fsg->cmnd[1] & 0x08) {	// FUA
+			spin_lock(&curlun->filp->f_lock);
 			curlun->filp->f_flags |= O_SYNC;
+			spin_unlock(&curlun->filp->f_lock);
+		}
 	}
 	if (lba >= curlun->num_sectors) {
 		curlun->sense_data = SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index bd215cc791da..04df8570a2d2 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -189,7 +189,9 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 		}
 	}
 
+	spin_lock(&filp->f_lock);
 	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
+	spin_unlock(&filp->f_lock);
  out:
 	unlock_kernel();
 	return error;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 240ec63984cb..421aab465dab 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -404,10 +404,12 @@ static int ioctl_fionbio(struct file *filp, int __user *argp)
 	if (O_NONBLOCK != O_NDELAY)
 		flag |= O_NDELAY;
 #endif
+	spin_lock(&filp->f_lock);
 	if (on)
 		filp->f_flags |= flag;
 	else
 		filp->f_flags &= ~flag;
+	spin_unlock(&filp->f_lock);
 	return error;
 }
 
@@ -432,10 +434,12 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
 	if (error)
 		return error;
 
+	spin_lock(&filp->f_lock);
 	if (on)
 		filp->f_flags |= FASYNC;
 	else
 		filp->f_flags &= ~FASYNC;
+	spin_unlock(&filp->f_lock);
 	return error;
 }
 
@@ -499,10 +503,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		break;
 
 	case FIONBIO:
-		/* BKL needed to avoid races tweaking f_flags */
-		lock_kernel();
 		error = ioctl_fionbio(filp, argp);
-		unlock_kernel();
 		break;
 
 	case FIOASYNC:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6e50aaa56ca2..c165a6403df0 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -998,8 +998,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 
 	if (!EX_ISSYNC(exp))
 		stable = 0;
-	if (stable && !EX_WGATHER(exp))
+	if (stable && !EX_WGATHER(exp)) {
+		spin_lock(&file->f_lock);
 		file->f_flags |= O_SYNC;
+		spin_unlock(&file->f_lock);
+	}
 
 	/* Write the data. */
 	oldfs = get_fs(); set_fs(KERNEL_DS);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2011600d12c7..7428c6d35e65 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -848,7 +848,7 @@ struct file {
 #define f_dentry	f_path.dentry
 #define f_vfsmnt	f_path.mnt
 	const struct file_operations	*f_op;
-	spinlock_t		f_lock;  /* f_ep_links */
+	spinlock_t		f_lock;  /* f_ep_links, f_flags */
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 54b4077fed79..a8ddadbc7459 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1156,10 +1156,12 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
 	omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
 	if (u_mqstat) {
 		audit_mq_getsetattr(mqdes, &mqstat);
+		spin_lock(&filp->f_lock);
 		if (mqstat.mq_flags & O_NONBLOCK)
 			filp->f_flags |= O_NONBLOCK;
 		else
 			filp->f_flags &= ~O_NONBLOCK;
+		spin_unlock(&filp->f_lock);
 
 		inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	}
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 0a1798eafb0b..d4460f18e76c 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1895,7 +1895,9 @@ static int snd_pcm_oss_set_fragment(struct snd_pcm_oss_file *pcm_oss_file, unsig
 
 static int snd_pcm_oss_nonblock(struct file * file)
 {
+	spin_lock(&file->f_lock);
 	file->f_flags |= O_NONBLOCK;
+	spin_unlock(&file->f_lock);
 	return 0;
 }
 
diff --git a/sound/oss/au1550_ac97.c b/sound/oss/au1550_ac97.c
index 81e1f443d094..4191acccbcdb 100644
--- a/sound/oss/au1550_ac97.c
+++ b/sound/oss/au1550_ac97.c
@@ -1627,7 +1627,9 @@ au1550_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 				    sizeof(abinfo)) ? -EFAULT : 0;
 
 	case SNDCTL_DSP_NONBLOCK:
+		spin_lock(&file->f_lock);
 		file->f_flags |= O_NONBLOCK;
+		spin_unlock(&file->f_lock);
 		return 0;
 
 	case SNDCTL_DSP_GETODELAY:
diff --git a/sound/oss/audio.c b/sound/oss/audio.c
index 89bd27a5e865..b69c05b7ea7b 100644
--- a/sound/oss/audio.c
+++ b/sound/oss/audio.c
@@ -433,7 +433,9 @@ int audio_ioctl(int dev, struct file *file, unsigned int cmd, void __user *arg)
 			return dma_ioctl(dev, cmd, arg);
 		
 		case SNDCTL_DSP_NONBLOCK:
+			spin_lock(&file->f_lock);
 			file->f_flags |= O_NONBLOCK;
+			spin_unlock(&file->f_lock);
 			return 0;
 
 		case SNDCTL_DSP_GETCAPS:
diff --git a/sound/oss/sh_dac_audio.c b/sound/oss/sh_dac_audio.c
index e5d423994918..78cfb66e4c59 100644
--- a/sound/oss/sh_dac_audio.c
+++ b/sound/oss/sh_dac_audio.c
@@ -135,7 +135,9 @@ static int dac_audio_ioctl(struct inode *inode, struct file *file,
 		return put_user(AFMT_U8, (int *)arg);
 
 	case SNDCTL_DSP_NONBLOCK:
+		spin_lock(&file->f_lock);
 		file->f_flags |= O_NONBLOCK;
+		spin_unlock(&file->f_lock);
 		return 0;
 
 	case SNDCTL_DSP_GETCAPS:
diff --git a/sound/oss/swarm_cs4297a.c b/sound/oss/swarm_cs4297a.c
index 41562ecde5bb..1edab7b4ea83 100644
--- a/sound/oss/swarm_cs4297a.c
+++ b/sound/oss/swarm_cs4297a.c
@@ -2200,7 +2200,9 @@ static int cs4297a_ioctl(struct inode *inode, struct file *file,
 				    sizeof(abinfo)) ? -EFAULT : 0;
 
 	case SNDCTL_DSP_NONBLOCK:
+		spin_lock(&file->f_lock);
 		file->f_flags |= O_NONBLOCK;
+		spin_unlock(&file->f_lock);
 		return 0;
 
 	case SNDCTL_DSP_GETODELAY:
diff --git a/sound/oss/vwsnd.c b/sound/oss/vwsnd.c
index 78b8acc7c3b9..187f72750e8f 100644
--- a/sound/oss/vwsnd.c
+++ b/sound/oss/vwsnd.c
@@ -2673,7 +2673,9 @@ static int vwsnd_audio_do_ioctl(struct inode *inode,
 
 	case SNDCTL_DSP_NONBLOCK:	/* _SIO  ('P',14) */
 		DBGX("SNDCTL_DSP_NONBLOCK\n");
+		spin_lock(&file->f_lock);
 		file->f_flags |= O_NONBLOCK;
+		spin_unlock(&file->f_lock);
 		return 0;
 
 	case SNDCTL_DSP_RESET:		/* _SIO  ('P', 0) */
-- 
cgit v1.2.3


From acc738fec03bdaa5b77340c32a82fbfedaaabef0 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 16 Mar 2009 15:35:29 +0100
Subject: netfilter: xtables: avoid pointer to self

Commit 784544739a25c30637397ace5489eeb6e15d7d49 (netfilter: iptables:
lock free counters) broke a number of modules whose rule data referenced
itself. A reallocation would not reestablish the correct references, so
it is best to use a separate struct that does not fall under RCU.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_limit.h     |  9 ++++----
 include/linux/netfilter/xt_quota.h     |  4 +++-
 include/linux/netfilter/xt_statistic.h |  7 +++---
 net/netfilter/xt_limit.c               | 40 ++++++++++++++++++++++++----------
 net/netfilter/xt_quota.c               | 31 ++++++++++++++++++++------
 net/netfilter/xt_statistic.c           | 28 +++++++++++++++++++-----
 6 files changed, 88 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_limit.h b/include/linux/netfilter/xt_limit.h
index b3ce65375ecb..fda222c7953b 100644
--- a/include/linux/netfilter/xt_limit.h
+++ b/include/linux/netfilter/xt_limit.h
@@ -4,6 +4,8 @@
 /* timings are in milliseconds. */
 #define XT_LIMIT_SCALE 10000
 
+struct xt_limit_priv;
+
 /* 1/10,000 sec period => max of 10,000/sec.  Min rate is then 429490
    seconds, or one every 59 hours. */
 struct xt_rateinfo {
@@ -11,11 +13,10 @@ struct xt_rateinfo {
 	u_int32_t burst;  /* Period multiplier for upper limit. */
 
 	/* Used internally by the kernel */
-	unsigned long prev;
-	u_int32_t credit;
+	unsigned long prev; /* moved to xt_limit_priv */
+	u_int32_t credit; /* moved to xt_limit_priv */
 	u_int32_t credit_cap, cost;
 
-	/* Ugly, ugly fucker. */
-	struct xt_rateinfo *master;
+	struct xt_limit_priv *master;
 };
 #endif /*_XT_RATE_H*/
diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h
index 4c8368d781e5..8dc89dfc1361 100644
--- a/include/linux/netfilter/xt_quota.h
+++ b/include/linux/netfilter/xt_quota.h
@@ -6,13 +6,15 @@ enum xt_quota_flags {
 };
 #define XT_QUOTA_MASK		0x1
 
+struct xt_quota_priv;
+
 struct xt_quota_info {
 	u_int32_t		flags;
 	u_int32_t		pad;
 
 	/* Used internally by the kernel */
 	aligned_u64		quota;
-	struct xt_quota_info	*master;
+	struct xt_quota_priv	*master;
 };
 
 #endif /* _XT_QUOTA_H */
diff --git a/include/linux/netfilter/xt_statistic.h b/include/linux/netfilter/xt_statistic.h
index 3d38bc975048..8f521ab49ef7 100644
--- a/include/linux/netfilter/xt_statistic.h
+++ b/include/linux/netfilter/xt_statistic.h
@@ -13,6 +13,8 @@ enum xt_statistic_flags {
 };
 #define XT_STATISTIC_MASK		0x1
 
+struct xt_statistic_priv;
+
 struct xt_statistic_info {
 	u_int16_t			mode;
 	u_int16_t			flags;
@@ -23,11 +25,10 @@ struct xt_statistic_info {
 		struct {
 			u_int32_t	every;
 			u_int32_t	packet;
-			/* Used internally by the kernel */
-			u_int32_t	count;
+			u_int32_t	count; /* unused */
 		} nth;
 	} u;
-	struct xt_statistic_info	*master __attribute__((aligned(8)));
+	struct xt_statistic_priv *master __attribute__((aligned(8)));
 };
 
 #endif /* _XT_STATISTIC_H */
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index c908d69a5595..2e8089ecd0af 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -14,6 +14,11 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_limit.h>
 
+struct xt_limit_priv {
+	unsigned long prev;
+	uint32_t credit;
+};
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
 MODULE_DESCRIPTION("Xtables: rate-limit match");
@@ -60,18 +65,18 @@ static DEFINE_SPINLOCK(limit_lock);
 static bool
 limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	struct xt_rateinfo *r =
-		((const struct xt_rateinfo *)par->matchinfo)->master;
+	const struct xt_rateinfo *r = par->matchinfo;
+	struct xt_limit_priv *priv = r->master;
 	unsigned long now = jiffies;
 
 	spin_lock_bh(&limit_lock);
-	r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY;
-	if (r->credit > r->credit_cap)
-		r->credit = r->credit_cap;
+	priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
+	if (priv->credit > r->credit_cap)
+		priv->credit = r->credit_cap;
 
-	if (r->credit >= r->cost) {
+	if (priv->credit >= r->cost) {
 		/* We're not limited. */
-		r->credit -= r->cost;
+		priv->credit -= r->cost;
 		spin_unlock_bh(&limit_lock);
 		return true;
 	}
@@ -95,6 +100,7 @@ user2credits(u_int32_t user)
 static bool limit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_rateinfo *r = par->matchinfo;
+	struct xt_limit_priv *priv;
 
 	/* Check for overflow. */
 	if (r->burst == 0
@@ -104,19 +110,30 @@ static bool limit_mt_check(const struct xt_mtchk_param *par)
 		return false;
 	}
 
-	/* For SMP, we only want to use one set of counters. */
-	r->master = r;
+	priv = kmalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv == NULL)
+		return -ENOMEM;
+
+	/* For SMP, we only want to use one set of state. */
+	r->master = priv;
 	if (r->cost == 0) {
 		/* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
 		   128. */
-		r->prev = jiffies;
-		r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
+		priv->prev = jiffies;
+		priv->credit = user2credits(r->avg * r->burst); /* Credits full. */
 		r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
 		r->cost = user2credits(r->avg);
 	}
 	return true;
 }
 
+static void limit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_rateinfo *info = par->matchinfo;
+
+	kfree(info->master);
+}
+
 #ifdef CONFIG_COMPAT
 struct compat_xt_rateinfo {
 	u_int32_t avg;
@@ -167,6 +184,7 @@ static struct xt_match limit_mt_reg __read_mostly = {
 	.family           = NFPROTO_UNSPEC,
 	.match            = limit_mt,
 	.checkentry       = limit_mt_check,
+	.destroy          = limit_mt_destroy,
 	.matchsize        = sizeof(struct xt_rateinfo),
 #ifdef CONFIG_COMPAT
 	.compatsize       = sizeof(struct compat_xt_rateinfo),
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index c84fce5e0f3e..01dd07b764ec 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -9,6 +9,10 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_quota.h>
 
+struct xt_quota_priv {
+	uint64_t quota;
+};
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
 MODULE_DESCRIPTION("Xtables: countdown quota match");
@@ -20,18 +24,20 @@ static DEFINE_SPINLOCK(quota_lock);
 static bool
 quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	struct xt_quota_info *q =
-		((const struct xt_quota_info *)par->matchinfo)->master;
+	struct xt_quota_info *q = (void *)par->matchinfo;
+	struct xt_quota_priv *priv = q->master;
 	bool ret = q->flags & XT_QUOTA_INVERT;
 
 	spin_lock_bh(&quota_lock);
-	if (q->quota >= skb->len) {
-		q->quota -= skb->len;
+	if (priv->quota >= skb->len) {
+		priv->quota -= skb->len;
 		ret = !ret;
 	} else {
 		/* we do not allow even small packets from now on */
-		q->quota = 0;
+		priv->quota = 0;
 	}
+	/* Copy quota back to matchinfo so that iptables can display it */
+	q->quota = priv->quota;
 	spin_unlock_bh(&quota_lock);
 
 	return ret;
@@ -43,17 +49,28 @@ static bool quota_mt_check(const struct xt_mtchk_param *par)
 
 	if (q->flags & ~XT_QUOTA_MASK)
 		return false;
-	/* For SMP, we only want to use one set of counters. */
-	q->master = q;
+
+	q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
+	if (q->master == NULL)
+		return -ENOMEM;
+
 	return true;
 }
 
+static void quota_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_quota_info *q = par->matchinfo;
+
+	kfree(q->master);
+}
+
 static struct xt_match quota_mt_reg __read_mostly = {
 	.name       = "quota",
 	.revision   = 0,
 	.family     = NFPROTO_UNSPEC,
 	.match      = quota_mt,
 	.checkentry = quota_mt_check,
+	.destroy    = quota_mt_destroy,
 	.matchsize  = sizeof(struct xt_quota_info),
 	.me         = THIS_MODULE,
 };
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 0d75141139d5..d8c0f8f1a78e 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -16,6 +16,10 @@
 #include <linux/netfilter/xt_statistic.h>
 #include <linux/netfilter/x_tables.h>
 
+struct xt_statistic_priv {
+	uint32_t count;
+};
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
@@ -27,7 +31,7 @@ static DEFINE_SPINLOCK(nth_lock);
 static bool
 statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	struct xt_statistic_info *info = (void *)par->matchinfo;
+	const struct xt_statistic_info *info = par->matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
 
 	switch (info->mode) {
@@ -36,10 +40,9 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			ret = !ret;
 		break;
 	case XT_STATISTIC_MODE_NTH:
-		info = info->master;
 		spin_lock_bh(&nth_lock);
-		if (info->u.nth.count++ == info->u.nth.every) {
-			info->u.nth.count = 0;
+		if (info->master->count++ == info->u.nth.every) {
+			info->master->count = 0;
 			ret = !ret;
 		}
 		spin_unlock_bh(&nth_lock);
@@ -56,16 +59,31 @@ static bool statistic_mt_check(const struct xt_mtchk_param *par)
 	if (info->mode > XT_STATISTIC_MODE_MAX ||
 	    info->flags & ~XT_STATISTIC_MASK)
 		return false;
-	info->master = info;
+
+	info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
+	if (info->master == NULL) {
+		printk(KERN_ERR KBUILD_MODNAME ": Out of memory\n");
+		return false;
+	}
+	info->master->count = info->u.nth.count;
+
 	return true;
 }
 
+static void statistic_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_statistic_info *info = par->matchinfo;
+
+	kfree(info->master);
+}
+
 static struct xt_match xt_statistic_mt_reg __read_mostly = {
 	.name       = "statistic",
 	.revision   = 0,
 	.family     = NFPROTO_UNSPEC,
 	.match      = statistic_mt,
 	.checkentry = statistic_mt_check,
+	.destroy    = statistic_mt_destroy,
 	.matchsize  = sizeof(struct xt_statistic_info),
 	.me         = THIS_MODULE,
 };
-- 
cgit v1.2.3


From 0269ea4937343536ec7e85649932bc8c9686ea78 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 16 Mar 2009 17:10:36 +0100
Subject: netfilter: xtables: add cluster match

This patch adds the iptables cluster match. This match can be used
to deploy gateway and back-end load-sharing clusters. The cluster
can be composed of 32 nodes maximum (although I have only tested
this with two nodes, so I cannot tell what is the real scalability
limit of this solution in terms of cluster nodes).

Assuming that all the nodes see all packets (see below for an
example on how to do that if your switch does not allow this), the
cluster match decides if this node has to handle a packet given:

	(jhash(source IP) % total_nodes) & node_mask

For related connections, the master conntrack is used. The following
is an example of its use to deploy a gateway cluster composed of two
nodes (where this is the node 1):

iptables -I PREROUTING -t mangle -i eth1 -m cluster \
	--cluster-total-nodes 2 --cluster-local-node 1 \
	--cluster-proc-name eth1 -j MARK --set-mark 0xffff
iptables -A PREROUTING -t mangle -i eth1 \
	-m mark ! --mark 0xffff -j DROP
iptables -A PREROUTING -t mangle -i eth2 -m cluster \
	--cluster-total-nodes 2 --cluster-local-node 1 \
	--cluster-proc-name eth2 -j MARK --set-mark 0xffff
iptables -A PREROUTING -t mangle -i eth2 \
	-m mark ! --mark 0xffff -j DROP

And the following commands to make all nodes see the same packets:

ip maddr add 01:00:5e:00:01:01 dev eth1
ip maddr add 01:00:5e:00:01:02 dev eth2
arptables -I OUTPUT -o eth1 --h-length 6 \
	-j mangle --mangle-mac-s 01:00:5e:00:01:01
arptables -I INPUT -i eth1 --h-length 6 \
	--destination-mac 01:00:5e:00:01:01 \
	-j mangle --mangle-mac-d 00:zz:yy:xx:5a:27
arptables -I OUTPUT -o eth2 --h-length 6 \
	-j mangle --mangle-mac-s 01:00:5e:00:01:02
arptables -I INPUT -i eth2 --h-length 6 \
	--destination-mac 01:00:5e:00:01:02 \
	-j mangle --mangle-mac-d 00:zz:yy:xx:5a:27

In the case of TCP connections, pickup facility has to be disabled
to avoid marking TCP ACK packets coming in the reply direction as
valid.

echo 0 > /proc/sys/net/netfilter/nf_conntrack_tcp_loose

BTW, some final notes:

 * This match mangles the skbuff pkt_type in case that it detects
PACKET_MULTICAST for a non-multicast address. This may be done in
a PKTTYPE target for this sole purpose.
 * This match supersedes the CLUSTERIP target.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild       |   1 +
 include/linux/netfilter/xt_cluster.h |  15 ++++
 net/netfilter/Kconfig                |  16 ++++
 net/netfilter/Makefile               |   1 +
 net/netfilter/xt_cluster.c           | 164 +++++++++++++++++++++++++++++++++++
 5 files changed, 197 insertions(+)
 create mode 100644 include/linux/netfilter/xt_cluster.h
 create mode 100644 net/netfilter/xt_cluster.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 947b47d7f6c0..af9d2fb97212 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -21,6 +21,7 @@ header-y += xt_connbytes.h
 header-y += xt_connlimit.h
 header-y += xt_connmark.h
 header-y += xt_conntrack.h
+header-y += xt_cluster.h
 header-y += xt_dccp.h
 header-y += xt_dscp.h
 header-y += xt_esp.h
diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h
new file mode 100644
index 000000000000..5e0a0d07b526
--- /dev/null
+++ b/include/linux/netfilter/xt_cluster.h
@@ -0,0 +1,15 @@
+#ifndef _XT_CLUSTER_MATCH_H
+#define _XT_CLUSTER_MATCH_H
+
+enum xt_cluster_flags {
+	XT_CLUSTER_F_INV	= (1 << 0)
+};
+
+struct xt_cluster_match_info {
+	u_int32_t		total_nodes;
+	u_int32_t		node_mask;
+	u_int32_t		hash_seed;
+	u_int32_t		flags;
+};
+
+#endif /* _XT_CLUSTER_MATCH_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index cdbaaff6d0d6..2562d05dbaf5 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -527,6 +527,22 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
 	  This option adds a "TCPOPTSTRIP" target, which allows you to strip
 	  TCP options from TCP packets.
 
+config NETFILTER_XT_MATCH_CLUSTER
+	tristate '"cluster" match support'
+	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This option allows you to build work-load-sharing clusters of
+	  network servers/stateful firewalls without having a dedicated
+	  load-balancing router/server/switch. Basically, this match returns
+	  true when the packet must be handled by this cluster node. Thus,
+	  all nodes see all packets and this match decides which node handles
+	  what packets. The work-load sharing algorithm is based on source
+	  address hashing.
+
+	  If you say Y or M here, try `iptables -m cluster --help` for
+	  more information.
+
 config NETFILTER_XT_MATCH_COMMENT
 	tristate  '"comment" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 7a9b8397573a..6282060fbda9 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 
 # matches
+obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
new file mode 100644
index 000000000000..ad5bd890e4e8
--- /dev/null
+++ b/net/netfilter/xt_cluster.c
@@ -0,0 +1,164 @@
+/*
+ * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/xt_cluster.h>
+
+static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct)
+{
+	return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+}
+
+static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct)
+{
+	return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6;
+}
+
+static inline u_int32_t
+xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info)
+{
+	return jhash_1word(ip, info->hash_seed);
+}
+
+static inline u_int32_t
+xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info)
+{
+	return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed);
+}
+
+static inline u_int32_t
+xt_cluster_hash(const struct nf_conn *ct,
+		const struct xt_cluster_match_info *info)
+{
+	u_int32_t hash = 0;
+
+	switch(nf_ct_l3num(ct)) {
+	case AF_INET:
+		hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info);
+		break;
+	case AF_INET6:
+		hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+	return (((u64)hash * info->total_nodes) >> 32);
+}
+
+static inline bool
+xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
+{
+	bool is_multicast = false;
+
+	switch(family) {
+	case NFPROTO_IPV4:
+		is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr);
+		break;
+	case NFPROTO_IPV6:
+		is_multicast = ipv6_addr_type(&ipv6_hdr(skb)->daddr) &
+						IPV6_ADDR_MULTICAST;
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+	return is_multicast;
+}
+
+static bool
+xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	struct sk_buff *pskb = (struct sk_buff *)skb;
+	const struct xt_cluster_match_info *info = par->matchinfo;
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned long hash;
+
+	/* This match assumes that all nodes see the same packets. This can be
+	 * achieved if the switch that connects the cluster nodes support some
+	 * sort of 'port mirroring'. However, if your switch does not support
+	 * this, your cluster nodes can reply ARP request using a multicast MAC
+	 * address. Thus, your switch will flood the same packets to the
+	 * cluster nodes with the same multicast MAC address. Using a multicast
+	 * link address is a RFC 1812 (section 3.3.2) violation, but this works
+	 * fine in practise.
+	 *
+	 * Unfortunately, if you use the multicast MAC address, the link layer
+	 * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted
+	 * by TCP and others for packets coming to this node. For that reason,
+	 * this match mangles skbuff's pkt_type if it detects a packet
+	 * addressed to a unicast address but using PACKET_MULTICAST. Yes, I
+	 * know, matches should not alter packets, but we are doing this here
+	 * because we would need to add a PKTTYPE target for this sole purpose.
+	 */
+	if (!xt_cluster_is_multicast_addr(skb, par->family) &&
+	    skb->pkt_type == PACKET_MULTICAST) {
+	    	pskb->pkt_type = PACKET_HOST;
+	}
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return false;
+
+	if (ct == &nf_conntrack_untracked)
+		return false;
+
+	if (ct->master)
+		hash = xt_cluster_hash(ct->master, info);
+	else
+		hash = xt_cluster_hash(ct, info);
+
+	return !!((1 << hash) & info->node_mask) ^
+	       !!(info->flags & XT_CLUSTER_F_INV);
+}
+
+static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
+{
+	struct xt_cluster_match_info *info = par->matchinfo;
+
+	if (info->node_mask >= (1 << info->total_nodes)) {
+		printk(KERN_ERR "xt_cluster: this node mask cannot be "
+				"higher than the total number of nodes\n");
+		return false;
+	}
+	return true;
+}
+
+static struct xt_match xt_cluster_match __read_mostly = {
+	.name		= "cluster",
+	.family		= NFPROTO_UNSPEC,
+	.match		= xt_cluster_mt,
+	.checkentry	= xt_cluster_mt_checkentry,
+	.matchsize	= sizeof(struct xt_cluster_match_info),
+	.me		= THIS_MODULE,
+};
+
+static int __init xt_cluster_mt_init(void)
+{
+	return xt_register_match(&xt_cluster_match);
+}
+
+static void __exit xt_cluster_mt_fini(void)
+{
+	xt_unregister_match(&xt_cluster_match);
+}
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: hash-based cluster match");
+MODULE_ALIAS("ipt_cluster");
+MODULE_ALIAS("ip6t_cluster");
+module_init(xt_cluster_mt_init);
+module_exit(xt_cluster_mt_fini);
-- 
cgit v1.2.3


From d1c76af9e2434fac3add561e26c61b06503de986 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 16 Mar 2009 10:50:02 -0700
Subject: GRO: Move netpoll checks to correct location

As my netpoll fix for net doesn't really work for net-next, we
need this update to move the checks into the right place.  As it
stands we may pass freed skbs to netpoll_receive_skb.

This patch also introduces a netpoll_rx_on function to avoid GRO
completely if we're invoked through netpoll.  This might seem
paranoid but as netpoll may have an external receive hook it's
better to be safe than sorry.  I don't think we need this for
2.6.29 though since there's nothing immediately broken by it.

This patch also moves the GRO_* return values to netdevice.h since
VLAN needs them too (I tried to avoid this originally but alas
this seems to be the easiest way out).  This fixes a bug in VLAN
where it continued to use the old return value 2 instead of the
correct GRO_DROP.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  8 ++++++++
 include/linux/netpoll.h   | 11 +++++++++++
 net/8021q/vlan_core.c     | 11 ++++-------
 net/core/dev.c            | 17 +++--------------
 4 files changed, 26 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 493b065f76d7..be3ebd7e8ce5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -330,6 +330,14 @@ enum
 	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
 };
 
+enum {
+	GRO_MERGED,
+	GRO_MERGED_FREE,
+	GRO_HELD,
+	GRO_NORMAL,
+	GRO_DROP,
+};
+
 extern void __napi_schedule(struct napi_struct *n);
 
 static inline int napi_disable_pending(struct napi_struct *n)
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e38d3c9dccda..de99025f2c5d 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -63,6 +63,13 @@ static inline int netpoll_rx(struct sk_buff *skb)
 	return ret;
 }
 
+static inline int netpoll_rx_on(struct sk_buff *skb)
+{
+	struct netpoll_info *npinfo = skb->dev->npinfo;
+
+	return npinfo && (npinfo->rx_np || npinfo->rx_flags);
+}
+
 static inline int netpoll_receive_skb(struct sk_buff *skb)
 {
 	if (!list_empty(&skb->dev->napi_list))
@@ -99,6 +106,10 @@ static inline int netpoll_rx(struct sk_buff *skb)
 {
 	return 0;
 }
+static inline int netpoll_rx_on(struct sk_buff *skb)
+{
+	return 0;
+}
 static inline int netpoll_receive_skb(struct sk_buff *skb)
 {
 	return 0;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 2d6e405fc498..6227248597c4 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -79,6 +79,9 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 {
 	struct sk_buff *p;
 
+	if (netpoll_rx_on(skb))
+		return GRO_NORMAL;
+
 	if (skb_bond_should_drop(skb))
 		goto drop;
 
@@ -98,7 +101,7 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 	return dev_gro_receive(napi, skb);
 
 drop:
-	return 2;
+	return GRO_DROP;
 }
 
 int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
@@ -106,9 +109,6 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 {
 	skb_gro_reset_offset(skb);
 
-	if (netpoll_receive_skb(skb))
-		return NET_RX_DROP;
-
 	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
 }
 EXPORT_SYMBOL(vlan_gro_receive);
@@ -121,9 +121,6 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 	if (!skb)
 		return NET_RX_DROP;
 
-	if (netpoll_receive_skb(skb))
-		return NET_RX_DROP;
-
 	return napi_frags_finish(napi, skb,
 				 vlan_gro_common(napi, grp, vlan_tci, skb));
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 033d7ca28e6e..7bd3c29c5a78 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,14 +135,6 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
-enum {
-	GRO_MERGED,
-	GRO_MERGED_FREE,
-	GRO_HELD,
-	GRO_NORMAL,
-	GRO_DROP,
-};
-
 /*
  *	The list of packet types we will receive (as opposed to discard)
  *	and the routines to invoke.
@@ -2474,6 +2466,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 
+	if (netpoll_rx_on(skb))
+		return GRO_NORMAL;
+
 	for (p = napi->gro_list; p; p = p->next) {
 		NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
 			skb_mac_header(p), skb_gro_mac_header(skb));
@@ -2487,9 +2482,6 @@ int napi_skb_finish(int ret, struct sk_buff *skb)
 {
 	int err = NET_RX_SUCCESS;
 
-	if (netpoll_receive_skb(skb))
-		return NET_RX_DROP;
-
 	switch (ret) {
 	case GRO_NORMAL:
 		return netif_receive_skb(skb);
@@ -2587,9 +2579,6 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 {
 	int err = NET_RX_SUCCESS;
 
-	if (netpoll_receive_skb(skb))
-		return NET_RX_DROP;
-
 	switch (ret) {
 	case GRO_NORMAL:
 	case GRO_HELD:
-- 
cgit v1.2.3


From 7db90f4a25bd4184f3d36dfa4f512f53b0448da7 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Mon, 9 Mar 2009 22:07:41 -0400
Subject: cfg80211: move enum reg_set_by to nl80211.h

We do this so we can later inform userspace who set the
regulatory domain and provide details of the request.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/main.c |  2 +-
 drivers/net/wireless/ath9k/regd.c | 31 ++++++++-------
 drivers/net/wireless/ath9k/regd.h |  3 +-
 include/linux/nl80211.h           | 19 +++++++++
 include/net/cfg80211.h            | 24 ++----------
 net/wireless/core.c               |  2 +-
 net/wireless/core.h               |  3 +-
 net/wireless/reg.c                | 82 +++++++++++++++++++++------------------
 8 files changed, 90 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 1d6b05c0d800..e9b3f365f099 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -1670,7 +1670,7 @@ int ath_attach(u16 devid, struct ath_softc *sc)
 	}
 	wiphy_apply_custom_regulatory(hw->wiphy, regd);
 	ath9k_reg_apply_radar_flags(hw->wiphy);
-	ath9k_reg_apply_world_flags(hw->wiphy, REGDOM_SET_BY_DRIVER);
+	ath9k_reg_apply_world_flags(hw->wiphy, NL80211_REGDOM_SET_BY_DRIVER);
 
 	INIT_WORK(&sc->chan_work, ath9k_wiphy_chan_work);
 	INIT_DELAYED_WORK(&sc->wiphy_work, ath9k_wiphy_work);
diff --git a/drivers/net/wireless/ath9k/regd.c b/drivers/net/wireless/ath9k/regd.c
index ff0afc02f3ce..b8f9b6d6bec4 100644
--- a/drivers/net/wireless/ath9k/regd.c
+++ b/drivers/net/wireless/ath9k/regd.c
@@ -168,8 +168,9 @@ static bool ath9k_is_radar_freq(u16 center_freq)
  *   received a beacon on a channel we can enable active scan and
  *   adhoc (or beaconing).
  */
-static void ath9k_reg_apply_beaconing_flags(struct wiphy *wiphy,
-					     enum reg_set_by setby)
+static void ath9k_reg_apply_beaconing_flags(
+	struct wiphy *wiphy,
+	enum nl80211_reg_initiator initiator)
 {
 	enum ieee80211_band band;
 	struct ieee80211_supported_band *sband;
@@ -194,7 +195,7 @@ static void ath9k_reg_apply_beaconing_flags(struct wiphy *wiphy,
 			    (ch->flags & IEEE80211_CHAN_RADAR))
 				continue;
 
-			if (setby == REGDOM_SET_BY_COUNTRY_IE) {
+			if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) {
 				r = freq_reg_info(wiphy, ch->center_freq,
 					&bandwidth, &reg_rule);
 				if (r)
@@ -226,8 +227,9 @@ static void ath9k_reg_apply_beaconing_flags(struct wiphy *wiphy,
 }
 
 /* Allows active scan scan on Ch 12 and 13 */
-static void ath9k_reg_apply_active_scan_flags(struct wiphy *wiphy,
-					      enum reg_set_by setby)
+static void ath9k_reg_apply_active_scan_flags(
+	struct wiphy *wiphy,
+	enum nl80211_reg_initiator initiator)
 {
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_channel *ch;
@@ -241,7 +243,7 @@ static void ath9k_reg_apply_active_scan_flags(struct wiphy *wiphy,
 	 * If no country IE has been received always enable active scan
 	 * on these channels. This is only done for specific regulatory SKUs
 	 */
-	if (setby != REGDOM_SET_BY_COUNTRY_IE) {
+	if (initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) {
 		ch = &sband->channels[11]; /* CH 12 */
 		if (ch->flags & IEEE80211_CHAN_PASSIVE_SCAN)
 			ch->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN;
@@ -308,7 +310,8 @@ void ath9k_reg_apply_radar_flags(struct wiphy *wiphy)
 	}
 }
 
-void ath9k_reg_apply_world_flags(struct wiphy *wiphy, enum reg_set_by setby)
+void ath9k_reg_apply_world_flags(struct wiphy *wiphy,
+				 enum nl80211_reg_initiator initiator)
 {
 	struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy);
 	struct ath_wiphy *aphy = hw->priv;
@@ -320,11 +323,11 @@ void ath9k_reg_apply_world_flags(struct wiphy *wiphy, enum reg_set_by setby)
 	case 0x63:
 	case 0x66:
 	case 0x67:
-		ath9k_reg_apply_beaconing_flags(wiphy, setby);
+		ath9k_reg_apply_beaconing_flags(wiphy, initiator);
 		break;
 	case 0x68:
-		ath9k_reg_apply_beaconing_flags(wiphy, setby);
-		ath9k_reg_apply_active_scan_flags(wiphy, setby);
+		ath9k_reg_apply_beaconing_flags(wiphy, initiator);
+		ath9k_reg_apply_active_scan_flags(wiphy, initiator);
 		break;
 	}
 	return;
@@ -340,11 +343,11 @@ int ath9k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request)
 	ath9k_reg_apply_radar_flags(wiphy);
 
 	switch (request->initiator) {
-	case REGDOM_SET_BY_DRIVER:
-	case REGDOM_SET_BY_CORE:
-	case REGDOM_SET_BY_USER:
+	case NL80211_REGDOM_SET_BY_DRIVER:
+	case NL80211_REGDOM_SET_BY_CORE:
+	case NL80211_REGDOM_SET_BY_USER:
 		break;
-	case REGDOM_SET_BY_COUNTRY_IE:
+	case NL80211_REGDOM_SET_BY_COUNTRY_IE:
 		if (ath9k_is_world_regd(sc->sc_ah))
 			ath9k_reg_apply_world_flags(wiphy, request->initiator);
 		break;
diff --git a/drivers/net/wireless/ath9k/regd.h b/drivers/net/wireless/ath9k/regd.h
index d48160d0c0e9..8f885f3bc8df 100644
--- a/drivers/net/wireless/ath9k/regd.h
+++ b/drivers/net/wireless/ath9k/regd.h
@@ -236,7 +236,8 @@ enum CountryCode {
 bool ath9k_is_world_regd(struct ath_hw *ah);
 const struct ieee80211_regdomain *ath9k_world_regdomain(struct ath_hw *ah);
 const struct ieee80211_regdomain *ath9k_default_world_regdomain(void);
-void ath9k_reg_apply_world_flags(struct wiphy *wiphy, enum reg_set_by setby);
+void ath9k_reg_apply_world_flags(struct wiphy *wiphy,
+				 enum nl80211_reg_initiator initiator);
 void ath9k_reg_apply_radar_flags(struct wiphy *wiphy);
 int ath9k_regd_init(struct ath_hw *ah);
 bool ath9k_regd_is_eeprom_valid(struct ath_hw *ah);
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f6e56370ea65..c0fd432b57dc 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -672,6 +672,25 @@ enum nl80211_bitrate_attr {
 	NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_initiator - Indicates the initiator of a reg domain request
+ * @NL80211_REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world
+ * 	regulatory domain.
+ * @NL80211_REGDOM_SET_BY_USER: User asked the wireless core to set the
+ * 	regulatory domain.
+ * @NL80211_REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the
+ * 	wireless core it thinks its knows the regulatory domain we should be in.
+ * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an
+ * 	802.11 country information element with regulatory information it
+ * 	thinks we should consider.
+ */
+enum nl80211_reg_initiator {
+	NL80211_REGDOM_SET_BY_CORE,
+	NL80211_REGDOM_SET_BY_USER,
+	NL80211_REGDOM_SET_BY_DRIVER,
+	NL80211_REGDOM_SET_BY_COUNTRY_IE,
+};
+
 /**
  * enum nl80211_reg_rule_attr - regulatory rule attributes
  * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f195ea460811..50f3fd9ff524 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -348,28 +348,10 @@ struct bss_parameters {
 	u8 basic_rates_len;
 };
 
-/**
- * enum reg_set_by - Indicates who is trying to set the regulatory domain
- * @REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world regulatory domain.
- * @REGDOM_SET_BY_USER: User asked the wireless core to set the
- * 	regulatory domain.
- * @REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the wireless core
- *	it thinks its knows the regulatory domain we should be in.
- * @REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an 802.11 country
- *	information element with regulatory information it thinks we
- *	should consider.
- */
-enum reg_set_by {
-	REGDOM_SET_BY_CORE,
-	REGDOM_SET_BY_USER,
-	REGDOM_SET_BY_DRIVER,
-	REGDOM_SET_BY_COUNTRY_IE,
-};
-
 /**
  * enum environment_cap - Environment parsed from country IE
  * @ENVIRON_ANY: indicates country IE applies to both indoor and
- * 	outdoor operation.
+ *	outdoor operation.
  * @ENVIRON_INDOOR: indicates country IE applies only to indoor operation
  * @ENVIRON_OUTDOOR: indicates country IE applies only to outdoor operation
  */
@@ -388,7 +370,7 @@ enum environment_cap {
  * 	and potentially inform users of which devices specifically
  * 	cased the conflicts.
  * @initiator: indicates who sent this request, could be any of
- * 	of those set in reg_set_by, %REGDOM_SET_BY_*
+ * 	of those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*)
  * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested
  * 	regulatory domain. We have a few special codes:
  * 	00 - World regulatory domain
@@ -405,7 +387,7 @@ enum environment_cap {
  */
 struct regulatory_request {
 	int wiphy_idx;
-	enum reg_set_by initiator;
+	enum nl80211_reg_initiator initiator;
 	char alpha2[2];
 	bool intersect;
 	u32 country_ie_checksum;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index dd7f222919fe..c939f5ee065e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -350,7 +350,7 @@ int wiphy_register(struct wiphy *wiphy)
 	mutex_lock(&cfg80211_mutex);
 
 	/* set up regulatory info */
-	wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE);
+	wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE);
 
 	res = device_add(&drv->wiphy.dev);
 	if (res)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index f6c53f5807f4..6acd483a61f8 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -136,7 +136,8 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv,
 			       char *newname);
 
 void ieee80211_set_bitrate_flags(struct wiphy *wiphy);
-void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby);
+void wiphy_update_regulatory(struct wiphy *wiphy,
+			     enum nl80211_reg_initiator setby);
 
 void cfg80211_bss_expire(struct cfg80211_registered_device *dev);
 void cfg80211_bss_age(struct cfg80211_registered_device *dev,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 47ff44751b70..68fde6d33dc3 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -857,8 +857,8 @@ static int freq_reg_info_regd(struct wiphy *wiphy,
 	 * Follow the driver's regulatory domain, if present, unless a country
 	 * IE has been processed or a user wants to help complaince further
 	 */
-	if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE &&
-	    last_request->initiator != REGDOM_SET_BY_USER &&
+	if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
+	    last_request->initiator != NL80211_REGDOM_SET_BY_USER &&
 	    wiphy->regd)
 		regd = wiphy->regd;
 
@@ -943,7 +943,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 		 * http://tinyurl.com/11d-clarification
 		 */
 		if (r == -ERANGE &&
-		    last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) {
+		    last_request->initiator ==
+		    NL80211_REGDOM_SET_BY_COUNTRY_IE) {
 #ifdef CONFIG_CFG80211_REG_DEBUG
 			printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz "
 				"intact on %s - no rule found in band on "
@@ -956,7 +957,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 		 * for the band so we respect its band definitions
 		 */
 #ifdef CONFIG_CFG80211_REG_DEBUG
-			if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE)
+			if (last_request->initiator ==
+			    NL80211_REGDOM_SET_BY_COUNTRY_IE)
 				printk(KERN_DEBUG "cfg80211: Disabling "
 					"channel %d MHz on %s due to "
 					"Country IE\n",
@@ -970,7 +972,7 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 
 	power_rule = &reg_rule->power_rule;
 
-	if (last_request->initiator == REGDOM_SET_BY_DRIVER &&
+	if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
 	    request_wiphy && request_wiphy == wiphy &&
 	    request_wiphy->strict_regulatory) {
 		/*
@@ -1011,11 +1013,12 @@ static void handle_band(struct wiphy *wiphy, enum ieee80211_band band)
 		handle_channel(wiphy, band, i);
 }
 
-static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby)
+static bool ignore_reg_update(struct wiphy *wiphy,
+			      enum nl80211_reg_initiator initiator)
 {
 	if (!last_request)
 		return true;
-	if (setby == REGDOM_SET_BY_CORE &&
+	if (initiator == NL80211_REGDOM_SET_BY_CORE &&
 		  wiphy->custom_regulatory)
 		return true;
 	/*
@@ -1028,12 +1031,12 @@ static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby)
 	return false;
 }
 
-static void update_all_wiphy_regulatory(enum reg_set_by setby)
+static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator)
 {
 	struct cfg80211_registered_device *drv;
 
 	list_for_each_entry(drv, &cfg80211_drv_list, list)
-		wiphy_update_regulatory(&drv->wiphy, setby);
+		wiphy_update_regulatory(&drv->wiphy, initiator);
 }
 
 static void handle_reg_beacon(struct wiphy *wiphy,
@@ -1124,7 +1127,7 @@ static bool reg_is_world_roaming(struct wiphy *wiphy)
 	if (is_world_regdom(cfg80211_regdomain->alpha2) ||
 	    (wiphy->regd && is_world_regdom(wiphy->regd->alpha2)))
 		return true;
-	if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE &&
+	if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
 	    wiphy->custom_regulatory)
 		return true;
 	return false;
@@ -1138,11 +1141,12 @@ static void reg_process_beacons(struct wiphy *wiphy)
 	wiphy_update_beacon_reg(wiphy);
 }
 
-void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby)
+void wiphy_update_regulatory(struct wiphy *wiphy,
+			     enum nl80211_reg_initiator initiator)
 {
 	enum ieee80211_band band;
 
-	if (ignore_reg_update(wiphy, setby))
+	if (ignore_reg_update(wiphy, initiator))
 		goto out;
 	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 		if (wiphy->bands[band])
@@ -1255,15 +1259,16 @@ static int ignore_request(struct wiphy *wiphy,
 		return 0;
 
 	switch (pending_request->initiator) {
-	case REGDOM_SET_BY_CORE:
+	case NL80211_REGDOM_SET_BY_CORE:
 		return -EINVAL;
-	case REGDOM_SET_BY_COUNTRY_IE:
+	case NL80211_REGDOM_SET_BY_COUNTRY_IE:
 
 		last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
 
 		if (unlikely(!is_an_alpha2(pending_request->alpha2)))
 			return -EINVAL;
-		if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) {
+		if (last_request->initiator ==
+		    NL80211_REGDOM_SET_BY_COUNTRY_IE) {
 			if (last_wiphy != wiphy) {
 				/*
 				 * Two cards with two APs claiming different
@@ -1284,8 +1289,8 @@ static int ignore_request(struct wiphy *wiphy,
 			return -EALREADY;
 		}
 		return REG_INTERSECT;
-	case REGDOM_SET_BY_DRIVER:
-		if (last_request->initiator == REGDOM_SET_BY_CORE) {
+	case NL80211_REGDOM_SET_BY_DRIVER:
+		if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) {
 			if (is_old_static_regdom(cfg80211_regdomain))
 				return 0;
 			if (regdom_changes(pending_request->alpha2))
@@ -1298,28 +1303,28 @@ static int ignore_request(struct wiphy *wiphy,
 		 * back in or if you add a new device for which the previously
 		 * loaded card also agrees on the regulatory domain.
 		 */
-		if (last_request->initiator == REGDOM_SET_BY_DRIVER &&
+		if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
 		    !regdom_changes(pending_request->alpha2))
 			return -EALREADY;
 
 		return REG_INTERSECT;
-	case REGDOM_SET_BY_USER:
-		if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE)
+	case NL80211_REGDOM_SET_BY_USER:
+		if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)
 			return REG_INTERSECT;
 		/*
 		 * If the user knows better the user should set the regdom
 		 * to their country before the IE is picked up
 		 */
-		if (last_request->initiator == REGDOM_SET_BY_USER &&
+		if (last_request->initiator == NL80211_REGDOM_SET_BY_USER &&
 			  last_request->intersect)
 			return -EOPNOTSUPP;
 		/*
 		 * Process user requests only after previous user/driver/core
 		 * requests have been processed
 		 */
-		if (last_request->initiator == REGDOM_SET_BY_CORE ||
-		    last_request->initiator == REGDOM_SET_BY_DRIVER ||
-		    last_request->initiator == REGDOM_SET_BY_USER) {
+		if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE ||
+		    last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER ||
+		    last_request->initiator == NL80211_REGDOM_SET_BY_USER) {
 			if (regdom_changes(last_request->alpha2))
 				return -EAGAIN;
 		}
@@ -1359,7 +1364,8 @@ static int __regulatory_hint(struct wiphy *wiphy,
 	r = ignore_request(wiphy, pending_request);
 
 	if (r == REG_INTERSECT) {
-		if (pending_request->initiator == REGDOM_SET_BY_DRIVER) {
+		if (pending_request->initiator ==
+		    NL80211_REGDOM_SET_BY_DRIVER) {
 			r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain);
 			if (r) {
 				kfree(pending_request);
@@ -1374,7 +1380,8 @@ static int __regulatory_hint(struct wiphy *wiphy,
 		 * wiphy
 		 */
 		if (r == -EALREADY &&
-		    pending_request->initiator == REGDOM_SET_BY_DRIVER) {
+		    pending_request->initiator ==
+		    NL80211_REGDOM_SET_BY_DRIVER) {
 			r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain);
 			if (r) {
 				kfree(pending_request);
@@ -1425,7 +1432,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 	if (wiphy_idx_valid(reg_request->wiphy_idx))
 		wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx);
 
-	if (reg_request->initiator == REGDOM_SET_BY_DRIVER &&
+	if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
 	    !wiphy) {
 		kfree(reg_request);
 		goto out;
@@ -1439,7 +1446,7 @@ out:
 	mutex_unlock(&cfg80211_mutex);
 }
 
-/* Processes regulatory hints, this is all the REGDOM_SET_BY_* */
+/* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */
 static void reg_process_pending_hints(void)
 	{
 	struct regulatory_request *reg_request;
@@ -1523,7 +1530,7 @@ static int regulatory_hint_core(const char *alpha2)
 
 	request->alpha2[0] = alpha2[0];
 	request->alpha2[1] = alpha2[1];
-	request->initiator = REGDOM_SET_BY_CORE;
+	request->initiator = NL80211_REGDOM_SET_BY_CORE;
 
 	queue_regulatory_request(request);
 
@@ -1544,7 +1551,7 @@ int regulatory_hint_user(const char *alpha2)
 	request->wiphy_idx = WIPHY_IDX_STALE;
 	request->alpha2[0] = alpha2[0];
 	request->alpha2[1] = alpha2[1];
-	request->initiator = REGDOM_SET_BY_USER,
+	request->initiator = NL80211_REGDOM_SET_BY_USER,
 
 	queue_regulatory_request(request);
 
@@ -1570,7 +1577,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
 
 	request->alpha2[0] = alpha2[0];
 	request->alpha2[1] = alpha2[1];
-	request->initiator = REGDOM_SET_BY_DRIVER;
+	request->initiator = NL80211_REGDOM_SET_BY_DRIVER;
 
 	queue_regulatory_request(request);
 
@@ -1719,7 +1726,7 @@ void regulatory_hint_11d(struct wiphy *wiphy,
 	request->wiphy_idx = get_wiphy_idx(wiphy);
 	request->alpha2[0] = rd->alpha2[0];
 	request->alpha2[1] = rd->alpha2[1];
-	request->initiator = REGDOM_SET_BY_COUNTRY_IE;
+	request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE;
 	request->country_ie_checksum = checksum;
 	request->country_ie_env = env;
 
@@ -1827,7 +1834,8 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
 
 	if (is_intersected_alpha2(rd->alpha2)) {
 
-		if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) {
+		if (last_request->initiator ==
+		    NL80211_REGDOM_SET_BY_COUNTRY_IE) {
 			struct cfg80211_registered_device *drv;
 			drv = cfg80211_drv_by_wiphy_idx(
 				last_request->wiphy_idx);
@@ -1919,7 +1927,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 	 * rd is non static (it means CRDA was present and was used last)
 	 * and the pending request came in from a country IE
 	 */
-	if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) {
+	if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) {
 		/*
 		 * If someone else asked us to change the rd lets only bother
 		 * checking if the alpha2 changes if CRDA was already called
@@ -1951,7 +1959,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 	if (!last_request->intersect) {
 		int r;
 
-		if (last_request->initiator != REGDOM_SET_BY_DRIVER) {
+		if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) {
 			reset_regdomains();
 			cfg80211_regdomain = rd;
 			return 0;
@@ -1975,7 +1983,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 
 	/* Intersection requires a bit more work */
 
-	if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) {
+	if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) {
 
 		intersected_rd = regdom_intersect(rd, cfg80211_regdomain);
 		if (!intersected_rd)
@@ -1986,7 +1994,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 		 * However if a driver requested this specific regulatory
 		 * domain we keep it for its private use
 		 */
-		if (last_request->initiator == REGDOM_SET_BY_DRIVER)
+		if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER)
 			request_wiphy->regd = rd;
 		else
 			kfree(rd);
-- 
cgit v1.2.3


From 73d54c9e74c4d8ee8a41bc516f481f0f754eca32 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Mon, 9 Mar 2009 22:07:42 -0400
Subject: cfg80211: add regulatory netlink multicast group

This allows us to send to userspace "regulatory" events.
For now we just send an event when we change regulatory domains.
We also notify userspace when devices are using their own custom
world roaming regulatory domains.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 48 ++++++++++++++++++++++++++++++++++++++
 net/wireless/core.c     | 11 +++++++++
 net/wireless/nl80211.c  | 62 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |  5 ++++
 net/wireless/reg.c      | 13 ++++++++++-
 5 files changed, 138 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index c0fd432b57dc..f33aa08dd9b3 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -150,6 +150,17 @@
  * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons,
  *	partial scan results may be available
  *
+ * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain
+ * 	has been changed and provides details of the request information
+ * 	that caused the change such as who initiated the regulatory request
+ * 	(%NL80211_ATTR_REG_INITIATOR), the wiphy_idx
+ * 	(%NL80211_ATTR_REG_ALPHA2) on which the request was made from if
+ * 	the initiator was %NL80211_REGDOM_SET_BY_COUNTRY_IE or
+ * 	%NL80211_REGDOM_SET_BY_DRIVER, the type of regulatory domain
+ * 	set (%NL80211_ATTR_REG_TYPE), if the type of regulatory domain is
+ * 	%NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on
+ * 	to (%NL80211_ATTR_REG_ALPHA2).
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -204,6 +215,8 @@ enum nl80211_commands {
 	NL80211_CMD_NEW_SCAN_RESULTS,
 	NL80211_CMD_SCAN_ABORTED,
 
+	NL80211_CMD_REG_CHANGE,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -218,6 +231,8 @@ enum nl80211_commands {
 #define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS
 #define NL80211_CMD_SET_MGMT_EXTRA_IE NL80211_CMD_SET_MGMT_EXTRA_IE
 
+#define NL80211_CMD_REG_CHANGE NL80211_CMD_REG_CHANGE
+
 /**
  * enum nl80211_attrs - nl80211 netlink attributes
  *
@@ -329,6 +344,11 @@ enum nl80211_commands {
  *	messages carried the same generation number)
  * @NL80211_ATTR_BSS: scan result BSS
  *
+ * @NL80211_ATTR_REG_INITIATOR: indicates who requested the regulatory domain
+ * 	currently in effect. This could be any of the %NL80211_REGDOM_SET_BY_*
+ * @NL80211_ATTR_REG_TYPE: indicates the type of the regulatory domain currently
+ * 	set. This can be one of the nl80211_reg_type (%NL80211_REGDOM_TYPE_*)
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -403,6 +423,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_SCAN_GENERATION,
 	NL80211_ATTR_BSS,
 
+	NL80211_ATTR_REG_INITIATOR,
+	NL80211_ATTR_REG_TYPE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -420,6 +443,8 @@ enum nl80211_attrs {
 #define NL80211_ATTR_WIPHY_CHANNEL_TYPE NL80211_ATTR_WIPHY_CHANNEL_TYPE
 #define NL80211_ATTR_MGMT_SUBTYPE NL80211_ATTR_MGMT_SUBTYPE
 #define NL80211_ATTR_IE NL80211_ATTR_IE
+#define NL80211_ATTR_REG_INITIATOR NL80211_ATTR_REG_INITIATOR
+#define NL80211_ATTR_REG_TYPE NL80211_ATTR_REG_TYPE
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
@@ -691,6 +716,29 @@ enum nl80211_reg_initiator {
 	NL80211_REGDOM_SET_BY_COUNTRY_IE,
 };
 
+/**
+ * enum nl80211_reg_type - specifies the type of regulatory domain
+ * @NL80211_REGDOM_TYPE_COUNTRY: the regulatory domain set is one that pertains
+ *	to a specific country. When this is set you can count on the
+ *	ISO / IEC 3166 alpha2 country code being valid.
+ * @NL80211_REGDOM_TYPE_WORLD: the regulatory set domain is the world regulatory
+ * 	domain.
+ * @NL80211_REGDOM_TYPE_CUSTOM_WORLD: the regulatory domain set is a custom
+ * 	driver specific world regulatory domain. These do not apply system-wide
+ * 	and are only applicable to the individual devices which have requested
+ * 	them to be applied.
+ * @NL80211_REGDOM_TYPE_INTERSECTION: the regulatory domain set is the product
+ *	of an intersection between two regulatory domains -- the previously
+ *	set regulatory domain on the system and the last accepted regulatory
+ *	domain request to be processed.
+ */
+enum nl80211_reg_type {
+	NL80211_REGDOM_TYPE_COUNTRY,
+	NL80211_REGDOM_TYPE_WORLD,
+	NL80211_REGDOM_TYPE_CUSTOM_WORLD,
+	NL80211_REGDOM_TYPE_INTERSECTION,
+};
+
 /**
  * enum nl80211_reg_rule_attr - regulatory rule attributes
  * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional
diff --git a/net/wireless/core.c b/net/wireless/core.c
index c939f5ee065e..17fe39049740 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -365,6 +365,17 @@ int wiphy_register(struct wiphy *wiphy)
 	if (IS_ERR(drv->wiphy.debugfsdir))
 		drv->wiphy.debugfsdir = NULL;
 
+	if (wiphy->custom_regulatory) {
+		struct regulatory_request request;
+
+		request.wiphy_idx = get_wiphy_idx(wiphy);
+		request.initiator = NL80211_REGDOM_SET_BY_DRIVER;
+		request.alpha2[0] = '9';
+		request.alpha2[1] = '9';
+
+		nl80211_send_reg_change_event(&request);
+	}
+
 	res = 0;
 out_unlock:
 	mutex_unlock(&cfg80211_mutex);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 531bb67cf502..8ac3d26014a8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2739,6 +2739,9 @@ static struct genl_multicast_group nl80211_config_mcgrp = {
 static struct genl_multicast_group nl80211_scan_mcgrp = {
 	.name = "scan",
 };
+static struct genl_multicast_group nl80211_regulatory_mcgrp = {
+	.name = "regulatory",
+};
 
 /* notification functions */
 
@@ -2818,6 +2821,61 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
 	genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL);
 }
 
+/*
+ * This can happen on global regulatory changes or device specific settings
+ * based on custom world regulatory domains.
+ */
+void nl80211_send_reg_change_event(struct regulatory_request *request)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_CHANGE);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	/* Userspace can always count this one always being set */
+	NLA_PUT_U8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator);
+
+	if (request->alpha2[0] == '0' && request->alpha2[1] == '0')
+		NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE,
+			   NL80211_REGDOM_TYPE_WORLD);
+	else if (request->alpha2[0] == '9' && request->alpha2[1] == '9')
+		NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE,
+			   NL80211_REGDOM_TYPE_CUSTOM_WORLD);
+	else if ((request->alpha2[0] == '9' && request->alpha2[1] == '8') ||
+		 request->intersect)
+		NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE,
+			   NL80211_REGDOM_TYPE_INTERSECTION);
+	else {
+		NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE,
+			   NL80211_REGDOM_TYPE_COUNTRY);
+		NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, request->alpha2);
+	}
+
+	if (wiphy_idx_valid(request->wiphy_idx))
+		NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_KERNEL);
+
+	return;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 /* initialisation/exit functions */
 
 int nl80211_init(void)
@@ -2842,6 +2900,10 @@ int nl80211_init(void)
 	if (err)
 		goto err_out;
 
+	err = genl_register_mc_group(&nl80211_fam, &nl80211_regulatory_mcgrp);
+	if (err)
+		goto err_out;
+
 	return 0;
  err_out:
 	genl_unregister_family(&nl80211_fam);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 69787b621365..e65a3c38c52f 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -11,6 +11,7 @@ extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
 				   struct net_device *netdev);
 extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
 				      struct net_device *netdev);
+extern void nl80211_send_reg_change_event(struct regulatory_request *request);
 #else
 static inline int nl80211_init(void)
 {
@@ -31,6 +32,10 @@ static inline void nl80211_send_scan_aborted(
 					struct cfg80211_registered_device *rdev,
 					struct net_device *netdev)
 {}
+static inline void
+nl80211_send_reg_change_event(struct regulatory_request *request)
+{
+}
 #endif /* CONFIG_NL80211 */
 
 #endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 68fde6d33dc3..eb8b8ed16155 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -41,6 +41,7 @@
 #include <net/cfg80211.h>
 #include "core.h"
 #include "reg.h"
+#include "nl80211.h"
 
 /* Receipt of information from last regulatory request */
 static struct regulatory_request *last_request;
@@ -1403,8 +1404,16 @@ new_request:
 	pending_request = NULL;
 
 	/* When r == REG_INTERSECT we do need to call CRDA */
-	if (r < 0)
+	if (r < 0) {
+		/*
+		 * Since CRDA will not be called in this case as we already
+		 * have applied the requested regulatory domain before we just
+		 * inform userspace we have processed the request
+		 */
+		if (r == -EALREADY)
+			nl80211_send_reg_change_event(last_request);
 		return r;
+	}
 
 	/*
 	 * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled
@@ -2084,6 +2093,8 @@ int set_regdom(const struct ieee80211_regdomain *rd)
 
 	print_regdomain(cfg80211_regdomain);
 
+	nl80211_send_reg_change_event(last_request);
+
 	return r;
 }
 
-- 
cgit v1.2.3


From 27bf91d6a0d5a9c7224e8687754249bba67dd4cf Mon Sep 17 00:00:00 2001
From: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Date: Wed, 18 Mar 2009 19:45:11 -0700
Subject: mlx4_core: Add link type autosensing

When a port's link is down (except to driver restart) and the port is
configured for auto sensing, we try to sense port link type (Ethernet
or InfiniBand) in order to determine how to initialize the port.  If
the port type needs to be changed, all mlx4 for the device interfaces
are unregistered and then registered again with the new port
types.  Sensing is done with intervals of 3 seconds.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/net/mlx4/Makefile   |   2 +-
 drivers/net/mlx4/catas.c    |  16 +----
 drivers/net/mlx4/eq.c       |  16 +++--
 drivers/net/mlx4/main.c     | 104 +++++++++++++++++++++--------
 drivers/net/mlx4/mlx4.h     |  27 +++++++-
 drivers/net/mlx4/sense.c    | 156 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mlx4/cmd.h    |   1 +
 include/linux/mlx4/device.h |   6 +-
 8 files changed, 277 insertions(+), 51 deletions(-)
 create mode 100644 drivers/net/mlx4/sense.c

(limited to 'include/linux')

diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index a7a97bf998f8..21040a0d81fe 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_MLX4_CORE)		+= mlx4_core.o
 
 mlx4_core-y :=	alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \
-		mr.o pd.o port.o profile.o qp.o reset.o srq.o
+		mr.o pd.o port.o profile.o qp.o reset.o sense.o srq.o
 
 obj-$(CONFIG_MLX4_EN)               += mlx4_en.o
 
diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c
index f094ee00c416..aa9674b7f19c 100644
--- a/drivers/net/mlx4/catas.c
+++ b/drivers/net/mlx4/catas.c
@@ -42,7 +42,6 @@ enum {
 static DEFINE_SPINLOCK(catas_lock);
 
 static LIST_HEAD(catas_list);
-static struct workqueue_struct *catas_wq;
 static struct work_struct catas_work;
 
 static int internal_err_reset = 1;
@@ -77,7 +76,7 @@ static void poll_catas(unsigned long dev_ptr)
 			list_add(&priv->catas_err.list, &catas_list);
 			spin_unlock(&catas_lock);
 
-			queue_work(catas_wq, &catas_work);
+			queue_work(mlx4_wq, &catas_work);
 		}
 	} else
 		mod_timer(&priv->catas_err.timer,
@@ -146,18 +145,7 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev)
 	spin_unlock_irq(&catas_lock);
 }
 
-int __init mlx4_catas_init(void)
+void  __init mlx4_catas_init(void)
 {
 	INIT_WORK(&catas_work, catas_reset);
-
-	catas_wq = create_singlethread_workqueue("mlx4_err");
-	if (!catas_wq)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void mlx4_catas_cleanup(void)
-{
-	destroy_workqueue(catas_wq);
 }
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 2c19bff7cbab..8830dcb92ec8 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -163,6 +163,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 	int cqn;
 	int eqes_found = 0;
 	int set_ci = 0;
+	int port;
 
 	while ((eqe = next_eqe_sw(eq))) {
 		/*
@@ -203,11 +204,16 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 			break;
 
 		case MLX4_EVENT_TYPE_PORT_CHANGE:
-			mlx4_dispatch_event(dev,
-					    eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ?
-					    MLX4_DEV_EVENT_PORT_UP :
-					    MLX4_DEV_EVENT_PORT_DOWN,
-					    be32_to_cpu(eqe->event.port_change.port) >> 28);
+			port = be32_to_cpu(eqe->event.port_change.port) >> 28;
+			if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) {
+				mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN,
+						    port);
+				mlx4_priv(dev)->sense.do_sense_port[port] = 1;
+			} else {
+				mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP,
+						    port);
+				mlx4_priv(dev)->sense.do_sense_port[port] = 0;
+			}
 			break;
 
 		case MLX4_EVENT_TYPE_CQ_ERROR:
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 8480f0346844..a66f5b2fd288 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -51,6 +51,8 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
+struct workqueue_struct *mlx4_wq;
+
 #ifdef CONFIG_MLX4_DEBUG
 
 int mlx4_debug_level = 0;
@@ -98,24 +100,23 @@ module_param_named(use_prio, use_prio, bool, 0444);
 MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
 		  "(0/1, default 0)");
 
-static int mlx4_check_port_params(struct mlx4_dev *dev,
-				  enum mlx4_port_type *port_type)
+int mlx4_check_port_params(struct mlx4_dev *dev,
+			   enum mlx4_port_type *port_type)
 {
 	int i;
 
 	for (i = 0; i < dev->caps.num_ports - 1; i++) {
-		if (port_type[i] != port_type[i+1] &&
-		    !(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
-			mlx4_err(dev, "Only same port types supported "
-				 "on this HCA, aborting.\n");
-			return -EINVAL;
+		if (port_type[i] != port_type[i + 1]) {
+			if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+				mlx4_err(dev, "Only same port types supported "
+					 "on this HCA, aborting.\n");
+				return -EINVAL;
+			}
+			if (port_type[i] == MLX4_PORT_TYPE_ETH &&
+			    port_type[i + 1] == MLX4_PORT_TYPE_IB)
+				return -EINVAL;
 		}
 	}
-	if ((port_type[0] == MLX4_PORT_TYPE_ETH) &&
-	    (port_type[1] == MLX4_PORT_TYPE_IB)) {
-		mlx4_err(dev, "eth-ib configuration is not supported.\n");
-		return -EINVAL;
-	}
 
 	for (i = 0; i < dev->caps.num_ports; i++) {
 		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
@@ -225,6 +226,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
 		else
 			dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+		dev->caps.possible_type[i] = dev->caps.port_type[i];
+		mlx4_priv(dev)->sense.sense_allowed[i] =
+			dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;
 
 		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
 			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
@@ -263,14 +267,16 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  * Change the port configuration of the device.
  * Every user of this function must hold the port mutex.
  */
-static int mlx4_change_port_types(struct mlx4_dev *dev,
-				  enum mlx4_port_type *port_types)
+int mlx4_change_port_types(struct mlx4_dev *dev,
+			   enum mlx4_port_type *port_types)
 {
 	int err = 0;
 	int change = 0;
 	int port;
 
 	for (port = 0; port <  dev->caps.num_ports; port++) {
+		/* Change the port type only if the new type is different
+		 * from the current, and not set to Auto */
 		if (port_types[port] != dev->caps.port_type[port + 1]) {
 			change = 1;
 			dev->caps.port_type[port + 1] = port_types[port];
@@ -302,10 +308,17 @@ static ssize_t show_port_type(struct device *dev,
 	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
 						   port_attr);
 	struct mlx4_dev *mdev = info->dev;
+	char type[8];
+
+	sprintf(type, "%s",
+		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
+		"ib" : "eth");
+	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
+		sprintf(buf, "auto (%s)\n", type);
+	else
+		sprintf(buf, "%s\n", type);
 
-	return sprintf(buf, "%s\n",
-		       mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB ?
-		       "ib" : "eth");
+	return strlen(buf);
 }
 
 static ssize_t set_port_type(struct device *dev,
@@ -317,6 +330,7 @@ static ssize_t set_port_type(struct device *dev,
 	struct mlx4_dev *mdev = info->dev;
 	struct mlx4_priv *priv = mlx4_priv(mdev);
 	enum mlx4_port_type types[MLX4_MAX_PORTS];
+	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
 	int i;
 	int err = 0;
 
@@ -324,26 +338,56 @@ static ssize_t set_port_type(struct device *dev,
 		info->tmp_type = MLX4_PORT_TYPE_IB;
 	else if (!strcmp(buf, "eth\n"))
 		info->tmp_type = MLX4_PORT_TYPE_ETH;
+	else if (!strcmp(buf, "auto\n"))
+		info->tmp_type = MLX4_PORT_TYPE_AUTO;
 	else {
 		mlx4_err(mdev, "%s is not supported port type\n", buf);
 		return -EINVAL;
 	}
 
+	mlx4_stop_sense(mdev);
 	mutex_lock(&priv->port_mutex);
-	for (i = 0; i < mdev->caps.num_ports; i++)
+	/* Possible type is always the one that was delivered */
+	mdev->caps.possible_type[info->port] = info->tmp_type;
+
+	for (i = 0; i < mdev->caps.num_ports; i++) {
 		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
-					mdev->caps.port_type[i+1];
+					mdev->caps.possible_type[i+1];
+		if (types[i] == MLX4_PORT_TYPE_AUTO)
+			types[i] = mdev->caps.port_type[i+1];
+	}
 
-	err = mlx4_check_port_params(mdev, types);
+	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+		for (i = 1; i <= mdev->caps.num_ports; i++) {
+			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
+				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
+				err = -EINVAL;
+			}
+		}
+	}
+	if (err) {
+		mlx4_err(mdev, "Auto sensing is not supported on this HCA. "
+			       "Set only 'eth' or 'ib' for both ports "
+			       "(should be the same)\n");
+		goto out;
+	}
+
+	mlx4_do_sense_ports(mdev, new_types, types);
+
+	err = mlx4_check_port_params(mdev, new_types);
 	if (err)
 		goto out;
 
-	for (i = 1; i <= mdev->caps.num_ports; i++)
-		priv->port[i].tmp_type = 0;
+	/* We are about to apply the changes after the configuration
+	 * was verified, no need to remember the temporary types
+	 * any more */
+	for (i = 0; i < mdev->caps.num_ports; i++)
+		priv->port[i + 1].tmp_type = 0;
 
-	err = mlx4_change_port_types(mdev, types);
+	err = mlx4_change_port_types(mdev, new_types);
 
 out:
+	mlx4_start_sense(mdev);
 	mutex_unlock(&priv->port_mutex);
 	return err ? err : count;
 }
@@ -1117,6 +1161,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto err_port;
 
+	mlx4_sense_init(dev);
+	mlx4_start_sense(dev);
+
 	pci_set_drvdata(pdev, dev);
 
 	return 0;
@@ -1182,6 +1229,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	int p;
 
 	if (dev) {
+		mlx4_stop_sense(dev);
 		mlx4_unregister_device(dev);
 
 		for (p = 1; p <= dev->caps.num_ports; p++) {
@@ -1266,9 +1314,11 @@ static int __init mlx4_init(void)
 	if (mlx4_verify_params())
 		return -EINVAL;
 
-	ret = mlx4_catas_init();
-	if (ret)
-		return ret;
+	mlx4_catas_init();
+
+	mlx4_wq = create_singlethread_workqueue("mlx4");
+	if (!mlx4_wq)
+		return -ENOMEM;
 
 	ret = pci_register_driver(&mlx4_driver);
 	return ret < 0 ? ret : 0;
@@ -1277,7 +1327,7 @@ static int __init mlx4_init(void)
 static void __exit mlx4_cleanup(void)
 {
 	pci_unregister_driver(&mlx4_driver);
-	mlx4_catas_cleanup();
+	destroy_workqueue(mlx4_wq);
 }
 
 module_init(mlx4_init);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index e0213bad61c7..5bd79c2b184f 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -40,6 +40,7 @@
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
 #include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/driver.h>
@@ -276,6 +277,13 @@ struct mlx4_port_info {
 	struct mlx4_vlan_table	vlan_table;
 };
 
+struct mlx4_sense {
+	struct mlx4_dev		*dev;
+	u8			do_sense_port[MLX4_MAX_PORTS + 1];
+	u8			sense_allowed[MLX4_MAX_PORTS + 1];
+	struct delayed_work	sense_poll;
+};
+
 struct mlx4_priv {
 	struct mlx4_dev		dev;
 
@@ -305,6 +313,7 @@ struct mlx4_priv {
 	struct mlx4_uar		driver_uar;
 	void __iomem	       *kar;
 	struct mlx4_port_info	port[MLX4_MAX_PORTS + 1];
+	struct mlx4_sense       sense;
 	struct mutex		port_mutex;
 };
 
@@ -313,6 +322,10 @@ static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
 	return container_of(dev, struct mlx4_priv, dev);
 }
 
+#define MLX4_SENSE_RANGE	(HZ * 3)
+
+extern struct workqueue_struct *mlx4_wq;
+
 u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
 void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
 u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align);
@@ -346,8 +359,7 @@ void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
 
 void mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
-int mlx4_catas_init(void);
-void mlx4_catas_cleanup(void);
+void mlx4_catas_init(void);
 int mlx4_restart_one(struct pci_dev *pdev);
 int mlx4_register_device(struct mlx4_dev *dev);
 void mlx4_unregister_device(struct mlx4_dev *dev);
@@ -379,6 +391,17 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
 
 void mlx4_handle_catas_err(struct mlx4_dev *dev);
 
+void mlx4_do_sense_ports(struct mlx4_dev *dev,
+			 enum mlx4_port_type *stype,
+			 enum mlx4_port_type *defaults);
+void mlx4_start_sense(struct mlx4_dev *dev);
+void mlx4_stop_sense(struct mlx4_dev *dev);
+void mlx4_sense_init(struct mlx4_dev *dev);
+int mlx4_check_port_params(struct mlx4_dev *dev,
+			   enum mlx4_port_type *port_type);
+int mlx4_change_port_types(struct mlx4_dev *dev,
+			   enum mlx4_port_type *port_types);
+
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
 void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
 
diff --git a/drivers/net/mlx4/sense.c b/drivers/net/mlx4/sense.c
new file mode 100644
index 000000000000..6d5089ecb5af
--- /dev/null
+++ b/drivers/net/mlx4/sense.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4.h"
+
+static int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
+			   enum mlx4_port_type *type)
+{
+	u64 out_param;
+	int err = 0;
+
+	err = mlx4_cmd_imm(dev, 0, &out_param, port, 0,
+			   MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B);
+	if (err) {
+		mlx4_err(dev, "Sense command failed for port: %d\n", port);
+		return err;
+	}
+
+	if (out_param > 2) {
+		mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", out_param);
+		return EINVAL;
+	}
+
+	*type = out_param;
+	return 0;
+}
+
+void mlx4_do_sense_ports(struct mlx4_dev *dev,
+			 enum mlx4_port_type *stype,
+			 enum mlx4_port_type *defaults)
+{
+	struct mlx4_sense *sense = &mlx4_priv(dev)->sense;
+	int err;
+	int i;
+
+	for (i = 1; i <= dev->caps.num_ports; i++) {
+		stype[i - 1] = 0;
+		if (sense->do_sense_port[i] && sense->sense_allowed[i] &&
+		    dev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
+			err = mlx4_SENSE_PORT(dev, i, &stype[i - 1]);
+			if (err)
+				stype[i - 1] = defaults[i - 1];
+		} else
+			stype[i - 1] = defaults[i - 1];
+	}
+
+	/*
+	 * Adjust port configuration:
+	 * If port 1 sensed nothing and port 2 is IB, set both as IB
+	 * If port 2 sensed nothing and port 1 is Eth, set both as Eth
+	 */
+	if (stype[0] == MLX4_PORT_TYPE_ETH) {
+		for (i = 1; i < dev->caps.num_ports; i++)
+			stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_ETH;
+	}
+	if (stype[dev->caps.num_ports - 1] == MLX4_PORT_TYPE_IB) {
+		for (i = 0; i < dev->caps.num_ports - 1; i++)
+			stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_IB;
+	}
+
+	/*
+	 * If sensed nothing, remain in current configuration.
+	 */
+	for (i = 0; i < dev->caps.num_ports; i++)
+		stype[i] = stype[i] ? stype[i] : defaults[i];
+
+}
+
+static void mlx4_sense_port(struct work_struct *work)
+{
+	struct delayed_work *delay = container_of(work, struct delayed_work, work);
+	struct mlx4_sense *sense = container_of(delay, struct mlx4_sense,
+						sense_poll);
+	struct mlx4_dev *dev = sense->dev;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	enum mlx4_port_type stype[MLX4_MAX_PORTS];
+
+	mutex_lock(&priv->port_mutex);
+	mlx4_do_sense_ports(dev, stype, &dev->caps.port_type[1]);
+
+	if (mlx4_check_port_params(dev, stype))
+		goto sense_again;
+
+	if (mlx4_change_port_types(dev, stype))
+		mlx4_err(dev, "Failed to change port_types\n");
+
+sense_again:
+	mutex_unlock(&priv->port_mutex);
+	queue_delayed_work(mlx4_wq , &sense->sense_poll,
+			   round_jiffies_relative(MLX4_SENSE_RANGE));
+}
+
+void mlx4_start_sense(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_sense *sense = &priv->sense;
+
+	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP))
+		return;
+
+	queue_delayed_work(mlx4_wq , &sense->sense_poll,
+			   round_jiffies_relative(MLX4_SENSE_RANGE));
+}
+
+void mlx4_stop_sense(struct mlx4_dev *dev)
+{
+	cancel_delayed_work_sync(&mlx4_priv(dev)->sense.sense_poll);
+}
+
+void  mlx4_sense_init(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_sense *sense = &priv->sense;
+	int port;
+
+	sense->dev = dev;
+	for (port = 1; port <= dev->caps.num_ports; port++)
+		sense->do_sense_port[port] = 1;
+
+	INIT_DELAYED_WORK_DEFERRABLE(&sense->sense_poll, mlx4_sense_port);
+}
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index cf9c679ab38b..0f82293a82ed 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -55,6 +55,7 @@ enum {
 	MLX4_CMD_CLOSE_PORT	 = 0xa,
 	MLX4_CMD_QUERY_HCA	 = 0xb,
 	MLX4_CMD_QUERY_PORT	 = 0x43,
+	MLX4_CMD_SENSE_PORT	 = 0x4d,
 	MLX4_CMD_SET_PORT	 = 0xc,
 	MLX4_CMD_ACCESS_DDR	 = 0x2e,
 	MLX4_CMD_MAP_ICM	 = 0xffa,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 8f659cc29960..3aff8a6a389e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -155,8 +155,9 @@ enum mlx4_qp_region {
 };
 
 enum mlx4_port_type {
-	MLX4_PORT_TYPE_IB	= 1 << 0,
-	MLX4_PORT_TYPE_ETH	= 1 << 1,
+	MLX4_PORT_TYPE_IB	= 1,
+	MLX4_PORT_TYPE_ETH	= 2,
+	MLX4_PORT_TYPE_AUTO	= 3
 };
 
 enum mlx4_special_vlan_idx {
@@ -237,6 +238,7 @@ struct mlx4_caps {
 	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
 	u8			supported_type[MLX4_MAX_PORTS + 1];
 	u32			port_mask;
+	enum mlx4_port_type	possible_type[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_buf_list {
-- 
cgit v1.2.3


From 2e1ab634bf013792d8803ec57c7a428a76f50028 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 19 Mar 2009 23:49:41 -0700
Subject: rtnetlink: add new value for DHCP added routes

To improve manageability, it would be good to be able to disambiguate routes
added by administrator from those added by DHCP client.  The only necessary
kernel change is to add value to rtnetlink include file so iproute2 utility
can use it.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 35a07c830f79..ba3254ecf7fb 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -217,6 +217,7 @@ enum
 #define RTPROT_DNROUTED	13	/* DECnet routing daemon */
 #define RTPROT_XORP	14	/* XORP */
 #define RTPROT_NTK	15	/* Netsukuku */
+#define RTPROT_DHCP	16      /* DHCP client */
 
 /* rtm_scope
 
-- 
cgit v1.2.3


From 5e140dfc1fe87eae27846f193086724806b33c7d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 20 Mar 2009 01:33:32 -0700
Subject: net: reorder struct Qdisc for better SMP performance

dev_queue_xmit() needs to dirty fields "state", "q", "bstats" and "qstats"

On x86_64 arch, they currently span three cache lines, involving more
cache line ping pongs than necessary, making longer holding of queue spinlock.

We can reduce this to one cache line, by grouping all read-mostly fields
at the beginning of structure. (Or should I say, all highly modified fields
at the end :) )

Before patch :

offsetof(struct Qdisc, state)=0x38
offsetof(struct Qdisc, q)=0x48
offsetof(struct Qdisc, bstats)=0x80
offsetof(struct Qdisc, qstats)=0x90
sizeof(struct Qdisc)=0xc8

After patch :

offsetof(struct Qdisc, state)=0x80
offsetof(struct Qdisc, q)=0x88
offsetof(struct Qdisc, bstats)=0xa0
offsetof(struct Qdisc, qstats)=0xac
sizeof(struct Qdisc)=0xc0

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/gen_stats.h |  2 +-
 include/net/sch_generic.h | 21 ++++++++++++---------
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h
index 13f4e74609ac..0ffa41df0ee8 100644
--- a/include/linux/gen_stats.h
+++ b/include/linux/gen_stats.h
@@ -22,7 +22,7 @@ struct gnet_stats_basic
 {
 	__u64	bytes;
 	__u32	packets;
-};
+} __attribute__ ((packed));
 
 /**
  * struct gnet_stats_rate_est - rate estimator
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 3d78a4d22460..964ffa0d8815 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -49,18 +49,10 @@ struct Qdisc
 	int			padded;
 	struct Qdisc_ops	*ops;
 	struct qdisc_size_table	*stab;
+	struct list_head	list;
 	u32			handle;
 	u32			parent;
 	atomic_t		refcnt;
-	unsigned long		state;
-	struct sk_buff		*gso_skb;
-	struct sk_buff_head	q;
-	struct netdev_queue	*dev_queue;
-	struct Qdisc		*next_sched;
-	struct list_head	list;
-
-	struct gnet_stats_basic	bstats;
-	struct gnet_stats_queue	qstats;
 	struct gnet_stats_rate_est	rate_est;
 	int			(*reshape_fail)(struct sk_buff *skb,
 					struct Qdisc *q);
@@ -71,6 +63,17 @@ struct Qdisc
 	 * and it will live until better solution will be invented.
 	 */
 	struct Qdisc		*__parent;
+	struct netdev_queue	*dev_queue;
+	struct Qdisc		*next_sched;
+
+	struct sk_buff		*gso_skb;
+	/*
+	 * For performance sake on SMP, we put highly modified fields at the end
+	 */
+	unsigned long		state;
+	struct sk_buff_head	q;
+	struct gnet_stats_basic bstats;
+	struct gnet_stats_queue	qstats;
 };
 
 struct Qdisc_class_ops
-- 
cgit v1.2.3


From 9247744e5eaa29aecee5342a0c8694187a6aadcd Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Sat, 21 Mar 2009 13:39:26 -0700
Subject: skb: expose and constify hash primitives

Some minor changes to queue hashing:
 1. Use const on accessor functions
 2. Export skb_tx_hash for use in drivers (see ixgbe)

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 9 ++++++---
 net/core/dev.c         | 3 ++-
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1fbab2ae613c..bb1981fd60f3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1969,7 +1969,7 @@ static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
 	skb->queue_mapping = queue_mapping;
 }
 
-static inline u16 skb_get_queue_mapping(struct sk_buff *skb)
+static inline u16 skb_get_queue_mapping(const struct sk_buff *skb)
 {
 	return skb->queue_mapping;
 }
@@ -1984,16 +1984,19 @@ static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue)
 	skb->queue_mapping = rx_queue + 1;
 }
 
-static inline u16 skb_get_rx_queue(struct sk_buff *skb)
+static inline u16 skb_get_rx_queue(const struct sk_buff *skb)
 {
 	return skb->queue_mapping - 1;
 }
 
-static inline bool skb_rx_queue_recorded(struct sk_buff *skb)
+static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 {
 	return (skb->queue_mapping != 0);
 }
 
+extern u16 skb_tx_hash(const struct net_device *dev,
+		       const struct sk_buff *skb);
+
 #ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index ca212acd3348..fdb9973b82a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1725,7 +1725,7 @@ out_kfree_skb:
 
 static u32 skb_tx_hashrnd;
 
-static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb)
+u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
 	u32 hash;
 
@@ -1740,6 +1740,7 @@ static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb)
 
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
 }
+EXPORT_SYMBOL(skb_tx_hash);
 
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
-- 
cgit v1.2.3


From 777baa4711c6b8373f4e03a3a558d44a6b046d7a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 20 Mar 2009 19:35:54 +0000
Subject: usbnet: support net_device_ops

Use net_device_ops for usbnet device, and export for use
by other derived drivers.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 31 +++++++++++++++++++++++--------
 include/linux/usb/usbnet.h |  5 +++++
 2 files changed, 28 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 084141692245..659654f45880 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -223,7 +223,7 @@ EXPORT_SYMBOL_GPL(usbnet_skb_return);
  *
  *-------------------------------------------------------------------------*/
 
-static int usbnet_change_mtu (struct net_device *net, int new_mtu)
+int usbnet_change_mtu (struct net_device *net, int new_mtu)
 {
 	struct usbnet	*dev = netdev_priv(net);
 	int		ll_mtu = new_mtu + net->hard_header_len;
@@ -246,6 +246,7 @@ static int usbnet_change_mtu (struct net_device *net, int new_mtu)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(usbnet_change_mtu);
 
 /*-------------------------------------------------------------------------*/
 
@@ -540,7 +541,7 @@ EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs);
 
 // precondition: never called in_interrupt
 
-static int usbnet_stop (struct net_device *net)
+int usbnet_stop (struct net_device *net)
 {
 	struct usbnet		*dev = netdev_priv(net);
 	int			temp;
@@ -584,6 +585,7 @@ static int usbnet_stop (struct net_device *net)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(usbnet_stop);
 
 /*-------------------------------------------------------------------------*/
 
@@ -591,7 +593,7 @@ static int usbnet_stop (struct net_device *net)
 
 // precondition: never called in_interrupt
 
-static int usbnet_open (struct net_device *net)
+int usbnet_open (struct net_device *net)
 {
 	struct usbnet		*dev = netdev_priv(net);
 	int			retval;
@@ -666,6 +668,7 @@ done:
 done_nopm:
 	return retval;
 }
+EXPORT_SYMBOL_GPL(usbnet_open);
 
 /*-------------------------------------------------------------------------*/
 
@@ -900,7 +903,7 @@ static void tx_complete (struct urb *urb)
 
 /*-------------------------------------------------------------------------*/
 
-static void usbnet_tx_timeout (struct net_device *net)
+void usbnet_tx_timeout (struct net_device *net)
 {
 	struct usbnet		*dev = netdev_priv(net);
 
@@ -909,10 +912,11 @@ static void usbnet_tx_timeout (struct net_device *net)
 
 	// FIXME: device recovery -- reset?
 }
+EXPORT_SYMBOL_GPL(usbnet_tx_timeout);
 
 /*-------------------------------------------------------------------------*/
 
-static int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net)
+int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net)
 {
 	struct usbnet		*dev = netdev_priv(net);
 	int			length;
@@ -995,7 +999,7 @@ drop:
 	}
 	return retval;
 }
-
+EXPORT_SYMBOL_GPL(usbnet_start_xmit);
 
 /*-------------------------------------------------------------------------*/
 
@@ -1102,6 +1106,15 @@ void usbnet_disconnect (struct usb_interface *intf)
 }
 EXPORT_SYMBOL_GPL(usbnet_disconnect);
 
+static const struct net_device_ops usbnet_netdev_ops = {
+	.ndo_open		= usbnet_open,
+	.ndo_stop		= usbnet_stop,
+	.ndo_start_xmit		= usbnet_start_xmit,
+	.ndo_tx_timeout		= usbnet_tx_timeout,
+	.ndo_change_mtu		= usbnet_change_mtu,
+	.ndo_set_mac_address 	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+};
 
 /*-------------------------------------------------------------------------*/
 
@@ -1171,12 +1184,14 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 		net->features |= NETIF_F_HIGHDMA;
 #endif
 
-	net->change_mtu = usbnet_change_mtu;
+	net->netdev_ops = &usbnet_netdev_ops;
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
 	net->hard_start_xmit = usbnet_start_xmit;
 	net->open = usbnet_open;
 	net->stop = usbnet_stop;
-	net->watchdog_timeo = TX_TIMEOUT_JIFFIES;
 	net->tx_timeout = usbnet_tx_timeout;
+#endif
+	net->watchdog_timeo = TX_TIMEOUT_JIFFIES;
 	net->ethtool_ops = &usbnet_ethtool_ops;
 
 	// allow device-specific bind/init procedures
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 7d3822243074..36fabb95c7d3 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -176,6 +176,11 @@ struct skb_data {	/* skb->cb is one of these */
 	size_t			length;
 };
 
+extern int usbnet_open (struct net_device *net);
+extern int usbnet_stop (struct net_device *net);
+extern int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net);
+extern void usbnet_tx_timeout (struct net_device *net);
+extern int usbnet_change_mtu (struct net_device *net, int new_mtu);
 
 extern int usbnet_get_endpoints(struct usbnet *, struct usb_interface *);
 extern void usbnet_defer_kevent (struct usbnet *, int);
-- 
cgit v1.2.3


From dd5b6ce6fd465eab90357711c8e8124dc3a31ff0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 23 Mar 2009 13:21:06 +0100
Subject: nefilter: nfnetlink: add nfnetlink_set_err and use it in ctnetlink

This patch adds nfnetlink_set_err() to propagate the error to netlink
broadcast listener in case of memory allocation errors in the
message building.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nfnetlink.h  | 1 +
 net/netfilter/nf_conntrack_netlink.c | 2 ++
 net/netfilter/nfnetlink.c            | 6 ++++++
 net/netlink/af_netlink.c             | 1 +
 4 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 7d8e0455ccac..135e5cfe68a2 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -76,6 +76,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
 extern int nfnetlink_has_listeners(unsigned int group);
 extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, 
 			  int echo);
+extern void nfnetlink_set_err(u32 pid, u32 group, int error);
 extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
 
 extern void nfnl_lock(void);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d1fe9d15ac5c..1b75c9efb0eb 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -518,6 +518,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 nla_put_failure:
 	rcu_read_unlock();
 nlmsg_failure:
+	nfnetlink_set_err(0, group, -ENOBUFS);
 	kfree_skb(skb);
 	return NOTIFY_DONE;
 }
@@ -1514,6 +1515,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 nla_put_failure:
 	rcu_read_unlock();
 nlmsg_failure:
+	nfnetlink_set_err(0, 0, -ENOBUFS);
 	kfree_skb(skb);
 	return NOTIFY_DONE;
 }
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 9c0ba17a1ddb..2785d66a7e38 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -113,6 +113,12 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);
 
+void nfnetlink_set_err(u32 pid, u32 group, int error)
+{
+	netlink_set_err(nfnl, pid, group, error);
+}
+EXPORT_SYMBOL_GPL(nfnetlink_set_err);
+
 int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
 {
 	return netlink_unicast(nfnl, skb, pid, flags);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6ee69c27f806..5b33879c6422 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1106,6 +1106,7 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
 
 	read_unlock(&nl_table_lock);
 }
+EXPORT_SYMBOL(netlink_set_err);
 
 /* must be called with netlink table grabbed */
 static void netlink_update_socket_mc(struct netlink_sock *nlk,
-- 
cgit v1.2.3


From d0bfb940ecabf0b44fb1fd80d8d60594e569e5ec Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 15 Dec 2008 13:52:10 +0100
Subject: KVM: New guest debug interface

This rips out the support for KVM_DEBUG_GUEST and introduces a new IOCTL
instead: KVM_SET_GUEST_DEBUG. The IOCTL payload consists of a generic
part, controlling the "main switch" and the single-step feature. The
arch specific part adds an x86 interface for intercepting both types of
debug exceptions separately and re-injecting them when the host was not
interested. Moveover, the foundation for guest debugging via debug
registers is layed.

To signal breakpoint events properly back to userland, an arch-specific
data block is now returned along KVM_EXIT_DEBUG. For x86, the arch block
contains the PC, the debug exception, and relevant debug registers to
tell debug events properly apart.

The availability of this new interface is signaled by
KVM_CAP_SET_GUEST_DEBUG. Empty stubs for not yet supported archs are
provided.

Note that both SVM and VTX are supported, but only the latter was tested
yet. Based on the experience with all those VTX corner case, I would be
fairly surprised if SVM will work out of the box.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm.h     |  7 ++++
 arch/ia64/kvm/kvm-ia64.c        |  4 +-
 arch/powerpc/include/asm/kvm.h  |  7 ++++
 arch/powerpc/kvm/powerpc.c      |  4 +-
 arch/s390/include/asm/kvm.h     |  7 ++++
 arch/s390/kvm/kvm-s390.c        |  4 +-
 arch/x86/include/asm/kvm.h      | 18 ++++++++
 arch/x86/include/asm/kvm_host.h |  9 +---
 arch/x86/kvm/svm.c              | 50 +++++++++++++++++++++-
 arch/x86/kvm/vmx.c              | 93 ++++++++++++++++-------------------------
 arch/x86/kvm/x86.c              | 14 ++++---
 include/linux/kvm.h             | 51 +++++++++++++++-------
 include/linux/kvm_host.h        |  6 +--
 virt/kvm/kvm_main.c             |  6 +--
 14 files changed, 179 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index bfa86b6af7cd..be3fdb891214 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -214,4 +214,11 @@ struct kvm_sregs {
 struct kvm_fpu {
 };
 
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 28f982045f29..de47467a0e61 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1303,8 +1303,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return -EINVAL;
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-		struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
 {
 	return -EINVAL;
 }
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index f993e4198d5c..755f1b1948c5 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -52,4 +52,11 @@ struct kvm_fpu {
 	__u64 fpr[32];
 };
 
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 5f81256287f5..7c2ad4017d6a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -240,8 +240,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	kvmppc_core_vcpu_put(vcpu);
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-                                    struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
 {
 	int i;
 
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index e1f54654e3ae..0b2f829f6d50 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -42,4 +42,11 @@ struct kvm_fpu {
 	__u64 fprs[16];
 };
 
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0d33893e1e89..cbfe91e10120 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -422,8 +422,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return -EINVAL; /* not implemented yet */
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-				    struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
 {
 	return -EINVAL; /* not implemented yet */
 }
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 886c9402ec45..32eb96c7ca27 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -212,6 +212,24 @@ struct kvm_pit_channel_state {
 	__s64 count_load_time;
 };
 
+struct kvm_debug_exit_arch {
+	__u32 exception;
+	__u32 pad;
+	__u64 pc;
+	__u64 dr6;
+	__u64 dr7;
+};
+
+#define KVM_GUESTDBG_USE_SW_BP		0x00010000
+#define KVM_GUESTDBG_USE_HW_BP		0x00020000
+#define KVM_GUESTDBG_INJECT_DB		0x00040000
+#define KVM_GUESTDBG_INJECT_BP		0x00080000
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+	__u64 debugreg[8];
+};
+
 struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 53779309514a..c430cd580ee2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -135,12 +135,6 @@ enum {
 
 #define KVM_NR_MEM_OBJS 40
 
-struct kvm_guest_debug {
-	int enabled;
-	unsigned long bp[4];
-	int singlestep;
-};
-
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -448,8 +442,7 @@ struct kvm_x86_ops {
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
 	int (*set_guest_debug)(struct kvm_vcpu *vcpu,
-			       struct kvm_debug_guest *dbg);
-	void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
+			       struct kvm_guest_debug *dbg);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0fbbde54ecae..88d9062f4545 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -968,9 +968,32 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-	return -EOPNOTSUPP;
+	int old_debug = vcpu->guest_debug;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	vcpu->guest_debug = dbg->control;
+
+	svm->vmcb->control.intercept_exceptions &=
+		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+		if (vcpu->guest_debug &
+		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+			svm->vmcb->control.intercept_exceptions |=
+				1 << DB_VECTOR;
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+			svm->vmcb->control.intercept_exceptions |=
+				1 << BP_VECTOR;
+	} else
+		vcpu->guest_debug = 0;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+		svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+
+	return 0;
 }
 
 static int svm_get_irq(struct kvm_vcpu *vcpu)
@@ -1094,6 +1117,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
+static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	if (!(svm->vcpu.guest_debug &
+	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
+		return 1;
+	}
+	kvm_run->exit_reason = KVM_EXIT_DEBUG;
+	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+	kvm_run->debug.arch.exception = DB_VECTOR;
+	return 0;
+}
+
+static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_DEBUG;
+	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+	kvm_run->debug.arch.exception = BP_VECTOR;
+	return 0;
+}
+
 static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	int er;
@@ -2050,6 +2094,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_WRITE_DR3]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR5]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR7]			= emulate_on_interception,
+	[SVM_EXIT_EXCP_BASE + DB_VECTOR]	= db_interception,
+	[SVM_EXIT_EXCP_BASE + BP_VECTOR]	= bp_interception,
 	[SVM_EXIT_EXCP_BASE + UD_VECTOR]	= ud_interception,
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR] 	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR] 	= nm_interception,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1d974c1eaa7d..f55690ddb3ac 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -480,8 +480,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
 	if (!vcpu->fpu_active)
 		eb |= 1u << NM_VECTOR;
-	if (vcpu->guest_debug.enabled)
-		eb |= 1u << DB_VECTOR;
+	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+		if (vcpu->guest_debug &
+		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+			eb |= 1u << DB_VECTOR;
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+			eb |= 1u << BP_VECTOR;
+	}
 	if (vcpu->arch.rmode.active)
 		eb = ~0;
 	if (vm_need_ept())
@@ -1003,40 +1008,23 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 	}
 }
 
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-	unsigned long dr7 = 0x400;
-	int old_singlestep;
-
-	old_singlestep = vcpu->guest_debug.singlestep;
-
-	vcpu->guest_debug.enabled = dbg->enabled;
-	if (vcpu->guest_debug.enabled) {
-		int i;
-
-		dr7 |= 0x200;  /* exact */
-		for (i = 0; i < 4; ++i) {
-			if (!dbg->breakpoints[i].enabled)
-				continue;
-			vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
-			dr7 |= 2 << (i*2);    /* global enable */
-			dr7 |= 0 << (i*4+16); /* execution breakpoint */
-		}
-
-		vcpu->guest_debug.singlestep = dbg->singlestep;
-	} else
-		vcpu->guest_debug.singlestep = 0;
+	int old_debug = vcpu->guest_debug;
+	unsigned long flags;
 
-	if (old_singlestep && !vcpu->guest_debug.singlestep) {
-		unsigned long flags;
+	vcpu->guest_debug = dbg->control;
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+		vcpu->guest_debug = 0;
 
-		flags = vmcs_readl(GUEST_RFLAGS);
+	flags = vmcs_readl(GUEST_RFLAGS);
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
 		flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-		vmcs_writel(GUEST_RFLAGS, flags);
-	}
+	vmcs_writel(GUEST_RFLAGS, flags);
 
 	update_exception_bitmap(vcpu);
-	vmcs_writel(GUEST_DR7, dr7);
 
 	return 0;
 }
@@ -2540,24 +2528,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 	return 0;
 }
 
-static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
-{
-	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-
-	set_debugreg(dbg->bp[0], 0);
-	set_debugreg(dbg->bp[1], 1);
-	set_debugreg(dbg->bp[2], 2);
-	set_debugreg(dbg->bp[3], 3);
-
-	if (dbg->singlestep) {
-		unsigned long flags;
-
-		flags = vmcs_readl(GUEST_RFLAGS);
-		flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-		vmcs_writel(GUEST_RFLAGS, flags);
-	}
-}
-
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 				  int vec, u32 err_code)
 {
@@ -2574,9 +2544,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	 *        the required debugging infrastructure rework.
 	 */
 	switch (vec) {
-	case DE_VECTOR:
 	case DB_VECTOR:
+		if (vcpu->guest_debug &
+		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+			return 0;
+		kvm_queue_exception(vcpu, vec);
+		return 1;
 	case BP_VECTOR:
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+			return 0;
+		/* fall through */
+	case DE_VECTOR:
 	case OF_VECTOR:
 	case BR_VECTOR:
 	case UD_VECTOR:
@@ -2593,7 +2571,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 intr_info, error_code;
+	u32 intr_info, ex_no, error_code;
 	unsigned long cr2, rip;
 	u32 vect_info;
 	enum emulation_result er;
@@ -2653,14 +2631,16 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		return 1;
 	}
 
-	if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
-	    (INTR_TYPE_HARD_EXCEPTION | 1)) {
+	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+	if (ex_no == DB_VECTOR || ex_no == BP_VECTOR) {
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
-		return 0;
+		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+		kvm_run->debug.arch.exception = ex_no;
+	} else {
+		kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+		kvm_run->ex.exception = ex_no;
+		kvm_run->ex.error_code = error_code;
 	}
-	kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
-	kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
-	kvm_run->ex.error_code = error_code;
 	return 0;
 }
 
@@ -3600,7 +3580,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.vcpu_put = vmx_vcpu_put,
 
 	.set_guest_debug = set_guest_debug,
-	.guest_debug_pre = kvm_guest_debug_pre,
 	.get_msr = vmx_get_msr,
 	.set_msr = vmx_set_msr,
 	.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b5e9932e0f62..e990d164b56d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3005,9 +3005,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
-	if (vcpu->guest_debug.enabled)
-		kvm_x86_ops->guest_debug_pre(vcpu);
-
 	vcpu->guest_mode = 1;
 	/*
 	 * Make sure that guest_mode assignment won't happen after
@@ -3218,7 +3215,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	/*
 	 * Don't leak debug flags in case they were set for guest debugging
 	 */
-	if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
 
 	vcpu_put(vcpu);
@@ -3837,8 +3834,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-				    struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
 {
 	int r;
 
@@ -3846,6 +3843,11 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
 
 	r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
 
+	if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+		kvm_queue_exception(vcpu, DB_VECTOR);
+	else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+		kvm_queue_exception(vcpu, BP_VECTOR);
+
 	vcpu_put(vcpu);
 
 	return r;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0424326f1679..429a2ce202f9 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -126,6 +126,7 @@ struct kvm_run {
 			__u64 data_offset; /* relative to kvm_run start */
 		} io;
 		struct {
+			struct kvm_debug_exit_arch arch;
 		} debug;
 		/* KVM_EXIT_MMIO */
 		struct {
@@ -217,21 +218,6 @@ struct kvm_interrupt {
 	__u32 irq;
 };
 
-struct kvm_breakpoint {
-	__u32 enabled;
-	__u32 padding;
-	__u64 address;
-};
-
-/* for KVM_DEBUG_GUEST */
-struct kvm_debug_guest {
-	/* int */
-	__u32 enabled;
-	__u32 pad;
-	struct kvm_breakpoint breakpoints[4];
-	__u32 singlestep;
-};
-
 /* for KVM_GET_DIRTY_LOG */
 struct kvm_dirty_log {
 	__u32 slot;
@@ -292,6 +278,17 @@ struct kvm_s390_interrupt {
 	__u64 parm64;
 };
 
+/* for KVM_SET_GUEST_DEBUG */
+
+#define KVM_GUESTDBG_ENABLE		0x00000001
+#define KVM_GUESTDBG_SINGLESTEP		0x00000002
+
+struct kvm_guest_debug {
+	__u32 control;
+	__u32 pad;
+	struct kvm_guest_debug_arch arch;
+};
+
 #define KVM_TRC_SHIFT           16
 /*
  * kvm trace categories
@@ -396,6 +393,7 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_USER_NMI
 #define KVM_CAP_USER_NMI 22
 #endif
+#define KVM_CAP_SET_GUEST_DEBUG 23
 
 /*
  * ioctls for VM fds
@@ -440,7 +438,8 @@ struct kvm_trace_rec {
 #define KVM_SET_SREGS             _IOW(KVMIO,  0x84, struct kvm_sregs)
 #define KVM_TRANSLATE             _IOWR(KVMIO, 0x85, struct kvm_translation)
 #define KVM_INTERRUPT             _IOW(KVMIO,  0x86, struct kvm_interrupt)
-#define KVM_DEBUG_GUEST           _IOW(KVMIO,  0x87, struct kvm_debug_guest)
+/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
+#define KVM_DEBUG_GUEST           __KVM_DEPRECATED_DEBUG_GUEST
 #define KVM_GET_MSRS              _IOWR(KVMIO, 0x88, struct kvm_msrs)
 #define KVM_SET_MSRS              _IOW(KVMIO,  0x89, struct kvm_msrs)
 #define KVM_SET_CPUID             _IOW(KVMIO,  0x8a, struct kvm_cpuid)
@@ -469,6 +468,26 @@ struct kvm_trace_rec {
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
 /* Available with KVM_CAP_NMI */
 #define KVM_NMI                   _IO(KVMIO,  0x9a)
+/* Available with KVM_CAP_SET_GUEST_DEBUG */
+#define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
+
+/*
+ * Deprecated interfaces
+ */
+struct kvm_breakpoint {
+	__u32 enabled;
+	__u32 padding;
+	__u64 address;
+};
+
+struct kvm_debug_guest {
+	__u32 enabled;
+	__u32 pad;
+	struct kvm_breakpoint breakpoints[4];
+	__u32 singlestep;
+};
+
+#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO,  0x87, struct kvm_debug_guest)
 
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bf6f703642fc..e92212f970db 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -73,7 +73,7 @@ struct kvm_vcpu {
 	struct kvm_run *run;
 	int guest_mode;
 	unsigned long requests;
-	struct kvm_guest_debug guest_debug;
+	unsigned long guest_debug;
 	int fpu_active;
 	int guest_fpu_loaded;
 	wait_queue_head_t wq;
@@ -255,8 +255,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state);
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state);
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-				    struct kvm_debug_guest *dbg);
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg);
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 
 int kvm_arch_init(void *opaque);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 29a667ce35b0..f83ef9c7e89b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1755,13 +1755,13 @@ out_free2:
 		r = 0;
 		break;
 	}
-	case KVM_DEBUG_GUEST: {
-		struct kvm_debug_guest dbg;
+	case KVM_SET_GUEST_DEBUG: {
+		struct kvm_guest_debug dbg;
 
 		r = -EFAULT;
 		if (copy_from_user(&dbg, argp, sizeof dbg))
 			goto out;
-		r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
+		r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
 		if (r)
 			goto out;
 		r = 0;
-- 
cgit v1.2.3


From e9a999fe1feaddb71bffbacbbd68e0da8ca8b50b Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Thu, 18 Dec 2008 12:17:51 +0100
Subject: KVM: ia64: stack get/restore patch

Implement KVM_IA64_VCPU_[GS]ET_STACK ioctl calls. This is required
for live migrations.

Patch is based on previous implementation that was part of old
GET/SET_REGS ioctl calls.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm.h      |  7 +++
 arch/ia64/include/asm/kvm_host.h |  6 ++-
 arch/ia64/kvm/kvm-ia64.c         | 92 ++++++++++++++++++++++++++++++++++++++--
 include/linux/kvm.h              |  3 ++
 4 files changed, 104 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index be3fdb891214..b5145784233e 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -214,6 +214,13 @@ struct kvm_sregs {
 struct kvm_fpu {
 };
 
+#define KVM_IA64_VCPU_STACK_SHIFT	16
+#define KVM_IA64_VCPU_STACK_SIZE	(1UL << KVM_IA64_VCPU_STACK_SHIFT)
+
+struct kvm_ia64_vcpu_stack {
+	unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
+};
+
 struct kvm_debug_exit_arch {
 };
 
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 348663661659..7da0c0963226 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -112,7 +112,11 @@
 #define VCPU_STRUCT_SHIFT	16
 #define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
 
-#define KVM_STK_OFFSET		VCPU_STRUCT_SIZE
+/*
+ * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
+ */
+#define KVM_STK_SHIFT		16
+#define KVM_STK_OFFSET		(__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
 
 #define KVM_VM_STRUCT_SHIFT	19
 #define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index de47467a0e61..1477f91617a5 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1421,6 +1421,23 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
+int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
+	       sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
+
+	vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
+	return 0;
+}
+
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 
@@ -1430,9 +1447,78 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 
 long kvm_arch_vcpu_ioctl(struct file *filp,
-		unsigned int ioctl, unsigned long arg)
+			 unsigned int ioctl, unsigned long arg)
 {
-	return -EINVAL;
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	struct kvm_ia64_vcpu_stack *stack = NULL;
+	long r;
+
+	switch (ioctl) {
+	case KVM_IA64_VCPU_GET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+	        void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_WRITE, user_stack,
+			       sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
+			       "Illegal user destination address for stack\n");
+			goto out;
+		}
+		stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+
+		r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
+		if (r)
+			goto out;
+
+		if (copy_to_user(user_stack, stack,
+				 sizeof(struct kvm_ia64_vcpu_stack)))
+			goto out;
+
+		break;
+	}
+	case KVM_IA64_VCPU_SET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+	        void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_READ, user_stack,
+			    sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
+			       "Illegal user address for stack\n");
+			goto out;
+		}
+		stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+		if (copy_from_user(stack, user_stack,
+				   sizeof(struct kvm_ia64_vcpu_stack)))
+			goto out;
+
+		r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
+		break;
+	}
+
+	default:
+		r = -EINVAL;
+	}
+
+out:
+	kfree(stack);
+	return r;
 }
 
 int kvm_arch_set_memory_region(struct kvm *kvm,
@@ -1472,7 +1558,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 }
 
 long kvm_arch_dev_ioctl(struct file *filp,
-		unsigned int ioctl, unsigned long arg)
+			unsigned int ioctl, unsigned long arg)
 {
 	return -EINVAL;
 }
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 429a2ce202f9..28582fcd3f79 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -489,6 +489,9 @@ struct kvm_debug_guest {
 
 #define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO,  0x87, struct kvm_debug_guest)
 
+#define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
+#define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
+
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
-- 
cgit v1.2.3


From 971cc3dcbc0e020b82f568e61a47b72be03307dd Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 19 Dec 2008 18:13:54 +0100
Subject: KVM: Advertise guest debug capability per-arch

Limit KVM_CAP_SET_GUEST_DEBUG only to those archs (currently x86) that
support it. This simplifies user space stub implementations.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 28582fcd3f79..11e3e6197c83 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -393,7 +393,9 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_USER_NMI
 #define KVM_CAP_USER_NMI 22
 #endif
+#if defined(CONFIG_X86)
 #define KVM_CAP_SET_GUEST_DEBUG 23
+#endif
 
 /*
  * ioctls for VM fds
-- 
cgit v1.2.3


From 0f346074403bc109f9569f14b45cb09e83729032 Mon Sep 17 00:00:00 2001
From: Izik Eidus <ieidus@redhat.com>
Date: Mon, 29 Dec 2008 01:42:20 +0200
Subject: KVM: remove the vmap usage

vmap() on guest pages hides those pages from the Linux mm for an extended
(userspace determined) amount of time.  Get rid of it.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c        | 62 ++++++++++-------------------------------------
 include/linux/kvm_types.h |  3 +--
 2 files changed, 14 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 67f91764e99b..2a4f3a697343 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2371,40 +2371,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(emulate_instruction);
 
-static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
-		if (vcpu->arch.pio.guest_pages[i]) {
-			kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
-			vcpu->arch.pio.guest_pages[i] = NULL;
-		}
-}
-
 static int pio_copy_data(struct kvm_vcpu *vcpu)
 {
 	void *p = vcpu->arch.pio_data;
-	void *q;
+	gva_t q = vcpu->arch.pio.guest_gva;
 	unsigned bytes;
-	int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
+	int ret;
 
-	q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
-		 PAGE_KERNEL);
-	if (!q) {
-		free_pio_guest_pages(vcpu);
-		return -ENOMEM;
-	}
-	q += vcpu->arch.pio.guest_page_offset;
 	bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
 	if (vcpu->arch.pio.in)
-		memcpy(q, p, bytes);
+		ret = kvm_write_guest_virt(q, p, bytes, vcpu);
 	else
-		memcpy(p, q, bytes);
-	q -= vcpu->arch.pio.guest_page_offset;
-	vunmap(q);
-	free_pio_guest_pages(vcpu);
-	return 0;
+		ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+	return ret;
 }
 
 int complete_pio(struct kvm_vcpu *vcpu)
@@ -2515,7 +2494,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.in = in;
 	vcpu->arch.pio.string = 0;
 	vcpu->arch.pio.down = 0;
-	vcpu->arch.pio.guest_page_offset = 0;
 	vcpu->arch.pio.rep = 0;
 
 	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2543,9 +2521,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		  gva_t address, int rep, unsigned port)
 {
 	unsigned now, in_page;
-	int i, ret = 0;
-	int nr_pages = 1;
-	struct page *page;
+	int ret = 0;
 	struct kvm_io_device *pio_dev;
 
 	vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2557,7 +2533,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.in = in;
 	vcpu->arch.pio.string = 1;
 	vcpu->arch.pio.down = down;
-	vcpu->arch.pio.guest_page_offset = offset_in_page(address);
 	vcpu->arch.pio.rep = rep;
 
 	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2577,15 +2552,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	else
 		in_page = offset_in_page(address) + size;
 	now = min(count, (unsigned long)in_page / size);
-	if (!now) {
-		/*
-		 * String I/O straddles page boundary.  Pin two guest pages
-		 * so that we satisfy atomicity constraints.  Do just one
-		 * transaction to avoid complexity.
-		 */
-		nr_pages = 2;
+	if (!now)
 		now = 1;
-	}
 	if (down) {
 		/*
 		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
@@ -2600,15 +2568,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 
-	for (i = 0; i < nr_pages; ++i) {
-		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
-		vcpu->arch.pio.guest_pages[i] = page;
-		if (!page) {
-			kvm_inject_gp(vcpu, 0);
-			free_pio_guest_pages(vcpu);
-			return 1;
-		}
-	}
+	vcpu->arch.pio.guest_gva = address;
 
 	pio_dev = vcpu_find_pio_dev(vcpu, port,
 				    vcpu->arch.pio.cur_count,
@@ -2616,7 +2576,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
-		if (ret >= 0 && pio_dev) {
+		if (ret == X86EMUL_PROPAGATE_FAULT) {
+			kvm_inject_gp(vcpu, 0);
+			return 1;
+		}
+		if (ret == 0 && pio_dev) {
 			pio_string_write(pio_dev, vcpu);
 			complete_pio(vcpu);
 			if (vcpu->arch.pio.count == 0)
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 9b6f395c9625..5f4a18cae26b 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -43,8 +43,7 @@ typedef hfn_t pfn_t;
 struct kvm_pio_request {
 	unsigned long count;
 	int cur_count;
-	struct page *guest_pages[2];
-	unsigned guest_page_offset;
+	gva_t guest_gva;
 	int in;
 	int port;
 	int size;
-- 
cgit v1.2.3


From 52d939a0bf44081bc9f69b4fbdc9e7f416df27c7 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 30 Dec 2008 15:55:06 -0200
Subject: KVM: PIT: provide an option to disable interrupt reinjection

Certain clocks (such as TSC) in older 2.6 guests overaccount for lost
ticks, causing severe time drift. Interrupt reinjection magnifies the
problem.

Provide an option to disable it.

[avi: allow room for expansion in case we want to disable reinjection
      of other timers]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm.h |  5 +++++
 arch/x86/kvm/i8254.c       |  4 ++++
 arch/x86/kvm/i8254.h       |  1 +
 arch/x86/kvm/x86.c         | 21 +++++++++++++++++++++
 include/linux/kvm.h        |  4 ++++
 5 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 32eb96c7ca27..54bcf2281526 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -233,4 +233,9 @@ struct kvm_guest_debug_arch {
 struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
+
+struct kvm_reinject_control {
+	__u8 pit_reinject;
+	__u8 reserved[31];
+};
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 72bd275a9b5c..528daadeba49 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 	if (!atomic_inc_and_test(&pt->pending))
 		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
 
+	if (!pt->reinject)
+		atomic_set(&pt->pending, 1);
+
 	if (vcpu0 && waitqueue_active(&vcpu0->wq))
 		wake_up_interruptible(&vcpu0->wq);
 
@@ -580,6 +583,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	pit_state->irq_ack_notifier.gsi = 0;
 	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
 	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+	pit_state->pit_timer.reinject = true;
 	mutex_unlock(&pit->pit_state.lock);
 
 	kvm_pit_reset(pit);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97aa..76959c4b500e 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
 	s64 period; /* unit: ns */
 	s64 scheduled;
 	atomic_t pending;
+	bool reinject;
 };
 
 struct kvm_kpit_channel_state {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c3fbe8c55c13..a1f14611f4b9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -993,6 +993,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_SYNC_MMU:
+	case KVM_CAP_REINJECT_CONTROL:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1728,6 +1729,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 	return r;
 }
 
+static int kvm_vm_ioctl_reinject(struct kvm *kvm,
+				 struct kvm_reinject_control *control)
+{
+	if (!kvm->arch.vpit)
+		return -ENXIO;
+	kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+	return 0;
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -1925,6 +1935,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_REINJECT_CONTROL: {
+		struct kvm_reinject_control control;
+		r =  -EFAULT;
+		if (copy_from_user(&control, argp, sizeof(control)))
+			goto out;
+		r = kvm_vm_ioctl_reinject(kvm, &control);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 11e3e6197c83..ae7a12c77427 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -396,6 +396,9 @@ struct kvm_trace_rec {
 #if defined(CONFIG_X86)
 #define KVM_CAP_SET_GUEST_DEBUG 23
 #endif
+#if defined(CONFIG_X86)
+#define KVM_CAP_REINJECT_CONTROL 24
+#endif
 
 /*
  * ioctls for VM fds
@@ -429,6 +432,7 @@ struct kvm_trace_rec {
 				   struct kvm_assigned_pci_dev)
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			    struct kvm_assigned_irq)
+#define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
 
 /*
  * ioctls for vcpu fds
-- 
cgit v1.2.3


From 1c08364c3565242f1e1bd585bc2ce458967941af Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 4 Jan 2009 12:39:07 +0200
Subject: KVM: Move struct kvm_pio_request into x86 kvm_host.h

This is an x86 specific stucture and has no business living in common code.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 12 ++++++++++++
 include/linux/kvm_types.h       | 12 ------------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b74576aec19a..863ea73431ad 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -227,6 +227,18 @@ struct kvm_pv_mmu_op_buffer {
 	char buf[512] __aligned(sizeof(long));
 };
 
+struct kvm_pio_request {
+	unsigned long count;
+	int cur_count;
+	gva_t guest_gva;
+	int in;
+	int port;
+	int size;
+	int string;
+	int down;
+	int rep;
+};
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 5f4a18cae26b..2b8318c83e53 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -40,16 +40,4 @@ typedef unsigned long  hfn_t;
 
 typedef hfn_t pfn_t;
 
-struct kvm_pio_request {
-	unsigned long count;
-	int cur_count;
-	gva_t guest_gva;
-	int in;
-	int port;
-	int size;
-	int string;
-	int down;
-	int rep;
-};
-
 #endif /* __KVM_TYPES_H__ */
-- 
cgit v1.2.3


From 67346440e83d2a2f2e9801f370b6240317c7d9bd Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 6 Jan 2009 10:03:01 +0800
Subject: KVM: Remove duplicated prototype of kvm_arch_destroy_vm

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e92212f970db..3cf0ede3fd73 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -237,7 +237,6 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 				   int user_alloc);
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg);
-void kvm_arch_destroy_vm(struct kvm *kvm);
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
-- 
cgit v1.2.3


From 17071fe74fe0fbfdb03cd9b82f2490447cf1f986 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 6 Jan 2009 16:25:11 +0800
Subject: KVM: Add support to disable MSI for assigned device

MSI is always enabled by default for msi2intx=1. But if msi2intx=0, we
have to disable MSI if guest require to do so.

The patch also discard unnecessary msi2intx judgment if guest want to update
MSI state.

Notice KVM_DEV_IRQ_ASSIGN_MSI_ACTION is a mask which should cover all MSI
related operations, though we only got one for now.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h |  1 +
 virt/kvm/kvm_main.c | 18 ++++++++++++++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ae7a12c77427..73f348066866 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -550,6 +550,7 @@ struct kvm_assigned_irq {
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
+#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
 #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
 
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f83ef9c7e89b..04401e17c758 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -343,6 +343,14 @@ static int assigned_device_update_msi(struct kvm *kvm,
 		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
 		adev->guest_irq = airq->guest_irq;
 		adev->ack_notifier.gsi = airq->guest_irq;
+	} else {
+		/*
+		 * Guest require to disable device MSI, we disable MSI and
+		 * re-enable INTx by default again. Notice it's only for
+		 * non-msi2intx.
+		 */
+		assigned_device_update_intx(kvm, adev, airq);
+		return 0;
 	}
 
 	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
@@ -379,6 +387,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 {
 	int r = 0;
 	struct kvm_assigned_dev_kernel *match;
+	u32 current_flags = 0, changed_flags;
 
 	mutex_lock(&kvm->lock);
 
@@ -416,8 +425,13 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		}
 	}
 
-	if ((!msi2intx &&
-	     (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
+	if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
+		 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
+		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+
+	changed_flags = assigned_irq->flags ^ current_flags;
+
+	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
 	    (msi2intx && match->dev->msi_enabled)) {
 #ifdef CONFIG_X86
 		r = assigned_device_update_msi(kvm, match, assigned_irq);
-- 
cgit v1.2.3


From 75858a84a6207f5e60196f6bbd18fde4250e5759 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 4 Jan 2009 17:10:50 +0200
Subject: KVM: Interrupt mask notifiers for ioapic

Allow clients to request notifications when the guest masks or unmasks a
particular irq line.  This complements irq ack notifications, as the guest
will not ack an irq line that is masked.

Currently implemented for the ioapic only.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 17 +++++++++++++++++
 virt/kvm/ioapic.c        |  6 ++++++
 virt/kvm/irq_comm.c      | 24 ++++++++++++++++++++++++
 virt/kvm/kvm_main.c      |  3 +++
 4 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3cf0ede3fd73..99963f36a6db 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -127,6 +127,10 @@ struct kvm {
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
 
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+	struct hlist_head mask_notifier_list;
+#endif
+
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
 	struct mmu_notifier mmu_notifier;
 	unsigned long mmu_notifier_seq;
@@ -320,6 +324,19 @@ struct kvm_assigned_dev_kernel {
 	struct pci_dev *dev;
 	struct kvm *kvm;
 };
+
+struct kvm_irq_mask_notifier {
+	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
+	int irq;
+	struct hlist_node link;
+};
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+				    struct kvm_irq_mask_notifier *kimn);
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+				      struct kvm_irq_mask_notifier *kimn);
+void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
+
 void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 23b81cf242af..e85a2bcd2db1 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -101,6 +101,7 @@ static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
 	unsigned index;
+	bool mask_before, mask_after;
 
 	switch (ioapic->ioregsel) {
 	case IOAPIC_REG_VERSION:
@@ -120,6 +121,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		ioapic_debug("change redir index %x val %x\n", index, val);
 		if (index >= IOAPIC_NUM_PINS)
 			return;
+		mask_before = ioapic->redirtbl[index].fields.mask;
 		if (ioapic->ioregsel & 1) {
 			ioapic->redirtbl[index].bits &= 0xffffffff;
 			ioapic->redirtbl[index].bits |= (u64) val << 32;
@@ -128,6 +130,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 			ioapic->redirtbl[index].bits |= (u32) val;
 			ioapic->redirtbl[index].fields.remote_irr = 0;
 		}
+		mask_after = ioapic->redirtbl[index].fields.mask;
+		if (mask_before != mask_after)
+			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
 		if (ioapic->irr & (1 << index))
 			ioapic_service(ioapic, index);
 		break;
@@ -426,3 +431,4 @@ int kvm_ioapic_init(struct kvm *kvm)
 	kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
 	return 0;
 }
+
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index aa5d1e5c497e..5162a411e4d2 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -99,3 +99,27 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
 }
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+				    struct kvm_irq_mask_notifier *kimn)
+{
+	kimn->irq = irq;
+	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+}
+
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+				      struct kvm_irq_mask_notifier *kimn)
+{
+	hlist_del(&kimn->link);
+}
+
+void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
+{
+	struct kvm_irq_mask_notifier *kimn;
+	struct hlist_node *n;
+
+	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
+		if (kimn->irq == irq)
+			kimn->func(kimn, mask);
+}
+
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 04401e17c758..786a3ae373b0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -842,6 +842,9 @@ static struct kvm *kvm_create_vm(void)
 
 	if (IS_ERR(kvm))
 		goto out;
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-- 
cgit v1.2.3


From 399ec807ddc38ecccf8c06dbde04531cbdc63e11 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 19 Nov 2008 13:58:46 +0200
Subject: KVM: Userspace controlled irq routing

Currently KVM has a static routing from GSI numbers to interrupts (namely,
0-15 are mapped 1:1 to both PIC and IOAPIC, and 16:23 are mapped 1:1 to
the IOAPIC).  This is insufficient for several reasons:

- HPET requires non 1:1 mapping for the timer interrupt
- MSIs need a new method to assign interrupt numbers and dispatch them
- ACPI APIC mode needs to be able to reassign the PCI LINK interrupts to the
  ioapics

This patch implements an interrupt routing table (as a linked list, but this
can be easily changed) and a userspace interface to replace the table.  The
routing table is initialized according to the current hardwired mapping.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c |   5 ++
 arch/x86/kvm/x86.c       |   6 ++
 include/linux/kvm.h      |  33 ++++++++++
 include/linux/kvm_host.h |  31 +++++++++
 virt/kvm/irq_comm.c      | 168 +++++++++++++++++++++++++++++++++++++++++++++--
 virt/kvm/kvm_main.c      |  36 ++++++++++
 6 files changed, 275 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 1477f91617a5..dbf527a57341 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -919,6 +919,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_ioapic_init(kvm);
 		if (r)
 			goto out;
+		r = kvm_setup_default_irq_routing(kvm);
+		if (r) {
+			kfree(kvm->arch.vioapic);
+			goto out;
+		}
 		break;
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 141a0166e51c..32e3a7ec6ad2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1835,6 +1835,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			}
 		} else
 			goto out;
+		r = kvm_setup_default_irq_routing(kvm);
+		if (r) {
+			kfree(kvm->arch.vpic);
+			kfree(kvm->arch.vioapic);
+			goto out;
+		}
 		break;
 	case KVM_CREATE_PIT:
 		mutex_lock(&kvm->lock);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 73f348066866..7a5d73a8d4fa 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -398,6 +398,38 @@ struct kvm_trace_rec {
 #endif
 #if defined(CONFIG_X86)
 #define KVM_CAP_REINJECT_CONTROL 24
+#endif
+#if defined(CONFIG_X86)||defined(CONFIG_IA64)
+#define KVM_CAP_IRQ_ROUTING 25
+#endif
+
+#ifdef KVM_CAP_IRQ_ROUTING
+
+struct kvm_irq_routing_irqchip {
+	__u32 irqchip;
+	__u32 pin;
+};
+
+/* gsi routing entry types */
+#define KVM_IRQ_ROUTING_IRQCHIP 1
+
+struct kvm_irq_routing_entry {
+	__u32 gsi;
+	__u32 type;
+	__u32 flags;
+	__u32 pad;
+	union {
+		struct kvm_irq_routing_irqchip irqchip;
+		__u32 pad[8];
+	} u;
+};
+
+struct kvm_irq_routing {
+	__u32 nr;
+	__u32 flags;
+	struct kvm_irq_routing_entry entries[0];
+};
+
 #endif
 
 /*
@@ -430,6 +462,7 @@ struct kvm_trace_rec {
 			_IOW(KVMIO,  0x68, struct kvm_coalesced_mmio_zone)
 #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
 				   struct kvm_assigned_pci_dev)
+#define KVM_SET_GSI_ROUTING       _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			    struct kvm_assigned_irq)
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 99963f36a6db..ce285e01bd57 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -107,6 +107,19 @@ struct kvm_memory_slot {
 	int user_alloc;
 };
 
+struct kvm_kernel_irq_routing_entry {
+	u32 gsi;
+	void (*set)(struct kvm_kernel_irq_routing_entry *e,
+		    struct kvm *kvm, int level);
+	union {
+		struct {
+			unsigned irqchip;
+			unsigned pin;
+		} irqchip;
+	};
+	struct list_head link;
+};
+
 struct kvm {
 	struct mutex lock; /* protects the vcpus array and APIC accesses */
 	spinlock_t mmu_lock;
@@ -128,6 +141,7 @@ struct kvm {
 #endif
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct hlist_head mask_notifier_list;
 #endif
 
@@ -480,4 +494,21 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
 }
 #endif
 
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+
+#define KVM_MAX_IRQ_ROUTES 1024
+
+int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_set_irq_routing(struct kvm *kvm,
+			const struct kvm_irq_routing_entry *entries,
+			unsigned nr,
+			unsigned flags);
+void kvm_free_irq_routing(struct kvm *kvm);
+
+#else
+
+static inline void kvm_free_irq_routing(struct kvm *kvm) {}
+
+#endif
+
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 5162a411e4d2..a797fa5e6420 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -24,9 +24,24 @@
 
 #include "ioapic.h"
 
+static void kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
+			    struct kvm *kvm, int level)
+{
+#ifdef CONFIG_X86
+	kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
+#endif
+}
+
+static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
+			       struct kvm *kvm, int level)
+{
+	kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
+}
+
 /* This should be called with the kvm->lock mutex held */
 void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
+	struct kvm_kernel_irq_routing_entry *e;
 	unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
 	/* Logical OR for level trig interrupt */
@@ -39,10 +54,9 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state));
-#ifdef CONFIG_X86
-	kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state));
-#endif
+	list_for_each_entry(e, &kvm->irq_routing, link)
+		if (e->gsi == irq)
+			e->set(e, kvm, !!(*irq_state));
 }
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
@@ -123,3 +137,149 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 			kimn->func(kimn, mask);
 }
 
+static void __kvm_free_irq_routing(struct list_head *irq_routing)
+{
+	struct kvm_kernel_irq_routing_entry *e, *n;
+
+	list_for_each_entry_safe(e, n, irq_routing, link)
+		kfree(e);
+}
+
+void kvm_free_irq_routing(struct kvm *kvm)
+{
+	__kvm_free_irq_routing(&kvm->irq_routing);
+}
+
+int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+			const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+	int delta;
+
+	e->gsi = ue->gsi;
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_IRQCHIP:
+		delta = 0;
+		switch (ue->u.irqchip.irqchip) {
+		case KVM_IRQCHIP_PIC_MASTER:
+			e->set = kvm_set_pic_irq;
+			break;
+		case KVM_IRQCHIP_PIC_SLAVE:
+				e->set = kvm_set_pic_irq;
+			delta = 8;
+			break;
+		case KVM_IRQCHIP_IOAPIC:
+				e->set = kvm_set_ioapic_irq;
+			break;
+		default:
+			goto out;
+		}
+		e->irqchip.irqchip = ue->u.irqchip.irqchip;
+		e->irqchip.pin = ue->u.irqchip.pin + delta;
+		break;
+	default:
+		goto out;
+	}
+	r = 0;
+out:
+	return r;
+}
+
+
+int kvm_set_irq_routing(struct kvm *kvm,
+			const struct kvm_irq_routing_entry *ue,
+			unsigned nr,
+			unsigned flags)
+{
+	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
+	struct list_head tmp = LIST_HEAD_INIT(tmp);
+	struct kvm_kernel_irq_routing_entry *e = NULL;
+	unsigned i;
+	int r;
+
+	for (i = 0; i < nr; ++i) {
+		r = -EINVAL;
+		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
+			goto out;
+		if (ue->flags)
+			goto out;
+		r = -ENOMEM;
+		e = kzalloc(sizeof(*e), GFP_KERNEL);
+		if (!e)
+			goto out;
+		r = setup_routing_entry(e, ue);
+		if (r)
+			goto out;
+		++ue;
+		list_add(&e->link, &irq_list);
+		e = NULL;
+	}
+
+	mutex_lock(&kvm->lock);
+	list_splice(&kvm->irq_routing, &tmp);
+	INIT_LIST_HEAD(&kvm->irq_routing);
+	list_splice(&irq_list, &kvm->irq_routing);
+	INIT_LIST_HEAD(&irq_list);
+	list_splice(&tmp, &irq_list);
+	mutex_unlock(&kvm->lock);
+
+	r = 0;
+
+out:
+	kfree(e);
+	__kvm_free_irq_routing(&irq_list);
+	return r;
+}
+
+#define IOAPIC_ROUTING_ENTRY(irq) \
+	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
+	  .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
+#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
+
+#ifdef CONFIG_X86
+#define SELECT_PIC(irq) \
+	((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
+#  define PIC_ROUTING_ENTRY(irq) \
+	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
+	  .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
+#  define ROUTING_ENTRY2(irq) \
+	IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
+#else
+#  define ROUTING_ENTRY2(irq) \
+	IOAPIC_ROUTING_ENTRY(irq)
+#endif
+
+static const struct kvm_irq_routing_entry default_routing[] = {
+	ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
+	ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
+	ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
+	ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
+	ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
+	ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
+	ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
+	ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
+	ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
+	ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
+	ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
+	ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
+#ifdef CONFIG_IA64
+	ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
+	ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
+	ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
+	ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
+	ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
+	ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
+	ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
+	ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
+	ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
+	ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
+	ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
+	ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
+#endif
+};
+
+int kvm_setup_default_irq_routing(struct kvm *kvm)
+{
+	return kvm_set_irq_routing(kvm, default_routing,
+				   ARRAY_SIZE(default_routing), 0);
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 786a3ae373b0..c65484b471c6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -843,6 +843,7 @@ static struct kvm *kvm_create_vm(void)
 	if (IS_ERR(kvm))
 		goto out;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+	INIT_LIST_HEAD(&kvm->irq_routing);
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 #endif
 
@@ -926,6 +927,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
+	kvm_free_irq_routing(kvm);
 	kvm_io_bus_destroy(&kvm->pio_bus);
 	kvm_io_bus_destroy(&kvm->mmio_bus);
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
@@ -1945,6 +1947,36 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+#endif
+#ifdef KVM_CAP_IRQ_ROUTING
+	case KVM_SET_GSI_ROUTING: {
+		struct kvm_irq_routing routing;
+		struct kvm_irq_routing __user *urouting;
+		struct kvm_irq_routing_entry *entries;
+
+		r = -EFAULT;
+		if (copy_from_user(&routing, argp, sizeof(routing)))
+			goto out;
+		r = -EINVAL;
+		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+			goto out;
+		if (routing.flags)
+			goto out;
+		r = -ENOMEM;
+		entries = vmalloc(routing.nr * sizeof(*entries));
+		if (!entries)
+			goto out;
+		r = -EFAULT;
+		urouting = argp;
+		if (copy_from_user(entries, urouting->entries,
+				   routing.nr * sizeof(*entries)))
+			goto out_free_irq_routing;
+		r = kvm_set_irq_routing(kvm, entries, routing.nr,
+					routing.flags);
+	out_free_irq_routing:
+		vfree(entries);
+		break;
+	}
 #endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
@@ -2012,6 +2044,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 		return 1;
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+	case KVM_CAP_IRQ_ROUTING:
+		return 1;
+#endif
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 91b2ae773d3b168b763237fac33f75b13d891f20 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 19 Jan 2009 14:57:52 +0200
Subject: KVM: Avoid using CONFIG_ in userspace visible headers

Kconfig symbols are not available in userspace, and are not stripped by
headers-install.  Avoid their use by adding #defines in <asm/kvm.h> to
suit each architecture.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm.h | 1 +
 include/linux/kvm.h        | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 54bcf2281526..dc3f6cf11704 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -15,6 +15,7 @@
 #define __KVM_HAVE_DEVICE_ASSIGNMENT
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
+#define __KVM_HAVE_GUEST_DEBUG
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 7a5d73a8d4fa..869462ca7625 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -393,13 +393,13 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_USER_NMI
 #define KVM_CAP_USER_NMI 22
 #endif
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_GUEST_DEBUG
 #define KVM_CAP_SET_GUEST_DEBUG 23
 #endif
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_PIT
 #define KVM_CAP_REINJECT_CONTROL 24
 #endif
-#if defined(CONFIG_X86)||defined(CONFIG_IA64)
+#ifdef __KVM_HAVE_IOAPIC
 #define KVM_CAP_IRQ_ROUTING 25
 #endif
 
-- 
cgit v1.2.3


From 44882eed2ebe7f75f8cdae5671ab1d6e0fa40dbc Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 27 Jan 2009 15:12:38 -0200
Subject: KVM: make irq ack notifications aware of routing table

IRQ ack notifications assume an identity mapping between pin->gsi,
which might not be the case with, for example, HPET.

Translate before acking.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Acked-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/i8259.c     |  5 +++--
 arch/x86/kvm/irq.h       |  2 ++
 include/linux/kvm_host.h |  2 +-
 virt/kvm/ioapic.c        | 10 +++++-----
 virt/kvm/irq_comm.c      | 13 ++++++++++---
 5 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0103fd..93160375c841 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -49,7 +49,8 @@ static void pic_unlock(struct kvm_pic *s)
 	spin_unlock(&s->lock);
 
 	while (acks) {
-		kvm_notify_acked_irq(kvm, __ffs(acks));
+		kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
+				     __ffs(acks));
 		acks &= acks - 1;
 	}
 
@@ -232,7 +233,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 	}
 	pic_update_irq(s);
 	pic_unlock(s);
-	kvm_notify_acked_irq(kvm, irq);
+	kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
 
 	return intno;
 }
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee538d0..9f593188129e 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -32,6 +32,8 @@
 #include "lapic.h"
 
 #define PIC_NUM_PINS 16
+#define SELECT_PIC(irq) \
+	((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
 
 struct kvm;
 struct kvm_vcpu;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce285e01bd57..c03a0a9a8584 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -352,7 +352,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
 void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
 void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index e85a2bcd2db1..1c986ac59ad6 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -293,20 +293,20 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 	}
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi,
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
 				    int trigger_mode)
 {
 	union ioapic_redir_entry *ent;
 
-	ent = &ioapic->redirtbl[gsi];
+	ent = &ioapic->redirtbl[pin];
 
-	kvm_notify_acked_irq(ioapic->kvm, gsi);
+	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
 
 	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
-			ioapic_service(ioapic, gsi);
+		if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
+			ioapic_service(ioapic, pin);
 	}
 }
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a797fa5e6420..7aa5086c8622 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -59,10 +59,19 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 			e->set(e, kvm, !!(*irq_state));
 }
 
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_irq_ack_notifier *kian;
 	struct hlist_node *n;
+	unsigned gsi = pin;
+
+	list_for_each_entry(e, &kvm->irq_routing, link)
+		if (e->irqchip.irqchip == irqchip &&
+		    e->irqchip.pin == pin) {
+			gsi = e->gsi;
+			break;
+		}
 
 	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
 		if (kian->gsi == gsi)
@@ -237,8 +246,6 @@ out:
 #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
 
 #ifdef CONFIG_X86
-#define SELECT_PIC(irq) \
-	((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
 #  define PIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
 	  .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
-- 
cgit v1.2.3


From 79950e1073150909619b7c0f9a39a2fea83a42d8 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 10 Feb 2009 13:57:06 +0800
Subject: KVM: Use irq routing API for MSI

Merge MSI userspace interface with IRQ routing table. Notice the API have been
changed, and using IRQ routing table would be the only interface kvm-userspace
supported.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h      |  9 ++++++
 include/linux/kvm_host.h |  2 +-
 virt/kvm/irq_comm.c      | 78 +++++++++++++++++++++++++++++++++++++++++++-----
 virt/kvm/kvm_main.c      | 70 ++++---------------------------------------
 4 files changed, 86 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 869462ca7625..2163b3dd36e7 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -410,8 +410,16 @@ struct kvm_irq_routing_irqchip {
 	__u32 pin;
 };
 
+struct kvm_irq_routing_msi {
+	__u32 address_lo;
+	__u32 address_hi;
+	__u32 data;
+	__u32 pad;
+};
+
 /* gsi routing entry types */
 #define KVM_IRQ_ROUTING_IRQCHIP 1
+#define KVM_IRQ_ROUTING_MSI 2
 
 struct kvm_irq_routing_entry {
 	__u32 gsi;
@@ -420,6 +428,7 @@ struct kvm_irq_routing_entry {
 	__u32 pad;
 	union {
 		struct kvm_irq_routing_irqchip irqchip;
+		struct kvm_irq_routing_msi msi;
 		__u32 pad[8];
 	} u;
 };
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c03a0a9a8584..339eda3ca6ee 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -116,6 +116,7 @@ struct kvm_kernel_irq_routing_entry {
 			unsigned irqchip;
 			unsigned pin;
 		} irqchip;
+		struct msi_msg msi;
 	};
 	struct list_head link;
 };
@@ -327,7 +328,6 @@ struct kvm_assigned_dev_kernel {
 	int host_irq;
 	bool host_irq_disabled;
 	int guest_irq;
-	struct msi_msg guest_msi;
 #define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
 #define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 7aa5086c8622..6bc7439eff6e 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -20,6 +20,11 @@
  */
 
 #include <linux/kvm_host.h>
+
+#ifdef CONFIG_X86
+#include <asm/msidef.h>
+#endif
+
 #include "irq.h"
 
 #include "ioapic.h"
@@ -38,17 +43,70 @@ static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 	kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
+static void kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+			struct kvm *kvm, int level)
+{
+	int vcpu_id;
+	struct kvm_vcpu *vcpu;
+	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+	int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK)
+			>> MSI_ADDR_DEST_ID_SHIFT;
+	int vector = (e->msi.data & MSI_DATA_VECTOR_MASK)
+			>> MSI_DATA_VECTOR_SHIFT;
+	int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+				(unsigned long *)&e->msi.address_lo);
+	int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+				(unsigned long *)&e->msi.data);
+	int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
+				(unsigned long *)&e->msi.data);
+	u32 deliver_bitmask;
+
+	BUG_ON(!ioapic);
+
+	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+				dest_id, dest_mode);
+	/* IOAPIC delivery mode value is the same as MSI here */
+	switch (delivery_mode) {
+	case IOAPIC_LOWEST_PRIORITY:
+		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+				deliver_bitmask);
+		if (vcpu != NULL)
+			kvm_apic_set_irq(vcpu, vector, trig_mode);
+		else
+			printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
+		break;
+	case IOAPIC_FIXED:
+		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+			if (!(deliver_bitmask & (1 << vcpu_id)))
+				continue;
+			deliver_bitmask &= ~(1 << vcpu_id);
+			vcpu = ioapic->kvm->vcpus[vcpu_id];
+			if (vcpu)
+				kvm_apic_set_irq(vcpu, vector, trig_mode);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
 /* This should be called with the kvm->lock mutex held */
 void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
 	struct kvm_kernel_irq_routing_entry *e;
-	unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
+	unsigned long *irq_state, sig_level;
+
+	if (irq < KVM_IOAPIC_NUM_PINS) {
+		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
-	/* Logical OR for level trig interrupt */
-	if (level)
-		set_bit(irq_source_id, irq_state);
-	else
-		clear_bit(irq_source_id, irq_state);
+		/* Logical OR for level trig interrupt */
+		if (level)
+			set_bit(irq_source_id, irq_state);
+		else
+			clear_bit(irq_source_id, irq_state);
+		sig_level = !!(*irq_state);
+	} else /* Deal with MSI/MSI-X */
+		sig_level = 1;
 
 	/* Not possible to detect if the guest uses the PIC or the
 	 * IOAPIC.  So set the bit in both. The guest will ignore
@@ -56,7 +114,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	 */
 	list_for_each_entry(e, &kvm->irq_routing, link)
 		if (e->gsi == irq)
-			e->set(e, kvm, !!(*irq_state));
+			e->set(e, kvm, sig_level);
 }
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
@@ -186,6 +244,12 @@ int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 		e->irqchip.irqchip = ue->u.irqchip.irqchip;
 		e->irqchip.pin = ue->u.irqchip.pin + delta;
 		break;
+	case KVM_IRQ_ROUTING_MSI:
+		e->set = kvm_set_msi;
+		e->msi.address_lo = ue->u.msi.address_lo;
+		e->msi.address_hi = ue->u.msi.address_hi;
+		e->msi.data = ue->u.msi.data;
+		break;
 	default:
 		goto out;
 	}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c65484b471c6..266bdaf0ce44 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,10 +47,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-#ifdef CONFIG_X86
-#include <asm/msidef.h>
-#endif
-
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
 #endif
@@ -85,57 +81,6 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 static bool kvm_rebooting;
 
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
-
-#ifdef CONFIG_X86
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
-{
-	int vcpu_id;
-	struct kvm_vcpu *vcpu;
-	struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
-	int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
-			>> MSI_ADDR_DEST_ID_SHIFT;
-	int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
-			>> MSI_DATA_VECTOR_SHIFT;
-	int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
-				(unsigned long *)&dev->guest_msi.address_lo);
-	int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
-				(unsigned long *)&dev->guest_msi.data);
-	int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
-				(unsigned long *)&dev->guest_msi.data);
-	u32 deliver_bitmask;
-
-	BUG_ON(!ioapic);
-
-	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
-				dest_id, dest_mode);
-	/* IOAPIC delivery mode value is the same as MSI here */
-	switch (delivery_mode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
-				deliver_bitmask);
-		if (vcpu != NULL)
-			kvm_apic_set_irq(vcpu, vector, trig_mode);
-		else
-			printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
-		break;
-	case IOAPIC_FIXED:
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu)
-				kvm_apic_set_irq(vcpu, vector, trig_mode);
-		}
-		break;
-	default:
-		printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
-	}
-}
-#else
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
-#endif
-
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
 {
@@ -162,13 +107,10 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&assigned_dev->kvm->lock);
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
-		kvm_set_irq(assigned_dev->kvm,
-			    assigned_dev->irq_source_id,
-			    assigned_dev->guest_irq, 1);
-	else if (assigned_dev->irq_requested_type &
-				KVM_ASSIGNED_DEV_GUEST_MSI) {
-		assigned_device_msi_dispatch(assigned_dev);
+	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+		    assigned_dev->guest_irq, 1);
+
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
 		enable_irq(assigned_dev->host_irq);
 		assigned_dev->host_irq_disabled = false;
 	}
@@ -331,17 +273,15 @@ static int assigned_device_update_msi(struct kvm *kvm,
 {
 	int r;
 
+	adev->guest_irq = airq->guest_irq;
 	if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
 		/* x86 don't care upper address of guest msi message addr */
 		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
 		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
-		adev->guest_msi.data = airq->guest_msi.data;
 		adev->ack_notifier.gsi = -1;
 	} else if (msi2intx) {
 		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
 		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->guest_irq = airq->guest_irq;
 		adev->ack_notifier.gsi = airq->guest_irq;
 	} else {
 		/*
-- 
cgit v1.2.3


From c807660407a695f390034e402edfe544a1d2e40c Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Wed, 4 Feb 2009 17:52:04 +0100
Subject: KVM: Fix kvmclock on !constant_tsc boxes

kvmclock currently falls apart on machines without constant tsc.
This patch fixes it.  Changes:

  * keep tsc frequency in a per-cpu variable.
  * handle kvmclock update using a new request flag, thus checking
    whenever we need an update each time we enter guest context.
  * use a cpufreq notifier to track frequency changes and force
    kvmclock updates.
  * send ipis to kick cpu out of guest context if needed to make
    sure the guest doesn't see stale values.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c       | 103 ++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/kvm_host.h |   1 +
 2 files changed, 95 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f83590b47dd..05d7be89b5eb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
 #include <linux/highmem.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
 		 hv_clock->tsc_to_system_mul);
 }
 
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
 static void kvm_write_guest_time(struct kvm_vcpu *v)
 {
 	struct timespec ts;
@@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	if ((!vcpu->time_page))
 		return;
 
-	if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
-		kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
-		vcpu->hv_clock_tsc_khz = tsc_khz;
+	if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+		kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	}
 
 	/* Keep irq disabled to prevent changes to the clock */
@@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+	struct kvm_vcpu_arch *vcpu = &v->arch;
+
+	if (!vcpu->time_page)
+		return 0;
+	set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+	return 1;
+}
+
 static bool msr_mtrr_valid(unsigned msr)
 {
 	switch (msr) {
@@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 			vcpu->arch.time_page = NULL;
 		}
 
-		kvm_write_guest_time(vcpu);
+		kvm_request_guest_time_update(vcpu);
 		break;
 	}
 	default:
@@ -1000,6 +1013,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
+	case KVM_CAP_CLOCKSOURCE:
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
@@ -1025,9 +1039,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IOMMU:
 		r = iommu_found();
 		break;
-	case KVM_CAP_CLOCKSOURCE:
-		r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
-		break;
 	default:
 		r = 0;
 		break;
@@ -1098,7 +1109,7 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
-	kvm_write_guest_time(vcpu);
+	kvm_request_guest_time_update(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2642,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 
+static void bounce_off(void *info)
+{
+	/* nothing */
+}
+
+static unsigned int  ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				     void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int i, send_ipi = 0;
+
+	if (!ref_freq)
+		ref_freq = freq->old;
+
+	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+		return 0;
+	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+		return 0;
+	per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+	spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (!vcpu)
+				continue;
+			if (vcpu->cpu != freq->cpu)
+				continue;
+			if (!kvm_request_guest_time_update(vcpu))
+				continue;
+			if (vcpu->cpu != smp_processor_id())
+				send_ipi++;
+		}
+	}
+	spin_unlock(&kvm_lock);
+
+	if (freq->old < freq->new && send_ipi) {
+		/*
+		 * We upscale the frequency.  Must make the guest
+		 * doesn't see old kvmclock values while running with
+		 * the new frequency, otherwise we risk the guest sees
+		 * time go backwards.
+		 *
+		 * In case we update the frequency for another cpu
+		 * (which might be in guest context) send an interrupt
+		 * to kick the cpu out of guest context.  Next time
+		 * guest context is entered kvmclock will be updated,
+		 * so the guest will not see stale values.
+		 */
+		smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+	}
+	return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+        .notifier_call  = kvmclock_cpufreq_notifier
+};
+
 int kvm_arch_init(void *opaque)
 {
-	int r;
+	int r, cpu;
 	struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
 
 	if (kvm_x86_ops) {
@@ -2675,6 +2749,15 @@ int kvm_arch_init(void *opaque)
 	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
 			PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+	for_each_possible_cpu(cpu)
+		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+		tsc_khz_ref = tsc_khz;
+		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+					  CPUFREQ_TRANSITION_NOTIFIER);
+	}
+
 	return 0;
 
 out:
@@ -3010,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
 			__kvm_migrate_timers(vcpu);
+		if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+			kvm_write_guest_time(vcpu);
 		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
 			kvm_mmu_sync_roots(vcpu);
 		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 339eda3ca6ee..18b4df8264cf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -37,6 +37,7 @@
 #define KVM_REQ_PENDING_TIMER      5
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
+#define KVM_REQ_KVMCLOCK_UPDATE    8
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
-- 
cgit v1.2.3


From 4925663a079c77d95d8685228ad6675fc5639c8e Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 4 Feb 2009 17:28:14 +0200
Subject: KVM: Report IRQ injection status to userspace.

IRQ injection status is either -1 (if there was no CPU found
that should except the interrupt because IRQ was masked or
ioapic was misconfigured or ...) or >= 0 in that case the
number indicates to how many CPUs interrupt was injected.
If the value is 0 it means that the interrupt was coalesced
and probably should be reinjected.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c        | 12 ++++++++++--
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/i8259.c            | 18 +++++++++++++-----
 arch/x86/kvm/x86.c              | 13 +++++++++++--
 include/linux/kvm.h             |  7 ++++++-
 include/linux/kvm_host.h        |  4 ++--
 virt/kvm/ioapic.c               | 23 ++++++++++++++++-------
 virt/kvm/ioapic.h               |  2 +-
 virt/kvm/irq_comm.c             | 41 ++++++++++++++++++++++++++++-------------
 9 files changed, 88 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 9c77e3939e97..076b00d1dbff 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -182,7 +182,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_MP_STATE:
-
+	case KVM_CAP_IRQ_INJECT_STATUS:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -927,6 +927,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		}
 		break;
+	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
 
@@ -934,10 +935,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&irq_event, argp, sizeof irq_event))
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
+			__s32 status;
 			mutex_lock(&kvm->lock);
-			kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 				    irq_event.irq, irq_event.level);
 			mutex_unlock(&kvm->lock);
+			if (ioctl == KVM_IRQ_LINE_STATUS) {
+				irq_event.status = status;
+				if (copy_to_user(argp, &irq_event,
+							sizeof irq_event))
+					goto out;
+			}
 			r = 0;
 		}
 		break;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5fd388..f0faf58044ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -616,7 +616,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 			   u32 error_code);
 
-void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_set_irq(void *opaque, int irq, int level);
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 93160375c841..b4e662e94ddc 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -77,12 +77,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm)
 /*
  * set irq level. If an edge is detected, then the IRR is set to 1
  */
-static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
+static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
 {
-	int mask;
+	int mask, ret = 1;
 	mask = 1 << irq;
 	if (s->elcr & mask)	/* level triggered */
 		if (level) {
+			ret = !(s->irr & mask);
 			s->irr |= mask;
 			s->last_irr |= mask;
 		} else {
@@ -91,11 +92,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
 		}
 	else	/* edge triggered */
 		if (level) {
-			if ((s->last_irr & mask) == 0)
+			if ((s->last_irr & mask) == 0) {
+				ret = !(s->irr & mask);
 				s->irr |= mask;
+			}
 			s->last_irr |= mask;
 		} else
 			s->last_irr &= ~mask;
+
+	return (s->imr & mask) ? -1 : ret;
 }
 
 /*
@@ -172,16 +177,19 @@ void kvm_pic_update_irq(struct kvm_pic *s)
 	pic_unlock(s);
 }
 
-void kvm_pic_set_irq(void *opaque, int irq, int level)
+int kvm_pic_set_irq(void *opaque, int irq, int level)
 {
 	struct kvm_pic *s = opaque;
+	int ret = -1;
 
 	pic_lock(s);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
-		pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
 	}
 	pic_unlock(s);
+
+	return ret;
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 05d7be89b5eb..e4db5be7c953 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1019,6 +1019,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_REINJECT_CONTROL:
+	case KVM_CAP_IRQ_INJECT_STATUS:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1877,6 +1878,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	create_pit_unlock:
 		mutex_unlock(&kvm->lock);
 		break;
+	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
 
@@ -1884,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&irq_event, argp, sizeof irq_event))
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
+			__s32 status;
 			mutex_lock(&kvm->lock);
-			kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-				    irq_event.irq, irq_event.level);
+			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event.irq, irq_event.level);
 			mutex_unlock(&kvm->lock);
+			if (ioctl == KVM_IRQ_LINE_STATUS) {
+				irq_event.status = status;
+				if (copy_to_user(argp, &irq_event,
+							sizeof irq_event))
+					goto out;
+			}
 			r = 0;
 		}
 		break;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2163b3dd36e7..dd48225d1824 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -48,7 +48,10 @@ struct kvm_irq_level {
 	 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
 	 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
 	 */
-	__u32 irq;
+	union {
+		__u32 irq;
+		__s32 status;
+	};
 	__u32 level;
 };
 
@@ -402,6 +405,7 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_IOAPIC
 #define KVM_CAP_IRQ_ROUTING 25
 #endif
+#define KVM_CAP_IRQ_INJECT_STATUS 26
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -465,6 +469,7 @@ struct kvm_irq_routing {
 #define KVM_CREATE_PIT		  _IO(KVMIO,  0x64)
 #define KVM_GET_PIT		  _IOWR(KVMIO, 0x65, struct kvm_pit_state)
 #define KVM_SET_PIT		  _IOR(KVMIO,  0x66, struct kvm_pit_state)
+#define KVM_IRQ_LINE_STATUS	  _IOWR(KVMIO, 0x67, struct kvm_irq_level)
 #define KVM_REGISTER_COALESCED_MMIO \
 			_IOW(KVMIO,  0x67, struct kvm_coalesced_mmio_zone)
 #define KVM_UNREGISTER_COALESCED_MMIO \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 18b4df8264cf..894a56e365e8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -110,7 +110,7 @@ struct kvm_memory_slot {
 
 struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
-	void (*set)(struct kvm_kernel_irq_routing_entry *e,
+	int (*set)(struct kvm_kernel_irq_routing_entry *e,
 		    struct kvm *kvm, int level);
 	union {
 		struct {
@@ -352,7 +352,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
-void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1c986ac59ad6..c3b99def9cbc 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -83,19 +83,22 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 	return result;
 }
 
-static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
 	union ioapic_redir_entry *pent;
+	int injected = -1;
 
 	pent = &ioapic->redirtbl[idx];
 
 	if (!pent->fields.mask) {
-		int injected = ioapic_deliver(ioapic, idx);
+		injected = ioapic_deliver(ioapic, idx);
 		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
 			pent->fields.remote_irr = 1;
 	}
 	if (!pent->fields.trig_mode)
 		ioapic->irr &= ~(1 << idx);
+
+	return injected;
 }
 
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
@@ -207,7 +210,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
 	u32 deliver_bitmask;
 	struct kvm_vcpu *vcpu;
-	int vcpu_id, r = 0;
+	int vcpu_id, r = -1;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
@@ -247,7 +250,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 			deliver_bitmask &= ~(1 << vcpu_id);
 			vcpu = ioapic->kvm->vcpus[vcpu_id];
 			if (vcpu) {
-				r = ioapic_inj_irq(ioapic, vcpu, vector,
+				if (r < 0)
+					r = 0;
+				r += ioapic_inj_irq(ioapic, vcpu, vector,
 					       trig_mode, delivery_mode);
 			}
 		}
@@ -258,8 +263,10 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 				continue;
 			deliver_bitmask &= ~(1 << vcpu_id);
 			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu)
+			if (vcpu) {
 				ioapic_inj_nmi(vcpu);
+				r = 1;
+			}
 			else
 				ioapic_debug("NMI to vcpu %d failed\n",
 						vcpu->vcpu_id);
@@ -273,11 +280,12 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	return r;
 }
 
-void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
 	u32 old_irr = ioapic->irr;
 	u32 mask = 1 << irq;
 	union ioapic_redir_entry entry;
+	int ret = 1;
 
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
 		entry = ioapic->redirtbl[irq];
@@ -288,9 +296,10 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 			ioapic->irr |= mask;
 			if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
 			    || !entry.fields.remote_irr)
-				ioapic_service(ioapic, irq);
+				ret = ioapic_service(ioapic, irq);
 		}
 	}
+	return ret;
 }
 
 static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 49c9581d2586..a34bd5e6436b 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -83,7 +83,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
 				       unsigned long bitmap);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
-void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 				u8 dest_mode);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 6bc7439eff6e..be8aba791554 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -29,22 +29,24 @@
 
 #include "ioapic.h"
 
-static void kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
-			    struct kvm *kvm, int level)
+static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
+			   struct kvm *kvm, int level)
 {
 #ifdef CONFIG_X86
-	kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
+	return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
+#else
+	return -1;
 #endif
 }
 
-static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
-			       struct kvm *kvm, int level)
+static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
+			      struct kvm *kvm, int level)
 {
-	kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
+	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
-static void kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-			struct kvm *kvm, int level)
+static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		       struct kvm *kvm, int level)
 {
 	int vcpu_id;
 	struct kvm_vcpu *vcpu;
@@ -88,13 +90,20 @@ static void kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	default:
 		break;
 	}
+	return 1;
 }
 
-/* This should be called with the kvm->lock mutex held */
-void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+/* This should be called with the kvm->lock mutex held
+ * Return value:
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ */
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	unsigned long *irq_state, sig_level;
+	int ret = -1;
 
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
@@ -113,8 +122,14 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	 * writes to the unused one.
 	 */
 	list_for_each_entry(e, &kvm->irq_routing, link)
-		if (e->gsi == irq)
-			e->set(e, kvm, sig_level);
+		if (e->gsi == irq) {
+			int r = e->set(e, kvm, sig_level);
+			if (r < 0)
+				continue;
+
+			ret = r + ((ret < 0) ? 0 : ret);
+		}
+	return ret;
 }
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
@@ -232,7 +247,7 @@ int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			e->set = kvm_set_pic_irq;
 			break;
 		case KVM_IRQCHIP_PIC_SLAVE:
-				e->set = kvm_set_pic_irq;
+			e->set = kvm_set_pic_irq;
 			delta = 8;
 			break;
 		case KVM_IRQCHIP_IOAPIC:
-- 
cgit v1.2.3


From 2df8a40bccf5999261d0d3a82eac5a77678e61bd Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Fri, 13 Feb 2009 10:50:56 +0800
Subject: KVM: define KVM_CAP_DEVICE_DEASSIGNMENT

define KVM_CAP_DEVICE_DEASSIGNMENT and KVM_DEASSIGN_PCI_DEVICE
for device deassignment.

the ioctl has been already implemented in the
commit: 0a920356748df4fb06e86c21c23d2ed6d31d37ad

Acked-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index dd48225d1824..0d94b274c3ae 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -406,6 +406,9 @@ struct kvm_trace_rec {
 #define KVM_CAP_IRQ_ROUTING 25
 #endif
 #define KVM_CAP_IRQ_INJECT_STATUS 26
+#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
+#define KVM_CAP_DEVICE_DEASSIGNMENT 27
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -480,6 +483,8 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			    struct kvm_assigned_irq)
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
+#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x72, \
+				     struct kvm_assigned_pci_dev)
 
 /*
  * ioctls for vcpu fds
-- 
cgit v1.2.3


From bc7a8660df62da3fb5cad025322eda75fbee8731 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 17 Mar 2009 19:27:19 +0800
Subject: KVM: Correct deassign device ioctl to IOW

It's IOR by mistake, so fix it before release.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0d94b274c3ae..311a073afe8a 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -483,7 +483,7 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			    struct kvm_assigned_irq)
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
-#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x72, \
+#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
 				     struct kvm_assigned_pci_dev)
 
 /*
-- 
cgit v1.2.3


From 32ca163c9cdb33151d79e95a7cf244f62b5d4418 Mon Sep 17 00:00:00 2001
From: Petros Koutoupis <pkoutoupis@hydrasystemsllc.com>
Date: Tue, 10 Mar 2009 08:25:54 +0100
Subject: block: genhd.h comment needs updating

The include/linux/genhd.h file, on line 338-352 declares some function
prototypes in which the comment on line 338 states that the definition of
these prototypes are to be found at drivers/block/genhd.c. The problem is
that genhd.c has been relocated to block/genhd.c. See attached patch to
correct this minor cosmetic typo.

Signed-off-by: Petros Koutoupis <pkoutoupis@hydrasystemsllc.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/genhd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 16948eaecae3..56946b21ab78 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -336,7 +336,7 @@ static inline void part_dec_in_flight(struct hd_struct *part)
 /* drivers/block/ll_rw_blk.c */
 extern void part_round_stats(int cpu, struct hd_struct *part);
 
-/* drivers/block/genhd.c */
+/* block/genhd.c */
 extern int get_blkdev_list(char *, int);
 extern void add_disk(struct gendisk *disk);
 extern void del_gendisk(struct gendisk *gp);
-- 
cgit v1.2.3


From 6d2a78e783416ba99e36beb1d4395b785b34e867 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 10 Mar 2009 08:27:39 +0100
Subject: block: add private bio_set for bio integrity allocations

The integrity bio allocation needs its own bio_set to avoid violating
the mempool allocation rules and risking deadlocks.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio-integrity.c  | 85 ++++++++++++++++-------------------------------------
 fs/bio.c            |  9 ++----
 include/linux/bio.h | 18 +++---------
 3 files changed, 32 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index fe2b1aa2464e..31c46a241bac 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -26,23 +26,23 @@
 #include <linux/workqueue.h>
 
 static struct kmem_cache *bio_integrity_slab __read_mostly;
+static mempool_t *bio_integrity_pool;
+static struct bio_set *integrity_bio_set;
 static struct workqueue_struct *kintegrityd_wq;
 
 /**
- * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
+ * bio_integrity_alloc - Allocate integrity payload and attach it to bio
  * @bio:	bio to attach integrity metadata to
  * @gfp_mask:	Memory allocation mask
  * @nr_vecs:	Number of integrity metadata scatter-gather elements
- * @bs:		bio_set to allocate from
  *
  * Description: This function prepares a bio for attaching integrity
  * metadata.  nr_vecs specifies the maximum number of pages containing
  * integrity metadata that can be attached.
  */
-struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
-							 gfp_t gfp_mask,
-							 unsigned int nr_vecs,
-							 struct bio_set *bs)
+struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
+						  gfp_t gfp_mask,
+						  unsigned int nr_vecs)
 {
 	struct bio_integrity_payload *bip;
 	struct bio_vec *iv;
@@ -50,7 +50,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
 
 	BUG_ON(bio == NULL);
 
-	bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
+	bip = mempool_alloc(bio_integrity_pool, gfp_mask);
 	if (unlikely(bip == NULL)) {
 		printk(KERN_ERR "%s: could not alloc bip\n", __func__);
 		return NULL;
@@ -58,10 +58,10 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
 
 	memset(bip, 0, sizeof(*bip));
 
-	iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs);
+	iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set);
 	if (unlikely(iv == NULL)) {
 		printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
-		mempool_free(bip, bs->bio_integrity_pool);
+		mempool_free(bip, bio_integrity_pool);
 		return NULL;
 	}
 
@@ -72,35 +72,16 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
 
 	return bip;
 }
-EXPORT_SYMBOL(bio_integrity_alloc_bioset);
-
-/**
- * bio_integrity_alloc - Allocate integrity payload and attach it to bio
- * @bio:	bio to attach integrity metadata to
- * @gfp_mask:	Memory allocation mask
- * @nr_vecs:	Number of integrity metadata scatter-gather elements
- *
- * Description: This function prepares a bio for attaching integrity
- * metadata.  nr_vecs specifies the maximum number of pages containing
- * integrity metadata that can be attached.
- */
-struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
-						  gfp_t gfp_mask,
-						  unsigned int nr_vecs)
-{
-	return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
-}
 EXPORT_SYMBOL(bio_integrity_alloc);
 
 /**
  * bio_integrity_free - Free bio integrity payload
  * @bio:	bio containing bip to be freed
- * @bs:		bio_set this bio was allocated from
  *
  * Description: Used to free the integrity portion of a bio. Usually
  * called from bio_free().
  */
-void bio_integrity_free(struct bio *bio, struct bio_set *bs)
+void bio_integrity_free(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio->bi_integrity;
 
@@ -111,8 +92,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
 	    && bip->bip_buf != NULL)
 		kfree(bip->bip_buf);
 
-	bvec_free_bs(bs, bip->bip_vec, bip->bip_pool);
-	mempool_free(bip, bs->bio_integrity_pool);
+	bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool);
+	mempool_free(bip, bio_integrity_pool);
 
 	bio->bi_integrity = NULL;
 }
@@ -686,19 +667,17 @@ EXPORT_SYMBOL(bio_integrity_split);
  * @bio:	New bio
  * @bio_src:	Original bio
  * @gfp_mask:	Memory allocation mask
- * @bs:		bio_set to allocate bip from
  *
  * Description:	Called to allocate a bip when cloning a bio
  */
-int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
-			gfp_t gfp_mask, struct bio_set *bs)
+int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
 {
 	struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
 	struct bio_integrity_payload *bip;
 
 	BUG_ON(bip_src == NULL);
 
-	bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
+	bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
 
 	if (bip == NULL)
 		return -EIO;
@@ -714,37 +693,25 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
 }
 EXPORT_SYMBOL(bio_integrity_clone);
 
-int bioset_integrity_create(struct bio_set *bs, int pool_size)
+static int __init bio_integrity_init(void)
 {
-	bs->bio_integrity_pool = mempool_create_slab_pool(pool_size,
-							  bio_integrity_slab);
-	if (!bs->bio_integrity_pool)
-		return -1;
-
-	return 0;
-}
-EXPORT_SYMBOL(bioset_integrity_create);
+	kintegrityd_wq = create_workqueue("kintegrityd");
 
-void bioset_integrity_free(struct bio_set *bs)
-{
-	if (bs->bio_integrity_pool)
-		mempool_destroy(bs->bio_integrity_pool);
-}
-EXPORT_SYMBOL(bioset_integrity_free);
+	if (!kintegrityd_wq)
+		panic("Failed to create kintegrityd\n");
 
-void __init bio_integrity_init_slab(void)
-{
 	bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
 					SLAB_HWCACHE_ALIGN|SLAB_PANIC);
-}
 
-static int __init integrity_init(void)
-{
-	kintegrityd_wq = create_workqueue("kintegrityd");
+	bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE,
+						      bio_integrity_slab);
+	if (!bio_integrity_pool)
+		panic("bio_integrity: can't allocate bip pool\n");
 
-	if (!kintegrityd_wq)
-		panic("Failed to create kintegrityd\n");
+	integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+	if (!integrity_bio_set)
+		panic("bio_integrity: can't allocate bio_set\n");
 
 	return 0;
 }
-subsys_initcall(integrity_init);
+subsys_initcall(bio_integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 9cc1430b4495..a040cde7f6fd 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -248,7 +248,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
 		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
-		bio_integrity_free(bio, bs);
+		bio_integrity_free(bio);
 
 	/*
 	 * If we have front padding, adjust the bio pointer before freeing
@@ -466,7 +466,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
 	if (bio_integrity(bio)) {
 		int ret;
 
-		ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
+		ret = bio_integrity_clone(b, bio, gfp_mask);
 
 		if (ret < 0) {
 			bio_put(b);
@@ -1529,7 +1529,6 @@ void bioset_free(struct bio_set *bs)
 	if (bs->bio_pool)
 		mempool_destroy(bs->bio_pool);
 
-	bioset_integrity_free(bs);
 	biovec_free_pools(bs);
 	bio_put_slab(bs);
 
@@ -1570,9 +1569,6 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (bioset_integrity_create(bs, pool_size))
-		goto bad;
-
 	if (!biovec_create_pools(bs, pool_size))
 		return bs;
 
@@ -1610,7 +1606,6 @@ static int __init init_bio(void)
 	if (!bio_slabs)
 		panic("bio: can't allocate bios\n");
 
-	bio_integrity_init_slab();
 	biovec_init_slabs();
 
 	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d8bd43bfdcf5..b05b1d4d17d2 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -426,9 +426,6 @@ struct bio_set {
 	unsigned int front_pad;
 
 	mempool_t *bio_pool;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-	mempool_t *bio_integrity_pool;
-#endif
 	mempool_t *bvec_pool;
 };
 
@@ -519,9 +516,8 @@ static inline int bio_has_data(struct bio *bio)
 
 #define bio_integrity(bio) (bio->bi_integrity != NULL)
 
-extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern void bio_integrity_free(struct bio *, struct bio_set *);
+extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
 extern int bio_integrity_enabled(struct bio *bio);
 extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
@@ -531,27 +527,21 @@ extern void bio_integrity_endio(struct bio *, int);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
 extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
-extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
-extern int bioset_integrity_create(struct bio_set *, int);
-extern void bioset_integrity_free(struct bio_set *);
-extern void bio_integrity_init_slab(void);
+extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 #define bio_integrity(a)		(0)
-#define bioset_integrity_create(a, b)	(0)
 #define bio_integrity_prep(a)		(0)
 #define bio_integrity_enabled(a)	(0)
-#define bio_integrity_clone(a, b, c,d )	(0)
-#define bioset_integrity_free(a)	do { } while (0)
-#define bio_integrity_free(a, b)	do { } while (0)
+#define bio_integrity_clone(a, b, c)	(0)
+#define bio_integrity_free(a)		do { } while (0)
 #define bio_integrity_endio(a, b)	do { } while (0)
 #define bio_integrity_advance(a, b)	do { } while (0)
 #define bio_integrity_trim(a, b, c)	do { } while (0)
 #define bio_integrity_split(a, b, c)	do { } while (0)
 #define bio_integrity_set_tag(a, b, c)	do { } while (0)
 #define bio_integrity_get_tag(a, b, c)	do { } while (0)
-#define bio_integrity_init_slab(a)	do { } while (0)
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-- 
cgit v1.2.3


From d399228646e26db315d6233bed65ec9d08c57f57 Mon Sep 17 00:00:00 2001
From: Petros Koutoupis <pkoutoupis@hydrasystemsllc.com>
Date: Wed, 11 Mar 2009 10:49:35 +0100
Subject: block: genhd.h cleanup patch

In include/linux/genhd.h: Line 335 has a comment that needs to be updated from: /* drivers/block/ll_rw_blk.c */ to /* block/blk-core.c */. Also as of kernel 2.6.16, the function definition for get_blkdev_list was removed from block/genhd.c but the function declaration is still present on line 339. This patch addresses both those fixes, by updating the comment and removing the declaration.

Signed-off-by: Petros Koutoupis <pkoutoupis@hydrasystemsllc.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/genhd.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 56946b21ab78..634c53028fb8 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -333,11 +333,10 @@ static inline void part_dec_in_flight(struct hd_struct *part)
 		part_to_disk(part)->part0.in_flight--;
 }
 
-/* drivers/block/ll_rw_blk.c */
+/* block/blk-core.c */
 extern void part_round_stats(int cpu, struct hd_struct *part);
 
 /* block/genhd.c */
-extern int get_blkdev_list(char *, int);
 extern void add_disk(struct gendisk *disk);
 extern void del_gendisk(struct gendisk *gp);
 extern void unlink_gendisk(struct gendisk *gp);
-- 
cgit v1.2.3


From 05378940caf979a8655c18b18a17213dcfa52412 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Tue, 24 Mar 2009 12:23:40 +0100
Subject: bsg: add support for tail queuing

Currently inherited from sg.c bsg will submit asynchronous request
 at the head-of-the-queue, (using "at_head" set in the call to
 blk_execute_rq_nowait()). This is bad in situation where the queues
 are full, requests will execute out of order, and can cause
 starvation of the first submitted requests.

The sg_io_v4->flags member is used and a bit is allocated to denote the
Q_AT_TAIL. Zero is to queue at_head as before, to be compatible with old
code at the write/read path. SG_IO code path behavior was changed so to
be the same as write/read behavior. SG_IO was very rarely used and breaking
compatibility with it is OK at this stage.

sg_io_hdr at sg.h also has a flags member and uses 3 bits from the first
nibble and one bit from the last nibble. Even though none of these bits
are supported by bsg, The second nibble is allocated for use by bsg. Just
in case.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
CC: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/bsg.c         | 9 +++++++--
 include/linux/bsg.h | 8 ++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/bsg.c b/block/bsg.c
index 0ce8806dd0c1..0f63b91d0af6 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -353,6 +353,8 @@ static void bsg_rq_end_io(struct request *rq, int uptodate)
 static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
 			    struct bsg_command *bc, struct request *rq)
 {
+	int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL));
+
 	/*
 	 * add bc command to busy queue and submit rq for io
 	 */
@@ -368,7 +370,7 @@ static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
 	dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc);
 
 	rq->end_io_data = bc;
-	blk_execute_rq_nowait(q, NULL, rq, 1, bsg_rq_end_io);
+	blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io);
 }
 
 static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd)
@@ -924,6 +926,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		struct request *rq;
 		struct bio *bio, *bidi_bio = NULL;
 		struct sg_io_v4 hdr;
+		int at_head;
 		u8 sense[SCSI_SENSE_BUFFERSIZE];
 
 		if (copy_from_user(&hdr, uarg, sizeof(hdr)))
@@ -936,7 +939,9 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		bio = rq->bio;
 		if (rq->next_rq)
 			bidi_bio = rq->next_rq->bio;
-		blk_execute_rq(bd->queue, NULL, rq, 0);
+
+		at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL));
+		blk_execute_rq(bd->queue, NULL, rq, at_head);
 		ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio);
 
 		if (copy_to_user(uarg, &hdr, sizeof(hdr)))
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index cf0303a60611..3f0c64ace424 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -7,6 +7,14 @@
 #define BSG_SUB_PROTOCOL_SCSI_TMF	1
 #define BSG_SUB_PROTOCOL_SCSI_TRANSPORT	2
 
+/*
+ * For flags member below
+ * sg.h sg_io_hdr also has bits defined for it's flags member. However
+ * none of these bits are implemented/used by bsg. The bits below are
+ * allocated to not conflict with sg.h ones anyway.
+ */
+#define BSG_FLAG_Q_AT_TAIL 0x10 /* default, == 0 at this bit, is Q_AT_HEAD */
+
 struct sg_io_v4 {
 	__s32 guard;		/* [i] 'Q' to differentiate from v3 */
 	__u32 protocol;		/* [i] 0 -> SCSI , .... */
-- 
cgit v1.2.3


From bf8e3355ec8f4e472f9841e94203cd759b45226e Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Fri, 5 Dec 2008 22:43:41 +0100
Subject: firewire: cdev: documentation fixlet

Reported-by: Jay Fenlason <fenlason@redhat.com>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 4d078e99c017..899ef279f5be 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -229,7 +229,7 @@ struct fw_cdev_get_info {
  * Send a request to the device.  This ioctl implements all outgoing requests.
  * Both quadlet and block request specify the payload as a pointer to the data
  * in the @data field.  Once the transaction completes, the kernel writes an
- * &fw_cdev_event_request event back.  The @closure field is passed back to
+ * &fw_cdev_event_response event back.  The @closure field is passed back to
  * user space in the response event.
  */
 struct fw_cdev_send_request {
-- 
cgit v1.2.3


From 632321ecd99bf85c982a75f8329b4ecbb95b3a8f Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Fri, 2 Jan 2009 12:47:13 +0100
Subject: firewire: cdev: fix documentation of FW_CDEV_IOC_GET_INFO

The FW_CDEV_IOC_GET_INFO ioctl looks at client->device->config_rom, not
at the local node's config ROM.

We could fix the implementation or the documentation.  I believe the way
how it is currently implemented is more useful than the way how it is
currently documented.  In fact, libdc1394 uses the ABI already as
implemented, not as documented.  Hence let's change the documentation.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 899ef279f5be..86c8ff5326f9 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -201,7 +201,7 @@ union fw_cdev_event {
  *		case, @rom_length is updated with the actual length of the
  *		configuration ROM.
  * @rom:	If non-zero, address of a buffer to be filled by a copy of the
- *		local node's configuration ROM
+ *		device's configuration ROM
  * @bus_reset:	If non-zero, address of a buffer to be filled by a
  *		&struct fw_cdev_event_bus_reset with the current state
  *		of the bus.  This does not cause a bus reset to happen.
-- 
cgit v1.2.3


From b1bda4cdc2037447bd66753bf5ccab66d91b0b59 Mon Sep 17 00:00:00 2001
From: "Jay Fenlason, Stefan Richter" <stefanr@s5r6.in-berlin.de>
Date: Sun, 4 Jan 2009 16:23:29 +0100
Subject: firewire: cdev: add ioctls for isochronous resource management

Based on
    Date: Tue, 18 Nov 2008 11:41:27 -0500
    From: Jay Fenlason <fenlason@redhat.com>
    Subject: [Patch V4] Add ISO resource management support
with several changes to the ABI and implementation.  Only the part of
the ABI which enables auto-reallocation and auto-deallocation is
included here.

This implements ioctls for kernel-assisted allocation of isochronous
channels and isochronous bandwidth.  The benefits are:
  - The client does not have to have write access to the /dev/fw* device
    corresponding to the IRM.
  - The client does not have to perform reallocation after bus resets.
  - Channel and bandwidth are deallocated by the kernel if the file is
    closed before the client deallocated the resources.  Thus resources
    are released even if the client crashes.

It is anticipated that future in-kernel code (firewire-core IRM code;
the firewire port of firedtv), will use the fw-iso.c portions of this
code too.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Tested-by: David Moore <dcm@acm.org>
---
 drivers/firewire/fw-cdev.c        | 215 +++++++++++++++++++++++++++++++++++++-
 drivers/firewire/fw-iso.c         | 176 +++++++++++++++++++++++++++++--
 drivers/firewire/fw-transaction.h |   4 +
 include/linux/firewire-cdev.h     | 100 +++++++++++++++---
 4 files changed, 475 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 4c33b51b735a..a227853aa1e2 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -24,6 +24,7 @@
 #include <linux/errno.h>
 #include <linux/firewire-cdev.h>
 #include <linux/idr.h>
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/mm.h>
@@ -35,6 +36,7 @@
 #include <linux/time.h>
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
+#include <linux/workqueue.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -114,6 +116,21 @@ struct descriptor_resource {
 	u32 data[0];
 };
 
+struct iso_resource {
+	struct client_resource resource;
+	struct client *client;
+	/* Schedule work and access todo only with client->lock held. */
+	struct delayed_work work;
+	enum {ISO_RES_ALLOC, ISO_RES_REALLOC, ISO_RES_DEALLOC,} todo;
+	int generation;
+	u64 channels;
+	s32 bandwidth;
+	struct iso_resource_event *e_alloc, *e_dealloc;
+};
+
+static void schedule_iso_resource(struct iso_resource *);
+static void release_iso_resource(struct client *, struct client_resource *);
+
 /*
  * dequeue_event() just kfree()'s the event, so the event has to be
  * the first field in a struct XYZ_event.
@@ -145,6 +162,11 @@ struct iso_interrupt_event {
 	struct fw_cdev_event_iso_interrupt interrupt;
 };
 
+struct iso_resource_event {
+	struct event event;
+	struct fw_cdev_event_iso_resource resource;
+};
+
 static inline void __user *u64_to_uptr(__u64 value)
 {
 	return (void __user *)(unsigned long)value;
@@ -290,6 +312,16 @@ static void for_each_client(struct fw_device *device,
 	mutex_unlock(&device->client_list_mutex);
 }
 
+static int schedule_reallocations(int id, void *p, void *data)
+{
+	struct client_resource *r = p;
+
+	if (r->release == release_iso_resource)
+		schedule_iso_resource(container_of(r,
+					struct iso_resource, resource));
+	return 0;
+}
+
 static void queue_bus_reset_event(struct client *client)
 {
 	struct bus_reset_event *e;
@@ -304,6 +336,10 @@ static void queue_bus_reset_event(struct client *client)
 
 	queue_event(client, &e->event,
 		    &e->reset, sizeof(e->reset), NULL, 0);
+
+	spin_lock_irq(&client->lock);
+	idr_for_each(&client->resource_idr, schedule_reallocations, client);
+	spin_unlock_irq(&client->lock);
 }
 
 void fw_device_cdev_update(struct fw_device *device)
@@ -376,8 +412,12 @@ static int add_client_resource(struct client *client,
 	else
 		ret = idr_get_new(&client->resource_idr, resource,
 				  &resource->handle);
-	if (ret >= 0)
+	if (ret >= 0) {
 		client_get(client);
+		if (resource->release == release_iso_resource)
+			schedule_iso_resource(container_of(resource,
+						struct iso_resource, resource));
+	}
 	spin_unlock_irqrestore(&client->lock, flags);
 
 	if (ret == -EAGAIN)
@@ -970,6 +1010,177 @@ static int ioctl_get_cycle_timer(struct client *client, void *buffer)
 	return 0;
 }
 
+static void iso_resource_work(struct work_struct *work)
+{
+	struct iso_resource_event *e;
+	struct iso_resource *r =
+			container_of(work, struct iso_resource, work.work);
+	struct client *client = r->client;
+	int generation, channel, bandwidth, todo;
+	bool skip, free, success;
+
+	spin_lock_irq(&client->lock);
+	generation = client->device->generation;
+	todo = r->todo;
+	/* Allow 1000ms grace period for other reallocations. */
+	if (todo == ISO_RES_ALLOC &&
+	    time_is_after_jiffies(client->device->card->reset_jiffies + HZ)) {
+		if (schedule_delayed_work(&r->work, DIV_ROUND_UP(HZ, 3)))
+			client_get(client);
+		skip = true;
+	} else {
+		/* We could be called twice within the same generation. */
+		skip = todo == ISO_RES_REALLOC &&
+		       r->generation == generation;
+	}
+	free = todo == ISO_RES_DEALLOC;
+	r->generation = generation;
+	spin_unlock_irq(&client->lock);
+
+	if (skip)
+		goto out;
+
+	bandwidth = r->bandwidth;
+
+	fw_iso_resource_manage(client->device->card, generation,
+			r->channels, &channel, &bandwidth,
+			todo == ISO_RES_ALLOC || todo == ISO_RES_REALLOC);
+	/*
+	 * Is this generation outdated already?  As long as this resource sticks
+	 * in the idr, it will be scheduled again for a newer generation or at
+	 * shutdown.
+	 */
+	if (channel == -EAGAIN &&
+	    (todo == ISO_RES_ALLOC || todo == ISO_RES_REALLOC))
+		goto out;
+
+	success = channel >= 0 || bandwidth > 0;
+
+	spin_lock_irq(&client->lock);
+	/*
+	 * Transit from allocation to reallocation, except if the client
+	 * requested deallocation in the meantime.
+	 */
+	if (r->todo == ISO_RES_ALLOC)
+		r->todo = ISO_RES_REALLOC;
+	/*
+	 * Allocation or reallocation failure?  Pull this resource out of the
+	 * idr and prepare for deletion, unless the client is shutting down.
+	 */
+	if (r->todo == ISO_RES_REALLOC && !success &&
+	    !client->in_shutdown &&
+	    idr_find(&client->resource_idr, r->resource.handle)) {
+		idr_remove(&client->resource_idr, r->resource.handle);
+		client_put(client);
+		free = true;
+	}
+	spin_unlock_irq(&client->lock);
+
+	if (todo == ISO_RES_ALLOC && channel >= 0)
+		r->channels = 1ULL << (63 - channel);
+
+	if (todo == ISO_RES_REALLOC && success)
+		goto out;
+
+	if (todo == ISO_RES_ALLOC) {
+		e = r->e_alloc;
+		r->e_alloc = NULL;
+	} else {
+		e = r->e_dealloc;
+		r->e_dealloc = NULL;
+	}
+	e->resource.handle	= r->resource.handle;
+	e->resource.channel	= channel;
+	e->resource.bandwidth	= bandwidth;
+
+	queue_event(client, &e->event,
+		    &e->resource, sizeof(e->resource), NULL, 0);
+
+	if (free) {
+		cancel_delayed_work(&r->work);
+		kfree(r->e_alloc);
+		kfree(r->e_dealloc);
+		kfree(r);
+	}
+ out:
+	client_put(client);
+}
+
+static void schedule_iso_resource(struct iso_resource *r)
+{
+	if (schedule_delayed_work(&r->work, 0))
+		client_get(r->client);
+}
+
+static void release_iso_resource(struct client *client,
+				 struct client_resource *resource)
+{
+	struct iso_resource *r =
+		container_of(resource, struct iso_resource, resource);
+
+	spin_lock_irq(&client->lock);
+	r->todo = ISO_RES_DEALLOC;
+	schedule_iso_resource(r);
+	spin_unlock_irq(&client->lock);
+}
+
+static int ioctl_allocate_iso_resource(struct client *client, void *buffer)
+{
+	struct fw_cdev_allocate_iso_resource *request = buffer;
+	struct iso_resource_event *e1, *e2;
+	struct iso_resource *r;
+	int ret;
+
+	if ((request->channels == 0 && request->bandwidth == 0) ||
+	    request->bandwidth > BANDWIDTH_AVAILABLE_INITIAL ||
+	    request->bandwidth < 0)
+		return -EINVAL;
+
+	r  = kmalloc(sizeof(*r), GFP_KERNEL);
+	e1 = kmalloc(sizeof(*e1), GFP_KERNEL);
+	e2 = kmalloc(sizeof(*e2), GFP_KERNEL);
+	if (r == NULL || e1 == NULL || e2 == NULL) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	INIT_DELAYED_WORK(&r->work, iso_resource_work);
+	r->client	= client;
+	r->todo		= ISO_RES_ALLOC;
+	r->generation	= -1;
+	r->channels	= request->channels;
+	r->bandwidth	= request->bandwidth;
+	r->e_alloc	= e1;
+	r->e_dealloc	= e2;
+
+	e1->resource.closure	= request->closure;
+	e1->resource.type	= FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED;
+	e2->resource.closure	= request->closure;
+	e2->resource.type	= FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED;
+
+	r->resource.release = release_iso_resource;
+	ret = add_client_resource(client, &r->resource, GFP_KERNEL);
+	if (ret < 0)
+		goto fail;
+	request->handle = r->resource.handle;
+
+	return 0;
+ fail:
+	kfree(r);
+	kfree(e1);
+	kfree(e2);
+
+	return ret;
+}
+
+static int ioctl_deallocate_iso_resource(struct client *client, void *buffer)
+{
+	struct fw_cdev_deallocate *request = buffer;
+
+	return release_client_resource(client, request->handle,
+				       release_iso_resource, NULL);
+}
+
 static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_get_info,
 	ioctl_send_request,
@@ -984,6 +1195,8 @@ static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_start_iso,
 	ioctl_stop_iso,
 	ioctl_get_cycle_timer,
+	ioctl_allocate_iso_resource,
+	ioctl_deallocate_iso_resource,
 };
 
 static int dispatch_ioctl(struct client *client,
diff --git a/drivers/firewire/fw-iso.c b/drivers/firewire/fw-iso.c
index 39f3bacee404..a7b57b253b06 100644
--- a/drivers/firewire/fw-iso.c
+++ b/drivers/firewire/fw-iso.c
@@ -1,5 +1,7 @@
 /*
- * Isochronous IO functionality
+ * Isochronous I/O functionality:
+ *   - Isochronous DMA context management
+ *   - Isochronous bus resource management (channels, bandwidth), client side
  *
  * Copyright (C) 2006 Kristian Hoegsberg <krh@bitplanet.net>
  *
@@ -18,15 +20,20 @@
  * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/dma-mapping.h>
-#include <linux/vmalloc.h>
+#include <linux/errno.h>
+#include <linux/firewire-constants.h>
+#include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
 
-#include "fw-transaction.h"
 #include "fw-topology.h"
-#include "fw-device.h"
+#include "fw-transaction.h"
+
+/*
+ * Isochronous DMA context management
+ */
 
 int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
 		       int page_count, enum dma_data_direction direction)
@@ -153,3 +160,160 @@ int fw_iso_context_stop(struct fw_iso_context *ctx)
 {
 	return ctx->card->driver->stop_iso(ctx);
 }
+
+/*
+ * Isochronous bus resource management (channels, bandwidth), client side
+ */
+
+static int manage_bandwidth(struct fw_card *card, int irm_id, int generation,
+			    int bandwidth, bool allocate)
+{
+	__be32 data[2];
+	int try, new, old = allocate ? BANDWIDTH_AVAILABLE_INITIAL : 0;
+
+	/*
+	 * On a 1394a IRM with low contention, try < 1 is enough.
+	 * On a 1394-1995 IRM, we need at least try < 2.
+	 * Let's just do try < 5.
+	 */
+	for (try = 0; try < 5; try++) {
+		new = allocate ? old - bandwidth : old + bandwidth;
+		if (new < 0 || new > BANDWIDTH_AVAILABLE_INITIAL)
+			break;
+
+		data[0] = cpu_to_be32(old);
+		data[1] = cpu_to_be32(new);
+		switch (fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP,
+				irm_id, generation, SCODE_100,
+				CSR_REGISTER_BASE + CSR_BANDWIDTH_AVAILABLE,
+				data, sizeof(data))) {
+		case RCODE_GENERATION:
+			/* A generation change frees all bandwidth. */
+			return allocate ? -EAGAIN : bandwidth;
+
+		case RCODE_COMPLETE:
+			if (be32_to_cpup(data) == old)
+				return bandwidth;
+
+			old = be32_to_cpup(data);
+			/* Fall through. */
+		}
+	}
+
+	return -EIO;
+}
+
+static int manage_channel(struct fw_card *card, int irm_id, int generation,
+			  __be32 channels_mask, u64 offset, bool allocate)
+{
+	__be32 data[2], c, old = allocate ? cpu_to_be32(~0) : 0;
+	int i, retry = 5;
+
+	for (i = 0; i < 32; i++) {
+		c = cpu_to_be32(1 << (31 - i));
+		if (!(channels_mask & c))
+			continue;
+
+		if (allocate == !(old & c))
+			continue;
+
+		data[0] = old;
+		data[1] = old ^ c;
+		switch (fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP,
+					   irm_id, generation, SCODE_100,
+					   offset, data, sizeof(data))) {
+		case RCODE_GENERATION:
+			/* A generation change frees all channels. */
+			return allocate ? -EAGAIN : i;
+
+		case RCODE_COMPLETE:
+			if (data[0] == old)
+				return i;
+
+			old = data[0];
+
+			/* Is the IRM 1394a-2000 compliant? */
+			if ((data[0] & c) != (data[1] & c))
+				continue;
+
+			/* 1394-1995 IRM, fall through to retry. */
+		default:
+			if (retry--)
+				i--;
+		}
+	}
+
+	return -EIO;
+}
+
+static void deallocate_channel(struct fw_card *card, int irm_id,
+			       int generation, int channel)
+{
+	__be32 mask;
+	u64 offset;
+
+	mask = channel < 32 ? cpu_to_be32(1 << (31 - channel)) :
+			      cpu_to_be32(1 << (63 - channel));
+	offset = channel < 32 ? CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI :
+				CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO;
+
+	manage_channel(card, irm_id, generation, mask, offset, false);
+}
+
+/**
+ * fw_iso_resource_manage - Allocate or deallocate a channel and/or bandwidth
+ *
+ * In parameters: card, generation, channels_mask, bandwidth, allocate
+ * Out parameters: channel, bandwidth
+ * This function blocks (sleeps) during communication with the IRM.
+ * Allocates or deallocates at most one channel out of channels_mask.
+ *
+ * Returns channel < 0 if no channel was allocated or deallocated.
+ * Returns bandwidth = 0 if no bandwidth was allocated or deallocated.
+ *
+ * If generation is stale, deallocations succeed but allocations fail with
+ * channel = -EAGAIN.
+ *
+ * If channel (de)allocation fails, bandwidth (de)allocation fails too.
+ * If bandwidth allocation fails, no channel will be allocated either.
+ * If bandwidth deallocation fails, channel deallocation may still have been
+ * successful.
+ */
+void fw_iso_resource_manage(struct fw_card *card, int generation,
+			    u64 channels_mask, int *channel, int *bandwidth,
+			    bool allocate)
+{
+	__be32 channels_hi = cpu_to_be32(channels_mask >> 32);
+	__be32 channels_lo = cpu_to_be32(channels_mask);
+	int irm_id, ret, c = -EINVAL;
+
+	spin_lock_irq(&card->lock);
+	irm_id = card->irm_node->node_id;
+	spin_unlock_irq(&card->lock);
+
+	if (channels_hi)
+		c = manage_channel(card, irm_id, generation, channels_hi,
+		    CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI, allocate);
+	if (channels_lo && c < 0) {
+		c = manage_channel(card, irm_id, generation, channels_lo,
+		    CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO, allocate);
+		if (c >= 0)
+			c += 32;
+	}
+	*channel = c;
+
+	if (channels_mask != 0 && c < 0)
+		*bandwidth = 0;
+
+	if (*bandwidth == 0)
+		return;
+
+	ret = manage_bandwidth(card, irm_id, generation, *bandwidth, allocate);
+	if (ret < 0)
+		*bandwidth = 0;
+
+	if (ret < 0 && c >= 0 && allocate) {
+		deallocate_channel(card, irm_id, generation, c);
+		*channel = ret;
+	}
+}
diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h
index 48e88d53998b..212a10293828 100644
--- a/drivers/firewire/fw-transaction.h
+++ b/drivers/firewire/fw-transaction.h
@@ -82,6 +82,7 @@
 #define CSR_SPEED_MAP			0x2000
 #define CSR_SPEED_MAP_END		0x3000
 
+#define BANDWIDTH_AVAILABLE_INITIAL	4915
 #define BROADCAST_CHANNEL_INITIAL	(1 << 31 | 31)
 #define BROADCAST_CHANNEL_VALID		(1 << 30)
 
@@ -343,6 +344,9 @@ int fw_iso_context_start(struct fw_iso_context *ctx,
 int fw_iso_context_stop(struct fw_iso_context *ctx);
 void fw_iso_context_destroy(struct fw_iso_context *ctx);
 
+void fw_iso_resource_manage(struct fw_card *card, int generation,
+		u64 channels_mask, int *channel, int *bandwidth, bool allocate);
+
 struct fw_card_driver {
 	/*
 	 * Enable the given card with the given initial config rom.
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 86c8ff5326f9..25b96dd0574f 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -25,10 +25,12 @@
 #include <linux/types.h>
 #include <linux/firewire-constants.h>
 
-#define FW_CDEV_EVENT_BUS_RESET		0x00
-#define FW_CDEV_EVENT_RESPONSE		0x01
-#define FW_CDEV_EVENT_REQUEST		0x02
-#define FW_CDEV_EVENT_ISO_INTERRUPT	0x03
+#define FW_CDEV_EVENT_BUS_RESET			0x00
+#define FW_CDEV_EVENT_RESPONSE			0x01
+#define FW_CDEV_EVENT_REQUEST			0x02
+#define FW_CDEV_EVENT_ISO_INTERRUPT		0x03
+#define FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED	0x04
+#define FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED	0x05
 
 /**
  * struct fw_cdev_event_common - Common part of all fw_cdev_event_ types
@@ -146,6 +148,37 @@ struct fw_cdev_event_iso_interrupt {
 	__u32 header[0];
 };
 
+/**
+ * struct fw_cdev_event_iso_resource - Iso resources were allocated or freed
+ * @closure:	See &fw_cdev_event_common;
+ *		set by %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctl
+ * @type:	%FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or
+ *		%FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED
+ * @handle:	Reference by which an allocated resource can be deallocated
+ * @channel:	Isochronous channel which was (de)allocated, if any
+ * @bandwidth:	Bandwidth allocation units which were (de)allocated, if any
+ * @channels_available:  Last known availability of channels
+ * @bandwidth_available: Last known availability of bandwidth
+ *
+ * An %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED event is sent after an isochronous
+ * resource was allocated at the IRM.  The client has to check @channel and
+ * @bandwidth for whether the allocation actually succeeded.
+ *
+ * @channel is <0 if no channel was allocated.
+ * @bandwidth is 0 if no bandwidth was allocated.
+ *
+ * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event is sent after an isochronous
+ * resource was deallocated at the IRM.  It is also sent when automatic
+ * reallocation after a bus reset failed.
+ */
+struct fw_cdev_event_iso_resource {
+	__u64 closure;
+	__u32 type;
+	__u32 handle;
+	__s32 channel;
+	__s32 bandwidth;
+};
+
 /**
  * union fw_cdev_event - Convenience union of fw_cdev_event_ types
  * @common:        Valid for all types
@@ -153,6 +186,9 @@ struct fw_cdev_event_iso_interrupt {
  * @response:      Valid if @common.type == %FW_CDEV_EVENT_RESPONSE
  * @request:       Valid if @common.type == %FW_CDEV_EVENT_REQUEST
  * @iso_interrupt: Valid if @common.type == %FW_CDEV_EVENT_ISO_INTERRUPT
+ * @iso_resource:  Valid if @common.type ==
+ *				%FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or
+ *				%FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED
  *
  * Convenience union for userspace use.  Events could be read(2) into an
  * appropriately aligned char buffer and then cast to this union for further
@@ -163,13 +199,15 @@ struct fw_cdev_event_iso_interrupt {
  * not fit will be discarded so that the next read(2) will return a new event.
  */
 union fw_cdev_event {
-	struct fw_cdev_event_common common;
-	struct fw_cdev_event_bus_reset bus_reset;
-	struct fw_cdev_event_response response;
-	struct fw_cdev_event_request request;
-	struct fw_cdev_event_iso_interrupt iso_interrupt;
+	struct fw_cdev_event_common		common;
+	struct fw_cdev_event_bus_reset		bus_reset;
+	struct fw_cdev_event_response		response;
+	struct fw_cdev_event_request		request;
+	struct fw_cdev_event_iso_interrupt	iso_interrupt;
+	struct fw_cdev_event_iso_resource	iso_resource;
 };
 
+/* available since kernel version 2.6.22 */
 #define FW_CDEV_IOC_GET_INFO		_IOWR('#', 0x00, struct fw_cdev_get_info)
 #define FW_CDEV_IOC_SEND_REQUEST	_IOW('#', 0x01, struct fw_cdev_send_request)
 #define FW_CDEV_IOC_ALLOCATE		_IOWR('#', 0x02, struct fw_cdev_allocate)
@@ -178,13 +216,18 @@ union fw_cdev_event {
 #define FW_CDEV_IOC_INITIATE_BUS_RESET	_IOW('#', 0x05, struct fw_cdev_initiate_bus_reset)
 #define FW_CDEV_IOC_ADD_DESCRIPTOR	_IOWR('#', 0x06, struct fw_cdev_add_descriptor)
 #define FW_CDEV_IOC_REMOVE_DESCRIPTOR	_IOW('#', 0x07, struct fw_cdev_remove_descriptor)
-
 #define FW_CDEV_IOC_CREATE_ISO_CONTEXT	_IOWR('#', 0x08, struct fw_cdev_create_iso_context)
 #define FW_CDEV_IOC_QUEUE_ISO		_IOWR('#', 0x09, struct fw_cdev_queue_iso)
 #define FW_CDEV_IOC_START_ISO		_IOW('#', 0x0a, struct fw_cdev_start_iso)
 #define FW_CDEV_IOC_STOP_ISO		_IOW('#', 0x0b, struct fw_cdev_stop_iso)
+
+/* available since kernel version 2.6.24 */
 #define FW_CDEV_IOC_GET_CYCLE_TIMER	_IOR('#', 0x0c, struct fw_cdev_get_cycle_timer)
 
+/* available since kernel version 2.6.30 */
+#define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE   _IOWR('#', 0x0d, struct fw_cdev_allocate_iso_resource)
+#define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE _IOW('#', 0x0e, struct fw_cdev_deallocate)
+
 /* FW_CDEV_VERSION History
  *
  * 1	Feb 18, 2007:  Initial version.
@@ -284,9 +327,9 @@ struct fw_cdev_allocate {
 };
 
 /**
- * struct fw_cdev_deallocate - Free an address range allocation
- * @handle:	Handle to the address range, as returned by the kernel when the
- *		range was allocated
+ * struct fw_cdev_deallocate - Free a CSR address range or isochronous resource
+ * @handle:	Handle to the address range or iso resource, as returned by the
+ *		kernel when the range or resource was allocated
  */
 struct fw_cdev_deallocate {
 	__u32 handle;
@@ -479,4 +522,35 @@ struct fw_cdev_get_cycle_timer {
 	__u32 cycle_timer;
 };
 
+/**
+ * struct fw_cdev_allocate_iso_resource - Allocate a channel or bandwidth
+ * @closure:	Passed back to userspace in correponding iso resource events
+ * @channels:	Isochronous channels of which one is to be allocated
+ * @bandwidth:	Isochronous bandwidth units to be allocated
+ * @handle:	Handle to the allocation, written by the kernel
+ *
+ * The %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctl initiates allocation of an
+ * isochronous channel and/or of isochronous bandwidth at the isochronous
+ * resource manager (IRM).  Only one of the channels specified in @channels is
+ * allocated.  An %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED is sent after
+ * communication with the IRM, indicating success or failure in the event data.
+ * The kernel will automatically reallocate the resources after bus resets.
+ * Should a reallocation fail, an %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event
+ * will be sent.  The kernel will also automatically deallocate the resources
+ * when the file descriptor is closed.
+ *
+ * @channels is a host-endian bitfield with the most significant bit
+ * representing channel 0 and the least significant bit representing channel 63:
+ * 1ULL << (63 - c)
+ *
+ * @bandwidth is expressed in bandwidth allocation units, i.e. the time to send
+ * one quadlet of data (payload or header data) at speed S1600.
+ */
+struct fw_cdev_allocate_iso_resource {
+	__u64 closure;
+	__u64 channels;
+	__u32 bandwidth;
+	__u32 handle;
+};
+
 #endif /* _LINUX_FIREWIRE_CDEV_H */
-- 
cgit v1.2.3


From 1ec3c0269d7196118cc7c403654ca5f19ef4d584 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 4 Jan 2009 16:23:29 +0100
Subject: firewire: cdev: add ioctls for manual iso resource management

This adds ioctls for allocation and deallocation of a channel or/and
bandwidth without auto-reallocation and without auto-deallocation.

The benefit of these ioctls is that libraw1394-style isochronous
resource management can be implemented without write access to the IRM's
character device file.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/fw-cdev.c    | 67 ++++++++++++++++++++++++++++++++++---------
 include/linux/firewire-cdev.h | 42 ++++++++++++++++++++-------
 2 files changed, 86 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index a227853aa1e2..08fe68d34f32 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -121,14 +121,15 @@ struct iso_resource {
 	struct client *client;
 	/* Schedule work and access todo only with client->lock held. */
 	struct delayed_work work;
-	enum {ISO_RES_ALLOC, ISO_RES_REALLOC, ISO_RES_DEALLOC,} todo;
+	enum {ISO_RES_ALLOC, ISO_RES_REALLOC, ISO_RES_DEALLOC,
+	      ISO_RES_ALLOC_ONCE, ISO_RES_DEALLOC_ONCE,} todo;
 	int generation;
 	u64 channels;
 	s32 bandwidth;
 	struct iso_resource_event *e_alloc, *e_dealloc;
 };
 
-static void schedule_iso_resource(struct iso_resource *);
+static int schedule_iso_resource(struct iso_resource *);
 static void release_iso_resource(struct client *, struct client_resource *);
 
 /*
@@ -1033,7 +1034,9 @@ static void iso_resource_work(struct work_struct *work)
 		skip = todo == ISO_RES_REALLOC &&
 		       r->generation == generation;
 	}
-	free = todo == ISO_RES_DEALLOC;
+	free = todo == ISO_RES_DEALLOC ||
+	       todo == ISO_RES_ALLOC_ONCE ||
+	       todo == ISO_RES_DEALLOC_ONCE;
 	r->generation = generation;
 	spin_unlock_irq(&client->lock);
 
@@ -1044,7 +1047,9 @@ static void iso_resource_work(struct work_struct *work)
 
 	fw_iso_resource_manage(client->device->card, generation,
 			r->channels, &channel, &bandwidth,
-			todo == ISO_RES_ALLOC || todo == ISO_RES_REALLOC);
+			todo == ISO_RES_ALLOC ||
+			todo == ISO_RES_REALLOC ||
+			todo == ISO_RES_ALLOC_ONCE);
 	/*
 	 * Is this generation outdated already?  As long as this resource sticks
 	 * in the idr, it will be scheduled again for a newer generation or at
@@ -1082,7 +1087,7 @@ static void iso_resource_work(struct work_struct *work)
 	if (todo == ISO_RES_REALLOC && success)
 		goto out;
 
-	if (todo == ISO_RES_ALLOC) {
+	if (todo == ISO_RES_ALLOC || todo == ISO_RES_ALLOC_ONCE) {
 		e = r->e_alloc;
 		r->e_alloc = NULL;
 	} else {
@@ -1106,10 +1111,17 @@ static void iso_resource_work(struct work_struct *work)
 	client_put(client);
 }
 
-static void schedule_iso_resource(struct iso_resource *r)
+static int schedule_iso_resource(struct iso_resource *r)
 {
-	if (schedule_delayed_work(&r->work, 0))
-		client_get(r->client);
+	int scheduled;
+
+	client_get(r->client);
+
+	scheduled = schedule_delayed_work(&r->work, 0);
+	if (!scheduled)
+		client_put(r->client);
+
+	return scheduled;
 }
 
 static void release_iso_resource(struct client *client,
@@ -1124,9 +1136,9 @@ static void release_iso_resource(struct client *client,
 	spin_unlock_irq(&client->lock);
 }
 
-static int ioctl_allocate_iso_resource(struct client *client, void *buffer)
+static int init_iso_resource(struct client *client,
+		struct fw_cdev_allocate_iso_resource *request, int todo)
 {
-	struct fw_cdev_allocate_iso_resource *request = buffer;
 	struct iso_resource_event *e1, *e2;
 	struct iso_resource *r;
 	int ret;
@@ -1146,7 +1158,7 @@ static int ioctl_allocate_iso_resource(struct client *client, void *buffer)
 
 	INIT_DELAYED_WORK(&r->work, iso_resource_work);
 	r->client	= client;
-	r->todo		= ISO_RES_ALLOC;
+	r->todo		= todo;
 	r->generation	= -1;
 	r->channels	= request->channels;
 	r->bandwidth	= request->bandwidth;
@@ -1158,8 +1170,14 @@ static int ioctl_allocate_iso_resource(struct client *client, void *buffer)
 	e2->resource.closure	= request->closure;
 	e2->resource.type	= FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED;
 
-	r->resource.release = release_iso_resource;
-	ret = add_client_resource(client, &r->resource, GFP_KERNEL);
+	if (todo == ISO_RES_ALLOC) {
+		r->resource.release = release_iso_resource;
+		ret = add_client_resource(client, &r->resource, GFP_KERNEL);
+	} else {
+		r->resource.release = NULL;
+		r->resource.handle = -1;
+		ret = schedule_iso_resource(r) ? 0 : -ENOMEM;
+	}
 	if (ret < 0)
 		goto fail;
 	request->handle = r->resource.handle;
@@ -1173,6 +1191,13 @@ static int ioctl_allocate_iso_resource(struct client *client, void *buffer)
 	return ret;
 }
 
+static int ioctl_allocate_iso_resource(struct client *client, void *buffer)
+{
+	struct fw_cdev_allocate_iso_resource *request = buffer;
+
+	return init_iso_resource(client, request, ISO_RES_ALLOC);
+}
+
 static int ioctl_deallocate_iso_resource(struct client *client, void *buffer)
 {
 	struct fw_cdev_deallocate *request = buffer;
@@ -1181,6 +1206,20 @@ static int ioctl_deallocate_iso_resource(struct client *client, void *buffer)
 				       release_iso_resource, NULL);
 }
 
+static int ioctl_allocate_iso_resource_once(struct client *client, void *buffer)
+{
+	struct fw_cdev_allocate_iso_resource *request = buffer;
+
+	return init_iso_resource(client, request, ISO_RES_ALLOC_ONCE);
+}
+
+static int ioctl_deallocate_iso_resource_once(struct client *client, void *buffer)
+{
+	struct fw_cdev_allocate_iso_resource *request = buffer;
+
+	return init_iso_resource(client, request, ISO_RES_DEALLOC_ONCE);
+}
+
 static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_get_info,
 	ioctl_send_request,
@@ -1197,6 +1236,8 @@ static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_get_cycle_timer,
 	ioctl_allocate_iso_resource,
 	ioctl_deallocate_iso_resource,
+	ioctl_allocate_iso_resource_once,
+	ioctl_deallocate_iso_resource_once,
 };
 
 static int dispatch_ioctl(struct client *client,
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 25b96dd0574f..08ca838a727b 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -151,7 +151,7 @@ struct fw_cdev_event_iso_interrupt {
 /**
  * struct fw_cdev_event_iso_resource - Iso resources were allocated or freed
  * @closure:	See &fw_cdev_event_common;
- *		set by %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctl
+ *		set by %FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE) ioctl
  * @type:	%FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or
  *		%FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED
  * @handle:	Reference by which an allocated resource can be deallocated
@@ -164,12 +164,12 @@ struct fw_cdev_event_iso_interrupt {
  * resource was allocated at the IRM.  The client has to check @channel and
  * @bandwidth for whether the allocation actually succeeded.
  *
- * @channel is <0 if no channel was allocated.
- * @bandwidth is 0 if no bandwidth was allocated.
- *
  * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event is sent after an isochronous
  * resource was deallocated at the IRM.  It is also sent when automatic
  * reallocation after a bus reset failed.
+ *
+ * @channel is <0 if no channel was (de)allocated or if reallocation failed.
+ * @bandwidth is 0 if no bandwidth was (de)allocated or if reallocation failed.
  */
 struct fw_cdev_event_iso_resource {
 	__u64 closure;
@@ -225,8 +225,10 @@ union fw_cdev_event {
 #define FW_CDEV_IOC_GET_CYCLE_TIMER	_IOR('#', 0x0c, struct fw_cdev_get_cycle_timer)
 
 /* available since kernel version 2.6.30 */
-#define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE   _IOWR('#', 0x0d, struct fw_cdev_allocate_iso_resource)
-#define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE _IOW('#', 0x0e, struct fw_cdev_deallocate)
+#define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE       _IOWR('#', 0x0d, struct fw_cdev_allocate_iso_resource)
+#define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE      _IOW('#', 0x0e, struct fw_cdev_deallocate)
+#define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE   _IOW('#', 0x0f, struct fw_cdev_allocate_iso_resource)
+#define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource)
 
 /* FW_CDEV_VERSION History
  *
@@ -523,11 +525,12 @@ struct fw_cdev_get_cycle_timer {
 };
 
 /**
- * struct fw_cdev_allocate_iso_resource - Allocate a channel or bandwidth
+ * struct fw_cdev_allocate_iso_resource - (De)allocate a channel or bandwidth
  * @closure:	Passed back to userspace in correponding iso resource events
- * @channels:	Isochronous channels of which one is to be allocated
- * @bandwidth:	Isochronous bandwidth units to be allocated
- * @handle:	Handle to the allocation, written by the kernel
+ * @channels:	Isochronous channels of which one is to be (de)allocated
+ * @bandwidth:	Isochronous bandwidth units to be (de)allocated
+ * @handle:	Handle to the allocation, written by the kernel (only valid in
+ *		case of %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctls)
  *
  * The %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE ioctl initiates allocation of an
  * isochronous channel and/or of isochronous bandwidth at the isochronous
@@ -539,6 +542,25 @@ struct fw_cdev_get_cycle_timer {
  * will be sent.  The kernel will also automatically deallocate the resources
  * when the file descriptor is closed.
  *
+ * The %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE ioctl can be used to initiate
+ * deallocation of resources which were allocated as described above.
+ * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event concludes this operation.
+ *
+ * The %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE ioctl is a variant of allocation
+ * without automatic re- or deallocation.
+ * An %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED event concludes this operation,
+ * indicating success or failure in its data.
+ *
+ * The %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE ioctl works like
+ * %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE except that resources are freed
+ * instead of allocated.  At most one channel may be specified in this ioctl.
+ * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event concludes this operation.
+ *
+ * To summarize, %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE allocates iso resources
+ * for the lifetime of the fd or handle.
+ * In contrast, %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE allocates iso resources
+ * for the duration of a bus generation.
+ *
  * @channels is a host-endian bitfield with the most significant bit
  * representing channel 0 and the least significant bit representing channel 63:
  * 1ULL << (63 - c)
-- 
cgit v1.2.3


From 33580a3ef5ba3bc0ee1b520df82a24bb37ce28f0 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 4 Jan 2009 16:23:29 +0100
Subject: firewire: cdev: add ioctl to query maximum transmission speed

While the speed of asynchronous transactions is automatically chosen by
the kernel, the speed of isochronous streams has to be chosen by the
initiating client.

In case of 1394a bus topologies, the maximum possible speed could be
figured out with some effort by evaluation of the remote node's link
speed field in the config ROM, the local node's link speed field, and
the PHY speeds and topologic information in the local node's or IRM's
topology map CSR.  However, this does not work in case of 1394b buses.

Hence add an ioctl to export the maximum speed which the kernel already
determined.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/fw-cdev.c    | 10 ++++++++++
 include/linux/firewire-cdev.h | 10 ++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 08fe68d34f32..05ad2a8f286c 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -1220,6 +1220,15 @@ static int ioctl_deallocate_iso_resource_once(struct client *client, void *buffe
 	return init_iso_resource(client, request, ISO_RES_DEALLOC_ONCE);
 }
 
+static int ioctl_get_speed(struct client *client, void *buffer)
+{
+	struct fw_cdev_get_speed *request = buffer;
+
+	request->max_speed = client->device->max_speed;
+
+	return 0;
+}
+
 static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_get_info,
 	ioctl_send_request,
@@ -1238,6 +1247,7 @@ static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_deallocate_iso_resource,
 	ioctl_allocate_iso_resource_once,
 	ioctl_deallocate_iso_resource_once,
+	ioctl_get_speed,
 };
 
 static int dispatch_ioctl(struct client *client,
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 08ca838a727b..f819c1026958 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -229,6 +229,7 @@ union fw_cdev_event {
 #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE      _IOW('#', 0x0e, struct fw_cdev_deallocate)
 #define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE   _IOW('#', 0x0f, struct fw_cdev_allocate_iso_resource)
 #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource)
+#define FW_CDEV_IOC_GET_SPEED                    _IOR('#', 0x11, struct fw_cdev_get_speed)
 
 /* FW_CDEV_VERSION History
  *
@@ -575,4 +576,13 @@ struct fw_cdev_allocate_iso_resource {
 	__u32 handle;
 };
 
+/**
+ * struct fw_cdev_get_speed - Query maximum speed to or from this device
+ * @max_speed:	Speed code; minimum of the device's link speed, the local node's
+ *		link speed, and all PHY port speeds between the two links
+ */
+struct fw_cdev_get_speed {
+	__u32 max_speed;
+};
+
 #endif /* _LINUX_FIREWIRE_CDEV_H */
-- 
cgit v1.2.3


From acfe8333572cad5dc70fce18ac966be0446548d7 Mon Sep 17 00:00:00 2001
From: "Jay Fenlason, Stefan Richter" <stefanr@s5r6.in-berlin.de>
Date: Sun, 4 Jan 2009 16:23:29 +0100
Subject: firewire: cdev: add ioctl for broadcast write requests

Write transactions to the broadcast node ID are a convenient way to
trigger functions of multiple nodes at once.  IIDC is a protocol which
can make use of this if multiple cameras with same command_regs_base are
connected at the same bus.

Based on
    Date: Wed, 10 Sep 2008 11:32:16 -0400
    From: Jay Fenlason <fenlason@redhat.com>
    Subject: [patch] SEND_BROADCAST_REQUEST
Changes:  ioctl_send_request() and ioctl_send_broadcast_request() now
share code.  Broadcast speed corrected to S100.  Check for proper tcode.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/fw-cdev.c    | 74 +++++++++++++++++++++++++++----------------
 include/linux/firewire-cdev.h |  1 +
 2 files changed, 48 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 05ad2a8f286c..a1637a86da3d 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -518,10 +518,10 @@ static void complete_transaction(struct fw_card *card, int rcode,
 	client_put(client);
 }
 
-static int ioctl_send_request(struct client *client, void *buffer)
+static int init_request(struct client *client,
+			struct fw_cdev_send_request *request,
+			int destination_id, int speed)
 {
-	struct fw_device *device = client->device;
-	struct fw_cdev_send_request *request = buffer;
 	struct outbound_transaction_event *e;
 	int ret;
 
@@ -544,24 +544,6 @@ static int ioctl_send_request(struct client *client, void *buffer)
 		goto failed;
 	}
 
-	switch (request->tcode) {
-	case TCODE_WRITE_QUADLET_REQUEST:
-	case TCODE_WRITE_BLOCK_REQUEST:
-	case TCODE_READ_QUADLET_REQUEST:
-	case TCODE_READ_BLOCK_REQUEST:
-	case TCODE_LOCK_MASK_SWAP:
-	case TCODE_LOCK_COMPARE_SWAP:
-	case TCODE_LOCK_FETCH_ADD:
-	case TCODE_LOCK_LITTLE_ADD:
-	case TCODE_LOCK_BOUNDED_ADD:
-	case TCODE_LOCK_WRAP_ADD:
-	case TCODE_LOCK_VENDOR_DEPENDENT:
-		break;
-	default:
-		ret = -EINVAL;
-		goto failed;
-	}
-
 	e->r.resource.release = release_transaction;
 	ret = add_client_resource(client, &e->r.resource, GFP_KERNEL);
 	if (ret < 0)
@@ -570,12 +552,9 @@ static int ioctl_send_request(struct client *client, void *buffer)
 	/* Get a reference for the transaction callback */
 	client_get(client);
 
-	fw_send_request(device->card, &e->r.transaction,
-			request->tcode & 0x1f,
-			device->node->node_id,
-			request->generation,
-			device->max_speed,
-			request->offset,
+	fw_send_request(client->device->card, &e->r.transaction,
+			request->tcode & 0x1f, destination_id,
+			request->generation, speed, request->offset,
 			e->response.data, request->length,
 			complete_transaction, e);
 
@@ -589,6 +568,31 @@ static int ioctl_send_request(struct client *client, void *buffer)
 	return ret;
 }
 
+static int ioctl_send_request(struct client *client, void *buffer)
+{
+	struct fw_cdev_send_request *request = buffer;
+
+	switch (request->tcode) {
+	case TCODE_WRITE_QUADLET_REQUEST:
+	case TCODE_WRITE_BLOCK_REQUEST:
+	case TCODE_READ_QUADLET_REQUEST:
+	case TCODE_READ_BLOCK_REQUEST:
+	case TCODE_LOCK_MASK_SWAP:
+	case TCODE_LOCK_COMPARE_SWAP:
+	case TCODE_LOCK_FETCH_ADD:
+	case TCODE_LOCK_LITTLE_ADD:
+	case TCODE_LOCK_BOUNDED_ADD:
+	case TCODE_LOCK_WRAP_ADD:
+	case TCODE_LOCK_VENDOR_DEPENDENT:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return init_request(client, request, client->device->node->node_id,
+			    client->device->max_speed);
+}
+
 static void release_request(struct client *client,
 			    struct client_resource *resource)
 {
@@ -1229,6 +1233,21 @@ static int ioctl_get_speed(struct client *client, void *buffer)
 	return 0;
 }
 
+static int ioctl_send_broadcast_request(struct client *client, void *buffer)
+{
+	struct fw_cdev_send_request *request = buffer;
+
+	switch (request->tcode) {
+	case TCODE_WRITE_QUADLET_REQUEST:
+	case TCODE_WRITE_BLOCK_REQUEST:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return init_request(client, request, LOCAL_BUS | 0x3f, SCODE_100);
+}
+
 static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_get_info,
 	ioctl_send_request,
@@ -1248,6 +1267,7 @@ static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_allocate_iso_resource_once,
 	ioctl_deallocate_iso_resource_once,
 	ioctl_get_speed,
+	ioctl_send_broadcast_request,
 };
 
 static int dispatch_ioctl(struct client *client,
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index f819c1026958..340a78502bca 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -230,6 +230,7 @@ union fw_cdev_event {
 #define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE   _IOW('#', 0x0f, struct fw_cdev_allocate_iso_resource)
 #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource)
 #define FW_CDEV_IOC_GET_SPEED                    _IOR('#', 0x11, struct fw_cdev_get_speed)
+#define FW_CDEV_IOC_SEND_BROADCAST_REQUEST       _IOW('#', 0x12, struct fw_cdev_send_request)
 
 /* FW_CDEV_VERSION History
  *
-- 
cgit v1.2.3


From 77258da403be4cfce84b6abcdb515ad0bd1f92f1 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Wed, 7 Jan 2009 20:14:53 +0100
Subject: firewire: cdev: increment fw_cdev_version, update documentation

Necessary due to
    Date: Tue, 22 Jul 2008 23:23:40 -0700
    From: David Moore <dcm@acm.org>
    Subject: firewire: Include iso timestamp in headers when header_size > 4

Side note:  The lack of upwards compatibility sounds worse than it is.
All existing client implementations, libraw1394 and libdc1394, set
header_size = 4.  And since the ABI v1 behaviour does not offer any
advantages over the new behaviour, we deliberately do not provide the
old behaviour anymore.

Also add documentation about the format of fw_cdev_get_cycle_timer which
may be used in conjunction with the timestamp of iso packets but has a
different format.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 340a78502bca..6ed9127680fd 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -138,7 +138,24 @@ struct fw_cdev_event_request {
  * This event is sent when the controller has completed an &fw_cdev_iso_packet
  * with the %FW_CDEV_ISO_INTERRUPT bit set.  In the receive case, the headers
  * stripped of all packets up until and including the interrupt packet are
- * returned in the @header field.
+ * returned in the @header field.  The amount of header data per packet is as
+ * specified at iso context creation by &fw_cdev_create_iso_context.header_size.
+ *
+ * In version 1 of this ABI, header data consisted of the 1394 isochronous
+ * packet header, followed by quadlets from the packet payload if
+ * &fw_cdev_create_iso_context.header_size > 4.
+ *
+ * In version 2 of this ABI, header data consist of the 1394 isochronous
+ * packet header, followed by a timestamp quadlet if
+ * &fw_cdev_create_iso_context.header_size > 4, followed by quadlets from the
+ * packet payload if &fw_cdev_create_iso_context.header_size > 8.
+ *
+ * Behaviour of ver. 1 of this ABI is no longer available since ABI ver. 2.
+ *
+ * Format of 1394 iso packet header: 16 bits len, 2 bits tag, 6 bits channel,
+ * 4 bits tcode, 4 bits sy, in big endian byte order.  Format of timestamp:
+ * 16 bits invalid, 3 bits cycleSeconds, 13 bits cycleCount, in big endian byte
+ * order.
  */
 struct fw_cdev_event_iso_interrupt {
 	__u64 closure;
@@ -232,11 +249,13 @@ union fw_cdev_event {
 #define FW_CDEV_IOC_GET_SPEED                    _IOR('#', 0x11, struct fw_cdev_get_speed)
 #define FW_CDEV_IOC_SEND_BROADCAST_REQUEST       _IOW('#', 0x12, struct fw_cdev_send_request)
 
-/* FW_CDEV_VERSION History
- *
- * 1	Feb 18, 2007:  Initial version.
+/*
+ * FW_CDEV_VERSION History
+ *  1  (2.6.22)  - initial version
+ *  2  (2.6.30)  - changed &fw_cdev_event_iso_interrupt.header if
+ *                 &fw_cdev_create_iso_context.header_size is 8 or more
  */
-#define FW_CDEV_VERSION		1
+#define FW_CDEV_VERSION 2
 
 /**
  * struct fw_cdev_get_info - General purpose information ioctl
@@ -417,6 +436,9 @@ struct fw_cdev_remove_descriptor {
  *
  * If a context was successfully created, the kernel writes back a handle to the
  * context, which must be passed in for subsequent operations on that context.
+ *
+ * Note that the effect of a @header_size > 4 depends on
+ * &fw_cdev_get_info.version, as documented at &fw_cdev_event_iso_interrupt.
  */
 struct fw_cdev_create_iso_context {
 	__u32 type;
@@ -520,6 +542,9 @@ struct fw_cdev_stop_iso {
  * The %FW_CDEV_IOC_GET_CYCLE_TIMER ioctl reads the isochronous cycle timer
  * and also the system clock.  This allows to express the receive time of an
  * isochronous packet as a system time with microsecond accuracy.
+ *
+ * @cycle_timer consists of 7 bits cycleSeconds, 13 bits cycleCount, and
+ * 12 bits cycleOffset, in host byte order.
  */
 struct fw_cdev_get_cycle_timer {
 	__u64 local_time;
-- 
cgit v1.2.3


From 5d9cb7d276a9c465fef5a771792eac2cf1929f2b Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Thu, 8 Jan 2009 23:07:40 +0100
Subject: firewire: cdev: add ioctls for iso resource management, amendment

Some fixes:
  - Remove stale documentation.
  - Fix a != vs. == thinko that got in the way of channel management.
  - Try bandwidth deallocation even if channel deallocation failed.

A simplification:
  - fw_cdev_allocate_iso_resource.channels is now ordered like
    libdc1394's dc1394_iso_allocate_channel() channels_allowed
    argument.

By the way, I looked closer at cards from NEC, TI, and VIA, and noticed
that they all don't implement IEEE 1394a behaviour which is meant to
deviate from IEEE 1212's notion of lock compare-swap.  This means that
we have to do two lock transactions instead of one in many cases where
one transaction would already succeed on a fully 1394a compliant IRM.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/fw-cdev.c    |  2 +-
 drivers/firewire/fw-iso.c     | 38 ++++++++++++++++++++++----------------
 include/linux/firewire-cdev.h | 10 ++++------
 3 files changed, 27 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index b93ad9c0a0d0..257b0c709a8b 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -1082,7 +1082,7 @@ static void iso_resource_work(struct work_struct *work)
 	spin_unlock_irq(&client->lock);
 
 	if (todo == ISO_RES_ALLOC && channel >= 0)
-		r->channels = 1ULL << (63 - channel);
+		r->channels = 1ULL << channel;
 
 	if (todo == ISO_RES_REALLOC && success)
 		goto out;
diff --git a/drivers/firewire/fw-iso.c b/drivers/firewire/fw-iso.c
index a7b57b253b06..f511d16efaee 100644
--- a/drivers/firewire/fw-iso.c
+++ b/drivers/firewire/fw-iso.c
@@ -204,17 +204,19 @@ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation,
 }
 
 static int manage_channel(struct fw_card *card, int irm_id, int generation,
-			  __be32 channels_mask, u64 offset, bool allocate)
+			  u32 channels_mask, u64 offset, bool allocate)
 {
-	__be32 data[2], c, old = allocate ? cpu_to_be32(~0) : 0;
+	__be32 data[2], c, all, old;
 	int i, retry = 5;
 
+	old = all = allocate ? cpu_to_be32(~0) : 0;
+
 	for (i = 0; i < 32; i++) {
-		c = cpu_to_be32(1 << (31 - i));
-		if (!(channels_mask & c))
+		if (!(channels_mask & 1 << i))
 			continue;
 
-		if (allocate == !(old & c))
+		c = cpu_to_be32(1 << (31 - i));
+		if ((old & c) != (all & c))
 			continue;
 
 		data[0] = old;
@@ -233,7 +235,7 @@ static int manage_channel(struct fw_card *card, int irm_id, int generation,
 			old = data[0];
 
 			/* Is the IRM 1394a-2000 compliant? */
-			if ((data[0] & c) != (data[1] & c))
+			if ((data[0] & c) == (data[1] & c))
 				continue;
 
 			/* 1394-1995 IRM, fall through to retry. */
@@ -249,11 +251,10 @@ static int manage_channel(struct fw_card *card, int irm_id, int generation,
 static void deallocate_channel(struct fw_card *card, int irm_id,
 			       int generation, int channel)
 {
-	__be32 mask;
+	u32 mask;
 	u64 offset;
 
-	mask = channel < 32 ? cpu_to_be32(1 << (31 - channel)) :
-			      cpu_to_be32(1 << (63 - channel));
+	mask = channel < 32 ? 1 << channel : 1 << (channel - 32);
 	offset = channel < 32 ? CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI :
 				CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO;
 
@@ -266,7 +267,12 @@ static void deallocate_channel(struct fw_card *card, int irm_id,
  * In parameters: card, generation, channels_mask, bandwidth, allocate
  * Out parameters: channel, bandwidth
  * This function blocks (sleeps) during communication with the IRM.
+ *
  * Allocates or deallocates at most one channel out of channels_mask.
+ * channels_mask is a bitfield with MSB for channel 63 and LSB for channel 0.
+ * (Note, the IRM's CHANNELS_AVAILABLE is a big-endian bitfield with MSB for
+ * channel 0 and LSB for channel 63.)
+ * Allocates or deallocates as many bandwidth allocation units as specified.
  *
  * Returns channel < 0 if no channel was allocated or deallocated.
  * Returns bandwidth = 0 if no bandwidth was allocated or deallocated.
@@ -274,17 +280,17 @@ static void deallocate_channel(struct fw_card *card, int irm_id,
  * If generation is stale, deallocations succeed but allocations fail with
  * channel = -EAGAIN.
  *
- * If channel (de)allocation fails, bandwidth (de)allocation fails too.
+ * If channel allocation fails, no bandwidth will be allocated either.
  * If bandwidth allocation fails, no channel will be allocated either.
- * If bandwidth deallocation fails, channel deallocation may still have been
- * successful.
+ * But deallocations of channel and bandwidth are tried independently
+ * of each other's success.
  */
 void fw_iso_resource_manage(struct fw_card *card, int generation,
 			    u64 channels_mask, int *channel, int *bandwidth,
 			    bool allocate)
 {
-	__be32 channels_hi = cpu_to_be32(channels_mask >> 32);
-	__be32 channels_lo = cpu_to_be32(channels_mask);
+	u32 channels_hi = channels_mask;	/* channels 31...0 */
+	u32 channels_lo = channels_mask >> 32;	/* channels 63...32 */
 	int irm_id, ret, c = -EINVAL;
 
 	spin_lock_irq(&card->lock);
@@ -302,7 +308,7 @@ void fw_iso_resource_manage(struct fw_card *card, int generation,
 	}
 	*channel = c;
 
-	if (channels_mask != 0 && c < 0)
+	if (allocate && channels_mask != 0 && c < 0)
 		*bandwidth = 0;
 
 	if (*bandwidth == 0)
@@ -312,7 +318,7 @@ void fw_iso_resource_manage(struct fw_card *card, int generation,
 	if (ret < 0)
 		*bandwidth = 0;
 
-	if (ret < 0 && c >= 0 && allocate) {
+	if (allocate && ret < 0 && c >= 0) {
 		deallocate_channel(card, irm_id, generation, c);
 		*channel = ret;
 	}
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 6ed9127680fd..2e35379bf96c 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -174,8 +174,6 @@ struct fw_cdev_event_iso_interrupt {
  * @handle:	Reference by which an allocated resource can be deallocated
  * @channel:	Isochronous channel which was (de)allocated, if any
  * @bandwidth:	Bandwidth allocation units which were (de)allocated, if any
- * @channels_available:  Last known availability of channels
- * @bandwidth_available: Last known availability of bandwidth
  *
  * An %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED event is sent after an isochronous
  * resource was allocated at the IRM.  The client has to check @channel and
@@ -580,7 +578,7 @@ struct fw_cdev_get_cycle_timer {
  *
  * The %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE ioctl works like
  * %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE except that resources are freed
- * instead of allocated.  At most one channel may be specified in this ioctl.
+ * instead of allocated.
  * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event concludes this operation.
  *
  * To summarize, %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE allocates iso resources
@@ -588,9 +586,9 @@ struct fw_cdev_get_cycle_timer {
  * In contrast, %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE allocates iso resources
  * for the duration of a bus generation.
  *
- * @channels is a host-endian bitfield with the most significant bit
- * representing channel 0 and the least significant bit representing channel 63:
- * 1ULL << (63 - c)
+ * @channels is a host-endian bitfield with the least significant bit
+ * representing channel 0 and the most significant bit representing channel 63:
+ * 1ULL << c for each channel c that is a candidate for (de)allocation.
  *
  * @bandwidth is expressed in bandwidth allocation units, i.e. the time to send
  * one quadlet of data (payload or header data) at speed S1600.
-- 
cgit v1.2.3


From f8c2287c65f8f72000102fc058232669e4540bc4 Mon Sep 17 00:00:00 2001
From: Jay Fenlason <fenlason@redhat.com>
Date: Thu, 5 Mar 2009 19:08:40 +0100
Subject: firewire: implement asynchronous stream transmission

Allow userspace and other firewire drivers (fw-ipv4 I'm looking at
you!) to send Asynchronous Transmit Streams as described in 7.8.3 of
release 1.1 of the 1394 Open Host Controller Interface Specification.

Signed-off-by: Jay Fenlason <fenlason@redhat.com>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> (tweaks)
---
 drivers/firewire/fw-cdev.c        | 33 +++++++++++++++++++++++++++++++++
 drivers/firewire/fw-ohci.c        | 21 +++++++++++++++++++--
 drivers/firewire/fw-transaction.c | 25 +++++++++++++++++++++++++
 drivers/firewire/fw-transaction.h |  4 ++++
 include/linux/firewire-cdev.h     | 27 +++++++++++++++++++++++++++
 5 files changed, 108 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 214e534efee5..539dae5eb5b2 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -1242,6 +1242,38 @@ static int ioctl_send_broadcast_request(struct client *client, void *buffer)
 	return init_request(client, request, LOCAL_BUS | 0x3f, SCODE_100);
 }
 
+struct stream_packet {
+	struct fw_packet packet;
+	u8 data[0];
+};
+
+static void send_stream_packet_done(struct fw_packet *packet,
+				    struct fw_card *card, int status)
+{
+	kfree(container_of(packet, struct stream_packet, packet));
+}
+
+static int ioctl_send_stream_packet(struct client *client, void *buffer)
+{
+	struct fw_cdev_send_stream_packet *request = buffer;
+	struct stream_packet *p;
+
+	p = kmalloc(sizeof(*p) + request->size, GFP_KERNEL);
+	if (p == NULL)
+		return -ENOMEM;
+
+	if (request->data &&
+	    copy_from_user(p->data, u64_to_uptr(request->data), request->size)) {
+		kfree(p);
+		return -EFAULT;
+	}
+	fw_send_stream_packet(client->device->card, &p->packet,
+			      request->generation, request->speed,
+			      request->channel, request->sy, request->tag,
+			      p->data, request->size, send_stream_packet_done);
+	return 0;
+}
+
 static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_get_info,
 	ioctl_send_request,
@@ -1262,6 +1294,7 @@ static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_deallocate_iso_resource_once,
 	ioctl_get_speed,
 	ioctl_send_broadcast_request,
+	ioctl_send_stream_packet,
 };
 
 static int dispatch_ioctl(struct client *client,
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c
index c92278374658..1180d0be0bb4 100644
--- a/drivers/firewire/fw-ohci.c
+++ b/drivers/firewire/fw-ohci.c
@@ -936,7 +936,9 @@ static int at_context_queue_packet(struct context *ctx,
 	 */
 
 	header = (__le32 *) &d[1];
-	if (packet->header_length > 8) {
+	switch (packet->header_length) {
+	case 16:
+	case 12:
 		header[0] = cpu_to_le32((packet->header[0] & 0xffff) |
 					(packet->speed << 16));
 		header[1] = cpu_to_le32((packet->header[1] & 0xffff) |
@@ -950,12 +952,27 @@ static int at_context_queue_packet(struct context *ctx,
 			header[3] = (__force __le32) packet->header[3];
 
 		d[0].req_count = cpu_to_le16(packet->header_length);
-	} else {
+		break;
+
+	case 8:
 		header[0] = cpu_to_le32((OHCI1394_phy_tcode << 4) |
 					(packet->speed << 16));
 		header[1] = cpu_to_le32(packet->header[0]);
 		header[2] = cpu_to_le32(packet->header[1]);
 		d[0].req_count = cpu_to_le16(12);
+		break;
+
+	case 4:
+		header[0] = cpu_to_le32((packet->header[0] & 0xffff) |
+					(packet->speed << 16));
+		header[1] = cpu_to_le32(packet->header[0] & 0xffff0000);
+		d[0].req_count = cpu_to_le16(8);
+		break;
+
+	default:
+		/* BUG(); */
+		packet->ack = RCODE_SEND_ERROR;
+		return -1;
 	}
 
 	driver_data = (struct driver_data *) &d[3];
diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c
index 76938fe432a0..e3da58991960 100644
--- a/drivers/firewire/fw-transaction.c
+++ b/drivers/firewire/fw-transaction.c
@@ -37,6 +37,10 @@
 #include "fw-topology.h"
 #include "fw-device.h"
 
+#define HEADER_TAG(tag)			((tag) << 14)
+#define HEADER_CHANNEL(ch)		((ch) << 8)
+#define HEADER_SY(sy)			((sy) << 0)
+
 #define HEADER_PRI(pri)			((pri) << 0)
 #define HEADER_TCODE(tcode)		((tcode) << 4)
 #define HEADER_RETRY(retry)		((retry) << 8)
@@ -293,6 +297,27 @@ void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode,
 }
 EXPORT_SYMBOL(fw_send_request);
 
+void fw_send_stream_packet(struct fw_card *card, struct fw_packet *p,
+		int generation, int speed, int channel, int sy, int tag,
+		void *payload, size_t length, fw_packet_callback_t callback)
+{
+	p->callback = callback;
+	p->header[0] =
+		  HEADER_DATA_LENGTH(length)
+		| HEADER_TAG(tag)
+		| HEADER_CHANNEL(channel)
+		| HEADER_TCODE(TCODE_STREAM_DATA)
+		| HEADER_SY(sy);
+	p->header_length = 4;
+	p->payload = payload;
+	p->payload_length = length;
+	p->speed = speed;
+	p->generation = generation;
+	p->ack = 0;
+
+	card->driver->send_request(card, p);
+}
+
 struct transaction_callback_data {
 	struct completion done;
 	void *payload;
diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h
index 35d0a4bb6d5c..eed2e295eb3c 100644
--- a/drivers/firewire/fw-transaction.h
+++ b/drivers/firewire/fw-transaction.h
@@ -407,6 +407,10 @@ void fw_send_request(struct fw_card *card, struct fw_transaction *t,
 		int tcode, int destination_id, int generation, int speed,
 		unsigned long long offset, void *payload, size_t length,
 		fw_transaction_callback_t callback, void *callback_data);
+void fw_send_stream_packet(struct fw_card *card, struct fw_packet *p,
+		int generation, int speed, int channel, int sy, int tag,
+		void *payload, size_t length, fw_packet_callback_t callback);
+
 int fw_cancel_transaction(struct fw_card *card,
 			  struct fw_transaction *transaction);
 void fw_flush_transactions(struct fw_card *card);
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 2e35379bf96c..4dfc84d0ac76 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -246,6 +246,7 @@ union fw_cdev_event {
 #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource)
 #define FW_CDEV_IOC_GET_SPEED                    _IOR('#', 0x11, struct fw_cdev_get_speed)
 #define FW_CDEV_IOC_SEND_BROADCAST_REQUEST       _IOW('#', 0x12, struct fw_cdev_send_request)
+#define FW_CDEV_IOC_SEND_STREAM_PACKET           _IOW('#', 0x13, struct fw_cdev_send_stream_packet)
 
 /*
  * FW_CDEV_VERSION History
@@ -609,4 +610,30 @@ struct fw_cdev_get_speed {
 	__u32 max_speed;
 };
 
+/**
+ * struct fw_cdev_send_stream_packet - send an asynchronous stream packet
+ * @generation:   Bus generation where the packet is valid
+ * @speed:	  Speed code to send the packet at
+ * @channel:	  Channel to send the packet on
+ * @sy:		  Four-bit sy code for the packet
+ * @tag:	  Two-bit tag field to use for the packet
+ * @size:	  Size of the packet's data payload
+ * @data:	  Userspace pointer to the payload
+ *
+ * The %FW_CDEV_IOC_SEND_STREAM_PACKET ioctl sends an asynchronous stream packet
+ * to every device (that is listening to the specified channel) on the
+ * firewire bus.  It is the applications's job to ensure
+ * that the intended device(s) will be able to receive the packet at the chosen
+ * transmit speed.
+ */
+struct fw_cdev_send_stream_packet {
+	__u32 generation;
+	__u32 speed;
+	__u32 channel;
+	__u32 sy;
+	__u32 tag;
+	__u32 size;
+	__u64 data;
+};
+
 #endif /* _LINUX_FIREWIRE_CDEV_H */
-- 
cgit v1.2.3


From c8a25900f35e575938c791507894c036c0f2ca7d Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Tue, 10 Mar 2009 20:59:16 +0100
Subject: firewire: cdev: amendment to "add ioctl to query maximum transmission
 speed"

The as yet unreleased FW_CDEV_IOC_GET_SPEED ioctl puts only a single
integer into the parameter buffer.  We can use ioctl()'s return value
instead.

(Also: Some whitespace change in firewire-cdev.h.)

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/fw-cdev.c    | 11 ++++++-----
 include/linux/firewire-cdev.h | 37 ++++++++++++++-----------------------
 2 files changed, 20 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 539dae5eb5b2..2784f91896db 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -1214,13 +1214,14 @@ static int ioctl_deallocate_iso_resource_once(struct client *client, void *buffe
 	return init_iso_resource(client, request, ISO_RES_DEALLOC_ONCE);
 }
 
+/*
+ * Returns a speed code:  Maximum speed to or from this device,
+ * limited by the device's link speed, the local node's link speed,
+ * and all PHY port speeds between the two links.
+ */
 static int ioctl_get_speed(struct client *client, void *buffer)
 {
-	struct fw_cdev_get_speed *request = buffer;
-
-	request->max_speed = client->device->max_speed;
-
-	return 0;
+	return client->device->max_speed;
 }
 
 static int ioctl_send_broadcast_request(struct client *client, void *buffer)
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 4dfc84d0ac76..de4035792f70 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -223,28 +223,28 @@ union fw_cdev_event {
 };
 
 /* available since kernel version 2.6.22 */
-#define FW_CDEV_IOC_GET_INFO		_IOWR('#', 0x00, struct fw_cdev_get_info)
-#define FW_CDEV_IOC_SEND_REQUEST	_IOW('#', 0x01, struct fw_cdev_send_request)
-#define FW_CDEV_IOC_ALLOCATE		_IOWR('#', 0x02, struct fw_cdev_allocate)
-#define FW_CDEV_IOC_DEALLOCATE		_IOW('#', 0x03, struct fw_cdev_deallocate)
-#define FW_CDEV_IOC_SEND_RESPONSE	_IOW('#', 0x04, struct fw_cdev_send_response)
-#define FW_CDEV_IOC_INITIATE_BUS_RESET	_IOW('#', 0x05, struct fw_cdev_initiate_bus_reset)
-#define FW_CDEV_IOC_ADD_DESCRIPTOR	_IOWR('#', 0x06, struct fw_cdev_add_descriptor)
-#define FW_CDEV_IOC_REMOVE_DESCRIPTOR	_IOW('#', 0x07, struct fw_cdev_remove_descriptor)
-#define FW_CDEV_IOC_CREATE_ISO_CONTEXT	_IOWR('#', 0x08, struct fw_cdev_create_iso_context)
-#define FW_CDEV_IOC_QUEUE_ISO		_IOWR('#', 0x09, struct fw_cdev_queue_iso)
-#define FW_CDEV_IOC_START_ISO		_IOW('#', 0x0a, struct fw_cdev_start_iso)
-#define FW_CDEV_IOC_STOP_ISO		_IOW('#', 0x0b, struct fw_cdev_stop_iso)
+#define FW_CDEV_IOC_GET_INFO           _IOWR('#', 0x00, struct fw_cdev_get_info)
+#define FW_CDEV_IOC_SEND_REQUEST        _IOW('#', 0x01, struct fw_cdev_send_request)
+#define FW_CDEV_IOC_ALLOCATE           _IOWR('#', 0x02, struct fw_cdev_allocate)
+#define FW_CDEV_IOC_DEALLOCATE          _IOW('#', 0x03, struct fw_cdev_deallocate)
+#define FW_CDEV_IOC_SEND_RESPONSE       _IOW('#', 0x04, struct fw_cdev_send_response)
+#define FW_CDEV_IOC_INITIATE_BUS_RESET  _IOW('#', 0x05, struct fw_cdev_initiate_bus_reset)
+#define FW_CDEV_IOC_ADD_DESCRIPTOR     _IOWR('#', 0x06, struct fw_cdev_add_descriptor)
+#define FW_CDEV_IOC_REMOVE_DESCRIPTOR   _IOW('#', 0x07, struct fw_cdev_remove_descriptor)
+#define FW_CDEV_IOC_CREATE_ISO_CONTEXT _IOWR('#', 0x08, struct fw_cdev_create_iso_context)
+#define FW_CDEV_IOC_QUEUE_ISO          _IOWR('#', 0x09, struct fw_cdev_queue_iso)
+#define FW_CDEV_IOC_START_ISO           _IOW('#', 0x0a, struct fw_cdev_start_iso)
+#define FW_CDEV_IOC_STOP_ISO            _IOW('#', 0x0b, struct fw_cdev_stop_iso)
 
 /* available since kernel version 2.6.24 */
-#define FW_CDEV_IOC_GET_CYCLE_TIMER	_IOR('#', 0x0c, struct fw_cdev_get_cycle_timer)
+#define FW_CDEV_IOC_GET_CYCLE_TIMER     _IOR('#', 0x0c, struct fw_cdev_get_cycle_timer)
 
 /* available since kernel version 2.6.30 */
 #define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE       _IOWR('#', 0x0d, struct fw_cdev_allocate_iso_resource)
 #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE      _IOW('#', 0x0e, struct fw_cdev_deallocate)
 #define FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE   _IOW('#', 0x0f, struct fw_cdev_allocate_iso_resource)
 #define FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE_ONCE _IOW('#', 0x10, struct fw_cdev_allocate_iso_resource)
-#define FW_CDEV_IOC_GET_SPEED                    _IOR('#', 0x11, struct fw_cdev_get_speed)
+#define FW_CDEV_IOC_GET_SPEED                     _IO('#', 0x11) /* returns speed code */
 #define FW_CDEV_IOC_SEND_BROADCAST_REQUEST       _IOW('#', 0x12, struct fw_cdev_send_request)
 #define FW_CDEV_IOC_SEND_STREAM_PACKET           _IOW('#', 0x13, struct fw_cdev_send_stream_packet)
 
@@ -601,15 +601,6 @@ struct fw_cdev_allocate_iso_resource {
 	__u32 handle;
 };
 
-/**
- * struct fw_cdev_get_speed - Query maximum speed to or from this device
- * @max_speed:	Speed code; minimum of the device's link speed, the local node's
- *		link speed, and all PHY port speeds between the two links
- */
-struct fw_cdev_get_speed {
-	__u32 max_speed;
-};
-
 /**
  * struct fw_cdev_send_stream_packet - send an asynchronous stream packet
  * @generation:   Bus generation where the packet is valid
-- 
cgit v1.2.3


From de487da8ca5839d057e1f4b57ee3f387e180b800 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Tue, 10 Mar 2009 21:00:23 +0100
Subject: firewire: cdev: secure add_descriptor ioctl

The access permissions and ownership or ACL of /dev/fw* character device
files will typically be set based on the device type of the respective
nodes, as obtained by firewire-core from descriptors in the device's
configuration ROM.  An example policy is to deny write permission by
default but grant write permission to files of AV/C video and audio
devices and IIDC video devices.

The FW_CDEV_IOC_ADD_DESCRIPTOR ioctl could be used to partly subvert
such a policy:  Find a device file with relaxed permissions, use the
ioctl to add a descriptor with AV/C marker to the local node's ROM, thus
gain access to the local node's character device file.  (This is only
possible if there are udev scripts installed which actively relax
permissions for known device types and if there is a device of such a
type connected.)

Accessibility of the local node's device file is relevant to host
security if the host contains two or more IEEE 1394 link layer
controllers which are plugged into a single bus.

Therefore change the ABI to deny FW_CDEV_IOC_ADD_DESCRIPTOR if the file
belongs to a remote node.  (This change has no impact on known
implementers of the ABI:  None of them uses the ioctl yet.)

Also clarify the documentation:  The ioctl affects all local nodes, not
just one local node.

Cc: stable@kernel.org
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/fw-cdev.c    | 8 ++++++++
 include/linux/firewire-cdev.h | 5 ++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 2784f91896db..160cb27e120c 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -742,9 +742,17 @@ static void release_descriptor(struct client *client,
 static int ioctl_add_descriptor(struct client *client, void *buffer)
 {
 	struct fw_cdev_add_descriptor *request = buffer;
+	struct fw_card *card = client->device->card;
 	struct descriptor_resource *r;
 	int ret;
 
+	/* Access policy: Allow this ioctl only on local nodes' device files. */
+	spin_lock_irq(&card->lock);
+	ret = client->device->node_id != card->local_node->node_id;
+	spin_unlock_irq(&card->lock);
+	if (ret)
+		return -ENOSYS;
+
 	if (request->length > 256)
 		return -EINVAL;
 
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index de4035792f70..25bc82726ef7 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -394,6 +394,9 @@ struct fw_cdev_initiate_bus_reset {
  * If successful, the kernel adds the descriptor and writes back a handle to the
  * kernel-side object to be used for later removal of the descriptor block and
  * immediate key.
+ *
+ * This ioctl affects the configuration ROMs of all local nodes.
+ * The ioctl only succeeds on device files which represent a local node.
  */
 struct fw_cdev_add_descriptor {
 	__u32 immediate;
@@ -409,7 +412,7 @@ struct fw_cdev_add_descriptor {
  *		descriptor was added
  *
  * Remove a descriptor block and accompanying immediate key from the local
- * node's configuration ROM.
+ * nodes' configuration ROMs.
  */
 struct fw_cdev_remove_descriptor {
 	__u32 handle;
-- 
cgit v1.2.3


From 18e9b10fcdc090d3a38606958167d5923c7099b7 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Tue, 10 Mar 2009 21:02:21 +0100
Subject: firewire: cdev: add closure to async stream ioctl

This changes the as yet unreleased FW_CDEV_IOC_SEND_STREAM_PACKET ioctl
to generate an fw_cdev_event_response event just like the other two
ioctls for asynchronous request transmission do.  This way, clients get
feedback on successful or unsuccessful transmission.

This also adds input validation for length, tag, channel, sy, speed.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/fw-cdev.c        | 46 ++++++++++++++++-----------------------
 drivers/firewire/fw-transaction.c | 42 ++++++++++++++---------------------
 drivers/firewire/fw-transaction.h |  9 ++++----
 include/linux/firewire-cdev.h     | 31 +++++++++++++-------------
 4 files changed, 56 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 95a207545eb3..7eb6594cc3e5 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -522,7 +522,8 @@ static int init_request(struct client *client,
 	struct outbound_transaction_event *e;
 	int ret;
 
-	if (request->length > 4096 || request->length > 512 << speed)
+	if (request->tcode != TCODE_STREAM_DATA &&
+	    (request->length > 4096 || request->length > 512 << speed))
 		return -EIO;
 
 	e = kmalloc(sizeof(*e) + request->length, GFP_KERNEL);
@@ -1247,36 +1248,27 @@ static int ioctl_send_broadcast_request(struct client *client, void *buffer)
 	return init_request(client, request, LOCAL_BUS | 0x3f, SCODE_100);
 }
 
-struct stream_packet {
-	struct fw_packet packet;
-	u8 data[0];
-};
-
-static void send_stream_packet_done(struct fw_packet *packet,
-				    struct fw_card *card, int status)
-{
-	kfree(container_of(packet, struct stream_packet, packet));
-}
-
 static int ioctl_send_stream_packet(struct client *client, void *buffer)
 {
-	struct fw_cdev_send_stream_packet *request = buffer;
-	struct stream_packet *p;
+	struct fw_cdev_send_stream_packet *p = buffer;
+	struct fw_cdev_send_request request;
+	int dest;
 
-	p = kmalloc(sizeof(*p) + request->size, GFP_KERNEL);
-	if (p == NULL)
-		return -ENOMEM;
+	if (p->speed > client->device->card->link_speed ||
+	    p->length > 1024 << p->speed)
+		return -EIO;
 
-	if (request->data &&
-	    copy_from_user(p->data, u64_to_uptr(request->data), request->size)) {
-		kfree(p);
-		return -EFAULT;
-	}
-	fw_send_stream_packet(client->device->card, &p->packet,
-			      request->generation, request->speed,
-			      request->channel, request->sy, request->tag,
-			      p->data, request->size, send_stream_packet_done);
-	return 0;
+	if (p->tag > 3 || p->channel > 63 || p->sy > 15)
+		return -EINVAL;
+
+	dest = fw_stream_packet_destination_id(p->tag, p->channel, p->sy);
+	request.tcode		= TCODE_STREAM_DATA;
+	request.length		= p->length;
+	request.closure		= p->closure;
+	request.data		= p->data;
+	request.generation	= p->generation;
+
+	return init_request(client, &request, dest, p->speed);
 }
 
 static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c
index e3da58991960..4a9b37461c26 100644
--- a/drivers/firewire/fw-transaction.c
+++ b/drivers/firewire/fw-transaction.c
@@ -37,10 +37,6 @@
 #include "fw-topology.h"
 #include "fw-device.h"
 
-#define HEADER_TAG(tag)			((tag) << 14)
-#define HEADER_CHANNEL(ch)		((ch) << 8)
-#define HEADER_SY(sy)			((sy) << 0)
-
 #define HEADER_PRI(pri)			((pri) << 0)
 #define HEADER_TCODE(tcode)		((tcode) << 4)
 #define HEADER_RETRY(retry)		((retry) << 8)
@@ -158,6 +154,18 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel,
 {
 	int ext_tcode;
 
+	if (tcode == TCODE_STREAM_DATA) {
+		packet->header[0] =
+			HEADER_DATA_LENGTH(length) |
+			destination_id |
+			HEADER_TCODE(TCODE_STREAM_DATA);
+		packet->header_length = 4;
+		packet->payload = payload;
+		packet->payload_length = length;
+
+		goto common;
+	}
+
 	if (tcode > 0x10) {
 		ext_tcode = tcode & ~0x10;
 		tcode = TCODE_LOCK_REQUEST;
@@ -204,7 +212,7 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel,
 		packet->payload_length = 0;
 		break;
 	}
-
+ common:
 	packet->speed = speed;
 	packet->generation = generation;
 	packet->ack = 0;
@@ -246,6 +254,9 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel,
  * @param callback function to be called when the transaction is completed
  * @param callback_data pointer to arbitrary data, which will be
  *   passed to the callback
+ *
+ * In case of asynchronous stream packets i.e. TCODE_STREAM_DATA, the caller
+ * needs to synthesize @destination_id with fw_stream_packet_destination_id().
  */
 void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode,
 		     int destination_id, int generation, int speed,
@@ -297,27 +308,6 @@ void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode,
 }
 EXPORT_SYMBOL(fw_send_request);
 
-void fw_send_stream_packet(struct fw_card *card, struct fw_packet *p,
-		int generation, int speed, int channel, int sy, int tag,
-		void *payload, size_t length, fw_packet_callback_t callback)
-{
-	p->callback = callback;
-	p->header[0] =
-		  HEADER_DATA_LENGTH(length)
-		| HEADER_TAG(tag)
-		| HEADER_CHANNEL(channel)
-		| HEADER_TCODE(TCODE_STREAM_DATA)
-		| HEADER_SY(sy);
-	p->header_length = 4;
-	p->payload = payload;
-	p->payload_length = length;
-	p->speed = speed;
-	p->generation = generation;
-	p->ack = 0;
-
-	card->driver->send_request(card, p);
-}
-
 struct transaction_callback_data {
 	struct completion done;
 	void *payload;
diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h
index f90f09c05833..d4f42cecbdfa 100644
--- a/drivers/firewire/fw-transaction.h
+++ b/drivers/firewire/fw-transaction.h
@@ -412,10 +412,6 @@ void fw_send_request(struct fw_card *card, struct fw_transaction *t,
 		int tcode, int destination_id, int generation, int speed,
 		unsigned long long offset, void *payload, size_t length,
 		fw_transaction_callback_t callback, void *callback_data);
-void fw_send_stream_packet(struct fw_card *card, struct fw_packet *p,
-		int generation, int speed, int channel, int sy, int tag,
-		void *payload, size_t length, fw_packet_callback_t callback);
-
 int fw_cancel_transaction(struct fw_card *card,
 			  struct fw_transaction *transaction);
 void fw_flush_transactions(struct fw_card *card);
@@ -425,6 +421,11 @@ int fw_run_transaction(struct fw_card *card, int tcode, int destination_id,
 void fw_send_phy_config(struct fw_card *card,
 			int node_id, int generation, int gap_count);
 
+static inline int fw_stream_packet_destination_id(int tag, int channel, int sy)
+{
+	return tag << 14 | channel << 8 | sy;
+}
+
 /*
  * Called by the topology code to inform the device code of node
  * activity; found, lost, or updated nodes.
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 25bc82726ef7..c6b3ca3af6df 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -606,28 +606,29 @@ struct fw_cdev_allocate_iso_resource {
 
 /**
  * struct fw_cdev_send_stream_packet - send an asynchronous stream packet
- * @generation:   Bus generation where the packet is valid
- * @speed:	  Speed code to send the packet at
- * @channel:	  Channel to send the packet on
- * @sy:		  Four-bit sy code for the packet
- * @tag:	  Two-bit tag field to use for the packet
- * @size:	  Size of the packet's data payload
- * @data:	  Userspace pointer to the payload
+ * @length:	Length of outgoing payload, in bytes
+ * @tag:	Data format tag
+ * @channel:	Isochronous channel to transmit to
+ * @sy:		Synchronization code
+ * @closure:	Passed back to userspace in the response event
+ * @data:	Userspace pointer to payload
+ * @generation:	The bus generation where packet is valid
+ * @speed:	Speed to transmit at
  *
  * The %FW_CDEV_IOC_SEND_STREAM_PACKET ioctl sends an asynchronous stream packet
- * to every device (that is listening to the specified channel) on the
- * firewire bus.  It is the applications's job to ensure
- * that the intended device(s) will be able to receive the packet at the chosen
- * transmit speed.
+ * to every device which is listening to the specified channel.  The kernel
+ * writes an &fw_cdev_event_response event which indicates success or failure of
+ * the transmission.
  */
 struct fw_cdev_send_stream_packet {
-	__u32 generation;
-	__u32 speed;
+	__u32 length;
+	__u32 tag;
 	__u32 channel;
 	__u32 sy;
-	__u32 tag;
-	__u32 size;
+	__u64 closure;
 	__u64 data;
+	__u32 generation;
+	__u32 speed;
 };
 
 #endif /* _LINUX_FIREWIRE_CDEV_H */
-- 
cgit v1.2.3


From 38938bfe3489394e2eed5e40c9bb8f66a2ce1405 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 24 Mar 2009 16:37:55 -0700
Subject: netlink: add NETLINK_NO_ENOBUFS socket flag

This patch adds the NETLINK_NO_ENOBUFS socket flag. This flag can
be used by unicast and broadcast listeners to avoid receiving
ENOBUFS errors.

Generally speaking, ENOBUFS errors are useful to notify two things
to the listener:

a) You may increase the receiver buffer size via setsockopt().
b) You have lost messages, you may be out of sync.

In some cases, ignoring ENOBUFS errors can be useful. For example:

a) nfnetlink_queue: this subsystem does not have any sort of resync
method and you can decide to ignore ENOBUFS once you have set a
given buffer size.

b) ctnetlink: you can use this together with the socket flag
NETLINK_BROADCAST_SEND_ERROR to stop getting ENOBUFS errors as
you do not need to resync (packets whose event are not delivered
are drop to provide reliable logging and state-synchronization).

Moreover, the use of NETLINK_NO_ENOBUFS also reduces a "go up, go down"
effect in terms of performance which is due to the netlink congestion
control when the listener cannot back off. The effect is the following:

1) throughput rate goes up and netlink messages are inserted in the
receiver buffer.
2) Then, netlink buffer fills and overruns (set on nlk->state bit 0).
3) While the listener empties the receiver buffer, netlink keeps
dropping messages. Thus, throughput goes dramatically down.
4) Then, once the listener has emptied the buffer (nlk->state
bit 0 is set off), goto step 1.

This effect is easy to trigger with netlink broadcast under heavy
load, and it is more noticeable when using a big receiver buffer.
You can find some results in [1] that show this problem.

[1] http://1984.lsi.us.es/linux/netlink/

This patch also includes the use of sk_drop to account the number of
netlink messages drop due to overrun. This value is shown in
/proc/net/netlink.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  1 +
 net/netlink/af_netlink.c | 38 ++++++++++++++++++++++++++++++++------
 2 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 1e6bf995435c..5ba398e90304 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -104,6 +104,7 @@ struct nlmsgerr
 #define NETLINK_DROP_MEMBERSHIP	2
 #define NETLINK_PKTINFO		3
 #define NETLINK_BROADCAST_ERROR	4
+#define NETLINK_NO_ENOBUFS	5
 
 struct nl_pktinfo
 {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b73d4e61c5ac..8b6bbb3032b0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -86,6 +86,7 @@ struct netlink_sock {
 #define NETLINK_KERNEL_SOCKET	0x1
 #define NETLINK_RECV_PKTINFO	0x2
 #define NETLINK_BROADCAST_SEND_ERROR	0x4
+#define NETLINK_RECV_NO_ENOBUFS	0x8
 
 static inline struct netlink_sock *nlk_sk(struct sock *sk)
 {
@@ -717,10 +718,15 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 
 static void netlink_overrun(struct sock *sk)
 {
-	if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
-		sk->sk_err = ENOBUFS;
-		sk->sk_error_report(sk);
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
+		if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
+			sk->sk_err = ENOBUFS;
+			sk->sk_error_report(sk);
+		}
 	}
+	atomic_inc(&sk->sk_drops);
 }
 
 static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
@@ -1182,6 +1188,15 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 			nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
 		err = 0;
 		break;
+	case NETLINK_NO_ENOBUFS:
+		if (val) {
+			nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
+			clear_bit(0, &nlk->state);
+			wake_up_interruptible(&nlk->wait);
+		} else
+			nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -1224,6 +1239,16 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
 			return -EFAULT;
 		err = 0;
 		break;
+	case NETLINK_NO_ENOBUFS:
+		if (len < sizeof(int))
+			return -EINVAL;
+		len = sizeof(int);
+		val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
+		if (put_user(len, optlen) ||
+		    put_user(val, optval))
+			return -EFAULT;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -1879,12 +1904,12 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
 	if (v == SEQ_START_TOKEN)
 		seq_puts(seq,
 			 "sk       Eth Pid    Groups   "
-			 "Rmem     Wmem     Dump     Locks\n");
+			 "Rmem     Wmem     Dump     Locks     Drops\n");
 	else {
 		struct sock *s = v;
 		struct netlink_sock *nlk = nlk_sk(s);
 
-		seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n",
+		seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d\n",
 			   s,
 			   s->sk_protocol,
 			   nlk->pid,
@@ -1892,7 +1917,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
 			   atomic_read(&s->sk_rmem_alloc),
 			   atomic_read(&s->sk_wmem_alloc),
 			   nlk->cb,
-			   atomic_read(&s->sk_refcnt)
+			   atomic_read(&s->sk_refcnt),
+			   atomic_read(&s->sk_drops)
 			);
 
 	}
-- 
cgit v1.2.3


From b8dfe498775de912116f275680ddb57c8799d9ef Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 25 Mar 2009 17:31:52 +0100
Subject: netfilter: factorize ifname_compare()

We use same not trivial helper function in four places. We can factorize it.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h | 23 +++++++++++++++++++++++
 net/ipv4/netfilter/arp_tables.c    | 14 +-------------
 net/ipv4/netfilter/ip_tables.c     | 23 ++---------------------
 net/ipv6/netfilter/ip6_tables.c    | 23 ++---------------------
 net/netfilter/xt_physdev.c         | 21 ++-------------------
 5 files changed, 30 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index e8e08d036752..72918b7cbe85 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -435,6 +435,29 @@ extern void xt_free_table_info(struct xt_table_info *info);
 extern void xt_table_entry_swap_rcu(struct xt_table_info *old,
 				    struct xt_table_info *new);
 
+/*
+ * This helper is performance critical and must be inlined
+ */
+static inline unsigned long ifname_compare_aligned(const char *_a,
+						   const char *_b,
+						   const char *_mask)
+{
+	const unsigned long *a = (const unsigned long *)_a;
+	const unsigned long *b = (const unsigned long *)_b;
+	const unsigned long *mask = (const unsigned long *)_mask;
+	unsigned long ret;
+
+	ret = (a[0] ^ b[0]) & mask[0];
+	if (IFNAMSIZ > sizeof(unsigned long))
+		ret |= (a[1] ^ b[1]) & mask[1];
+	if (IFNAMSIZ > 2 * sizeof(unsigned long))
+		ret |= (a[2] ^ b[2]) & mask[2];
+	if (IFNAMSIZ > 3 * sizeof(unsigned long))
+		ret |= (a[3] ^ b[3]) & mask[3];
+	BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
+	return ret;
+}
+
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 64a7c6ce0b98..4b35dba7cf7d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -80,19 +80,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask)
 {
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	const unsigned long *a = (const unsigned long *)_a;
-	const unsigned long *b = (const unsigned long *)_b;
-	const unsigned long *mask = (const unsigned long *)_mask;
-	unsigned long ret;
-
-	ret = (a[0] ^ b[0]) & mask[0];
-	if (IFNAMSIZ > sizeof(unsigned long))
-		ret |= (a[1] ^ b[1]) & mask[1];
-	if (IFNAMSIZ > 2 * sizeof(unsigned long))
-		ret |= (a[2] ^ b[2]) & mask[2];
-	if (IFNAMSIZ > 3 * sizeof(unsigned long))
-		ret |= (a[3] ^ b[3]) & mask[3];
-	BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
+	unsigned long ret = ifname_compare_aligned(_a, _b, _mask);
 #else
 	unsigned long ret = 0;
 	int i;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e5294aec967d..41c59e391a6a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -74,25 +74,6 @@ do {								\
 
    Hence the start of any table is given by get_table() below.  */
 
-static unsigned long ifname_compare(const char *_a, const char *_b,
-				    const unsigned char *_mask)
-{
-	const unsigned long *a = (const unsigned long *)_a;
-	const unsigned long *b = (const unsigned long *)_b;
-	const unsigned long *mask = (const unsigned long *)_mask;
-	unsigned long ret;
-
-	ret = (a[0] ^ b[0]) & mask[0];
-	if (IFNAMSIZ > sizeof(unsigned long))
-		ret |= (a[1] ^ b[1]) & mask[1];
-	if (IFNAMSIZ > 2 * sizeof(unsigned long))
-		ret |= (a[2] ^ b[2]) & mask[2];
-	if (IFNAMSIZ > 3 * sizeof(unsigned long))
-		ret |= (a[3] ^ b[3]) & mask[3];
-	BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
-	return ret;
-}
-
 /* Returns whether matches rule or not. */
 /* Performance critical - called for every packet */
 static inline bool
@@ -121,7 +102,7 @@ ip_packet_match(const struct iphdr *ip,
 		return false;
 	}
 
-	ret = ifname_compare(indev, ipinfo->iniface, ipinfo->iniface_mask);
+	ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
 
 	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
 		dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -130,7 +111,7 @@ ip_packet_match(const struct iphdr *ip,
 		return false;
 	}
 
-	ret = ifname_compare(outdev, ipinfo->outiface, ipinfo->outiface_mask);
+	ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
 
 	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
 		dprintf("VIA out mismatch (%s vs %s).%s\n",
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 34af7bb8df5f..e59662b3b5b9 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -89,25 +89,6 @@ ip6t_ext_hdr(u8 nexthdr)
 		 (nexthdr == IPPROTO_DSTOPTS) );
 }
 
-static unsigned long ifname_compare(const char *_a, const char *_b,
-				    const unsigned char *_mask)
-{
-	const unsigned long *a = (const unsigned long *)_a;
-	const unsigned long *b = (const unsigned long *)_b;
-	const unsigned long *mask = (const unsigned long *)_mask;
-	unsigned long ret;
-
-	ret = (a[0] ^ b[0]) & mask[0];
-	if (IFNAMSIZ > sizeof(unsigned long))
-		ret |= (a[1] ^ b[1]) & mask[1];
-	if (IFNAMSIZ > 2 * sizeof(unsigned long))
-		ret |= (a[2] ^ b[2]) & mask[2];
-	if (IFNAMSIZ > 3 * sizeof(unsigned long))
-		ret |= (a[3] ^ b[3]) & mask[3];
-	BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
-	return ret;
-}
-
 /* Returns whether matches rule or not. */
 /* Performance critical - called for every packet */
 static inline bool
@@ -138,7 +119,7 @@ ip6_packet_match(const struct sk_buff *skb,
 		return false;
 	}
 
-	ret = ifname_compare(indev, ip6info->iniface, ip6info->iniface_mask);
+	ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
 
 	if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
 		dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -147,7 +128,7 @@ ip6_packet_match(const struct sk_buff *skb,
 		return false;
 	}
 
-	ret = ifname_compare(outdev, ip6info->outiface, ip6info->outiface_mask);
+	ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
 
 	if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
 		dprintf("VIA out mismatch (%s vs %s).%s\n",
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 44a234ef4439..8d28ca5848bc 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -20,23 +20,6 @@ MODULE_DESCRIPTION("Xtables: Bridge physical device match");
 MODULE_ALIAS("ipt_physdev");
 MODULE_ALIAS("ip6t_physdev");
 
-static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask)
-{
-	const unsigned long *a = (const unsigned long *)_a;
-	const unsigned long *b = (const unsigned long *)_b;
-	const unsigned long *mask = (const unsigned long *)_mask;
-	unsigned long ret;
-
-	ret = (a[0] ^ b[0]) & mask[0];
-	if (IFNAMSIZ > sizeof(unsigned long))
-		ret |= (a[1] ^ b[1]) & mask[1];
-	if (IFNAMSIZ > 2 * sizeof(unsigned long))
-		ret |= (a[2] ^ b[2]) & mask[2];
-	if (IFNAMSIZ > 3 * sizeof(unsigned long))
-		ret |= (a[3] ^ b[3]) & mask[3];
-	BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
-	return ret;
-}
 
 static bool
 physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par)
@@ -85,7 +68,7 @@ physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (!(info->bitmask & XT_PHYSDEV_OP_IN))
 		goto match_outdev;
 	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
-	ret = ifname_compare(indev, info->physindev, info->in_mask);
+	ret = ifname_compare_aligned(indev, info->physindev, info->in_mask);
 
 	if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
 		return false;
@@ -95,7 +78,7 @@ match_outdev:
 		return true;
 	outdev = nf_bridge->physoutdev ?
 		 nf_bridge->physoutdev->name : nulldevname;
-	ret = ifname_compare(outdev, info->physoutdev, info->out_mask);
+	ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask);
 
 	return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
 }
-- 
cgit v1.2.3


From f18df228997fb716990590d248663981a15f17d4 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Tue, 13 Jan 2009 16:43:09 +0100
Subject: quota: Add quota reservation support

Delayed allocation defers the block allocation at the dirty pages
flush-out time, doing quota charge/check at that time is too late.
But we can't charge the quota blocks until blocks are really allocated,
otherwise users could get overcharged after reboot from system crash.

This patch adds quota reservation for delayed allocation. Quota blocks
are reserved in memory, inode and quota won't gets dirtied until later
block allocation time.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/dquot.c               | 117 ++++++++++++++++++++++++++++++++++-------------
 include/linux/quota.h    |   3 ++
 include/linux/quotaops.h |  21 +++++++++
 3 files changed, 110 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index bca3cac4bee7..9b1c4d3c9d83 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -899,6 +899,11 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
 	dquot->dq_dqb.dqb_curspace += number;
 }
 
+static inline void dquot_resv_space(struct dquot *dquot, qsize_t number)
+{
+	dquot->dq_dqb.dqb_rsvspace += number;
+}
+
 static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
 {
 	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
@@ -1068,7 +1073,11 @@ err_out:
 	kfree_skb(skb);
 }
 #endif
-
+/*
+ * Write warnings to the console and send warning messages over netlink.
+ *
+ * Note that this function can sleep.
+ */
 static inline void flush_warnings(struct dquot * const *dquots, char *warntype)
 {
 	int i;
@@ -1129,13 +1138,18 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 /* needs dq_data_lock */
 static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
 {
+	qsize_t tspace;
+
 	*warntype = QUOTA_NL_NOWARN;
 	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
 	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
+	tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
+		+ space;
+
 	if (dquot->dq_dqb.dqb_bhardlimit &&
-	    dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bhardlimit &&
+	    tspace > dquot->dq_dqb.dqb_bhardlimit &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
 			*warntype = QUOTA_NL_BHARDWARN;
@@ -1143,7 +1157,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
-	    dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
+	    tspace > dquot->dq_dqb.dqb_bsoftlimit &&
 	    dquot->dq_dqb.dqb_btime && get_seconds() >= dquot->dq_dqb.dqb_btime &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
@@ -1152,7 +1166,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
-	    dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
+	    tspace > dquot->dq_dqb.dqb_bsoftlimit &&
 	    dquot->dq_dqb.dqb_btime == 0) {
 		if (!prealloc) {
 			*warntype = QUOTA_NL_BSOFTWARN;
@@ -1306,51 +1320,92 @@ void vfs_dq_drop(struct inode *inode)
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
+int __dquot_alloc_space(struct inode *inode, qsize_t number,
+			int warn, int reserve)
 {
-	int cnt, ret = NO_QUOTA;
+	int cnt, ret = QUOTA_OK;
 	char warntype[MAXQUOTAS];
 
-	/* First test before acquiring mutex - solves deadlocks when we
-         * re-enter the quota code and are already holding the mutex */
-	if (IS_NOQUOTA(inode)) {
-out_add:
-		inode_add_bytes(inode, number);
-		return QUOTA_OK;
-	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = QUOTA_NL_NOWARN;
 
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode)) {	/* Now we can do reliable test... */
-		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-		goto out_add;
-	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
-		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) == NO_QUOTA)
-			goto warn_put_all;
+		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
+		    == NO_QUOTA) {
+			ret = NO_QUOTA;
+			goto out_unlock;
+		}
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
-		dquot_incr_space(inode->i_dquot[cnt], number);
+		if (reserve)
+			dquot_resv_space(inode->i_dquot[cnt], number);
+		else
+			dquot_incr_space(inode->i_dquot[cnt], number);
 	}
-	inode_add_bytes(inode, number);
-	ret = QUOTA_OK;
-warn_put_all:
+	if (!reserve)
+		inode_add_bytes(inode, number);
+out_unlock:
 	spin_unlock(&dq_data_lock);
-	if (ret == QUOTA_OK)
-		/* Dirtify all the dquots - this can block when journalling */
-		for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-			if (inode->i_dquot[cnt])
-				mark_dquot_dirty(inode->i_dquot[cnt]);
 	flush_warnings(inode->i_dquot, warntype);
+	return ret;
+}
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
+{
+	int cnt, ret = QUOTA_OK;
+
+	/*
+	 * First test before acquiring mutex - solves deadlocks when we
+	 * re-enter the quota code and are already holding the mutex
+	 */
+	if (IS_NOQUOTA(inode)) {
+		inode_add_bytes(inode, number);
+		goto out;
+	}
+
+	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	if (IS_NOQUOTA(inode)) {
+		inode_add_bytes(inode, number);
+		goto out_unlock;
+	}
+
+	ret = __dquot_alloc_space(inode, number, warn, 0);
+	if (ret == NO_QUOTA)
+		goto out_unlock;
+
+	/* Dirtify all the dquots - this can block when journalling */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if (inode->i_dquot[cnt])
+			mark_dquot_dirty(inode->i_dquot[cnt]);
+out_unlock:
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
+	return ret;
+}
+
+int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
+{
+	int ret = QUOTA_OK;
+
+	if (IS_NOQUOTA(inode))
+		goto out;
+
+	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	if (IS_NOQUOTA(inode))
+		goto out_unlock;
+
+	ret = __dquot_alloc_space(inode, number, warn, 1);
+out_unlock:
+	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
 	return ret;
 }
+EXPORT_SYMBOL(dquot_reserve_space);
 
 /*
  * This operation can block, but only after everything is updated
@@ -2057,7 +2112,7 @@ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
 	spin_lock(&dq_data_lock);
 	di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
 	di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
-	di->dqb_curspace = dm->dqb_curspace;
+	di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace;
 	di->dqb_ihardlimit = dm->dqb_ihardlimit;
 	di->dqb_isoftlimit = dm->dqb_isoftlimit;
 	di->dqb_curinodes = dm->dqb_curinodes;
@@ -2097,7 +2152,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 
 	spin_lock(&dq_data_lock);
 	if (di->dqb_valid & QIF_SPACE) {
-		dm->dqb_curspace = di->dqb_curspace;
+		dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace;
 		check_blim = 1;
 		__set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
 	}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index d72d5d84fde5..54b837fa64f2 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -198,6 +198,7 @@ struct mem_dqblk {
 	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
 	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
+	qsize_t dqb_rsvspace;   /* current reserved space for delalloc*/
 	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
 	qsize_t dqb_isoftlimit;	/* preferred inode limit */
 	qsize_t dqb_curinodes;	/* current # allocated inodes */
@@ -308,6 +309,8 @@ struct dquot_operations {
 	int (*release_dquot) (struct dquot *);		/* Quota is going to be deleted from disk */
 	int (*mark_dirty) (struct dquot *);		/* Dquot is marked dirty */
 	int (*write_info) (struct super_block *, int);	/* Write of quota "superblock" */
+	/* reserve quota for delayed block allocation */
+	int (*reserve_space) (struct inode *, qsize_t, int);
 };
 
 /* Operations handling requests from userspace */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 0b35b3a1be05..3e3a0d2874d9 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -183,6 +183,16 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
 	return ret;
 }
 
+static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
+{
+	if (sb_any_quota_active(inode->i_sb)) {
+		/* Used space is updated in alloc_space() */
+		if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA)
+			return 1;
+	}
+	return 0;
+}
+
 static inline int vfs_dq_alloc_inode(struct inode *inode)
 {
 	if (sb_any_quota_active(inode->i_sb)) {
@@ -339,6 +349,11 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
 	return 0;
 }
 
+static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
+{
+	return 0;
+}
+
 static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	inode_sub_bytes(inode, nr);
@@ -376,6 +391,12 @@ static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
 			nr << inode->i_sb->s_blocksize_bits);
 }
 
+static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr)
+{
+	return vfs_dq_reserve_space(inode,
+			nr << inode->i_blkbits);
+}
+
 static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
 {
 	vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
-- 
cgit v1.2.3


From 740d9dcd949a986c88886a591054a0cdb89ef669 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Tue, 13 Jan 2009 16:43:14 +0100
Subject: quota: Add quota reservation claim and released operations

Reserved quota will be claimed at the block allocation time. Over-booked
quota could be returned back with the release callback function.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/dquot.c               | 110 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/quota.h    |   6 +++
 include/linux/quotaops.h |  53 +++++++++++++++++++++++
 3 files changed, 165 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 9b1c4d3c9d83..2916f91ca40c 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -904,6 +904,23 @@ static inline void dquot_resv_space(struct dquot *dquot, qsize_t number)
 	dquot->dq_dqb.dqb_rsvspace += number;
 }
 
+/*
+ * Claim reserved quota space
+ */
+static void dquot_claim_reserved_space(struct dquot *dquot,
+						qsize_t number)
+{
+	WARN_ON(dquot->dq_dqb.dqb_rsvspace < number);
+	dquot->dq_dqb.dqb_curspace += number;
+	dquot->dq_dqb.dqb_rsvspace -= number;
+}
+
+static inline
+void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
+{
+	dquot->dq_dqb.dqb_rsvspace -= number;
+}
+
 static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
 {
 	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
@@ -1452,6 +1469,72 @@ warn_put_all:
 	return ret;
 }
 
+int dquot_claim_space(struct inode *inode, qsize_t number)
+{
+	int cnt;
+	int ret = QUOTA_OK;
+
+	if (IS_NOQUOTA(inode)) {
+		inode_add_bytes(inode, number);
+		goto out;
+	}
+
+	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	if (IS_NOQUOTA(inode))	{
+		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+		inode_add_bytes(inode, number);
+		goto out;
+	}
+
+	spin_lock(&dq_data_lock);
+	/* Claim reserved quotas to allocated quotas */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (inode->i_dquot[cnt] != NODQUOT)
+			dquot_claim_reserved_space(inode->i_dquot[cnt],
+							number);
+	}
+	/* Update inode bytes */
+	inode_add_bytes(inode, number);
+	spin_unlock(&dq_data_lock);
+	/* Dirtify all the dquots - this can block when journalling */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if (inode->i_dquot[cnt])
+			mark_dquot_dirty(inode->i_dquot[cnt]);
+	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(dquot_claim_space);
+
+/*
+ * Release reserved quota space
+ */
+void dquot_release_reserved_space(struct inode *inode, qsize_t number)
+{
+	int cnt;
+
+	if (IS_NOQUOTA(inode))
+		goto out;
+
+	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	if (IS_NOQUOTA(inode))
+		goto out_unlock;
+
+	spin_lock(&dq_data_lock);
+	/* Release reserved dquots */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (inode->i_dquot[cnt] != NODQUOT)
+			dquot_free_reserved_space(inode->i_dquot[cnt], number);
+	}
+	spin_unlock(&dq_data_lock);
+
+out_unlock:
+	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
+	return;
+}
+EXPORT_SYMBOL(dquot_release_reserved_space);
+
 /*
  * This operation can block, but only after everything is updated
  */
@@ -1528,6 +1611,19 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
 	return QUOTA_OK;
 }
 
+/*
+ * call back function, get reserved quota space from underlying fs
+ */
+qsize_t dquot_get_reserved_space(struct inode *inode)
+{
+	qsize_t reserved_space = 0;
+
+	if (sb_any_quota_active(inode->i_sb) &&
+	    inode->i_sb->dq_op->get_reserved_space)
+		reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
+	return reserved_space;
+}
+
 /*
  * Transfer the number of inode and blocks from one diskquota to an other.
  *
@@ -1536,7 +1632,8 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
  */
 int dquot_transfer(struct inode *inode, struct iattr *iattr)
 {
-	qsize_t space;
+	qsize_t space, cur_space;
+	qsize_t rsv_space = 0;
 	struct dquot *transfer_from[MAXQUOTAS];
 	struct dquot *transfer_to[MAXQUOTAS];
 	int cnt, ret = QUOTA_OK;
@@ -1575,7 +1672,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		goto put_all;
 	}
 	spin_lock(&dq_data_lock);
-	space = inode_get_bytes(inode);
+	cur_space = inode_get_bytes(inode);
+	rsv_space = dquot_get_reserved_space(inode);
+	space = cur_space + rsv_space;
 	/* Build the transfer_from list and check the limits */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (transfer_to[cnt] == NODQUOT)
@@ -1604,11 +1703,14 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 			warntype_from_space[cnt] =
 				info_bdq_free(transfer_from[cnt], space);
 			dquot_decr_inodes(transfer_from[cnt], 1);
-			dquot_decr_space(transfer_from[cnt], space);
+			dquot_decr_space(transfer_from[cnt], cur_space);
+			dquot_free_reserved_space(transfer_from[cnt],
+						  rsv_space);
 		}
 
 		dquot_incr_inodes(transfer_to[cnt], 1);
-		dquot_incr_space(transfer_to[cnt], space);
+		dquot_incr_space(transfer_to[cnt], cur_space);
+		dquot_resv_space(transfer_to[cnt], rsv_space);
 
 		inode->i_dquot[cnt] = transfer_to[cnt];
 	}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 54b837fa64f2..a510d91561f4 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -311,6 +311,12 @@ struct dquot_operations {
 	int (*write_info) (struct super_block *, int);	/* Write of quota "superblock" */
 	/* reserve quota for delayed block allocation */
 	int (*reserve_space) (struct inode *, qsize_t, int);
+	/* claim reserved quota for delayed alloc */
+	int (*claim_space) (struct inode *, qsize_t);
+	/* release rsved quota for delayed alloc */
+	void (*release_rsv) (struct inode *, qsize_t);
+	/* get reserved quota for delayed alloc */
+	qsize_t (*get_reserved_space) (struct inode *);
 };
 
 /* Operations handling requests from userspace */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 3e3a0d2874d9..7369d04e0a86 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -35,6 +35,11 @@ void dquot_destroy(struct dquot *dquot);
 int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
 int dquot_alloc_inode(const struct inode *inode, qsize_t number);
 
+int dquot_reserve_space(struct inode *inode, qsize_t number, int prealloc);
+int dquot_claim_space(struct inode *inode, qsize_t number);
+void dquot_release_reserved_space(struct inode *inode, qsize_t number);
+qsize_t dquot_get_reserved_space(struct inode *inode);
+
 int dquot_free_space(struct inode *inode, qsize_t number);
 int dquot_free_inode(const struct inode *inode, qsize_t number);
 
@@ -203,6 +208,31 @@ static inline int vfs_dq_alloc_inode(struct inode *inode)
 	return 0;
 }
 
+/*
+ * Convert in-memory reserved quotas to real consumed quotas
+ */
+static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
+{
+	if (sb_any_quota_active(inode->i_sb)) {
+		if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA)
+			return 1;
+	} else
+		inode_add_bytes(inode, nr);
+
+	mark_inode_dirty(inode);
+	return 0;
+}
+
+/*
+ * Release reserved (in-memory) quotas
+ */
+static inline
+void vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
+{
+	if (sb_any_quota_active(inode->i_sb))
+		inode->i_sb->dq_op->release_rsv(inode, nr);
+}
+
 static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	if (sb_any_quota_active(inode->i_sb))
@@ -354,6 +384,17 @@ static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
 	return 0;
 }
 
+static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
+{
+	return vfs_dq_alloc_space(inode, nr);
+}
+
+static inline
+int vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
+{
+	return 0;
+}
+
 static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	inode_sub_bytes(inode, nr);
@@ -397,6 +438,18 @@ static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr)
 			nr << inode->i_blkbits);
 }
 
+static inline int vfs_dq_claim_block(struct inode *inode, qsize_t nr)
+{
+	return vfs_dq_claim_space(inode,
+			nr << inode->i_blkbits);
+}
+
+static inline
+void vfs_dq_release_reservation_block(struct inode *inode, qsize_t nr)
+{
+	vfs_dq_release_reservation_space(inode, nr << inode->i_blkbits);
+}
+
 static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
 {
 	vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
-- 
cgit v1.2.3


From 9900ba3487f9ba392db30e12d210f768a90abb13 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 14 Jan 2009 16:18:57 +0100
Subject: quota: Use inode->i_blkbits to get block bits

Andrew has suggested to use inode->i_blkbits to get the block bits info,
rather than use super block's blockbits. That should be faster and emit
less code.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/quotaops.h | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 7369d04e0a86..69b502e5eba0 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -410,38 +410,32 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
 
 static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_prealloc_space_nodirty(inode,
-			nr << inode->i_sb->s_blocksize_bits);
+	return vfs_dq_prealloc_space_nodirty(inode, nr << inode->i_blkbits);
 }
 
 static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_prealloc_space(inode,
-			nr << inode->i_sb->s_blocksize_bits);
+	return vfs_dq_prealloc_space(inode, nr << inode->i_blkbits);
 }
 
 static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
- 	return vfs_dq_alloc_space_nodirty(inode,
-			nr << inode->i_sb->s_blocksize_bits);
+	return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_blkbits);
 }
 
 static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_alloc_space(inode,
-			nr << inode->i_sb->s_blocksize_bits);
+	return vfs_dq_alloc_space(inode, nr << inode->i_blkbits);
 }
 
 static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_reserve_space(inode,
-			nr << inode->i_blkbits);
+	return vfs_dq_reserve_space(inode, nr << inode->i_blkbits);
 }
 
 static inline int vfs_dq_claim_block(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_claim_space(inode,
-			nr << inode->i_blkbits);
+	return vfs_dq_claim_space(inode, nr << inode->i_blkbits);
 }
 
 static inline
@@ -452,12 +446,12 @@ void vfs_dq_release_reservation_block(struct inode *inode, qsize_t nr)
 
 static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
+	vfs_dq_free_space_nodirty(inode, nr << inode->i_blkbits);
 }
 
 static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
 {
-	vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits);
+	vfs_dq_free_space(inode, nr << inode->i_blkbits);
 }
 
 /*
-- 
cgit v1.2.3


From dd6f3c6d5a26a282521f15a183fdc2d6f35cfa0f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 26 Jan 2009 16:01:43 +0100
Subject: quota: Remove NODQUOT macro

Remove this macro which is just a definition of NULL. Fix a few coding style
issues along the way.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 70 ++++++++++++++++++++++++++-------------------------
 include/linux/quota.h |  2 --
 2 files changed, 36 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index e840fa2b112e..4881db32e56d 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -253,7 +253,7 @@ static inline struct dquot *find_dquot(unsigned int hashent, struct super_block
 		if (dquot->dq_sb == sb && dquot->dq_id == id && dquot->dq_type == type)
 			return dquot;
 	}
-	return NODQUOT;
+	return NULL;
 }
 
 /* Add a dquot to the tail of the free list */
@@ -696,7 +696,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 
 	dquot = sb->dq_op->alloc_dquot(sb, type);
 	if(!dquot)
-		return NODQUOT;
+		return NULL;
 
 	mutex_init(&dquot->dq_lock);
 	INIT_LIST_HEAD(&dquot->dq_free);
@@ -722,10 +722,10 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
 {
 	unsigned int hashent = hashfn(sb, id, type);
-	struct dquot *dquot = NODQUOT, *empty = NODQUOT;
+	struct dquot *dquot = NULL, *empty = NULL;
 
         if (!sb_has_quota_active(sb, type))
-		return NODQUOT;
+		return NULL;
 we_slept:
 	spin_lock(&dq_list_lock);
 	spin_lock(&dq_state_lock);
@@ -736,15 +736,17 @@ we_slept:
 	}
 	spin_unlock(&dq_state_lock);
 
-	if ((dquot = find_dquot(hashent, sb, id, type)) == NODQUOT) {
-		if (empty == NODQUOT) {
+	dquot = find_dquot(hashent, sb, id, type);
+	if (!dquot) {
+		if (!empty) {
 			spin_unlock(&dq_list_lock);
-			if ((empty = get_empty_dquot(sb, type)) == NODQUOT)
+			empty = get_empty_dquot(sb, type);
+			if (!empty)
 				schedule();	/* Try to wait for a moment... */
 			goto we_slept;
 		}
 		dquot = empty;
-		empty = NODQUOT;
+		empty = NULL;
 		dquot->dq_id = id;
 		/* all dquots go on the inuse_list */
 		put_inuse(dquot);
@@ -766,7 +768,7 @@ we_slept:
 	/* Read the dquot and instantiate it (everything done only if needed) */
 	if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && sb->dq_op->acquire_dquot(dquot) < 0) {
 		dqput(dquot);
-		dquot = NODQUOT;
+		dquot = NULL;
 		goto out;
 	}
 #ifdef __DQUOT_PARANOIA
@@ -787,9 +789,9 @@ static int dqinit_needed(struct inode *inode, int type)
 	if (IS_NOQUOTA(inode))
 		return 0;
 	if (type != -1)
-		return inode->i_dquot[type] == NODQUOT;
+		return !inode->i_dquot[type];
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (inode->i_dquot[cnt] == NODQUOT)
+		if (!inode->i_dquot[cnt])
 			return 1;
 	return 0;
 }
@@ -840,8 +842,8 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
 {
 	struct dquot *dquot = inode->i_dquot[type];
 
-	inode->i_dquot[type] = NODQUOT;
-	if (dquot != NODQUOT) {
+	inode->i_dquot[type] = NULL;
+	if (dquot) {
 		if (dqput_blocks(dquot)) {
 #ifdef __DQUOT_PARANOIA
 			if (atomic_read(&dquot->dq_count) != 1)
@@ -1112,7 +1114,7 @@ static inline void flush_warnings(struct dquot * const *dquots, char *warntype)
 	int i;
 
 	for (i = 0; i < MAXQUOTAS; i++)
-		if (dquots[i] != NODQUOT && warntype[i] != QUOTA_NL_NOWARN &&
+		if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN &&
 		    !warning_issued(dquots[i], warntype[i])) {
 #ifdef CONFIG_PRINT_QUOTA_WARNING
 			print_warning(dquots[i], warntype[i]);
@@ -1249,7 +1251,7 @@ int dquot_initialize(struct inode *inode, int type)
 {
 	unsigned int id = 0;
 	int cnt, ret = 0;
-	struct dquot *got[MAXQUOTAS] = { NODQUOT, NODQUOT };
+	struct dquot *got[MAXQUOTAS] = { NULL, NULL };
 	struct super_block *sb = inode->i_sb;
 
 	/* First test before acquiring mutex - solves deadlocks when we
@@ -1282,9 +1284,9 @@ int dquot_initialize(struct inode *inode, int type)
 		/* Avoid races with quotaoff() */
 		if (!sb_has_quota_active(sb, cnt))
 			continue;
-		if (inode->i_dquot[cnt] == NODQUOT) {
+		if (!inode->i_dquot[cnt]) {
 			inode->i_dquot[cnt] = got[cnt];
-			got[cnt] = NODQUOT;
+			got[cnt] = NULL;
 		}
 	}
 out_err:
@@ -1307,7 +1309,7 @@ int dquot_drop(struct inode *inode)
 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		put[cnt] = inode->i_dquot[cnt];
-		inode->i_dquot[cnt] = NODQUOT;
+		inode->i_dquot[cnt] = NULL;
 	}
 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 
@@ -1332,7 +1334,7 @@ void vfs_dq_drop(struct inode *inode)
 		 * must assure that nobody can come after the DQUOT_DROP and
 		 * add quota pointers back anyway */
 		for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-			if (inode->i_dquot[cnt] != NODQUOT)
+			if (inode->i_dquot[cnt])
 				break;
 		if (cnt < MAXQUOTAS)
 			inode->i_sb->dq_op->drop(inode);
@@ -1363,7 +1365,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] == NODQUOT)
+		if (!inode->i_dquot[cnt])
 			continue;
 		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
 		    == NO_QUOTA) {
@@ -1372,7 +1374,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		}
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] == NODQUOT)
+		if (!inode->i_dquot[cnt])
 			continue;
 		if (reserve)
 			dquot_resv_space(inode->i_dquot[cnt], number);
@@ -1461,14 +1463,14 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] == NODQUOT)
+		if (!inode->i_dquot[cnt])
 			continue;
 		if (check_idq(inode->i_dquot[cnt], number, warntype+cnt) == NO_QUOTA)
 			goto warn_put_all;
 	}
 
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] == NODQUOT)
+		if (!inode->i_dquot[cnt])
 			continue;
 		dquot_incr_inodes(inode->i_dquot[cnt], number);
 	}
@@ -1506,7 +1508,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 	spin_lock(&dq_data_lock);
 	/* Claim reserved quotas to allocated quotas */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] != NODQUOT)
+		if (inode->i_dquot[cnt])
 			dquot_claim_reserved_space(inode->i_dquot[cnt],
 							number);
 	}
@@ -1540,7 +1542,7 @@ void dquot_release_reserved_space(struct inode *inode, qsize_t number)
 	spin_lock(&dq_data_lock);
 	/* Release reserved dquots */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] != NODQUOT)
+		if (inode->i_dquot[cnt])
 			dquot_free_reserved_space(inode->i_dquot[cnt], number);
 	}
 	spin_unlock(&dq_data_lock);
@@ -1576,7 +1578,7 @@ out_sub:
 	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] == NODQUOT)
+		if (!inode->i_dquot[cnt])
 			continue;
 		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
 		dquot_decr_space(inode->i_dquot[cnt], number);
@@ -1614,7 +1616,7 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
 	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] == NODQUOT)
+		if (!inode->i_dquot[cnt])
 			continue;
 		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
 		dquot_decr_inodes(inode->i_dquot[cnt], number);
@@ -1667,8 +1669,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		return QUOTA_OK;
 	/* Initialize the arrays */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		transfer_from[cnt] = NODQUOT;
-		transfer_to[cnt] = NODQUOT;
+		transfer_from[cnt] = NULL;
+		transfer_to[cnt] = NULL;
 		warntype_to[cnt] = QUOTA_NL_NOWARN;
 		switch (cnt) {
 			case USRQUOTA:
@@ -1696,7 +1698,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	space = cur_space + rsv_space;
 	/* Build the transfer_from list and check the limits */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (transfer_to[cnt] == NODQUOT)
+		if (!transfer_to[cnt])
 			continue;
 		transfer_from[cnt] = inode->i_dquot[cnt];
 		if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
@@ -1712,7 +1714,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		/*
 		 * Skip changes for same uid or gid or for turned off quota-type.
 		 */
-		if (transfer_to[cnt] == NODQUOT)
+		if (!transfer_to[cnt])
 			continue;
 
 		/* Due to IO error we might not have transfer_from[] structure */
@@ -1743,7 +1745,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		if (transfer_to[cnt]) {
 			mark_dquot_dirty(transfer_to[cnt]);
 			/* The reference we got is transferred to the inode */
-			transfer_to[cnt] = NODQUOT;
+			transfer_to[cnt] = NULL;
 		}
 	}
 warn_put_all:
@@ -1761,7 +1763,7 @@ over_quota:
 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	/* Clear dquot pointers we don't want to dqput() */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		transfer_from[cnt] = NODQUOT;
+		transfer_from[cnt] = NULL;
 	ret = NO_QUOTA;
 	goto warn_put_all;
 }
@@ -2256,7 +2258,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
 	struct dquot *dquot;
 
 	dquot = dqget(sb, id, type);
-	if (dquot == NODQUOT)
+	if (!dquot)
 		return -ESRCH;
 	do_get_dqblk(dquot, di);
 	dqput(dquot);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index a510d91561f4..78c48895b12a 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -277,8 +277,6 @@ struct dquot {
 	struct mem_dqblk dq_dqb;	/* Diskquota usage */
 };
 
-#define NODQUOT (struct dquot *)NULL
-
 #define QUOTA_OK          0
 #define NO_QUOTA          1
 
-- 
cgit v1.2.3


From bf84c82d000b9820b01f516d13d328f354f8a8ee Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 26 Jan 2009 17:37:01 +0100
Subject: quota: Remove uppercase aliases for quota functions.

Since all users have been converted, remove uppercase names of quota functions.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/quotaops.h | 31 -------------------------------
 1 file changed, 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 69b502e5eba0..36353d95c8db 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -454,35 +454,4 @@ static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
 	vfs_dq_free_space(inode, nr << inode->i_blkbits);
 }
 
-/*
- * Define uppercase equivalents for compatibility with old function names
- * Can go away when we think all users have been converted (15/04/2008)
- */
-#define DQUOT_INIT(inode) vfs_dq_init(inode)
-#define DQUOT_DROP(inode) vfs_dq_drop(inode)
-#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \
-				vfs_dq_prealloc_space_nodirty(inode, nr)
-#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr)
-#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \
-				vfs_dq_alloc_space_nodirty(inode, nr)
-#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr)
-#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \
-				vfs_dq_prealloc_block_nodirty(inode, nr)
-#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr)
-#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \
-				vfs_dq_alloc_block_nodirty(inode, nr)
-#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr)
-#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode)
-#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \
-				vfs_dq_free_space_nodirty(inode, nr)
-#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr)
-#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \
-				vfs_dq_free_block_nodirty(inode, nr)
-#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr)
-#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode)
-#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr)
-#define DQUOT_SYNC(sb) vfs_dq_sync(sb)
-#define DQUOT_OFF(sb, remount) vfs_dq_off(sb, remount)
-#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb)
-
 #endif /* _LINUX_QUOTAOPS_ */
-- 
cgit v1.2.3


From 9d50638bae05ab7f62d700c9e4a83a1845cf9ef4 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sun, 8 Feb 2009 11:00:25 +0530
Subject: unconditionally include asm/types.h from linux/types.h

Reported-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/types.h b/include/linux/types.h
index c30973ace890..fca82ed55f49 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_TYPES_H
 #define _LINUX_TYPES_H
 
+#include <asm/types.h>
+
 #ifndef __ASSEMBLY__
 #ifdef	__KERNEL__
 
@@ -10,7 +12,6 @@
 #endif
 
 #include <linux/posix_types.h>
-#include <asm/types.h>
 
 #ifndef __KERNEL_STRICT_NAMES
 
-- 
cgit v1.2.3


From 85efde6f4e0de9577256c5f0030088d3fd4347c1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Feb 2009 00:51:39 +0100
Subject: make exported headers use strict posix types

A number of standard posix types are used in exported headers, which
is not allowed if __STRICT_KERNEL_NAMES is defined. In order to
get rid of the non-__STRICT_KERNEL_NAMES part and to make sane headers
the default, we have to change them all to safe types.

There are also still some leftovers in reiserfs_fs.h, elfcore.h
and coda.h, but these files have not compiled in user space for
a long time.

This leaves out the various integer types ({u_,u,}int{8,16,32,64}_t),
which we take care of separately.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Cc: netdev@vger.kernel.org
Cc: linux-ppp@vger.kernel.org
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/fcntl.h               | 12 ++++++------
 include/asm-generic/siginfo.h             | 14 +++++++-------
 include/linux/agpgart.h                   | 14 +++++++-------
 include/linux/cn_proc.h                   | 20 ++++++++++----------
 include/linux/cyclades.h                  |  6 +++---
 include/linux/dvb/video.h                 |  2 +-
 include/linux/if_pppol2tp.h               |  2 +-
 include/linux/mroute6.h                   |  2 +-
 include/linux/netfilter_ipv4/ipt_owner.h  |  8 ++++----
 include/linux/netfilter_ipv6/ip6t_owner.h |  8 ++++----
 include/linux/ppp_defs.h                  |  4 ++--
 include/linux/suspend_ioctls.h            | 11 ++++++-----
 include/linux/time.h                      |  8 ++++----
 include/linux/times.h                     |  8 ++++----
 include/linux/utime.h                     |  4 ++--
 include/linux/xfrm.h                      |  2 +-
 include/mtd/mtd-abi.h                     |  4 ++--
 include/sound/asound.h                    |  4 ++--
 18 files changed, 67 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index b8477414c5c8..4d3e48373e74 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -117,9 +117,9 @@
 struct flock {
 	short	l_type;
 	short	l_whence;
-	off_t	l_start;
-	off_t	l_len;
-	pid_t	l_pid;
+	__kernel_off_t	l_start;
+	__kernel_off_t	l_len;
+	__kernel_pid_t	l_pid;
 	__ARCH_FLOCK_PAD
 };
 #endif
@@ -140,9 +140,9 @@ struct flock {
 struct flock64 {
 	short  l_type;
 	short  l_whence;
-	loff_t l_start;
-	loff_t l_len;
-	pid_t  l_pid;
+	__kernel_loff_t l_start;
+	__kernel_loff_t l_len;
+	__kernel_pid_t  l_pid;
 	__ARCH_FLOCK64_PAD
 };
 #endif
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index 969570167e9e..35752dadd6df 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -23,7 +23,7 @@ typedef union sigval {
 #endif
 
 #ifndef __ARCH_SI_UID_T
-#define __ARCH_SI_UID_T	uid_t
+#define __ARCH_SI_UID_T	__kernel_uid32_t
 #endif
 
 /*
@@ -47,13 +47,13 @@ typedef struct siginfo {
 
 		/* kill() */
 		struct {
-			pid_t _pid;		/* sender's pid */
+			__kernel_pid_t _pid;	/* sender's pid */
 			__ARCH_SI_UID_T _uid;	/* sender's uid */
 		} _kill;
 
 		/* POSIX.1b timers */
 		struct {
-			timer_t _tid;		/* timer id */
+			__kernel_timer_t _tid;	/* timer id */
 			int _overrun;		/* overrun count */
 			char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)];
 			sigval_t _sigval;	/* same as below */
@@ -62,18 +62,18 @@ typedef struct siginfo {
 
 		/* POSIX.1b signals */
 		struct {
-			pid_t _pid;		/* sender's pid */
+			__kernel_pid_t _pid;	/* sender's pid */
 			__ARCH_SI_UID_T _uid;	/* sender's uid */
 			sigval_t _sigval;
 		} _rt;
 
 		/* SIGCHLD */
 		struct {
-			pid_t _pid;		/* which child */
+			__kernel_pid_t _pid;	/* which child */
 			__ARCH_SI_UID_T _uid;	/* sender's uid */
 			int _status;		/* exit code */
-			clock_t _utime;
-			clock_t _stime;
+			__kernel_clock_t _utime;
+			__kernel_clock_t _stime;
 		} _sigchld;
 
 		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
diff --git a/include/linux/agpgart.h b/include/linux/agpgart.h
index 110c600c885f..f6778eceb8f4 100644
--- a/include/linux/agpgart.h
+++ b/include/linux/agpgart.h
@@ -77,20 +77,20 @@ typedef struct _agp_setup {
  * The "prot" down below needs still a "sleep" flag somehow ...
  */
 typedef struct _agp_segment {
-	off_t pg_start;		/* starting page to populate    */
-	size_t pg_count;	/* number of pages              */
-	int prot;		/* prot flags for mmap          */
+	__kernel_off_t pg_start;	/* starting page to populate    */
+	__kernel_size_t pg_count;	/* number of pages              */
+	int prot;			/* prot flags for mmap          */
 } agp_segment;
 
 typedef struct _agp_region {
-	pid_t pid;		/* pid of process               */
-	size_t seg_count;	/* number of segments           */
+	__kernel_pid_t pid;		/* pid of process       */
+	__kernel_size_t seg_count;	/* number of segments   */
 	struct _agp_segment *seg_list;
 } agp_region;
 
 typedef struct _agp_allocate {
 	int key;		/* tag of allocation            */
-	size_t pg_count;	/* number of pages              */
+	__kernel_size_t pg_count;/* number of pages             */
 	__u32 type;		/* 0 == normal, other devspec   */
    	__u32 physical;         /* device specific (some devices  
 				 * need a phys address of the     
@@ -100,7 +100,7 @@ typedef struct _agp_allocate {
 
 typedef struct _agp_bind {
 	int key;		/* tag of allocation            */
-	off_t pg_start;		/* starting page to populate    */
+	__kernel_off_t pg_start;/* starting page to populate    */
 } agp_bind;
 
 typedef struct _agp_unbind {
diff --git a/include/linux/cn_proc.h b/include/linux/cn_proc.h
index 1c86d65bc4b9..b8125b2eb665 100644
--- a/include/linux/cn_proc.h
+++ b/include/linux/cn_proc.h
@@ -65,20 +65,20 @@ struct proc_event {
 		} ack;
 
 		struct fork_proc_event {
-			pid_t parent_pid;
-			pid_t parent_tgid;
-			pid_t child_pid;
-			pid_t child_tgid;
+			__kernel_pid_t parent_pid;
+			__kernel_pid_t parent_tgid;
+			__kernel_pid_t child_pid;
+			__kernel_pid_t child_tgid;
 		} fork;
 
 		struct exec_proc_event {
-			pid_t process_pid;
-			pid_t process_tgid;
+			__kernel_pid_t process_pid;
+			__kernel_pid_t process_tgid;
 		} exec;
 
 		struct id_proc_event {
-			pid_t process_pid;
-			pid_t process_tgid;
+			__kernel_pid_t process_pid;
+			__kernel_pid_t process_tgid;
 			union {
 				__u32 ruid; /* task uid */
 				__u32 rgid; /* task gid */
@@ -90,8 +90,8 @@ struct proc_event {
 		} id;
 
 		struct exit_proc_event {
-			pid_t process_pid;
-			pid_t process_tgid;
+			__kernel_pid_t process_pid;
+			__kernel_pid_t process_tgid;
 			__u32 exit_code, exit_signal;
 		} exit;
 	} event_data;
diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index d06fbf286346..788850ba4e75 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -82,9 +82,9 @@ struct cyclades_monitor {
  * open)
  */
 struct cyclades_idle_stats {
-    time_t	   in_use;	/* Time device has been in use (secs) */
-    time_t	   recv_idle;	/* Time since last char received (secs) */
-    time_t	   xmit_idle;	/* Time since last char transmitted (secs) */
+    __kernel_time_t in_use;	/* Time device has been in use (secs) */
+    __kernel_time_t recv_idle;	/* Time since last char received (secs) */
+    __kernel_time_t xmit_idle;	/* Time since last char transmitted (secs) */
     unsigned long  recv_bytes;	/* Bytes received */
     unsigned long  xmit_bytes;	/* Bytes transmitted */
     unsigned long  overruns;	/* Input overruns */
diff --git a/include/linux/dvb/video.h b/include/linux/dvb/video.h
index bd49c3ebf916..ee5d2df2d78d 100644
--- a/include/linux/dvb/video.h
+++ b/include/linux/dvb/video.h
@@ -137,7 +137,7 @@ struct video_event {
 #define VIDEO_EVENT_FRAME_RATE_CHANGED	2
 #define VIDEO_EVENT_DECODER_STOPPED 	3
 #define VIDEO_EVENT_VSYNC 		4
-	time_t timestamp;
+	__kernel_time_t timestamp;
 	union {
 		video_size_t size;
 		unsigned int frame_rate;	/* in frames per 1000sec */
diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
index c7a66882b6d0..3a14b088c8ec 100644
--- a/include/linux/if_pppol2tp.h
+++ b/include/linux/if_pppol2tp.h
@@ -26,7 +26,7 @@
  */
 struct pppol2tp_addr
 {
-	pid_t	pid;			/* pid that owns the fd.
+	__kernel_pid_t	pid;		/* pid that owns the fd.
 					 * 0 => current */
 	int	fd;			/* FD of UDP socket to use */
 
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 5375faca1f72..43dc97e32183 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -65,7 +65,7 @@ struct mif6ctl {
 	mifi_t	mif6c_mifi;		/* Index of MIF */
 	unsigned char mif6c_flags;	/* MIFF_ flags */
 	unsigned char vifc_threshold;	/* ttl limit */
-	u_short	 mif6c_pifi;		/* the index of the physical IF */
+	__u16	 mif6c_pifi;		/* the index of the physical IF */
 	unsigned int vifc_rate_limit;	/* Rate limiter values (NI) */
 };
 
diff --git a/include/linux/netfilter_ipv4/ipt_owner.h b/include/linux/netfilter_ipv4/ipt_owner.h
index 92f4bdac54ef..a78445be9992 100644
--- a/include/linux/netfilter_ipv4/ipt_owner.h
+++ b/include/linux/netfilter_ipv4/ipt_owner.h
@@ -9,10 +9,10 @@
 #define IPT_OWNER_COMM	0x10
 
 struct ipt_owner_info {
-    uid_t uid;
-    gid_t gid;
-    pid_t pid;
-    pid_t sid;
+    __kernel_uid32_t uid;
+    __kernel_gid32_t gid;
+    __kernel_pid_t pid;
+    __kernel_pid_t sid;
     char comm[16];
     u_int8_t match, invert;	/* flags */
 };
diff --git a/include/linux/netfilter_ipv6/ip6t_owner.h b/include/linux/netfilter_ipv6/ip6t_owner.h
index 19937da3d101..ec5cc7a38c42 100644
--- a/include/linux/netfilter_ipv6/ip6t_owner.h
+++ b/include/linux/netfilter_ipv6/ip6t_owner.h
@@ -8,10 +8,10 @@
 #define IP6T_OWNER_SID	0x08
 
 struct ip6t_owner_info {
-    uid_t uid;
-    gid_t gid;
-    pid_t pid;
-    pid_t sid;
+    __kernel_uid32_t uid;
+    __kernel_gid32_t gid;
+    __kernel_pid_t pid;
+    __kernel_pid_t sid;
     u_int8_t match, invert;	/* flags */
 };
 
diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h
index 1c866bda2018..0f93ed6b4a88 100644
--- a/include/linux/ppp_defs.h
+++ b/include/linux/ppp_defs.h
@@ -177,8 +177,8 @@ struct ppp_comp_stats {
  * the last NP packet was sent or received.
  */
 struct ppp_idle {
-    time_t xmit_idle;		/* time since last NP packet sent */
-    time_t recv_idle;		/* time since last NP packet received */
+    __kernel_time_t xmit_idle;	/* time since last NP packet sent */
+    __kernel_time_t recv_idle;	/* time since last NP packet received */
 };
 
 #endif /* _PPP_DEFS_H_ */
diff --git a/include/linux/suspend_ioctls.h b/include/linux/suspend_ioctls.h
index 2c6faec96bde..0b30382984fe 100644
--- a/include/linux/suspend_ioctls.h
+++ b/include/linux/suspend_ioctls.h
@@ -1,14 +1,15 @@
 #ifndef _LINUX_SUSPEND_IOCTLS_H
 #define _LINUX_SUSPEND_IOCTLS_H
 
+#include <linux/types.h>
 /*
  * This structure is used to pass the values needed for the identification
  * of the resume swap area from a user space to the kernel via the
  * SNAPSHOT_SET_SWAP_AREA ioctl
  */
 struct resume_swap_area {
-	loff_t offset;
-	u_int32_t dev;
+	__kernel_loff_t offset;
+	__u32 dev;
 } __attribute__((packed));
 
 #define SNAPSHOT_IOC_MAGIC	'3'
@@ -20,13 +21,13 @@ struct resume_swap_area {
 #define SNAPSHOT_S2RAM			_IO(SNAPSHOT_IOC_MAGIC, 11)
 #define SNAPSHOT_SET_SWAP_AREA		_IOW(SNAPSHOT_IOC_MAGIC, 13, \
 							struct resume_swap_area)
-#define SNAPSHOT_GET_IMAGE_SIZE		_IOR(SNAPSHOT_IOC_MAGIC, 14, loff_t)
+#define SNAPSHOT_GET_IMAGE_SIZE		_IOR(SNAPSHOT_IOC_MAGIC, 14, __kernel_loff_t)
 #define SNAPSHOT_PLATFORM_SUPPORT	_IO(SNAPSHOT_IOC_MAGIC, 15)
 #define SNAPSHOT_POWER_OFF		_IO(SNAPSHOT_IOC_MAGIC, 16)
 #define SNAPSHOT_CREATE_IMAGE		_IOW(SNAPSHOT_IOC_MAGIC, 17, int)
 #define SNAPSHOT_PREF_IMAGE_SIZE	_IO(SNAPSHOT_IOC_MAGIC, 18)
-#define SNAPSHOT_AVAIL_SWAP_SIZE	_IOR(SNAPSHOT_IOC_MAGIC, 19, loff_t)
-#define SNAPSHOT_ALLOC_SWAP_PAGE	_IOR(SNAPSHOT_IOC_MAGIC, 20, loff_t)
+#define SNAPSHOT_AVAIL_SWAP_SIZE	_IOR(SNAPSHOT_IOC_MAGIC, 19, __kernel_loff_t)
+#define SNAPSHOT_ALLOC_SWAP_PAGE	_IOR(SNAPSHOT_IOC_MAGIC, 20, __kernel_loff_t)
 #define SNAPSHOT_IOC_MAXNR	20
 
 #endif /* _LINUX_SUSPEND_IOCTLS_H */
diff --git a/include/linux/time.h b/include/linux/time.h
index fbbd2a1c92ba..242f62499bb7 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -12,14 +12,14 @@
 #ifndef _STRUCT_TIMESPEC
 #define _STRUCT_TIMESPEC
 struct timespec {
-	time_t	tv_sec;		/* seconds */
-	long	tv_nsec;	/* nanoseconds */
+	__kernel_time_t	tv_sec;			/* seconds */
+	long		tv_nsec;		/* nanoseconds */
 };
 #endif
 
 struct timeval {
-	time_t		tv_sec;		/* seconds */
-	suseconds_t	tv_usec;	/* microseconds */
+	__kernel_time_t		tv_sec;		/* seconds */
+	__kernel_suseconds_t	tv_usec;	/* microseconds */
 };
 
 struct timezone {
diff --git a/include/linux/times.h b/include/linux/times.h
index e2d3020742a6..87b62615cedd 100644
--- a/include/linux/times.h
+++ b/include/linux/times.h
@@ -4,10 +4,10 @@
 #include <linux/types.h>
 
 struct tms {
-	clock_t tms_utime;
-	clock_t tms_stime;
-	clock_t tms_cutime;
-	clock_t tms_cstime;
+	__kernel_clock_t tms_utime;
+	__kernel_clock_t tms_stime;
+	__kernel_clock_t tms_cutime;
+	__kernel_clock_t tms_cstime;
 };
 
 #endif
diff --git a/include/linux/utime.h b/include/linux/utime.h
index 640be6a1959e..5cdf673afbdb 100644
--- a/include/linux/utime.h
+++ b/include/linux/utime.h
@@ -4,8 +4,8 @@
 #include <linux/types.h>
 
 struct utimbuf {
-	time_t actime;
-	time_t modtime;
+	__kernel_time_t actime;
+	__kernel_time_t modtime;
 };
 
 #endif
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 52f3abd453a1..2d4ec15abaca 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -58,7 +58,7 @@ struct xfrm_selector
 	__u8	prefixlen_s;
 	__u8	proto;
 	int	ifindex;
-	uid_t	user;
+	__kernel_uid32_t	user;
 };
 
 #define XFRM_INF (~(__u64)0)
diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h
index c6c61cd5a254..fb672013299c 100644
--- a/include/mtd/mtd-abi.h
+++ b/include/mtd/mtd-abi.h
@@ -84,8 +84,8 @@ struct otp_info {
 #define MEMGETREGIONINFO	_IOWR('M', 8, struct region_info_user)
 #define MEMSETOOBSEL		_IOW('M', 9, struct nand_oobinfo)
 #define MEMGETOOBSEL		_IOR('M', 10, struct nand_oobinfo)
-#define MEMGETBADBLOCK		_IOW('M', 11, loff_t)
-#define MEMSETBADBLOCK		_IOW('M', 12, loff_t)
+#define MEMGETBADBLOCK		_IOW('M', 11, __kernel_loff_t)
+#define MEMSETBADBLOCK		_IOW('M', 12, __kernel_loff_t)
 #define OTPSELECT		_IOR('M', 13, int)
 #define OTPGETREGIONCOUNT	_IOW('M', 14, int)
 #define OTPGETREGIONINFO	_IOW('M', 15, struct otp_info)
diff --git a/include/sound/asound.h b/include/sound/asound.h
index 1c02ed1d7c4a..16684c5a608c 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -385,7 +385,7 @@ struct snd_pcm_sw_params {
 
 struct snd_pcm_channel_info {
 	unsigned int channel;
-	off_t offset;			/* mmap offset */
+	__kernel_off_t offset;		/* mmap offset */
 	unsigned int first;		/* offset to first sample in bits */
 	unsigned int step;		/* samples distance in bits */
 };
@@ -789,7 +789,7 @@ struct snd_ctl_elem_info {
 	snd_ctl_elem_type_t type;	/* R: value type - SNDRV_CTL_ELEM_TYPE_* */
 	unsigned int access;		/* R: value access (bitmask) - SNDRV_CTL_ELEM_ACCESS_* */
 	unsigned int count;		/* count of values */
-	pid_t owner;			/* owner's PID of this control */
+	__kernel_pid_t owner;		/* owner's PID of this control */
 	union {
 		struct {
 			long min;		/* R: minimum value */
-- 
cgit v1.2.3


From 9adfbfb611307060db54691bc7e6d53fdc12312b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Feb 2009 00:51:40 +0100
Subject: make most exported headers use strict integer types

This takes care of all files that have only a small number
of non-strict integer type uses.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Mauro Carvalho Chehab <mchehab@redhat.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Cc: netdev@vger.kernel.org
Cc: linux-ppp@vger.kernel.org
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/atmlec.h          |   5 +-
 include/linux/atmmpc.h          |  45 ++++----
 include/linux/cm4000_cs.h       |  10 +-
 include/linux/dlm_netlink.h     |  18 +--
 include/linux/dm-ioctl.h        |  42 +++----
 include/linux/dvb/audio.h       |   2 +-
 include/linux/dvb/video.h       |  22 ++--
 include/linux/if_arcnet.h       |  27 ++---
 include/linux/ip_vs.h           |  26 ++---
 include/linux/ivtvfb.h          |   2 +-
 include/linux/matroxfb.h        |   2 +-
 include/linux/pfkeyv2.h         | 242 ++++++++++++++++++++--------------------
 include/linux/selinux_netlink.h |   6 +-
 include/sound/asound.h          |   5 +-
 include/sound/emu10k1.h         |  12 +-
 15 files changed, 240 insertions(+), 226 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atmlec.h b/include/linux/atmlec.h
index 6f5a1bab8f50..39c917fd1b96 100644
--- a/include/linux/atmlec.h
+++ b/include/linux/atmlec.h
@@ -11,6 +11,7 @@
 #include <linux/atmioc.h>
 #include <linux/atm.h>
 #include <linux/if_ether.h>
+#include <linux/types.h>
 
 /* ATM lec daemon control socket */
 #define ATMLEC_CTRL	_IO('a', ATMIOC_LANE)
@@ -78,8 +79,8 @@ struct atmlec_msg {
 		} normal;
 		struct atmlec_config_msg config;
 		struct {
-			uint16_t lec_id;			/* requestor lec_id  */
-			uint32_t tran_id;			/* transaction id    */
+			__u16 lec_id;				/* requestor lec_id  */
+			__u32 tran_id;				/* transaction id    */
 			unsigned char mac_addr[ETH_ALEN];	/* dst mac addr      */
 			unsigned char atm_addr[ATM_ESA_LEN];	/* reqestor ATM addr */
 		} proxy;	/*
diff --git a/include/linux/atmmpc.h b/include/linux/atmmpc.h
index ea1650425a12..2aba5787fa63 100644
--- a/include/linux/atmmpc.h
+++ b/include/linux/atmmpc.h
@@ -4,6 +4,7 @@
 #include <linux/atmapi.h>
 #include <linux/atmioc.h>
 #include <linux/atm.h>
+#include <linux/types.h>
 
 #define ATMMPC_CTRL _IO('a', ATMIOC_MPOA)
 #define ATMMPC_DATA _IO('a', ATMIOC_MPOA+1)
@@ -18,39 +19,39 @@ struct atmmpc_ioc {
 };
 
 typedef struct in_ctrl_info {
-        uint8_t   Last_NHRP_CIE_code;
-        uint8_t   Last_Q2931_cause_value;     
-        uint8_t   eg_MPC_ATM_addr[ATM_ESA_LEN];
+        __u8   Last_NHRP_CIE_code;
+        __u8   Last_Q2931_cause_value;
+        __u8   eg_MPC_ATM_addr[ATM_ESA_LEN];
         __be32  tag;
         __be32  in_dst_ip;      /* IP address this ingress MPC sends packets to */
-        uint16_t  holding_time;
-        uint32_t  request_id;
+        __u16  holding_time;
+        __u32  request_id;
 } in_ctrl_info;
 
 typedef struct eg_ctrl_info {
-        uint8_t   DLL_header[256];
-        uint8_t   DH_length;
+        __u8   DLL_header[256];
+        __u8   DH_length;
         __be32  cache_id;
         __be32  tag;
         __be32  mps_ip;
         __be32  eg_dst_ip;      /* IP address to which ingress MPC sends packets */
-        uint8_t   in_MPC_data_ATM_addr[ATM_ESA_LEN];
-        uint16_t  holding_time;
+        __u8   in_MPC_data_ATM_addr[ATM_ESA_LEN];
+        __u16  holding_time;
 } eg_ctrl_info;
 
 struct mpc_parameters {
-        uint16_t mpc_p1;   /* Shortcut-Setup Frame Count    */
-        uint16_t mpc_p2;   /* Shortcut-Setup Frame Time     */
-        uint8_t mpc_p3[8]; /* Flow-detection Protocols      */
-        uint16_t mpc_p4;   /* MPC Initial Retry Time        */
-        uint16_t mpc_p5;   /* MPC Retry Time Maximum        */
-        uint16_t mpc_p6;   /* Hold Down Time                */      
+        __u16 mpc_p1;   /* Shortcut-Setup Frame Count    */
+        __u16 mpc_p2;   /* Shortcut-Setup Frame Time     */
+        __u8 mpc_p3[8]; /* Flow-detection Protocols      */
+        __u16 mpc_p4;   /* MPC Initial Retry Time        */
+        __u16 mpc_p5;   /* MPC Retry Time Maximum        */
+        __u16 mpc_p6;   /* Hold Down Time                */
 } ;
 
 struct k_message {
-        uint16_t type;
+        __u16 type;
         __be32 ip_mask;
-        uint8_t  MPS_ctrl[ATM_ESA_LEN];
+        __u8  MPS_ctrl[ATM_ESA_LEN];
         union {
                 in_ctrl_info in_info;
                 eg_ctrl_info eg_info;
@@ -61,11 +62,11 @@ struct k_message {
 
 struct llc_snap_hdr {
 	/* RFC 1483 LLC/SNAP encapsulation for routed IP PDUs */
-        uint8_t  dsap;    /* Destination Service Access Point (0xAA)     */
-        uint8_t  ssap;    /* Source Service Access Point      (0xAA)     */
-        uint8_t  ui;      /* Unnumbered Information           (0x03)     */
-        uint8_t  org[3];  /* Organizational identification    (0x000000) */
-        uint8_t  type[2]; /* Ether type (for IP)              (0x0800)   */
+        __u8  dsap;    /* Destination Service Access Point (0xAA)     */
+        __u8  ssap;    /* Source Service Access Point      (0xAA)     */
+        __u8  ui;      /* Unnumbered Information           (0x03)     */
+        __u8  org[3];  /* Organizational identification    (0x000000) */
+        __u8  type[2]; /* Ether type (for IP)              (0x0800)   */
 };
 
 /* TLVs this MPC recognizes */
diff --git a/include/linux/cm4000_cs.h b/include/linux/cm4000_cs.h
index 605ebe24bb2e..72bfefdbd767 100644
--- a/include/linux/cm4000_cs.h
+++ b/include/linux/cm4000_cs.h
@@ -1,6 +1,8 @@
 #ifndef	_CM4000_H_
 #define	_CM4000_H_
 
+#include <linux/types.h>
+
 #define	MAX_ATR			33
 
 #define	CM4000_MAX_DEV		4
@@ -10,9 +12,9 @@
  * not to break compilation of userspace apps. -HW */
 
 typedef struct atreq {
-	int32_t atr_len;
+	__s32 atr_len;
 	unsigned char atr[64];
-	int32_t power_act;
+	__s32 power_act;
 	unsigned char bIFSD;
 	unsigned char bIFSC;
 } atreq_t;
@@ -22,13 +24,13 @@ typedef struct atreq {
  * member sizes. This leads to CONFIG_COMPAT breakage, since 32bit userspace
  * will lay out the structure members differently than the 64bit kernel.
  *
- * I've changed "ptsreq.protocol" from "unsigned long" to "u_int32_t".
+ * I've changed "ptsreq.protocol" from "unsigned long" to "__u32".
  * On 32bit this will make no difference.  With 64bit kernels, it will make
  * 32bit apps work, too.
  */
 
 typedef struct ptsreq {
-	u_int32_t protocol; /*T=0: 2^0, T=1:  2^1*/
+	__u32 protocol; /*T=0: 2^0, T=1:  2^1*/
  	unsigned char flags;
  	unsigned char pts1;
  	unsigned char pts2;
diff --git a/include/linux/dlm_netlink.h b/include/linux/dlm_netlink.h
index 19276332707a..647c8ef27227 100644
--- a/include/linux/dlm_netlink.h
+++ b/include/linux/dlm_netlink.h
@@ -9,6 +9,8 @@
 #ifndef _DLM_NETLINK_H
 #define _DLM_NETLINK_H
 
+#include <linux/types.h>
+
 enum {
 	DLM_STATUS_WAITING = 1,
 	DLM_STATUS_GRANTED = 2,
@@ -18,16 +20,16 @@ enum {
 #define DLM_LOCK_DATA_VERSION 1
 
 struct dlm_lock_data {
-	uint16_t version;
-	uint32_t lockspace_id;
+	__u16 version;
+	__u32 lockspace_id;
 	int nodeid;
 	int ownpid;
-	uint32_t id;
-	uint32_t remid;
-	uint64_t xid;
-	int8_t status;
-	int8_t grmode;
-	int8_t rqmode;
+	__u32 id;
+	__u32 remid;
+	__u64 xid;
+	__s8 status;
+	__s8 grmode;
+	__s8 rqmode;
 	unsigned long timestamp;
 	int resource_namelen;
 	char resource_name[DLM_RESNAME_MAXLEN];
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 28c2940eb30d..48e44ee2b466 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -113,20 +113,20 @@ struct dm_ioctl {
 	 * return -ENOTTY) fill out this field, even if the
 	 * command failed.
 	 */
-	uint32_t version[3];	/* in/out */
-	uint32_t data_size;	/* total size of data passed in
+	__u32 version[3];	/* in/out */
+	__u32 data_size;	/* total size of data passed in
 				 * including this struct */
 
-	uint32_t data_start;	/* offset to start of data
+	__u32 data_start;	/* offset to start of data
 				 * relative to start of this struct */
 
-	uint32_t target_count;	/* in/out */
-	int32_t open_count;	/* out */
-	uint32_t flags;		/* in/out */
-	uint32_t event_nr;      	/* in/out */
-	uint32_t padding;
+	__u32 target_count;	/* in/out */
+	__s32 open_count;	/* out */
+	__u32 flags;		/* in/out */
+	__u32 event_nr;      	/* in/out */
+	__u32 padding;
 
-	uint64_t dev;		/* in/out */
+	__u64 dev;		/* in/out */
 
 	char name[DM_NAME_LEN];	/* device name */
 	char uuid[DM_UUID_LEN];	/* unique identifier for
@@ -139,9 +139,9 @@ struct dm_ioctl {
  * dm_ioctl.
  */
 struct dm_target_spec {
-	uint64_t sector_start;
-	uint64_t length;
-	int32_t status;		/* used when reading from kernel only */
+	__u64 sector_start;
+	__u64 length;
+	__s32 status;		/* used when reading from kernel only */
 
 	/*
 	 * Location of the next dm_target_spec.
@@ -153,7 +153,7 @@ struct dm_target_spec {
 	 *   (that follows the dm_ioctl struct) to the start of the "next"
 	 *   dm_target_spec.
 	 */
-	uint32_t next;
+	__u32 next;
 
 	char target_type[DM_MAX_TYPE_NAME];
 
@@ -168,17 +168,17 @@ struct dm_target_spec {
  * Used to retrieve the target dependencies.
  */
 struct dm_target_deps {
-	uint32_t count;	/* Array size */
-	uint32_t padding;	/* unused */
-	uint64_t dev[0];	/* out */
+	__u32 count;	/* Array size */
+	__u32 padding;	/* unused */
+	__u64 dev[0];	/* out */
 };
 
 /*
  * Used to get a list of all dm devices.
  */
 struct dm_name_list {
-	uint64_t dev;
-	uint32_t next;		/* offset to the next record from
+	__u64 dev;
+	__u32 next;		/* offset to the next record from
 				   the _start_ of this */
 	char name[0];
 };
@@ -187,8 +187,8 @@ struct dm_name_list {
  * Used to retrieve the target versions
  */
 struct dm_target_versions {
-        uint32_t next;
-        uint32_t version[3];
+        __u32 next;
+        __u32 version[3];
 
         char name[0];
 };
@@ -197,7 +197,7 @@ struct dm_target_versions {
  * Used to pass message to a target
  */
 struct dm_target_msg {
-	uint64_t sector;	/* Device sector */
+	__u64 sector;	/* Device sector */
 
 	char message[0];
 };
diff --git a/include/linux/dvb/audio.h b/include/linux/dvb/audio.h
index bb0df2aaebfa..fec66bd24f22 100644
--- a/include/linux/dvb/audio.h
+++ b/include/linux/dvb/audio.h
@@ -76,7 +76,7 @@ struct audio_karaoke{  /* if Vocal1 or Vocal2 are non-zero, they get mixed  */
 } audio_karaoke_t;     /* into left and right  */
 
 
-typedef uint16_t audio_attributes_t;
+typedef __u16 audio_attributes_t;
 /*   bits: descr. */
 /*   15-13 audio coding mode (0=ac3, 2=mpeg1, 3=mpeg2ext, 4=LPCM, 6=DTS, */
 /*   12    multichannel extension */
diff --git a/include/linux/dvb/video.h b/include/linux/dvb/video.h
index ee5d2df2d78d..1d750c0fd86e 100644
--- a/include/linux/dvb/video.h
+++ b/include/linux/dvb/video.h
@@ -132,7 +132,7 @@ struct video_command {
 #define VIDEO_VSYNC_FIELD_PROGRESSIVE	(3)
 
 struct video_event {
-	int32_t type;
+	__s32 type;
 #define VIDEO_EVENT_SIZE_CHANGED	1
 #define VIDEO_EVENT_FRAME_RATE_CHANGED	2
 #define VIDEO_EVENT_DECODER_STOPPED 	3
@@ -157,25 +157,25 @@ struct video_status {
 
 struct video_still_picture {
 	char __user *iFrame;        /* pointer to a single iframe in memory */
-	int32_t size;
+	__s32 size;
 };
 
 
 typedef
 struct video_highlight {
 	int     active;      /*    1=show highlight, 0=hide highlight */
-	uint8_t contrast1;   /*    7- 4  Pattern pixel contrast */
+	__u8    contrast1;   /*    7- 4  Pattern pixel contrast */
 			     /*    3- 0  Background pixel contrast */
-	uint8_t contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
+	__u8    contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
 			     /*    3- 0  Emphasis pixel-1 contrast */
-	uint8_t color1;      /*    7- 4  Pattern pixel color */
+	__u8    color1;      /*    7- 4  Pattern pixel color */
 			     /*    3- 0  Background pixel color */
-	uint8_t color2;      /*    7- 4  Emphasis pixel-2 color */
+	__u8    color2;      /*    7- 4  Emphasis pixel-2 color */
 			     /*    3- 0  Emphasis pixel-1 color */
-	uint32_t ypos;       /*   23-22  auto action mode */
+	__u32    ypos;       /*   23-22  auto action mode */
 			     /*   21-12  start y */
 			     /*    9- 0  end y */
-	uint32_t xpos;       /*   23-22  button color number */
+	__u32    xpos;       /*   23-22  button color number */
 			     /*   21-12  start x */
 			     /*    9- 0  end x */
 } video_highlight_t;
@@ -189,17 +189,17 @@ typedef struct video_spu {
 
 typedef struct video_spu_palette {      /* SPU Palette information */
 	int length;
-	uint8_t __user *palette;
+	__u8 __user *palette;
 } video_spu_palette_t;
 
 
 typedef struct video_navi_pack {
 	int length;          /* 0 ... 1024 */
-	uint8_t data[1024];
+	__u8 data[1024];
 } video_navi_pack_t;
 
 
-typedef uint16_t video_attributes_t;
+typedef __u16 video_attributes_t;
 /*   bits: descr. */
 /*   15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */
 /*   13-12 TV system (0=525/60, 1=625/50) */
diff --git a/include/linux/if_arcnet.h b/include/linux/if_arcnet.h
index 27ea2ac445ad..0835debab115 100644
--- a/include/linux/if_arcnet.h
+++ b/include/linux/if_arcnet.h
@@ -16,6 +16,7 @@
 #ifndef _LINUX_IF_ARCNET_H
 #define _LINUX_IF_ARCNET_H
 
+#include <linux/types.h>
 #include <linux/if_ether.h>
 
 
@@ -57,10 +58,10 @@
  */
 struct arc_rfc1201
 {
-    uint8_t  proto;		/* protocol ID field - varies		*/
-    uint8_t  split_flag;	/* for use with split packets		*/
+    __u8  proto;		/* protocol ID field - varies		*/
+    __u8  split_flag;	/* for use with split packets		*/
     __be16   sequence;		/* sequence number			*/
-    uint8_t  payload[0];	/* space remaining in packet (504 bytes)*/
+    __u8  payload[0];	/* space remaining in packet (504 bytes)*/
 };
 #define RFC1201_HDR_SIZE 4
 
@@ -70,8 +71,8 @@ struct arc_rfc1201
  */
 struct arc_rfc1051
 {
-    uint8_t proto;		/* ARC_P_RFC1051_ARP/RFC1051_IP	*/
-    uint8_t payload[0];		/* 507 bytes			*/
+    __u8 proto;		/* ARC_P_RFC1051_ARP/RFC1051_IP	*/
+    __u8 payload[0];		/* 507 bytes			*/
 };
 #define RFC1051_HDR_SIZE 1
 
@@ -82,20 +83,20 @@ struct arc_rfc1051
  */
 struct arc_eth_encap
 {
-    uint8_t proto;		/* Always ARC_P_ETHER			*/
+    __u8 proto;		/* Always ARC_P_ETHER			*/
     struct ethhdr eth;		/* standard ethernet header (yuck!)	*/
-    uint8_t payload[0];		/* 493 bytes				*/
+    __u8 payload[0];		/* 493 bytes				*/
 };
 #define ETH_ENCAP_HDR_SIZE 14
 
 
 struct arc_cap
 {
-	uint8_t proto;
-	uint8_t cookie[sizeof(int)];   /* Actually NOT sent over the network */
+	__u8 proto;
+	__u8 cookie[sizeof(int)];   /* Actually NOT sent over the network */
 	union {
-		uint8_t ack;
-		uint8_t raw[0];		/* 507 bytes */
+		__u8 ack;
+		__u8 raw[0];		/* 507 bytes */
 	} mes;
 };
 
@@ -109,7 +110,7 @@ struct arc_cap
  */
 struct arc_hardware
 {
-    uint8_t  source,		/* source ARCnet - filled in automagically */
+    __u8  source,		/* source ARCnet - filled in automagically */
              dest,		/* destination ARCnet - 0 for broadcast    */
     	     offset[2];		/* offset bytes (some weird semantics)     */
 };
@@ -130,7 +131,7 @@ struct archdr
 	struct arc_rfc1051   rfc1051;
 	struct arc_eth_encap eth_encap;
 	struct arc_cap       cap;
-	uint8_t raw[0];		/* 508 bytes				*/
+	__u8 raw[0];		/* 508 bytes				*/
     } soft;
 };
 
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index 0f434a28fb58..148265e63e8d 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -96,10 +96,10 @@
  */
 struct ip_vs_service_user {
 	/* virtual service addresses */
-	u_int16_t		protocol;
+	__u16		protocol;
 	__be32			addr;		/* virtual ip address */
 	__be16			port;
-	u_int32_t		fwmark;		/* firwall mark of service */
+	__u32		fwmark;		/* firwall mark of service */
 
 	/* virtual service options */
 	char			sched_name[IP_VS_SCHEDNAME_MAXLEN];
@@ -119,8 +119,8 @@ struct ip_vs_dest_user {
 	int			weight;		/* destination weight */
 
 	/* thresholds for active connections */
-	u_int32_t		u_threshold;	/* upper threshold */
-	u_int32_t		l_threshold;	/* lower threshold */
+	__u32		u_threshold;	/* upper threshold */
+	__u32		l_threshold;	/* lower threshold */
 };
 
 
@@ -159,10 +159,10 @@ struct ip_vs_getinfo {
 /* The argument to IP_VS_SO_GET_SERVICE */
 struct ip_vs_service_entry {
 	/* which service: user fills in these */
-	u_int16_t		protocol;
+	__u16		protocol;
 	__be32			addr;		/* virtual address */
 	__be16			port;
-	u_int32_t		fwmark;		/* firwall mark of service */
+	__u32		fwmark;		/* firwall mark of service */
 
 	/* service options */
 	char			sched_name[IP_VS_SCHEDNAME_MAXLEN];
@@ -184,12 +184,12 @@ struct ip_vs_dest_entry {
 	unsigned		conn_flags;	/* connection flags */
 	int			weight;		/* destination weight */
 
-	u_int32_t		u_threshold;	/* upper threshold */
-	u_int32_t		l_threshold;	/* lower threshold */
+	__u32		u_threshold;	/* upper threshold */
+	__u32		l_threshold;	/* lower threshold */
 
-	u_int32_t		activeconns;	/* active connections */
-	u_int32_t		inactconns;	/* inactive connections */
-	u_int32_t		persistconns;	/* persistent connections */
+	__u32		activeconns;	/* active connections */
+	__u32		inactconns;	/* inactive connections */
+	__u32		persistconns;	/* persistent connections */
 
 	/* statistics */
 	struct ip_vs_stats_user stats;
@@ -199,10 +199,10 @@ struct ip_vs_dest_entry {
 /* The argument to IP_VS_SO_GET_DESTS */
 struct ip_vs_get_dests {
 	/* which service: user fills in these */
-	u_int16_t		protocol;
+	__u16		protocol;
 	__be32			addr;		/* virtual address */
 	__be16			port;
-	u_int32_t		fwmark;		/* firwall mark of service */
+	__u32		fwmark;		/* firwall mark of service */
 
 	/* number of real servers */
 	unsigned int		num_dests;
diff --git a/include/linux/ivtvfb.h b/include/linux/ivtvfb.h
index e20af47b59ad..9d88b29ddf55 100644
--- a/include/linux/ivtvfb.h
+++ b/include/linux/ivtvfb.h
@@ -33,6 +33,6 @@ struct ivtvfb_dma_frame {
 };
 
 #define IVTVFB_IOC_DMA_FRAME 	_IOW('V', BASE_VIDIOC_PRIVATE+0, struct ivtvfb_dma_frame)
-#define FBIO_WAITFORVSYNC	_IOW('F', 0x20, u_int32_t)
+#define FBIO_WAITFORVSYNC	_IOW('F', 0x20, __u32)
 
 #endif
diff --git a/include/linux/matroxfb.h b/include/linux/matroxfb.h
index 404f678e734b..2203121a43e9 100644
--- a/include/linux/matroxfb.h
+++ b/include/linux/matroxfb.h
@@ -37,7 +37,7 @@ enum matroxfb_ctrl_id {
   MATROXFB_CID_LAST
 };
 
-#define FBIO_WAITFORVSYNC	_IOW('F', 0x20, u_int32_t)
+#define FBIO_WAITFORVSYNC	_IOW('F', 0x20, __u32)
 
 #endif
 
diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
index 01b262959f2e..228b0b6306b0 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/linux/pfkeyv2.h
@@ -12,187 +12,187 @@
 #define PFKEYV2_REVISION	199806L
 
 struct sadb_msg {
-	uint8_t		sadb_msg_version;
-	uint8_t		sadb_msg_type;
-	uint8_t		sadb_msg_errno;
-	uint8_t		sadb_msg_satype;
-	uint16_t	sadb_msg_len;
-	uint16_t	sadb_msg_reserved;
-	uint32_t	sadb_msg_seq;
-	uint32_t	sadb_msg_pid;
+	__u8		sadb_msg_version;
+	__u8		sadb_msg_type;
+	__u8		sadb_msg_errno;
+	__u8		sadb_msg_satype;
+	__u16	sadb_msg_len;
+	__u16	sadb_msg_reserved;
+	__u32	sadb_msg_seq;
+	__u32	sadb_msg_pid;
 } __attribute__((packed));
 /* sizeof(struct sadb_msg) == 16 */
 
 struct sadb_ext {
-	uint16_t	sadb_ext_len;
-	uint16_t	sadb_ext_type;
+	__u16	sadb_ext_len;
+	__u16	sadb_ext_type;
 } __attribute__((packed));
 /* sizeof(struct sadb_ext) == 4 */
 
 struct sadb_sa {
-	uint16_t	sadb_sa_len;
-	uint16_t	sadb_sa_exttype;
+	__u16	sadb_sa_len;
+	__u16	sadb_sa_exttype;
 	__be32		sadb_sa_spi;
-	uint8_t		sadb_sa_replay;
-	uint8_t		sadb_sa_state;
-	uint8_t		sadb_sa_auth;
-	uint8_t		sadb_sa_encrypt;
-	uint32_t	sadb_sa_flags;
+	__u8		sadb_sa_replay;
+	__u8		sadb_sa_state;
+	__u8		sadb_sa_auth;
+	__u8		sadb_sa_encrypt;
+	__u32	sadb_sa_flags;
 } __attribute__((packed));
 /* sizeof(struct sadb_sa) == 16 */
 
 struct sadb_lifetime {
-	uint16_t	sadb_lifetime_len;
-	uint16_t	sadb_lifetime_exttype;
-	uint32_t	sadb_lifetime_allocations;
-	uint64_t	sadb_lifetime_bytes;
-	uint64_t	sadb_lifetime_addtime;
-	uint64_t	sadb_lifetime_usetime;
+	__u16	sadb_lifetime_len;
+	__u16	sadb_lifetime_exttype;
+	__u32	sadb_lifetime_allocations;
+	__u64	sadb_lifetime_bytes;
+	__u64	sadb_lifetime_addtime;
+	__u64	sadb_lifetime_usetime;
 } __attribute__((packed));
 /* sizeof(struct sadb_lifetime) == 32 */
 
 struct sadb_address {
-	uint16_t	sadb_address_len;
-	uint16_t	sadb_address_exttype;
-	uint8_t		sadb_address_proto;
-	uint8_t		sadb_address_prefixlen;
-	uint16_t	sadb_address_reserved;
+	__u16	sadb_address_len;
+	__u16	sadb_address_exttype;
+	__u8		sadb_address_proto;
+	__u8		sadb_address_prefixlen;
+	__u16	sadb_address_reserved;
 } __attribute__((packed));
 /* sizeof(struct sadb_address) == 8 */
 
 struct sadb_key {
-	uint16_t	sadb_key_len;
-	uint16_t	sadb_key_exttype;
-	uint16_t	sadb_key_bits;
-	uint16_t	sadb_key_reserved;
+	__u16	sadb_key_len;
+	__u16	sadb_key_exttype;
+	__u16	sadb_key_bits;
+	__u16	sadb_key_reserved;
 } __attribute__((packed));
 /* sizeof(struct sadb_key) == 8 */
 
 struct sadb_ident {
-	uint16_t	sadb_ident_len;
-	uint16_t	sadb_ident_exttype;
-	uint16_t	sadb_ident_type;
-	uint16_t	sadb_ident_reserved;
-	uint64_t	sadb_ident_id;
+	__u16	sadb_ident_len;
+	__u16	sadb_ident_exttype;
+	__u16	sadb_ident_type;
+	__u16	sadb_ident_reserved;
+	__u64	sadb_ident_id;
 } __attribute__((packed));
 /* sizeof(struct sadb_ident) == 16 */
 
 struct sadb_sens {
-	uint16_t	sadb_sens_len;
-	uint16_t	sadb_sens_exttype;
-	uint32_t	sadb_sens_dpd;
-	uint8_t		sadb_sens_sens_level;
-	uint8_t		sadb_sens_sens_len;
-	uint8_t		sadb_sens_integ_level;
-	uint8_t		sadb_sens_integ_len;
-	uint32_t	sadb_sens_reserved;
+	__u16	sadb_sens_len;
+	__u16	sadb_sens_exttype;
+	__u32	sadb_sens_dpd;
+	__u8		sadb_sens_sens_level;
+	__u8		sadb_sens_sens_len;
+	__u8		sadb_sens_integ_level;
+	__u8		sadb_sens_integ_len;
+	__u32	sadb_sens_reserved;
 } __attribute__((packed));
 /* sizeof(struct sadb_sens) == 16 */
 
 /* followed by:
-	uint64_t	sadb_sens_bitmap[sens_len];
-	uint64_t	sadb_integ_bitmap[integ_len];  */
+	__u64	sadb_sens_bitmap[sens_len];
+	__u64	sadb_integ_bitmap[integ_len];  */
 
 struct sadb_prop {
-	uint16_t	sadb_prop_len;
-	uint16_t	sadb_prop_exttype;
-	uint8_t		sadb_prop_replay;
-	uint8_t		sadb_prop_reserved[3];
+	__u16	sadb_prop_len;
+	__u16	sadb_prop_exttype;
+	__u8		sadb_prop_replay;
+	__u8		sadb_prop_reserved[3];
 } __attribute__((packed));
 /* sizeof(struct sadb_prop) == 8 */
 
 /* followed by:
 	struct sadb_comb sadb_combs[(sadb_prop_len +
-		sizeof(uint64_t) - sizeof(struct sadb_prop)) /
+		sizeof(__u64) - sizeof(struct sadb_prop)) /
 		sizeof(struct sadb_comb)]; */
 
 struct sadb_comb {
-	uint8_t		sadb_comb_auth;
-	uint8_t		sadb_comb_encrypt;
-	uint16_t	sadb_comb_flags;
-	uint16_t	sadb_comb_auth_minbits;
-	uint16_t	sadb_comb_auth_maxbits;
-	uint16_t	sadb_comb_encrypt_minbits;
-	uint16_t	sadb_comb_encrypt_maxbits;
-	uint32_t	sadb_comb_reserved;
-	uint32_t	sadb_comb_soft_allocations;
-	uint32_t	sadb_comb_hard_allocations;
-	uint64_t	sadb_comb_soft_bytes;
-	uint64_t	sadb_comb_hard_bytes;
-	uint64_t	sadb_comb_soft_addtime;
-	uint64_t	sadb_comb_hard_addtime;
-	uint64_t	sadb_comb_soft_usetime;
-	uint64_t	sadb_comb_hard_usetime;
+	__u8		sadb_comb_auth;
+	__u8		sadb_comb_encrypt;
+	__u16	sadb_comb_flags;
+	__u16	sadb_comb_auth_minbits;
+	__u16	sadb_comb_auth_maxbits;
+	__u16	sadb_comb_encrypt_minbits;
+	__u16	sadb_comb_encrypt_maxbits;
+	__u32	sadb_comb_reserved;
+	__u32	sadb_comb_soft_allocations;
+	__u32	sadb_comb_hard_allocations;
+	__u64	sadb_comb_soft_bytes;
+	__u64	sadb_comb_hard_bytes;
+	__u64	sadb_comb_soft_addtime;
+	__u64	sadb_comb_hard_addtime;
+	__u64	sadb_comb_soft_usetime;
+	__u64	sadb_comb_hard_usetime;
 } __attribute__((packed));
 /* sizeof(struct sadb_comb) == 72 */
 
 struct sadb_supported {
-	uint16_t	sadb_supported_len;
-	uint16_t	sadb_supported_exttype;
-	uint32_t	sadb_supported_reserved;
+	__u16	sadb_supported_len;
+	__u16	sadb_supported_exttype;
+	__u32	sadb_supported_reserved;
 } __attribute__((packed));
 /* sizeof(struct sadb_supported) == 8 */
 
 /* followed by:
 	struct sadb_alg sadb_algs[(sadb_supported_len +
-		sizeof(uint64_t) - sizeof(struct sadb_supported)) /
+		sizeof(__u64) - sizeof(struct sadb_supported)) /
 		sizeof(struct sadb_alg)]; */
 
 struct sadb_alg {
-	uint8_t		sadb_alg_id;
-	uint8_t		sadb_alg_ivlen;
-	uint16_t	sadb_alg_minbits;
-	uint16_t	sadb_alg_maxbits;
-	uint16_t	sadb_alg_reserved;
+	__u8		sadb_alg_id;
+	__u8		sadb_alg_ivlen;
+	__u16	sadb_alg_minbits;
+	__u16	sadb_alg_maxbits;
+	__u16	sadb_alg_reserved;
 } __attribute__((packed));
 /* sizeof(struct sadb_alg) == 8 */
 
 struct sadb_spirange {
-	uint16_t	sadb_spirange_len;
-	uint16_t	sadb_spirange_exttype;
-	uint32_t	sadb_spirange_min;
-	uint32_t	sadb_spirange_max;
-	uint32_t	sadb_spirange_reserved;
+	__u16	sadb_spirange_len;
+	__u16	sadb_spirange_exttype;
+	__u32	sadb_spirange_min;
+	__u32	sadb_spirange_max;
+	__u32	sadb_spirange_reserved;
 } __attribute__((packed));
 /* sizeof(struct sadb_spirange) == 16 */
 
 struct sadb_x_kmprivate {
-	uint16_t	sadb_x_kmprivate_len;
-	uint16_t	sadb_x_kmprivate_exttype;
-	uint32_t	sadb_x_kmprivate_reserved;
+	__u16	sadb_x_kmprivate_len;
+	__u16	sadb_x_kmprivate_exttype;
+	__u32	sadb_x_kmprivate_reserved;
 } __attribute__((packed));
 /* sizeof(struct sadb_x_kmprivate) == 8 */
 
 struct sadb_x_sa2 {
-	uint16_t	sadb_x_sa2_len;
-	uint16_t	sadb_x_sa2_exttype;
-	uint8_t		sadb_x_sa2_mode;
-	uint8_t		sadb_x_sa2_reserved1;
-	uint16_t	sadb_x_sa2_reserved2;
-	uint32_t	sadb_x_sa2_sequence;
-	uint32_t	sadb_x_sa2_reqid;
+	__u16	sadb_x_sa2_len;
+	__u16	sadb_x_sa2_exttype;
+	__u8		sadb_x_sa2_mode;
+	__u8		sadb_x_sa2_reserved1;
+	__u16	sadb_x_sa2_reserved2;
+	__u32	sadb_x_sa2_sequence;
+	__u32	sadb_x_sa2_reqid;
 } __attribute__((packed));
 /* sizeof(struct sadb_x_sa2) == 16 */
 
 struct sadb_x_policy {
-	uint16_t	sadb_x_policy_len;
-	uint16_t	sadb_x_policy_exttype;
-	uint16_t	sadb_x_policy_type;
-	uint8_t		sadb_x_policy_dir;
-	uint8_t		sadb_x_policy_reserved;
-	uint32_t	sadb_x_policy_id;
-	uint32_t	sadb_x_policy_priority;
+	__u16	sadb_x_policy_len;
+	__u16	sadb_x_policy_exttype;
+	__u16	sadb_x_policy_type;
+	__u8		sadb_x_policy_dir;
+	__u8		sadb_x_policy_reserved;
+	__u32	sadb_x_policy_id;
+	__u32	sadb_x_policy_priority;
 } __attribute__((packed));
 /* sizeof(struct sadb_x_policy) == 16 */
 
 struct sadb_x_ipsecrequest {
-	uint16_t	sadb_x_ipsecrequest_len;
-	uint16_t	sadb_x_ipsecrequest_proto;
-	uint8_t		sadb_x_ipsecrequest_mode;
-	uint8_t		sadb_x_ipsecrequest_level;
-	uint16_t	sadb_x_ipsecrequest_reserved1;
-	uint32_t	sadb_x_ipsecrequest_reqid;
-	uint32_t	sadb_x_ipsecrequest_reserved2;
+	__u16	sadb_x_ipsecrequest_len;
+	__u16	sadb_x_ipsecrequest_proto;
+	__u8		sadb_x_ipsecrequest_mode;
+	__u8		sadb_x_ipsecrequest_level;
+	__u16	sadb_x_ipsecrequest_reserved1;
+	__u32	sadb_x_ipsecrequest_reqid;
+	__u32	sadb_x_ipsecrequest_reserved2;
 } __attribute__((packed));
 /* sizeof(struct sadb_x_ipsecrequest) == 16 */
 
@@ -200,38 +200,38 @@ struct sadb_x_ipsecrequest {
  * type of NAT-T is supported, draft-ietf-ipsec-udp-encaps-06
  */
 struct sadb_x_nat_t_type {
-	uint16_t	sadb_x_nat_t_type_len;
-	uint16_t	sadb_x_nat_t_type_exttype;
-	uint8_t		sadb_x_nat_t_type_type;
-	uint8_t		sadb_x_nat_t_type_reserved[3];
+	__u16	sadb_x_nat_t_type_len;
+	__u16	sadb_x_nat_t_type_exttype;
+	__u8		sadb_x_nat_t_type_type;
+	__u8		sadb_x_nat_t_type_reserved[3];
 } __attribute__((packed));
 /* sizeof(struct sadb_x_nat_t_type) == 8 */
 
 /* Pass a NAT Traversal port (Source or Dest port) */
 struct sadb_x_nat_t_port {
-	uint16_t	sadb_x_nat_t_port_len;
-	uint16_t	sadb_x_nat_t_port_exttype;
+	__u16	sadb_x_nat_t_port_len;
+	__u16	sadb_x_nat_t_port_exttype;
 	__be16		sadb_x_nat_t_port_port;
-	uint16_t	sadb_x_nat_t_port_reserved;
+	__u16	sadb_x_nat_t_port_reserved;
 } __attribute__((packed));
 /* sizeof(struct sadb_x_nat_t_port) == 8 */
 
 /* Generic LSM security context */
 struct sadb_x_sec_ctx {
-	uint16_t	sadb_x_sec_len;
-	uint16_t	sadb_x_sec_exttype;
-	uint8_t		sadb_x_ctx_alg;  /* LSMs: e.g., selinux == 1 */
-	uint8_t		sadb_x_ctx_doi;
-	uint16_t	sadb_x_ctx_len;
+	__u16	sadb_x_sec_len;
+	__u16	sadb_x_sec_exttype;
+	__u8		sadb_x_ctx_alg;  /* LSMs: e.g., selinux == 1 */
+	__u8		sadb_x_ctx_doi;
+	__u16	sadb_x_ctx_len;
 } __attribute__((packed));
 /* sizeof(struct sadb_sec_ctx) = 8 */
 
 /* Used by MIGRATE to pass addresses IKE will use to perform
  * negotiation with the peer */
 struct sadb_x_kmaddress {
-	uint16_t	sadb_x_kmaddress_len;
-	uint16_t	sadb_x_kmaddress_exttype;
-	uint32_t	sadb_x_kmaddress_reserved;
+	__u16	sadb_x_kmaddress_len;
+	__u16	sadb_x_kmaddress_exttype;
+	__u32	sadb_x_kmaddress_reserved;
 } __attribute__((packed));
 /* sizeof(struct sadb_x_kmaddress) == 8 */
 
diff --git a/include/linux/selinux_netlink.h b/include/linux/selinux_netlink.h
index bbf489decd84..d239797785cf 100644
--- a/include/linux/selinux_netlink.h
+++ b/include/linux/selinux_netlink.h
@@ -12,6 +12,8 @@
 #ifndef _LINUX_SELINUX_NETLINK_H
 #define _LINUX_SELINUX_NETLINK_H
 
+#include <linux/types.h>
+
 /* Message types. */
 #define SELNL_MSG_BASE 0x10
 enum {
@@ -38,11 +40,11 @@ enum selinux_nlgroups {
 
 /* Message structures */
 struct selnl_msg_setenforce {
-	int32_t		val;
+	__s32		val;
 };
 
 struct selnl_msg_policyload {
-	u_int32_t	seqno;
+	__u32	seqno;
 };
 
 #endif /* _LINUX_SELINUX_NETLINK_H */
diff --git a/include/sound/asound.h b/include/sound/asound.h
index 16684c5a608c..d9beda5f74a7 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -23,9 +23,10 @@
 #ifndef __SOUND_ASOUND_H
 #define __SOUND_ASOUND_H
 
+#include <linux/types.h>
+
 #ifdef __KERNEL__
 #include <linux/ioctl.h>
-#include <linux/types.h>
 #include <linux/time.h>
 #include <asm/byteorder.h>
 
@@ -342,7 +343,7 @@ struct snd_interval {
 #define SNDRV_MASK_MAX	256
 
 struct snd_mask {
-	u_int32_t bits[(SNDRV_MASK_MAX+31)/32];
+	__u32 bits[(SNDRV_MASK_MAX+31)/32];
 };
 
 struct snd_pcm_hw_params {
diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 10ee28eac018..c380056ff26d 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -1,6 +1,8 @@
 #ifndef __SOUND_EMU10K1_H
 #define __SOUND_EMU10K1_H
 
+#include <linux/types.h>
+
 /*
  *  Copyright (c) by Jaroslav Kysela <perex@perex.cz>,
  *		     Creative Labs, Inc.
@@ -34,6 +36,8 @@
 #include <sound/timer.h>
 #include <linux/interrupt.h>
 #include <linux/mutex.h>
+#include <linux/types.h>
+
 #include <asm/io.h>
 
 /* ------------------- DEFINES -------------------- */
@@ -2171,7 +2175,7 @@ struct snd_emu10k1_fx8010_code {
 	char name[128];
 
 	DECLARE_BITMAP(gpr_valid, 0x200); /* bitmask of valid initializers */
-	u_int32_t __user *gpr_map;	  /* initializers */
+	__u32 __user *gpr_map;		/* initializers */
 
 	unsigned int gpr_add_control_count; /* count of GPR controls to add/replace */
 	struct snd_emu10k1_fx8010_control_gpr __user *gpr_add_controls; /* GPR controls to add/replace */
@@ -2184,11 +2188,11 @@ struct snd_emu10k1_fx8010_code {
 	struct snd_emu10k1_fx8010_control_gpr __user *gpr_list_controls; /* listed GPR controls */
 
 	DECLARE_BITMAP(tram_valid, 0x100); /* bitmask of valid initializers */
-	u_int32_t __user *tram_data_map;  /* data initializers */
-	u_int32_t __user *tram_addr_map;  /* map initializers */
+	__u32 __user *tram_data_map;	  /* data initializers */
+	__u32 __user *tram_addr_map;	  /* map initializers */
 
 	DECLARE_BITMAP(code_valid, 1024); /* bitmask of valid instructions */
-	u_int32_t __user *code;		  /* one instruction - 64 bits */
+	__u32 __user *code;		  /* one instruction - 64 bits */
 };
 
 struct snd_emu10k1_fx8010_tram {
-- 
cgit v1.2.3


From ccef7ab534347e2e1e1ef398d2ec987d37e519f3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Feb 2009 00:51:41 +0100
Subject: make MTD headers use strict integer types

The MTD headers traditionally use stdint types rather than
the kernel integer types. This converts them to do the
same as all the others.

Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/jffs2.h    | 27 +++++++++---------
 include/mtd/inftl-user.h | 36 ++++++++++++------------
 include/mtd/jffs2-user.h |  5 ++--
 include/mtd/mtd-abi.h    | 66 +++++++++++++++++++++++---------------------
 include/mtd/nftl-user.h  | 32 +++++++++++----------
 include/mtd/ubi-user.h   | 72 +++++++++++++++++++++++++-----------------------
 6 files changed, 123 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jffs2.h b/include/linux/jffs2.h
index da720bc3eb15..2b32d638147d 100644
--- a/include/linux/jffs2.h
+++ b/include/linux/jffs2.h
@@ -12,6 +12,7 @@
 #ifndef __LINUX_JFFS2_H__
 #define __LINUX_JFFS2_H__
 
+#include <linux/types.h>
 #include <linux/magic.h>
 
 /* You must include something which defines the C99 uintXX_t types. 
@@ -91,15 +92,15 @@
    byteswapping */
 
 typedef struct {
-	uint32_t v32;
+	__u32 v32;
 } __attribute__((packed)) jint32_t;
 
 typedef struct {
-	uint32_t m;
+	__u32 m;
 } __attribute__((packed)) jmode_t;
 
 typedef struct {
-	uint16_t v16;
+	__u16 v16;
 } __attribute__((packed)) jint16_t;
 
 struct jffs2_unknown_node
@@ -121,12 +122,12 @@ struct jffs2_raw_dirent
 	jint32_t version;
 	jint32_t ino; /* == zero for unlink */
 	jint32_t mctime;
-	uint8_t nsize;
-	uint8_t type;
-	uint8_t unused[2];
+	__u8 nsize;
+	__u8 type;
+	__u8 unused[2];
 	jint32_t node_crc;
 	jint32_t name_crc;
-	uint8_t name[0];
+	__u8 name[0];
 };
 
 /* The JFFS2 raw inode structure: Used for storage on physical media.  */
@@ -153,12 +154,12 @@ struct jffs2_raw_inode
 	jint32_t offset;     /* Where to begin to write.  */
 	jint32_t csize;      /* (Compressed) data size */
 	jint32_t dsize;	     /* Size of the node's data. (after decompression) */
-	uint8_t compr;       /* Compression algorithm used */
-	uint8_t usercompr;   /* Compression algorithm requested by the user */
+	__u8 compr;       /* Compression algorithm used */
+	__u8 usercompr;   /* Compression algorithm requested by the user */
 	jint16_t flags;	     /* See JFFS2_INO_FLAG_* */
 	jint32_t data_crc;   /* CRC for the (compressed) data.  */
 	jint32_t node_crc;   /* CRC for the raw inode (excluding data)  */
-	uint8_t data[0];
+	__u8 data[0];
 };
 
 struct jffs2_raw_xattr {
@@ -168,12 +169,12 @@ struct jffs2_raw_xattr {
 	jint32_t hdr_crc;
 	jint32_t xid;		/* XATTR identifier number */
 	jint32_t version;
-	uint8_t xprefix;
-	uint8_t name_len;
+	__u8 xprefix;
+	__u8 name_len;
 	jint16_t value_len;
 	jint32_t data_crc;
 	jint32_t node_crc;
-	uint8_t data[0];
+	__u8 data[0];
 } __attribute__((packed));
 
 struct jffs2_raw_xref
diff --git a/include/mtd/inftl-user.h b/include/mtd/inftl-user.h
index d409d489d900..8376bd1a9e01 100644
--- a/include/mtd/inftl-user.h
+++ b/include/mtd/inftl-user.h
@@ -16,33 +16,33 @@
 /* Block Control Information */
 
 struct inftl_bci {
-	uint8_t ECCsig[6];
-	uint8_t Status;
-	uint8_t Status1;
+	__u8 ECCsig[6];
+	__u8 Status;
+	__u8 Status1;
 } __attribute__((packed));
 
 struct inftl_unithead1 {
-	uint16_t virtualUnitNo;
-	uint16_t prevUnitNo;
-	uint8_t ANAC;
-	uint8_t NACs;
-	uint8_t parityPerField;
-	uint8_t discarded;
+	__u16 virtualUnitNo;
+	__u16 prevUnitNo;
+	__u8 ANAC;
+	__u8 NACs;
+	__u8 parityPerField;
+	__u8 discarded;
 } __attribute__((packed));
 
 struct inftl_unithead2 {
-	uint8_t parityPerField;
-	uint8_t ANAC;
-	uint16_t prevUnitNo;
-	uint16_t virtualUnitNo;
-	uint8_t NACs;
-	uint8_t discarded;
+	__u8 parityPerField;
+	__u8 ANAC;
+	__u16 prevUnitNo;
+	__u16 virtualUnitNo;
+	__u8 NACs;
+	__u8 discarded;
 } __attribute__((packed));
 
 struct inftl_unittail {
-	uint8_t Reserved[4];
-	uint16_t EraseMark;
-	uint16_t EraseMark1;
+	__u8 Reserved[4];
+	__u16 EraseMark;
+	__u16 EraseMark1;
 } __attribute__((packed));
 
 union inftl_uci {
diff --git a/include/mtd/jffs2-user.h b/include/mtd/jffs2-user.h
index 001685d7fa88..fa94b0eb67c1 100644
--- a/include/mtd/jffs2-user.h
+++ b/include/mtd/jffs2-user.h
@@ -7,6 +7,7 @@
 
 /* This file is blessed for inclusion by userspace */
 #include <linux/jffs2.h>
+#include <linux/types.h>
 #include <endian.h>
 #include <byteswap.h>
 
@@ -19,8 +20,8 @@
 
 extern int target_endian;
 
-#define t16(x) ({ uint16_t __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_16(__b); })
-#define t32(x) ({ uint32_t __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_32(__b); })
+#define t16(x) ({ __u16 __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_16(__b); })
+#define t32(x) ({ __u32 __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_32(__b); })
 
 #define cpu_to_je16(x) ((jint16_t){t16(x)})
 #define cpu_to_je32(x) ((jint32_t){t32(x)})
diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h
index fb672013299c..b6595b3c68b6 100644
--- a/include/mtd/mtd-abi.h
+++ b/include/mtd/mtd-abi.h
@@ -5,14 +5,16 @@
 #ifndef __MTD_ABI_H__
 #define __MTD_ABI_H__
 
+#include <linux/types.h>
+
 struct erase_info_user {
-	uint32_t start;
-	uint32_t length;
+	__u32 start;
+	__u32 length;
 };
 
 struct mtd_oob_buf {
-	uint32_t start;
-	uint32_t length;
+	__u32 start;
+	__u32 length;
 	unsigned char __user *ptr;
 };
 
@@ -48,30 +50,30 @@ struct mtd_oob_buf {
 #define MTD_OTP_USER		2
 
 struct mtd_info_user {
-	uint8_t type;
-	uint32_t flags;
-	uint32_t size;	 // Total size of the MTD
-	uint32_t erasesize;
-	uint32_t writesize;
-	uint32_t oobsize;   // Amount of OOB data per block (e.g. 16)
+	__u8 type;
+	__u32 flags;
+	__u32 size;	 // Total size of the MTD
+	__u32 erasesize;
+	__u32 writesize;
+	__u32 oobsize;   // Amount of OOB data per block (e.g. 16)
 	/* The below two fields are obsolete and broken, do not use them
 	 * (TODO: remove at some point) */
-	uint32_t ecctype;
-	uint32_t eccsize;
+	__u32 ecctype;
+	__u32 eccsize;
 };
 
 struct region_info_user {
-	uint32_t offset;		/* At which this region starts,
+	__u32 offset;		/* At which this region starts,
 					 * from the beginning of the MTD */
-	uint32_t erasesize;		/* For this region */
-	uint32_t numblocks;		/* Number of blocks in this region */
-	uint32_t regionindex;
+	__u32 erasesize;		/* For this region */
+	__u32 numblocks;		/* Number of blocks in this region */
+	__u32 regionindex;
 };
 
 struct otp_info {
-	uint32_t start;
-	uint32_t length;
-	uint32_t locked;
+	__u32 start;
+	__u32 length;
+	__u32 locked;
 };
 
 #define MEMGETINFO		_IOR('M', 1, struct mtd_info_user)
@@ -99,15 +101,15 @@ struct otp_info {
  * interfaces
  */
 struct nand_oobinfo {
-	uint32_t useecc;
-	uint32_t eccbytes;
-	uint32_t oobfree[8][2];
-	uint32_t eccpos[32];
+	__u32 useecc;
+	__u32 eccbytes;
+	__u32 oobfree[8][2];
+	__u32 eccpos[32];
 };
 
 struct nand_oobfree {
-	uint32_t offset;
-	uint32_t length;
+	__u32 offset;
+	__u32 length;
 };
 
 #define MTD_MAX_OOBFREE_ENTRIES	8
@@ -116,9 +118,9 @@ struct nand_oobfree {
  * diagnosis and to allow creation of raw images
  */
 struct nand_ecclayout {
-	uint32_t eccbytes;
-	uint32_t eccpos[64];
-	uint32_t oobavail;
+	__u32 eccbytes;
+	__u32 eccpos[64];
+	__u32 oobavail;
 	struct nand_oobfree oobfree[MTD_MAX_OOBFREE_ENTRIES];
 };
 
@@ -131,10 +133,10 @@ struct nand_ecclayout {
  * @bbtblocks:	number of blocks reserved for bad block tables
  */
 struct mtd_ecc_stats {
-	uint32_t corrected;
-	uint32_t failed;
-	uint32_t badblocks;
-	uint32_t bbtblocks;
+	__u32 corrected;
+	__u32 failed;
+	__u32 badblocks;
+	__u32 bbtblocks;
 };
 
 /*
diff --git a/include/mtd/nftl-user.h b/include/mtd/nftl-user.h
index 390d21c080aa..98e9e57f22de 100644
--- a/include/mtd/nftl-user.h
+++ b/include/mtd/nftl-user.h
@@ -6,33 +6,35 @@
 #ifndef __MTD_NFTL_USER_H__
 #define __MTD_NFTL_USER_H__
 
+#include <linux/types.h>
+
 /* Block Control Information */
 
 struct nftl_bci {
 	unsigned char ECCSig[6];
-	uint8_t Status;
-	uint8_t Status1;
+	__u8 Status;
+	__u8 Status1;
 }__attribute__((packed));
 
 /* Unit Control Information */
 
 struct nftl_uci0 {
-	uint16_t VirtUnitNum;
-	uint16_t ReplUnitNum;
-	uint16_t SpareVirtUnitNum;
-	uint16_t SpareReplUnitNum;
+	__u16 VirtUnitNum;
+	__u16 ReplUnitNum;
+	__u16 SpareVirtUnitNum;
+	__u16 SpareReplUnitNum;
 } __attribute__((packed));
 
 struct nftl_uci1 {
-	uint32_t WearInfo;
-	uint16_t EraseMark;
-	uint16_t EraseMark1;
+	__u32 WearInfo;
+	__u16 EraseMark;
+	__u16 EraseMark1;
 } __attribute__((packed));
 
 struct nftl_uci2 {
-        uint16_t FoldMark;
-        uint16_t FoldMark1;
-	uint32_t unused;
+        __u16 FoldMark;
+        __u16 FoldMark1;
+	__u32 unused;
 } __attribute__((packed));
 
 union nftl_uci {
@@ -50,9 +52,9 @@ struct nftl_oob {
 
 struct NFTLMediaHeader {
 	char DataOrgID[6];
-	uint16_t NumEraseUnits;
-	uint16_t FirstPhysicalEUN;
-	uint32_t FormattedSize;
+	__u16 NumEraseUnits;
+	__u16 FirstPhysicalEUN;
+	__u32 FormattedSize;
 	unsigned char UnitSizeFactor;
 } __attribute__((packed));
 
diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h
index 296efae3525e..466a8320f1e6 100644
--- a/include/mtd/ubi-user.h
+++ b/include/mtd/ubi-user.h
@@ -21,6 +21,8 @@
 #ifndef __UBI_USER_H__
 #define __UBI_USER_H__
 
+#include <linux/types.h>
+
 /*
  * UBI device creation (the same as MTD device attachment)
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -152,7 +154,7 @@
 /* Create an UBI volume */
 #define UBI_IOCMKVOL _IOW(UBI_IOC_MAGIC, 0, struct ubi_mkvol_req)
 /* Remove an UBI volume */
-#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t)
+#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, __s32)
 /* Re-size an UBI volume */
 #define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req)
 /* Re-name volumes */
@@ -165,24 +167,24 @@
 /* Attach an MTD device */
 #define UBI_IOCATT _IOW(UBI_CTRL_IOC_MAGIC, 64, struct ubi_attach_req)
 /* Detach an MTD device */
-#define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, int32_t)
+#define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, __s32)
 
 /* ioctl commands of UBI volume character devices */
 
 #define UBI_VOL_IOC_MAGIC 'O'
 
 /* Start UBI volume update */
-#define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t)
+#define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, __s64)
 /* LEB erasure command, used for debugging, disabled by default */
-#define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, int32_t)
+#define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, __s32)
 /* Atomic LEB change command */
-#define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, int32_t)
+#define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, __s32)
 /* Map LEB command */
 #define UBI_IOCEBMAP _IOW(UBI_VOL_IOC_MAGIC, 3, struct ubi_map_req)
 /* Unmap LEB command */
-#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, int32_t)
+#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, __s32)
 /* Check if LEB is mapped command */
-#define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, int32_t)
+#define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, __s32)
 /* Set an UBI volume property */
 #define UBI_IOCSETPROP _IOW(UBI_VOL_IOC_MAGIC, 6, struct ubi_set_prop_req)
 
@@ -260,10 +262,10 @@ enum {
  * sub-page of the first page and add needed padding.
  */
 struct ubi_attach_req {
-	int32_t ubi_num;
-	int32_t mtd_num;
-	int32_t vid_hdr_offset;
-	int8_t padding[12];
+	__s32 ubi_num;
+	__s32 mtd_num;
+	__s32 vid_hdr_offset;
+	__s8 padding[12];
 };
 
 /**
@@ -298,13 +300,13 @@ struct ubi_attach_req {
  * BLOBs, without caring about how to properly align them.
  */
 struct ubi_mkvol_req {
-	int32_t vol_id;
-	int32_t alignment;
-	int64_t bytes;
-	int8_t vol_type;
-	int8_t padding1;
-	int16_t name_len;
-	int8_t padding2[4];
+	__s32 vol_id;
+	__s32 alignment;
+	__s64 bytes;
+	__s8 vol_type;
+	__s8 padding1;
+	__s16 name_len;
+	__s8 padding2[4];
 	char name[UBI_MAX_VOLUME_NAME + 1];
 } __attribute__ ((packed));
 
@@ -320,8 +322,8 @@ struct ubi_mkvol_req {
  * zero number of bytes).
  */
 struct ubi_rsvol_req {
-	int64_t bytes;
-	int32_t vol_id;
+	__s64 bytes;
+	__s32 vol_id;
 } __attribute__ ((packed));
 
 /**
@@ -356,12 +358,12 @@ struct ubi_rsvol_req {
  * re-name request.
  */
 struct ubi_rnvol_req {
-	int32_t count;
-	int8_t padding1[12];
+	__s32 count;
+	__s8 padding1[12];
 	struct {
-		int32_t vol_id;
-		int16_t name_len;
-		int8_t  padding2[2];
+		__s32 vol_id;
+		__s16 name_len;
+		__s8  padding2[2];
 		char    name[UBI_MAX_VOLUME_NAME + 1];
 	} ents[UBI_MAX_RNVOL];
 } __attribute__ ((packed));
@@ -375,10 +377,10 @@ struct ubi_rnvol_req {
  * @padding: reserved for future, not used, has to be zeroed
  */
 struct ubi_leb_change_req {
-	int32_t lnum;
-	int32_t bytes;
-	int8_t  dtype;
-	int8_t  padding[7];
+	__s32 lnum;
+	__s32 bytes;
+	__s8  dtype;
+	__s8  padding[7];
 } __attribute__ ((packed));
 
 /**
@@ -388,9 +390,9 @@ struct ubi_leb_change_req {
  * @padding: reserved for future, not used, has to be zeroed
  */
 struct ubi_map_req {
-	int32_t lnum;
-	int8_t  dtype;
-	int8_t  padding[3];
+	__s32 lnum;
+	__s8  dtype;
+	__s8  padding[3];
 } __attribute__ ((packed));
 
 
@@ -402,9 +404,9 @@ struct ubi_map_req {
  * @value: value to set
  */
 struct ubi_set_prop_req {
-       uint8_t  property;
-       uint8_t  padding[7];
-       uint64_t value;
+       __u8  property;
+       __u8  padding[7];
+       __u64 value;
 }  __attribute__ ((packed));
 
 #endif /* __UBI_USER_H__ */
-- 
cgit v1.2.3


From 60c195c729532815c5209c81442fa0eb26ace706 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Feb 2009 00:51:43 +0100
Subject: make netfilter use strict integer types

Netfilter traditionally uses BSD integer types in its
interface headers. This changes it to use the Linux
strict integer types, like everyone else.

Cc: netfilter-devel@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/netfilter/nf_conntrack_tcp.h |  6 +++--
 include/linux/netfilter/nfnetlink.h        |  4 ++--
 include/linux/netfilter/nfnetlink_compat.h |  7 ++++--
 include/linux/netfilter/nfnetlink_log.h    | 32 +++++++++++++-------------
 include/linux/netfilter/nfnetlink_queue.h  | 24 ++++++++++----------
 include/linux/netfilter/x_tables.h         | 30 +++++++++++++------------
 include/linux/netfilter/xt_CLASSIFY.h      |  4 +++-
 include/linux/netfilter/xt_CONNMARK.h      |  8 ++++---
 include/linux/netfilter/xt_CONNSECMARK.h   |  4 +++-
 include/linux/netfilter/xt_DSCP.h          |  7 +++---
 include/linux/netfilter/xt_MARK.h          |  6 +++--
 include/linux/netfilter/xt_NFLOG.h         | 12 +++++-----
 include/linux/netfilter/xt_NFQUEUE.h       |  4 +++-
 include/linux/netfilter/xt_RATEEST.h       |  6 +++--
 include/linux/netfilter/xt_SECMARK.h       |  6 +++--
 include/linux/netfilter/xt_TCPMSS.h        |  4 +++-
 include/linux/netfilter/xt_connbytes.h     |  6 +++--
 include/linux/netfilter/xt_connmark.h      |  8 ++++---
 include/linux/netfilter/xt_conntrack.h     | 12 +++++-----
 include/linux/netfilter/xt_dccp.h          | 14 +++++++-----
 include/linux/netfilter/xt_dscp.h          | 12 +++++-----
 include/linux/netfilter/xt_esp.h           |  6 +++--
 include/linux/netfilter/xt_hashlimit.h     | 32 +++++++++++++-------------
 include/linux/netfilter/xt_iprange.h       |  4 +++-
 include/linux/netfilter/xt_length.h        |  6 +++--
 include/linux/netfilter/xt_limit.h         | 10 +++++----
 include/linux/netfilter/xt_mark.h          |  8 ++++---
 include/linux/netfilter/xt_multiport.h     | 18 ++++++++-------
 include/linux/netfilter/xt_owner.h         |  8 ++++---
 include/linux/netfilter/xt_physdev.h       |  6 +++--
 include/linux/netfilter/xt_policy.h        | 14 +++++++-----
 include/linux/netfilter/xt_rateest.h       | 14 +++++++-----
 include/linux/netfilter/xt_realm.h         |  8 ++++---
 include/linux/netfilter/xt_recent.h        | 12 +++++-----
 include/linux/netfilter/xt_sctp.h          | 36 ++++++++++++++++--------------
 include/linux/netfilter/xt_statistic.h     | 14 +++++++-----
 include/linux/netfilter/xt_string.h        | 12 +++++-----
 include/linux/netfilter/xt_tcpmss.h        |  6 +++--
 include/linux/netfilter/xt_tcpudp.h        | 20 +++++++++--------
 39 files changed, 260 insertions(+), 190 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index a049df4f2236..3066789b972a 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -2,6 +2,8 @@
 #define _NF_CONNTRACK_TCP_H
 /* TCP tracking. */
 
+#include <linux/types.h>
+
 /* This is exposed to userspace (ctnetlink) */
 enum tcp_conntrack {
 	TCP_CONNTRACK_NONE,
@@ -34,8 +36,8 @@ enum tcp_conntrack {
 #define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED	0x10
 
 struct nf_ct_tcp_flags {
-	u_int8_t flags;
-	u_int8_t mask;
+	__u8 flags;
+	__u8 mask;
 };
 
 #ifdef __KERNEL__
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 7d8e0455ccac..e53546cfa353 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -25,8 +25,8 @@ enum nfnetlink_groups {
 /* General form of address family dependent message.
  */
 struct nfgenmsg {
-	u_int8_t  nfgen_family;		/* AF_xxx */
-	u_int8_t  version;		/* nfnetlink version */
+	__u8  nfgen_family;		/* AF_xxx */
+	__u8  version;		/* nfnetlink version */
 	__be16    res_id;		/* resource id */
 };
 
diff --git a/include/linux/netfilter/nfnetlink_compat.h b/include/linux/netfilter/nfnetlink_compat.h
index e1451760c9cd..eda55cabceec 100644
--- a/include/linux/netfilter/nfnetlink_compat.h
+++ b/include/linux/netfilter/nfnetlink_compat.h
@@ -1,5 +1,8 @@
 #ifndef _NFNETLINK_COMPAT_H
 #define _NFNETLINK_COMPAT_H
+
+#include <linux/types.h>
+
 #ifndef __KERNEL__
 /* Old nfnetlink macros for userspace */
 
@@ -20,8 +23,8 @@
 
 struct nfattr
 {
-	u_int16_t nfa_len;
-	u_int16_t nfa_type;	/* we use 15 bits for the type, and the highest
+	__u16 nfa_len;
+	__u16 nfa_type;	/* we use 15 bits for the type, and the highest
 				 * bit to indicate whether the payload is nested */
 };
 
diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h
index f661731f3cb1..d3bab7a2c9b7 100644
--- a/include/linux/netfilter/nfnetlink_log.h
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -17,14 +17,14 @@ enum nfulnl_msg_types {
 
 struct nfulnl_msg_packet_hdr {
 	__be16		hw_protocol;	/* hw protocol (network order) */
-	u_int8_t	hook;		/* netfilter hook */
-	u_int8_t	_pad;
+	__u8	hook;		/* netfilter hook */
+	__u8	_pad;
 };
 
 struct nfulnl_msg_packet_hw {
 	__be16		hw_addrlen;
-	u_int16_t	_pad;
-	u_int8_t	hw_addr[8];
+	__u16	_pad;
+	__u8	hw_addr[8];
 };
 
 struct nfulnl_msg_packet_timestamp {
@@ -35,12 +35,12 @@ struct nfulnl_msg_packet_timestamp {
 enum nfulnl_attr_type {
 	NFULA_UNSPEC,
 	NFULA_PACKET_HDR,
-	NFULA_MARK,			/* u_int32_t nfmark */
+	NFULA_MARK,			/* __u32 nfmark */
 	NFULA_TIMESTAMP,		/* nfulnl_msg_packet_timestamp */
-	NFULA_IFINDEX_INDEV,		/* u_int32_t ifindex */
-	NFULA_IFINDEX_OUTDEV,		/* u_int32_t ifindex */
-	NFULA_IFINDEX_PHYSINDEV,	/* u_int32_t ifindex */
-	NFULA_IFINDEX_PHYSOUTDEV,	/* u_int32_t ifindex */
+	NFULA_IFINDEX_INDEV,		/* __u32 ifindex */
+	NFULA_IFINDEX_OUTDEV,		/* __u32 ifindex */
+	NFULA_IFINDEX_PHYSINDEV,	/* __u32 ifindex */
+	NFULA_IFINDEX_PHYSOUTDEV,	/* __u32 ifindex */
 	NFULA_HWADDR,			/* nfulnl_msg_packet_hw */
 	NFULA_PAYLOAD,			/* opaque data payload */
 	NFULA_PREFIX,			/* string prefix */
@@ -65,23 +65,23 @@ enum nfulnl_msg_config_cmds {
 };
 
 struct nfulnl_msg_config_cmd {
-	u_int8_t	command;	/* nfulnl_msg_config_cmds */
+	__u8	command;	/* nfulnl_msg_config_cmds */
 } __attribute__ ((packed));
 
 struct nfulnl_msg_config_mode {
 	__be32		copy_range;
-	u_int8_t	copy_mode;
-	u_int8_t	_pad;
+	__u8	copy_mode;
+	__u8	_pad;
 } __attribute__ ((packed));
 
 enum nfulnl_attr_config {
 	NFULA_CFG_UNSPEC,
 	NFULA_CFG_CMD,			/* nfulnl_msg_config_cmd */
 	NFULA_CFG_MODE,			/* nfulnl_msg_config_mode */
-	NFULA_CFG_NLBUFSIZ,		/* u_int32_t buffer size */
-	NFULA_CFG_TIMEOUT,		/* u_int32_t in 1/100 s */
-	NFULA_CFG_QTHRESH,		/* u_int32_t */
-	NFULA_CFG_FLAGS,		/* u_int16_t */
+	NFULA_CFG_NLBUFSIZ,		/* __u32 buffer size */
+	NFULA_CFG_TIMEOUT,		/* __u32 in 1/100 s */
+	NFULA_CFG_QTHRESH,		/* __u32 */
+	NFULA_CFG_FLAGS,		/* __u16 */
 	__NFULA_CFG_MAX
 };
 #define NFULA_CFG_MAX (__NFULA_CFG_MAX -1)
diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h
index 83e789633e35..2455fe5f4e01 100644
--- a/include/linux/netfilter/nfnetlink_queue.h
+++ b/include/linux/netfilter/nfnetlink_queue.h
@@ -15,13 +15,13 @@ enum nfqnl_msg_types {
 struct nfqnl_msg_packet_hdr {
 	__be32		packet_id;	/* unique ID of packet in queue */
 	__be16		hw_protocol;	/* hw protocol (network order) */
-	u_int8_t	hook;		/* netfilter hook */
+	__u8	hook;		/* netfilter hook */
 } __attribute__ ((packed));
 
 struct nfqnl_msg_packet_hw {
 	__be16		hw_addrlen;
-	u_int16_t	_pad;
-	u_int8_t	hw_addr[8];
+	__u16	_pad;
+	__u8	hw_addr[8];
 };
 
 struct nfqnl_msg_packet_timestamp {
@@ -33,12 +33,12 @@ enum nfqnl_attr_type {
 	NFQA_UNSPEC,
 	NFQA_PACKET_HDR,
 	NFQA_VERDICT_HDR,		/* nfqnl_msg_verdict_hrd */
-	NFQA_MARK,			/* u_int32_t nfmark */
+	NFQA_MARK,			/* __u32 nfmark */
 	NFQA_TIMESTAMP,			/* nfqnl_msg_packet_timestamp */
-	NFQA_IFINDEX_INDEV,		/* u_int32_t ifindex */
-	NFQA_IFINDEX_OUTDEV,		/* u_int32_t ifindex */
-	NFQA_IFINDEX_PHYSINDEV,		/* u_int32_t ifindex */
-	NFQA_IFINDEX_PHYSOUTDEV,	/* u_int32_t ifindex */
+	NFQA_IFINDEX_INDEV,		/* __u32 ifindex */
+	NFQA_IFINDEX_OUTDEV,		/* __u32 ifindex */
+	NFQA_IFINDEX_PHYSINDEV,		/* __u32 ifindex */
+	NFQA_IFINDEX_PHYSOUTDEV,	/* __u32 ifindex */
 	NFQA_HWADDR,			/* nfqnl_msg_packet_hw */
 	NFQA_PAYLOAD,			/* opaque data payload */
 
@@ -61,8 +61,8 @@ enum nfqnl_msg_config_cmds {
 };
 
 struct nfqnl_msg_config_cmd {
-	u_int8_t	command;	/* nfqnl_msg_config_cmds */
-	u_int8_t	_pad;
+	__u8	command;	/* nfqnl_msg_config_cmds */
+	__u8	_pad;
 	__be16		pf;		/* AF_xxx for PF_[UN]BIND */
 };
 
@@ -74,7 +74,7 @@ enum nfqnl_config_mode {
 
 struct nfqnl_msg_config_params {
 	__be32		copy_range;
-	u_int8_t	copy_mode;	/* enum nfqnl_config_mode */
+	__u8	copy_mode;	/* enum nfqnl_config_mode */
 } __attribute__ ((packed));
 
 
@@ -82,7 +82,7 @@ enum nfqnl_attr_config {
 	NFQA_CFG_UNSPEC,
 	NFQA_CFG_CMD,			/* nfqnl_msg_config_cmd */
 	NFQA_CFG_PARAMS,		/* nfqnl_msg_config_params */
-	NFQA_CFG_QUEUE_MAXLEN,		/* u_int32_t */
+	NFQA_CFG_QUEUE_MAXLEN,		/* __u32 */
 	__NFQA_CFG_MAX
 };
 #define NFQA_CFG_MAX (__NFQA_CFG_MAX-1)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c7ee8744d26b..33fd9c949d80 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -1,6 +1,8 @@
 #ifndef _X_TABLES_H
 #define _X_TABLES_H
 
+#include <linux/types.h>
+
 #define XT_FUNCTION_MAXNAMELEN 30
 #define XT_TABLE_MAXNAMELEN 32
 
@@ -8,22 +10,22 @@ struct xt_entry_match
 {
 	union {
 		struct {
-			u_int16_t match_size;
+			__u16 match_size;
 
 			/* Used by userspace */
 			char name[XT_FUNCTION_MAXNAMELEN-1];
 
-			u_int8_t revision;
+			__u8 revision;
 		} user;
 		struct {
-			u_int16_t match_size;
+			__u16 match_size;
 
 			/* Used inside the kernel */
 			struct xt_match *match;
 		} kernel;
 
 		/* Total length */
-		u_int16_t match_size;
+		__u16 match_size;
 	} u;
 
 	unsigned char data[0];
@@ -33,22 +35,22 @@ struct xt_entry_target
 {
 	union {
 		struct {
-			u_int16_t target_size;
+			__u16 target_size;
 
 			/* Used by userspace */
 			char name[XT_FUNCTION_MAXNAMELEN-1];
 
-			u_int8_t revision;
+			__u8 revision;
 		} user;
 		struct {
-			u_int16_t target_size;
+			__u16 target_size;
 
 			/* Used inside the kernel */
 			struct xt_target *target;
 		} kernel;
 
 		/* Total length */
-		u_int16_t target_size;
+		__u16 target_size;
 	} u;
 
 	unsigned char data[0];
@@ -74,7 +76,7 @@ struct xt_get_revision
 {
 	char name[XT_FUNCTION_MAXNAMELEN-1];
 
-	u_int8_t revision;
+	__u8 revision;
 };
 
 /* CONTINUE verdict for targets */
@@ -90,10 +92,10 @@ struct xt_get_revision
  */
 struct _xt_align
 {
-	u_int8_t u8;
-	u_int16_t u16;
-	u_int32_t u32;
-	u_int64_t u64;
+	__u8 u8;
+	__u16 u16;
+	__u32 u32;
+	__u64 u64;
 };
 
 #define XT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) 	\
@@ -109,7 +111,7 @@ struct _xt_align
 
 struct xt_counters
 {
-	u_int64_t pcnt, bcnt;			/* Packet and byte counters */
+	__u64 pcnt, bcnt;			/* Packet and byte counters */
 };
 
 /* The argument to IPT_SO_ADD_COUNTERS. */
diff --git a/include/linux/netfilter/xt_CLASSIFY.h b/include/linux/netfilter/xt_CLASSIFY.h
index 58111355255d..a813bf14dd63 100644
--- a/include/linux/netfilter/xt_CLASSIFY.h
+++ b/include/linux/netfilter/xt_CLASSIFY.h
@@ -1,8 +1,10 @@
 #ifndef _XT_CLASSIFY_H
 #define _XT_CLASSIFY_H
 
+#include <linux/types.h>
+
 struct xt_classify_target_info {
-	u_int32_t priority;
+	__u32 priority;
 };
 
 #endif /*_XT_CLASSIFY_H */
diff --git a/include/linux/netfilter/xt_CONNMARK.h b/include/linux/netfilter/xt_CONNMARK.h
index 4e58ba43c289..7635c8ffdadb 100644
--- a/include/linux/netfilter/xt_CONNMARK.h
+++ b/include/linux/netfilter/xt_CONNMARK.h
@@ -1,6 +1,8 @@
 #ifndef _XT_CONNMARK_H_target
 #define _XT_CONNMARK_H_target
 
+#include <linux/types.h>
+
 /* Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
  * by Henrik Nordstrom <hno@marasystems.com>
  *
@@ -19,12 +21,12 @@ enum {
 struct xt_connmark_target_info {
 	unsigned long mark;
 	unsigned long mask;
-	u_int8_t mode;
+	__u8 mode;
 };
 
 struct xt_connmark_tginfo1 {
-	u_int32_t ctmark, ctmask, nfmask;
-	u_int8_t mode;
+	__u32 ctmark, ctmask, nfmask;
+	__u8 mode;
 };
 
 #endif /*_XT_CONNMARK_H_target*/
diff --git a/include/linux/netfilter/xt_CONNSECMARK.h b/include/linux/netfilter/xt_CONNSECMARK.h
index c6bd75469ba2..b973ff80fa1e 100644
--- a/include/linux/netfilter/xt_CONNSECMARK.h
+++ b/include/linux/netfilter/xt_CONNSECMARK.h
@@ -1,13 +1,15 @@
 #ifndef _XT_CONNSECMARK_H_target
 #define _XT_CONNSECMARK_H_target
 
+#include <linux/types.h>
+
 enum {
 	CONNSECMARK_SAVE = 1,
 	CONNSECMARK_RESTORE,
 };
 
 struct xt_connsecmark_target_info {
-	u_int8_t mode;
+	__u8 mode;
 };
 
 #endif /*_XT_CONNSECMARK_H_target */
diff --git a/include/linux/netfilter/xt_DSCP.h b/include/linux/netfilter/xt_DSCP.h
index 14da1968e2c6..648e0b3bed29 100644
--- a/include/linux/netfilter/xt_DSCP.h
+++ b/include/linux/netfilter/xt_DSCP.h
@@ -11,15 +11,16 @@
 #ifndef _XT_DSCP_TARGET_H
 #define _XT_DSCP_TARGET_H
 #include <linux/netfilter/xt_dscp.h>
+#include <linux/types.h>
 
 /* target info */
 struct xt_DSCP_info {
-	u_int8_t dscp;
+	__u8 dscp;
 };
 
 struct xt_tos_target_info {
-	u_int8_t tos_value;
-	u_int8_t tos_mask;
+	__u8 tos_value;
+	__u8 tos_mask;
 };
 
 #endif /* _XT_DSCP_TARGET_H */
diff --git a/include/linux/netfilter/xt_MARK.h b/include/linux/netfilter/xt_MARK.h
index 778b278fd9f2..028304bcc0b1 100644
--- a/include/linux/netfilter/xt_MARK.h
+++ b/include/linux/netfilter/xt_MARK.h
@@ -1,6 +1,8 @@
 #ifndef _XT_MARK_H_target
 #define _XT_MARK_H_target
 
+#include <linux/types.h>
+
 /* Version 0 */
 struct xt_mark_target_info {
 	unsigned long mark;
@@ -15,11 +17,11 @@ enum {
 
 struct xt_mark_target_info_v1 {
 	unsigned long mark;
-	u_int8_t mode;
+	__u8 mode;
 };
 
 struct xt_mark_tginfo2 {
-	u_int32_t mark, mask;
+	__u32 mark, mask;
 };
 
 #endif /*_XT_MARK_H_target */
diff --git a/include/linux/netfilter/xt_NFLOG.h b/include/linux/netfilter/xt_NFLOG.h
index cdcd0ed58f7a..eaac7b5226e9 100644
--- a/include/linux/netfilter/xt_NFLOG.h
+++ b/include/linux/netfilter/xt_NFLOG.h
@@ -1,17 +1,19 @@
 #ifndef _XT_NFLOG_TARGET
 #define _XT_NFLOG_TARGET
 
+#include <linux/types.h>
+
 #define XT_NFLOG_DEFAULT_GROUP		0x1
 #define XT_NFLOG_DEFAULT_THRESHOLD	1
 
 #define XT_NFLOG_MASK			0x0
 
 struct xt_nflog_info {
-	u_int32_t	len;
-	u_int16_t	group;
-	u_int16_t	threshold;
-	u_int16_t	flags;
-	u_int16_t	pad;
+	__u32	len;
+	__u16	group;
+	__u16	threshold;
+	__u16	flags;
+	__u16	pad;
 	char		prefix[64];
 };
 
diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h
index 9a9af79f74d2..982a89f78272 100644
--- a/include/linux/netfilter/xt_NFQUEUE.h
+++ b/include/linux/netfilter/xt_NFQUEUE.h
@@ -8,9 +8,11 @@
 #ifndef _XT_NFQ_TARGET_H
 #define _XT_NFQ_TARGET_H
 
+#include <linux/types.h>
+
 /* target info */
 struct xt_NFQ_info {
-	u_int16_t queuenum;
+	__u16 queuenum;
 };
 
 #endif /* _XT_NFQ_TARGET_H */
diff --git a/include/linux/netfilter/xt_RATEEST.h b/include/linux/netfilter/xt_RATEEST.h
index f79e3133cbea..6605e20ad8cf 100644
--- a/include/linux/netfilter/xt_RATEEST.h
+++ b/include/linux/netfilter/xt_RATEEST.h
@@ -1,10 +1,12 @@
 #ifndef _XT_RATEEST_TARGET_H
 #define _XT_RATEEST_TARGET_H
 
+#include <linux/types.h>
+
 struct xt_rateest_target_info {
 	char			name[IFNAMSIZ];
-	int8_t			interval;
-	u_int8_t		ewma_log;
+	__s8			interval;
+	__u8		ewma_log;
 
 	/* Used internally by the kernel */
 	struct xt_rateest	*est __attribute__((aligned(8)));
diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/linux/netfilter/xt_SECMARK.h
index c53fbffa997d..6fcd3448b186 100644
--- a/include/linux/netfilter/xt_SECMARK.h
+++ b/include/linux/netfilter/xt_SECMARK.h
@@ -1,6 +1,8 @@
 #ifndef _XT_SECMARK_H_target
 #define _XT_SECMARK_H_target
 
+#include <linux/types.h>
+
 /*
  * This is intended for use by various security subsystems (but not
  * at the same time).
@@ -12,12 +14,12 @@
 #define SECMARK_SELCTX_MAX	256
 
 struct xt_secmark_target_selinux_info {
-	u_int32_t selsid;
+	__u32 selsid;
 	char selctx[SECMARK_SELCTX_MAX];
 };
 
 struct xt_secmark_target_info {
-	u_int8_t mode;
+	__u8 mode;
 	union {
 		struct xt_secmark_target_selinux_info sel;
 	} u;
diff --git a/include/linux/netfilter/xt_TCPMSS.h b/include/linux/netfilter/xt_TCPMSS.h
index 53a292cd47f3..9a6960afc134 100644
--- a/include/linux/netfilter/xt_TCPMSS.h
+++ b/include/linux/netfilter/xt_TCPMSS.h
@@ -1,8 +1,10 @@
 #ifndef _XT_TCPMSS_H
 #define _XT_TCPMSS_H
 
+#include <linux/types.h>
+
 struct xt_tcpmss_info {
-	u_int16_t mss;
+	__u16 mss;
 };
 
 #define XT_TCPMSS_CLAMP_PMTU 0xffff
diff --git a/include/linux/netfilter/xt_connbytes.h b/include/linux/netfilter/xt_connbytes.h
index c022c989754d..52bd6153b996 100644
--- a/include/linux/netfilter/xt_connbytes.h
+++ b/include/linux/netfilter/xt_connbytes.h
@@ -1,6 +1,8 @@
 #ifndef _XT_CONNBYTES_H
 #define _XT_CONNBYTES_H
 
+#include <linux/types.h>
+
 enum xt_connbytes_what {
 	XT_CONNBYTES_PKTS,
 	XT_CONNBYTES_BYTES,
@@ -19,7 +21,7 @@ struct xt_connbytes_info
 		aligned_u64 from;	/* count to be matched */
 		aligned_u64 to;		/* count to be matched */
 	} count;
-	u_int8_t what;		/* ipt_connbytes_what */
-	u_int8_t direction;	/* ipt_connbytes_direction */
+	__u8 what;		/* ipt_connbytes_what */
+	__u8 direction;	/* ipt_connbytes_direction */
 };
 #endif
diff --git a/include/linux/netfilter/xt_connmark.h b/include/linux/netfilter/xt_connmark.h
index 359ef86918dc..571e266d004c 100644
--- a/include/linux/netfilter/xt_connmark.h
+++ b/include/linux/netfilter/xt_connmark.h
@@ -1,6 +1,8 @@
 #ifndef _XT_CONNMARK_H
 #define _XT_CONNMARK_H
 
+#include <linux/types.h>
+
 /* Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
  * by Henrik Nordstrom <hno@marasystems.com>
  *
@@ -12,12 +14,12 @@
 
 struct xt_connmark_info {
 	unsigned long mark, mask;
-	u_int8_t invert;
+	__u8 invert;
 };
 
 struct xt_connmark_mtinfo1 {
-	u_int32_t mark, mask;
-	u_int8_t invert;
+	__u32 mark, mask;
+	__u8 invert;
 };
 
 #endif /*_XT_CONNMARK_H*/
diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h
index 8f5345275393..3430c7751948 100644
--- a/include/linux/netfilter/xt_conntrack.h
+++ b/include/linux/netfilter/xt_conntrack.h
@@ -63,9 +63,9 @@ struct xt_conntrack_info
 	unsigned long expires_min, expires_max;
 
 	/* Flags word */
-	u_int8_t flags;
+	__u8 flags;
 	/* Inverse flags */
-	u_int8_t invflags;
+	__u8 invflags;
 };
 
 struct xt_conntrack_mtinfo1 {
@@ -73,12 +73,12 @@ struct xt_conntrack_mtinfo1 {
 	union nf_inet_addr origdst_addr, origdst_mask;
 	union nf_inet_addr replsrc_addr, replsrc_mask;
 	union nf_inet_addr repldst_addr, repldst_mask;
-	u_int32_t expires_min, expires_max;
-	u_int16_t l4proto;
+	__u32 expires_min, expires_max;
+	__u16 l4proto;
 	__be16 origsrc_port, origdst_port;
 	__be16 replsrc_port, repldst_port;
-	u_int16_t match_flags, invert_flags;
-	u_int8_t state_mask, status_mask;
+	__u16 match_flags, invert_flags;
+	__u8 state_mask, status_mask;
 };
 
 #endif /*_XT_CONNTRACK_H*/
diff --git a/include/linux/netfilter/xt_dccp.h b/include/linux/netfilter/xt_dccp.h
index e0221b9d32cb..a579e1b6f040 100644
--- a/include/linux/netfilter/xt_dccp.h
+++ b/include/linux/netfilter/xt_dccp.h
@@ -1,6 +1,8 @@
 #ifndef _XT_DCCP_H_
 #define _XT_DCCP_H_
 
+#include <linux/types.h>
+
 #define XT_DCCP_SRC_PORTS	        0x01
 #define XT_DCCP_DEST_PORTS	        0x02
 #define XT_DCCP_TYPE			0x04
@@ -9,14 +11,14 @@
 #define XT_DCCP_VALID_FLAGS		0x0f
 
 struct xt_dccp_info {
-	u_int16_t dpts[2];  /* Min, Max */
-	u_int16_t spts[2];  /* Min, Max */
+	__u16 dpts[2];  /* Min, Max */
+	__u16 spts[2];  /* Min, Max */
 
-	u_int16_t flags;
-	u_int16_t invflags;
+	__u16 flags;
+	__u16 invflags;
 
-	u_int16_t typemask;
-	u_int8_t option;
+	__u16 typemask;
+	__u8 option;
 };
 
 #endif /* _XT_DCCP_H_ */
diff --git a/include/linux/netfilter/xt_dscp.h b/include/linux/netfilter/xt_dscp.h
index f49bc1a648dc..15f8932ad5ce 100644
--- a/include/linux/netfilter/xt_dscp.h
+++ b/include/linux/netfilter/xt_dscp.h
@@ -10,20 +10,22 @@
 #ifndef _XT_DSCP_H
 #define _XT_DSCP_H
 
+#include <linux/types.h>
+
 #define XT_DSCP_MASK	0xfc	/* 11111100 */
 #define XT_DSCP_SHIFT	2
 #define XT_DSCP_MAX	0x3f	/* 00111111 */
 
 /* match info */
 struct xt_dscp_info {
-	u_int8_t dscp;
-	u_int8_t invert;
+	__u8 dscp;
+	__u8 invert;
 };
 
 struct xt_tos_match_info {
-	u_int8_t tos_mask;
-	u_int8_t tos_value;
-	u_int8_t invert;
+	__u8 tos_mask;
+	__u8 tos_value;
+	__u8 invert;
 };
 
 #endif /* _XT_DSCP_H */
diff --git a/include/linux/netfilter/xt_esp.h b/include/linux/netfilter/xt_esp.h
index 9380fb1c27da..ef6fa4747d0a 100644
--- a/include/linux/netfilter/xt_esp.h
+++ b/include/linux/netfilter/xt_esp.h
@@ -1,10 +1,12 @@
 #ifndef _XT_ESP_H
 #define _XT_ESP_H
 
+#include <linux/types.h>
+
 struct xt_esp
 {
-	u_int32_t spis[2];	/* Security Parameter Index */
-	u_int8_t  invflags;	/* Inverse flags */
+	__u32 spis[2];	/* Security Parameter Index */
+	__u8  invflags;	/* Inverse flags */
 };
 
 /* Values for "invflags" field in struct xt_esp. */
diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h
index 51b18d83b477..b1925b5925e9 100644
--- a/include/linux/netfilter/xt_hashlimit.h
+++ b/include/linux/netfilter/xt_hashlimit.h
@@ -1,6 +1,8 @@
 #ifndef _XT_HASHLIMIT_H
 #define _XT_HASHLIMIT_H
 
+#include <linux/types.h>
+
 /* timings are in milliseconds. */
 #define XT_HASHLIMIT_SCALE 10000
 /* 1/10,000 sec period => max of 10,000/sec.  Min rate is then 429490
@@ -18,15 +20,15 @@ enum {
 };
 
 struct hashlimit_cfg {
-	u_int32_t mode;	  /* bitmask of XT_HASHLIMIT_HASH_* */
-	u_int32_t avg;    /* Average secs between packets * scale */
-	u_int32_t burst;  /* Period multiplier for upper limit. */
+	__u32 mode;	  /* bitmask of XT_HASHLIMIT_HASH_* */
+	__u32 avg;    /* Average secs between packets * scale */
+	__u32 burst;  /* Period multiplier for upper limit. */
 
 	/* user specified */
-	u_int32_t size;		/* how many buckets */
-	u_int32_t max;		/* max number of entries */
-	u_int32_t gc_interval;	/* gc interval */
-	u_int32_t expire;	/* when do entries expire? */
+	__u32 size;		/* how many buckets */
+	__u32 max;		/* max number of entries */
+	__u32 gc_interval;	/* gc interval */
+	__u32 expire;	/* when do entries expire? */
 };
 
 struct xt_hashlimit_info {
@@ -42,17 +44,17 @@ struct xt_hashlimit_info {
 };
 
 struct hashlimit_cfg1 {
-	u_int32_t mode;	  /* bitmask of XT_HASHLIMIT_HASH_* */
-	u_int32_t avg;    /* Average secs between packets * scale */
-	u_int32_t burst;  /* Period multiplier for upper limit. */
+	__u32 mode;	  /* bitmask of XT_HASHLIMIT_HASH_* */
+	__u32 avg;    /* Average secs between packets * scale */
+	__u32 burst;  /* Period multiplier for upper limit. */
 
 	/* user specified */
-	u_int32_t size;		/* how many buckets */
-	u_int32_t max;		/* max number of entries */
-	u_int32_t gc_interval;	/* gc interval */
-	u_int32_t expire;	/* when do entries expire? */
+	__u32 size;		/* how many buckets */
+	__u32 max;		/* max number of entries */
+	__u32 gc_interval;	/* gc interval */
+	__u32 expire;	/* when do entries expire? */
 
-	u_int8_t srcmask, dstmask;
+	__u8 srcmask, dstmask;
 };
 
 struct xt_hashlimit_mtinfo1 {
diff --git a/include/linux/netfilter/xt_iprange.h b/include/linux/netfilter/xt_iprange.h
index a4299c7d3680..c1f21a779a45 100644
--- a/include/linux/netfilter/xt_iprange.h
+++ b/include/linux/netfilter/xt_iprange.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_NETFILTER_XT_IPRANGE_H
 #define _LINUX_NETFILTER_XT_IPRANGE_H 1
 
+#include <linux/types.h>
+
 enum {
 	IPRANGE_SRC     = 1 << 0,	/* match source IP address */
 	IPRANGE_DST     = 1 << 1,	/* match destination IP address */
@@ -11,7 +13,7 @@ enum {
 struct xt_iprange_mtinfo {
 	union nf_inet_addr src_min, src_max;
 	union nf_inet_addr dst_min, dst_max;
-	u_int8_t flags;
+	__u8 flags;
 };
 
 #endif /* _LINUX_NETFILTER_XT_IPRANGE_H */
diff --git a/include/linux/netfilter/xt_length.h b/include/linux/netfilter/xt_length.h
index 7c2b439f73fe..b82ed7c4b1e0 100644
--- a/include/linux/netfilter/xt_length.h
+++ b/include/linux/netfilter/xt_length.h
@@ -1,9 +1,11 @@
 #ifndef _XT_LENGTH_H
 #define _XT_LENGTH_H
 
+#include <linux/types.h>
+
 struct xt_length_info {
-    u_int16_t	min, max;
-    u_int8_t	invert;
+    __u16	min, max;
+    __u8	invert;
 };
 
 #endif /*_XT_LENGTH_H*/
diff --git a/include/linux/netfilter/xt_limit.h b/include/linux/netfilter/xt_limit.h
index b3ce65375ecb..190e98b1f7c9 100644
--- a/include/linux/netfilter/xt_limit.h
+++ b/include/linux/netfilter/xt_limit.h
@@ -1,19 +1,21 @@
 #ifndef _XT_RATE_H
 #define _XT_RATE_H
 
+#include <linux/types.h>
+
 /* timings are in milliseconds. */
 #define XT_LIMIT_SCALE 10000
 
 /* 1/10,000 sec period => max of 10,000/sec.  Min rate is then 429490
    seconds, or one every 59 hours. */
 struct xt_rateinfo {
-	u_int32_t avg;    /* Average secs between packets * scale */
-	u_int32_t burst;  /* Period multiplier for upper limit. */
+	__u32 avg;    /* Average secs between packets * scale */
+	__u32 burst;  /* Period multiplier for upper limit. */
 
 	/* Used internally by the kernel */
 	unsigned long prev;
-	u_int32_t credit;
-	u_int32_t credit_cap, cost;
+	__u32 credit;
+	__u32 credit_cap, cost;
 
 	/* Ugly, ugly fucker. */
 	struct xt_rateinfo *master;
diff --git a/include/linux/netfilter/xt_mark.h b/include/linux/netfilter/xt_mark.h
index fae74bc3f34e..6fa460a3cc29 100644
--- a/include/linux/netfilter/xt_mark.h
+++ b/include/linux/netfilter/xt_mark.h
@@ -1,14 +1,16 @@
 #ifndef _XT_MARK_H
 #define _XT_MARK_H
 
+#include <linux/types.h>
+
 struct xt_mark_info {
     unsigned long mark, mask;
-    u_int8_t invert;
+    __u8 invert;
 };
 
 struct xt_mark_mtinfo1 {
-	u_int32_t mark, mask;
-	u_int8_t invert;
+	__u32 mark, mask;
+	__u8 invert;
 };
 
 #endif /*_XT_MARK_H*/
diff --git a/include/linux/netfilter/xt_multiport.h b/include/linux/netfilter/xt_multiport.h
index d49ee4183710..185db499fcbc 100644
--- a/include/linux/netfilter/xt_multiport.h
+++ b/include/linux/netfilter/xt_multiport.h
@@ -1,6 +1,8 @@
 #ifndef _XT_MULTIPORT_H
 #define _XT_MULTIPORT_H
 
+#include <linux/types.h>
+
 enum xt_multiport_flags
 {
 	XT_MULTIPORT_SOURCE,
@@ -13,18 +15,18 @@ enum xt_multiport_flags
 /* Must fit inside union xt_matchinfo: 16 bytes */
 struct xt_multiport
 {
-	u_int8_t flags;				/* Type of comparison */
-	u_int8_t count;				/* Number of ports */
-	u_int16_t ports[XT_MULTI_PORTS];	/* Ports */
+	__u8 flags;				/* Type of comparison */
+	__u8 count;				/* Number of ports */
+	__u16 ports[XT_MULTI_PORTS];	/* Ports */
 };
 
 struct xt_multiport_v1
 {
-	u_int8_t flags;				/* Type of comparison */
-	u_int8_t count;				/* Number of ports */
-	u_int16_t ports[XT_MULTI_PORTS];	/* Ports */
-	u_int8_t pflags[XT_MULTI_PORTS];	/* Port flags */
-	u_int8_t invert;			/* Invert flag */
+	__u8 flags;				/* Type of comparison */
+	__u8 count;				/* Number of ports */
+	__u16 ports[XT_MULTI_PORTS];	/* Ports */
+	__u8 pflags[XT_MULTI_PORTS];	/* Port flags */
+	__u8 invert;			/* Invert flag */
 };
 
 #endif /*_XT_MULTIPORT_H*/
diff --git a/include/linux/netfilter/xt_owner.h b/include/linux/netfilter/xt_owner.h
index c84e52cfe415..2081761714b5 100644
--- a/include/linux/netfilter/xt_owner.h
+++ b/include/linux/netfilter/xt_owner.h
@@ -1,6 +1,8 @@
 #ifndef _XT_OWNER_MATCH_H
 #define _XT_OWNER_MATCH_H
 
+#include <linux/types.h>
+
 enum {
 	XT_OWNER_UID    = 1 << 0,
 	XT_OWNER_GID    = 1 << 1,
@@ -8,9 +10,9 @@ enum {
 };
 
 struct xt_owner_match_info {
-	u_int32_t uid_min, uid_max;
-	u_int32_t gid_min, gid_max;
-	u_int8_t match, invert;
+	__u32 uid_min, uid_max;
+	__u32 gid_min, gid_max;
+	__u8 match, invert;
 };
 
 #endif /* _XT_OWNER_MATCH_H */
diff --git a/include/linux/netfilter/xt_physdev.h b/include/linux/netfilter/xt_physdev.h
index 25a7a1815b5b..8555e399886d 100644
--- a/include/linux/netfilter/xt_physdev.h
+++ b/include/linux/netfilter/xt_physdev.h
@@ -1,6 +1,8 @@
 #ifndef _XT_PHYSDEV_H
 #define _XT_PHYSDEV_H
 
+#include <linux/types.h>
+
 #ifdef __KERNEL__
 #include <linux/if.h>
 #endif
@@ -17,8 +19,8 @@ struct xt_physdev_info {
 	char in_mask[IFNAMSIZ];
 	char physoutdev[IFNAMSIZ];
 	char out_mask[IFNAMSIZ];
-	u_int8_t invert;
-	u_int8_t bitmask;
+	__u8 invert;
+	__u8 bitmask;
 };
 
 #endif /*_XT_PHYSDEV_H*/
diff --git a/include/linux/netfilter/xt_policy.h b/include/linux/netfilter/xt_policy.h
index 053d8cc65464..7bb64e7c853d 100644
--- a/include/linux/netfilter/xt_policy.h
+++ b/include/linux/netfilter/xt_policy.h
@@ -1,6 +1,8 @@
 #ifndef _XT_POLICY_H
 #define _XT_POLICY_H
 
+#include <linux/types.h>
+
 #define XT_POLICY_MAX_ELEM	4
 
 enum xt_policy_flags
@@ -19,7 +21,7 @@ enum xt_policy_modes
 
 struct xt_policy_spec
 {
-	u_int8_t	saddr:1,
+	__u8	saddr:1,
 			daddr:1,
 			proto:1,
 			mode:1,
@@ -55,9 +57,9 @@ struct xt_policy_elem
 #endif
 	};
 	__be32			spi;
-	u_int32_t		reqid;
-	u_int8_t		proto;
-	u_int8_t		mode;
+	__u32		reqid;
+	__u8		proto;
+	__u8		mode;
 
 	struct xt_policy_spec	match;
 	struct xt_policy_spec	invert;
@@ -66,8 +68,8 @@ struct xt_policy_elem
 struct xt_policy_info
 {
 	struct xt_policy_elem pol[XT_POLICY_MAX_ELEM];
-	u_int16_t flags;
-	u_int16_t len;
+	__u16 flags;
+	__u16 len;
 };
 
 #endif /* _XT_POLICY_H */
diff --git a/include/linux/netfilter/xt_rateest.h b/include/linux/netfilter/xt_rateest.h
index 2010cb74250f..d40a6196842a 100644
--- a/include/linux/netfilter/xt_rateest.h
+++ b/include/linux/netfilter/xt_rateest.h
@@ -1,6 +1,8 @@
 #ifndef _XT_RATEEST_MATCH_H
 #define _XT_RATEEST_MATCH_H
 
+#include <linux/types.h>
+
 enum xt_rateest_match_flags {
 	XT_RATEEST_MATCH_INVERT	= 1<<0,
 	XT_RATEEST_MATCH_ABS	= 1<<1,
@@ -20,12 +22,12 @@ enum xt_rateest_match_mode {
 struct xt_rateest_match_info {
 	char			name1[IFNAMSIZ];
 	char			name2[IFNAMSIZ];
-	u_int16_t		flags;
-	u_int16_t		mode;
-	u_int32_t		bps1;
-	u_int32_t		pps1;
-	u_int32_t		bps2;
-	u_int32_t		pps2;
+	__u16		flags;
+	__u16		mode;
+	__u32		bps1;
+	__u32		pps1;
+	__u32		bps2;
+	__u32		pps2;
 
 	/* Used internally by the kernel */
 	struct xt_rateest	*est1 __attribute__((aligned(8)));
diff --git a/include/linux/netfilter/xt_realm.h b/include/linux/netfilter/xt_realm.h
index 220e87245716..d4a82ee56a02 100644
--- a/include/linux/netfilter/xt_realm.h
+++ b/include/linux/netfilter/xt_realm.h
@@ -1,10 +1,12 @@
 #ifndef _XT_REALM_H
 #define _XT_REALM_H
 
+#include <linux/types.h>
+
 struct xt_realm_info {
-	u_int32_t id;
-	u_int32_t mask;
-	u_int8_t invert;
+	__u32 id;
+	__u32 mask;
+	__u8 invert;
 };
 
 #endif /* _XT_REALM_H */
diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h
index 5cfeb81c6794..d2c276609925 100644
--- a/include/linux/netfilter/xt_recent.h
+++ b/include/linux/netfilter/xt_recent.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_NETFILTER_XT_RECENT_H
 #define _LINUX_NETFILTER_XT_RECENT_H 1
 
+#include <linux/types.h>
+
 enum {
 	XT_RECENT_CHECK    = 1 << 0,
 	XT_RECENT_SET      = 1 << 1,
@@ -15,12 +17,12 @@ enum {
 };
 
 struct xt_recent_mtinfo {
-	u_int32_t seconds;
-	u_int32_t hit_count;
-	u_int8_t check_set;
-	u_int8_t invert;
+	__u32 seconds;
+	__u32 hit_count;
+	__u8 check_set;
+	__u8 invert;
 	char name[XT_RECENT_NAME_LEN];
-	u_int8_t side;
+	__u8 side;
 };
 
 #endif /* _LINUX_NETFILTER_XT_RECENT_H */
diff --git a/include/linux/netfilter/xt_sctp.h b/include/linux/netfilter/xt_sctp.h
index 32000ba6ecef..29287be696a2 100644
--- a/include/linux/netfilter/xt_sctp.h
+++ b/include/linux/netfilter/xt_sctp.h
@@ -1,6 +1,8 @@
 #ifndef _XT_SCTP_H_
 #define _XT_SCTP_H_
 
+#include <linux/types.h>
+
 #define XT_SCTP_SRC_PORTS	        0x01
 #define XT_SCTP_DEST_PORTS	        0x02
 #define XT_SCTP_CHUNK_TYPES		0x04
@@ -8,49 +10,49 @@
 #define XT_SCTP_VALID_FLAGS		0x07
 
 struct xt_sctp_flag_info {
-	u_int8_t chunktype;
-	u_int8_t flag;
-	u_int8_t flag_mask;
+	__u8 chunktype;
+	__u8 flag;
+	__u8 flag_mask;
 };
 
 #define XT_NUM_SCTP_FLAGS	4
 
 struct xt_sctp_info {
-	u_int16_t dpts[2];  /* Min, Max */
-	u_int16_t spts[2];  /* Min, Max */
+	__u16 dpts[2];  /* Min, Max */
+	__u16 spts[2];  /* Min, Max */
 
-	u_int32_t chunkmap[256 / sizeof (u_int32_t)];  /* Bit mask of chunks to be matched according to RFC 2960 */
+	__u32 chunkmap[256 / sizeof (__u32)];  /* Bit mask of chunks to be matched according to RFC 2960 */
 
 #define SCTP_CHUNK_MATCH_ANY   0x01  /* Match if any of the chunk types are present */
 #define SCTP_CHUNK_MATCH_ALL   0x02  /* Match if all of the chunk types are present */
 #define SCTP_CHUNK_MATCH_ONLY  0x04  /* Match if these are the only chunk types present */
 
-	u_int32_t chunk_match_type;
+	__u32 chunk_match_type;
 	struct xt_sctp_flag_info flag_info[XT_NUM_SCTP_FLAGS];
 	int flag_count;
 
-	u_int32_t flags;
-	u_int32_t invflags;
+	__u32 flags;
+	__u32 invflags;
 };
 
 #define bytes(type) (sizeof(type) * 8)
 
 #define SCTP_CHUNKMAP_SET(chunkmap, type) 		\
 	do { 						\
-		(chunkmap)[type / bytes(u_int32_t)] |= 	\
-			1 << (type % bytes(u_int32_t));	\
+		(chunkmap)[type / bytes(__u32)] |= 	\
+			1 << (type % bytes(__u32));	\
 	} while (0)
 
 #define SCTP_CHUNKMAP_CLEAR(chunkmap, type)		 	\
 	do {							\
-		(chunkmap)[type / bytes(u_int32_t)] &= 		\
-			~(1 << (type % bytes(u_int32_t)));	\
+		(chunkmap)[type / bytes(__u32)] &= 		\
+			~(1 << (type % bytes(__u32)));	\
 	} while (0)
 
 #define SCTP_CHUNKMAP_IS_SET(chunkmap, type) 			\
 ({								\
-	((chunkmap)[type / bytes (u_int32_t)] & 		\
-		(1 << (type % bytes (u_int32_t)))) ? 1: 0;	\
+	((chunkmap)[type / bytes (__u32)] & 		\
+		(1 << (type % bytes (__u32)))) ? 1: 0;	\
 })
 
 #define SCTP_CHUNKMAP_RESET(chunkmap) \
@@ -65,7 +67,7 @@ struct xt_sctp_info {
 #define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \
 	__sctp_chunkmap_is_clear((chunkmap), ARRAY_SIZE(chunkmap))
 static inline bool
-__sctp_chunkmap_is_clear(const u_int32_t *chunkmap, unsigned int n)
+__sctp_chunkmap_is_clear(const __u32 *chunkmap, unsigned int n)
 {
 	unsigned int i;
 	for (i = 0; i < n; ++i)
@@ -77,7 +79,7 @@ __sctp_chunkmap_is_clear(const u_int32_t *chunkmap, unsigned int n)
 #define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \
 	__sctp_chunkmap_is_all_set((chunkmap), ARRAY_SIZE(chunkmap))
 static inline bool
-__sctp_chunkmap_is_all_set(const u_int32_t *chunkmap, unsigned int n)
+__sctp_chunkmap_is_all_set(const __u32 *chunkmap, unsigned int n)
 {
 	unsigned int i;
 	for (i = 0; i < n; ++i)
diff --git a/include/linux/netfilter/xt_statistic.h b/include/linux/netfilter/xt_statistic.h
index 3d38bc975048..095f3c66f456 100644
--- a/include/linux/netfilter/xt_statistic.h
+++ b/include/linux/netfilter/xt_statistic.h
@@ -1,6 +1,8 @@
 #ifndef _XT_STATISTIC_H
 #define _XT_STATISTIC_H
 
+#include <linux/types.h>
+
 enum xt_statistic_mode {
 	XT_STATISTIC_MODE_RANDOM,
 	XT_STATISTIC_MODE_NTH,
@@ -14,17 +16,17 @@ enum xt_statistic_flags {
 #define XT_STATISTIC_MASK		0x1
 
 struct xt_statistic_info {
-	u_int16_t			mode;
-	u_int16_t			flags;
+	__u16			mode;
+	__u16			flags;
 	union {
 		struct {
-			u_int32_t	probability;
+			__u32	probability;
 		} random;
 		struct {
-			u_int32_t	every;
-			u_int32_t	packet;
+			__u32	every;
+			__u32	packet;
 			/* Used internally by the kernel */
-			u_int32_t	count;
+			__u32	count;
 		} nth;
 	} u;
 	struct xt_statistic_info	*master __attribute__((aligned(8)));
diff --git a/include/linux/netfilter/xt_string.h b/include/linux/netfilter/xt_string.h
index 8a6ba7bbef9f..ecbb95fc89ed 100644
--- a/include/linux/netfilter/xt_string.h
+++ b/include/linux/netfilter/xt_string.h
@@ -1,6 +1,8 @@
 #ifndef _XT_STRING_H
 #define _XT_STRING_H
 
+#include <linux/types.h>
+
 #define XT_STRING_MAX_PATTERN_SIZE 128
 #define XT_STRING_MAX_ALGO_NAME_SIZE 16
 
@@ -11,18 +13,18 @@ enum {
 
 struct xt_string_info
 {
-	u_int16_t from_offset;
-	u_int16_t to_offset;
+	__u16 from_offset;
+	__u16 to_offset;
 	char	  algo[XT_STRING_MAX_ALGO_NAME_SIZE];
 	char 	  pattern[XT_STRING_MAX_PATTERN_SIZE];
-	u_int8_t  patlen;
+	__u8  patlen;
 	union {
 		struct {
-			u_int8_t  invert;
+			__u8  invert;
 		} v0;
 
 		struct {
-			u_int8_t  flags;
+			__u8  flags;
 		} v1;
 	} u;
 
diff --git a/include/linux/netfilter/xt_tcpmss.h b/include/linux/netfilter/xt_tcpmss.h
index e03274c4c790..fbac56b9e667 100644
--- a/include/linux/netfilter/xt_tcpmss.h
+++ b/include/linux/netfilter/xt_tcpmss.h
@@ -1,9 +1,11 @@
 #ifndef _XT_TCPMSS_MATCH_H
 #define _XT_TCPMSS_MATCH_H
 
+#include <linux/types.h>
+
 struct xt_tcpmss_match_info {
-    u_int16_t mss_min, mss_max;
-    u_int8_t invert;
+    __u16 mss_min, mss_max;
+    __u8 invert;
 };
 
 #endif /*_XT_TCPMSS_MATCH_H*/
diff --git a/include/linux/netfilter/xt_tcpudp.h b/include/linux/netfilter/xt_tcpudp.h
index 78bc65f11adf..a490a0bc1d29 100644
--- a/include/linux/netfilter/xt_tcpudp.h
+++ b/include/linux/netfilter/xt_tcpudp.h
@@ -1,15 +1,17 @@
 #ifndef _XT_TCPUDP_H
 #define _XT_TCPUDP_H
 
+#include <linux/types.h>
+
 /* TCP matching stuff */
 struct xt_tcp
 {
-	u_int16_t spts[2];			/* Source port range. */
-	u_int16_t dpts[2];			/* Destination port range. */
-	u_int8_t option;			/* TCP Option iff non-zero*/
-	u_int8_t flg_mask;			/* TCP flags mask byte */
-	u_int8_t flg_cmp;			/* TCP flags compare byte */
-	u_int8_t invflags;			/* Inverse flags */
+	__u16 spts[2];			/* Source port range. */
+	__u16 dpts[2];			/* Destination port range. */
+	__u8 option;			/* TCP Option iff non-zero*/
+	__u8 flg_mask;			/* TCP flags mask byte */
+	__u8 flg_cmp;			/* TCP flags compare byte */
+	__u8 invflags;			/* Inverse flags */
 };
 
 /* Values for "inv" field in struct ipt_tcp. */
@@ -22,9 +24,9 @@ struct xt_tcp
 /* UDP matching stuff */
 struct xt_udp
 {
-	u_int16_t spts[2];			/* Source port range. */
-	u_int16_t dpts[2];			/* Destination port range. */
-	u_int8_t invflags;			/* Inverse flags */
+	__u16 spts[2];			/* Source port range. */
+	__u16 dpts[2];			/* Destination port range. */
+	__u8 invflags;			/* Inverse flags */
 };
 
 /* Values for "invflags" field in struct ipt_udp. */
-- 
cgit v1.2.3


From 3a471cbc081b6bf2b58a48db13d734ecd3b0d437 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Feb 2009 00:51:45 +0100
Subject: remove __KERNEL_STRICT_NAMES

With the last used of non-strict names gone from the
exported header files, we can remove the old libc5
compatibility cruft from our headers and only export
strict types.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/statfs.h |  5 +++--
 include/linux/types.h        | 13 ++-----------
 2 files changed, 5 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/statfs.h b/include/asm-generic/statfs.h
index 6129d6802149..3b4fb3e52f0d 100644
--- a/include/asm-generic/statfs.h
+++ b/include/asm-generic/statfs.h
@@ -1,8 +1,9 @@
 #ifndef _GENERIC_STATFS_H
 #define _GENERIC_STATFS_H
 
-#ifndef __KERNEL_STRICT_NAMES
-# include <linux/types.h>
+#include <linux/types.h>
+
+#ifdef __KERNEL__
 typedef __kernel_fsid_t	fsid_t;
 #endif
 
diff --git a/include/linux/types.h b/include/linux/types.h
index fca82ed55f49..5abe354020f9 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -13,7 +13,7 @@
 
 #include <linux/posix_types.h>
 
-#ifndef __KERNEL_STRICT_NAMES
+#ifdef __KERNEL__
 
 typedef __u32 __kernel_dev_t;
 
@@ -31,7 +31,6 @@ typedef __kernel_timer_t	timer_t;
 typedef __kernel_clockid_t	clockid_t;
 typedef __kernel_mqd_t		mqd_t;
 
-#ifdef __KERNEL__
 typedef _Bool			bool;
 
 typedef __kernel_uid32_t	uid_t;
@@ -47,14 +46,6 @@ typedef __kernel_old_uid_t	old_uid_t;
 typedef __kernel_old_gid_t	old_gid_t;
 #endif /* CONFIG_UID16 */
 
-/* libc5 includes this file to define uid_t, thus uid_t can never change
- * when it is included by non-kernel code
- */
-#else
-typedef __kernel_uid_t		uid_t;
-typedef __kernel_gid_t		gid_t;
-#endif /* __KERNEL__ */
-
 #if defined(__GNUC__)
 typedef __kernel_loff_t		loff_t;
 #endif
@@ -156,7 +147,7 @@ typedef unsigned long blkcnt_t;
 #define pgoff_t unsigned long
 #endif
 
-#endif /* __KERNEL_STRICT_NAMES */
+#endif /* __KERNEL__ */
 
 /*
  * Below are truly Linux-specific types that should never collide with
-- 
cgit v1.2.3


From 8cd2c29dd5f04d91dac6ea7f8b9df4ff1b4380ee Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 25 Feb 2009 15:22:19 -0800
Subject: compiler-gcc4: conditionalize #error on __KERNEL__

Impact: Fix for exported headers

We only want to error out on specific gcc versions if we are actually
building the kernel, so conditionalize the #if...#error on __KERNEL__.

Based on a patchset by Arnd Bergmann <arnd@arndb.de>.

Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/compiler-gcc4.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 09992718f9e8..450fa597c94d 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -3,8 +3,10 @@
 #endif
 
 /* GCC 4.1.[01] miscompiles __weak */
-#if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1
-# error Your version of gcc miscompiles the __weak directive
+#ifdef __KERNEL__
+# if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1
+#  error Your version of gcc miscompiles the __weak directive
+# endif
 #endif
 
 #define __used			__attribute__((__used__))
-- 
cgit v1.2.3


From ac99533fb716171db12798039671f19631cf3586 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 26 Mar 2009 15:11:25 +0000
Subject: wan: convert sdla driver to net_device_ops

Also use internal net_device_stats

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/sdla.c  | 36 +++++++++++++++---------------------
 include/linux/if_frad.h |  1 -
 2 files changed, 15 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c
index 6a07ba9371db..1d637f407a0c 100644
--- a/drivers/net/wan/sdla.c
+++ b/drivers/net/wan/sdla.c
@@ -714,19 +714,19 @@ static int sdla_transmit(struct sk_buff *skb, struct net_device *dev)
 		switch (ret)
 		{
 			case SDLA_RET_OK:
-				flp->stats.tx_packets++;
+				dev->stats.tx_packets++;
 				ret = DLCI_RET_OK;
 				break;
 
 			case SDLA_RET_CIR_OVERFLOW:
 			case SDLA_RET_BUF_OVERSIZE:
 			case SDLA_RET_NO_BUFS:
-				flp->stats.tx_dropped++;
+				dev->stats.tx_dropped++;
 				ret = DLCI_RET_DROP;
 				break;
 
 			default:
-				flp->stats.tx_errors++;
+				dev->stats.tx_errors++;
 				ret = DLCI_RET_ERR;
 				break;
 		}
@@ -807,7 +807,7 @@ static void sdla_receive(struct net_device *dev)
 		if (i == CONFIG_DLCI_MAX)
 		{
 			printk(KERN_NOTICE "%s: Received packet from invalid DLCI %i, ignoring.", dev->name, dlci);
-			flp->stats.rx_errors++;
+			dev->stats.rx_errors++;
 			success = 0;
 		}
 	}
@@ -819,7 +819,7 @@ static void sdla_receive(struct net_device *dev)
 		if (skb == NULL) 
 		{
 			printk(KERN_NOTICE "%s: Memory squeeze, dropping packet.\n", dev->name);
-			flp->stats.rx_dropped++; 
+			dev->stats.rx_dropped++;
 			success = 0;
 		}
 		else
@@ -859,7 +859,7 @@ static void sdla_receive(struct net_device *dev)
 
 	if (success)
 	{
-		flp->stats.rx_packets++;
+		dev->stats.rx_packets++;
 		dlp = netdev_priv(master);
 		(*dlp->receive)(skb, master);
 	}
@@ -1590,13 +1590,14 @@ fail:
 	return err;
 }
  
-static struct net_device_stats *sdla_stats(struct net_device *dev)
-{
-	struct frad_local *flp;
-	flp = netdev_priv(dev);
-
-	return(&flp->stats);
-}
+static const struct net_device_ops sdla_netdev_ops = {
+	.ndo_open	= sdla_open,
+	.ndo_stop	= sdla_close,
+	.ndo_do_ioctl	= sdla_ioctl,
+	.ndo_set_config	= sdla_set_config,
+	.ndo_start_xmit	= sdla_transmit,
+	.ndo_change_mtu	= sdla_change_mtu,
+};
 
 static void setup_sdla(struct net_device *dev)
 {
@@ -1604,20 +1605,13 @@ static void setup_sdla(struct net_device *dev)
 
 	netdev_boot_setup_check(dev);
 
+	dev->netdev_ops		= &sdla_netdev_ops;
 	dev->flags		= 0;
 	dev->type		= 0xFFFF;
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
 	dev->mtu		= SDLA_MAX_MTU;
 
-	dev->open		= sdla_open;
-	dev->stop		= sdla_close;
-	dev->do_ioctl		= sdla_ioctl;
-	dev->set_config		= sdla_set_config;
-	dev->get_stats		= sdla_stats;
-	dev->hard_start_xmit	= sdla_transmit;
-	dev->change_mtu		= sdla_change_mtu;
-
 	flp->activate		= sdla_activate;
 	flp->deactivate		= sdla_deactivate;
 	flp->assoc		= sdla_assoc;
diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h
index 60e16a551dd6..673f2209453d 100644
--- a/include/linux/if_frad.h
+++ b/include/linux/if_frad.h
@@ -153,7 +153,6 @@ struct frhdr
 
 struct dlci_local
 {
-   struct net_device_stats stats;
    struct net_device      *master;
    struct net_device      *slave;
    struct dlci_conf       config;
-- 
cgit v1.2.3


From 2b1c6bd77d4e6a727ffac8630cd154b2144b751a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 28 Nov 2008 10:09:09 +0100
Subject: generic compat_sys_ustat

Due to a different size of ino_t ustat needs a compat handler, but
currently only x86 and mips provide one.  Add a generic compat_sys_ustat
and switch all architectures over to it.  Instead of doing various
user copy hacks compat_sys_ustat just reimplements sys_ustat as
it's trivial.  This was suggested by Arnd Bergmann.

Found by Eric Sandeen when running xfstests/017 on ppc64, which causes
stack smashing warnings on RHEL/Fedora due to the too large amount of
data writen by the syscall.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/ia32/ia32_entry.S        |  2 +-
 arch/mips/kernel/linux32.c         | 34 ----------------------------------
 arch/mips/kernel/scall64-n32.S     |  2 +-
 arch/mips/kernel/scall64-o32.S     |  2 +-
 arch/parisc/kernel/syscall_table.S |  2 +-
 arch/powerpc/include/asm/systbl.h  |  2 +-
 arch/s390/kernel/compat_wrapper.S  |  2 +-
 arch/x86/ia32/ia32entry.S          |  2 +-
 arch/x86/ia32/sys_ia32.c           | 22 ----------------------
 arch/x86/include/asm/ia32.h        |  7 -------
 arch/x86/include/asm/sys_ia32.h    |  2 --
 fs/compat.c                        | 28 ++++++++++++++++++++++++++++
 include/linux/compat.h             |  8 ++++++++
 13 files changed, 43 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index a46f8395e9a5..af9405cd70e5 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -240,7 +240,7 @@ ia32_syscall_table:
 	data8 sys_ni_syscall
 	data8 sys_umask		  /* 60 */
 	data8 sys_chroot
-	data8 sys_ustat
+	data8 compat_sys_ustat
 	data8 sys_dup2
 	data8 sys_getppid
 	data8 sys_getpgrp	  /* 65 */
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 1a86f84fa947..784859cedef7 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -356,40 +356,6 @@ SYSCALL_DEFINE1(32_personality, unsigned long, personality)
 	return ret;
 }
 
-/* ustat compatibility */
-struct ustat32 {
-	compat_daddr_t	f_tfree;
-	compat_ino_t	f_tinode;
-	char		f_fname[6];
-	char		f_fpack[6];
-};
-
-extern asmlinkage long sys_ustat(dev_t dev, struct ustat __user * ubuf);
-
-SYSCALL_DEFINE2(32_ustat, dev_t, dev, struct ustat32 __user *, ubuf32)
-{
-	int err;
-	struct ustat tmp;
-	struct ustat32 tmp32;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	err = sys_ustat(dev, (struct ustat __user *)&tmp);
-	set_fs(old_fs);
-
-	if (err)
-		goto out;
-
-	memset(&tmp32, 0, sizeof(struct ustat32));
-	tmp32.f_tfree = tmp.f_tfree;
-	tmp32.f_tinode = tmp.f_tinode;
-
-	err = copy_to_user(ubuf32, &tmp32, sizeof(struct ustat32)) ? -EFAULT : 0;
-
-out:
-	return err;
-}
-
 SYSCALL_DEFINE4(32_sendfile, long, out_fd, long, in_fd,
 	compat_off_t __user *, offset, s32, count)
 {
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 7438e92f8a01..f61d6b0e5731 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -253,7 +253,7 @@ EXPORT(sysn32_call_table)
 	PTR	compat_sys_utime		/* 6130 */
 	PTR	sys_mknod
 	PTR	sys_32_personality
-	PTR	sys_32_ustat
+	PTR	compat_sys_ustat
 	PTR	compat_sys_statfs
 	PTR	compat_sys_fstatfs		/* 6135 */
 	PTR	sys_sysfs
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index b0fef4ff9827..60997f1f69d4 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -265,7 +265,7 @@ sys_call_table:
 	PTR	sys_olduname
 	PTR	sys_umask			/* 4060 */
 	PTR	sys_chroot
-	PTR	sys_32_ustat
+	PTR	compat_sys_ustat
 	PTR	sys_dup2
 	PTR	sys_getppid
 	PTR	sys_getpgrp			/* 4065 */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 303d2b647e41..03b9a01bc16c 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -130,7 +130,7 @@
 	ENTRY_OURS(newuname)
 	ENTRY_SAME(umask)		/* 60 */
 	ENTRY_SAME(chroot)
-	ENTRY_SAME(ustat)
+	ENTRY_COMP(ustat)
 	ENTRY_SAME(dup2)
 	ENTRY_SAME(getppid)
 	ENTRY_SAME(getpgrp)		/* 65 */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 72353f6070a4..fe166491e9dc 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -65,7 +65,7 @@ SYSCALL(ni_syscall)
 SYSX(sys_ni_syscall,sys_olduname, sys_olduname)
 COMPAT_SYS_SPU(umask)
 SYSCALL_SPU(chroot)
-SYSCALL(ustat)
+COMPAT_SYS(ustat)
 SYSCALL_SPU(dup2)
 SYSCALL_SPU(getppid)
 SYSCALL_SPU(getpgrp)
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 62c706eb0de6..87cf5a79a351 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -252,7 +252,7 @@ sys32_chroot_wrapper:
 sys32_ustat_wrapper:
 	llgfr	%r2,%r2			# dev_t
 	llgtr	%r3,%r3			# struct ustat *
-	jg	sys_ustat
+	jg	compat_sys_ustat
 
 	.globl	sys32_dup2_wrapper
 sys32_dup2_wrapper:
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 5a0d76dc56a4..8ef8876666b2 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -557,7 +557,7 @@ ia32_sys_call_table:
 	.quad sys32_olduname
 	.quad sys_umask		/* 60 */
 	.quad sys_chroot
-	.quad sys32_ustat
+	.quad compat_sys_ustat
 	.quad sys_dup2
 	.quad sys_getppid
 	.quad sys_getpgrp		/* 65 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 6c0d7f6231af..efac92fd1efb 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -638,28 +638,6 @@ long sys32_uname(struct old_utsname __user *name)
 	return err ? -EFAULT : 0;
 }
 
-long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
-{
-	struct ustat u;
-	mm_segment_t seg;
-	int ret;
-
-	seg = get_fs();
-	set_fs(KERNEL_DS);
-	ret = sys_ustat(dev, (struct ustat __user *)&u);
-	set_fs(seg);
-	if (ret < 0)
-		return ret;
-
-	if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) ||
-	    __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
-	    __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
-	    __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
-	    __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
-		ret = -EFAULT;
-	return ret;
-}
-
 asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
 			     compat_uptr_t __user *envp, struct pt_regs *regs)
 {
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 50ca486fd88c..1f7e62517284 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -129,13 +129,6 @@ typedef struct compat_siginfo {
 	} _sifields;
 } compat_siginfo_t;
 
-struct ustat32 {
-	__u32			f_tfree;
-	compat_ino_t		f_tinode;
-	char			f_fname[6];
-	char			f_fpack[6];
-};
-
 #define IA32_STACK_TOP IA32_PAGE_OFFSET
 
 #ifdef __KERNEL__
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index ffb08be2a530..72a6dcd1299b 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -70,8 +70,6 @@ struct old_utsname;
 asmlinkage long sys32_olduname(struct oldold_utsname __user *);
 long sys32_uname(struct old_utsname __user *);
 
-long sys32_ustat(unsigned, struct ustat32 __user *);
-
 asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
 			     compat_uptr_t __user *, struct pt_regs *);
 asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
diff --git a/fs/compat.c b/fs/compat.c
index d0145ca27572..4e0db94b5353 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -378,6 +378,34 @@ out:
 	return error;
 }
 
+/*
+ * This is a copy of sys_ustat, just dealing with a structure layout.
+ * Given how simple this syscall is that apporach is more maintainable
+ * than the various conversion hacks.
+ */
+asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
+{
+	struct super_block *sb;
+	struct compat_ustat tmp;
+	struct kstatfs sbuf;
+	int err;
+
+	sb = user_get_super(new_decode_dev(dev));
+	if (!sb)
+		return -EINVAL;
+	err = vfs_statfs(sb->s_root, &sbuf);
+	drop_super(sb);
+	if (err)
+		return err;
+
+	memset(&tmp, 0, sizeof(struct compat_ustat));
+	tmp.f_tfree = sbuf.f_bfree;
+	tmp.f_tinode = sbuf.f_ffree;
+	if (copy_to_user(u, &tmp, sizeof(struct compat_ustat)))
+		return -EFAULT;
+	return 0;
+}
+
 static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
 {
 	if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 3fd2194ff573..b880864672de 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -125,6 +125,13 @@ struct compat_dirent {
 	char		d_name[256];
 };
 
+struct compat_ustat {
+	compat_daddr_t		f_tfree;
+	compat_ino_t		f_tinode;
+	char			f_fname[6];
+	char			f_fpack[6];
+};
+
 typedef union compat_sigval {
 	compat_int_t	sival_int;
 	compat_uptr_t	sival_ptr;
@@ -178,6 +185,7 @@ long compat_sys_semtimedop(int semid, struct sembuf __user *tsems,
 		unsigned nsems, const struct compat_timespec __user *timeout);
 asmlinkage long compat_sys_keyctl(u32 option,
 			      u32 arg2, u32 arg3, u32 arg4, u32 arg5);
+asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32);
 
 asmlinkage ssize_t compat_sys_readv(unsigned long fd,
 		const struct compat_iovec __user *vec, unsigned long vlen);
-- 
cgit v1.2.3


From 10f303ae1e5e77a9f7cb053e6329906afb132c67 Mon Sep 17 00:00:00 2001
From: Cheng Renquan <crquan@gmail.com>
Date: Wed, 14 Jan 2009 17:01:33 +0800
Subject: do_pipe cleanup: drop its last user in arch/alpha/

The last user of do_pipe is in arch/alpha/, after replacing it with
do_pipe_flags, the do_pipe can be totally dropped.

Signed-off-by: Cheng Renquan <crquan@gmail.com>
Acked-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/entry.S   | 3 ++-
 arch/alpha/kernel/osf_sys.c | 2 --
 fs/pipe.c                   | 5 -----
 include/linux/fs.h          | 1 -
 4 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index e4a54b615894..b45d913a51c3 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -903,8 +903,9 @@ sys_alpha_pipe:
 	stq	$26, 0($sp)
 	.prologue 0
 
+	mov	$31, $17
 	lda	$16, 8($sp)
-	jsr	$26, do_pipe
+	jsr	$26, do_pipe_flags
 
 	ldq	$26, 0($sp)
 	bne	$0, 1f
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index ae41f097864b..42ee05981e71 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -46,8 +46,6 @@
 #include <asm/hwrpb.h>
 #include <asm/processor.h>
 
-extern int do_pipe(int *);
-
 /*
  * Brk needs to return an error.  Still support Linux's brk(0) query idiom,
  * which OSF programs just shouldn't be doing.  We're still not quite
diff --git a/fs/pipe.c b/fs/pipe.c
index 14f502b89cf5..df3719562fc1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1034,11 +1034,6 @@ int do_pipe_flags(int *fd, int flags)
 	return error;
 }
 
-int do_pipe(int *fd)
-{
-	return do_pipe_flags(fd, 0);
-}
-
 /*
  * sys_pipe() is the normal C calling standard for creating
  * a pipe. It's not the way Unix traditionally does this, though.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 92734c0012e6..51de83bd8a87 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1881,7 +1881,6 @@ static inline void allow_write_access(struct file *file)
 	if (file)
 		atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
 }
-extern int do_pipe(int *);
 extern int do_pipe_flags(int *, int);
 extern struct file *create_read_pipe(struct file *f, int flags);
 extern struct file *create_write_pipe(int flags);
-- 
cgit v1.2.3


From c2aca5e529a2499d454c41e01f59f1d5fe4a1364 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 20 Jan 2009 10:29:45 +0000
Subject: vfs: Update fs.h to use inline functions when no file locking set

This avoids various issues which might give rise to compiler warnings
about missing functions and/or unused variable with the previous
macros. This also fixes a bug where one of the macros was returning
0, but it should have been void.

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Tested-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 165 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 139 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51de83bd8a87..d84020b7e676 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1063,34 +1063,147 @@ extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
 #else /* !CONFIG_FILE_LOCKING */
-#define fcntl_getlk(a, b) ({ -EINVAL; })
-#define fcntl_setlk(a, b, c, d) ({ -EACCES; })
+static inline int fcntl_getlk(struct file *file, struct flock __user *user)
+{
+	return -EINVAL;
+}
+
+static inline int fcntl_setlk(unsigned int fd, struct file *file,
+			      unsigned int cmd, struct flock __user *user)
+{
+	return -EACCES;
+}
+
 #if BITS_PER_LONG == 32
-#define fcntl_getlk64(a, b) ({ -EINVAL; })
-#define fcntl_setlk64(a, b, c, d) ({ -EACCES; })
+static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user)
+{
+	return -EINVAL;
+}
+
+static inline int fcntl_setlk64(unsigned int fd, struct file *file,
+				unsigned int cmd, struct flock64 __user *user)
+{
+	return -EACCES;
+}
 #endif
-#define fcntl_setlease(a, b, c) ({ 0; })
-#define fcntl_getlease(a) ({ 0; })
-#define locks_init_lock(a) ({ })
-#define __locks_copy_lock(a, b) ({ })
-#define locks_copy_lock(a, b) ({ })
-#define locks_remove_posix(a, b) ({ })
-#define locks_remove_flock(a) ({ })
-#define posix_test_lock(a, b) ({ 0; })
-#define posix_lock_file(a, b, c) ({ -ENOLCK; })
-#define posix_lock_file_wait(a, b) ({ -ENOLCK; })
-#define posix_unblock_lock(a, b) (-ENOENT)
-#define vfs_test_lock(a, b) ({ 0; })
-#define vfs_lock_file(a, b, c, d) (-ENOLCK)
-#define vfs_cancel_lock(a, b) ({ 0; })
-#define flock_lock_file_wait(a, b) ({ -ENOLCK; })
-#define __break_lease(a, b) ({ 0; })
-#define lease_get_mtime(a, b) ({ })
-#define generic_setlease(a, b, c) ({ -EINVAL; })
-#define vfs_setlease(a, b, c) ({ -EINVAL; })
-#define lease_modify(a, b) ({ -EINVAL; })
-#define lock_may_read(a, b, c) ({ 1; })
-#define lock_may_write(a, b, c) ({ 1; })
+static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+{
+	return 0;
+}
+
+static inline int fcntl_getlease(struct file *filp)
+{
+	return 0;
+}
+
+static inline void locks_init_lock(struct file_lock *fl)
+{
+	return;
+}
+
+static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+	return;
+}
+
+static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+	return;
+}
+
+static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
+{
+	return;
+}
+
+static inline void locks_remove_flock(struct file *filp)
+{
+	return;
+}
+
+static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
+{
+	return;
+}
+
+static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
+				  struct file_lock *conflock)
+{
+	return -ENOLCK;
+}
+
+static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
+{
+	return -ENOLCK;
+}
+
+static inline int posix_unblock_lock(struct file *filp,
+				     struct file_lock *waiter)
+{
+	return -ENOENT;
+}
+
+static inline int vfs_test_lock(struct file *filp, struct file_lock *fl)
+{
+	return 0;
+}
+
+static inline int vfs_lock_file(struct file *filp, unsigned int cmd,
+				struct file_lock *fl, struct file_lock *conf)
+{
+	return -ENOLCK;
+}
+
+static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
+{
+	return 0;
+}
+
+static inline int flock_lock_file_wait(struct file *filp,
+				       struct file_lock *request)
+{
+	return -ENOLCK;
+}
+
+static inline int __break_lease(struct inode *inode, unsigned int mode)
+{
+	return 0;
+}
+
+static inline void lease_get_mtime(struct inode *inode, struct timespec *time)
+{
+	return;
+}
+
+static inline int generic_setlease(struct file *filp, long arg,
+				    struct file_lock **flp)
+{
+	return -EINVAL;
+}
+
+static inline int vfs_setlease(struct file *filp, long arg,
+			       struct file_lock **lease)
+{
+	return -EINVAL;
+}
+
+static inline int lease_modify(struct file_lock **before, int arg)
+{
+	return -EINVAL;
+}
+
+static inline int lock_may_read(struct inode *inode, loff_t start,
+				unsigned long len)
+{
+	return 1;
+}
+
+static inline int lock_may_write(struct inode *inode, loff_t start,
+				 unsigned long len)
+{
+	return 1;
+}
+
 #endif /* !CONFIG_FILE_LOCKING */
 
 
-- 
cgit v1.2.3


From af5df56688acfb75c1b15b4e000ec5e82a9cdc29 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 20 Jan 2009 10:29:46 +0000
Subject: vfs: Further changes from macro to inline function in fs.h

There is a second set of macros for when CONFIG_FILE_LOCKING is
not set. This patch updates those to become inline functions
as well.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index d84020b7e676..5f74d616cd7d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1800,13 +1800,44 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
 	return 0;
 }
 #else /* !CONFIG_FILE_LOCKING */
-#define locks_mandatory_locked(a) ({ 0; })
-#define locks_mandatory_area(a, b, c, d, e) ({ 0; })
-#define __mandatory_lock(a) ({ 0; })
-#define mandatory_lock(a) ({ 0; })
-#define locks_verify_locked(a) ({ 0; })
-#define locks_verify_truncate(a, b, c) ({ 0; })
-#define break_lease(a, b) ({ 0; })
+static inline int locks_mandatory_locked(struct inode *inode)
+{
+	return 0;
+}
+
+static inline int locks_mandatory_area(int rw, struct inode *inode,
+				       struct file *filp, loff_t offset,
+				       size_t count)
+{
+	return 0;
+}
+
+static inline int __mandatory_lock(struct inode *inode)
+{
+	return 0;
+}
+
+static inline int mandatory_lock(struct inode *inode)
+{
+	return 0;
+}
+
+static inline int locks_verify_locked(struct inode *inode)
+{
+	return 0;
+}
+
+static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
+					size_t size)
+{
+	return 0;
+}
+
+static inline int break_lease(struct inode *inode, unsigned int mode)
+{
+	return 0;
+}
+
 #endif /* CONFIG_FILE_LOCKING */
 
 /* fs/open.c */
-- 
cgit v1.2.3


From e56980d451904b623573ef4966cbab768e433c79 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 11 Feb 2009 13:14:54 -0800
Subject: fs: make struct dentry->d_op const

This change will allow for tagging many dentry_operations const in the
source tree.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/dcache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c66d22487bf8..15156364d196 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -112,7 +112,7 @@ struct dentry {
 	struct list_head d_subdirs;	/* our children */
 	struct list_head d_alias;	/* inode alias list */
 	unsigned long d_time;		/* used by d_revalidate */
-	struct dentry_operations *d_op;
+	const struct dentry_operations *d_op;
 	struct super_block *d_sb;	/* The root of the dentry tree */
 	void *d_fsdata;			/* fs-specific data */
 
-- 
cgit v1.2.3


From f786aa90e026f2174bb0c26d49f338c5c46ede55 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 20 Feb 2009 05:51:22 +0000
Subject: constify dentry_operations: NFS

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c            | 4 ++--
 fs/nfs/nfs4_fs.h        | 2 +-
 include/linux/nfs_fs.h  | 2 +-
 include/linux/nfs_xdr.h | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 672368f865ca..78bf72fc1db3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -899,7 +899,7 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
 	iput(inode);
 }
 
-struct dentry_operations nfs_dentry_operations = {
+const struct dentry_operations nfs_dentry_operations = {
 	.d_revalidate	= nfs_lookup_revalidate,
 	.d_delete	= nfs_dentry_delete,
 	.d_iput		= nfs_dentry_iput,
@@ -967,7 +967,7 @@ out:
 #ifdef CONFIG_NFS_V4
 static int nfs_open_revalidate(struct dentry *, struct nameidata *);
 
-struct dentry_operations nfs4_dentry_operations = {
+const struct dentry_operations nfs4_dentry_operations = {
 	.d_revalidate	= nfs_open_revalidate,
 	.d_delete	= nfs_dentry_delete,
 	.d_iput		= nfs_dentry_iput,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 4e4d33204376..84345deab26f 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -179,7 +179,7 @@ struct nfs4_state_recovery_ops {
 	int (*recover_lock)(struct nfs4_state *, struct file_lock *);
 };
 
-extern struct dentry_operations nfs4_dentry_operations;
+extern const struct dentry_operations nfs4_dentry_operations;
 extern const struct inode_operations nfs4_dir_inode_operations;
 
 /* inode.c */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index db867b04ac3c..8cc8807f77d6 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -415,7 +415,7 @@ extern const struct inode_operations nfs_dir_inode_operations;
 extern const struct inode_operations nfs3_dir_inode_operations;
 #endif /* CONFIG_NFS_V3 */
 extern const struct file_operations nfs_dir_operations;
-extern struct dentry_operations nfs_dentry_operations;
+extern const struct dentry_operations nfs_dentry_operations;
 
 extern void nfs_force_lookup_revalidate(struct inode *dir);
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2e5f00066afd..43a713fce11c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -785,7 +785,7 @@ struct nfs_access_entry;
  */
 struct nfs_rpc_ops {
 	u32	version;		/* Protocol version */
-	struct dentry_operations *dentry_ops;
+	const struct dentry_operations *dentry_ops;
 	const struct inode_operations *dir_inode_ops;
 	const struct inode_operations *file_inode_ops;
 
-- 
cgit v1.2.3


From e16404ed0f3f330dc3e99b95cef69bb60bcd27f7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 20 Feb 2009 05:55:13 +0000
Subject: constify dentry_operations: misc filesystems

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/adfs/adfs.h          | 2 +-
 fs/adfs/dir.c           | 2 +-
 fs/affs/affs.h          | 3 +--
 fs/affs/namei.c         | 4 ++--
 fs/coda/dir.c           | 2 +-
 fs/hfs/hfs_fs.h         | 2 +-
 fs/hfs/sysdep.c         | 2 +-
 fs/hfsplus/hfsplus_fs.h | 2 +-
 fs/hfsplus/inode.c      | 2 +-
 fs/hostfs/hostfs_kern.c | 4 ++--
 fs/hpfs/dentry.c        | 2 +-
 fs/isofs/inode.c        | 2 +-
 fs/ncpfs/dir.c          | 4 ++--
 fs/reiserfs/xattr.c     | 2 +-
 fs/smbfs/dir.c          | 4 ++--
 fs/sysv/namei.c         | 2 +-
 fs/sysv/sysv.h          | 2 +-
 include/linux/ncp_fs.h  | 2 +-
 18 files changed, 22 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 831157502d5a..e0a85dbeeb88 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -86,7 +86,7 @@ void __adfs_error(struct super_block *sb, const char *function,
 /* dir_*.c */
 extern const struct inode_operations adfs_dir_inode_operations;
 extern const struct file_operations adfs_dir_operations;
-extern struct dentry_operations adfs_dentry_operations;
+extern const struct dentry_operations adfs_dentry_operations;
 extern struct adfs_dir_ops adfs_f_dir_ops;
 extern struct adfs_dir_ops adfs_fplus_dir_ops;
 
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 85a30e929800..e867ccf37246 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -263,7 +263,7 @@ adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name)
 	return 0;
 }
 
-struct dentry_operations adfs_dentry_operations = {
+const struct dentry_operations adfs_dentry_operations = {
 	.d_hash		= adfs_hash,
 	.d_compare	= adfs_compare,
 };
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index e9ec915f7553..1a2d5e3c7f4e 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -199,8 +199,7 @@ extern const struct address_space_operations	 affs_symlink_aops;
 extern const struct address_space_operations	 affs_aops;
 extern const struct address_space_operations	 affs_aops_ofs;
 
-extern struct dentry_operations	 affs_dentry_operations;
-extern struct dentry_operations	 affs_dentry_operations_intl;
+extern const struct dentry_operations	 affs_dentry_operations;
 
 static inline void
 affs_set_blocksize(struct super_block *sb, int size)
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index cfcf1b6cf82b..960d336ec694 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -19,12 +19,12 @@ static int	 affs_intl_toupper(int ch);
 static int	 affs_intl_hash_dentry(struct dentry *, struct qstr *);
 static int       affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
 
-struct dentry_operations affs_dentry_operations = {
+const struct dentry_operations affs_dentry_operations = {
 	.d_hash		= affs_hash_dentry,
 	.d_compare	= affs_compare_dentry,
 };
 
-static struct dentry_operations affs_intl_dentry_operations = {
+static const struct dentry_operations affs_intl_dentry_operations = {
 	.d_hash		= affs_intl_hash_dentry,
 	.d_compare	= affs_intl_compare_dentry,
 };
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 75b1fa90b2cb..4bb9d0a5decc 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -59,7 +59,7 @@ static int coda_return_EIO(void)
 }
 #define CODA_EIO_ERROR ((void *) (coda_return_EIO))
 
-static struct dentry_operations coda_dentry_operations =
+static const struct dentry_operations coda_dentry_operations =
 {
 	.d_revalidate	= coda_dentry_revalidate,
 	.d_delete	= coda_dentry_delete,
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 9955232fdf8c..052387e11671 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -213,7 +213,7 @@ extern void hfs_mdb_put(struct super_block *);
 extern int hfs_part_find(struct super_block *, sector_t *, sector_t *);
 
 /* string.c */
-extern struct dentry_operations hfs_dentry_operations;
+extern const struct dentry_operations hfs_dentry_operations;
 
 extern int hfs_hash_dentry(struct dentry *, struct qstr *);
 extern int hfs_strcmp(const unsigned char *, unsigned int,
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 5bf89ec01cd4..7478f5c219aa 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -31,7 +31,7 @@ static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd)
 	return 1;
 }
 
-struct dentry_operations hfs_dentry_operations =
+const struct dentry_operations hfs_dentry_operations =
 {
 	.d_revalidate	= hfs_revalidate_dentry,
 	.d_hash		= hfs_hash_dentry,
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index f027a905225f..5c10d803d9df 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -327,7 +327,7 @@ void hfsplus_file_truncate(struct inode *);
 /* inode.c */
 extern const struct address_space_operations hfsplus_aops;
 extern const struct address_space_operations hfsplus_btree_aops;
-extern struct dentry_operations hfsplus_dentry_operations;
+extern const struct dentry_operations hfsplus_dentry_operations;
 
 void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *);
 void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index f105ee9e1cc4..1bcf597c0562 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -137,7 +137,7 @@ const struct address_space_operations hfsplus_aops = {
 	.writepages	= hfsplus_writepages,
 };
 
-struct dentry_operations hfsplus_dentry_operations = {
+const struct dentry_operations hfsplus_dentry_operations = {
 	.d_hash       = hfsplus_hash_dentry,
 	.d_compare    = hfsplus_compare_dentry,
 };
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5c538e0ec14b..fe02ad4740e7 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -31,12 +31,12 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
 
 #define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
 
-int hostfs_d_delete(struct dentry *dentry)
+static int hostfs_d_delete(struct dentry *dentry)
 {
 	return 1;
 }
 
-struct dentry_operations hostfs_dentry_ops = {
+static const struct dentry_operations hostfs_dentry_ops = {
 	.d_delete		= hostfs_d_delete,
 };
 
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 08319126b2af..940d6d150bee 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -49,7 +49,7 @@ static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qst
 	return 0;
 }
 
-static struct dentry_operations hpfs_dentry_operations = {
+static const struct dentry_operations hpfs_dentry_operations = {
 	.d_hash		= hpfs_hash_dentry,
 	.d_compare	= hpfs_compare_dentry,
 };
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 6147ec3643a0..13d2eddd0692 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -114,7 +114,7 @@ static const struct super_operations isofs_sops = {
 };
 
 
-static struct dentry_operations isofs_dentry_ops[] = {
+static const struct dentry_operations isofs_dentry_ops[] = {
 	{
 		.d_hash		= isofs_hash,
 		.d_compare	= isofs_dentry_cmp,
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 07e9715b8658..9c590722d87e 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -79,7 +79,7 @@ static int ncp_hash_dentry(struct dentry *, struct qstr *);
 static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *);
 static int ncp_delete_dentry(struct dentry *);
 
-static struct dentry_operations ncp_dentry_operations =
+static const struct dentry_operations ncp_dentry_operations =
 {
 	.d_revalidate	= ncp_lookup_validate,
 	.d_hash		= ncp_hash_dentry,
@@ -87,7 +87,7 @@ static struct dentry_operations ncp_dentry_operations =
 	.d_delete	= ncp_delete_dentry,
 };
 
-struct dentry_operations ncp_root_dentry_operations =
+const struct dentry_operations ncp_root_dentry_operations =
 {
 	.d_hash		= ncp_hash_dentry,
 	.d_compare	= ncp_compare_dentry,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ad92461cbfc3..ae881ccd2f03 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -1136,7 +1136,7 @@ xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
 	return 1;
 }
 
-static struct dentry_operations xattr_lookup_poison_ops = {
+static const struct dentry_operations xattr_lookup_poison_ops = {
 	.d_compare = xattr_lookup_poison,
 };
 
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index e7ddd0328ddc..3e4803b4427e 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -277,7 +277,7 @@ static int smb_hash_dentry(struct dentry *, struct qstr *);
 static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
 static int smb_delete_dentry(struct dentry *);
 
-static struct dentry_operations smbfs_dentry_operations =
+static const struct dentry_operations smbfs_dentry_operations =
 {
 	.d_revalidate	= smb_lookup_validate,
 	.d_hash		= smb_hash_dentry,
@@ -285,7 +285,7 @@ static struct dentry_operations smbfs_dentry_operations =
 	.d_delete	= smb_delete_dentry,
 };
 
-static struct dentry_operations smbfs_dentry_operations_case =
+static const struct dentry_operations smbfs_dentry_operations_case =
 {
 	.d_revalidate	= smb_lookup_validate,
 	.d_delete	= smb_delete_dentry,
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index a1f1ef33e81c..33e047b59b8d 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -38,7 +38,7 @@ static int sysv_hash(struct dentry *dentry, struct qstr *qstr)
 	return 0;
 }
 
-struct dentry_operations sysv_dentry_operations = {
+const struct dentry_operations sysv_dentry_operations = {
 	.d_hash		= sysv_hash,
 };
 
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 38ebe3f85b3d..5784a318c883 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -170,7 +170,7 @@ extern const struct file_operations sysv_file_operations;
 extern const struct file_operations sysv_dir_operations;
 extern const struct address_space_operations sysv_aops;
 extern const struct super_operations sysv_sops;
-extern struct dentry_operations sysv_dentry_operations;
+extern const struct dentry_operations sysv_dentry_operations;
 
 
 enum {
diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h
index f69e66d151cc..30b06c893944 100644
--- a/include/linux/ncp_fs.h
+++ b/include/linux/ncp_fs.h
@@ -204,7 +204,7 @@ void ncp_update_inode2(struct inode *, struct ncp_entry_info *);
 /* linux/fs/ncpfs/dir.c */
 extern const struct inode_operations ncp_dir_inode_operations;
 extern const struct file_operations ncp_dir_operations;
-extern struct dentry_operations ncp_root_dentry_operations;
+extern const struct dentry_operations ncp_root_dentry_operations;
 int ncp_conn_logged_in(struct super_block *);
 int ncp_date_dos2unix(__le16 time, __le16 date);
 void ncp_date_unix2dos(int unix_date, __le16 * time, __le16 * date);
-- 
cgit v1.2.3


From 585d3bc06f4ca57f975a5a1f698f65a45ea66225 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 25 Feb 2009 10:44:19 +0100
Subject: fs: move bdev code out of buffer.c

Move some block device related code out from buffer.c and put it in
block_dev.c. I'm trying to move non-buffer_head code out of buffer.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c              | 146 ++++++++++++++++++++++++++++++++++++++++++++
 fs/buffer.c                 | 145 -------------------------------------------
 include/linux/buffer_head.h |   7 ---
 include/linux/fs.h          |   7 +++
 4 files changed, 153 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index b3c1efff5e1d..8c3c6899ccf3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/blkpg.h>
 #include <linux/buffer_head.h>
+#include <linux/pagevec.h>
 #include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/mount.h>
@@ -174,6 +175,151 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 				iov, offset, nr_segs, blkdev_get_blocks, NULL);
 }
 
+/*
+ * Write out and wait upon all the dirty data associated with a block
+ * device via its mapping.  Does not take the superblock lock.
+ */
+int sync_blockdev(struct block_device *bdev)
+{
+	int ret = 0;
+
+	if (bdev)
+		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
+	return ret;
+}
+EXPORT_SYMBOL(sync_blockdev);
+
+/*
+ * Write out and wait upon all dirty data associated with this
+ * device.   Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int fsync_bdev(struct block_device *bdev)
+{
+	struct super_block *sb = get_super(bdev);
+	if (sb) {
+		int res = fsync_super(sb);
+		drop_super(sb);
+		return res;
+	}
+	return sync_blockdev(bdev);
+}
+
+/**
+ * freeze_bdev  --  lock a filesystem and force it into a consistent state
+ * @bdev:	blockdevice to lock
+ *
+ * This takes the block device bd_mount_sem to make sure no new mounts
+ * happen on bdev until thaw_bdev() is called.
+ * If a superblock is found on this device, we take the s_umount semaphore
+ * on it to make sure nobody unmounts until the snapshot creation is done.
+ * The reference counter (bd_fsfreeze_count) guarantees that only the last
+ * unfreeze process can unfreeze the frozen filesystem actually when multiple
+ * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
+ * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
+ * actually.
+ */
+struct super_block *freeze_bdev(struct block_device *bdev)
+{
+	struct super_block *sb;
+	int error = 0;
+
+	mutex_lock(&bdev->bd_fsfreeze_mutex);
+	if (bdev->bd_fsfreeze_count > 0) {
+		bdev->bd_fsfreeze_count++;
+		sb = get_super(bdev);
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		return sb;
+	}
+	bdev->bd_fsfreeze_count++;
+
+	down(&bdev->bd_mount_sem);
+	sb = get_super(bdev);
+	if (sb && !(sb->s_flags & MS_RDONLY)) {
+		sb->s_frozen = SB_FREEZE_WRITE;
+		smp_wmb();
+
+		__fsync_super(sb);
+
+		sb->s_frozen = SB_FREEZE_TRANS;
+		smp_wmb();
+
+		sync_blockdev(sb->s_bdev);
+
+		if (sb->s_op->freeze_fs) {
+			error = sb->s_op->freeze_fs(sb);
+			if (error) {
+				printk(KERN_ERR
+					"VFS:Filesystem freeze failed\n");
+				sb->s_frozen = SB_UNFROZEN;
+				drop_super(sb);
+				up(&bdev->bd_mount_sem);
+				bdev->bd_fsfreeze_count--;
+				mutex_unlock(&bdev->bd_fsfreeze_mutex);
+				return ERR_PTR(error);
+			}
+		}
+	}
+
+	sync_blockdev(bdev);
+	mutex_unlock(&bdev->bd_fsfreeze_mutex);
+
+	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
+}
+EXPORT_SYMBOL(freeze_bdev);
+
+/**
+ * thaw_bdev  -- unlock filesystem
+ * @bdev:	blockdevice to unlock
+ * @sb:		associated superblock
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_bdev().
+ */
+int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+{
+	int error = 0;
+
+	mutex_lock(&bdev->bd_fsfreeze_mutex);
+	if (!bdev->bd_fsfreeze_count) {
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		return -EINVAL;
+	}
+
+	bdev->bd_fsfreeze_count--;
+	if (bdev->bd_fsfreeze_count > 0) {
+		if (sb)
+			drop_super(sb);
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		return 0;
+	}
+
+	if (sb) {
+		BUG_ON(sb->s_bdev != bdev);
+		if (!(sb->s_flags & MS_RDONLY)) {
+			if (sb->s_op->unfreeze_fs) {
+				error = sb->s_op->unfreeze_fs(sb);
+				if (error) {
+					printk(KERN_ERR
+						"VFS:Filesystem thaw failed\n");
+					sb->s_frozen = SB_FREEZE_TRANS;
+					bdev->bd_fsfreeze_count++;
+					mutex_unlock(&bdev->bd_fsfreeze_mutex);
+					return error;
+				}
+			}
+			sb->s_frozen = SB_UNFROZEN;
+			smp_wmb();
+			wake_up(&sb->s_wait_unfrozen);
+		}
+		drop_super(sb);
+	}
+
+	up(&bdev->bd_mount_sem);
+	mutex_unlock(&bdev->bd_fsfreeze_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(thaw_bdev);
+
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, blkdev_get_block, wbc);
diff --git a/fs/buffer.c b/fs/buffer.c
index 891e1c78e4f1..a2fd743d97cb 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -165,151 +165,6 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 	put_bh(bh);
 }
 
-/*
- * Write out and wait upon all the dirty data associated with a block
- * device via its mapping.  Does not take the superblock lock.
- */
-int sync_blockdev(struct block_device *bdev)
-{
-	int ret = 0;
-
-	if (bdev)
-		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-	return ret;
-}
-EXPORT_SYMBOL(sync_blockdev);
-
-/*
- * Write out and wait upon all dirty data associated with this
- * device.   Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
-	struct super_block *sb = get_super(bdev);
-	if (sb) {
-		int res = fsync_super(sb);
-		drop_super(sb);
-		return res;
-	}
-	return sync_blockdev(bdev);
-}
-
-/**
- * freeze_bdev  --  lock a filesystem and force it into a consistent state
- * @bdev:	blockdevice to lock
- *
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
- * If a superblock is found on this device, we take the s_umount semaphore
- * on it to make sure nobody unmounts until the snapshot creation is done.
- * The reference counter (bd_fsfreeze_count) guarantees that only the last
- * unfreeze process can unfreeze the frozen filesystem actually when multiple
- * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
- * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
- * actually.
- */
-struct super_block *freeze_bdev(struct block_device *bdev)
-{
-	struct super_block *sb;
-	int error = 0;
-
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		bdev->bd_fsfreeze_count++;
-		sb = get_super(bdev);
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return sb;
-	}
-	bdev->bd_fsfreeze_count++;
-
-	down(&bdev->bd_mount_sem);
-	sb = get_super(bdev);
-	if (sb && !(sb->s_flags & MS_RDONLY)) {
-		sb->s_frozen = SB_FREEZE_WRITE;
-		smp_wmb();
-
-		__fsync_super(sb);
-
-		sb->s_frozen = SB_FREEZE_TRANS;
-		smp_wmb();
-
-		sync_blockdev(sb->s_bdev);
-
-		if (sb->s_op->freeze_fs) {
-			error = sb->s_op->freeze_fs(sb);
-			if (error) {
-				printk(KERN_ERR
-					"VFS:Filesystem freeze failed\n");
-				sb->s_frozen = SB_UNFROZEN;
-				drop_super(sb);
-				up(&bdev->bd_mount_sem);
-				bdev->bd_fsfreeze_count--;
-				mutex_unlock(&bdev->bd_fsfreeze_mutex);
-				return ERR_PTR(error);
-			}
-		}
-	}
-
-	sync_blockdev(bdev);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
-	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
-}
-EXPORT_SYMBOL(freeze_bdev);
-
-/**
- * thaw_bdev  -- unlock filesystem
- * @bdev:	blockdevice to unlock
- * @sb:		associated superblock
- *
- * Unlocks the filesystem and marks it writeable again after freeze_bdev().
- */
-int thaw_bdev(struct block_device *bdev, struct super_block *sb)
-{
-	int error = 0;
-
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (!bdev->bd_fsfreeze_count) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return -EINVAL;
-	}
-
-	bdev->bd_fsfreeze_count--;
-	if (bdev->bd_fsfreeze_count > 0) {
-		if (sb)
-			drop_super(sb);
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return 0;
-	}
-
-	if (sb) {
-		BUG_ON(sb->s_bdev != bdev);
-		if (!(sb->s_flags & MS_RDONLY)) {
-			if (sb->s_op->unfreeze_fs) {
-				error = sb->s_op->unfreeze_fs(sb);
-				if (error) {
-					printk(KERN_ERR
-						"VFS:Filesystem thaw failed\n");
-					sb->s_frozen = SB_FREEZE_TRANS;
-					bdev->bd_fsfreeze_count++;
-					mutex_unlock(&bdev->bd_fsfreeze_mutex);
-					return error;
-				}
-			}
-			sb->s_frozen = SB_UNFROZEN;
-			smp_wmb();
-			wake_up(&sb->s_wait_unfrozen);
-		}
-		drop_super(sb);
-	}
-
-	up(&bdev->bd_mount_sem);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(thaw_bdev);
-
 /*
  * Various filesystems appear to want __find_get_block to be non-blocking.
  * But it's the page lock which protects the buffers.  To get around this,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index bd7ac793be19..f19fd9045ea0 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -165,15 +165,8 @@ int sync_mapping_buffers(struct address_space *mapping);
 void unmap_underlying_metadata(struct block_device *bdev, sector_t block);
 
 void mark_buffer_async_write(struct buffer_head *bh);
-void invalidate_bdev(struct block_device *);
-int sync_blockdev(struct block_device *bdev);
 void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
-int fsync_bdev(struct block_device *);
-struct super_block *freeze_bdev(struct block_device *);
-int thaw_bdev(struct block_device *, struct super_block *);
-int fsync_super(struct super_block *);
-int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
 			unsigned size);
 struct buffer_head *__getblk(struct block_device *bdev, sector_t block,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5f74d616cd7d..c2c4454a268a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1874,6 +1874,13 @@ extern void bd_set_size(struct block_device *, loff_t size);
 extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct block_device *open_by_devnum(dev_t, fmode_t);
+extern void invalidate_bdev(struct block_device *);
+extern int sync_blockdev(struct block_device *bdev);
+extern struct super_block *freeze_bdev(struct block_device *);
+extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
+extern int fsync_bdev(struct block_device *);
+extern int fsync_super(struct super_block *);
+extern int fsync_no_super(struct block_device *);
 #else
 static inline void bd_forget(struct inode *inode) {}
 #endif
-- 
cgit v1.2.3


From a3ec947c85ec339884b30ef6a08133e9311fdae1 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Wed, 4 Mar 2009 12:06:34 -0800
Subject: vfs: simple_set_mnt() should return void

simple_set_mnt() is defined as returning 'int' but always returns 0.
Callers assume simple_set_mnt() never fails and don't properly cleanup if
it were to _ever_ fail.  For instance, get_sb_single() and get_sb_nodev()
should:

        up_write(sb->s_unmount);
        deactivate_super(sb);

if simple_set_mnt() fails.

Since simple_set_mnt() never fails, would be cleaner if it did not
return anything.

[akpm@linux-foundation.org: fix build]
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/mtd/mtdsuper.c | 7 +++++--
 fs/9p/vfs_super.c      | 5 +++--
 fs/cifs/cifsfs.c       | 3 ++-
 fs/devpts/inode.c      | 3 ++-
 fs/libfs.c             | 3 ++-
 fs/namespace.c         | 3 +--
 fs/proc/root.c         | 3 ++-
 fs/super.c             | 9 ++++++---
 fs/ubifs/super.c       | 3 ++-
 include/linux/fs.h     | 2 +-
 kernel/cgroup.c        | 3 ++-
 11 files changed, 28 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 00d46e137b2a..92285d0089c2 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -81,13 +81,16 @@ static int get_sb_mtd_aux(struct file_system_type *fs_type, int flags,
 
 	/* go */
 	sb->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, sb);
+	simple_set_mnt(mnt, sb);
+
+	return 0;
 
 	/* new mountpoint for an already mounted superblock */
 already_mounted:
 	DEBUG(1, "MTDSB: Device %d (\"%s\") is already mounted\n",
 	      mtd->index, mtd->name);
-	ret = simple_set_mnt(mnt, sb);
+	simple_set_mnt(mnt, sb);
+	ret = 0;
 	goto out_put;
 
 out_error:
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 93212e40221a..5f8ab8adb5f5 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -168,8 +168,9 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	p9stat_free(st);
 	kfree(st);
 
-P9_DPRINTK(P9_DEBUG_VFS, " return simple set mount\n");
-	return simple_set_mnt(mnt, sb);
+P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
+	simple_set_mnt(mnt, sb);
+	return 0;
 
 release_sb:
 	if (sb) {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 13ea53251dcf..38491fd3871d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -606,7 +606,8 @@ cifs_get_sb(struct file_system_type *fs_type,
 		return rc;
 	}
 	sb->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, sb);
+	simple_set_mnt(mnt, sb);
+	return 0;
 }
 
 static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 140b43144cd8..b0a76340a4cd 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -454,7 +454,8 @@ static int get_init_pts_sb(struct file_system_type *fs_type, int flags,
 		s->s_flags |= MS_ACTIVE;
 	}
 	do_remount_sb(s, flags, data, 0);
-	return simple_set_mnt(mnt, s);
+	simple_set_mnt(mnt, s);
+	return 0;
 }
 
 /*
diff --git a/fs/libfs.c b/fs/libfs.c
index ec600bd33e75..4910a36f516e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -242,7 +242,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
 	d_instantiate(dentry, root);
 	s->s_root = dentry;
 	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
+	simple_set_mnt(mnt, s);
+	return 0;
 
 Enomem:
 	up_write(&s->s_umount);
diff --git a/fs/namespace.c b/fs/namespace.c
index 06f8e63f6cb1..2432ca6bb223 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -397,11 +397,10 @@ static void __mnt_unmake_readonly(struct vfsmount *mnt)
 	spin_unlock(&vfsmount_lock);
 }
 
-int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
+void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
 {
 	mnt->mnt_sb = sb;
 	mnt->mnt_root = dget(sb->s_root);
-	return 0;
 }
 
 EXPORT_SYMBOL(simple_set_mnt);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index f6299a25594e..1e15a2b176e8 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -83,7 +83,8 @@ static int proc_get_sb(struct file_system_type *fs_type,
 		ns->proc_mnt = mnt;
 	}
 
-	return simple_set_mnt(mnt, sb);
+	simple_set_mnt(mnt, sb);
+	return 0;
 }
 
 static void proc_kill_sb(struct super_block *sb)
diff --git a/fs/super.c b/fs/super.c
index 6ce501447ada..e512fab64c93 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -831,7 +831,8 @@ int get_sb_bdev(struct file_system_type *fs_type,
 		bdev->bd_super = s;
 	}
 
-	return simple_set_mnt(mnt, s);
+	simple_set_mnt(mnt, s);
+	return 0;
 
 error_s:
 	error = PTR_ERR(s);
@@ -877,7 +878,8 @@ int get_sb_nodev(struct file_system_type *fs_type,
 		return error;
 	}
 	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
+	simple_set_mnt(mnt, s);
+	return 0;
 }
 
 EXPORT_SYMBOL(get_sb_nodev);
@@ -909,7 +911,8 @@ int get_sb_single(struct file_system_type *fs_type,
 		s->s_flags |= MS_ACTIVE;
 	}
 	do_remount_sb(s, flags, data, 0);
-	return simple_set_mnt(mnt, s);
+	simple_set_mnt(mnt, s);
+	return 0;
 }
 
 EXPORT_SYMBOL(get_sb_single);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1182b66a5491..c5c98355459a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2034,7 +2034,8 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
 	/* 'fill_super()' opens ubi again so we must close it here */
 	ubi_close_volume(ubi);
 
-	return simple_set_mnt(mnt, sb);
+	simple_set_mnt(mnt, sb);
+	return 0;
 
 out_deact:
 	up_write(&sb->s_umount);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c2c4454a268a..a7d73914a9f7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1719,7 +1719,7 @@ struct super_block *sget(struct file_system_type *type,
 extern int get_sb_pseudo(struct file_system_type *, char *,
 	const struct super_operations *ops, unsigned long,
 	struct vfsmount *mnt);
-extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
+extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
 int __put_super_and_need_restart(struct super_block *sb);
 
 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b01100ebd074..c500ca7239b2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1071,7 +1071,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 		mutex_unlock(&cgroup_mutex);
 	}
 
-	return simple_set_mnt(mnt, sb);
+	simple_set_mnt(mnt, sb);
+	return 0;
 
  free_cg_links:
 	free_cg_links(&tmp_cg_links);
-- 
cgit v1.2.3


From cc0be3227df9146968311308a9d19db1469ce1db Mon Sep 17 00:00:00 2001
From: Dmitri Vorobiev <dmitri.vorobiev@movial.com>
Date: Fri, 27 Mar 2009 15:55:36 -0700
Subject: net: Add missing include into include/linux/netdevice.h

The inline function skb_gro_mac_header defined in include/linux/netdevice.h
makes use of page_address(). Depending on configuration options, the latter
is either defined as a macro or is declared as a function in another header
file, namely include/linux/mm.h. However, include/linux/netdevice.h does not
include include/linux/mm.h.

On MIPS, this has produced the following build error:

  CC      kernel/sysctl_check.o
In file included from include/linux/icmpv6.h:173,
                 from include/linux/ipv6.h:208,
                 from include/net/ip_vs.h:26,
                 from kernel/sysctl_check.c:6:
include/linux/netdevice.h: In function 'skb_gro_mac_header':
include/linux/netdevice.h:1132: error: implicit declaration of function
'page_address'
include/linux/netdevice.h:1133: warning: pointer/integer type mismatch
in conditional expression
make[1]: *** [kernel/sysctl_check.o] Error 1
make: *** [kernel] Error 2

The patch adds the missing include and fixes the build error.

Signed-off-by: Dmitri Vorobiev <dmitri.vorobiev@movial.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index be3ebd7e8ce5..1b55952a17f6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -32,6 +32,7 @@
 #ifdef __KERNEL__
 #include <linux/timer.h>
 #include <linux/delay.h>
+#include <linux/mm.h>
 #include <asm/atomic.h>
 #include <asm/cache.h>
 #include <asm/byteorder.h>
-- 
cgit v1.2.3


From 8fdc621dc743b87879ccf0177969864b09388d9a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 14 Mar 2009 09:34:01 +0100
Subject: nl80211: export supported commands

This makes nl80211 export the supported commands (command groups)
per wiphy so userspace has an idea what it can do -- this will be
required reading for userspace when we introduce auth/assoc /or/
connect for older hardware that cannot separate auth and assoc.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  6 ++++++
 net/wireless/nl80211.c  | 27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f33aa08dd9b3..3700d927e245 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -349,6 +349,10 @@ enum nl80211_commands {
  * @NL80211_ATTR_REG_TYPE: indicates the type of the regulatory domain currently
  * 	set. This can be one of the nl80211_reg_type (%NL80211_REGDOM_TYPE_*)
  *
+ * @NL80211_ATTR_SUPPORTED_COMMANDS: wiphy attribute that specifies
+ *	an array of command numbers (i.e. a mapping index to command number)
+ *	that the driver for the given wiphy supports.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -426,6 +430,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_REG_INITIATOR,
 	NL80211_ATTR_REG_TYPE,
 
+	NL80211_ATTR_SUPPORTED_COMMANDS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ab9d8f14e151..58ee1b1aff89 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -131,6 +131,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	struct nlattr *nl_freqs, *nl_freq;
 	struct nlattr *nl_rates, *nl_rate;
 	struct nlattr *nl_modes;
+	struct nlattr *nl_cmds;
 	enum ieee80211_band band;
 	struct ieee80211_channel *chan;
 	struct ieee80211_rate *rate;
@@ -242,6 +243,32 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	}
 	nla_nest_end(msg, nl_bands);
 
+	nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
+	if (!nl_cmds)
+		goto nla_put_failure;
+
+	i = 0;
+#define CMD(op, n)						\
+	 do {							\
+		if (dev->ops->op) {				\
+			i++;					\
+			NLA_PUT_U32(msg, i, NL80211_CMD_ ## n);	\
+		}						\
+	} while (0)
+
+	CMD(add_virtual_intf, NEW_INTERFACE);
+	CMD(change_virtual_intf, SET_INTERFACE);
+	CMD(add_key, NEW_KEY);
+	CMD(add_beacon, NEW_BEACON);
+	CMD(add_station, NEW_STATION);
+	CMD(add_mpath, NEW_MPATH);
+	CMD(set_mesh_params, SET_MESH_PARAMS);
+	CMD(change_bss, SET_BSS);
+	CMD(set_mgmt_extra_ie, SET_MGMT_EXTRA_IE);
+
+#undef CMD
+	nla_nest_end(msg, nl_cmds);
+
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-- 
cgit v1.2.3


From 3f46b29cd8caa35fcbc46e254a5abeee4e0e9e2f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 14 Mar 2009 19:10:51 +0100
Subject: ieee80211: document DS bit usage

I keep needing this because I'm too stupid to remember it.
Everybody else can probably remember, but who knows :)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index b1bb817d1427..382387e75b89 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -18,6 +18,22 @@
 #include <linux/types.h>
 #include <asm/byteorder.h>
 
+/*
+ * DS bit usage
+ *
+ * TA = transmitter address
+ * RA = receiver address
+ * DA = destination address
+ * SA = source address
+ *
+ * ToDS    FromDS  A1(RA)  A2(TA)  A3      A4      Use
+ * -----------------------------------------------------------------
+ *  0       0       DA      SA      BSSID   -       IBSS/DLS
+ *  0       1       DA      BSSID   SA      -       AP -> STA
+ *  1       0       BSSID   SA      DA      -       AP <- STA
+ *  1       1       RA      TA      DA      SA      unspecified (WDS)
+ */
+
 #define FCS_LEN 4
 
 #define IEEE80211_FCTL_VERS		0x0003
-- 
cgit v1.2.3


From 6039f6d23fe792d615da5449e9fa1c6b43caacf6 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Thu, 19 Mar 2009 13:39:21 +0200
Subject: nl80211: Event notifications for MLME events

Add new nl80211 event notifications (and a new multicast group, "mlme")
for informing user space about received and processed Authentication,
(Re)Association Response, Deauthentication, and Disassociation frames in
station and IBSS modes (i.e., MLME SAP interface primitives
MLME-AUTHENTICATE.confirm, MLME-ASSOCIATE.confirm,
MLME-REASSOCIATE.confirm, MLME-DEAUTHENTICATE.indicate, and
MLME-DISASSOCIATE.indication). The event data is encapsulated as the 802.11
management frame since we already have the frame in that format and it
includes all the needed information.

This is the initial step in providing MLME SAP interface for
authentication and association with nl80211. In other words, kernel code
will act as the MLME and a user space application can control it as the
SME.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 36 ++++++++++++++++++++++++-
 include/net/cfg80211.h  | 46 +++++++++++++++++++++++++++++++
 net/mac80211/mlme.c     |  9 +++++--
 net/wireless/Makefile   |  2 +-
 net/wireless/mlme.c     | 46 +++++++++++++++++++++++++++++++
 net/wireless/nl80211.c  | 72 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  | 12 +++++++++
 7 files changed, 219 insertions(+), 4 deletions(-)
 create mode 100644 net/wireless/mlme.c

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 3700d927e245..5ce68ae8314e 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -161,6 +161,25 @@
  * 	%NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on
  * 	to (%NL80211_ATTR_REG_ALPHA2).
  *
+ * @NL80211_CMD_AUTHENTICATE: authentication notification (on the "mlme"
+ *	multicast group). This event reports reception of an Authentication
+ *	frame in station and IBSS modes when the local MLME processed the
+ *	frame, i.e., it was for the local STA and was received in correct
+ *	state. This is similar to MLME-AUTHENTICATE.confirm primitive in the
+ *	MLME SAP interface (kernel providing MLME, userspace SME). The
+ *	included NL80211_ATTR_FRAME attribute contains the management frame
+ *	(including both the header and frame body, but not FCS).
+ * @NL80211_CMD_ASSOCIATE: association notification; like
+ *	NL80211_CMD_AUTHENTICATE but for Association Response and Reassociation
+ *	Response frames (similar to MLME-ASSOCIATE.confirm or
+ *	MLME-REASSOCIATE.confirm primitives).
+ * @NL80211_CMD_DEAUTHENTICATE: deauthentication notification; like
+ *	NL80211_CMD_AUTHENTICATE but for Deauthentication frames (similar to
+ *	MLME-DEAUTHENTICATE.indication primitive).
+ * @NL80211_CMD_DISASSOCIATE: disassociation notification; like
+ *	NL80211_CMD_AUTHENTICATE but for Disassociation frames (similar to
+ *	MLME-DISASSOCIATE.indication primitive).
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -217,6 +236,11 @@ enum nl80211_commands {
 
 	NL80211_CMD_REG_CHANGE,
 
+	NL80211_CMD_AUTHENTICATE,
+	NL80211_CMD_ASSOCIATE,
+	NL80211_CMD_DEAUTHENTICATE,
+	NL80211_CMD_DISASSOCIATE,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -230,8 +254,11 @@ enum nl80211_commands {
  */
 #define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS
 #define NL80211_CMD_SET_MGMT_EXTRA_IE NL80211_CMD_SET_MGMT_EXTRA_IE
-
 #define NL80211_CMD_REG_CHANGE NL80211_CMD_REG_CHANGE
+#define NL80211_CMD_AUTHENTICATE NL80211_CMD_AUTHENTICATE
+#define NL80211_CMD_ASSOCIATE NL80211_CMD_ASSOCIATE
+#define NL80211_CMD_DEAUTHENTICATE NL80211_CMD_DEAUTHENTICATE
+#define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE
 
 /**
  * enum nl80211_attrs - nl80211 netlink attributes
@@ -353,6 +380,10 @@ enum nl80211_commands {
  *	an array of command numbers (i.e. a mapping index to command number)
  *	that the driver for the given wiphy supports.
  *
+ * @NL80211_ATTR_FRAME: frame data (binary attribute), including frame header
+ *	and body, but not FCS; used, e.g., with NL80211_CMD_AUTHENTICATE and
+ *	NL80211_CMD_ASSOCIATE events
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -432,6 +463,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SUPPORTED_COMMANDS,
 
+	NL80211_ATTR_FRAME,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -451,6 +484,7 @@ enum nl80211_attrs {
 #define NL80211_ATTR_IE NL80211_ATTR_IE
 #define NL80211_ATTR_REG_INITIATOR NL80211_ATTR_REG_INITIATOR
 #define NL80211_ATTR_REG_TYPE NL80211_ATTR_REG_TYPE
+#define NL80211_ATTR_FRAME NL80211_ATTR_FRAME
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 50f3fd9ff524..ad44016021b1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -807,4 +807,50 @@ void cfg80211_put_bss(struct cfg80211_bss *bss);
  */
 void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *bss);
 
+/**
+ * cfg80211_send_rx_auth - notification of processed authentication
+ * @dev: network device
+ * @buf: authentication frame (header + body)
+ * @len: length of the frame data
+ *
+ * This function is called whenever an authentication has been processed in
+ * station mode.
+ */
+void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len);
+
+/**
+ * cfg80211_send_rx_assoc - notification of processed association
+ * @dev: network device
+ * @buf: (re)association response frame (header + body)
+ * @len: length of the frame data
+ *
+ * This function is called whenever a (re)association response has been
+ * processed in station mode.
+ */
+void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len);
+
+/**
+ * cfg80211_send_rx_deauth - notification of processed deauthentication
+ * @dev: network device
+ * @buf: deauthentication frame (header + body)
+ * @len: length of the frame data
+ *
+ * This function is called whenever deauthentication has been processed in
+ * station mode.
+ */
+void cfg80211_send_rx_deauth(struct net_device *dev, const u8 *buf,
+			     size_t len);
+
+/**
+ * cfg80211_send_rx_disassoc - notification of processed disassociation
+ * @dev: network device
+ * @buf: disassociation response frame (header + body)
+ * @len: length of the frame data
+ *
+ * This function is called whenever disassociation has been processed in
+ * station mode.
+ */
+void cfg80211_send_rx_disassoc(struct net_device *dev, const u8 *buf,
+			       size_t len);
+
 #endif /* __NET_CFG80211_H */
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1f49b63d8dd2..6dc7a61bc18b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1085,11 +1085,13 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	case WLAN_AUTH_OPEN:
 	case WLAN_AUTH_LEAP:
 		ieee80211_auth_completed(sdata);
+		cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len);
 		break;
 	case WLAN_AUTH_SHARED_KEY:
-		if (ifmgd->auth_transaction == 4)
+		if (ifmgd->auth_transaction == 4) {
 			ieee80211_auth_completed(sdata);
-		else
+			cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len);
+		} else
 			ieee80211_auth_challenge(sdata, mgmt, len);
 		break;
 	}
@@ -1125,6 +1127,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_set_disassoc(sdata, true, false, 0);
 	ifmgd->flags &= ~IEEE80211_STA_AUTHENTICATED;
+	cfg80211_send_rx_deauth(sdata->dev, (u8 *) mgmt, len);
 }
 
 
@@ -1154,6 +1157,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 	}
 
 	ieee80211_set_disassoc(sdata, false, false, reason_code);
+	cfg80211_send_rx_disassoc(sdata->dev, (u8 *) mgmt, len);
 }
 
 
@@ -1370,6 +1374,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	ieee80211_set_associated(sdata, changed);
 
 	ieee80211_associated(sdata);
+	cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len);
 }
 
 
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index c157b4d8014b..6d1e7b27b752 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
 obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
 obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
 
-cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o
+cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o
 cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o
 
 ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
new file mode 100644
index 000000000000..bec5721b6f99
--- /dev/null
+++ b/net/wireless/mlme.c
@@ -0,0 +1,46 @@
+/*
+ * cfg80211 MLME SAP interface
+ *
+ * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/nl80211.h>
+#include <net/cfg80211.h>
+#include "core.h"
+#include "nl80211.h"
+
+void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	nl80211_send_rx_auth(rdev, dev, buf, len);
+}
+EXPORT_SYMBOL(cfg80211_send_rx_auth);
+
+void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	nl80211_send_rx_assoc(rdev, dev, buf, len);
+}
+EXPORT_SYMBOL(cfg80211_send_rx_assoc);
+
+void cfg80211_send_rx_deauth(struct net_device *dev, const u8 *buf, size_t len)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	nl80211_send_rx_deauth(rdev, dev, buf, len);
+}
+EXPORT_SYMBOL(cfg80211_send_rx_deauth);
+
+void cfg80211_send_rx_disassoc(struct net_device *dev, const u8 *buf,
+			       size_t len)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	nl80211_send_rx_disassoc(rdev, dev, buf, len);
+}
+EXPORT_SYMBOL(cfg80211_send_rx_disassoc);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a3ecf8d73898..c034c2418cb3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2830,6 +2830,9 @@ static struct genl_ops nl80211_ops[] = {
 		.dumpit = nl80211_dump_scan,
 	},
 };
+static struct genl_multicast_group nl80211_mlme_mcgrp = {
+	.name = "mlme",
+};
 
 /* multicast groups */
 static struct genl_multicast_group nl80211_config_mcgrp = {
@@ -2975,6 +2978,71 @@ nla_put_failure:
 	nlmsg_free(msg);
 }
 
+static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
+				    struct net_device *netdev,
+				    const u8 *buf, size_t len,
+				    enum nl80211_commands cmd)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
+void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
+			  struct net_device *netdev, const u8 *buf, size_t len)
+{
+	nl80211_send_mlme_event(rdev, netdev, buf, len,
+				NL80211_CMD_AUTHENTICATE);
+}
+
+void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
+			   struct net_device *netdev, const u8 *buf,
+			   size_t len)
+{
+	nl80211_send_mlme_event(rdev, netdev, buf, len, NL80211_CMD_ASSOCIATE);
+}
+
+void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev,
+			    struct net_device *netdev, const u8 *buf,
+			    size_t len)
+{
+	nl80211_send_mlme_event(rdev, netdev, buf, len,
+				NL80211_CMD_DEAUTHENTICATE);
+}
+
+void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev,
+			      struct net_device *netdev, const u8 *buf,
+			      size_t len)
+{
+	nl80211_send_mlme_event(rdev, netdev, buf, len,
+				NL80211_CMD_DISASSOCIATE);
+}
+
 /* initialisation/exit functions */
 
 int nl80211_init(void)
@@ -3003,6 +3071,10 @@ int nl80211_init(void)
 	if (err)
 		goto err_out;
 
+	err = genl_register_mc_group(&nl80211_fam, &nl80211_mlme_mcgrp);
+	if (err)
+		goto err_out;
+
 	return 0;
  err_out:
 	genl_unregister_family(&nl80211_fam);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 5b5fe1339de0..b77af4ab80be 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -11,5 +11,17 @@ extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
 extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
 				      struct net_device *netdev);
 extern void nl80211_send_reg_change_event(struct regulatory_request *request);
+extern void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
+				 struct net_device *netdev,
+				 const u8 *buf, size_t len);
+extern void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev,
+				  const u8 *buf, size_t len);
+extern void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev,
+				   struct net_device *netdev,
+				   const u8 *buf, size_t len);
+extern void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev,
+				     struct net_device *netdev,
+				     const u8 *buf, size_t len);
 
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3


From 636a5d3625993c5ca59abc81794b9ded93cdb740 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Thu, 19 Mar 2009 13:39:22 +0200
Subject: nl80211: Add MLME primitives to support external SME

This patch adds new nl80211 commands to allow user space to request
authentication and association (and also deauthentication and
disassociation). The commands are structured to allow separate
authentication and association steps, i.e., the interface between
kernel and user space is similar to the MLME SAP interface in IEEE
802.11 standard and an user space application takes the role of the
SME.

The patch introduces MLME-AUTHENTICATE.request,
MLME-{,RE}ASSOCIATE.request, MLME-DEAUTHENTICATE.request, and
MLME-DISASSOCIATE.request primitives. The authentication and
association commands request the actual operations in two steps
(assuming the driver supports this; if not, separate authentication
step is skipped; this could end up being a separate "connect"
command).

The initial implementation for mac80211 uses the current
net/mac80211/mlme.c for actual sending and processing of management
frames and the new nl80211 commands will just stop the current state
machine from moving automatically from authentication to association.
Future cleanup may move more of the MLME operations into cfg80211.

The goal of this design is to provide more control of authentication and
association process to user space without having to move the full MLME
implementation. This should be enough to allow IEEE 802.11r FT protocol
and 802.11s SAE authentication to be implemented. Obviously, this will
also bring the extra benefit of not having to use WEXT for association
requests with mac80211. An example implementation of a user space SME
using the new nl80211 commands is available for wpa_supplicant.

This patch is enough to get IEEE 802.11r FT protocol working with
over-the-air mechanism (over-the-DS will need additional MLME
primitives for handling the FT Action frames).

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |   1 +
 include/linux/nl80211.h    |  58 +++++++++--
 include/net/cfg80211.h     | 113 ++++++++++++++++++++
 net/mac80211/cfg.c         | 140 +++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h |   7 +-
 net/mac80211/mlme.c        |  45 ++++++--
 net/mac80211/wext.c        |   3 +
 net/wireless/nl80211.c     | 255 +++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 601 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 382387e75b89..4b501b48ce86 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -867,6 +867,7 @@ struct ieee80211_ht_info {
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
+#define WLAN_AUTH_FT 2
 #define WLAN_AUTH_LEAP 128
 
 #define WLAN_AUTH_CHALLENGE_LEN 128
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 5ce68ae8314e..9685eaab40a9 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -161,24 +161,37 @@
  * 	%NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on
  * 	to (%NL80211_ATTR_REG_ALPHA2).
  *
- * @NL80211_CMD_AUTHENTICATE: authentication notification (on the "mlme"
- *	multicast group). This event reports reception of an Authentication
+ * @NL80211_CMD_AUTHENTICATE: authentication request and notification.
+ *	This command is used both as a command (request to authenticate) and
+ *	as an event on the "mlme" multicast group indicating completion of the
+ *	authentication process.
+ *	When used as a command, %NL80211_ATTR_IFINDEX is used to identify the
+ *	interface. %NL80211_ATTR_MAC is used to specify PeerSTAAddress (and
+ *	BSSID in case of station mode). %NL80211_ATTR_SSID is used to specify
+ *	the SSID (mainly for association, but is included in authentication
+ *	request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ is used
+ *	to specify the frequence of the channel in MHz. %NL80211_ATTR_AUTH_TYPE
+ *	is used to specify the authentication type. %NL80211_ATTR_IE is used to
+ *	define IEs (VendorSpecificInfo, but also including RSN IE and FT IEs)
+ *	to be added to the frame.
+ *	When used as an event, this reports reception of an Authentication
  *	frame in station and IBSS modes when the local MLME processed the
  *	frame, i.e., it was for the local STA and was received in correct
  *	state. This is similar to MLME-AUTHENTICATE.confirm primitive in the
  *	MLME SAP interface (kernel providing MLME, userspace SME). The
  *	included NL80211_ATTR_FRAME attribute contains the management frame
  *	(including both the header and frame body, but not FCS).
- * @NL80211_CMD_ASSOCIATE: association notification; like
- *	NL80211_CMD_AUTHENTICATE but for Association Response and Reassociation
- *	Response frames (similar to MLME-ASSOCIATE.confirm or
- *	MLME-REASSOCIATE.confirm primitives).
- * @NL80211_CMD_DEAUTHENTICATE: deauthentication notification; like
+ * @NL80211_CMD_ASSOCIATE: association request and notification; like
+ *	NL80211_CMD_AUTHENTICATE but for Association and Reassociation
+ *	(similar to MLME-ASSOCIATE.request, MLME-REASSOCIATE.request,
+ *	MLME-ASSOCIATE.confirm or MLME-REASSOCIATE.confirm primitives).
+ * @NL80211_CMD_DEAUTHENTICATE: deauthentication request and notification; like
  *	NL80211_CMD_AUTHENTICATE but for Deauthentication frames (similar to
- *	MLME-DEAUTHENTICATE.indication primitive).
- * @NL80211_CMD_DISASSOCIATE: disassociation notification; like
+ *	MLME-DEAUTHENTICATION.request and MLME-DEAUTHENTICATE.indication
+ *	primitives).
+ * @NL80211_CMD_DISASSOCIATE: disassociation request and notification; like
  *	NL80211_CMD_AUTHENTICATE but for Disassociation frames (similar to
- *	MLME-DISASSOCIATE.indication primitive).
+ *	MLME-DISASSOCIATE.request and MLME-DISASSOCIATE.indication primitives).
  *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
@@ -383,6 +396,11 @@ enum nl80211_commands {
  * @NL80211_ATTR_FRAME: frame data (binary attribute), including frame header
  *	and body, but not FCS; used, e.g., with NL80211_CMD_AUTHENTICATE and
  *	NL80211_CMD_ASSOCIATE events
+ * @NL80211_ATTR_SSID: SSID (binary attribute, 0..32 octets)
+ * @NL80211_ATTR_AUTH_TYPE: AuthenticationType, see &enum nl80211_auth_type,
+ *	represented as a u32
+ * @NL80211_ATTR_REASON_CODE: ReasonCode for %NL80211_CMD_DEAUTHENTICATE and
+ *	%NL80211_CMD_DISASSOCIATE, u16
  *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -464,6 +482,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_SUPPORTED_COMMANDS,
 
 	NL80211_ATTR_FRAME,
+	NL80211_ATTR_SSID,
+	NL80211_ATTR_AUTH_TYPE,
+	NL80211_ATTR_REASON_CODE,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -485,6 +506,9 @@ enum nl80211_attrs {
 #define NL80211_ATTR_REG_INITIATOR NL80211_ATTR_REG_INITIATOR
 #define NL80211_ATTR_REG_TYPE NL80211_ATTR_REG_TYPE
 #define NL80211_ATTR_FRAME NL80211_ATTR_FRAME
+#define NL80211_ATTR_SSID NL80211_ATTR_SSID
+#define NL80211_ATTR_AUTH_TYPE NL80211_ATTR_AUTH_TYPE
+#define NL80211_ATTR_REASON_CODE NL80211_ATTR_REASON_CODE
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
@@ -1018,4 +1042,18 @@ enum nl80211_bss {
 	NL80211_BSS_MAX = __NL80211_BSS_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_auth_type - AuthenticationType
+ *
+ * @NL80211_AUTHTYPE_OPEN_SYSTEM: Open System authentication
+ * @NL80211_AUTHTYPE_SHARED_KEY: Shared Key authentication (WEP only)
+ * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r)
+ * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP)
+ */
+enum nl80211_auth_type {
+	NL80211_AUTHTYPE_OPEN_SYSTEM,
+	NL80211_AUTHTYPE_SHARED_KEY,
+	NL80211_AUTHTYPE_FT,
+	NL80211_AUTHTYPE_NETWORK_EAP,
+};
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ad44016021b1..0da9a55881a1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -578,6 +578,105 @@ struct cfg80211_bss {
 	u8 priv[0] __attribute__((__aligned__(sizeof(void *))));
 };
 
+/**
+ * struct cfg80211_auth_request - Authentication request data
+ *
+ * This structure provides information needed to complete IEEE 802.11
+ * authentication.
+ * NOTE: This structure will likely change when more code from mac80211 is
+ * moved into cfg80211 so that non-mac80211 drivers can benefit from it, too.
+ * Before using this in a driver that does not use mac80211, it would be better
+ * to check the status of that work and better yet, volunteer to work on it.
+ *
+ * @chan: The channel to use or %NULL if not specified (auto-select based on
+ *	scan results)
+ * @peer_addr: The address of the peer STA (AP BSSID in infrastructure case);
+ *	this field is required to be present; if the driver wants to help with
+ *	BSS selection, it should use (yet to be added) MLME event to allow user
+ *	space SME to be notified of roaming candidate, so that the SME can then
+ *	use the authentication request with the recommended BSSID and whatever
+ *	other data may be needed for authentication/association
+ * @ssid: SSID or %NULL if not yet available
+ * @ssid_len: Length of ssid in octets
+ * @auth_type: Authentication type (algorithm)
+ * @ie: Extra IEs to add to Authentication frame or %NULL
+ * @ie_len: Length of ie buffer in octets
+ */
+struct cfg80211_auth_request {
+	struct ieee80211_channel *chan;
+	u8 *peer_addr;
+	const u8 *ssid;
+	size_t ssid_len;
+	enum nl80211_auth_type auth_type;
+	const u8 *ie;
+	size_t ie_len;
+};
+
+/**
+ * struct cfg80211_assoc_request - (Re)Association request data
+ *
+ * This structure provides information needed to complete IEEE 802.11
+ * (re)association.
+ * NOTE: This structure will likely change when more code from mac80211 is
+ * moved into cfg80211 so that non-mac80211 drivers can benefit from it, too.
+ * Before using this in a driver that does not use mac80211, it would be better
+ * to check the status of that work and better yet, volunteer to work on it.
+ *
+ * @chan: The channel to use or %NULL if not specified (auto-select based on
+ *	scan results)
+ * @peer_addr: The address of the peer STA (AP BSSID); this field is required
+ *	to be present and the STA must be in State 2 (authenticated) with the
+ *	peer STA
+ * @ssid: SSID
+ * @ssid_len: Length of ssid in octets
+ * @ie: Extra IEs to add to (Re)Association Request frame or %NULL
+ * @ie_len: Length of ie buffer in octets
+ */
+struct cfg80211_assoc_request {
+	struct ieee80211_channel *chan;
+	u8 *peer_addr;
+	const u8 *ssid;
+	size_t ssid_len;
+	const u8 *ie;
+	size_t ie_len;
+};
+
+/**
+ * struct cfg80211_deauth_request - Deauthentication request data
+ *
+ * This structure provides information needed to complete IEEE 802.11
+ * deauthentication.
+ *
+ * @peer_addr: The address of the peer STA (AP BSSID); this field is required
+ *	to be present and the STA must be authenticated with the peer STA
+ * @ie: Extra IEs to add to Deauthentication frame or %NULL
+ * @ie_len: Length of ie buffer in octets
+ */
+struct cfg80211_deauth_request {
+	u8 *peer_addr;
+	u16 reason_code;
+	const u8 *ie;
+	size_t ie_len;
+};
+
+/**
+ * struct cfg80211_disassoc_request - Disassociation request data
+ *
+ * This structure provides information needed to complete IEEE 802.11
+ * disassocation.
+ *
+ * @peer_addr: The address of the peer STA (AP BSSID); this field is required
+ *	to be present and the STA must be associated with the peer STA
+ * @ie: Extra IEs to add to Disassociation frame or %NULL
+ * @ie_len: Length of ie buffer in octets
+ */
+struct cfg80211_disassoc_request {
+	u8 *peer_addr;
+	u16 reason_code;
+	const u8 *ie;
+	size_t ie_len;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -650,6 +749,11 @@ struct cfg80211_bss {
  *	the driver, and will be valid until passed to cfg80211_scan_done().
  *	For scan results, call cfg80211_inform_bss(); you can call this outside
  *	the scan/scan_done bracket too.
+ *
+ * @auth: Request to authenticate with the specified peer
+ * @assoc: Request to (re)associate with the specified peer
+ * @deauth: Request to deauthenticate from the specified peer
+ * @disassoc: Request to disassociate from the specified peer
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy);
@@ -730,6 +834,15 @@ struct cfg80211_ops {
 
 	int	(*scan)(struct wiphy *wiphy, struct net_device *dev,
 			struct cfg80211_scan_request *request);
+
+	int	(*auth)(struct wiphy *wiphy, struct net_device *dev,
+			struct cfg80211_auth_request *req);
+	int	(*assoc)(struct wiphy *wiphy, struct net_device *dev,
+			 struct cfg80211_assoc_request *req);
+	int	(*deauth)(struct wiphy *wiphy, struct net_device *dev,
+			  struct cfg80211_deauth_request *req);
+	int	(*disassoc)(struct wiphy *wiphy, struct net_device *dev,
+			    struct cfg80211_disassoc_request *req);
 };
 
 /* temporary wext handlers */
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 58693e52d458..223e536e8426 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1300,6 +1300,142 @@ static int ieee80211_scan(struct wiphy *wiphy,
 	return ieee80211_request_scan(sdata, req);
 }
 
+static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev,
+			  struct cfg80211_auth_request *req)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return -EOPNOTSUPP;
+
+	switch (req->auth_type) {
+	case NL80211_AUTHTYPE_OPEN_SYSTEM:
+		sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_OPEN;
+		break;
+	case NL80211_AUTHTYPE_SHARED_KEY:
+		sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_SHARED_KEY;
+		break;
+	case NL80211_AUTHTYPE_FT:
+		sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_FT;
+		break;
+	case NL80211_AUTHTYPE_NETWORK_EAP:
+		sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_LEAP;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	memcpy(sdata->u.mgd.bssid, req->peer_addr, ETH_ALEN);
+	sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
+	sdata->u.mgd.flags |= IEEE80211_STA_BSSID_SET;
+
+	/* TODO: req->chan */
+	sdata->u.mgd.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL;
+
+	if (req->ssid) {
+		sdata->u.mgd.flags |= IEEE80211_STA_SSID_SET;
+		memcpy(sdata->u.mgd.ssid, req->ssid, req->ssid_len);
+		sdata->u.mgd.ssid_len = req->ssid_len;
+		sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL;
+	}
+
+	kfree(sdata->u.mgd.sme_auth_ie);
+	sdata->u.mgd.sme_auth_ie = NULL;
+	sdata->u.mgd.sme_auth_ie_len = 0;
+	if (req->ie) {
+		sdata->u.mgd.sme_auth_ie = kmalloc(req->ie_len, GFP_KERNEL);
+		if (sdata->u.mgd.sme_auth_ie == NULL)
+			return -ENOMEM;
+		memcpy(sdata->u.mgd.sme_auth_ie, req->ie, req->ie_len);
+		sdata->u.mgd.sme_auth_ie_len = req->ie_len;
+	}
+
+	sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME;
+	sdata->u.mgd.state = IEEE80211_STA_MLME_DIRECT_PROBE;
+	ieee80211_sta_req_auth(sdata);
+	return 0;
+}
+
+static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev,
+			   struct cfg80211_assoc_request *req)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int ret;
+
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return -EOPNOTSUPP;
+
+	if (memcmp(sdata->u.mgd.bssid, req->peer_addr, ETH_ALEN) != 0 ||
+	    !(sdata->u.mgd.flags & IEEE80211_STA_AUTHENTICATED))
+		return -ENOLINK; /* not authenticated */
+
+	sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
+	sdata->u.mgd.flags |= IEEE80211_STA_BSSID_SET;
+
+	/* TODO: req->chan */
+	sdata->u.mgd.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL;
+
+	if (req->ssid) {
+		sdata->u.mgd.flags |= IEEE80211_STA_SSID_SET;
+		memcpy(sdata->u.mgd.ssid, req->ssid, req->ssid_len);
+		sdata->u.mgd.ssid_len = req->ssid_len;
+		sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL;
+	} else
+		sdata->u.mgd.flags |= IEEE80211_STA_AUTO_SSID_SEL;
+
+	ret = ieee80211_sta_set_extra_ie(sdata, req->ie, req->ie_len);
+	if (ret)
+		return ret;
+
+	sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME;
+	sdata->u.mgd.state = IEEE80211_STA_MLME_ASSOCIATE;
+	ieee80211_sta_req_auth(sdata);
+	return 0;
+}
+
+static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev,
+			    struct cfg80211_deauth_request *req)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return -EOPNOTSUPP;
+
+	/* TODO: req->ie */
+	return ieee80211_sta_deauthenticate(sdata, req->reason_code);
+}
+
+static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev,
+			      struct cfg80211_disassoc_request *req)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return -EOPNOTSUPP;
+
+	/* TODO: req->ie */
+	return ieee80211_sta_disassociate(sdata, req->reason_code);
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1333,4 +1469,8 @@ struct cfg80211_ops mac80211_config_ops = {
 	.suspend = ieee80211_suspend,
 	.resume = ieee80211_resume,
 	.scan = ieee80211_scan,
+	.auth = ieee80211_auth,
+	.assoc = ieee80211_assoc,
+	.deauth = ieee80211_deauth,
+	.disassoc = ieee80211_disassoc,
 };
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ad12c2a03a95..7b96d95f48b1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -256,6 +256,7 @@ struct mesh_preq_queue {
 #define IEEE80211_STA_TKIP_WEP_USED	BIT(14)
 #define IEEE80211_STA_CSA_RECEIVED	BIT(15)
 #define IEEE80211_STA_MFP_ENABLED	BIT(16)
+#define IEEE80211_STA_EXT_SME		BIT(17)
 /* flags for MLME request */
 #define IEEE80211_STA_REQ_SCAN 0
 #define IEEE80211_STA_REQ_DIRECT_PROBE 1
@@ -266,6 +267,7 @@ struct mesh_preq_queue {
 #define IEEE80211_AUTH_ALG_OPEN BIT(0)
 #define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1)
 #define IEEE80211_AUTH_ALG_LEAP BIT(2)
+#define IEEE80211_AUTH_ALG_FT BIT(3)
 
 struct ieee80211_if_managed {
 	struct timer_list timer;
@@ -335,6 +337,9 @@ struct ieee80211_if_managed {
 	size_t ie_deauth_len;
 	u8 *ie_disassoc;
 	size_t ie_disassoc_len;
+
+	u8 *sme_auth_ie;
+	size_t sme_auth_ie_len;
 };
 
 enum ieee80211_ibss_flags {
@@ -970,7 +975,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata,
 		  struct sk_buff *skb,
 		  struct ieee80211_rx_status *rx_status);
 int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata,
-			       char *ie, size_t len);
+			       const char *ie, size_t len);
 
 void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local);
 void ieee80211_scan_failed(struct ieee80211_local *local);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6dc7a61bc18b..d1bcc8438772 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -730,6 +730,8 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_local *local = sdata->local;
+	u8 *ies;
+	size_t ies_len;
 
 	ifmgd->auth_tries++;
 	if (ifmgd->auth_tries > IEEE80211_AUTH_MAX_TRIES) {
@@ -755,7 +757,14 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata)
 	printk(KERN_DEBUG "%s: authenticate with AP %pM\n",
 	       sdata->dev->name, ifmgd->bssid);
 
-	ieee80211_send_auth(sdata, 1, ifmgd->auth_alg, NULL, 0,
+	if (ifmgd->flags & IEEE80211_STA_EXT_SME) {
+		ies = ifmgd->sme_auth_ie;
+		ies_len = ifmgd->sme_auth_ie_len;
+	} else {
+		ies = NULL;
+		ies_len = 0;
+	}
+	ieee80211_send_auth(sdata, 1, ifmgd->auth_alg, ies, ies_len,
 			    ifmgd->bssid, 0);
 	ifmgd->auth_transaction = 2;
 
@@ -870,7 +879,8 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata)
 	int wep_privacy;
 	int privacy_invoked;
 
-	if (!ifmgd || (ifmgd->flags & IEEE80211_STA_MIXED_CELL))
+	if (!ifmgd || (ifmgd->flags & (IEEE80211_STA_MIXED_CELL |
+				       IEEE80211_STA_EXT_SME)))
 		return 0;
 
 	bss = ieee80211_rx_bss_get(local, ifmgd->bssid,
@@ -998,7 +1008,11 @@ static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata)
 
 	printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name);
 	ifmgd->flags |= IEEE80211_STA_AUTHENTICATED;
-	ieee80211_associate(sdata);
+	if (ifmgd->flags & IEEE80211_STA_EXT_SME) {
+		/* Wait for SME to request association */
+		ifmgd->state = IEEE80211_STA_MLME_DISABLED;
+	} else
+		ieee80211_associate(sdata);
 }
 
 
@@ -1084,6 +1098,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	switch (ifmgd->auth_alg) {
 	case WLAN_AUTH_OPEN:
 	case WLAN_AUTH_LEAP:
+	case WLAN_AUTH_FT:
 		ieee80211_auth_completed(sdata);
 		cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len);
 		break;
@@ -1117,9 +1132,10 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 		printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n",
 				sdata->dev->name, reason_code);
 
-	if (ifmgd->state == IEEE80211_STA_MLME_AUTHENTICATE ||
-	    ifmgd->state == IEEE80211_STA_MLME_ASSOCIATE ||
-	    ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) {
+	if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) &&
+	    (ifmgd->state == IEEE80211_STA_MLME_AUTHENTICATE ||
+	     ifmgd->state == IEEE80211_STA_MLME_ASSOCIATE ||
+	     ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED)) {
 		ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE;
 		mod_timer(&ifmgd->timer, jiffies +
 				      IEEE80211_RETRY_AUTH_INTERVAL);
@@ -1150,7 +1166,8 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 		printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n",
 				sdata->dev->name, reason_code);
 
-	if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) {
+	if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) &&
+	    ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) {
 		ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE;
 		mod_timer(&ifmgd->timer, jiffies +
 				      IEEE80211_RETRY_AUTH_INTERVAL);
@@ -1664,6 +1681,8 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata)
 		ifmgd->auth_alg = WLAN_AUTH_SHARED_KEY;
 	else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP)
 		ifmgd->auth_alg = WLAN_AUTH_LEAP;
+	else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_FT)
+		ifmgd->auth_alg = WLAN_AUTH_FT;
 	else
 		ifmgd->auth_alg = WLAN_AUTH_OPEN;
 	ifmgd->auth_transaction = -1;
@@ -1687,7 +1706,8 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata)
 	u16 capa_val = WLAN_CAPABILITY_ESS;
 	struct ieee80211_channel *chan = local->oper_channel;
 
-	if (ifmgd->flags & (IEEE80211_STA_AUTO_SSID_SEL |
+	if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) &&
+	    ifmgd->flags & (IEEE80211_STA_AUTO_SSID_SEL |
 			    IEEE80211_STA_AUTO_BSSID_SEL |
 			    IEEE80211_STA_AUTO_CHANNEL_SEL)) {
 		capa_mask |= WLAN_CAPABILITY_PRIVACY;
@@ -1884,7 +1904,11 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata)
 			ieee80211_set_disassoc(sdata, true, true,
 					       WLAN_REASON_DEAUTH_LEAVING);
 
-		set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request);
+		if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) ||
+		    ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE)
+			set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request);
+		else if (ifmgd->flags & IEEE80211_STA_EXT_SME)
+			set_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request);
 		queue_work(local->hw.workqueue, &ifmgd->work);
 	}
 }
@@ -1953,7 +1977,8 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
 	return ieee80211_sta_commit(sdata);
 }
 
-int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len)
+int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata,
+			       const char *ie, size_t len)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index e55d2834764c..ce21d66b1023 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -137,6 +137,7 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
 		if (ret)
 			return ret;
 		sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
+		sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME;
 		ieee80211_sta_req_auth(sdata);
 		return 0;
 	}
@@ -224,6 +225,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
 		if (ret)
 			return ret;
 
+		sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME;
 		ieee80211_sta_req_auth(sdata);
 		return 0;
 	} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
@@ -287,6 +289,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
 		ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data);
 		if (ret)
 			return ret;
+		sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME;
 		ieee80211_sta_req_auth(sdata);
 		return 0;
 	} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c034c2418cb3..9e1318d1d4bb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -111,6 +111,11 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 			      .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED },
 	[NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED },
+
+	[NL80211_ATTR_SSID] = { .type = NLA_BINARY,
+				.len = IEEE80211_MAX_SSID_LEN },
+	[NL80211_ATTR_AUTH_TYPE] = { .type = NLA_U32 },
+	[NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 },
 };
 
 /* message building helper */
@@ -265,6 +270,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(set_mesh_params, SET_MESH_PARAMS);
 	CMD(change_bss, SET_BSS);
 	CMD(set_mgmt_extra_ie, SET_MGMT_EXTRA_IE);
+	CMD(auth, AUTHENTICATE);
+	CMD(assoc, ASSOCIATE);
+	CMD(deauth, DEAUTHENTICATE);
+	CMD(disassoc, DISASSOCIATE);
 
 #undef CMD
 	nla_nest_end(msg, nl_cmds);
@@ -2646,6 +2655,228 @@ static int nl80211_dump_scan(struct sk_buff *skb,
 	return err;
 }
 
+static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	struct net_device *dev;
+	struct cfg80211_auth_request req;
+	struct wiphy *wiphy;
+	int err;
+
+	rtnl_lock();
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!drv->ops->auth) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!info->attrs[NL80211_ATTR_MAC]) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	wiphy = &drv->wiphy;
+	memset(&req, 0, sizeof(req));
+
+	req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
+		req.chan = ieee80211_get_channel(
+			wiphy,
+			nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+		if (!req.chan) {
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	if (info->attrs[NL80211_ATTR_SSID]) {
+		req.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
+		req.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+	}
+
+	if (info->attrs[NL80211_ATTR_IE]) {
+		req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+	}
+
+	if (info->attrs[NL80211_ATTR_AUTH_TYPE]) {
+		req.auth_type =
+			nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
+	}
+
+	err = drv->ops->auth(&drv->wiphy, dev, &req);
+
+out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
+static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	struct net_device *dev;
+	struct cfg80211_assoc_request req;
+	struct wiphy *wiphy;
+	int err;
+
+	rtnl_lock();
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!drv->ops->assoc) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!info->attrs[NL80211_ATTR_MAC] ||
+	    !info->attrs[NL80211_ATTR_SSID]) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	wiphy = &drv->wiphy;
+	memset(&req, 0, sizeof(req));
+
+	req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
+		req.chan = ieee80211_get_channel(
+			wiphy,
+			nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+		if (!req.chan) {
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	if (nla_len(info->attrs[NL80211_ATTR_SSID]) > IEEE80211_MAX_SSID_LEN) {
+		err = -EINVAL;
+		goto out;
+	}
+	req.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
+	req.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+
+	if (info->attrs[NL80211_ATTR_IE]) {
+		req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+	}
+
+	err = drv->ops->assoc(&drv->wiphy, dev, &req);
+
+out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
+static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	struct net_device *dev;
+	struct cfg80211_deauth_request req;
+	struct wiphy *wiphy;
+	int err;
+
+	rtnl_lock();
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!drv->ops->deauth) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!info->attrs[NL80211_ATTR_MAC]) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	wiphy = &drv->wiphy;
+	memset(&req, 0, sizeof(req));
+
+	req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	if (info->attrs[NL80211_ATTR_REASON_CODE])
+		req.reason_code =
+			nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
+
+	if (info->attrs[NL80211_ATTR_IE]) {
+		req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+	}
+
+	err = drv->ops->deauth(&drv->wiphy, dev, &req);
+
+out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
+static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	struct net_device *dev;
+	struct cfg80211_disassoc_request req;
+	struct wiphy *wiphy;
+	int err;
+
+	rtnl_lock();
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!drv->ops->disassoc) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!info->attrs[NL80211_ATTR_MAC]) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	wiphy = &drv->wiphy;
+	memset(&req, 0, sizeof(req));
+
+	req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	if (info->attrs[NL80211_ATTR_REASON_CODE])
+		req.reason_code =
+			nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
+
+	if (info->attrs[NL80211_ATTR_IE]) {
+		req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+	}
+
+	err = drv->ops->disassoc(&drv->wiphy, dev, &req);
+
+out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -2829,6 +3060,30 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.dumpit = nl80211_dump_scan,
 	},
+	{
+		.cmd = NL80211_CMD_AUTHENTICATE,
+		.doit = nl80211_authenticate,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_ASSOCIATE,
+		.doit = nl80211_associate,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_DEAUTHENTICATE,
+		.doit = nl80211_deauthenticate,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_DISASSOCIATE,
+		.doit = nl80211_disassociate,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
 	.name = "mlme",
-- 
cgit v1.2.3


From 65fc73ac4a310945dfeceac961726c2765ad2ec0 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Fri, 20 Mar 2009 21:21:16 +0200
Subject: nl80211: Remove NL80211_CMD_SET_MGMT_EXTRA_IE

The functionality that NL80211_CMD_SET_MGMT_EXTRA_IE provided can now
be achieved with cleaner design by adding IE(s) into
NL80211_CMD_TRIGGER_SCAN, NL80211_CMD_AUTHENTICATE,
NL80211_CMD_ASSOCIATE, NL80211_CMD_DEAUTHENTICATE, and
NL80211_CMD_DISASSOCIATE.

Since this is a very recently added command and there are no known (or
known planned) applications using NL80211_CMD_SET_MGMT_EXTRA_IE and
taken into account how much extra complexity it adds to the IE
processing we have now (and need to add in the future to fix IE order
in couple of frames), it looks like the best option is to just remove
the implementation of this command for now. The enum values themselves
are left to avoid changing the nl80211 command or attribute numbers.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    |  8 ++++-
 include/net/cfg80211.h     | 26 --------------
 net/mac80211/cfg.c         | 86 ----------------------------------------------
 net/mac80211/ieee80211_i.h | 15 --------
 net/mac80211/iface.c       |  7 ----
 net/mac80211/mlme.c        | 36 ++-----------------
 net/mac80211/util.c        | 29 +++-------------
 net/wireless/nl80211.c     | 47 -------------------------
 8 files changed, 14 insertions(+), 240 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 9685eaab40a9..cbe8ce3bf486 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -142,6 +142,12 @@
  *	%NL80211_ATTR_IE. If the command succeeds, the requested data will be
  *	added to all specified management frames generated by
  *	kernel/firmware/driver.
+ *	Note: This command has been removed and it is only reserved at this
+ *	point to avoid re-using existing command number. The functionality this
+ *	command was planned for has been provided with cleaner design with the
+ *	option to specify additional IEs in NL80211_CMD_TRIGGER_SCAN,
+ *	NL80211_CMD_AUTHENTICATE, NL80211_CMD_ASSOCIATE,
+ *	NL80211_CMD_DEAUTHENTICATE, and NL80211_CMD_DISASSOCIATE.
  *
  * @NL80211_CMD_GET_SCAN: get scan results
  * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters
@@ -238,7 +244,7 @@ enum nl80211_commands {
 	NL80211_CMD_GET_MESH_PARAMS,
 	NL80211_CMD_SET_MESH_PARAMS,
 
-	NL80211_CMD_SET_MGMT_EXTRA_IE,
+	NL80211_CMD_SET_MGMT_EXTRA_IE /* reserved; not used */,
 
 	NL80211_CMD_GET_REG,
 
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0da9a55881a1..dca4a6b0461b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -471,26 +471,6 @@ struct ieee80211_txq_params {
 	u8 aifs;
 };
 
-/**
- * struct mgmt_extra_ie_params - Extra management frame IE parameters
- *
- * Used to add extra IE(s) into management frames. If the driver cannot add the
- * requested data into all management frames of the specified subtype that are
- * generated in kernel or firmware/hardware, it must reject the configuration
- * call. The IE data buffer is added to the end of the specified management
- * frame body after all other IEs. This addition is not applied to frames that
- * are injected through a monitor interface.
- *
- * @subtype: Management frame subtype
- * @ies: IE data buffer or %NULL to remove previous data
- * @ies_len: Length of @ies in octets
- */
-struct mgmt_extra_ie_params {
-	u8 subtype;
-	u8 *ies;
-	int ies_len;
-};
-
 /* from net/wireless.h */
 struct wiphy;
 
@@ -743,8 +723,6 @@ struct cfg80211_disassoc_request {
  *
  * @set_channel: Set channel
  *
- * @set_mgmt_extra_ie: Set extra IE data for management frames
- *
  * @scan: Request to do a scan. If returning zero, the scan request is given
  *	the driver, and will be valid until passed to cfg80211_scan_done().
  *	For scan results, call cfg80211_inform_bss(); you can call this outside
@@ -828,10 +806,6 @@ struct cfg80211_ops {
 			       struct ieee80211_channel *chan,
 			       enum nl80211_channel_type channel_type);
 
-	int	(*set_mgmt_extra_ie)(struct wiphy *wiphy,
-				     struct net_device *dev,
-				     struct mgmt_extra_ie_params *params);
-
 	int	(*scan)(struct wiphy *wiphy, struct net_device *dev,
 			struct cfg80211_scan_request *request);
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 223e536e8426..f5c15c9a00ce 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1181,91 +1181,6 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
 	return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 }
 
-static int set_mgmt_extra_ie_sta(struct ieee80211_sub_if_data *sdata,
-				 u8 subtype, u8 *ies, size_t ies_len)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-
-	switch (subtype) {
-	case IEEE80211_STYPE_PROBE_REQ >> 4:
-		if (local->ops->hw_scan)
-			break;
-		kfree(ifmgd->ie_probereq);
-		ifmgd->ie_probereq = ies;
-		ifmgd->ie_probereq_len = ies_len;
-		return 0;
-	case IEEE80211_STYPE_PROBE_RESP >> 4:
-		kfree(ifmgd->ie_proberesp);
-		ifmgd->ie_proberesp = ies;
-		ifmgd->ie_proberesp_len = ies_len;
-		return 0;
-	case IEEE80211_STYPE_AUTH >> 4:
-		kfree(ifmgd->ie_auth);
-		ifmgd->ie_auth = ies;
-		ifmgd->ie_auth_len = ies_len;
-		return 0;
-	case IEEE80211_STYPE_ASSOC_REQ >> 4:
-		kfree(ifmgd->ie_assocreq);
-		ifmgd->ie_assocreq = ies;
-		ifmgd->ie_assocreq_len = ies_len;
-		return 0;
-	case IEEE80211_STYPE_REASSOC_REQ >> 4:
-		kfree(ifmgd->ie_reassocreq);
-		ifmgd->ie_reassocreq = ies;
-		ifmgd->ie_reassocreq_len = ies_len;
-		return 0;
-	case IEEE80211_STYPE_DEAUTH >> 4:
-		kfree(ifmgd->ie_deauth);
-		ifmgd->ie_deauth = ies;
-		ifmgd->ie_deauth_len = ies_len;
-		return 0;
-	case IEEE80211_STYPE_DISASSOC >> 4:
-		kfree(ifmgd->ie_disassoc);
-		ifmgd->ie_disassoc = ies;
-		ifmgd->ie_disassoc_len = ies_len;
-		return 0;
-	}
-
-	return -EOPNOTSUPP;
-}
-
-static int ieee80211_set_mgmt_extra_ie(struct wiphy *wiphy,
-				       struct net_device *dev,
-				       struct mgmt_extra_ie_params *params)
-{
-	struct ieee80211_sub_if_data *sdata;
-	u8 *ies;
-	size_t ies_len;
-	int ret = -EOPNOTSUPP;
-
-	if (params->ies) {
-		ies = kmemdup(params->ies, params->ies_len, GFP_KERNEL);
-		if (ies == NULL)
-			return -ENOMEM;
-		ies_len = params->ies_len;
-	} else {
-		ies = NULL;
-		ies_len = 0;
-	}
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_STATION:
-		ret = set_mgmt_extra_ie_sta(sdata, params->subtype,
-					    ies, ies_len);
-		break;
-	default:
-		ret = -EOPNOTSUPP;
-		break;
-	}
-
-	if (ret)
-		kfree(ies);
-	return ret;
-}
-
 #ifdef CONFIG_PM
 static int ieee80211_suspend(struct wiphy *wiphy)
 {
@@ -1465,7 +1380,6 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_bss = ieee80211_change_bss,
 	.set_txq_params = ieee80211_set_txq_params,
 	.set_channel = ieee80211_set_channel,
-	.set_mgmt_extra_ie = ieee80211_set_mgmt_extra_ie,
 	.suspend = ieee80211_suspend,
 	.resume = ieee80211_resume,
 	.scan = ieee80211_scan,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 547cfac218ee..f69e84ab9617 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -323,21 +323,6 @@ struct ieee80211_if_managed {
 	int wmm_last_param_set;
 
 	/* Extra IE data for management frames */
-	u8 *ie_probereq;
-	size_t ie_probereq_len;
-	u8 *ie_proberesp;
-	size_t ie_proberesp_len;
-	u8 *ie_auth;
-	size_t ie_auth_len;
-	u8 *ie_assocreq;
-	size_t ie_assocreq_len;
-	u8 *ie_reassocreq;
-	size_t ie_reassocreq_len;
-	u8 *ie_deauth;
-	size_t ie_deauth_len;
-	u8 *ie_disassoc;
-	size_t ie_disassoc_len;
-
 	u8 *sme_auth_ie;
 	size_t sme_auth_ie_len;
 };
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 6b56dc2208e7..34f4798a98f7 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -653,13 +653,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 		kfree(sdata->u.mgd.extra_ie);
 		kfree(sdata->u.mgd.assocreq_ies);
 		kfree(sdata->u.mgd.assocresp_ies);
-		kfree(sdata->u.mgd.ie_probereq);
-		kfree(sdata->u.mgd.ie_proberesp);
-		kfree(sdata->u.mgd.ie_auth);
-		kfree(sdata->u.mgd.ie_assocreq);
-		kfree(sdata->u.mgd.ie_reassocreq);
-		kfree(sdata->u.mgd.ie_deauth);
-		kfree(sdata->u.mgd.ie_disassoc);
 		kfree(sdata->u.mgd.sme_auth_ie);
 		break;
 	case NL80211_IFTYPE_WDS:
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d1bcc8438772..b0808efcedf6 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -82,38 +82,23 @@ static int ieee80211_compatible_rates(struct ieee80211_bss *bss,
 
 /* frame sending functions */
 
-static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len)
-{
-	if (ies)
-		memcpy(skb_put(skb, ies_len), ies, ies_len);
-}
-
 static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *pos, *ies, *ht_ie, *e_ies;
+	u8 *pos, *ies, *ht_ie;
 	int i, len, count, rates_len, supp_rates_len;
 	u16 capab;
 	struct ieee80211_bss *bss;
 	int wmm = 0;
 	struct ieee80211_supported_band *sband;
 	u32 rates = 0;
-	size_t e_ies_len;
-
-	if (ifmgd->flags & IEEE80211_STA_PREV_BSSID_SET) {
-		e_ies = sdata->u.mgd.ie_reassocreq;
-		e_ies_len = sdata->u.mgd.ie_reassocreq_len;
-	} else {
-		e_ies = sdata->u.mgd.ie_assocreq;
-		e_ies_len = sdata->u.mgd.ie_assocreq_len;
-	}
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
 			    sizeof(*mgmt) + 200 + ifmgd->extra_ie_len +
-			    ifmgd->ssid_len + e_ies_len);
+			    ifmgd->ssid_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
 		       "frame\n", sdata->dev->name);
@@ -304,8 +289,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs));
 	}
 
-	add_extra_ies(skb, e_ies, e_ies_len);
-
 	kfree(ifmgd->assocreq_ies);
 	ifmgd->assocreq_ies_len = (skb->data + skb->len) - ies;
 	ifmgd->assocreq_ies = kmalloc(ifmgd->assocreq_ies_len, GFP_KERNEL);
@@ -323,19 +306,8 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *ies;
-	size_t ies_len;
 
-	if (stype == IEEE80211_STYPE_DEAUTH) {
-		ies = sdata->u.mgd.ie_deauth;
-		ies_len = sdata->u.mgd.ie_deauth_len;
-	} else {
-		ies = sdata->u.mgd.ie_disassoc;
-		ies_len = sdata->u.mgd.ie_disassoc_len;
-	}
-
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) +
-			    ies_len);
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for "
 		       "deauth/disassoc frame\n", sdata->dev->name);
@@ -353,8 +325,6 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	/* u.deauth.reason_code == u.disassoc.reason_code */
 	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
 
-	add_extra_ies(skb, ies, ies_len);
-
 	ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED);
 }
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index e0431a1d218b..444bb14c95e1 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -846,16 +846,9 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	const u8 *ie_auth = NULL;
-	int ie_auth_len = 0;
-
-	if (sdata->vif.type == NL80211_IFTYPE_STATION) {
-		ie_auth_len = sdata->u.mgd.ie_auth_len;
-		ie_auth = sdata->u.mgd.ie_auth;
-	}
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
-			    sizeof(*mgmt) + 6 + extra_len + ie_auth_len);
+			    sizeof(*mgmt) + 6 + extra_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
 		       "frame\n", sdata->dev->name);
@@ -877,8 +870,6 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	mgmt->u.auth.status_code = cpu_to_le16(0);
 	if (extra)
 		memcpy(skb_put(skb, extra_len), extra, extra_len);
-	if (ie_auth)
-		memcpy(skb_put(skb, ie_auth_len), ie_auth, ie_auth_len);
 
 	ieee80211_tx_skb(sdata, skb, encrypt);
 }
@@ -891,20 +882,11 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 	struct ieee80211_supported_band *sband;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *pos, *supp_rates, *esupp_rates = NULL, *extra_preq_ie = NULL;
-	int i, extra_preq_ie_len = 0;
-
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_STATION:
-		extra_preq_ie_len = sdata->u.mgd.ie_probereq_len;
-		extra_preq_ie = sdata->u.mgd.ie_probereq;
-		break;
-	default:
-		break;
-	}
+	u8 *pos, *supp_rates, *esupp_rates = NULL;
+	int i;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 +
-			    ie_len + extra_preq_ie_len);
+			    ie_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
 		       "request\n", sdata->dev->name);
@@ -953,9 +935,6 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 
 	if (ie)
 		memcpy(skb_put(skb, ie_len), ie, ie_len);
-	if (extra_preq_ie)
-		memcpy(skb_put(skb, extra_preq_ie_len), extra_preq_ie,
-		       extra_preq_ie_len);
 
 	ieee80211_tx_skb(sdata, skb, 0);
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9e1318d1d4bb..44c79972be57 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -269,7 +269,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(add_mpath, NEW_MPATH);
 	CMD(set_mesh_params, SET_MESH_PARAMS);
 	CMD(change_bss, SET_BSS);
-	CMD(set_mgmt_extra_ie, SET_MGMT_EXTRA_IE);
 	CMD(auth, AUTHENTICATE);
 	CMD(assoc, ASSOCIATE);
 	CMD(deauth, DEAUTHENTICATE);
@@ -2355,46 +2354,6 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 	return -EINVAL;
 }
 
-static int nl80211_set_mgmt_extra_ie(struct sk_buff *skb,
-				     struct genl_info *info)
-{
-	struct cfg80211_registered_device *drv;
-	int err;
-	struct net_device *dev;
-	struct mgmt_extra_ie_params params;
-
-	memset(&params, 0, sizeof(params));
-
-	if (!info->attrs[NL80211_ATTR_MGMT_SUBTYPE])
-		return -EINVAL;
-	params.subtype = nla_get_u8(info->attrs[NL80211_ATTR_MGMT_SUBTYPE]);
-	if (params.subtype > 15)
-		return -EINVAL; /* FC Subtype field is 4 bits (0..15) */
-
-	if (info->attrs[NL80211_ATTR_IE]) {
-		params.ies = nla_data(info->attrs[NL80211_ATTR_IE]);
-		params.ies_len = nla_len(info->attrs[NL80211_ATTR_IE]);
-	}
-
-	rtnl_lock();
-
-	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
-	if (err)
-		goto out_rtnl;
-
-	if (drv->ops->set_mgmt_extra_ie)
-		err = drv->ops->set_mgmt_extra_ie(&drv->wiphy, dev, &params);
-	else
-		err = -EOPNOTSUPP;
-
-	cfg80211_put_dev(drv);
-	dev_put(dev);
- out_rtnl:
-	rtnl_unlock();
-
-	return err;
-}
-
 static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *drv;
@@ -3043,12 +3002,6 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
-	{
-		.cmd = NL80211_CMD_SET_MGMT_EXTRA_IE,
-		.doit = nl80211_set_mgmt_extra_ie,
-		.policy = nl80211_policy,
-		.flags = GENL_ADMIN_PERM,
-	},
 	{
 		.cmd = NL80211_CMD_TRIGGER_SCAN,
 		.doit = nl80211_trigger_scan,
-- 
cgit v1.2.3


From af83debf5bb44257082d4489ac86123a0cadf6d3 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@gmail.com>
Date: Sun, 22 Mar 2009 01:41:13 +0100
Subject: ath5k: Support LED's on Acer Extensa 5620z

Add vendor ID for Quanta Microsystems and update the led table with the reported device.

Reported-by: Scott Barnes <nekoreeve@gmail.com>

Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/led.c | 2 ++
 include/linux/pci_ids.h          | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath5k/led.c b/drivers/net/wireless/ath5k/led.c
index 0686e12738b3..19555fb79c9b 100644
--- a/drivers/net/wireless/ath5k/led.c
+++ b/drivers/net/wireless/ath5k/led.c
@@ -65,6 +65,8 @@ static const struct pci_device_id ath5k_led_devices[] = {
 	{ ATH_SDEVICE(PCI_VENDOR_ID_AMBIT, 0x0422), ATH_LED(1, 1) },
 	/* E-machines E510 (tuliom@gmail.com) */
 	{ ATH_SDEVICE(PCI_VENDOR_ID_AMBIT, 0x0428), ATH_LED(3, 0) },
+	/* Acer Extensa 5620z (nekoreeve@gmail.com) */
+	{ ATH_SDEVICE(PCI_VENDOR_ID_QMI, 0x0105), ATH_LED(3, 0) },
 	{ }
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 097f410edefa..05dfa7c4fb64 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2271,6 +2271,8 @@
 #define PCI_DEVICE_ID_KORENIX_JETCARDF0	0x1600
 #define PCI_DEVICE_ID_KORENIX_JETCARDF1	0x16ff
 
+#define PCI_VENDOR_ID_QMI		0x1a32
+
 #define PCI_VENDOR_ID_TEKRAM		0x1de1
 #define PCI_DEVICE_ID_TEKRAM_DC290	0xdc29
 
-- 
cgit v1.2.3