From 2e5aa86609ec1cf37bcc204fd7ba6c24c2f49fec Mon Sep 17 00:00:00 2001
From: Paul Moore <pmoore@redhat.com>
Date: Tue, 23 Jul 2013 17:38:38 -0400
Subject: lsm: split the xfrm_state_alloc_security() hook implementation

The xfrm_state_alloc_security() LSM hook implementation is really a
multiplexed hook with two different behaviors depending on the
arguments passed to it by the caller.  This patch splits the LSM hook
implementation into two new hook implementations, which match the
LSM hooks in the rest of the kernel:

 * xfrm_state_alloc
 * xfrm_state_alloc_acquire

Also included in this patch are the necessary changes to the SELinux
code; no other LSMs are affected.

Signed-off-by: Paul Moore <pmoore@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/security.h | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 4686491852a7..e5a5e8a41e55 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1039,17 +1039,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @xfrm_policy_delete_security:
  *	@ctx contains the xfrm_sec_ctx.
  *	Authorize deletion of xp->security.
- * @xfrm_state_alloc_security:
+ * @xfrm_state_alloc:
  *	@x contains the xfrm_state being added to the Security Association
  *	Database by the XFRM system.
  *	@sec_ctx contains the security context information being provided by
  *	the user-level SA generation program (e.g., setkey or racoon).
- *	@secid contains the secid from which to take the mls portion of the context.
  *	Allocate a security structure to the x->security field; the security
  *	field is initialized to NULL when the xfrm_state is allocated. Set the
- *	context to correspond to either sec_ctx or polsec, with the mls portion
- *	taken from secid in the latter case.
- *	Return 0 if operation was successful (memory to allocate, legal context).
+ *	context to correspond to sec_ctx. Return 0 if operation was successful
+ *	(memory to allocate, legal context).
+ * @xfrm_state_alloc_acquire:
+ *	@x contains the xfrm_state being added to the Security Association
+ *	Database by the XFRM system.
+ *	@polsec contains the policy's security context.
+ *	@secid contains the secid from which to take the mls portion of the
+ *	context.
+ *	Allocate a security structure to the x->security field; the security
+ *	field is initialized to NULL when the xfrm_state is allocated. Set the
+ *	context to correspond to secid. Return 0 if operation was successful
+ *	(memory to allocate, legal context).
  * @xfrm_state_free_security:
  *	@x contains the xfrm_state.
  *	Deallocate x->security.
@@ -1651,9 +1659,11 @@ struct security_operations {
 	int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx);
 	void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx);
 	int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx);
-	int (*xfrm_state_alloc_security) (struct xfrm_state *x,
-		struct xfrm_user_sec_ctx *sec_ctx,
-		u32 secid);
+	int (*xfrm_state_alloc) (struct xfrm_state *x,
+				 struct xfrm_user_sec_ctx *sec_ctx);
+	int (*xfrm_state_alloc_acquire) (struct xfrm_state *x,
+					 struct xfrm_sec_ctx *polsec,
+					 u32 secid);
 	void (*xfrm_state_free_security) (struct xfrm_state *x);
 	int (*xfrm_state_delete_security) (struct xfrm_state *x);
 	int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
-- 
cgit v1.2.3


From 1a6b69b6548cd0dd82549393f30dd982ceeb79d2 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Thu, 12 Apr 2012 01:40:31 -0400
Subject: ARM: gic: add CPU migration support

This is required by the big.LITTLE switcher code.

The gic_migrate_target() changes the CPU interface mapping for the
current CPU to redirect SGIs to the specified interface, and it also
updates the target CPU for each interrupts to that CPU interface
if they were targeting the current interface.  Finally, pending
SGIs for the current CPU are forwarded to the new interface.

Because Linux does not use it, the SGI source information for the
forwarded SGIs is not preserved.  Neither is the source information
for the SGIs sent by the current CPU to other CPUs adjusted to match
the new CPU interface mapping.  The required registers are banked so
only the target CPU could do it.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 87 +++++++++++++++++++++++++++++++++++++++--
 include/linux/irqchip/arm-gic.h |  4 ++
 2 files changed, 88 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index ee7c50312066..268874ac75e6 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
 		return -EINVAL;
 
+	raw_spin_lock(&irq_controller_lock);
 	mask = 0xff << shift;
 	bit = gic_cpu_map[cpu] << shift;
-
-	raw_spin_lock(&irq_controller_lock);
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
 	raw_spin_unlock(&irq_controller_lock);
@@ -646,7 +645,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
 	int cpu;
-	unsigned long map = 0;
+	unsigned long flags, map = 0;
+
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 
 	/* Convert our logical CPU mask into a physical one. */
 	for_each_cpu(cpu, mask)
@@ -660,6 +661,86 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 
 	/* this always happens on GIC0 */
 	writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+#endif
+
+#ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_migrate_target - migrate IRQs to another CPU interface
+ *
+ * @new_cpu_id: the CPU target ID to migrate IRQs to
+ *
+ * Migrate all peripheral interrupts with a target matching the current CPU
+ * to the interface corresponding to @new_cpu_id.  The CPU interface mapping
+ * is also updated.  Targets to other CPU interfaces are unchanged.
+ * This must be called with IRQs locally disabled.
+ */
+void gic_migrate_target(unsigned int new_cpu_id)
+{
+	unsigned int cur_cpu_id, gic_irqs, gic_nr = 0;
+	void __iomem *dist_base;
+	int i, ror_val, cpu = smp_processor_id();
+	u32 val, cur_target_mask, active_mask;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data_dist_base(&gic_data[gic_nr]);
+	if (!dist_base)
+		return;
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+
+	cur_cpu_id = __ffs(gic_cpu_map[cpu]);
+	cur_target_mask = 0x01010101 << cur_cpu_id;
+	ror_val = (cur_cpu_id - new_cpu_id) & 31;
+
+	raw_spin_lock(&irq_controller_lock);
+
+	/* Update the target interface for this logical CPU */
+	gic_cpu_map[cpu] = 1 << new_cpu_id;
+
+	/*
+	 * Find all the peripheral interrupts targetting the current
+	 * CPU interface and migrate them to the new CPU interface.
+	 * We skip DIST_TARGET 0 to 7 as they are read-only.
+	 */
+	for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {
+		val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+		active_mask = val & cur_target_mask;
+		if (active_mask) {
+			val &= ~active_mask;
+			val |= ror32(active_mask, ror_val);
+			writel_relaxed(val, dist_base + GIC_DIST_TARGET + i*4);
+		}
+	}
+
+	raw_spin_unlock(&irq_controller_lock);
+
+	/*
+	 * Now let's migrate and clear any potential SGIs that might be
+	 * pending for us (cur_cpu_id).  Since GIC_DIST_SGI_PENDING_SET
+	 * is a banked register, we can only forward the SGI using
+	 * GIC_DIST_SOFTINT.  The original SGI source is lost but Linux
+	 * doesn't use that information anyway.
+	 *
+	 * For the same reason we do not adjust SGI source information
+	 * for previously sent SGIs by us to other CPUs either.
+	 */
+	for (i = 0; i < 16; i += 4) {
+		int j;
+		val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i);
+		if (!val)
+			continue;
+		writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i);
+		for (j = i; j < i + 4; j++) {
+			if (val & 0xff)
+				writel_relaxed((1 << (new_cpu_id + 16)) | j,
+						dist_base + GIC_DIST_SOFTINT);
+			val >>= 8;
+		}
+	}
 }
 #endif
 
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 3e203eb23cc7..40bfcac95940 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -31,6 +31,8 @@
 #define GIC_DIST_TARGET			0x800
 #define GIC_DIST_CONFIG			0xc00
 #define GIC_DIST_SOFTINT		0xf00
+#define GIC_DIST_SGI_PENDING_CLEAR	0xf10
+#define GIC_DIST_SGI_PENDING_SET	0xf20
 
 #define GICH_HCR			0x0
 #define GICH_VTR			0x4
@@ -73,6 +75,8 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+void gic_migrate_target(unsigned int new_cpu_id);
+
 #endif /* __ASSEMBLY */
 
 #endif
-- 
cgit v1.2.3


From ed96762e3241f57aa812977cf1920d3ee0363f4d Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Thu, 5 Jul 2012 21:33:26 -0400
Subject: ARM: bL_switcher: do not hardcode GIC IDs in the code

Currently, GIC IDs are hardcoded making the code dependent on the 4+4 b.L
configuration.  Let's allow for GIC IDs to be discovered upon switcher
initialization to support other b.L configurations such as the 1+1 one,
or 2+3 as on the VExpress TC2.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/bL_switcher.c   | 14 +++++++++++++-
 drivers/irqchip/irq-gic.c       | 21 +++++++++++++++++++++
 include/linux/irqchip/arm-gic.h |  1 +
 3 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 50e95d894e35..1c2e5bcfb1f7 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -110,6 +110,8 @@ static int bL_switchpoint(unsigned long _arg)
  * Generic switcher interface
  */
 
+static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+
 /*
  * bL_switch_to - Switch to a specific cluster for the current CPU
  * @new_cluster_id: the ID of the cluster to switch to.
@@ -159,7 +161,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
 	this_cpu = smp_processor_id();
 
 	/* redirect GIC's SGIs to our counterpart */
-	gic_migrate_target(cpuid + ib_cluster*4);
+	gic_migrate_target(bL_gic_id[cpuid][ib_cluster]);
 
 	/*
 	 * Raise a SGI on the inbound CPU to make sure it doesn't stall
@@ -332,6 +334,16 @@ static int __init bL_switcher_halve_cpus(void)
 		cluster = (cpu_logical_map(i) >> 8) & 0xff;
 
 		if (cpumask_test_cpu(cpu, &common_mask)) {
+			/* Let's take note of the GIC ID for this CPU */
+			int gic_id = gic_get_cpu_id(i);
+			if (gic_id < 0) {
+				pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+				return -EINVAL;
+			}
+			bL_gic_id[cpu][cluster] = gic_id;
+			pr_info("GIC ID for CPU %u cluster %u is %u\n",
+				cpu, cluster, gic_id);
+
 			/*
 			 * We keep only those logical CPUs which number
 			 * is equal to their physical CPU number. This is
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 268874ac75e6..3862cb54c714 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -667,6 +667,27 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 #endif
 
 #ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_get_cpu_id - get the CPU interface ID for the specified CPU
+ *
+ * @cpu: the logical CPU number to get the GIC ID for.
+ *
+ * Return the CPU interface ID for the given logical CPU number,
+ * or -1 if the CPU number is too large or the interface ID is
+ * unknown (more than one bit set).
+ */
+int gic_get_cpu_id(unsigned int cpu)
+{
+	unsigned int cpu_bit;
+
+	if (cpu >= NR_GIC_CPU_IF)
+		return -1;
+	cpu_bit = gic_cpu_map[cpu];
+	if (cpu_bit & (cpu_bit - 1))
+		return -1;
+	return __ffs(cpu_bit);
+}
+
 /*
  * gic_migrate_target - migrate IRQs to another CPU interface
  *
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 40bfcac95940..2d7d47e8dfaf 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -75,6 +75,7 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+int gic_get_cpu_id(unsigned int cpu);
 void gic_migrate_target(unsigned int new_cpu_id);
 
 #endif /* __ASSEMBLY */
-- 
cgit v1.2.3


From 87d8b9eb7eb6669aad6435a51e9862362141ba76 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 18 Jul 2013 16:21:14 -0700
Subject: clocksource: Extract max nsec calculation into separate function

We need to calculate the same number in the clocksource code and
the sched_clock code, so extract this code into its own function.
We also drop the min_t and just use min() because the two types
are the same.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/clocksource.h |  2 ++
 kernel/time/clocksource.c   | 45 ++++++++++++++++++++++++++++++---------------
 2 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index dbbf8aa7731b..67301a405712 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -292,6 +292,8 @@ extern void clocksource_resume(void);
 extern struct clocksource * __init __weak clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
+extern u64
+clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
 extern void
 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 50a8736757f3..637a14af6c21 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -537,40 +537,55 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
 }
 
 /**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
- * @cs:         Pointer to clocksource
- *
+ * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
+ * @mult:	cycle to nanosecond multiplier
+ * @shift:	cycle to nanosecond divisor (power of two)
+ * @maxadj:	maximum adjustment value to mult (~11%)
+ * @mask:	bitmask for two's complement subtraction of non 64 bit counters
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
 {
 	u64 max_nsecs, max_cycles;
 
 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
 	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj)
+	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
 	 * which is equivalent to the below.
-	 * max_cycles < (2^63)/(cs->mult + cs->maxadj)
-	 * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj)))
-	 * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj))
-	 * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj))
-	 * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj))
+	 * max_cycles < (2^63)/(mult + maxadj)
+	 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
+	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
+	 * max_cycles < 2^(63 - log2(mult + maxadj))
+	 * max_cycles < 1 << (63 - log2(mult + maxadj))
 	 * Please note that we add 1 to the result of the log2 to account for
 	 * any rounding errors, ensure the above inequality is satisfied and
 	 * no overflow will occur.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));
+	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
 
 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
-	 * determined by the minimum of max_cycles and cs->mask.
+	 * determined by the minimum of max_cycles and mask.
 	 * Note: Here we subtract the maxadj to make sure we don't sleep for
 	 * too long if there's a large negative adjustment.
 	 */
-	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
-	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
-					cs->shift);
+	max_cycles = min(max_cycles, mask);
+	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
+
+	return max_nsecs;
+}
+
+/**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs:         Pointer to clocksource
+ *
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+	u64 max_nsecs;
 
+	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
+					  cs->mask);
 	/*
 	 * To ensure that the clocksource does not wrap whilst we are idle,
 	 * limit the time the clocksource can be deferred by 12.5%. Please
-- 
cgit v1.2.3


From e7e3ff1bfe9c42ee31172e9afdc0383a9e595e29 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 18 Jul 2013 16:21:17 -0700
Subject: sched_clock: Add support for >32 bit sched_clock

The ARM architected system counter has at least 56 usable bits.
Add support for counters with more than 32 bits to the generic
sched_clock implementation so we can increase the time between
wakeups due to dealing with wrap-around on these devices while
benefiting from the irqtime accounting and suspend/resume
handling that the generic sched_clock code already has. On my
system using 56 bits over 32 bits changes the wraparound time
from a few minutes to an hour. For faster running counters (GHz
range) this is even more important because we may not be able to
execute the timer in time to deal with the wraparound if only 32
bits are used.

We choose a maxsec value of 3600 seconds because we assume no
system will go idle for more than an hour. In the future we may
need to increase this value.

Note: All users should switch over to the 64-bit read function so
we can remove setup_sched_clock() in favor of sched_clock_register().

Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/sched_clock.h |  2 ++
 kernel/time/sched_clock.c   | 46 +++++++++++++++++++++++++++++++--------------
 2 files changed, 34 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index fa7922c80a41..eca7abeb86fc 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -15,6 +15,8 @@ static inline void sched_clock_postinit(void) { }
 #endif
 
 extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
+extern void sched_clock_register(u64 (*read)(void), int bits,
+				 unsigned long rate);
 
 extern unsigned long long (*sched_clock_func)(void);
 
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index c018ffc59937..f388baeaf2b6 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -16,11 +16,12 @@
 #include <linux/hrtimer.h>
 #include <linux/sched_clock.h>
 #include <linux/seqlock.h>
+#include <linux/bitops.h>
 
 struct clock_data {
 	ktime_t wrap_kt;
 	u64 epoch_ns;
-	u32 epoch_cyc;
+	u64 epoch_cyc;
 	seqcount_t seq;
 	unsigned long rate;
 	u32 mult;
@@ -37,14 +38,25 @@ static struct clock_data cd = {
 	.mult	= NSEC_PER_SEC / HZ,
 };
 
-static u32 __read_mostly sched_clock_mask = 0xffffffff;
+static u64 __read_mostly sched_clock_mask;
 
-static u32 notrace jiffy_sched_clock_read(void)
+static u64 notrace jiffy_sched_clock_read(void)
 {
-	return (u32)(jiffies - INITIAL_JIFFIES);
+	/*
+	 * We don't need to use get_jiffies_64 on 32-bit arches here
+	 * because we register with BITS_PER_LONG
+	 */
+	return (u64)(jiffies - INITIAL_JIFFIES);
+}
+
+static u32 __read_mostly (*read_sched_clock_32)(void);
+
+static u64 notrace read_sched_clock_32_wrapper(void)
+{
+	return read_sched_clock_32();
 }
 
-static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -54,8 +66,8 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 static unsigned long long notrace sched_clock_32(void)
 {
 	u64 epoch_ns;
-	u32 epoch_cyc;
-	u32 cyc;
+	u64 epoch_cyc;
+	u64 cyc;
 	unsigned long seq;
 
 	if (cd.suspended)
@@ -78,7 +90,7 @@ static unsigned long long notrace sched_clock_32(void)
 static void notrace update_sched_clock(void)
 {
 	unsigned long flags;
-	u32 cyc;
+	u64 cyc;
 	u64 ns;
 
 	cyc = read_sched_clock();
@@ -101,7 +113,8 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
 	return HRTIMER_RESTART;
 }
 
-void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
+void __init sched_clock_register(u64 (*read)(void), int bits,
+				 unsigned long rate)
 {
 	unsigned long r;
 	u64 res, wrap;
@@ -110,14 +123,13 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	if (cd.rate > rate)
 		return;
 
-	BUG_ON(bits > 32);
 	WARN_ON(!irqs_disabled());
 	read_sched_clock = read;
-	sched_clock_mask = (1 << bits) - 1;
+	sched_clock_mask = CLOCKSOURCE_MASK(bits);
 	cd.rate = rate;
 
 	/* calculate the mult/shift to convert counter ticks to ns. */
-	clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0);
+	clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 3600);
 
 	r = rate;
 	if (r >= 4000000) {
@@ -130,7 +142,7 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 		r_unit = ' ';
 
 	/* calculate how many ns until we wrap */
-	wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift);
+	wrap = clocks_calc_max_nsecs(cd.mult, cd.shift, 0, sched_clock_mask);
 	cd.wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
 
 	/* calculate the ns resolution of this counter */
@@ -152,6 +164,12 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
+void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
+{
+	read_sched_clock_32 = read;
+	sched_clock_register(read_sched_clock_32_wrapper, bits, rate);
+}
+
 unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32;
 
 unsigned long long notrace sched_clock(void)
@@ -166,7 +184,7 @@ void __init sched_clock_postinit(void)
 	 * make it the final one one.
 	 */
 	if (read_sched_clock == jiffy_sched_clock_read)
-		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
+		sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
 	update_sched_clock();
 
-- 
cgit v1.2.3


From 6898eb89655a8ac7d098b2fada95c1c91870365c Mon Sep 17 00:00:00 2001
From: Peter Meerwald <pmeerw@pmeerw.net>
Date: Wed, 21 Aug 2013 00:15:00 +0100
Subject: iio: Remove trailing ; from function definitions

Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/iio.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 2103cc32a5fb..8e7a8132b109 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -457,7 +457,7 @@ static inline void iio_device_put(struct iio_dev *indio_dev)
 {
 	if (indio_dev)
 		put_device(&indio_dev->dev);
-};
+}
 
 /**
  * dev_to_iio_dev() - Get IIO device struct from a device struct
@@ -593,7 +593,7 @@ static inline bool iio_buffer_enabled(struct iio_dev *indio_dev)
 {
 	return indio_dev->currentmode
 		& (INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE);
-};
+}
 
 /**
  * iio_get_debugfs_dentry() - helper function to get the debugfs_dentry
@@ -603,12 +603,12 @@ static inline bool iio_buffer_enabled(struct iio_dev *indio_dev)
 static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev)
 {
 	return indio_dev->debugfs_dentry;
-};
+}
 #else
 static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev)
 {
 	return NULL;
-};
+}
 #endif
 
 int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer,
-- 
cgit v1.2.3


From 272686bf46a34f86d270cf192f68769667792026 Mon Sep 17 00:00:00 2001
From: Leif Lindholm <leif.lindholm@linaro.org>
Date: Thu, 5 Sep 2013 11:34:54 +0100
Subject: efi: x86: ia64: provide a generic efi_config_init()

Common to (U)EFI support on all platforms is the global "efi" data
structure, and the code that parses the System Table to locate
addresses to populate that structure with.

This patch adds both of these to the global EFI driver code and
removes the local definition of the global "efi" data structure from
the x86 and ia64 code.

Squashed into one big patch to avoid breaking bisection.

Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/ia64/kernel/efi.c      |  54 +++++-----------------
 arch/x86/platform/efi/efi.c |  96 ++++-----------------------------------
 drivers/firmware/efi/efi.c  | 108 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/efi.h         |   7 +++
 4 files changed, 134 insertions(+), 131 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 51bce594eb83..da5b462e6de6 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -44,10 +44,15 @@
 
 #define EFI_DEBUG	0
 
+static __initdata unsigned long palo_phys;
+
+static __initdata efi_config_table_type_t arch_tables[] = {
+	{PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys},
+	{NULL_GUID, NULL, 0},
+};
+
 extern efi_status_t efi_call_phys (void *, ...);
 
-struct efi efi;
-EXPORT_SYMBOL(efi);
 static efi_runtime_services_t *runtime;
 static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
 
@@ -423,9 +428,9 @@ static u8 __init palo_checksum(u8 *buffer, u32 length)
  * Parse and handle PALO table which is published at:
  * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
  */
-static void __init handle_palo(unsigned long palo_phys)
+static void __init handle_palo(unsigned long phys_addr)
 {
-	struct palo_table *palo = __va(palo_phys);
+	struct palo_table *palo = __va(phys_addr);
 	u8  checksum;
 
 	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
@@ -467,12 +472,10 @@ void __init
 efi_init (void)
 {
 	void *efi_map_start, *efi_map_end;
-	efi_config_table_t *config_tables;
 	efi_char16_t *c16;
 	u64 efi_desc_size;
 	char *cp, vendor[100] = "unknown";
 	int i;
-	unsigned long palo_phys;
 
 	/*
 	 * It's too early to be able to use the standard kernel command line
@@ -514,8 +517,6 @@ efi_init (void)
 		       efi.systab->hdr.revision >> 16,
 		       efi.systab->hdr.revision & 0xffff);
 
-	config_tables = __va(efi.systab->tables);
-
 	/* Show what we know for posterity */
 	c16 = __va(efi.systab->fw_vendor);
 	if (c16) {
@@ -528,43 +529,10 @@ efi_init (void)
 	       efi.systab->hdr.revision >> 16,
 	       efi.systab->hdr.revision & 0xffff, vendor);
 
-	efi.mps        = EFI_INVALID_TABLE_ADDR;
-	efi.acpi       = EFI_INVALID_TABLE_ADDR;
-	efi.acpi20     = EFI_INVALID_TABLE_ADDR;
-	efi.smbios     = EFI_INVALID_TABLE_ADDR;
-	efi.sal_systab = EFI_INVALID_TABLE_ADDR;
-	efi.boot_info  = EFI_INVALID_TABLE_ADDR;
-	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
-	efi.uga        = EFI_INVALID_TABLE_ADDR;
-
 	palo_phys      = EFI_INVALID_TABLE_ADDR;
 
-	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
-		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
-			efi.mps = config_tables[i].table;
-			printk(" MPS=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
-			efi.acpi20 = config_tables[i].table;
-			printk(" ACPI 2.0=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
-			efi.acpi = config_tables[i].table;
-			printk(" ACPI=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
-			efi.smbios = config_tables[i].table;
-			printk(" SMBIOS=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
-			efi.sal_systab = config_tables[i].table;
-			printk(" SALsystab=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
-			efi.hcdp = config_tables[i].table;
-			printk(" HCDP=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid,
-			 PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
-			palo_phys = config_tables[i].table;
-			printk(" PALO=0x%lx", config_tables[i].table);
-		}
-	}
-	printk("\n");
+	if (efi_config_init(arch_tables) != 0)
+		return;
 
 	if (palo_phys != EFI_INVALID_TABLE_ADDR)
 		handle_palo(palo_phys);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 90f6ed127096..ed2be58ea3f1 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -60,19 +60,6 @@
 
 static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
 
-struct efi __read_mostly efi = {
-	.mps        = EFI_INVALID_TABLE_ADDR,
-	.acpi       = EFI_INVALID_TABLE_ADDR,
-	.acpi20     = EFI_INVALID_TABLE_ADDR,
-	.smbios     = EFI_INVALID_TABLE_ADDR,
-	.sal_systab = EFI_INVALID_TABLE_ADDR,
-	.boot_info  = EFI_INVALID_TABLE_ADDR,
-	.hcdp       = EFI_INVALID_TABLE_ADDR,
-	.uga        = EFI_INVALID_TABLE_ADDR,
-	.uv_systab  = EFI_INVALID_TABLE_ADDR,
-};
-EXPORT_SYMBOL(efi);
-
 struct efi_memory_map memmap;
 
 static struct efi efi_phys __initdata;
@@ -80,6 +67,13 @@ static efi_system_table_t efi_systab __initdata;
 
 unsigned long x86_efi_facility;
 
+static __initdata efi_config_table_type_t arch_tables[] = {
+#ifdef CONFIG_X86_UV
+	{UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
+#endif
+	{NULL_GUID, NULL, 0},
+};
+
 /*
  * Returns 1 if 'facility' is enabled, 0 otherwise.
  */
@@ -578,80 +572,6 @@ static int __init efi_systab_init(void *phys)
 	return 0;
 }
 
-static int __init efi_config_init(u64 tables, int nr_tables)
-{
-	void *config_tables, *tablep;
-	int i, sz;
-
-	if (efi_enabled(EFI_64BIT))
-		sz = sizeof(efi_config_table_64_t);
-	else
-		sz = sizeof(efi_config_table_32_t);
-
-	/*
-	 * Let's see what config tables the firmware passed to us.
-	 */
-	config_tables = early_ioremap(tables, nr_tables * sz);
-	if (config_tables == NULL) {
-		pr_err("Could not map Configuration table!\n");
-		return -ENOMEM;
-	}
-
-	tablep = config_tables;
-	pr_info("");
-	for (i = 0; i < efi.systab->nr_tables; i++) {
-		efi_guid_t guid;
-		unsigned long table;
-
-		if (efi_enabled(EFI_64BIT)) {
-			u64 table64;
-			guid = ((efi_config_table_64_t *)tablep)->guid;
-			table64 = ((efi_config_table_64_t *)tablep)->table;
-			table = table64;
-#ifdef CONFIG_X86_32
-			if (table64 >> 32) {
-				pr_cont("\n");
-				pr_err("Table located above 4GB, disabling EFI.\n");
-				early_iounmap(config_tables,
-					      efi.systab->nr_tables * sz);
-				return -EINVAL;
-			}
-#endif
-		} else {
-			guid = ((efi_config_table_32_t *)tablep)->guid;
-			table = ((efi_config_table_32_t *)tablep)->table;
-		}
-		if (!efi_guidcmp(guid, MPS_TABLE_GUID)) {
-			efi.mps = table;
-			pr_cont(" MPS=0x%lx ", table);
-		} else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
-			efi.acpi20 = table;
-			pr_cont(" ACPI 2.0=0x%lx ", table);
-		} else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
-			efi.acpi = table;
-			pr_cont(" ACPI=0x%lx ", table);
-		} else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) {
-			efi.smbios = table;
-			pr_cont(" SMBIOS=0x%lx ", table);
-#ifdef CONFIG_X86_UV
-		} else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) {
-			efi.uv_systab = table;
-			pr_cont(" UVsystab=0x%lx ", table);
-#endif
-		} else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) {
-			efi.hcdp = table;
-			pr_cont(" HCDP=0x%lx ", table);
-		} else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) {
-			efi.uga = table;
-			pr_cont(" UGA=0x%lx ", table);
-		}
-		tablep += sz;
-	}
-	pr_cont("\n");
-	early_iounmap(config_tables, efi.systab->nr_tables * sz);
-	return 0;
-}
-
 static int __init efi_runtime_init(void)
 {
 	efi_runtime_services_t *runtime;
@@ -745,7 +665,7 @@ void __init efi_init(void)
 		efi.systab->hdr.revision >> 16,
 		efi.systab->hdr.revision & 0xffff, vendor);
 
-	if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
+	if (efi_config_init(arch_tables))
 		return;
 
 	set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 5145fa344ad5..e1010d450b65 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -13,11 +13,27 @@
  * This file is released under the GPLv2.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kobject.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/efi.h>
+#include <linux/io.h>
+
+struct efi __read_mostly efi = {
+	.mps        = EFI_INVALID_TABLE_ADDR,
+	.acpi       = EFI_INVALID_TABLE_ADDR,
+	.acpi20     = EFI_INVALID_TABLE_ADDR,
+	.smbios     = EFI_INVALID_TABLE_ADDR,
+	.sal_systab = EFI_INVALID_TABLE_ADDR,
+	.boot_info  = EFI_INVALID_TABLE_ADDR,
+	.hcdp       = EFI_INVALID_TABLE_ADDR,
+	.uga        = EFI_INVALID_TABLE_ADDR,
+	.uv_systab  = EFI_INVALID_TABLE_ADDR,
+};
+EXPORT_SYMBOL(efi);
 
 static struct kobject *efi_kobj;
 static struct kobject *efivars_kobj;
@@ -132,3 +148,95 @@ err_put:
 }
 
 subsys_initcall(efisubsys_init);
+
+
+static __initdata efi_config_table_type_t common_tables[] = {
+	{ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
+	{ACPI_TABLE_GUID, "ACPI", &efi.acpi},
+	{HCDP_TABLE_GUID, "HCDP", &efi.hcdp},
+	{MPS_TABLE_GUID, "MPS", &efi.mps},
+	{SAL_SYSTEM_TABLE_GUID, "SALsystab", &efi.sal_systab},
+	{SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios},
+	{UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga},
+	{NULL_GUID, NULL, 0},
+};
+
+static __init int match_config_table(efi_guid_t *guid,
+				     unsigned long table,
+				     efi_config_table_type_t *table_types)
+{
+	u8 str[EFI_VARIABLE_GUID_LEN + 1];
+	int i;
+
+	if (table_types) {
+		efi_guid_unparse(guid, str);
+
+		for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
+			efi_guid_unparse(&table_types[i].guid, str);
+
+			if (!efi_guidcmp(*guid, table_types[i].guid)) {
+				*(table_types[i].ptr) = table;
+				pr_cont(" %s=0x%lx ",
+					table_types[i].name, table);
+				return 1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+int __init efi_config_init(efi_config_table_type_t *arch_tables)
+{
+	void *config_tables, *tablep;
+	int i, sz;
+
+	if (efi_enabled(EFI_64BIT))
+		sz = sizeof(efi_config_table_64_t);
+	else
+		sz = sizeof(efi_config_table_32_t);
+
+	/*
+	 * Let's see what config tables the firmware passed to us.
+	 */
+	config_tables = early_memremap(efi.systab->tables,
+				       efi.systab->nr_tables * sz);
+	if (config_tables == NULL) {
+		pr_err("Could not map Configuration table!\n");
+		return -ENOMEM;
+	}
+
+	tablep = config_tables;
+	pr_info("");
+	for (i = 0; i < efi.systab->nr_tables; i++) {
+		efi_guid_t guid;
+		unsigned long table;
+
+		if (efi_enabled(EFI_64BIT)) {
+			u64 table64;
+			guid = ((efi_config_table_64_t *)tablep)->guid;
+			table64 = ((efi_config_table_64_t *)tablep)->table;
+			table = table64;
+#ifndef CONFIG_64BIT
+			if (table64 >> 32) {
+				pr_cont("\n");
+				pr_err("Table located above 4GB, disabling EFI.\n");
+				early_iounmap(config_tables,
+					       efi.systab->nr_tables * sz);
+				return -EINVAL;
+			}
+#endif
+		} else {
+			guid = ((efi_config_table_32_t *)tablep)->guid;
+			table = ((efi_config_table_32_t *)tablep)->table;
+		}
+
+		if (!match_config_table(&guid, table, common_tables))
+			match_config_table(&guid, table, arch_tables);
+
+		tablep += sz;
+	}
+	pr_cont("\n");
+	early_iounmap(config_tables, efi.systab->nr_tables * sz);
+	return 0;
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5f8f176154f7..09d9e4212799 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -404,6 +404,12 @@ typedef struct {
 	unsigned long table;
 } efi_config_table_t;
 
+typedef struct {
+	efi_guid_t guid;
+	const char *name;
+	unsigned long *ptr;
+} efi_config_table_type_t;
+
 #define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL)
 
 #define EFI_2_30_SYSTEM_TABLE_REVISION  ((2 << 16) | (30))
@@ -587,6 +593,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon
 }
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
+extern int efi_config_init(efi_config_table_type_t *arch_tables);
 extern u64 efi_get_iobase (void);
 extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
-- 
cgit v1.2.3


From 258f6fd738221766b512cd8c7120563b78d62829 Mon Sep 17 00:00:00 2001
From: Leif Lindholm <leif.lindholm@linaro.org>
Date: Thu, 5 Sep 2013 11:34:55 +0100
Subject: efi: x86: make efi_lookup_mapped_addr() a common function

efi_lookup_mapped_addr() is a handy utility for other platforms than
x86. Move it from arch/x86 to drivers/firmware. Add memmap pointer
to global efi structure, and initialise it on x86.

Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c | 30 ++----------------------------
 drivers/firmware/efi/efi.c  | 32 ++++++++++++++++++++++++++++++++
 include/linux/efi.h         |  1 +
 3 files changed, 35 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ed2be58ea3f1..fbc1d70188f8 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -393,6 +393,8 @@ int __init efi_memblock_x86_reserve_range(void)
 
 	memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
 
+	efi.memmap = &memmap;
+
 	return 0;
 }
 
@@ -736,34 +738,6 @@ static void __init runtime_code_page_mkexec(void)
 	}
 }
 
-/*
- * We can't ioremap data in EFI boot services RAM, because we've already mapped
- * it as RAM.  So, look it up in the existing EFI memory map instead.  Only
- * callable after efi_enter_virtual_mode and before efi_free_boot_services.
- */
-void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
-{
-	void *p;
-	if (WARN_ON(!memmap.map))
-		return NULL;
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		efi_memory_desc_t *md = p;
-		u64 size = md->num_pages << EFI_PAGE_SHIFT;
-		u64 end = md->phys_addr + size;
-		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
-		    md->type != EFI_BOOT_SERVICES_CODE &&
-		    md->type != EFI_BOOT_SERVICES_DATA)
-			continue;
-		if (!md->virt_addr)
-			continue;
-		if (phys_addr >= md->phys_addr && phys_addr < end) {
-			phys_addr += md->virt_addr - md->phys_addr;
-			return (__force void __iomem *)(unsigned long)phys_addr;
-		}
-	}
-	return NULL;
-}
-
 void efi_memory_uc(u64 addr, unsigned long size)
 {
 	unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index e1010d450b65..2e2fbdec0845 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -150,6 +150,38 @@ err_put:
 subsys_initcall(efisubsys_init);
 
 
+/*
+ * We can't ioremap data in EFI boot services RAM, because we've already mapped
+ * it as RAM.  So, look it up in the existing EFI memory map instead.  Only
+ * callable after efi_enter_virtual_mode and before efi_free_boot_services.
+ */
+void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
+{
+	struct efi_memory_map *map;
+	void *p;
+	map = efi.memmap;
+	if (!map)
+		return NULL;
+	if (WARN_ON(!map->map))
+		return NULL;
+	for (p = map->map; p < map->map_end; p += map->desc_size) {
+		efi_memory_desc_t *md = p;
+		u64 size = md->num_pages << EFI_PAGE_SHIFT;
+		u64 end = md->phys_addr + size;
+		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+		    md->type != EFI_BOOT_SERVICES_CODE &&
+		    md->type != EFI_BOOT_SERVICES_DATA)
+			continue;
+		if (!md->virt_addr)
+			continue;
+		if (phys_addr >= md->phys_addr && phys_addr < end) {
+			phys_addr += md->virt_addr - md->phys_addr;
+			return (__force void __iomem *)(unsigned long)phys_addr;
+		}
+	}
+	return NULL;
+}
+
 static __initdata efi_config_table_type_t common_tables[] = {
 	{ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
 	{ACPI_TABLE_GUID, "ACPI", &efi.acpi},
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 09d9e4212799..c084b6d942c3 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -558,6 +558,7 @@ extern struct efi {
 	efi_get_next_high_mono_count_t *get_next_high_mono_count;
 	efi_reset_system_t *reset_system;
 	efi_set_virtual_address_map_t *set_virtual_address_map;
+	struct efi_memory_map *memmap;
 } efi;
 
 static inline int
-- 
cgit v1.2.3


From 5a523605afa7d3b54b2e7041f8c9e6bc39872a7e Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 10 Sep 2013 15:45:05 +0530
Subject: regulator: core: provide fixed voltage in desc for single voltage
 rail

If given rail has the single voltage (n_voltages = 1) then provide the
rail voltage through regulator descriptor so that core can use this
value for finding voltage.

This will avoid the implementation of the callback for get_voltage() or
list_voltage() callback on regulator driver.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/core.c         | 5 ++++-
 include/linux/regulator/driver.h | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a01b8b3b70ca..abc899ed1ebf 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2489,6 +2489,8 @@ static int _regulator_get_voltage(struct regulator_dev *rdev)
 		ret = rdev->desc->ops->get_voltage(rdev);
 	} else if (rdev->desc->ops->list_voltage) {
 		ret = rdev->desc->ops->list_voltage(rdev, 0);
+	} else if (rdev->desc->fixed_uV && (rdev->desc->n_voltages == 1)) {
+		ret = rdev->desc->fixed_uV;
 	} else {
 		return -EINVAL;
 	}
@@ -3170,7 +3172,8 @@ static int add_regulator_attributes(struct regulator_dev *rdev)
 	/* some attributes need specific methods to be displayed */
 	if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) ||
 	    (ops->get_voltage_sel && ops->get_voltage_sel(rdev) >= 0) ||
-	    (ops->list_voltage && ops->list_voltage(rdev, 0) >= 0)) {
+	    (ops->list_voltage && ops->list_voltage(rdev, 0) >= 0) ||
+		(rdev->desc->fixed_uV && (rdev->desc->n_voltages == 1))) {
 		status = device_create_file(dev, &dev_attr_microvolts);
 		if (status < 0)
 			return status;
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 67e13aa5a478..9e8241a9f28f 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -207,6 +207,7 @@ enum regulator_type {
  * @min_uV: Voltage given by the lowest selector (if linear mapping)
  * @uV_step: Voltage increase with each selector (if linear mapping)
  * @linear_min_sel: Minimal selector for starting linear mapping
+ * @fixed_uV: Fixed voltage of rails.
  * @ramp_delay: Time to settle down after voltage change (unit: uV/us)
  * @volt_table: Voltage mapping table (if table based mapping)
  *
@@ -239,6 +240,7 @@ struct regulator_desc {
 	unsigned int min_uV;
 	unsigned int uV_step;
 	unsigned int linear_min_sel;
+	int fixed_uV;
 	unsigned int ramp_delay;
 
 	const struct regulator_linear_range *linear_ranges;
-- 
cgit v1.2.3


From 899d90bdf4d4ef4c3ac0b33cd337c9b3e999ec2d Mon Sep 17 00:00:00 2001
From: Peter Meerwald <pmeerw@pmeerw.net>
Date: Sun, 8 Sep 2013 16:20:00 +0100
Subject: iio: Add INT_TIME (integration time) channel info attribute

Integration time is in seconds; it controls the measurement
time and influences the gain of a sensor.

There are two typical ways that scaling is implemented in a device:
1) input amplifier,
2) reference to the ADC is changed.
These both result in the accuracy of the ADC varying (by applying its
sampling over a more relevant range).

Integration time is a way of dealing with noise inherent in the analog
sensor itself.  In the case of a light sensor, a mixture of photon noise
and device specific noise.  Photon noise is dealt with by either improving
the efficiency of the sensor, (more photons actually captured) which is not
easily varied dynamically, or by integrating the measurement over a longer
time period.  Note that this can also be thought of as an averaging of a
number of individual samples and is infact sometimes implemented this way.
Altering integration time implies that the duration of a measurement changes,
a fact the device's user may be interested in.

Hence it makes sense to distinguish between integration time and simple
scale. In some devices both types of control are present and whilst they
will have similar effects on the amplitude of the reading, their effect
on the noise of the measurements will differ considerably.

Used by adjd_s311, tsl4531, tcs3472
The following drivers have similar controls (and could be adapted):
* tsl2563 (integration time is controlled via CALIBSCALE among other things)
* tsl2583 (has integration_time device_attr, but driver doesn't use channels yet)
* tsl2x7x (has integration_time attr)

Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
Cc: Jon Brenner <jon.brenner@ams.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 Documentation/ABI/testing/sysfs-bus-iio | 11 +++++++++++
 drivers/iio/industrialio-core.c         |  1 +
 include/linux/iio/iio.h                 |  1 +
 include/linux/iio/sysfs.h               | 15 +++++++++++++++
 4 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 39c8de0e53d0..ab1047c20495 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -811,3 +811,14 @@ Description:
 		Writing '1' stores the current device configuration into
 		on-chip EEPROM. After power-up or chip reset the device will
 		automatically load the saved configuration.
+
+What:		/sys/.../iio:deviceX/in_intensity_red_integration_time
+What:		/sys/.../iio:deviceX/in_intensity_green_integration_time
+What:		/sys/.../iio:deviceX/in_intensity_blue_integration_time
+What:		/sys/.../iio:deviceX/in_intensity_clear_integration_time
+What:		/sys/.../iio:deviceX/in_illuminance_integration_time
+KernelVersion:	3.12
+Contact:	linux-iio@vger.kernel.org
+Description:
+		This attribute is used to get/set the integration time in
+		seconds.
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 97f0297b120f..2cb4841d5357 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -101,6 +101,7 @@ static const char * const iio_chan_info_postfix[] = {
 	[IIO_CHAN_INFO_PHASE] = "phase",
 	[IIO_CHAN_INFO_HARDWAREGAIN] = "hardwaregain",
 	[IIO_CHAN_INFO_HYSTERESIS] = "hysteresis",
+	[IIO_CHAN_INFO_INT_TIME] = "integration_time",
 };
 
 const struct iio_chan_spec
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 8e7a8132b109..01edd6795550 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -36,6 +36,7 @@ enum iio_chan_info_enum {
 	IIO_CHAN_INFO_PHASE,
 	IIO_CHAN_INFO_HARDWAREGAIN,
 	IIO_CHAN_INFO_HYSTERESIS,
+	IIO_CHAN_INFO_INT_TIME,
 };
 
 enum iio_endian {
diff --git a/include/linux/iio/sysfs.h b/include/linux/iio/sysfs.h
index 2958c960003a..8a1d18640ab9 100644
--- a/include/linux/iio/sysfs.h
+++ b/include/linux/iio/sysfs.h
@@ -100,6 +100,21 @@ struct iio_const_attr {
 #define IIO_CONST_ATTR_SAMP_FREQ_AVAIL(_string)			\
 	IIO_CONST_ATTR(sampling_frequency_available, _string)
 
+/**
+ * IIO_DEV_ATTR_INT_TIME_AVAIL - list available integration times
+ * @_show: output method for the attribute
+ **/
+#define IIO_DEV_ATTR_INT_TIME_AVAIL(_show)		\
+	IIO_DEVICE_ATTR(integration_time_available, S_IRUGO, _show, NULL, 0)
+/**
+ * IIO_CONST_ATTR_INT_TIME_AVAIL - list available integration times
+ * @_string: frequency string for the attribute
+ *
+ * Constant version
+ **/
+#define IIO_CONST_ATTR_INT_TIME_AVAIL(_string)		\
+	IIO_CONST_ATTR(integration_time_available, _string)
+
 #define IIO_DEV_ATTR_TEMP_RAW(_show)			\
 	IIO_DEVICE_ATTR(in_temp_raw, S_IRUGO, _show, NULL, 0)
 
-- 
cgit v1.2.3


From ea01f2c18a22a2a8385909d64662afc92da6b13f Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Tue, 10 Sep 2013 13:49:00 +0100
Subject: iio: pressure-core: st: Allow for number of channels to vary

At the moment the number of channels specified is dictated by the first
sensor supported by the driver. As we add support for more sensors this
is likely to vary. Instead of using the ARRAY_SIZE() of the LPS331AP's
channel specifier we'll use a new adaptable 'struct st_sensors' element
instead.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/pressure/st_pressure_core.c | 3 ++-
 include/linux/iio/common/st_sensors.h   | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 506b02d27b0a..93bff9ba05d6 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -107,6 +107,7 @@ static const struct st_sensors st_press_sensors[] = {
 			[0] = LPS331AP_PRESS_DEV_NAME,
 		},
 		.ch = (struct iio_chan_spec *)st_press_lps331ap_channels,
+		.num_ch = ARRAY_SIZE(st_press_lps331ap_channels),
 		.odr = {
 			.addr = ST_PRESS_LPS331AP_ODR_ADDR,
 			.mask = ST_PRESS_LPS331AP_ODR_MASK,
@@ -245,7 +246,7 @@ int st_press_common_probe(struct iio_dev *indio_dev,
 	pdata->num_data_channels = ST_PRESS_NUMBER_DATA_CHANNELS;
 	pdata->multiread_bit = pdata->sensor->multi_read_bit;
 	indio_dev->channels = pdata->sensor->ch;
-	indio_dev->num_channels = ARRAY_SIZE(st_press_lps331ap_channels);
+	indio_dev->num_channels = pdata->sensor->num_ch;
 
 	if (pdata->sensor->fs.addr != 0)
 		pdata->current_fullscale = (struct st_sensor_fullscale_avl *)
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index e51f65480ea5..e732fda6c8e6 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -184,6 +184,7 @@ struct st_sensors {
 	u8 wai;
 	char sensors_supported[ST_SENSORS_MAX_4WAI][ST_SENSORS_MAX_NAME];
 	struct iio_chan_spec *ch;
+	int num_ch;
 	struct st_sensor_odr odr;
 	struct st_sensor_power pw;
 	struct st_sensor_axis enable_axis;
-- 
cgit v1.2.3


From 9761696f2be276b169b77bcf359bc50a251c9280 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Sun, 8 Sep 2013 14:57:00 +0100
Subject: iio: drop info_mask from struct iio_dev

Somehow this got missed when dropping all the code that used it
prior to the split.  Remove it now, there are no users.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/iio.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 01edd6795550..bc408e2def40 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -147,12 +147,10 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev,
  *			shift:		Shift right by this before masking out
  *					realbits.
  *			endianness:	little or big endian
- * @info_mask:		What information is to be exported about this channel.
- *			This includes calibbias, scale etc.
  * @info_mask_separate: What information is to be exported that is specific to
  *			this channel.
  * @info_mask_shared_by_type: What information is to be exported that is shared
-*			by all channels of the same type.
+ *			by all channels of the same type.
  * @event_mask:		What events can this channel produce.
  * @ext_info:		Array of extended info attributes for this channel.
  *			The array is NULL terminated, the last element should
@@ -187,7 +185,6 @@ struct iio_chan_spec {
 		u8	shift;
 		enum iio_endian endianness;
 	} scan_type;
-	long			info_mask;
 	long			info_mask_separate;
 	long			info_mask_shared_by_type;
 	long			event_mask;
-- 
cgit v1.2.3


From 3704432fb1fd8ab2df114bad6df752381246b609 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Sun, 8 Sep 2013 14:57:00 +0100
Subject: iio: refactor info mask and ext_info attribute creation.

Introduce an enum to specify whether the attribute is separate or
shared.

Factor out the bitmap handling for loop into a separate function.

Tidy up error handling and add a NULL assignment to squish a false
positive warning from GCC.

Change ext_info shared type from boolean to enum and update in all
drivers.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 drivers/iio/dac/ad5064.c          |   3 +-
 drivers/iio/dac/ad5380.c          |   4 +-
 drivers/iio/dac/ad5446.c          |   3 +-
 drivers/iio/dac/ad5504.c          |   4 +-
 drivers/iio/dac/ad5624r_spi.c     |   4 +-
 drivers/iio/dac/ad5686.c          |   3 +-
 drivers/iio/dac/ad5755.c          |   1 +
 drivers/iio/dac/ad5791.c          |   5 +-
 drivers/iio/dac/ad7303.c          |   1 +
 drivers/iio/dac/mcp4725.c         |   3 +-
 drivers/iio/frequency/adf4350.c   |   1 +
 drivers/iio/iio_core.h            |   2 +-
 drivers/iio/industrialio-buffer.c |   2 +-
 drivers/iio/industrialio-core.c   | 149 ++++++++++++++++++++------------------
 include/linux/iio/iio.h           |   9 ++-
 15 files changed, 112 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c
index a3a52be4852c..b18e8c4347c2 100644
--- a/drivers/iio/dac/ad5064.c
+++ b/drivers/iio/dac/ad5064.c
@@ -285,8 +285,9 @@ static const struct iio_chan_spec_ext_info ad5064_ext_info[] = {
 		.name = "powerdown",
 		.read = ad5064_read_dac_powerdown,
 		.write = ad5064_write_dac_powerdown,
+		.shared = IIO_SEPARATE,
 	},
-	IIO_ENUM("powerdown_mode", false, &ad5064_powerdown_mode_enum),
+	IIO_ENUM("powerdown_mode", IIO_SEPARATE, &ad5064_powerdown_mode_enum),
 	IIO_ENUM_AVAILABLE("powerdown_mode", &ad5064_powerdown_mode_enum),
 	{ },
 };
diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c
index 1c44ae3920e2..4c791e66e0d7 100644
--- a/drivers/iio/dac/ad5380.c
+++ b/drivers/iio/dac/ad5380.c
@@ -247,8 +247,10 @@ static struct iio_chan_spec_ext_info ad5380_ext_info[] = {
 		.name = "powerdown",
 		.read = ad5380_read_dac_powerdown,
 		.write = ad5380_write_dac_powerdown,
+		.shared = IIO_SEPARATE,
 	},
-	IIO_ENUM("powerdown_mode", true, &ad5380_powerdown_mode_enum),
+	IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE,
+		 &ad5380_powerdown_mode_enum),
 	IIO_ENUM_AVAILABLE("powerdown_mode", &ad5380_powerdown_mode_enum),
 	{ },
 };
diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c
index 96e9ed4c2d01..6dcb6d93f0e4 100644
--- a/drivers/iio/dac/ad5446.c
+++ b/drivers/iio/dac/ad5446.c
@@ -132,8 +132,9 @@ static const struct iio_chan_spec_ext_info ad5446_ext_info_powerdown[] = {
 		.name = "powerdown",
 		.read = ad5446_read_dac_powerdown,
 		.write = ad5446_write_dac_powerdown,
+		.shared = IIO_SEPARATE,
 	},
-	IIO_ENUM("powerdown_mode", false, &ad5446_powerdown_mode_enum),
+	IIO_ENUM("powerdown_mode", IIO_SEPARATE, &ad5446_powerdown_mode_enum),
 	IIO_ENUM_AVAILABLE("powerdown_mode", &ad5446_powerdown_mode_enum),
 	{ },
 };
diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c
index caffb16bc05c..31f4ff29f914 100644
--- a/drivers/iio/dac/ad5504.c
+++ b/drivers/iio/dac/ad5504.c
@@ -248,8 +248,10 @@ static const struct iio_chan_spec_ext_info ad5504_ext_info[] = {
 		.name = "powerdown",
 		.read = ad5504_read_dac_powerdown,
 		.write = ad5504_write_dac_powerdown,
+		.shared = IIO_SEPARATE,
 	},
-	IIO_ENUM("powerdown_mode", true, &ad5504_powerdown_mode_enum),
+	IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE,
+		 &ad5504_powerdown_mode_enum),
 	IIO_ENUM_AVAILABLE("powerdown_mode", &ad5504_powerdown_mode_enum),
 	{ },
 };
diff --git a/drivers/iio/dac/ad5624r_spi.c b/drivers/iio/dac/ad5624r_spi.c
index 714af757cd56..dbb1289cc7a6 100644
--- a/drivers/iio/dac/ad5624r_spi.c
+++ b/drivers/iio/dac/ad5624r_spi.c
@@ -163,8 +163,10 @@ static const struct iio_chan_spec_ext_info ad5624r_ext_info[] = {
 		.name = "powerdown",
 		.read = ad5624r_read_dac_powerdown,
 		.write = ad5624r_write_dac_powerdown,
+		.shared = IIO_SEPARATE,
 	},
-	IIO_ENUM("powerdown_mode", true, &ad5624r_powerdown_mode_enum),
+	IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE,
+		 &ad5624r_powerdown_mode_enum),
 	IIO_ENUM_AVAILABLE("powerdown_mode", &ad5624r_powerdown_mode_enum),
 	{ },
 };
diff --git a/drivers/iio/dac/ad5686.c b/drivers/iio/dac/ad5686.c
index 3e1080feedbf..f472b48445f6 100644
--- a/drivers/iio/dac/ad5686.c
+++ b/drivers/iio/dac/ad5686.c
@@ -264,8 +264,9 @@ static const struct iio_chan_spec_ext_info ad5686_ext_info[] = {
 		.name = "powerdown",
 		.read = ad5686_read_dac_powerdown,
 		.write = ad5686_write_dac_powerdown,
+		.shared = IIO_SEPARATE,
 	},
-	IIO_ENUM("powerdown_mode", false, &ad5686_powerdown_mode_enum),
+	IIO_ENUM("powerdown_mode", IIO_SEPARATE, &ad5686_powerdown_mode_enum),
 	IIO_ENUM_AVAILABLE("powerdown_mode", &ad5686_powerdown_mode_enum),
 	{ },
 };
diff --git a/drivers/iio/dac/ad5755.c b/drivers/iio/dac/ad5755.c
index 36a4361aece1..f305a0c83418 100644
--- a/drivers/iio/dac/ad5755.c
+++ b/drivers/iio/dac/ad5755.c
@@ -386,6 +386,7 @@ static const struct iio_chan_spec_ext_info ad5755_ext_info[] = {
 		.name = "powerdown",
 		.read = ad5755_read_powerdown,
 		.write = ad5755_write_powerdown,
+		.shared = IIO_SEPARATE,
 	},
 	{ },
 };
diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c
index ce7458963309..3cee89be68c3 100644
--- a/drivers/iio/dac/ad5791.c
+++ b/drivers/iio/dac/ad5791.c
@@ -287,11 +287,12 @@ static int ad5791_read_raw(struct iio_dev *indio_dev,
 static const struct iio_chan_spec_ext_info ad5791_ext_info[] = {
 	{
 		.name = "powerdown",
-		.shared = true,
+		.shared = IIO_SHARED_BY_TYPE,
 		.read = ad5791_read_dac_powerdown,
 		.write = ad5791_write_dac_powerdown,
 	},
-	IIO_ENUM("powerdown_mode", true, &ad5791_powerdown_mode_enum),
+	IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE,
+		 &ad5791_powerdown_mode_enum),
 	IIO_ENUM_AVAILABLE("powerdown_mode", &ad5791_powerdown_mode_enum),
 	{ },
 };
diff --git a/drivers/iio/dac/ad7303.c b/drivers/iio/dac/ad7303.c
index ed2d276477bd..d0505fd22ef4 100644
--- a/drivers/iio/dac/ad7303.c
+++ b/drivers/iio/dac/ad7303.c
@@ -169,6 +169,7 @@ static const struct iio_chan_spec_ext_info ad7303_ext_info[] = {
 		.name = "powerdown",
 		.read = ad7303_read_dac_powerdown,
 		.write = ad7303_write_dac_powerdown,
+		.shared = IIO_SEPARATE,
 	},
 	{ },
 };
diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c
index 1f4a48e6a82c..6711a33b16ba 100644
--- a/drivers/iio/dac/mcp4725.c
+++ b/drivers/iio/dac/mcp4725.c
@@ -195,8 +195,9 @@ static const struct iio_chan_spec_ext_info mcp4725_ext_info[] = {
 		.name = "powerdown",
 		.read = mcp4725_read_powerdown,
 		.write = mcp4725_write_powerdown,
+		.shared = IIO_SEPARATE,
 	},
-	IIO_ENUM("powerdown_mode", false, &mcp4725_powerdown_mode_enum),
+	IIO_ENUM("powerdown_mode", IIO_SEPARATE, &mcp4725_powerdown_mode_enum),
 	IIO_ENUM_AVAILABLE("powerdown_mode", &mcp4725_powerdown_mode_enum),
 	{ },
 };
diff --git a/drivers/iio/frequency/adf4350.c b/drivers/iio/frequency/adf4350.c
index a7b30be86ae0..85152547aa8b 100644
--- a/drivers/iio/frequency/adf4350.c
+++ b/drivers/iio/frequency/adf4350.c
@@ -351,6 +351,7 @@ static ssize_t adf4350_read(struct iio_dev *indio_dev,
 	.read = adf4350_read, \
 	.write = adf4350_write, \
 	.private = _ident, \
+	.shared = IIO_SEPARATE, \
 }
 
 static const struct iio_chan_spec_ext_info adf4350_ext_info[] = {
diff --git a/drivers/iio/iio_core.h b/drivers/iio/iio_core.h
index 05c1b74502a3..6be5ab864b1a 100644
--- a/drivers/iio/iio_core.h
+++ b/drivers/iio/iio_core.h
@@ -30,7 +30,7 @@ int __iio_add_chan_devattr(const char *postfix,
 						const char *buf,
 						size_t len),
 			   u64 mask,
-			   bool generic,
+			   enum iio_shared_by shared_by,
 			   struct device *dev,
 			   struct list_head *attr_list);
 
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 96e97ad2538c..7ea2edbf7614 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -208,7 +208,7 @@ static int iio_buffer_add_channel_sysfs(struct iio_dev *indio_dev,
 				     &iio_show_scan_index,
 				     NULL,
 				     0,
-				     0,
+				     IIO_SEPARATE,
 				     &indio_dev->dev,
 				     &buffer->scan_el_dev_attr_list);
 	if (ret)
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 2cb4841d5357..d380c7278a26 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -517,14 +517,15 @@ int __iio_device_attr_init(struct device_attribute *dev_attr,
 						struct device_attribute *attr,
 						const char *buf,
 						size_t len),
-			   bool generic)
+			   enum iio_shared_by shared_by)
 {
-	int ret;
-	char *name_format, *full_postfix;
+	int ret = 0;
+	char *name_format = NULL;
+	char *full_postfix;
 	sysfs_attr_init(&dev_attr->attr);
 
 	/* Build up postfix of <extend_name>_<modifier>_postfix */
-	if (chan->modified && !generic) {
+	if (chan->modified && (shared_by == IIO_SEPARATE)) {
 		if (chan->extend_name)
 			full_postfix = kasprintf(GFP_KERNEL, "%s_%s_%s",
 						 iio_modifier_names[chan
@@ -545,53 +546,62 @@ int __iio_device_attr_init(struct device_attribute *dev_attr,
 						 chan->extend_name,
 						 postfix);
 	}
-	if (full_postfix == NULL) {
-		ret = -ENOMEM;
-		goto error_ret;
-	}
+	if (full_postfix == NULL)
+		return -ENOMEM;
 
 	if (chan->differential) { /* Differential can not have modifier */
-		if (generic)
+		switch (shared_by) {
+		case IIO_SHARED_BY_TYPE:
 			name_format
 				= kasprintf(GFP_KERNEL, "%s_%s-%s_%s",
 					    iio_direction[chan->output],
 					    iio_chan_type_name_spec[chan->type],
 					    iio_chan_type_name_spec[chan->type],
 					    full_postfix);
-		else if (chan->indexed)
+			break;
+		case IIO_SEPARATE:
+			if (!chan->indexed) {
+				WARN_ON("Differential channels must be indexed\n");
+				ret = -EINVAL;
+				goto error_free_full_postfix;
+			}
 			name_format
-				= kasprintf(GFP_KERNEL, "%s_%s%d-%s%d_%s",
+				= kasprintf(GFP_KERNEL,
+					    "%s_%s%d-%s%d_%s",
 					    iio_direction[chan->output],
 					    iio_chan_type_name_spec[chan->type],
 					    chan->channel,
 					    iio_chan_type_name_spec[chan->type],
 					    chan->channel2,
 					    full_postfix);
-		else {
-			WARN_ON("Differential channels must be indexed\n");
-			ret = -EINVAL;
-			goto error_free_full_postfix;
+			break;
 		}
 	} else { /* Single ended */
-		if (generic)
-			name_format
-				= kasprintf(GFP_KERNEL, "%s_%s_%s",
-					    iio_direction[chan->output],
-					    iio_chan_type_name_spec[chan->type],
-					    full_postfix);
-		else if (chan->indexed)
-			name_format
-				= kasprintf(GFP_KERNEL, "%s_%s%d_%s",
-					    iio_direction[chan->output],
-					    iio_chan_type_name_spec[chan->type],
-					    chan->channel,
-					    full_postfix);
-		else
+		switch (shared_by) {
+		case IIO_SHARED_BY_TYPE:
 			name_format
 				= kasprintf(GFP_KERNEL, "%s_%s_%s",
 					    iio_direction[chan->output],
 					    iio_chan_type_name_spec[chan->type],
 					    full_postfix);
+			break;
+
+		case IIO_SEPARATE:
+			if (chan->indexed)
+				name_format
+					= kasprintf(GFP_KERNEL, "%s_%s%d_%s",
+						    iio_direction[chan->output],
+						    iio_chan_type_name_spec[chan->type],
+						    chan->channel,
+						    full_postfix);
+			else
+				name_format
+					= kasprintf(GFP_KERNEL, "%s_%s_%s",
+						    iio_direction[chan->output],
+						    iio_chan_type_name_spec[chan->type],
+						    full_postfix);
+			break;
+		}
 	}
 	if (name_format == NULL) {
 		ret = -ENOMEM;
@@ -615,16 +625,11 @@ int __iio_device_attr_init(struct device_attribute *dev_attr,
 		dev_attr->attr.mode |= S_IWUSR;
 		dev_attr->store = writefunc;
 	}
-	kfree(name_format);
-	kfree(full_postfix);
-
-	return 0;
-
 error_free_name_format:
 	kfree(name_format);
 error_free_full_postfix:
 	kfree(full_postfix);
-error_ret:
+
 	return ret;
 }
 
@@ -643,7 +648,7 @@ int __iio_add_chan_devattr(const char *postfix,
 						const char *buf,
 						size_t len),
 			   u64 mask,
-			   bool generic,
+			   enum iio_shared_by shared_by,
 			   struct device *dev,
 			   struct list_head *attr_list)
 {
@@ -657,7 +662,7 @@ int __iio_add_chan_devattr(const char *postfix,
 	}
 	ret = __iio_device_attr_init(&iio_attr->dev_attr,
 				     postfix, chan,
-				     readfunc, writefunc, generic);
+				     readfunc, writefunc, shared_by);
 	if (ret)
 		goto error_iio_dev_attr_free;
 	iio_attr->c = chan;
@@ -665,7 +670,7 @@ int __iio_add_chan_devattr(const char *postfix,
 	list_for_each_entry(t, attr_list, l)
 		if (strcmp(t->dev_attr.attr.name,
 			   iio_attr->dev_attr.attr.name) == 0) {
-			if (!generic)
+			if (shared_by == IIO_SEPARATE)
 				dev_err(dev, "tried to double register : %s\n",
 					t->dev_attr.attr.name);
 			ret = -EBUSY;
@@ -683,46 +688,54 @@ error_ret:
 	return ret;
 }
 
-static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
-					struct iio_chan_spec const *chan)
+static int iio_device_add_info_mask_type(struct iio_dev *indio_dev,
+					 struct iio_chan_spec const *chan,
+					 enum iio_shared_by shared_by,
+					 const long *infomask)
 {
-	int ret, attrcount = 0;
-	int i;
-	const struct iio_chan_spec_ext_info *ext_info;
+	int i, ret, attrcount = 0;
 
-	if (chan->channel < 0)
-		return 0;
-	for_each_set_bit(i, &chan->info_mask_separate, sizeof(long)*8) {
+	for_each_set_bit(i, infomask, sizeof(infomask)*8) {
 		ret = __iio_add_chan_devattr(iio_chan_info_postfix[i],
 					     chan,
 					     &iio_read_channel_info,
 					     &iio_write_channel_info,
 					     i,
-					     0,
+					     shared_by,
 					     &indio_dev->dev,
 					     &indio_dev->channel_attr_list);
-		if (ret < 0)
-			goto error_ret;
-		attrcount++;
-	}
-	for_each_set_bit(i, &chan->info_mask_shared_by_type, sizeof(long)*8) {
-		ret = __iio_add_chan_devattr(iio_chan_info_postfix[i],
-					     chan,
-					     &iio_read_channel_info,
-					     &iio_write_channel_info,
-					     i,
-					     1,
-					     &indio_dev->dev,
-					     &indio_dev->channel_attr_list);
-		if (ret == -EBUSY) {
-			ret = 0;
+		if ((ret == -EBUSY) && (shared_by != IIO_SEPARATE))
 			continue;
-		} else if (ret < 0) {
-			goto error_ret;
-		}
+		else if (ret < 0)
+			return ret;
 		attrcount++;
 	}
 
+	return attrcount;
+}
+
+static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
+					struct iio_chan_spec const *chan)
+{
+	int ret, attrcount = 0;
+	const struct iio_chan_spec_ext_info *ext_info;
+
+	if (chan->channel < 0)
+		return 0;
+	ret = iio_device_add_info_mask_type(indio_dev, chan,
+					    IIO_SEPARATE,
+					    &chan->info_mask_separate);
+	if (ret < 0)
+		return ret;
+	attrcount += ret;
+
+	ret = iio_device_add_info_mask_type(indio_dev, chan,
+					    IIO_SHARED_BY_TYPE,
+					    &chan->info_mask_shared_by_type);
+	if (ret < 0)
+		return ret;
+	attrcount += ret;
+
 	if (chan->ext_info) {
 		unsigned int i = 0;
 		for (ext_info = chan->ext_info; ext_info->name; ext_info++) {
@@ -741,15 +754,13 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
 				continue;
 
 			if (ret)
-				goto error_ret;
+				return ret;
 
 			attrcount++;
 		}
 	}
 
-	ret = attrcount;
-error_ret:
-	return ret;
+	return attrcount;
 }
 
 static void iio_device_remove_and_free_read_attr(struct iio_dev *indio_dev,
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index bc408e2def40..21de272f1eb6 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -39,6 +39,11 @@ enum iio_chan_info_enum {
 	IIO_CHAN_INFO_INT_TIME,
 };
 
+enum iio_shared_by {
+	IIO_SEPARATE,
+	IIO_SHARED_BY_TYPE
+};
+
 enum iio_endian {
 	IIO_CPU,
 	IIO_BE,
@@ -58,7 +63,7 @@ struct iio_dev;
  */
 struct iio_chan_spec_ext_info {
 	const char *name;
-	bool shared;
+	enum iio_shared_by shared;
 	ssize_t (*read)(struct iio_dev *, uintptr_t private,
 			struct iio_chan_spec const *, char *buf);
 	ssize_t (*write)(struct iio_dev *, uintptr_t private,
@@ -126,7 +131,7 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev,
 #define IIO_ENUM_AVAILABLE(_name, _e) \
 { \
 	.name = (_name "_available"), \
-	.shared = true, \
+	.shared = IIO_SHARED_BY_TYPE, \
 	.read = iio_enum_available_read, \
 	.private = (uintptr_t)(_e), \
 }
-- 
cgit v1.2.3


From c006ec838414d910bdd63ba8d919e602425e194e Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Sun, 8 Sep 2013 14:57:00 +0100
Subject: iio: add info_mask_[shared_by_dir/shared_by_all]

These two additional info_mask bitmaps should allow all 'standard'
numeric attributes to be handled using the read_raw and write_raw
callbacks.  Whilst this should reduce code, the more important element
is that this makes these values easily accessible to in kernel users
of IIO devices.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 drivers/iio/industrialio-core.c | 30 ++++++++++++++++++++++++++++++
 include/linux/iio/iio.h         | 14 ++++++++++++--
 2 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index d380c7278a26..24db1855dbab 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -551,6 +551,14 @@ int __iio_device_attr_init(struct device_attribute *dev_attr,
 
 	if (chan->differential) { /* Differential can not have modifier */
 		switch (shared_by) {
+		case IIO_SHARED_BY_ALL:
+			name_format = kasprintf(GFP_KERNEL, "%s", full_postfix);
+			break;
+		case IIO_SHARED_BY_DIR:
+			name_format = kasprintf(GFP_KERNEL, "%s_%s",
+						iio_direction[chan->output],
+						full_postfix);
+			break;
 		case IIO_SHARED_BY_TYPE:
 			name_format
 				= kasprintf(GFP_KERNEL, "%s_%s-%s_%s",
@@ -578,6 +586,14 @@ int __iio_device_attr_init(struct device_attribute *dev_attr,
 		}
 	} else { /* Single ended */
 		switch (shared_by) {
+		case IIO_SHARED_BY_ALL:
+			name_format = kasprintf(GFP_KERNEL, "%s", full_postfix);
+			break;
+		case IIO_SHARED_BY_DIR:
+			name_format = kasprintf(GFP_KERNEL, "%s_%s",
+						iio_direction[chan->output],
+						full_postfix);
+			break;
 		case IIO_SHARED_BY_TYPE:
 			name_format
 				= kasprintf(GFP_KERNEL, "%s_%s_%s",
@@ -736,6 +752,20 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
 		return ret;
 	attrcount += ret;
 
+	ret = iio_device_add_info_mask_type(indio_dev, chan,
+					    IIO_SHARED_BY_DIR,
+					    &chan->info_mask_shared_by_dir);
+	if (ret < 0)
+		return ret;
+	attrcount += ret;
+
+	ret = iio_device_add_info_mask_type(indio_dev, chan,
+					    IIO_SHARED_BY_ALL,
+					    &chan->info_mask_shared_by_all);
+	if (ret < 0)
+		return ret;
+	attrcount += ret;
+
 	if (chan->ext_info) {
 		unsigned int i = 0;
 		for (ext_info = chan->ext_info; ext_info->name; ext_info++) {
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 21de272f1eb6..ac1cb8f1858c 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -41,7 +41,9 @@ enum iio_chan_info_enum {
 
 enum iio_shared_by {
 	IIO_SEPARATE,
-	IIO_SHARED_BY_TYPE
+	IIO_SHARED_BY_TYPE,
+	IIO_SHARED_BY_DIR,
+	IIO_SHARED_BY_ALL
 };
 
 enum iio_endian {
@@ -156,6 +158,10 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev,
  *			this channel.
  * @info_mask_shared_by_type: What information is to be exported that is shared
  *			by all channels of the same type.
+ * @info_mask_shared_by_dir: What information is to be exported that is shared
+ *			by all channels of the same direction.
+ * @info_mask_shared_by_all: What information is to be exported that is shared
+ *			by all channels.
  * @event_mask:		What events can this channel produce.
  * @ext_info:		Array of extended info attributes for this channel.
  *			The array is NULL terminated, the last element should
@@ -192,6 +198,8 @@ struct iio_chan_spec {
 	} scan_type;
 	long			info_mask_separate;
 	long			info_mask_shared_by_type;
+	long			info_mask_shared_by_dir;
+	long			info_mask_shared_by_all;
 	long			event_mask;
 	const struct iio_chan_spec_ext_info *ext_info;
 	const char		*extend_name;
@@ -215,7 +223,9 @@ static inline bool iio_channel_has_info(const struct iio_chan_spec *chan,
 	enum iio_chan_info_enum type)
 {
 	return (chan->info_mask_separate & BIT(type)) |
-	       (chan->info_mask_shared_by_type & BIT(type));
+		(chan->info_mask_shared_by_type & BIT(type)) |
+		(chan->info_mask_shared_by_dir & BIT(type)) |
+		(chan->info_mask_shared_by_all & BIT(type));
 }
 
 #define IIO_ST(si, rb, sb, sh)						\
-- 
cgit v1.2.3


From 5d65d92045cb7d3b2c45020c0e62d6d1c1d34f37 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Sun, 15 Sep 2013 17:50:00 +0100
Subject: iio: iio_push_to_buffers(): Change type of 'data' to const void *

Change the type of the 'data' parameter for iio_push_to_buffers() from 'u8 *' to
'const void *'. Drivers typically use the correct type (e.g. __be16 *) for their
data buffer. When passing the buffer to iio_push_to_buffers() it needs to be
cast to 'u8 *' for the compiler to not complain (and also having to add __force
if we want to keep sparse happy as well). Since the buffer implementation should
not care about the data layout (except the size of one sample) using a void
pointer is the correct thing to do. Also make it const as the buffer
implementations are not supposed to modify it.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/buffer_cb.c           |  6 +++---
 drivers/iio/industrialio-buffer.c | 10 +++++-----
 drivers/iio/kfifo_buf.c           |  2 +-
 include/linux/iio/buffer.h        |  6 +++---
 include/linux/iio/consumer.h      |  2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/buffer_cb.c b/drivers/iio/buffer_cb.c
index f406889248c8..578f7199ed51 100644
--- a/drivers/iio/buffer_cb.c
+++ b/drivers/iio/buffer_cb.c
@@ -7,12 +7,12 @@
 
 struct iio_cb_buffer {
 	struct iio_buffer buffer;
-	int (*cb)(u8 *data, void *private);
+	int (*cb)(const void *data, void *private);
 	void *private;
 	struct iio_channel *channels;
 };
 
-static int iio_buffer_cb_store_to(struct iio_buffer *buffer, u8 *data)
+static int iio_buffer_cb_store_to(struct iio_buffer *buffer, const void *data)
 {
 	struct iio_cb_buffer *cb_buff = container_of(buffer,
 						     struct iio_cb_buffer,
@@ -26,7 +26,7 @@ static const struct iio_buffer_access_funcs iio_cb_access = {
 };
 
 struct iio_cb_buffer *iio_channel_get_all_cb(struct device *dev,
-					     int (*cb)(u8 *data,
+					     int (*cb)(const void *data,
 						       void *private),
 					     void *private)
 {
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 7ea2edbf7614..a7ac4b5af03e 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -769,8 +769,8 @@ struct iio_demux_table {
 	struct list_head l;
 };
 
-static unsigned char *iio_demux(struct iio_buffer *buffer,
-				 unsigned char *datain)
+static const void *iio_demux(struct iio_buffer *buffer,
+				 const void *datain)
 {
 	struct iio_demux_table *t;
 
@@ -783,9 +783,9 @@ static unsigned char *iio_demux(struct iio_buffer *buffer,
 	return buffer->demux_bounce;
 }
 
-static int iio_push_to_buffer(struct iio_buffer *buffer, unsigned char *data)
+static int iio_push_to_buffer(struct iio_buffer *buffer, const void *data)
 {
-	unsigned char *dataout = iio_demux(buffer, data);
+	const void *dataout = iio_demux(buffer, data);
 
 	return buffer->access->store_to(buffer, dataout);
 }
@@ -800,7 +800,7 @@ static void iio_buffer_demux_free(struct iio_buffer *buffer)
 }
 
 
-int iio_push_to_buffers(struct iio_dev *indio_dev, unsigned char *data)
+int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data)
 {
 	int ret;
 	struct iio_buffer *buf;
diff --git a/drivers/iio/kfifo_buf.c b/drivers/iio/kfifo_buf.c
index 1bea41bcbdc6..538d4616e48f 100644
--- a/drivers/iio/kfifo_buf.c
+++ b/drivers/iio/kfifo_buf.c
@@ -95,7 +95,7 @@ static int iio_set_length_kfifo(struct iio_buffer *r, int length)
 }
 
 static int iio_store_to_kfifo(struct iio_buffer *r,
-			      u8 *data)
+			      const void *data)
 {
 	int ret;
 	struct iio_kfifo *kf = iio_to_kfifo(r);
diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 2bac0eb8948d..e5507e999ed1 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -36,7 +36,7 @@ struct iio_buffer;
  * any of them not existing.
  **/
 struct iio_buffer_access_funcs {
-	int (*store_to)(struct iio_buffer *buffer, u8 *data);
+	int (*store_to)(struct iio_buffer *buffer, const void *data);
 	int (*read_first_n)(struct iio_buffer *buffer,
 			    size_t n,
 			    char __user *buf);
@@ -81,7 +81,7 @@ struct iio_buffer {
 	bool					stufftoread;
 	const struct attribute_group *attrs;
 	struct list_head			demux_list;
-	unsigned char				*demux_bounce;
+	void					*demux_bounce;
 	struct list_head			buffer_list;
 };
 
@@ -120,7 +120,7 @@ int iio_scan_mask_set(struct iio_dev *indio_dev,
  * @indio_dev:		iio_dev structure for device.
  * @data:		Full scan.
  */
-int iio_push_to_buffers(struct iio_dev *indio_dev, unsigned char *data);
+int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data);
 
 int iio_update_demux(struct iio_dev *indio_dev);
 
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 833926c91aa8..2752b1fd12be 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -77,7 +77,7 @@ struct iio_cb_buffer;
  * fail.
  */
 struct iio_cb_buffer *iio_channel_get_all_cb(struct device *dev,
-					     int (*cb)(u8 *data,
+					     int (*cb)(const void *data,
 						       void *private),
 					     void *private);
 /**
-- 
cgit v1.2.3


From fdf200290581150f7b69148abf6ca860684cbfbb Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Sun, 1 Sep 2013 20:24:50 -0700
Subject: regmap: add regmap_field_update_bits()

Current regmap_field is supporting read/write functions.
This patch adds new update_bits function for it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/base/regmap/regmap.c | 20 ++++++++++++++++++++
 include/linux/regmap.h       |  2 ++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 7d689a15c500..285afa7470c0 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1369,6 +1369,26 @@ int regmap_field_write(struct regmap_field *field, unsigned int val)
 }
 EXPORT_SYMBOL_GPL(regmap_field_write);
 
+/**
+ * regmap_field_update_bits():	Perform a read/modify/write cycle
+ *                              on the register field
+ *
+ * @field: Register field to write to
+ * @mask: Bitmask to change
+ * @val: Value to be written
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_field_update_bits(struct regmap_field *field, unsigned int mask, unsigned int val)
+{
+	mask = (mask << field->shift) & field->mask;
+
+	return regmap_update_bits(field->regmap, field->reg,
+				  mask, val << field->shift);
+}
+EXPORT_SYMBOL_GPL(regmap_field_update_bits);
+
 /*
  * regmap_bulk_write(): Write multiple registers to the device
  *
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..4c8c20a7a75d 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -448,6 +448,8 @@ void devm_regmap_field_free(struct device *dev,	struct regmap_field *field);
 
 int regmap_field_read(struct regmap_field *field, unsigned int *val);
 int regmap_field_write(struct regmap_field *field, unsigned int val);
+int regmap_field_update_bits(struct regmap_field *field,
+			     unsigned int mask, unsigned int val);
 
 /**
  * Description of an IRQ for the generic regmap irq_chip.
-- 
cgit v1.2.3


From 5ce0ba88650f2606244a761d92e2b725f4ab3583 Mon Sep 17 00:00:00 2001
From: Hiep Cao Minh <cm-hiep@jinso.co.jp>
Date: Tue, 3 Sep 2013 13:10:26 +0900
Subject: spi: rcar: add Renesas QSPI support on RSPI

The R8A7790 has QSPI module which is very similar to RSPI.
This patch adds into RSPI module together to supports QSPI module.

Signed-off-by: Hiep Cao Minh <cm-hiep@jinso.co.jp>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/spi/Kconfig      |   2 +-
 drivers/spi/spi-rspi.c   | 183 ++++++++++++++++++++++++++++++++++++-----------
 include/linux/spi/rspi.h |   2 +
 3 files changed, 145 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b9c53cc40e1f..738facf90597 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -369,7 +369,7 @@ config SPI_PXA2XX_PCI
 
 config SPI_RSPI
 	tristate "Renesas RSPI controller"
-	depends on SUPERH && SH_DMAE_BASE
+	depends on (SUPERH || ARCH_SHMOBILE) && SH_DMAE_BASE
 	help
 	  SPI driver for Renesas RSPI blocks.
 
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 8719206a03a0..4c8dbac11b42 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -59,6 +59,14 @@
 #define RSPI_SPCMD6		0x1c
 #define RSPI_SPCMD7		0x1e
 
+/*qspi only */
+#define QSPI_SPBFCR		0x18
+#define QSPI_SPBDCR		0x1a
+#define QSPI_SPBMUL0		0x1c
+#define QSPI_SPBMUL1		0x20
+#define QSPI_SPBMUL2		0x24
+#define QSPI_SPBMUL3		0x28
+
 /* SPCR */
 #define SPCR_SPRIE		0x80
 #define SPCR_SPE		0x40
@@ -126,6 +134,8 @@
 #define SPCMD_LSBF		0x1000
 #define SPCMD_SPB_MASK		0x0f00
 #define SPCMD_SPB_8_TO_16(bit)	(((bit - 1) << 8) & SPCMD_SPB_MASK)
+#define SPCMD_SPB_8BIT		0x0000	/* qspi only */
+#define SPCMD_SPB_16BIT		0x0100
 #define SPCMD_SPB_20BIT		0x0000
 #define SPCMD_SPB_24BIT		0x0100
 #define SPCMD_SPB_32BIT		0x0200
@@ -135,6 +145,10 @@
 #define SPCMD_CPOL		0x0002
 #define SPCMD_CPHA		0x0001
 
+/* SPBFCR */
+#define SPBFCR_TXRST		0x80	/* qspi only */
+#define SPBFCR_RXRST		0x40	/* qspi only */
+
 struct rspi_data {
 	void __iomem *addr;
 	u32 max_speed_hz;
@@ -145,6 +159,7 @@ struct rspi_data {
 	spinlock_t lock;
 	struct clk *clk;
 	unsigned char spsr;
+	const struct spi_ops *ops;
 
 	/* for dmaengine */
 	struct dma_chan *chan_tx;
@@ -165,6 +180,11 @@ static void rspi_write16(struct rspi_data *rspi, u16 data, u16 offset)
 	iowrite16(data, rspi->addr + offset);
 }
 
+static void rspi_write32(struct rspi_data *rspi, u32 data, u16 offset)
+{
+	iowrite32(data, rspi->addr + offset);
+}
+
 static u8 rspi_read8(struct rspi_data *rspi, u16 offset)
 {
 	return ioread8(rspi->addr + offset);
@@ -175,17 +195,98 @@ static u16 rspi_read16(struct rspi_data *rspi, u16 offset)
 	return ioread16(rspi->addr + offset);
 }
 
-static unsigned char rspi_calc_spbr(struct rspi_data *rspi)
+/* optional functions */
+struct spi_ops {
+	int (*set_config_register)(struct rspi_data *rspi, int access_size);
+};
+
+/*
+ * functions for RSPI
+ */
+static int rspi_set_config_register(struct rspi_data *rspi, int access_size)
 {
-	int tmp;
-	unsigned char spbr;
+	int spbr;
+
+	/* Sets output mode(CMOS) and MOSI signal(from previous transfer) */
+	rspi_write8(rspi, 0x00, RSPI_SPPCR);
 
-	tmp = clk_get_rate(rspi->clk) / (2 * rspi->max_speed_hz) - 1;
-	spbr = clamp(tmp, 0, 255);
+	/* Sets transfer bit rate */
+	spbr = clk_get_rate(rspi->clk) / (2 * rspi->max_speed_hz) - 1;
+	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
+
+	/* Sets number of frames to be used: 1 frame */
+	rspi_write8(rspi, 0x00, RSPI_SPDCR);
 
-	return spbr;
+	/* Sets RSPCK, SSL, next-access delay value */
+	rspi_write8(rspi, 0x00, RSPI_SPCKD);
+	rspi_write8(rspi, 0x00, RSPI_SSLND);
+	rspi_write8(rspi, 0x00, RSPI_SPND);
+
+	/* Sets parity, interrupt mask */
+	rspi_write8(rspi, 0x00, RSPI_SPCR2);
+
+	/* Sets SPCMD */
+	rspi_write16(rspi, SPCMD_SPB_8_TO_16(access_size) | SPCMD_SSLKP,
+		     RSPI_SPCMD0);
+
+	/* Sets RSPI mode */
+	rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
+
+	return 0;
 }
 
+/*
+ * functions for QSPI
+ */
+static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
+{
+	u16 spcmd;
+	int spbr;
+
+	/* Sets output mode(CMOS) and MOSI signal(from previous transfer) */
+	rspi_write8(rspi, 0x00, RSPI_SPPCR);
+
+	/* Sets transfer bit rate */
+	spbr = clk_get_rate(rspi->clk) / (2 * rspi->max_speed_hz);
+	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
+
+	/* Sets number of frames to be used: 1 frame */
+	rspi_write8(rspi, 0x00, RSPI_SPDCR);
+
+	/* Sets RSPCK, SSL, next-access delay value */
+	rspi_write8(rspi, 0x00, RSPI_SPCKD);
+	rspi_write8(rspi, 0x00, RSPI_SSLND);
+	rspi_write8(rspi, 0x00, RSPI_SPND);
+
+	/* Data Length Setting */
+	if (access_size == 8)
+		spcmd = SPCMD_SPB_8BIT;
+	else if (access_size == 16)
+		spcmd = SPCMD_SPB_16BIT;
+	else if (access_size == 32)
+		spcmd = SPCMD_SPB_32BIT;
+
+	spcmd |= SPCMD_SCKDEN | SPCMD_SLNDEN | SPCMD_SSLKP | SPCMD_SPNDEN;
+
+	/* Resets transfer data length */
+	rspi_write32(rspi, 0, QSPI_SPBMUL0);
+
+	/* Resets transmit and receive buffer */
+	rspi_write8(rspi, SPBFCR_TXRST | SPBFCR_RXRST, QSPI_SPBFCR);
+	/* Sets buffer to allow normal operation */
+	rspi_write8(rspi, 0x00, QSPI_SPBFCR);
+
+	/* Sets SPCMD */
+	rspi_write16(rspi, spcmd, RSPI_SPCMD0);
+
+	/* Enables SPI function in a master mode */
+	rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR);
+
+	return 0;
+}
+
+#define set_config_register(spi, n) spi->ops->set_config_register(spi, n)
+
 static void rspi_enable_irq(struct rspi_data *rspi, u8 enable)
 {
 	rspi_write8(rspi, rspi_read8(rspi, RSPI_SPCR) | enable, RSPI_SPCR);
@@ -220,35 +321,6 @@ static void rspi_negate_ssl(struct rspi_data *rspi)
 	rspi_write8(rspi, rspi_read8(rspi, RSPI_SPCR) & ~SPCR_SPE, RSPI_SPCR);
 }
 
-static int rspi_set_config_register(struct rspi_data *rspi, int access_size)
-{
-	/* Sets output mode(CMOS) and MOSI signal(from previous transfer) */
-	rspi_write8(rspi, 0x00, RSPI_SPPCR);
-
-	/* Sets transfer bit rate */
-	rspi_write8(rspi, rspi_calc_spbr(rspi), RSPI_SPBR);
-
-	/* Sets number of frames to be used: 1 frame */
-	rspi_write8(rspi, 0x00, RSPI_SPDCR);
-
-	/* Sets RSPCK, SSL, next-access delay value */
-	rspi_write8(rspi, 0x00, RSPI_SPCKD);
-	rspi_write8(rspi, 0x00, RSPI_SSLND);
-	rspi_write8(rspi, 0x00, RSPI_SPND);
-
-	/* Sets parity, interrupt mask */
-	rspi_write8(rspi, 0x00, RSPI_SPCR2);
-
-	/* Sets SPCMD */
-	rspi_write16(rspi, SPCMD_SPB_8_TO_16(access_size) | SPCMD_SSLKP,
-		     RSPI_SPCMD0);
-
-	/* Sets RSPI mode */
-	rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
-
-	return 0;
-}
-
 static int rspi_send_pio(struct rspi_data *rspi, struct spi_message *mesg,
 			 struct spi_transfer *t)
 {
@@ -616,7 +688,7 @@ static int rspi_setup(struct spi_device *spi)
 		spi->bits_per_word = 8;
 	rspi->max_speed_hz = spi->max_speed_hz;
 
-	rspi_set_config_register(rspi, 8);
+	set_config_register(rspi, 8);
 
 	return 0;
 }
@@ -745,7 +817,16 @@ static int rspi_probe(struct platform_device *pdev)
 	struct rspi_data *rspi;
 	int ret, irq;
 	char clk_name[16];
-
+	struct rspi_plat_data *rspi_pd = pdev->dev.platform_data;
+	const struct spi_ops *ops;
+	const struct platform_device_id *id_entry = pdev->id_entry;
+
+	ops = (struct spi_ops *)id_entry->driver_data;
+	/* ops parameter check */
+	if (!ops->set_config_register) {
+		dev_err(&pdev->dev, "there is no set_config_register\n");
+		return -ENODEV;
+	}
 	/* get base addr */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (unlikely(res == NULL)) {
@@ -767,7 +848,7 @@ static int rspi_probe(struct platform_device *pdev)
 
 	rspi = spi_master_get_devdata(master);
 	platform_set_drvdata(pdev, rspi);
-
+	rspi->ops = ops;
 	rspi->master = master;
 	rspi->addr = ioremap(res->start, resource_size(res));
 	if (rspi->addr == NULL) {
@@ -776,7 +857,7 @@ static int rspi_probe(struct platform_device *pdev)
 		goto error1;
 	}
 
-	snprintf(clk_name, sizeof(clk_name), "rspi%d", pdev->id);
+	snprintf(clk_name, sizeof(clk_name), "%s%d", id_entry->name, pdev->id);
 	rspi->clk = clk_get(&pdev->dev, clk_name);
 	if (IS_ERR(rspi->clk)) {
 		dev_err(&pdev->dev, "cannot get clock\n");
@@ -790,7 +871,10 @@ static int rspi_probe(struct platform_device *pdev)
 	INIT_WORK(&rspi->ws, rspi_work);
 	init_waitqueue_head(&rspi->wait);
 
-	master->num_chipselect = 2;
+	master->num_chipselect = rspi_pd->num_chipselect;
+	if (!master->num_chipselect)
+		master->num_chipselect = 2; /* default */
+
 	master->bus_num = pdev->id;
 	master->setup = rspi_setup;
 	master->transfer = rspi_transfer;
@@ -832,11 +916,28 @@ error1:
 	return ret;
 }
 
+static struct spi_ops rspi_ops = {
+	.set_config_register =		rspi_set_config_register,
+};
+
+static struct spi_ops qspi_ops = {
+	.set_config_register =		qspi_set_config_register,
+};
+
+static struct platform_device_id spi_driver_ids[] = {
+	{ "rspi",	(kernel_ulong_t)&rspi_ops },
+	{ "qspi",	(kernel_ulong_t)&qspi_ops },
+	{},
+};
+
+MODULE_DEVICE_TABLE(platform, spi_driver_ids);
+
 static struct platform_driver rspi_driver = {
 	.probe =	rspi_probe,
 	.remove =	rspi_remove,
+	.id_table =	spi_driver_ids,
 	.driver		= {
-		.name = "rspi",
+		.name = "renesas_spi",
 		.owner	= THIS_MODULE,
 	},
 };
diff --git a/include/linux/spi/rspi.h b/include/linux/spi/rspi.h
index 900f0e328235..a25bd6f65e7f 100644
--- a/include/linux/spi/rspi.h
+++ b/include/linux/spi/rspi.h
@@ -26,6 +26,8 @@ struct rspi_plat_data {
 	unsigned int dma_rx_id;
 
 	unsigned dma_width_16bit:1;	/* DMAC read/write width = 16-bit */
+
+	u16 num_chipselect;
 };
 
 #endif
-- 
cgit v1.2.3


From b33e46bcdc4e598d738ed12a5a7906be4e11d786 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Sat, 31 Aug 2013 11:58:26 +0100
Subject: regulator: core: Provide managed regulator registration

Many regulator drivers have a remove function that consists solely of
calling regulator_unregister() so provide a devm_regulator_register()
in order to allow this repeated code to be removed and help eliminate
error handling code.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 Documentation/driver-model/devres.txt |  1 +
 drivers/regulator/core.c              | 66 +++++++++++++++++++++++++++++++++++
 include/linux/regulator/driver.h      |  5 +++
 3 files changed, 72 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index fcb34a5697ea..3d9c2a766230 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -283,6 +283,7 @@ REGULATOR
   devm_regulator_get()
   devm_regulator_put()
   devm_regulator_bulk_get()
+  devm_regulator_register()
 
 CLOCK
   devm_clk_get()
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a01b8b3b70ca..5f995d281672 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -3492,6 +3492,44 @@ clean:
 }
 EXPORT_SYMBOL_GPL(regulator_register);
 
+static void devm_rdev_release(struct device *dev, void *res)
+{
+	regulator_unregister(*(struct regulator_dev **)res);
+}
+
+/**
+ * devm_regulator_register - Resource managed regulator_register()
+ * @regulator_desc: regulator to register
+ * @config: runtime configuration for regulator
+ *
+ * Called by regulator drivers to register a regulator.  Returns a
+ * valid pointer to struct regulator_dev on success or an ERR_PTR() on
+ * error.  The regulator will automaticall be released when the device
+ * is unbound.
+ */
+struct regulator_dev *devm_regulator_register(struct device *dev,
+				  const struct regulator_desc *regulator_desc,
+				  const struct regulator_config *config)
+{
+	struct regulator_dev **ptr, *rdev;
+
+	ptr = devres_alloc(devm_rdev_release, sizeof(*ptr),
+			   GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	rdev = regulator_register(regulator_desc, config);
+	if (!IS_ERR(rdev)) {
+		*ptr = rdev;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return rdev;
+}
+EXPORT_SYMBOL_GPL(devm_regulator_register);
+
 /**
  * regulator_unregister - unregister regulator
  * @rdev: regulator to unregister
@@ -3521,6 +3559,34 @@ void regulator_unregister(struct regulator_dev *rdev)
 }
 EXPORT_SYMBOL_GPL(regulator_unregister);
 
+static int devm_rdev_match(struct device *dev, void *res, void *data)
+{
+	struct regulator_dev **r = res;
+	if (!r || !*r) {
+		WARN_ON(!r || !*r);
+		return 0;
+	}
+	return *r == data;
+}
+
+/**
+ * devm_regulator_unregister - Resource managed regulator_unregister()
+ * @regulator: regulator to free
+ *
+ * Unregister a regulator registered with devm_regulator_register().
+ * Normally this function will not need to be called and the resource
+ * management code will ensure that the resource is freed.
+ */
+void devm_regulator_unregister(struct device *dev, struct regulator_dev *rdev)
+{
+	int rc;
+
+	rc = devres_release(dev, devm_rdev_release, devm_rdev_match, rdev);
+	if (rc != 0)
+		WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_regulator_unregister);
+
 /**
  * regulator_suspend_prepare - prepare regulators for system wide suspend
  * @state: system suspend state
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 67e13aa5a478..8474c7f88745 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -334,7 +334,12 @@ struct regulator_dev {
 struct regulator_dev *
 regulator_register(const struct regulator_desc *regulator_desc,
 		   const struct regulator_config *config);
+struct regulator_dev *
+devm_regulator_register(struct device *dev,
+			const struct regulator_desc *regulator_desc,
+			const struct regulator_config *config);
 void regulator_unregister(struct regulator_dev *rdev);
+void devm_regulator_unregister(struct device *dev, struct regulator_dev *rdev);
 
 int regulator_notifier_call_chain(struct regulator_dev *rdev,
 				  unsigned long event, void *data);
-- 
cgit v1.2.3


From 4f0ac6dabf867095b31f851ba0d0ceaca2f87e2e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Fri, 13 Sep 2013 19:51:47 +0100
Subject: regulator: core: Remove unused regulator_use_dummy_regulator()

No boards have used this functionality and the new default of providing
dummy regulators by default provides a better solution to the problem it
was trying to solve.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/core.c          | 17 -----------------
 include/linux/regulator/machine.h |  5 -----
 2 files changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 8fd170788c3e..ac3a864d3635 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -53,7 +53,6 @@ static LIST_HEAD(regulator_list);
 static LIST_HEAD(regulator_map_list);
 static LIST_HEAD(regulator_ena_gpio_list);
 static bool has_full_constraints;
-static bool board_wants_dummy_regulator;
 
 static struct dentry *debugfs_root;
 
@@ -3615,22 +3614,6 @@ void regulator_has_full_constraints(void)
 }
 EXPORT_SYMBOL_GPL(regulator_has_full_constraints);
 
-/**
- * regulator_use_dummy_regulator - Provide a dummy regulator when none is found
- *
- * Calling this function will cause the regulator API to provide a
- * dummy regulator to consumers if no physical regulator is found,
- * allowing most consumers to proceed as though a regulator were
- * configured.  This allows systems such as those with software
- * controllable regulators for the CPU core only to be brought up more
- * readily.
- */
-void regulator_use_dummy_regulator(void)
-{
-	board_wants_dummy_regulator = true;
-}
-EXPORT_SYMBOL_GPL(regulator_use_dummy_regulator);
-
 /**
  * rdev_get_drvdata - get rdev regulator driver data
  * @rdev: regulator
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 999b20ce06cf..a9f7c55a4d4d 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -193,15 +193,10 @@ int regulator_suspend_finish(void);
 
 #ifdef CONFIG_REGULATOR
 void regulator_has_full_constraints(void);
-void regulator_use_dummy_regulator(void);
 #else
 static inline void regulator_has_full_constraints(void)
 {
 }
-
-static inline void regulator_use_dummy_regulator(void)
-{
-}
 #endif
 
 #endif
-- 
cgit v1.2.3


From 666d5b4c742ba666eb68b467d777b7862f362ae5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Sat, 31 Aug 2013 18:50:52 +0100
Subject: spi: core: Add devm_spi_register_master()

Help simplify the cleanup code for SPI master drivers by providing a
managed master registration function, ensuring that the master is
automatically unregistered whenever the device is unbound.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 Documentation/driver-model/devres.txt |  3 +++
 drivers/spi/spi.c                     | 35 +++++++++++++++++++++++++++++++++++
 include/linux/spi/spi.h               |  2 ++
 3 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index fcb34a5697ea..84ea8216cc7d 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -302,3 +302,6 @@ PHY
 
 SLAVE DMA ENGINE
   devm_acpi_dma_controller_register()
+
+SPI
+  devm_spi_register_master()
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 9e039c60c068..a586ceb111fc 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1245,6 +1245,41 @@ done:
 }
 EXPORT_SYMBOL_GPL(spi_register_master);
 
+static void devm_spi_unregister(struct device *dev, void *res)
+{
+	spi_unregister_master(*(struct spi_master **)res);
+}
+
+/**
+ * dev_spi_register_master - register managed SPI master controller
+ * @dev:    device managing SPI master
+ * @master: initialized master, originally from spi_alloc_master()
+ * Context: can sleep
+ *
+ * Register a SPI device as with spi_register_master() which will
+ * automatically be unregister
+ */
+int devm_spi_register_master(struct device *dev, struct spi_master *master)
+{
+	struct spi_master **ptr;
+	int ret;
+
+	ptr = devres_alloc(devm_spi_unregister, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = spi_register_master(master);
+	if (ret != 0) {
+		*ptr = master;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_spi_register_master);
+
 static int __unregister(struct device *dev, void *null)
 {
 	spi_unregister_device(to_spi_device(dev));
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 887116dbce2c..4d634d66ba0b 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -434,6 +434,8 @@ extern struct spi_master *
 spi_alloc_master(struct device *host, unsigned size);
 
 extern int spi_register_master(struct spi_master *master);
+extern int devm_spi_register_master(struct device *dev,
+				    struct spi_master *master);
 extern void spi_unregister_master(struct spi_master *master);
 
 extern struct spi_master *spi_busnum_to_master(u16 busnum);
-- 
cgit v1.2.3


From 4b08478422040ae8cb11acc15d51f1cdb0ac39c8 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 30 Aug 2013 06:01:49 -0700
Subject: Drop support for Renesas H8/300 (h8300) architecture

H8/300 has been dead for several years, and the kernel for it
has not compiled for ages. Drop support for it.

Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/scheduler/sched-arch.txt           |   5 -
 arch/h8300/Kconfig                               | 108 ------
 arch/h8300/Kconfig.cpu                           | 171 ---------
 arch/h8300/Kconfig.debug                         |  68 ----
 arch/h8300/Kconfig.ide                           |  44 ---
 arch/h8300/Makefile                              |  71 ----
 arch/h8300/README                                |  38 --
 arch/h8300/boot/Makefile                         |  22 --
 arch/h8300/boot/compressed/Makefile              |  37 --
 arch/h8300/boot/compressed/head.S                |  47 ---
 arch/h8300/boot/compressed/misc.c                | 180 ---------
 arch/h8300/boot/compressed/vmlinux.lds           |  32 --
 arch/h8300/boot/compressed/vmlinux.scr           |   9 -
 arch/h8300/defconfig                             |  42 ---
 arch/h8300/include/asm/Kbuild                    |   8 -
 arch/h8300/include/asm/asm-offsets.h             |   1 -
 arch/h8300/include/asm/atomic.h                  | 146 --------
 arch/h8300/include/asm/barrier.h                 |  29 --
 arch/h8300/include/asm/bitops.h                  | 211 -----------
 arch/h8300/include/asm/bootinfo.h                |   2 -
 arch/h8300/include/asm/bug.h                     |  12 -
 arch/h8300/include/asm/bugs.h                    |  16 -
 arch/h8300/include/asm/cache.h                   |  13 -
 arch/h8300/include/asm/cachectl.h                |  14 -
 arch/h8300/include/asm/cacheflush.h              |  40 --
 arch/h8300/include/asm/checksum.h                | 102 ------
 arch/h8300/include/asm/cmpxchg.h                 |  60 ---
 arch/h8300/include/asm/cputime.h                 |   6 -
 arch/h8300/include/asm/current.h                 |  25 --
 arch/h8300/include/asm/dbg.h                     |   2 -
 arch/h8300/include/asm/delay.h                   |  38 --
 arch/h8300/include/asm/device.h                  |   7 -
 arch/h8300/include/asm/div64.h                   |   1 -
 arch/h8300/include/asm/dma.h                     |  15 -
 arch/h8300/include/asm/elf.h                     | 101 ------
 arch/h8300/include/asm/emergency-restart.h       |   6 -
 arch/h8300/include/asm/fb.h                      |  12 -
 arch/h8300/include/asm/flat.h                    |  26 --
 arch/h8300/include/asm/fpu.h                     |   1 -
 arch/h8300/include/asm/ftrace.h                  |   1 -
 arch/h8300/include/asm/futex.h                   |   6 -
 arch/h8300/include/asm/gpio-internal.h           |  52 ---
 arch/h8300/include/asm/hardirq.h                 |  19 -
 arch/h8300/include/asm/hw_irq.h                  |   1 -
 arch/h8300/include/asm/io.h                      | 358 ------------------
 arch/h8300/include/asm/irq.h                     |  49 ---
 arch/h8300/include/asm/irq_regs.h                |   1 -
 arch/h8300/include/asm/irqflags.h                |  43 ---
 arch/h8300/include/asm/kdebug.h                  |   1 -
 arch/h8300/include/asm/kmap_types.h              |   6 -
 arch/h8300/include/asm/local.h                   |   6 -
 arch/h8300/include/asm/local64.h                 |   1 -
 arch/h8300/include/asm/mc146818rtc.h             |   9 -
 arch/h8300/include/asm/mmu_context.h             |  32 --
 arch/h8300/include/asm/mutex.h                   |   9 -
 arch/h8300/include/asm/page.h                    |  78 ----
 arch/h8300/include/asm/page_offset.h             |   3 -
 arch/h8300/include/asm/param.h                   |   9 -
 arch/h8300/include/asm/pci.h                     |  19 -
 arch/h8300/include/asm/percpu.h                  |   6 -
 arch/h8300/include/asm/pgalloc.h                 |   8 -
 arch/h8300/include/asm/pgtable.h                 |  73 ----
 arch/h8300/include/asm/processor.h               | 139 -------
 arch/h8300/include/asm/ptrace.h                  |  33 --
 arch/h8300/include/asm/regs267x.h                | 336 -----------------
 arch/h8300/include/asm/regs306x.h                | 212 -----------
 arch/h8300/include/asm/scatterlist.h             |   6 -
 arch/h8300/include/asm/sections.h                |   6 -
 arch/h8300/include/asm/segment.h                 |  49 ---
 arch/h8300/include/asm/sh_bios.h                 |  29 --
 arch/h8300/include/asm/shm.h                     |  31 --
 arch/h8300/include/asm/shmparam.h                |   6 -
 arch/h8300/include/asm/signal.h                  |  24 --
 arch/h8300/include/asm/smp.h                     |   1 -
 arch/h8300/include/asm/spinlock.h                |   6 -
 arch/h8300/include/asm/string.h                  |  44 ---
 arch/h8300/include/asm/switch_to.h               |  50 ---
 arch/h8300/include/asm/target_time.h             |   4 -
 arch/h8300/include/asm/termios.h                 |  50 ---
 arch/h8300/include/asm/thread_info.h             | 103 ------
 arch/h8300/include/asm/timer.h                   |  25 --
 arch/h8300/include/asm/timex.h                   |  19 -
 arch/h8300/include/asm/tlb.h                     |   8 -
 arch/h8300/include/asm/tlbflush.h                |  55 ---
 arch/h8300/include/asm/topology.h                |   6 -
 arch/h8300/include/asm/traps.h                   |  37 --
 arch/h8300/include/asm/types.h                   |   9 -
 arch/h8300/include/asm/uaccess.h                 | 163 ---------
 arch/h8300/include/asm/ucontext.h                |  12 -
 arch/h8300/include/asm/unaligned.h               |  11 -
 arch/h8300/include/asm/unistd.h                  |  36 --
 arch/h8300/include/asm/user.h                    |  75 ----
 arch/h8300/include/asm/virtconvert.h             |  20 -
 arch/h8300/include/uapi/asm/Kbuild               |  34 --
 arch/h8300/include/uapi/asm/auxvec.h             |   4 -
 arch/h8300/include/uapi/asm/bitsperlong.h        |   1 -
 arch/h8300/include/uapi/asm/byteorder.h          |   6 -
 arch/h8300/include/uapi/asm/errno.h              |   6 -
 arch/h8300/include/uapi/asm/fcntl.h              |  11 -
 arch/h8300/include/uapi/asm/ioctl.h              |   1 -
 arch/h8300/include/uapi/asm/ioctls.h             |   8 -
 arch/h8300/include/uapi/asm/ipcbuf.h             |   1 -
 arch/h8300/include/uapi/asm/kvm_para.h           |   1 -
 arch/h8300/include/uapi/asm/mman.h               |   1 -
 arch/h8300/include/uapi/asm/msgbuf.h             |  31 --
 arch/h8300/include/uapi/asm/param.h              |  16 -
 arch/h8300/include/uapi/asm/poll.h               |  11 -
 arch/h8300/include/uapi/asm/posix_types.h        |  26 --
 arch/h8300/include/uapi/asm/ptrace.h             |  44 ---
 arch/h8300/include/uapi/asm/resource.h           |   6 -
 arch/h8300/include/uapi/asm/sembuf.h             |  25 --
 arch/h8300/include/uapi/asm/setup.h              |   6 -
 arch/h8300/include/uapi/asm/shmbuf.h             |  42 ---
 arch/h8300/include/uapi/asm/sigcontext.h         |  18 -
 arch/h8300/include/uapi/asm/siginfo.h            |   6 -
 arch/h8300/include/uapi/asm/signal.h             | 115 ------
 arch/h8300/include/uapi/asm/socket.h             |  79 ----
 arch/h8300/include/uapi/asm/sockios.h            |  13 -
 arch/h8300/include/uapi/asm/stat.h               |  78 ----
 arch/h8300/include/uapi/asm/statfs.h             |   6 -
 arch/h8300/include/uapi/asm/swab.h               |  10 -
 arch/h8300/include/uapi/asm/termbits.h           | 201 ----------
 arch/h8300/include/uapi/asm/termios.h            |  44 ---
 arch/h8300/include/uapi/asm/types.h              |   1 -
 arch/h8300/include/uapi/asm/unistd.h             | 330 -----------------
 arch/h8300/kernel/Makefile                       |  12 -
 arch/h8300/kernel/asm-offsets.c                  |  60 ---
 arch/h8300/kernel/entry.S                        | 402 --------------------
 arch/h8300/kernel/gpio.c                         | 178 ---------
 arch/h8300/kernel/h8300_ksyms.c                  | 100 -----
 arch/h8300/kernel/irq.c                          | 165 ---------
 arch/h8300/kernel/module.c                       |  75 ----
 arch/h8300/kernel/process.c                      | 154 --------
 arch/h8300/kernel/ptrace.c                       | 168 ---------
 arch/h8300/kernel/setup.c                        | 242 ------------
 arch/h8300/kernel/signal.c                       | 444 -----------------------
 arch/h8300/kernel/sys_h8300.c                    |  48 ---
 arch/h8300/kernel/syscalls.S                     | 338 -----------------
 arch/h8300/kernel/time.c                         |  66 ----
 arch/h8300/kernel/timer/Makefile                 |   6 -
 arch/h8300/kernel/timer/itu.c                    |  82 -----
 arch/h8300/kernel/timer/timer16.c                |  77 ----
 arch/h8300/kernel/timer/timer8.c                 | 102 ------
 arch/h8300/kernel/timer/tpu.c                    | 100 -----
 arch/h8300/kernel/traps.c                        | 166 ---------
 arch/h8300/kernel/vmlinux.lds.S                  | 157 --------
 arch/h8300/lib/Makefile                          |   5 -
 arch/h8300/lib/abs.S                             |  21 --
 arch/h8300/lib/ashrdi3.c                         |  63 ----
 arch/h8300/lib/checksum.c                        | 164 ---------
 arch/h8300/lib/memcpy.S                          |  84 -----
 arch/h8300/lib/memset.S                          |  61 ----
 arch/h8300/lib/romfs.S                           |  57 ---
 arch/h8300/mm/Makefile                           |   5 -
 arch/h8300/mm/fault.c                            |  56 ---
 arch/h8300/mm/init.c                             | 155 --------
 arch/h8300/mm/kmap.c                             |  58 ---
 arch/h8300/mm/memory.c                           |  54 ---
 arch/h8300/platform/h8300h/Makefile              |   7 -
 arch/h8300/platform/h8300h/aki3068net/Makefile   |   5 -
 arch/h8300/platform/h8300h/aki3068net/crt0_ram.S | 110 ------
 arch/h8300/platform/h8300h/generic/Makefile      |   5 -
 arch/h8300/platform/h8300h/generic/crt0_ram.S    | 107 ------
 arch/h8300/platform/h8300h/generic/crt0_rom.S    | 122 -------
 arch/h8300/platform/h8300h/h8max/Makefile        |   5 -
 arch/h8300/platform/h8300h/h8max/crt0_ram.S      | 110 ------
 arch/h8300/platform/h8300h/irq.c                 |  82 -----
 arch/h8300/platform/h8300h/ptrace_h8300h.c       | 284 ---------------
 arch/h8300/platform/h8s/Makefile                 |   7 -
 arch/h8300/platform/h8s/edosk2674/Makefile       |   5 -
 arch/h8300/platform/h8s/edosk2674/crt0_ram.S     | 130 -------
 arch/h8300/platform/h8s/edosk2674/crt0_rom.S     | 186 ----------
 arch/h8300/platform/h8s/generic/Makefile         |   5 -
 arch/h8300/platform/h8s/generic/crt0_ram.S       | 127 -------
 arch/h8300/platform/h8s/generic/crt0_rom.S       | 128 -------
 arch/h8300/platform/h8s/irq.c                    | 104 ------
 arch/h8300/platform/h8s/ptrace_h8s.c             |  84 -----
 include/linux/serial_sci.h                       |   2 +-
 tools/testing/ktest/examples/crosstests.conf     |   6 -
 179 files changed, 1 insertion(+), 10890 deletions(-)
 delete mode 100644 arch/h8300/Kconfig
 delete mode 100644 arch/h8300/Kconfig.cpu
 delete mode 100644 arch/h8300/Kconfig.debug
 delete mode 100644 arch/h8300/Kconfig.ide
 delete mode 100644 arch/h8300/Makefile
 delete mode 100644 arch/h8300/README
 delete mode 100644 arch/h8300/boot/Makefile
 delete mode 100644 arch/h8300/boot/compressed/Makefile
 delete mode 100644 arch/h8300/boot/compressed/head.S
 delete mode 100644 arch/h8300/boot/compressed/misc.c
 delete mode 100644 arch/h8300/boot/compressed/vmlinux.lds
 delete mode 100644 arch/h8300/boot/compressed/vmlinux.scr
 delete mode 100644 arch/h8300/defconfig
 delete mode 100644 arch/h8300/include/asm/Kbuild
 delete mode 100644 arch/h8300/include/asm/asm-offsets.h
 delete mode 100644 arch/h8300/include/asm/atomic.h
 delete mode 100644 arch/h8300/include/asm/barrier.h
 delete mode 100644 arch/h8300/include/asm/bitops.h
 delete mode 100644 arch/h8300/include/asm/bootinfo.h
 delete mode 100644 arch/h8300/include/asm/bug.h
 delete mode 100644 arch/h8300/include/asm/bugs.h
 delete mode 100644 arch/h8300/include/asm/cache.h
 delete mode 100644 arch/h8300/include/asm/cachectl.h
 delete mode 100644 arch/h8300/include/asm/cacheflush.h
 delete mode 100644 arch/h8300/include/asm/checksum.h
 delete mode 100644 arch/h8300/include/asm/cmpxchg.h
 delete mode 100644 arch/h8300/include/asm/cputime.h
 delete mode 100644 arch/h8300/include/asm/current.h
 delete mode 100644 arch/h8300/include/asm/dbg.h
 delete mode 100644 arch/h8300/include/asm/delay.h
 delete mode 100644 arch/h8300/include/asm/device.h
 delete mode 100644 arch/h8300/include/asm/div64.h
 delete mode 100644 arch/h8300/include/asm/dma.h
 delete mode 100644 arch/h8300/include/asm/elf.h
 delete mode 100644 arch/h8300/include/asm/emergency-restart.h
 delete mode 100644 arch/h8300/include/asm/fb.h
 delete mode 100644 arch/h8300/include/asm/flat.h
 delete mode 100644 arch/h8300/include/asm/fpu.h
 delete mode 100644 arch/h8300/include/asm/ftrace.h
 delete mode 100644 arch/h8300/include/asm/futex.h
 delete mode 100644 arch/h8300/include/asm/gpio-internal.h
 delete mode 100644 arch/h8300/include/asm/hardirq.h
 delete mode 100644 arch/h8300/include/asm/hw_irq.h
 delete mode 100644 arch/h8300/include/asm/io.h
 delete mode 100644 arch/h8300/include/asm/irq.h
 delete mode 100644 arch/h8300/include/asm/irq_regs.h
 delete mode 100644 arch/h8300/include/asm/irqflags.h
 delete mode 100644 arch/h8300/include/asm/kdebug.h
 delete mode 100644 arch/h8300/include/asm/kmap_types.h
 delete mode 100644 arch/h8300/include/asm/local.h
 delete mode 100644 arch/h8300/include/asm/local64.h
 delete mode 100644 arch/h8300/include/asm/mc146818rtc.h
 delete mode 100644 arch/h8300/include/asm/mmu_context.h
 delete mode 100644 arch/h8300/include/asm/mutex.h
 delete mode 100644 arch/h8300/include/asm/page.h
 delete mode 100644 arch/h8300/include/asm/page_offset.h
 delete mode 100644 arch/h8300/include/asm/param.h
 delete mode 100644 arch/h8300/include/asm/pci.h
 delete mode 100644 arch/h8300/include/asm/percpu.h
 delete mode 100644 arch/h8300/include/asm/pgalloc.h
 delete mode 100644 arch/h8300/include/asm/pgtable.h
 delete mode 100644 arch/h8300/include/asm/processor.h
 delete mode 100644 arch/h8300/include/asm/ptrace.h
 delete mode 100644 arch/h8300/include/asm/regs267x.h
 delete mode 100644 arch/h8300/include/asm/regs306x.h
 delete mode 100644 arch/h8300/include/asm/scatterlist.h
 delete mode 100644 arch/h8300/include/asm/sections.h
 delete mode 100644 arch/h8300/include/asm/segment.h
 delete mode 100644 arch/h8300/include/asm/sh_bios.h
 delete mode 100644 arch/h8300/include/asm/shm.h
 delete mode 100644 arch/h8300/include/asm/shmparam.h
 delete mode 100644 arch/h8300/include/asm/signal.h
 delete mode 100644 arch/h8300/include/asm/smp.h
 delete mode 100644 arch/h8300/include/asm/spinlock.h
 delete mode 100644 arch/h8300/include/asm/string.h
 delete mode 100644 arch/h8300/include/asm/switch_to.h
 delete mode 100644 arch/h8300/include/asm/target_time.h
 delete mode 100644 arch/h8300/include/asm/termios.h
 delete mode 100644 arch/h8300/include/asm/thread_info.h
 delete mode 100644 arch/h8300/include/asm/timer.h
 delete mode 100644 arch/h8300/include/asm/timex.h
 delete mode 100644 arch/h8300/include/asm/tlb.h
 delete mode 100644 arch/h8300/include/asm/tlbflush.h
 delete mode 100644 arch/h8300/include/asm/topology.h
 delete mode 100644 arch/h8300/include/asm/traps.h
 delete mode 100644 arch/h8300/include/asm/types.h
 delete mode 100644 arch/h8300/include/asm/uaccess.h
 delete mode 100644 arch/h8300/include/asm/ucontext.h
 delete mode 100644 arch/h8300/include/asm/unaligned.h
 delete mode 100644 arch/h8300/include/asm/unistd.h
 delete mode 100644 arch/h8300/include/asm/user.h
 delete mode 100644 arch/h8300/include/asm/virtconvert.h
 delete mode 100644 arch/h8300/include/uapi/asm/Kbuild
 delete mode 100644 arch/h8300/include/uapi/asm/auxvec.h
 delete mode 100644 arch/h8300/include/uapi/asm/bitsperlong.h
 delete mode 100644 arch/h8300/include/uapi/asm/byteorder.h
 delete mode 100644 arch/h8300/include/uapi/asm/errno.h
 delete mode 100644 arch/h8300/include/uapi/asm/fcntl.h
 delete mode 100644 arch/h8300/include/uapi/asm/ioctl.h
 delete mode 100644 arch/h8300/include/uapi/asm/ioctls.h
 delete mode 100644 arch/h8300/include/uapi/asm/ipcbuf.h
 delete mode 100644 arch/h8300/include/uapi/asm/kvm_para.h
 delete mode 100644 arch/h8300/include/uapi/asm/mman.h
 delete mode 100644 arch/h8300/include/uapi/asm/msgbuf.h
 delete mode 100644 arch/h8300/include/uapi/asm/param.h
 delete mode 100644 arch/h8300/include/uapi/asm/poll.h
 delete mode 100644 arch/h8300/include/uapi/asm/posix_types.h
 delete mode 100644 arch/h8300/include/uapi/asm/ptrace.h
 delete mode 100644 arch/h8300/include/uapi/asm/resource.h
 delete mode 100644 arch/h8300/include/uapi/asm/sembuf.h
 delete mode 100644 arch/h8300/include/uapi/asm/setup.h
 delete mode 100644 arch/h8300/include/uapi/asm/shmbuf.h
 delete mode 100644 arch/h8300/include/uapi/asm/sigcontext.h
 delete mode 100644 arch/h8300/include/uapi/asm/siginfo.h
 delete mode 100644 arch/h8300/include/uapi/asm/signal.h
 delete mode 100644 arch/h8300/include/uapi/asm/socket.h
 delete mode 100644 arch/h8300/include/uapi/asm/sockios.h
 delete mode 100644 arch/h8300/include/uapi/asm/stat.h
 delete mode 100644 arch/h8300/include/uapi/asm/statfs.h
 delete mode 100644 arch/h8300/include/uapi/asm/swab.h
 delete mode 100644 arch/h8300/include/uapi/asm/termbits.h
 delete mode 100644 arch/h8300/include/uapi/asm/termios.h
 delete mode 100644 arch/h8300/include/uapi/asm/types.h
 delete mode 100644 arch/h8300/include/uapi/asm/unistd.h
 delete mode 100644 arch/h8300/kernel/Makefile
 delete mode 100644 arch/h8300/kernel/asm-offsets.c
 delete mode 100644 arch/h8300/kernel/entry.S
 delete mode 100644 arch/h8300/kernel/gpio.c
 delete mode 100644 arch/h8300/kernel/h8300_ksyms.c
 delete mode 100644 arch/h8300/kernel/irq.c
 delete mode 100644 arch/h8300/kernel/module.c
 delete mode 100644 arch/h8300/kernel/process.c
 delete mode 100644 arch/h8300/kernel/ptrace.c
 delete mode 100644 arch/h8300/kernel/setup.c
 delete mode 100644 arch/h8300/kernel/signal.c
 delete mode 100644 arch/h8300/kernel/sys_h8300.c
 delete mode 100644 arch/h8300/kernel/syscalls.S
 delete mode 100644 arch/h8300/kernel/time.c
 delete mode 100644 arch/h8300/kernel/timer/Makefile
 delete mode 100644 arch/h8300/kernel/timer/itu.c
 delete mode 100644 arch/h8300/kernel/timer/timer16.c
 delete mode 100644 arch/h8300/kernel/timer/timer8.c
 delete mode 100644 arch/h8300/kernel/timer/tpu.c
 delete mode 100644 arch/h8300/kernel/traps.c
 delete mode 100644 arch/h8300/kernel/vmlinux.lds.S
 delete mode 100644 arch/h8300/lib/Makefile
 delete mode 100644 arch/h8300/lib/abs.S
 delete mode 100644 arch/h8300/lib/ashrdi3.c
 delete mode 100644 arch/h8300/lib/checksum.c
 delete mode 100644 arch/h8300/lib/memcpy.S
 delete mode 100644 arch/h8300/lib/memset.S
 delete mode 100644 arch/h8300/lib/romfs.S
 delete mode 100644 arch/h8300/mm/Makefile
 delete mode 100644 arch/h8300/mm/fault.c
 delete mode 100644 arch/h8300/mm/init.c
 delete mode 100644 arch/h8300/mm/kmap.c
 delete mode 100644 arch/h8300/mm/memory.c
 delete mode 100644 arch/h8300/platform/h8300h/Makefile
 delete mode 100644 arch/h8300/platform/h8300h/aki3068net/Makefile
 delete mode 100644 arch/h8300/platform/h8300h/aki3068net/crt0_ram.S
 delete mode 100644 arch/h8300/platform/h8300h/generic/Makefile
 delete mode 100644 arch/h8300/platform/h8300h/generic/crt0_ram.S
 delete mode 100644 arch/h8300/platform/h8300h/generic/crt0_rom.S
 delete mode 100644 arch/h8300/platform/h8300h/h8max/Makefile
 delete mode 100644 arch/h8300/platform/h8300h/h8max/crt0_ram.S
 delete mode 100644 arch/h8300/platform/h8300h/irq.c
 delete mode 100644 arch/h8300/platform/h8300h/ptrace_h8300h.c
 delete mode 100644 arch/h8300/platform/h8s/Makefile
 delete mode 100644 arch/h8300/platform/h8s/edosk2674/Makefile
 delete mode 100644 arch/h8300/platform/h8s/edosk2674/crt0_ram.S
 delete mode 100644 arch/h8300/platform/h8s/edosk2674/crt0_rom.S
 delete mode 100644 arch/h8300/platform/h8s/generic/Makefile
 delete mode 100644 arch/h8300/platform/h8s/generic/crt0_ram.S
 delete mode 100644 arch/h8300/platform/h8s/generic/crt0_rom.S
 delete mode 100644 arch/h8300/platform/h8s/irq.c
 delete mode 100644 arch/h8300/platform/h8s/ptrace_h8s.c

(limited to 'include/linux')

diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
index b1b8587b86f0..9290de703450 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.txt
@@ -65,11 +65,6 @@ Possible arch/ problems
 
 Possible arch problems I found (and either tried to fix or didn't):
 
-h8300 - Is such sleeping racy vs interrupts? (See #4a).
-        The H8/300 manual I found indicates yes, however disabling IRQs
-        over the sleep mean only NMIs can wake it up, so can't fix easily
-        without doing spin waiting.
-
 ia64 - is safe_halt call racy vs interrupts? (does it sleep?) (See #4a)
 
 sh64 - Is sleeping racy vs interrupts? (See #4a)
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
deleted file mode 100644
index 24b1dc2564f1..000000000000
--- a/arch/h8300/Kconfig
+++ /dev/null
@@ -1,108 +0,0 @@
-config H8300
-	bool
-	default y
-	select HAVE_IDE
-	select GENERIC_ATOMIC64
-	select HAVE_UID16
-	select VIRT_TO_BUS
-	select ARCH_WANT_IPC_PARSE_VERSION
-	select GENERIC_IRQ_SHOW
-	select GENERIC_CPU_DEVICES
-	select MODULES_USE_ELF_RELA
-	select OLD_SIGSUSPEND3
-	select OLD_SIGACTION
-	select HAVE_UNDERSCORE_SYMBOL_PREFIX
-
-config MMU
-	bool
-	default n
-
-config SWAP
-	bool
-	default n
-
-config ZONE_DMA
-	bool
-	default y
-
-config FPU
-	bool
-	default n
-
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default n
-
-config ARCH_HAS_ILOG2_U32
-	bool
-	default n
-
-config ARCH_HAS_ILOG2_U64
-	bool
-	default n
-
-config GENERIC_HWEIGHT
-	bool
-	default y
-
-config GENERIC_CALIBRATE_DELAY
-	bool
-	default y
-
-config GENERIC_BUG
-        bool
-        depends on BUG
-
-config TIME_LOW_RES
-	bool
-	default y
-
-config NO_IOPORT
-	def_bool y
-
-config NO_DMA
-	def_bool y
-
-config ISA
-	bool
-	default y
-
-config PCI
-	bool
-	default n
-
-config HZ
-	int
-	default 100
-
-source "init/Kconfig"
-
-source "kernel/Kconfig.freezer"
-
-source "arch/h8300/Kconfig.cpu"
-
-menu "Executable file formats"
-
-source "fs/Kconfig.binfmt"
-
-endmenu
-
-source "net/Kconfig"
-
-source "drivers/Kconfig"
-
-source "arch/h8300/Kconfig.ide"
-
-source "fs/Kconfig"
-
-source "arch/h8300/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu
deleted file mode 100644
index cdee771460ed..000000000000
--- a/arch/h8300/Kconfig.cpu
+++ /dev/null
@@ -1,171 +0,0 @@
-menu "Processor type and features"
-
-choice
-	prompt "H8/300 platform"
-	default H8300H_GENERIC
-
-config H8300H_GENERIC
-	bool "H8/300H Generic"
-	help
-	  H8/300H CPU Generic Hardware Support
-
-config H8300H_AKI3068NET
-	bool "AE-3068/69"
-	select H83068
-	help
-	  AKI-H8/3068F / AKI-H8/3069F Flashmicom LAN Board Support
-	  More Information. (Japanese Only)
-	  <http://akizukidenshi.com/catalog/default.aspx>
-	  AE-3068/69 Evaluation Board Support
-	  More Information.
-	  <http://www.microtronique.com/ae3069lan.htm>
-
-config H8300H_H8MAX
-	bool "H8MAX"
-	select H83068
-	help
-	  H8MAX Evaluation Board Support
-	  More Information. (Japanese Only)
-	  <http://strawberry-linux.com/h8/index.html>
-
-config H8300H_SIM
-	bool "H8/300H Simulator"
-	select H83007
-	help
-	  GDB Simulator Support
-	  More Information.
-	  <http://sourceware.org/sid/>
-
-config H8S_GENERIC
-	bool "H8S Generic"
-	help
-	  H8S CPU Generic Hardware Support
-
-config H8S_EDOSK2674
-	bool "EDOSK-2674"
-	select H8S2678
-	help
-	  Renesas EDOSK-2674 Evaluation Board Support
-	  More Information.
-	  <http://www.azpower.com/H8-uClinux/index.html>
- 	  <http://www.renesas.eu/products/tools/introductory_evaluation_tools/evaluation_development_os_kits/edosk2674r/edosk2674r_software_tools_root.jsp>
-
-config H8S_SIM
-	bool "H8S Simulator"
-	help
-	  GDB Simulator Support
-	  More Information.
-	  <http://sourceware.org/sid/>
-
-endchoice
-
-choice
-	prompt "CPU Selection"
-
-config H83002
-	bool "H8/3001,3002,3003"
-	depends on BROKEN
-	select CPU_H8300H
-
-config H83007
-	bool "H8/3006,3007"
-	select CPU_H8300H
-
-config H83048
-	bool "H8/3044,3045,3046,3047,3048,3052"
-	depends on BROKEN
-	select CPU_H8300H
-
-config H83068
-	bool "H8/3065,3066,3067,3068,3069"
-	select CPU_H8300H
-
-config H8S2678
-	bool "H8S/2670,2673,2674R,2675,2676"
-	select CPU_H8S
-
-endchoice
-
-config CPU_CLOCK
-	int "CPU Clock Frequency (/1KHz)"
-	default "20000"
-	help
-	  CPU Clock Frequency divide to 1000
-
-choice
-	prompt "Kernel executes from"
-	---help---
-	  Choose the memory type that the kernel will be running in.
-
-config RAMKERNEL
-	bool "RAM"
-	help
-	  The kernel will be resident in RAM when running.
-
-config ROMKERNEL
-	bool "ROM"
-	help
-	  The kernel will be resident in FLASH/ROM when running.
-endchoice
-
-
-config CPU_H8300H
-	bool
-	depends on (H83002 || H83007 || H83048 || H83068)
-	default y
-
-config CPU_H8S
-	bool
-	depends on H8S2678
-	default y
-
-choice
-	prompt "Timer"
-config H8300_TIMER8
-	bool "8bit timer (2ch cascade)"
-	depends on (H83007 || H83068 || H8S2678)
-
-config H8300_TIMER16
-	bool "16bit timer"
-	depends on (H83007 || H83068)
-
-config H8300_ITU
-	bool "ITU"
-	depends on (H83002 || H83048)
-
-config H8300_TPU
-	bool "TPU"
-	depends on H8S2678
-endchoice
-
-if H8300_TIMER8
-choice
-	prompt "Timer Channel"
-config H8300_TIMER8_CH0
-	bool "Channel 0"
-config H8300_TIMER8_CH2
-	bool "Channel 2"
-	depends on CPU_H8300H
-endchoice
-endif
-
-config H8300_TIMER16_CH
-	int "16bit timer channel (0 - 2)"
-	depends on H8300_TIMER16
-	range 0 2
-
-config H8300_ITU_CH
-	int "ITU channel"
-	depends on H8300_ITU
-	range 0 4
-
-config H8300_TPU_CH
-	int "TPU channel"
-	depends on H8300_TPU
-	range 0 4
-
-source "kernel/Kconfig.preempt"
-
-source "mm/Kconfig"
-
-endmenu
diff --git a/arch/h8300/Kconfig.debug b/arch/h8300/Kconfig.debug
deleted file mode 100644
index e8d1b236ad8c..000000000000
--- a/arch/h8300/Kconfig.debug
+++ /dev/null
@@ -1,68 +0,0 @@
-menu "Kernel hacking"
-
-source "lib/Kconfig.debug"
-
-config FULLDEBUG
-	bool "Full Symbolic/Source Debugging support"
-	help
-	  Enable debugging symbols on kernel build.
-
-config HIGHPROFILE
-	bool "Use fast second timer for profiling"
-	help
-	  Use a fast secondary clock to produce profiling information.
-
-config NO_KERNEL_MSG
-	bool "Suppress Kernel BUG Messages"
-	help
-	  Do not output any debug BUG messages within the kernel.
-
-config GDB_MAGICPRINT
-	bool "Message Output for GDB MagicPrint service"
-	depends on (H8300H_SIM || H8S_SIM)
-	help
-	  kernel messages output using MagicPrint service from GDB
-
-config SYSCALL_PRINT
-	bool "SystemCall trace print"
-	help
-	  output history of systemcall
-
-config GDB_DEBUG
-   	bool "Use gdb stub"
-	depends on (!H8300H_SIM && !H8S_SIM)
-	help
-	  gdb stub exception support
-
-config SH_STANDARD_BIOS
-	bool "Use gdb protocol serial console"
-	depends on (!H8300H_SIM && !H8S_SIM)
-	help
-	  serial console output using GDB protocol.
-	  Require eCos/RedBoot
-
-config DEFAULT_CMDLINE
-	bool "Use builtin commandline"
-	default n
-	help
-	  builtin kernel commandline enabled.
-
-config KERNEL_COMMAND
-	string "Buildin command string"
-	depends on DEFAULT_CMDLINE
-	help
-	  builtin kernel commandline strings.
-
-config BLKDEV_RESERVE
-	bool "BLKDEV Reserved Memory"
-	default n
-	help
-	  Reserved BLKDEV area.
-
-config BLKDEV_RESERVE_ADDRESS
-	hex 'start address'
-	depends on BLKDEV_RESERVE
-	help
-	  BLKDEV start address.
-
-endmenu
diff --git a/arch/h8300/Kconfig.ide b/arch/h8300/Kconfig.ide
deleted file mode 100644
index a38a63054ac2..000000000000
--- a/arch/h8300/Kconfig.ide
+++ /dev/null
@@ -1,44 +0,0 @@
-# uClinux H8/300 Target Board Selection Menu (IDE)
-
-if (H8300H_AKI3068NET)
-menu "IDE Extra configuration"
-
-config H8300_IDE_BASE
-	hex "IDE register base address"
-	depends on IDE
-	default 0
-	help
-	  IDE registers base address
-
-config H8300_IDE_ALT
-	hex "IDE register alternate address"
-	depends on IDE
-	default 0
-	help
-	  IDE alternate registers address
-
-config H8300_IDE_IRQ
-	int "IDE IRQ no"
-	depends on IDE
-	default 0
-	help
-	  IDE use IRQ no
-endmenu
-endif
-
-if (H8300H_H8MAX)
-config H8300_IDE_BASE
-	hex
-	depends on IDE
-	default 0x200000
-
-config H8300_IDE_ALT
-	hex
-	depends on IDE
-	default 0x60000c
-
-config H8300_IDE_IRQ
-	int
-	depends on IDE
-	default 5
-endif
diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile
deleted file mode 100644
index a556447877b4..000000000000
--- a/arch/h8300/Makefile
+++ /dev/null
@@ -1,71 +0,0 @@
-#
-# arch/h8300/Makefile
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# (C) Copyright 2002,2003 Yoshinori Sato <ysato@users.sourceforge.jp>
-#
-
-platform-$(CONFIG_CPU_H8300H)	:= h8300h
-platform-$(CONFIG_CPU_H8S)	:= h8s
-PLATFORM := $(platform-y)
-
-board-$(CONFIG_H8300H_GENERIC)		:= generic
-board-$(CONFIG_H8300H_AKI3068NET)	:= aki3068net
-board-$(CONFIG_H8300H_H8MAX)		:= h8max
-board-$(CONFIG_H8300H_SIM)		:= generic
-board-$(CONFIG_H8S_GENERIC)		:= generic
-board-$(CONFIG_H8S_EDOSK2674)		:= edosk2674
-board-$(CONFIG_H8S_SIM)			:= generic
-BOARD := $(board-y)
-
-model-$(CONFIG_RAMKERNEL)	:= ram
-model-$(CONFIG_ROMKERNEL)	:= rom
-MODEL := $(model-y)
-
-cflags-$(CONFIG_CPU_H8300H)	:= -mh
-ldflags-$(CONFIG_CPU_H8300H)	:= -mh8300helf
-cflags-$(CONFIG_CPU_H8S)	:= -ms
-ldflags-$(CONFIG_CPU_H8S)	:= -mh8300self
-
-KBUILD_CFLAGS += $(cflags-y)
-KBUILD_CFLAGS += -mint32 -fno-builtin
-KBUILD_CFLAGS += -g
-KBUILD_CFLAGS += -D__linux__
-KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\"
-KBUILD_AFLAGS += -DPLATFORM=$(PLATFORM) -DMODEL=$(MODEL) $(cflags-y)
-LDFLAGS += $(ldflags-y)
-
-CROSS_COMPILE = h8300-elf-
-LIBGCC := $(shell $(CROSS-COMPILE)$(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
-
-head-y := arch/$(ARCH)/platform/$(PLATFORM)/$(BOARD)/crt0_$(MODEL).o
-
-core-y	+= arch/$(ARCH)/kernel/ \
-	   arch/$(ARCH)/mm/
-ifdef PLATFORM
-core-y	+= arch/$(ARCH)/platform/$(PLATFORM)/ \
-	   arch/$(ARCH)/platform/$(PLATFORM)/$(BOARD)/
-endif
-
-libs-y	+= arch/$(ARCH)/lib/ $(LIBGCC)
-
-boot := arch/h8300/boot
-
-export MODEL PLATFORM BOARD
-
-archmrproper:
-
-archclean:
-	$(Q)$(MAKE) $(clean)=$(boot)
-
-vmlinux.srec vmlinux.bin zImage: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-
-define archhelp
-  @echo  'vmlinux.bin  - Create raw binary'
-  @echo  'vmlinux.srec - Create srec binary'
-  @echo  'zImage       - Compressed kernel image'
-endef
diff --git a/arch/h8300/README b/arch/h8300/README
deleted file mode 100644
index efa805fda19b..000000000000
--- a/arch/h8300/README
+++ /dev/null
@@ -1,38 +0,0 @@
-linux-2.6 for H8/300 README
-Yoshinori Sato <ysato@users.sourceforge.jp>
-
-* Supported CPU
-H8/300H and H8S
-
-* Supported Target
-1.simulator of GDB
-  require patches.
-
-2.AE 3068/AE 3069
-  more information 
-  MICROTRONIQUE <http://www.microtronique.com/>
-  Akizuki Denshi Tsusho Ltd. <http://akizukidenshi.com/> (Japanese Only)
-
-3.H8MAX 
-  see http://ip-sol.jp/h8max/ (Japanese Only)
-
-4.EDOSK2674
-  see http://www.eu.renesas.com/products/mpumcu/tool/edk/support/edosk2674.html
-      http://www.uclinux.org/pub/uClinux/ports/h8/HITACHI-EDOSK2674-HOWTO
-      http://www.azpower.com/H8-uClinux/
-
-* Toolchain Version
-gcc-3.1 or higher and patch
-see arch/h8300/tools_patch/README
-binutils-2.12 or higher
-gdb-5.2 or higher
-The environment that can compile a h8300-elf binary is necessary.
-
-* Userland Develop environment
-used h8300-elf toolchains.
-see http://www.uclinux.org/pub/uClinux/ports/h8/
-
-* A few words of thanks
-Porting to H8/300 serieses is support of Information-technology Promotion Agency, Japan.
-I thank support.
-and All developer/user.
diff --git a/arch/h8300/boot/Makefile b/arch/h8300/boot/Makefile
deleted file mode 100644
index 0bb62e064eea..000000000000
--- a/arch/h8300/boot/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-# arch/h8300/boot/Makefile
-
-targets := vmlinux.srec vmlinux.bin zImage
-subdir- := compressed
-
-OBJCOPYFLAGS_vmlinux.srec := -Osrec
-OBJCOPYFLAGS_vmlinux.bin  := -Obinary
-OBJCOPYFLAGS_zImage := -O binary -R .note -R .comment -R .stab -R .stabstr -S
-
-$(obj)/vmlinux.srec $(obj)/vmlinux.bin:  vmlinux FORCE
-	$(call if_changed,objcopy)
-	@echo '  Kernel: $@ is ready'
-
-$(obj)/zImage: $(obj)/compressed/vmlinux FORCE
-	$(call if_changed,objcopy)
-	@echo 'Kernel: $@ is ready'
-
-$(obj)/compressed/vmlinux: FORCE
-	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
-
-CLEAN_FILES += arch/$(ARCH)/vmlinux.bin arch/$(ARCH)/vmlinux.srec
-
diff --git a/arch/h8300/boot/compressed/Makefile b/arch/h8300/boot/compressed/Makefile
deleted file mode 100644
index a6c98fe3bbc3..000000000000
--- a/arch/h8300/boot/compressed/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-#
-# linux/arch/sh/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-targets		:= vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
-asflags-y	:= -traditional
-
-OBJECTS = $(obj)/head.o $(obj)/misc.o
-
-#
-# IMAGE_OFFSET is the load offset of the compression loader
-# Assign dummy values if these 2 variables are not defined,
-# in order to suppress error message.
-#
-CONFIG_MEMORY_START     ?= 0x00400000
-CONFIG_BOOT_LINK_OFFSET ?= 0x00140000
-IMAGE_OFFSET := $(shell printf "0x%08x" $$(($(CONFIG_MEMORY_START)+$(CONFIG_BOOT_LINK_OFFSET))))
-
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -estartup $(obj)/vmlinux.lds
-
-$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o FORCE
-	$(call if_changed,ld)
-	@:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
-	$(call if_changed,objcopy)
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-h8300 -T
-OBJCOPYFLAGS := -O binary
-
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
-	$(call if_changed,ld)
diff --git a/arch/h8300/boot/compressed/head.S b/arch/h8300/boot/compressed/head.S
deleted file mode 100644
index 10e9a2d1cc6c..000000000000
--- a/arch/h8300/boot/compressed/head.S
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- *  linux/arch/h8300/boot/compressed/head.S
- *
- *  Copyright (C) 2006 Yoshinori Sato
- */
-
-	.h8300h
-#include <linux/linkage.h>
-
-#define SRAM_START 0xff4000
-
-	.section	.text..startup
-	.global	startup
-startup:
-	mov.l	#SRAM_START+0x8000, sp
-	mov.l	#__sbss, er0
-	mov.l	#__ebss, er1
-	sub.l	er0, er1
-	shlr	er1
-	shlr	er1
-	sub.l	er2, er2
-1:
-	mov.l	er2, @er0
-	adds	#4, er0
-	dec.l	#1, er1
-	bne	1b
-	jsr	@_decompress_kernel
-	jmp	@0x400000
-
-	.align	9
-fake_headers_as_bzImage:
-	.word	0
-	.ascii	"HdrS"		; header signature
-	.word	0x0202		; header version number (>= 0x0105)
-				; or else old loadlin-1.5 will fail)
-	.word	0		; default_switch
-	.word	0		; SETUPSEG
-	.word	0x1000
-	.word	0		; pointing to kernel version string
-	.byte	0		; = 0, old one (LILO, Loadlin,
-				; 0xTV: T=0 for LILO
-				;       V = version
-	.byte	1		; Load flags bzImage=1
-	.word	0x8000		; size to move, when setup is not
-	.long	0x100000	; 0x100000 = default for big kernel
-	.long	0		; address of loaded ramdisk image
-	.long	0		; its size in bytes
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c
deleted file mode 100644
index 4a1e3dd43948..000000000000
--- a/arch/h8300/boot/compressed/misc.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * arch/h8300/boot/compressed/misc.c
- *
- * This is a collection of several routines from gzip-1.0.3
- * adapted for Linux.
- *
- * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
- *
- * Adapted for h8300 by Yoshinori Sato 2006
- */
-
-#include <asm/uaccess.h>
-
-/*
- * gzip declarations
- */
-
-#define OF(args)  args
-#define STATIC static
-
-#undef memset
-#undef memcpy
-#define memzero(s, n)     memset ((s), 0, (n))
-
-typedef unsigned char  uch;
-typedef unsigned short ush;
-typedef unsigned long  ulg;
-
-#define WSIZE 0x8000		/* Window size must be at least 32k, */
-				/* and a power of two */
-
-static uch *inbuf;	     /* input buffer */
-static uch window[WSIZE];    /* Sliding window buffer */
-
-static unsigned insize = 0;  /* valid bytes in inbuf */
-static unsigned inptr = 0;   /* index of next byte to be processed in inbuf */
-static unsigned outcnt = 0;  /* bytes in output buffer */
-
-/* gzip flag byte */
-#define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
-#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
-#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
-#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
-#define COMMENT      0x10 /* bit 4 set: file comment present */
-#define ENCRYPTED    0x20 /* bit 5 set: file is encrypted */
-#define RESERVED     0xC0 /* bit 6,7:   reserved */
-
-#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
-
-/* Diagnostic functions */
-#ifdef DEBUG
-#  define Assert(cond,msg) {if(!(cond)) error(msg);}
-#  define Trace(x) fprintf x
-#  define Tracev(x) {if (verbose) fprintf x ;}
-#  define Tracevv(x) {if (verbose>1) fprintf x ;}
-#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
-#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
-#else
-#  define Assert(cond,msg)
-#  define Trace(x)
-#  define Tracev(x)
-#  define Tracevv(x)
-#  define Tracec(c,x)
-#  define Tracecv(c,x)
-#endif
-
-static int  fill_inbuf(void);
-static void flush_window(void);
-static void error(char *m);
-
-extern char input_data[];
-extern int input_len;
-
-static long bytes_out = 0;
-static uch *output_data;
-static unsigned long output_ptr = 0;
-
-static void error(char *m);
-
-int puts(const char *);
-
-extern int _end;
-static unsigned long free_mem_ptr;
-static unsigned long free_mem_end_ptr;
-
-#define HEAP_SIZE             0x10000
-
-#include "../../../../lib/inflate.c"
-
-#define SCR *((volatile unsigned char *)0xffff8a)
-#define TDR *((volatile unsigned char *)0xffff8b)
-#define SSR *((volatile unsigned char *)0xffff8c)
-
-int puts(const char *s)
-{
-	return 0;
-}
-
-void* memset(void* s, int c, size_t n)
-{
-	int i;
-	char *ss = (char*)s;
-
-	for (i=0;i<n;i++) ss[i] = c;
-	return s;
-}
-
-void* memcpy(void* __dest, __const void* __src,
-			    size_t __n)
-{
-	int i;
-	char *d = (char *)__dest, *s = (char *)__src;
-
-	for (i=0;i<__n;i++) d[i] = s[i];
-	return __dest;
-}
-
-/* ===========================================================================
- * Fill the input buffer. This is called only when the buffer is empty
- * and at least one byte is really needed.
- */
-static int fill_inbuf(void)
-{
-	if (insize != 0) {
-		error("ran out of input data");
-	}
-
-	inbuf = input_data;
-	insize = input_len;
-	inptr = 1;
-	return inbuf[0];
-}
-
-/* ===========================================================================
- * Write the output window window[0..outcnt-1] and update crc and bytes_out.
- * (Used for the decompressed data only.)
- */
-static void flush_window(void)
-{
-    ulg c = crc;         /* temporary variable */
-    unsigned n;
-    uch *in, *out, ch;
-
-    in = window;
-    out = &output_data[output_ptr];
-    for (n = 0; n < outcnt; n++) {
-	    ch = *out++ = *in++;
-	    c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
-    }
-    crc = c;
-    bytes_out += (ulg)outcnt;
-    output_ptr += (ulg)outcnt;
-    outcnt = 0;
-}
-
-static void error(char *x)
-{
-	puts("\n\n");
-	puts(x);
-	puts("\n\n -- System halted");
-
-	while(1);	/* Halt */
-}
-
-#define STACK_SIZE (4096)
-long user_stack [STACK_SIZE];
-long* stack_start = &user_stack[STACK_SIZE];
-
-void decompress_kernel(void)
-{
-	output_data = 0;
-	output_ptr = (unsigned long)0x400000;
-	free_mem_ptr = (unsigned long)&_end;
-	free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
-
-	makecrc();
-	puts("Uncompressing Linux... ");
-	gunzip();
-	puts("Ok, booting the kernel.\n");
-}
diff --git a/arch/h8300/boot/compressed/vmlinux.lds b/arch/h8300/boot/compressed/vmlinux.lds
deleted file mode 100644
index a0a3a0ed54ef..000000000000
--- a/arch/h8300/boot/compressed/vmlinux.lds
+++ /dev/null
@@ -1,32 +0,0 @@
-SECTIONS
-{
-        .text :
-        {
-        __stext = . ;
-	__text = .;
-	       *(.text..startup)
-	       *(.text)
-        __etext = . ;
-        }
-
-	.rodata :
-	{
-		*(.rodata)
-	}
-        .data :
-
-        {
-        __sdata = . ;
-        ___data_start = . ;
-                *(.data.*)
-	}
-        .bss :
-        {
-        . = ALIGN(0x4) ;
-        __sbss = . ;
-                *(.bss*)
-        . = ALIGN(0x4) ;
-        __ebss = . ;
-        __end = . ;
-        }
-}
diff --git a/arch/h8300/boot/compressed/vmlinux.scr b/arch/h8300/boot/compressed/vmlinux.scr
deleted file mode 100644
index a0f6962736e9..000000000000
--- a/arch/h8300/boot/compressed/vmlinux.scr
+++ /dev/null
@@ -1,9 +0,0 @@
-SECTIONS
-{
-  .data : {
-	_input_len = .;
-	LONG(_input_data_end - _input_data) _input_data = .;
-	*(.data)
-	_input_data_end = .;
-	}
-}
diff --git a/arch/h8300/defconfig b/arch/h8300/defconfig
deleted file mode 100644
index 042425a02645..000000000000
--- a/arch/h8300/defconfig
+++ /dev/null
@@ -1,42 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_EXPERT=y
-# CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
-# CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
-# CONFIG_BASE_FULL is not set
-# CONFIG_FUTEX is not set
-# CONFIG_EPOLL is not set
-# CONFIG_SIGNALFD is not set
-# CONFIG_TIMERFD is not set
-# CONFIG_EVENTFD is not set
-# CONFIG_VM_EVENT_COUNTERS is not set
-# CONFIG_COMPAT_BRK is not set
-CONFIG_SLOB=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_H83007=y
-CONFIG_BINFMT_FLAT=y
-CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_MISC=y
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_REDBOOT_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_RAM=y
-CONFIG_MTD_ROM=y
-CONFIG_MTD_UCLINUX=y
-# CONFIG_BLK_DEV is not set
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_HWMON is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_DNOTIFY is not set
-CONFIG_ROMFS_FS=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_CRC32 is not set
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
deleted file mode 100644
index 8ada3cf0c98d..000000000000
--- a/arch/h8300/include/asm/Kbuild
+++ /dev/null
@@ -1,8 +0,0 @@
-
-generic-y += clkdev.h
-generic-y += exec.h
-generic-y += linkage.h
-generic-y += mmu.h
-generic-y += module.h
-generic-y += trace_clock.h
-generic-y += xor.h
diff --git a/arch/h8300/include/asm/asm-offsets.h b/arch/h8300/include/asm/asm-offsets.h
deleted file mode 100644
index d370ee36a182..000000000000
--- a/arch/h8300/include/asm/asm-offsets.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <generated/asm-offsets.h>
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
deleted file mode 100644
index 40901e353c21..000000000000
--- a/arch/h8300/include/asm/atomic.h
+++ /dev/null
@@ -1,146 +0,0 @@
-#ifndef __ARCH_H8300_ATOMIC__
-#define __ARCH_H8300_ATOMIC__
-
-#include <linux/types.h>
-#include <asm/cmpxchg.h>
-
-/*
- * Atomic operations that C can't guarantee us.  Useful for
- * resource counting etc..
- */
-
-#define ATOMIC_INIT(i)	{ (i) }
-
-#define atomic_read(v)		(*(volatile int *)&(v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = i)
-
-#include <linux/kernel.h>
-
-static __inline__ int atomic_add_return(int i, atomic_t *v)
-{
-	unsigned long flags;
-	int ret;
-	local_irq_save(flags);
-	ret = v->counter += i;
-	local_irq_restore(flags);
-	return ret;
-}
-
-#define atomic_add(i, v) atomic_add_return(i, v)
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-static __inline__ int atomic_sub_return(int i, atomic_t *v)
-{
-	unsigned long flags;
-	int ret;
-	local_irq_save(flags);
-	ret = v->counter -= i;
-	local_irq_restore(flags);
-	return ret;
-}
-
-#define atomic_sub(i, v) atomic_sub_return(i, v)
-#define atomic_sub_and_test(i,v) (atomic_sub_return(i, v) == 0)
-
-static __inline__ int atomic_inc_return(atomic_t *v)
-{
-	unsigned long flags;
-	int ret;
-	local_irq_save(flags);
-	v->counter++;
-	ret = v->counter;
-	local_irq_restore(flags);
-	return ret;
-}
-
-#define atomic_inc(v) atomic_inc_return(v)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-static __inline__ int atomic_dec_return(atomic_t *v)
-{
-	unsigned long flags;
-	int ret;
-	local_irq_save(flags);
-	--v->counter;
-	ret = v->counter;
-	local_irq_restore(flags);
-	return ret;
-}
-
-#define atomic_dec(v) atomic_dec_return(v)
-
-static __inline__ int atomic_dec_and_test(atomic_t *v)
-{
-	unsigned long flags;
-	int ret;
-	local_irq_save(flags);
-	--v->counter;
-	ret = v->counter;
-	local_irq_restore(flags);
-	return ret == 0;
-}
-
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
-{
-	int ret;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	ret = v->counter;
-	if (likely(ret == old))
-		v->counter = new;
-	local_irq_restore(flags);
-	return ret;
-}
-
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
-	int ret;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	ret = v->counter;
-	if (ret != u)
-		v->counter += a;
-	local_irq_restore(flags);
-	return ret;
-}
-
-static __inline__ void atomic_clear_mask(unsigned long mask, unsigned long *v)
-{
-	__asm__ __volatile__("stc ccr,r1l\n\t"
-	                     "orc #0x80,ccr\n\t"
-	                     "mov.l %0,er0\n\t"
-	                     "and.l %1,er0\n\t"
-	                     "mov.l er0,%0\n\t"
-	                     "ldc r1l,ccr" 
-                             : "=m" (*v) : "g" (~(mask)) :"er0","er1");
-}
-
-static __inline__ void atomic_set_mask(unsigned long mask, unsigned long *v)
-{
-	__asm__ __volatile__("stc ccr,r1l\n\t"
-	                     "orc #0x80,ccr\n\t"
-	                     "mov.l %0,er0\n\t"
-	                     "or.l %1,er0\n\t"
-	                     "mov.l er0,%0\n\t"
-	                     "ldc r1l,ccr" 
-                             : "=m" (*v) : "g" (mask) :"er0","er1");
-}
-
-/* Atomic operations are already serializing */
-#define smp_mb__before_atomic_dec()    barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc()    barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
-#endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/h8300/include/asm/barrier.h b/arch/h8300/include/asm/barrier.h
deleted file mode 100644
index 9e0aa9fc195d..000000000000
--- a/arch/h8300/include/asm/barrier.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _H8300_BARRIER_H
-#define _H8300_BARRIER_H
-
-#define nop()  asm volatile ("nop"::)
-
-/*
- * Force strict CPU ordering.
- * Not really required on H8...
- */
-#define mb()   asm volatile (""   : : :"memory")
-#define rmb()  asm volatile (""   : : :"memory")
-#define wmb()  asm volatile (""   : : :"memory")
-#define set_mb(var, value) do { xchg(&var, value); } while (0)
-
-#define read_barrier_depends()	do { } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
-#define smp_read_barrier_depends()	read_barrier_depends()
-#else
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while(0)
-#endif
-
-#endif /* _H8300_BARRIER_H */
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
deleted file mode 100644
index eb34e0cd33d5..000000000000
--- a/arch/h8300/include/asm/bitops.h
+++ /dev/null
@@ -1,211 +0,0 @@
-#ifndef _H8300_BITOPS_H
-#define _H8300_BITOPS_H
-
-/*
- * Copyright 1992, Linus Torvalds.
- * Copyright 2002, Yoshinori Sato
- */
-
-#include <linux/compiler.h>
-
-#ifdef __KERNEL__
-
-#ifndef _LINUX_BITOPS_H
-#error only <linux/bitops.h> can be included directly
-#endif
-
-/*
- * Function prototypes to keep gcc -Wall happy
- */
-
-/*
- * ffz = Find First Zero in word. Undefined if no zero exists,
- * so code should check against ~0UL first..
- */
-static __inline__ unsigned long ffz(unsigned long word)
-{
-	unsigned long result;
-
-	result = -1;
-	__asm__("1:\n\t"
-		"shlr.l %2\n\t"
-		"adds #1,%0\n\t"
-		"bcs 1b"
-		: "=r" (result)
-		: "0"  (result),"r" (word));
-	return result;
-}
-
-#define H8300_GEN_BITOP_CONST(OP,BIT)			    \
-	case BIT:					    \
-	__asm__(OP " #" #BIT ",@%0"::"r"(b_addr):"memory"); \
-	break;
-
-#define H8300_GEN_BITOP(FNAME,OP)				      \
-static __inline__ void FNAME(int nr, volatile unsigned long* addr)    \
-{								      \
-	volatile unsigned char *b_addr;				      \
-	b_addr = (volatile unsigned char *)addr + ((nr >> 3) ^ 3);    \
-	if (__builtin_constant_p(nr)) {				      \
-		switch(nr & 7) {				      \
-			H8300_GEN_BITOP_CONST(OP,0)		      \
-			H8300_GEN_BITOP_CONST(OP,1)		      \
-			H8300_GEN_BITOP_CONST(OP,2)		      \
-			H8300_GEN_BITOP_CONST(OP,3)		      \
-			H8300_GEN_BITOP_CONST(OP,4)		      \
-			H8300_GEN_BITOP_CONST(OP,5)		      \
-			H8300_GEN_BITOP_CONST(OP,6)		      \
-			H8300_GEN_BITOP_CONST(OP,7)		      \
-		}						      \
-	} else {						      \
-		__asm__(OP " %w0,@%1"::"r"(nr),"r"(b_addr):"memory"); \
-	}							      \
-}
-
-/*
- * clear_bit() doesn't provide any barrier for the compiler.
- */
-#define smp_mb__before_clear_bit()	barrier()
-#define smp_mb__after_clear_bit()	barrier()
-
-H8300_GEN_BITOP(set_bit	  ,"bset")
-H8300_GEN_BITOP(clear_bit ,"bclr")
-H8300_GEN_BITOP(change_bit,"bnot")
-#define __set_bit(nr,addr)    set_bit((nr),(addr))
-#define __clear_bit(nr,addr)  clear_bit((nr),(addr))
-#define __change_bit(nr,addr) change_bit((nr),(addr))
-
-#undef H8300_GEN_BITOP
-#undef H8300_GEN_BITOP_CONST
-
-static __inline__ int test_bit(int nr, const unsigned long* addr)
-{
-	return (*((volatile unsigned char *)addr + 
-               ((nr >> 3) ^ 3)) & (1UL << (nr & 7))) != 0;
-}
-
-#define __test_bit(nr, addr) test_bit(nr, addr)
-
-#define H8300_GEN_TEST_BITOP_CONST_INT(OP,BIT)			     \
-	case BIT:						     \
-	__asm__("stc ccr,%w1\n\t"				     \
-		"orc #0x80,ccr\n\t"				     \
-		"bld #" #BIT ",@%4\n\t"				     \
-		OP " #" #BIT ",@%4\n\t"				     \
-		"rotxl.l %0\n\t"				     \
-		"ldc %w1,ccr"					     \
-		: "=r"(retval),"=&r"(ccrsave),"=m"(*b_addr)	     \
-		: "0" (retval),"r" (b_addr)			     \
-		: "memory");                                         \
-        break;
-
-#define H8300_GEN_TEST_BITOP_CONST(OP,BIT)			     \
-	case BIT:						     \
-	__asm__("bld #" #BIT ",@%3\n\t"				     \
-		OP " #" #BIT ",@%3\n\t"				     \
-		"rotxl.l %0\n\t"				     \
-		: "=r"(retval),"=m"(*b_addr)			     \
-		: "0" (retval),"r" (b_addr)			     \
-		: "memory");                                         \
-        break;
-
-#define H8300_GEN_TEST_BITOP(FNNAME,OP)				     \
-static __inline__ int FNNAME(int nr, volatile void * addr)	     \
-{								     \
-	int retval = 0;						     \
-	char ccrsave;						     \
-	volatile unsigned char *b_addr;				     \
-	b_addr = (volatile unsigned char *)addr + ((nr >> 3) ^ 3);   \
-	if (__builtin_constant_p(nr)) {				     \
-		switch(nr & 7) {				     \
-			H8300_GEN_TEST_BITOP_CONST_INT(OP,0)	     \
-			H8300_GEN_TEST_BITOP_CONST_INT(OP,1)	     \
-			H8300_GEN_TEST_BITOP_CONST_INT(OP,2)	     \
-			H8300_GEN_TEST_BITOP_CONST_INT(OP,3)	     \
-			H8300_GEN_TEST_BITOP_CONST_INT(OP,4)	     \
-			H8300_GEN_TEST_BITOP_CONST_INT(OP,5)	     \
-			H8300_GEN_TEST_BITOP_CONST_INT(OP,6)	     \
-			H8300_GEN_TEST_BITOP_CONST_INT(OP,7)	     \
-		}						     \
-	} else {						     \
-		__asm__("stc ccr,%w1\n\t"			     \
-			"orc #0x80,ccr\n\t"			     \
-			"btst %w5,@%4\n\t"			     \
-			OP " %w5,@%4\n\t"			     \
-			"beq 1f\n\t"				     \
-			"inc.l #1,%0\n"				     \
-			"1:\n\t"				     \
-			"ldc %w1,ccr"				     \
-			: "=r"(retval),"=&r"(ccrsave),"=m"(*b_addr)  \
-			: "0" (retval),"r" (b_addr),"r"(nr)	     \
-			: "memory");				     \
-	}							     \
-	return retval;						     \
-}								     \
-								     \
-static __inline__ int __ ## FNNAME(int nr, volatile void * addr)     \
-{								     \
-	int retval = 0;						     \
-	volatile unsigned char *b_addr;				     \
-	b_addr = (volatile unsigned char *)addr + ((nr >> 3) ^ 3);   \
-	if (__builtin_constant_p(nr)) {				     \
-		switch(nr & 7) {				     \
-			H8300_GEN_TEST_BITOP_CONST(OP,0) 	     \
-			H8300_GEN_TEST_BITOP_CONST(OP,1) 	     \
-			H8300_GEN_TEST_BITOP_CONST(OP,2) 	     \
-			H8300_GEN_TEST_BITOP_CONST(OP,3) 	     \
-			H8300_GEN_TEST_BITOP_CONST(OP,4) 	     \
-			H8300_GEN_TEST_BITOP_CONST(OP,5) 	     \
-			H8300_GEN_TEST_BITOP_CONST(OP,6) 	     \
-			H8300_GEN_TEST_BITOP_CONST(OP,7) 	     \
-		}						     \
-	} else {						     \
-		__asm__("btst %w4,@%3\n\t"			     \
-			OP " %w4,@%3\n\t"			     \
-			"beq 1f\n\t"				     \
-			"inc.l #1,%0\n"				     \
-			"1:"					     \
-			: "=r"(retval),"=m"(*b_addr)		     \
-			: "0" (retval),"r" (b_addr),"r"(nr)	     \
-			: "memory");				     \
-	}							     \
-	return retval;						     \
-}
-
-H8300_GEN_TEST_BITOP(test_and_set_bit,	 "bset")
-H8300_GEN_TEST_BITOP(test_and_clear_bit, "bclr")
-H8300_GEN_TEST_BITOP(test_and_change_bit,"bnot")
-#undef H8300_GEN_TEST_BITOP_CONST
-#undef H8300_GEN_TEST_BITOP_CONST_INT
-#undef H8300_GEN_TEST_BITOP
-
-#include <asm-generic/bitops/ffs.h>
-
-static __inline__ unsigned long __ffs(unsigned long word)
-{
-	unsigned long result;
-
-	result = -1;
-	__asm__("1:\n\t"
-		"shlr.l %2\n\t"
-		"adds #1,%0\n\t"
-		"bcc 1b"
-		: "=r" (result)
-		: "0"(result),"r"(word));
-	return result;
-}
-
-#include <asm-generic/bitops/find.h>
-#include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/hweight.h>
-#include <asm-generic/bitops/lock.h>
-#include <asm-generic/bitops/le.h>
-#include <asm-generic/bitops/ext2-atomic.h>
-
-#endif /* __KERNEL__ */
-
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
-#include <asm-generic/bitops/fls64.h>
-
-#endif /* _H8300_BITOPS_H */
diff --git a/arch/h8300/include/asm/bootinfo.h b/arch/h8300/include/asm/bootinfo.h
deleted file mode 100644
index 5bed7e7aac0a..000000000000
--- a/arch/h8300/include/asm/bootinfo.h
+++ /dev/null
@@ -1,2 +0,0 @@
-
-/* Nothing for h8300 */
diff --git a/arch/h8300/include/asm/bug.h b/arch/h8300/include/asm/bug.h
deleted file mode 100644
index 1e1be8119935..000000000000
--- a/arch/h8300/include/asm/bug.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _H8300_BUG_H
-#define _H8300_BUG_H
-
-/* always true */
-#define is_valid_bugaddr(addr) (1)
-
-#include <asm-generic/bug.h>
-
-struct pt_regs;
-extern void die(const char *str, struct pt_regs *fp, unsigned long err);
-
-#endif
diff --git a/arch/h8300/include/asm/bugs.h b/arch/h8300/include/asm/bugs.h
deleted file mode 100644
index 1cb4afba6eb1..000000000000
--- a/arch/h8300/include/asm/bugs.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- *  include/asm-h8300/bugs.h
- *
- *  Copyright (C) 1994  Linus Torvalds
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- *	void check_bugs(void);
- */
-
-static void check_bugs(void)
-{
-}
diff --git a/arch/h8300/include/asm/cache.h b/arch/h8300/include/asm/cache.h
deleted file mode 100644
index 05887a1d80e5..000000000000
--- a/arch/h8300/include/asm/cache.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ARCH_H8300_CACHE_H
-#define __ARCH_H8300_CACHE_H
-
-/* bytes per L1 cache line */
-#define        L1_CACHE_SHIFT  2
-#define        L1_CACHE_BYTES  (1 << L1_CACHE_SHIFT)
-
-/* m68k-elf-gcc  2.95.2 doesn't like these */
-
-#define __cacheline_aligned
-#define ____cacheline_aligned
-
-#endif
diff --git a/arch/h8300/include/asm/cachectl.h b/arch/h8300/include/asm/cachectl.h
deleted file mode 100644
index c464022d8e26..000000000000
--- a/arch/h8300/include/asm/cachectl.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _H8300_CACHECTL_H
-#define _H8300_CACHECTL_H
-
-/* Definitions for the cacheflush system call.  */
-
-#define FLUSH_SCOPE_LINE    0	/* Flush a cache line */
-#define FLUSH_SCOPE_PAGE    0	/* Flush a page */
-#define FLUSH_SCOPE_ALL     0	/* Flush the whole cache -- superuser only */
-
-#define FLUSH_CACHE_DATA    0	/* Writeback and flush data cache */
-#define FLUSH_CACHE_INSN    0	/* Flush instruction cache */
-#define FLUSH_CACHE_BOTH    0	/* Flush both caches */
-
-#endif /* _H8300_CACHECTL_H */
diff --git a/arch/h8300/include/asm/cacheflush.h b/arch/h8300/include/asm/cacheflush.h
deleted file mode 100644
index 4cf2df20c1ce..000000000000
--- a/arch/h8300/include/asm/cacheflush.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * (C) Copyright 2002, Yoshinori Sato <ysato@users.sourceforge.jp>
- */
-
-#ifndef _ASM_H8300_CACHEFLUSH_H
-#define _ASM_H8300_CACHEFLUSH_H
-
-/*
- * Cache handling functions
- * No Cache memory all dummy functions
- */
-
-#define flush_cache_all()
-#define	flush_cache_mm(mm)
-#define	flush_cache_dup_mm(mm)		do { } while (0)
-#define	flush_cache_range(vma,a,b)
-#define	flush_cache_page(vma,p,pfn)
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define	flush_dcache_page(page)
-#define	flush_dcache_mmap_lock(mapping)
-#define	flush_dcache_mmap_unlock(mapping)
-#define	flush_icache()
-#define	flush_icache_page(vma,page)
-#define	flush_icache_range(start,len)
-#define flush_cache_vmap(start, end)
-#define flush_cache_vunmap(start, end)
-#define	cache_push_v(vaddr,len)
-#define	cache_push(paddr,len)
-#define	cache_clear(paddr,len)
-
-#define	flush_dcache_range(a,b)
-
-#define	flush_icache_user_range(vma,page,addr,len)
-
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-	memcpy(dst, src, len)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-	memcpy(dst, src, len)
-
-#endif /* _ASM_H8300_CACHEFLUSH_H */
diff --git a/arch/h8300/include/asm/checksum.h b/arch/h8300/include/asm/checksum.h
deleted file mode 100644
index 98724e12508c..000000000000
--- a/arch/h8300/include/asm/checksum.h
+++ /dev/null
@@ -1,102 +0,0 @@
-#ifndef _H8300_CHECKSUM_H
-#define _H8300_CHECKSUM_H
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- * the same as csum_partial, but copies from src while it
- * checksums
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-
-__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
-
-
-/*
- * the same as csum_partial_copy, but copies from user space.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-
-extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
-						int len, __wsum sum, int *csum_err);
-
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
-
-/*
- *	Fold a partial checksum
- */
-
-static inline __sum16 csum_fold(__wsum sum)
-{
-	__asm__("mov.l %0,er0\n\t"
-		"add.w e0,r0\n\t"
-		"xor.w e0,e0\n\t"
-		"rotxl.w e0\n\t"
-		"add.w e0,r0\n\t"
-		"sub.w e0,e0\n\t"
-		"mov.l er0,%0"
-		: "=r"(sum)
-		: "0"(sum)
-		: "er0");
-	return (__force __sum16)~sum;
-}
-
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-
-static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
-		  unsigned short proto, __wsum sum)
-{
-	__asm__ ("sub.l er0,er0\n\t"
-		 "add.l %2,%0\n\t"
-		 "addx	#0,r0l\n\t"
-		 "add.l	%3,%0\n\t"
-		 "addx	#0,r0l\n\t"
-		 "add.l %4,%0\n\t"
-		 "addx	#0,r0l\n\t"
-		 "add.l	er0,%0\n\t"
-		 "bcc	1f\n\t"
-		 "inc.l	#1,%0\n"
-		 "1:"
-		 : "=&r" (sum)
-		 : "0" (sum), "r" (daddr), "r" (saddr), "r" (len + proto)
-		 :"er0");
-	return sum;
-}
-
-static inline __sum16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
-		  unsigned short proto, __wsum sum)
-{
-	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-
-extern __sum16 ip_compute_csum(const void *buff, int len);
-
-#endif /* _H8300_CHECKSUM_H */
diff --git a/arch/h8300/include/asm/cmpxchg.h b/arch/h8300/include/asm/cmpxchg.h
deleted file mode 100644
index cdb203ef681f..000000000000
--- a/arch/h8300/include/asm/cmpxchg.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef __ARCH_H8300_CMPXCHG__
-#define __ARCH_H8300_CMPXCHG__
-
-#include <linux/irqflags.h>
-
-#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
-
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((volatile struct __xchg_dummy *)(x))
-
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
-{
-  unsigned long tmp, flags;
-
-  local_irq_save(flags);
-
-  switch (size) {
-  case 1:
-    __asm__ __volatile__
-    ("mov.b %2,%0\n\t"
-     "mov.b %1,%2"
-    : "=&r" (tmp) : "r" (x), "m" (*__xg(ptr)) : "memory");
-    break;
-  case 2:
-    __asm__ __volatile__
-    ("mov.w %2,%0\n\t"
-     "mov.w %1,%2"
-    : "=&r" (tmp) : "r" (x), "m" (*__xg(ptr)) : "memory");
-    break;
-  case 4:
-    __asm__ __volatile__
-    ("mov.l %2,%0\n\t"
-     "mov.l %1,%2"
-    : "=&r" (tmp) : "r" (x), "m" (*__xg(ptr)) : "memory");
-    break;
-  default:
-    tmp = 0;	  
-  }
-  local_irq_restore(flags);
-  return tmp;
-}
-
-#include <asm-generic/cmpxchg-local.h>
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)				  	       \
-	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
-			(unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-
-#ifndef CONFIG_SMP
-#include <asm-generic/cmpxchg.h>
-#endif
-
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-#endif /* __ARCH_H8300_CMPXCHG__ */
diff --git a/arch/h8300/include/asm/cputime.h b/arch/h8300/include/asm/cputime.h
deleted file mode 100644
index 092e187c7b08..000000000000
--- a/arch/h8300/include/asm/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __H8300_CPUTIME_H
-#define __H8300_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __H8300_CPUTIME_H */
diff --git a/arch/h8300/include/asm/current.h b/arch/h8300/include/asm/current.h
deleted file mode 100644
index 57d74ee55a14..000000000000
--- a/arch/h8300/include/asm/current.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _H8300_CURRENT_H
-#define _H8300_CURRENT_H
-/*
- *	current.h
- *	(C) Copyright 2000, Lineo, David McCullough <davidm@lineo.com>
- *	(C) Copyright 2002, Greg Ungerer (gerg@snapgear.com)
- *
- *	rather than dedicate a register (as the m68k source does), we
- *	just keep a global,  we should probably just change it all to be
- *	current and lose _current_task.
- */
-
-#include <linux/thread_info.h>
-#include <asm/thread_info.h>
-
-struct task_struct;
-
-static inline struct task_struct *get_current(void)
-{
-	return(current_thread_info()->task);
-}
-
-#define	current	get_current()
-
-#endif /* _H8300_CURRENT_H */
diff --git a/arch/h8300/include/asm/dbg.h b/arch/h8300/include/asm/dbg.h
deleted file mode 100644
index 2c6d1cbcf736..000000000000
--- a/arch/h8300/include/asm/dbg.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define DEBUG 1
-#define	BREAK asm volatile ("trap #3")
diff --git a/arch/h8300/include/asm/delay.h b/arch/h8300/include/asm/delay.h
deleted file mode 100644
index 743beba70f82..000000000000
--- a/arch/h8300/include/asm/delay.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _H8300_DELAY_H
-#define _H8300_DELAY_H
-
-#include <asm/param.h>
-
-/*
- * Copyright (C) 2002 Yoshinori Sato <ysato@sourceforge.jp>
- *
- * Delay routines, using a pre-computed "loops_per_second" value.
- */
-
-static inline void __delay(unsigned long loops)
-{
-	__asm__ __volatile__ ("1:\n\t"
-			      "dec.l #1,%0\n\t"
-			      "bne 1b"
-			      :"=r" (loops):"0"(loops));
-}
-
-/*
- * Use only for very small delays ( < 1 msec).  Should probably use a
- * lookup table, really, as the multiplications take much too long with
- * short delays.  This is a "reasonable" implementation, though (and the
- * first constant multiplications gets optimized away if the delay is
- * a constant)  
- */
-
-extern unsigned long loops_per_jiffy;
-
-static inline void udelay(unsigned long usecs)
-{
-	usecs *= 4295;		/* 2**32 / 1000000 */
-	usecs /= (loops_per_jiffy*HZ);
-	if (usecs)
-		__delay(usecs);
-}
-
-#endif /* _H8300_DELAY_H */
diff --git a/arch/h8300/include/asm/device.h b/arch/h8300/include/asm/device.h
deleted file mode 100644
index d8f9872b0e2d..000000000000
--- a/arch/h8300/include/asm/device.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/arch/h8300/include/asm/div64.h b/arch/h8300/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/h8300/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/h8300/include/asm/dma.h b/arch/h8300/include/asm/dma.h
deleted file mode 100644
index 3edbaaaedf5b..000000000000
--- a/arch/h8300/include/asm/dma.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _H8300_DMA_H
-#define _H8300_DMA_H 
- 
-
-/*
- * Set number of channels of DMA on ColdFire for different implementations.
- */
-#define MAX_DMA_CHANNELS 0
-#define MAX_DMA_ADDRESS PAGE_OFFSET
-
-/* These are in kernel/dma.c: */
-extern int request_dma(unsigned int dmanr, const char *device_id);	/* reserve a DMA channel */
-extern void free_dma(unsigned int dmanr);	/* release it again */
- 
-#endif /* _H8300_DMA_H */
diff --git a/arch/h8300/include/asm/elf.h b/arch/h8300/include/asm/elf.h
deleted file mode 100644
index 6db71248a82f..000000000000
--- a/arch/h8300/include/asm/elf.h
+++ /dev/null
@@ -1,101 +0,0 @@
-#ifndef __ASMH8300_ELF_H
-#define __ASMH8300_ELF_H
-
-/*
- * ELF register definitions..
- */
-
-#include <asm/ptrace.h>
-#include <asm/user.h>
-
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-typedef unsigned long elf_fpregset_t;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) ((x)->e_machine == EM_H8_300)
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS	ELFCLASS32
-#define ELF_DATA	ELFDATA2MSB
-#define ELF_ARCH	EM_H8_300
-#if defined(__H8300H__)
-#define ELF_CORE_EFLAGS 0x810000
-#endif
-#if defined(__H8300S__)
-#define ELF_CORE_EFLAGS 0x820000
-#endif
-
-#define ELF_PLAT_INIT(_r)	_r->er1 = 0
-
-#define ELF_EXEC_PAGESIZE	4096
-
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE         0xD0000000UL
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this cpu supports.  */
-
-#define ELF_HWCAP	(0)
-
-/* This yields a string that ld.so will use to load implementation
-   specific libraries for optimization.  This is more specific in
-   intent than poking at uname or /proc/cpuinfo.  */
-
-#define ELF_PLATFORM  (NULL)
-
-#define R_H8_NONE       0
-#define R_H8_DIR32      1
-#define R_H8_DIR32_28   2
-#define R_H8_DIR32_24   3
-#define R_H8_DIR32_16   4
-#define R_H8_DIR32U     6
-#define R_H8_DIR32U_28  7
-#define R_H8_DIR32U_24  8
-#define R_H8_DIR32U_20  9
-#define R_H8_DIR32U_16 10
-#define R_H8_DIR24     11
-#define R_H8_DIR24_20  12
-#define R_H8_DIR24_16  13
-#define R_H8_DIR24U    14
-#define R_H8_DIR24U_20 15
-#define R_H8_DIR24U_16 16
-#define R_H8_DIR16     17
-#define R_H8_DIR16U    18
-#define R_H8_DIR16S_32 19
-#define R_H8_DIR16S_28 20
-#define R_H8_DIR16S_24 21
-#define R_H8_DIR16S_20 22
-#define R_H8_DIR16S    23
-#define R_H8_DIR8      24
-#define R_H8_DIR8U     25
-#define R_H8_DIR8Z_32  26
-#define R_H8_DIR8Z_28  27
-#define R_H8_DIR8Z_24  28
-#define R_H8_DIR8Z_20  29
-#define R_H8_DIR8Z_16  30
-#define R_H8_PCREL16   31
-#define R_H8_PCREL8    32
-#define R_H8_BPOS      33
-#define R_H8_PCREL32   34
-#define R_H8_GOT32O    35
-#define R_H8_GOT16O    36
-#define R_H8_DIR16A8   59
-#define R_H8_DIR16R8   60
-#define R_H8_DIR24A8   61
-#define R_H8_DIR24R8   62
-#define R_H8_DIR32A16  63
-#define R_H8_ABS32     65
-#define R_H8_ABS32A16 127
-
-#endif
diff --git a/arch/h8300/include/asm/emergency-restart.h b/arch/h8300/include/asm/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/arch/h8300/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/h8300/include/asm/fb.h b/arch/h8300/include/asm/fb.h
deleted file mode 100644
index c7df38030992..000000000000
--- a/arch/h8300/include/asm/fb.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/fb.h>
-
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h
deleted file mode 100644
index bd12b31b90e6..000000000000
--- a/arch/h8300/include/asm/flat.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * include/asm-h8300/flat.h -- uClinux flat-format executables
- */
-
-#ifndef __H8300_FLAT_H__
-#define __H8300_FLAT_H__
-
-#define	flat_argvp_envp_on_stack()		1
-#define	flat_old_ram_flag(flags)		1
-#define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_set_persistent(relval, p)		0
-
-/*
- * on the H8 a couple of the relocations have an instruction in the
- * top byte.  As there can only be 24bits of address space,  we just
- * always preserve that 8bits at the top,  when it isn't an instruction
- * is is 0 (davidm@snapgear.com)
- */
-
-#define	flat_get_relocate_addr(rel)		(rel)
-#define flat_get_addr_from_rp(rp, relval, flags, persistent) \
-        (get_unaligned(rp) & ((flags & FLAT_FLAG_GOTPIC) ? 0xffffffff: 0x00ffffff))
-#define flat_put_addr_at_rp(rp, addr, rel) \
-	put_unaligned (((*(char *)(rp)) << 24) | ((addr) & 0x00ffffff), rp)
-
-#endif /* __H8300_FLAT_H__ */
diff --git a/arch/h8300/include/asm/fpu.h b/arch/h8300/include/asm/fpu.h
deleted file mode 100644
index 4fc416e80bef..000000000000
--- a/arch/h8300/include/asm/fpu.h
+++ /dev/null
@@ -1 +0,0 @@
-/* Nothing do */
diff --git a/arch/h8300/include/asm/ftrace.h b/arch/h8300/include/asm/ftrace.h
deleted file mode 100644
index 40a8c178f10d..000000000000
--- a/arch/h8300/include/asm/ftrace.h
+++ /dev/null
@@ -1 +0,0 @@
-/* empty */
diff --git a/arch/h8300/include/asm/futex.h b/arch/h8300/include/asm/futex.h
deleted file mode 100644
index 6a332a9f099c..000000000000
--- a/arch/h8300/include/asm/futex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#include <asm-generic/futex.h>
-
-#endif
diff --git a/arch/h8300/include/asm/gpio-internal.h b/arch/h8300/include/asm/gpio-internal.h
deleted file mode 100644
index a714f0c0efbc..000000000000
--- a/arch/h8300/include/asm/gpio-internal.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifndef _H8300_GPIO_H
-#define _H8300_GPIO_H
-
-#define H8300_GPIO_P1 0
-#define H8300_GPIO_P2 1
-#define H8300_GPIO_P3 2
-#define H8300_GPIO_P4 3
-#define H8300_GPIO_P5 4
-#define H8300_GPIO_P6 5
-#define H8300_GPIO_P7 6
-#define H8300_GPIO_P8 7
-#define H8300_GPIO_P9 8
-#define H8300_GPIO_PA 9
-#define H8300_GPIO_PB 10
-#define H8300_GPIO_PC 11
-#define H8300_GPIO_PD 12
-#define H8300_GPIO_PE 13
-#define H8300_GPIO_PF 14
-#define H8300_GPIO_PG 15
-#define H8300_GPIO_PH 16
-
-#define H8300_GPIO_B7 0x80
-#define H8300_GPIO_B6 0x40
-#define H8300_GPIO_B5 0x20
-#define H8300_GPIO_B4 0x10
-#define H8300_GPIO_B3 0x08
-#define H8300_GPIO_B2 0x04
-#define H8300_GPIO_B1 0x02
-#define H8300_GPIO_B0 0x01
-
-#define H8300_GPIO_INPUT 0
-#define H8300_GPIO_OUTPUT 1
-
-#define H8300_GPIO_RESERVE(port, bits) \
-        h8300_reserved_gpio(port, bits)
-
-#define H8300_GPIO_FREE(port, bits) \
-        h8300_free_gpio(port, bits)
-
-#define H8300_GPIO_DDR(port, bit, dir) \
-        h8300_set_gpio_dir(((port) << 8) | (bit), dir)
-
-#define H8300_GPIO_GETDIR(port, bit) \
-        h8300_get_gpio_dir(((port) << 8) | (bit))
-
-extern int h8300_reserved_gpio(int port, int bits);
-extern int h8300_free_gpio(int port, int bits);
-extern int h8300_set_gpio_dir(int port_bit, int dir);
-extern int h8300_get_gpio_dir(int port_bit);
-extern int h8300_init_gpio(void);
-
-#endif
diff --git a/arch/h8300/include/asm/hardirq.h b/arch/h8300/include/asm/hardirq.h
deleted file mode 100644
index c2e1aa0f0d14..000000000000
--- a/arch/h8300/include/asm/hardirq.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __H8300_HARDIRQ_H
-#define __H8300_HARDIRQ_H
-
-#include <asm/irq.h>
-
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
-#include <asm-generic/hardirq.h>
-
-#endif
diff --git a/arch/h8300/include/asm/hw_irq.h b/arch/h8300/include/asm/hw_irq.h
deleted file mode 100644
index d75a5a1119e8..000000000000
--- a/arch/h8300/include/asm/hw_irq.h
+++ /dev/null
@@ -1 +0,0 @@
-/* Do Nothing */
diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h
deleted file mode 100644
index c1a8df22080f..000000000000
--- a/arch/h8300/include/asm/io.h
+++ /dev/null
@@ -1,358 +0,0 @@
-#ifndef _H8300_IO_H
-#define _H8300_IO_H
-
-#ifdef __KERNEL__
-
-#include <asm/virtconvert.h>
-
-#if defined(CONFIG_H83007) || defined(CONFIG_H83068)
-#include <asm/regs306x.h>
-#elif defined(CONFIG_H8S2678)
-#include <asm/regs267x.h>
-#else
-#error UNKNOWN CPU TYPE
-#endif
-
-
-/*
- * These are for ISA/PCI shared memory _only_ and should never be used
- * on any other type of memory, including Zorro memory. They are meant to
- * access the bus in the bus byte order which is little-endian!.
- *
- * readX/writeX() are used to access memory mapped devices. On some
- * architectures the memory mapped IO stuff needs to be accessed
- * differently. On the m68k architecture, we just read/write the
- * memory location directly.
- */
-/* ++roman: The assignments to temp. vars avoid that gcc sometimes generates
- * two accesses to memory, which may be undesirable for some devices.
- */
-
-/*
- * swap functions are sometimes needed to interface little-endian hardware
- */
-
-static inline unsigned short _swapw(volatile unsigned short v)
-{
-#ifndef H8300_IO_NOSWAP
-	unsigned short r;
-	__asm__("xor.b %w0,%x0\n\t"
-		"xor.b %x0,%w0\n\t"
-		"xor.b %w0,%x0"
-		:"=r"(r)
-		:"0"(v));
-	return r;
-#else
-	return v;
-#endif
-}
-
-static inline unsigned long _swapl(volatile unsigned long v)
-{
-#ifndef H8300_IO_NOSWAP
-	unsigned long r;
-	__asm__("xor.b %w0,%x0\n\t"
-		"xor.b %x0,%w0\n\t"
-		"xor.b %w0,%x0\n\t"
-		"xor.w %e0,%f0\n\t"
-		"xor.w %f0,%e0\n\t"
-		"xor.w %e0,%f0\n\t"
-		"xor.b %w0,%x0\n\t"
-		"xor.b %x0,%w0\n\t"
-		"xor.b %w0,%x0"
-		:"=r"(r)
-		:"0"(v));
-	return r;
-#else
-	return v;
-#endif
-}
-
-#define readb(addr) \
-    ({ unsigned char __v = \
-     *(volatile unsigned char *)((unsigned long)(addr) & 0x00ffffff); \
-     __v; })
-#define readw(addr) \
-    ({ unsigned short __v = \
-     *(volatile unsigned short *)((unsigned long)(addr) & 0x00ffffff); \
-     __v; })
-#define readl(addr) \
-    ({ unsigned long __v = \
-     *(volatile unsigned long *)((unsigned long)(addr) & 0x00ffffff); \
-     __v; })
-
-#define writeb(b,addr) (void)((*(volatile unsigned char *) \
-                             ((unsigned long)(addr) & 0x00ffffff)) = (b))
-#define writew(b,addr) (void)((*(volatile unsigned short *) \
-                             ((unsigned long)(addr) & 0x00ffffff)) = (b))
-#define writel(b,addr) (void)((*(volatile unsigned long *) \
-                             ((unsigned long)(addr) & 0x00ffffff)) = (b))
-#define readb_relaxed(addr) readb(addr)
-#define readw_relaxed(addr) readw(addr)
-#define readl_relaxed(addr) readl(addr)
-
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-
-static inline int h8300_buswidth(unsigned int addr)
-{
-	return (*(volatile unsigned char *)ABWCR & (1 << ((addr >> 21) & 7))) == 0;
-}
-
-static inline void io_outsb(unsigned int addr, const void *buf, int len)
-{
-	volatile unsigned char  *ap_b = (volatile unsigned char *) addr;
-	volatile unsigned short *ap_w = (volatile unsigned short *) addr;
-	unsigned char *bp = (unsigned char *) buf;
-
-	if(h8300_buswidth(addr) && (addr & 1)) {
-		while (len--)
-			*ap_w = *bp++;
-	} else {
-		while (len--)
-			*ap_b = *bp++;
-	}
-}
-
-static inline void io_outsw(unsigned int addr, const void *buf, int len)
-{
-	volatile unsigned short *ap = (volatile unsigned short *) addr;
-	unsigned short *bp = (unsigned short *) buf;
-	while (len--)
-		*ap = _swapw(*bp++);
-}
-
-static inline void io_outsl(unsigned int addr, const void *buf, int len)
-{
-	volatile unsigned long *ap = (volatile unsigned long *) addr;
-	unsigned long *bp = (unsigned long *) buf;
-	while (len--)
-		*ap = _swapl(*bp++);
-}
-
-static inline void io_outsw_noswap(unsigned int addr, const void *buf, int len)
-{
-	volatile unsigned short *ap = (volatile unsigned short *) addr;
-	unsigned short *bp = (unsigned short *) buf;
-	while (len--)
-		*ap = *bp++;
-}
-
-static inline void io_outsl_noswap(unsigned int addr, const void *buf, int len)
-{
-	volatile unsigned long *ap = (volatile unsigned long *) addr;
-	unsigned long *bp = (unsigned long *) buf;
-	while (len--)
-		*ap = *bp++;
-}
-
-static inline void io_insb(unsigned int addr, void *buf, int len)
-{
-	volatile unsigned char  *ap_b;
-	volatile unsigned short *ap_w;
-	unsigned char *bp = (unsigned char *) buf;
-
-	if(h8300_buswidth(addr)) {
-		ap_w = (volatile unsigned short *)(addr & ~1);
-		while (len--)
-			*bp++ = *ap_w & 0xff;
-	} else {
-		ap_b = (volatile unsigned char *)addr;
-		while (len--)
-			*bp++ = *ap_b;
-	}
-}
-
-static inline void io_insw(unsigned int addr, void *buf, int len)
-{
-	volatile unsigned short *ap = (volatile unsigned short *) addr;
-	unsigned short *bp = (unsigned short *) buf;
-	while (len--)
-		*bp++ = _swapw(*ap);
-}
-
-static inline void io_insl(unsigned int addr, void *buf, int len)
-{
-	volatile unsigned long *ap = (volatile unsigned long *) addr;
-	unsigned long *bp = (unsigned long *) buf;
-	while (len--)
-		*bp++ = _swapl(*ap);
-}
-
-static inline void io_insw_noswap(unsigned int addr, void *buf, int len)
-{
-	volatile unsigned short *ap = (volatile unsigned short *) addr;
-	unsigned short *bp = (unsigned short *) buf;
-	while (len--)
-		*bp++ = *ap;
-}
-
-static inline void io_insl_noswap(unsigned int addr, void *buf, int len)
-{
-	volatile unsigned long *ap = (volatile unsigned long *) addr;
-	unsigned long *bp = (unsigned long *) buf;
-	while (len--)
-		*bp++ = *ap;
-}
-
-/*
- *	make the short names macros so specific devices
- *	can override them as required
- */
-
-#define memset_io(a,b,c)	memset((void *)(a),(b),(c))
-#define memcpy_fromio(a,b,c)	memcpy((a),(void *)(b),(c))
-#define memcpy_toio(a,b,c)	memcpy((void *)(a),(b),(c))
-
-#define mmiowb()
-
-#define inb(addr)    ((h8300_buswidth(addr))?readw((addr) & ~1) & 0xff:readb(addr))
-#define inw(addr)    _swapw(readw(addr))
-#define inl(addr)    _swapl(readl(addr))
-#define outb(x,addr) ((void)((h8300_buswidth(addr) && \
-                      ((addr) & 1))?writew(x,(addr) & ~1):writeb(x,addr)))
-#define outw(x,addr) ((void) writew(_swapw(x),addr))
-#define outl(x,addr) ((void) writel(_swapl(x),addr))
-
-#define inb_p(addr)    inb(addr)
-#define inw_p(addr)    inw(addr)
-#define inl_p(addr)    inl(addr)
-#define outb_p(x,addr) outb(x,addr)
-#define outw_p(x,addr) outw(x,addr)
-#define outl_p(x,addr) outl(x,addr)
-
-#define outsb(a,b,l) io_outsb(a,b,l)
-#define outsw(a,b,l) io_outsw(a,b,l)
-#define outsl(a,b,l) io_outsl(a,b,l)
-
-#define insb(a,b,l) io_insb(a,b,l)
-#define insw(a,b,l) io_insw(a,b,l)
-#define insl(a,b,l) io_insl(a,b,l)
-
-#define IO_SPACE_LIMIT 0xffffff
-
-
-/* Values for nocacheflag and cmode */
-#define IOMAP_FULL_CACHING		0
-#define IOMAP_NOCACHE_SER		1
-#define IOMAP_NOCACHE_NONSER		2
-#define IOMAP_WRITETHROUGH		3
-
-extern void *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag);
-extern void __iounmap(void *addr, unsigned long size);
-
-static inline void *ioremap(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
-}
-static inline void *ioremap_nocache(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
-}
-static inline void *ioremap_writethrough(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
-}
-static inline void *ioremap_fullcache(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_FULL_CACHING);
-}
-
-extern void iounmap(void *addr);
-
-/* H8/300 internal I/O functions */
-static __inline__ unsigned char ctrl_inb(unsigned long addr)
-{
-	return *(volatile unsigned char*)addr;
-}
-
-static __inline__ unsigned short ctrl_inw(unsigned long addr)
-{
-	return *(volatile unsigned short*)addr;
-}
-
-static __inline__ unsigned long ctrl_inl(unsigned long addr)
-{
-	return *(volatile unsigned long*)addr;
-}
-
-static __inline__ void ctrl_outb(unsigned char b, unsigned long addr)
-{
-	*(volatile unsigned char*)addr = b;
-}
-
-static __inline__ void ctrl_outw(unsigned short b, unsigned long addr)
-{
-	*(volatile unsigned short*)addr = b;
-}
-
-static __inline__ void ctrl_outl(unsigned long b, unsigned long addr)
-{
-        *(volatile unsigned long*)addr = b;
-}
-
-static __inline__ void ctrl_bclr(int b, unsigned long addr)
-{
-	if (__builtin_constant_p(b))
-		switch (b) {
-		case 0: __asm__("bclr #0,@%0"::"r"(addr)); break;
-		case 1: __asm__("bclr #1,@%0"::"r"(addr)); break;
-		case 2: __asm__("bclr #2,@%0"::"r"(addr)); break;
-		case 3: __asm__("bclr #3,@%0"::"r"(addr)); break;
-		case 4: __asm__("bclr #4,@%0"::"r"(addr)); break;
-		case 5: __asm__("bclr #5,@%0"::"r"(addr)); break;
-		case 6: __asm__("bclr #6,@%0"::"r"(addr)); break;
-		case 7: __asm__("bclr #7,@%0"::"r"(addr)); break;
-		}
-	else
-		__asm__("bclr %w0,@%1"::"r"(b), "r"(addr));
-}
-
-static __inline__ void ctrl_bset(int b, unsigned long addr)
-{
-	if (__builtin_constant_p(b))
-		switch (b) {
-		case 0: __asm__("bset #0,@%0"::"r"(addr)); break;
-		case 1: __asm__("bset #1,@%0"::"r"(addr)); break;
-		case 2: __asm__("bset #2,@%0"::"r"(addr)); break;
-		case 3: __asm__("bset #3,@%0"::"r"(addr)); break;
-		case 4: __asm__("bset #4,@%0"::"r"(addr)); break;
-		case 5: __asm__("bset #5,@%0"::"r"(addr)); break;
-		case 6: __asm__("bset #6,@%0"::"r"(addr)); break;
-		case 7: __asm__("bset #7,@%0"::"r"(addr)); break;
-		}
-	else
-		__asm__("bset %w0,@%1"::"r"(b), "r"(addr));
-}
-
-/* Pages to physical address... */
-#define page_to_phys(page)      ((page - mem_map) << PAGE_SHIFT)
-#define page_to_bus(page)       ((page - mem_map) << PAGE_SHIFT)
-
-/*
- * Macros used for converting between virtual and physical mappings.
- */
-#define phys_to_virt(vaddr)	((void *) (vaddr))
-#define virt_to_phys(vaddr)	((unsigned long) (vaddr))
-
-#define virt_to_bus virt_to_phys
-#define bus_to_virt phys_to_virt
-
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)	__va(p)
-
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p)	p
-
-#endif /* __KERNEL__ */
-
-#endif /* _H8300_IO_H */
diff --git a/arch/h8300/include/asm/irq.h b/arch/h8300/include/asm/irq.h
deleted file mode 100644
index 13d7c601cd0a..000000000000
--- a/arch/h8300/include/asm/irq.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef _H8300_IRQ_H_
-#define _H8300_IRQ_H_
-
-#include <asm/ptrace.h>
-
-#if defined(CONFIG_CPU_H8300H)
-#define NR_IRQS 64
-#define EXT_IRQ0 12
-#define EXT_IRQ1 13
-#define EXT_IRQ2 14
-#define EXT_IRQ3 15
-#define EXT_IRQ4 16
-#define EXT_IRQ5 17
-#define EXT_IRQ6 18
-#define EXT_IRQ7 19
-#define EXT_IRQS 5
-#define IER_REGS *(volatile unsigned char *)IER
-#endif
-#if defined(CONFIG_CPU_H8S)
-#define NR_IRQS 128
-#define EXT_IRQ0 16
-#define EXT_IRQ1 17
-#define EXT_IRQ2 18
-#define EXT_IRQ3 19
-#define EXT_IRQ4 20
-#define EXT_IRQ5 21
-#define EXT_IRQ6 22
-#define EXT_IRQ7 23
-#define EXT_IRQ8 24
-#define EXT_IRQ9 25
-#define EXT_IRQ10 26
-#define EXT_IRQ11 27
-#define EXT_IRQ12 28
-#define EXT_IRQ13 29
-#define EXT_IRQ14 30
-#define EXT_IRQ15 31
-#define EXT_IRQS 15
-
-#define IER_REGS *(volatile unsigned short *)IER
-#endif
-
-static __inline__ int irq_canonicalize(int irq)
-{
-	return irq;
-}
-
-typedef void (*h8300_vector)(void);
-
-#endif /* _H8300_IRQ_H_ */
diff --git a/arch/h8300/include/asm/irq_regs.h b/arch/h8300/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/h8300/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/h8300/include/asm/irqflags.h b/arch/h8300/include/asm/irqflags.h
deleted file mode 100644
index 9617cd57aebd..000000000000
--- a/arch/h8300/include/asm/irqflags.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef _H8300_IRQFLAGS_H
-#define _H8300_IRQFLAGS_H
-
-static inline unsigned long arch_local_save_flags(void)
-{
-	unsigned long flags;
-	asm volatile ("stc ccr,%w0" : "=r" (flags));
-	return flags;
-}
-
-static inline void arch_local_irq_disable(void)
-{
-	asm volatile ("orc  #0x80,ccr" : : : "memory");
-}
-
-static inline void arch_local_irq_enable(void)
-{
-	asm volatile ("andc #0x7f,ccr" : : : "memory");
-}
-
-static inline unsigned long arch_local_irq_save(void)
-{
-	unsigned long flags = arch_local_save_flags();
-	arch_local_irq_disable();
-	return flags;
-}
-
-static inline void arch_local_irq_restore(unsigned long flags)
-{
-	asm volatile ("ldc %w0,ccr" : : "r" (flags) : "memory");
-}
-
-static inline bool arch_irqs_disabled_flags(unsigned long flags)
-{
-	return (flags & 0x80) == 0x80;
-}
-
-static inline bool arch_irqs_disabled(void)
-{
-	return arch_irqs_disabled_flags(arch_local_save_flags());
-}
-
-#endif /* _H8300_IRQFLAGS_H */
diff --git a/arch/h8300/include/asm/kdebug.h b/arch/h8300/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/arch/h8300/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/h8300/include/asm/kmap_types.h b/arch/h8300/include/asm/kmap_types.h
deleted file mode 100644
index be12a7160116..000000000000
--- a/arch/h8300/include/asm/kmap_types.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_H8300_KMAP_TYPES_H
-#define _ASM_H8300_KMAP_TYPES_H
-
-#include <asm-generic/kmap_types.h>
-
-#endif
diff --git a/arch/h8300/include/asm/local.h b/arch/h8300/include/asm/local.h
deleted file mode 100644
index fdd4efe437cd..000000000000
--- a/arch/h8300/include/asm/local.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_LOCAL_H_
-#define _H8300_LOCAL_H_
-
-#include <asm-generic/local.h>
-
-#endif
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/h8300/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/mc146818rtc.h b/arch/h8300/include/asm/mc146818rtc.h
deleted file mode 100644
index ab9d9646d241..000000000000
--- a/arch/h8300/include/asm/mc146818rtc.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _H8300_MC146818RTC_H
-#define _H8300_MC146818RTC_H
-
-/* empty include file to satisfy the include in genrtc.c/ide-geometry.c */
-
-#endif /* _H8300_MC146818RTC_H */
diff --git a/arch/h8300/include/asm/mmu_context.h b/arch/h8300/include/asm/mmu_context.h
deleted file mode 100644
index f44b730da54d..000000000000
--- a/arch/h8300/include/asm/mmu_context.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef __H8300_MMU_CONTEXT_H
-#define __H8300_MMU_CONTEXT_H
-
-#include <asm/setup.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm-generic/mm_hooks.h>
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
-static inline int
-init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-	// mm->context = virt_to_phys(mm->pgd);
-	return(0);
-}
-
-#define destroy_context(mm)		do { } while(0)
-#define deactivate_mm(tsk,mm)           do { } while(0)
-
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
-{
-}
-
-static inline void activate_mm(struct mm_struct *prev_mm,
-			       struct mm_struct *next_mm)
-{
-}
-
-#endif
diff --git a/arch/h8300/include/asm/mutex.h b/arch/h8300/include/asm/mutex.h
deleted file mode 100644
index 458c1f7fbc18..000000000000
--- a/arch/h8300/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
diff --git a/arch/h8300/include/asm/page.h b/arch/h8300/include/asm/page.h
deleted file mode 100644
index 837381a2df46..000000000000
--- a/arch/h8300/include/asm/page.h
+++ /dev/null
@@ -1,78 +0,0 @@
-#ifndef _H8300_PAGE_H
-#define _H8300_PAGE_H
-
-/* PAGE_SHIFT determines the page size */
-
-#define PAGE_SHIFT	(12)
-#define PAGE_SIZE	(1UL << PAGE_SHIFT)
-#define PAGE_MASK	(~(PAGE_SIZE-1))
-
-#include <asm/setup.h>
-
-#ifndef __ASSEMBLY__
- 
-#define get_user_page(vaddr)		__get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)	free_page(addr)
-
-#define clear_page(page)	memset((page), 0, PAGE_SIZE)
-#define copy_page(to,from)	memcpy((to), (from), PAGE_SIZE)
-
-#define clear_user_page(page, vaddr, pg)	clear_page(page)
-#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
-
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned long pmd[16]; } pmd_t;
-typedef struct { unsigned long pgd; } pgd_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
-typedef struct page *pgtable_t;
-
-#define pte_val(x)	((x).pte)
-#define pmd_val(x)	((&x)->pmd[0])
-#define pgd_val(x)	((x).pgd)
-#define pgprot_val(x)	((x).pgprot)
-
-#define __pte(x)	((pte_t) { (x) } )
-#define __pmd(x)	((pmd_t) { (x) } )
-#define __pgd(x)	((pgd_t) { (x) } )
-#define __pgprot(x)	((pgprot_t) { (x) } )
-
-extern unsigned long memory_start;
-extern unsigned long memory_end;
-
-#endif /* !__ASSEMBLY__ */
-
-#include <asm/page_offset.h>
-
-#define PAGE_OFFSET		(PAGE_OFFSET_RAW)
-
-#ifndef __ASSEMBLY__
-
-#define __pa(vaddr)		virt_to_phys(vaddr)
-#define __va(paddr)		phys_to_virt((unsigned long)paddr)
-
-#define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
-
-#define MAP_NR(addr)		(((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)
-#define virt_to_page(addr)	(mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
-#define page_to_virt(page)	((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
-#define pfn_valid(page)	        (page < max_mapnr)
-
-#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
-
-#define	virt_addr_valid(kaddr)	(((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
-				((void *)(kaddr) < (void *)memory_end))
-
-#endif /* __ASSEMBLY__ */
-
-#include <asm-generic/memory_model.h>
-#include <asm-generic/getorder.h>
-
-#endif /* _H8300_PAGE_H */
diff --git a/arch/h8300/include/asm/page_offset.h b/arch/h8300/include/asm/page_offset.h
deleted file mode 100644
index f8706463008c..000000000000
--- a/arch/h8300/include/asm/page_offset.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-#define PAGE_OFFSET_RAW		0x00000000
-
diff --git a/arch/h8300/include/asm/param.h b/arch/h8300/include/asm/param.h
deleted file mode 100644
index c3909e7ff178..000000000000
--- a/arch/h8300/include/asm/param.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _H8300_PARAM_H
-#define _H8300_PARAM_H
-
-#include <uapi/asm/param.h>
-
-#define HZ		CONFIG_HZ
-#define	USER_HZ		HZ
-#define	CLOCKS_PER_SEC	(USER_HZ)
-#endif /* _H8300_PARAM_H */
diff --git a/arch/h8300/include/asm/pci.h b/arch/h8300/include/asm/pci.h
deleted file mode 100644
index 0b2acaa3dd84..000000000000
--- a/arch/h8300/include/asm/pci.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _ASM_H8300_PCI_H
-#define _ASM_H8300_PCI_H
-
-/*
- * asm-h8300/pci.h - H8/300 specific PCI declarations.
- *
- * Yoshinori Sato <ysato@users.sourceforge.jp>
- */
-
-#define pcibios_assign_all_busses()	0
-
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
-#endif /* _ASM_H8300_PCI_H */
diff --git a/arch/h8300/include/asm/percpu.h b/arch/h8300/include/asm/percpu.h
deleted file mode 100644
index 72c03e3666d8..000000000000
--- a/arch/h8300/include/asm/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ARCH_H8300_PERCPU__
-#define __ARCH_H8300_PERCPU__
-
-#include <asm-generic/percpu.h>
-
-#endif /* __ARCH_H8300_PERCPU__ */
diff --git a/arch/h8300/include/asm/pgalloc.h b/arch/h8300/include/asm/pgalloc.h
deleted file mode 100644
index c2e89a286d23..000000000000
--- a/arch/h8300/include/asm/pgalloc.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _H8300_PGALLOC_H
-#define _H8300_PGALLOC_H
-
-#include <asm/setup.h>
-
-#define check_pgt_cache()	do { } while (0)
-
-#endif /* _H8300_PGALLOC_H */
diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h
deleted file mode 100644
index 7ca20f894dd7..000000000000
--- a/arch/h8300/include/asm/pgtable.h
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef _H8300_PGTABLE_H
-#define _H8300_PGTABLE_H
-
-#include <asm-generic/4level-fixup.h>
-
-#include <linux/slab.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/io.h>
-
-#define pgd_present(pgd)     (1)       /* pages are always present on NO_MM */
-#define pgd_none(pgd)		(0)
-#define pgd_bad(pgd)		(0)
-#define pgd_clear(pgdp)
-#define kern_addr_valid(addr)	(1)
-#define	pmd_offset(a, b)	((void *)0)
-#define pmd_none(pmd)           (1)
-#define pgd_offset_k(adrdress)  ((pgd_t *)0)
-#define pte_offset_kernel(dir, address) ((pte_t *)0)
-
-#define PAGE_NONE		__pgprot(0)    /* these mean nothing to NO_MM */
-#define PAGE_SHARED		__pgprot(0)    /* these mean nothing to NO_MM */
-#define PAGE_COPY		__pgprot(0)    /* these mean nothing to NO_MM */
-#define PAGE_READONLY	__pgprot(0)    /* these mean nothing to NO_MM */
-#define PAGE_KERNEL		__pgprot(0)    /* these mean nothing to NO_MM */
-
-extern void paging_init(void);
-#define swapper_pg_dir ((pgd_t *) 0)
-
-#define __swp_type(x)		(0)
-#define __swp_offset(x)		(0)
-#define __swp_entry(typ,off)	((swp_entry_t) { ((typ) | ((off) << 7)) })
-#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
-
-static inline int pte_file(pte_t pte) { return 0; }
-
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-#define ZERO_PAGE(vaddr)	(virt_to_page(0))
-
-/*
- * These would be in other places but having them here reduces the diffs.
- */
-extern unsigned int kobjsize(const void *objp);
-extern int is_in_rom(unsigned long);
-
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
-/*
- * All 32bit addresses are effectively valid for vmalloc...
- * Sort of meaningless for non-VM targets.
- */
-#define	VMALLOC_START	0
-#define	VMALLOC_END	0xffffffff
-
-/*
- * All 32bit addresses are effectively valid for vmalloc...
- * Sort of meaningless for non-VM targets.
- */
-#define	VMALLOC_START	0
-#define	VMALLOC_END	0xffffffff
-
-#define arch_enter_lazy_cpu_mode()    do {} while (0)
-
-#include <asm-generic/pgtable.h>
-
-#endif /* _H8300_PGTABLE_H */
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
deleted file mode 100644
index 4b0ca49bb463..000000000000
--- a/arch/h8300/include/asm/processor.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * include/asm-h8300/processor.h
- *
- * Copyright (C) 2002 Yoshinori Sato
- *
- * Based on: linux/asm-m68nommu/processor.h
- *
- * Copyright (C) 1995 Hamish Macdonald
- */
-
-#ifndef __ASM_H8300_PROCESSOR_H
-#define __ASM_H8300_PROCESSOR_H
-
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr() ({ __label__ _l; _l: &&_l;})
-
-#include <linux/compiler.h>
-#include <asm/segment.h>
-#include <asm/fpu.h>
-#include <asm/ptrace.h>
-#include <asm/current.h>
-
-static inline unsigned long rdusp(void) {
-	extern unsigned int	sw_usp;
-	return(sw_usp);
-}
-
-static inline void wrusp(unsigned long usp) {
-	extern unsigned int	sw_usp;
-	sw_usp = usp;
-}
-
-/*
- * User space process size: 3.75GB. This is hardcoded into a few places,
- * so don't change it unless you know what you are doing.
- */
-#define TASK_SIZE	(0xFFFFFFFFUL)
-
-#ifdef __KERNEL__
-#define STACK_TOP	TASK_SIZE
-#define STACK_TOP_MAX	STACK_TOP
-#endif
-
-/*
- * This decides where the kernel will search for a free chunk of vm
- * space during mmap's. We won't be using it
- */
-#define TASK_UNMAPPED_BASE	0
-
-struct thread_struct {
-	unsigned long  ksp;		/* kernel stack pointer */
-	unsigned long  usp;		/* user stack pointer */
-	unsigned long  ccr;		/* saved status register */
-	unsigned long  esp0;            /* points to SR of stack frame */
-	struct {
-		unsigned short *addr;
-		unsigned short inst;
-	} breakinfo;
-};
-
-#define INIT_THREAD  {						\
-	.ksp  = sizeof(init_stack) + (unsigned long)init_stack, \
-	.usp  = 0,						\
-	.ccr  = PS_S,						\
-	.esp0 = 0,						\
-	.breakinfo = {						\
-		.addr = (unsigned short *)-1,			\
-		.inst = 0					\
-	}							\
-}
-
-/*
- * Do necessary setup to start up a newly executed thread.
- *
- * pass the data segment into user programs if it exists,
- * it can't hurt anything as far as I can tell
- */
-#if defined(__H8300H__)
-#define start_thread(_regs, _pc, _usp)			        \
-do {							        \
-  	(_regs)->pc = (_pc);				        \
-	(_regs)->ccr = 0x00;	   /* clear all flags */        \
-	(_regs)->er5 = current->mm->start_data;	/* GOT base */  \
-	wrusp((unsigned long)(_usp) - sizeof(unsigned long)*3);	\
-} while(0)
-#endif
-#if defined(__H8300S__)
-#define start_thread(_regs, _pc, _usp)			        \
-do {							        \
-	(_regs)->pc = (_pc);				        \
-	(_regs)->ccr = 0x00;	   /* clear kernel flag */      \
-	(_regs)->exr = 0x78;       /* enable all interrupts */  \
-	(_regs)->er5 = current->mm->start_data;	/* GOT base */  \
-	/* 14 = space for retaddr(4), vector(4), er0(4) and ext(2) on stack */ \
-	wrusp(((unsigned long)(_usp)) - 14);                    \
-} while(0)
-#endif
-
-/* Forward declaration, a strange C thing */
-struct task_struct;
-
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *dead_task)
-{
-}
-
-/*
- * Free current thread data structures etc..
- */
-static inline void exit_thread(void)
-{
-}
-
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
-unsigned long get_wchan(struct task_struct *p);
-
-#define	KSTK_EIP(tsk)	\
-    ({			\
-	unsigned long eip = 0;	 \
-	if ((tsk)->thread.esp0 > PAGE_SIZE && \
-	    MAP_NR((tsk)->thread.esp0) < max_mapnr) \
-	      eip = ((struct pt_regs *) (tsk)->thread.esp0)->pc; \
-	eip; })
-#define	KSTK_ESP(tsk)	((tsk) == current ? rdusp() : (tsk)->thread.usp)
-
-#define cpu_relax()    barrier()
-
-#define HARD_RESET_NOW() ({		\
-        local_irq_disable();		\
-        asm("jmp @@0");			\
-})
-
-#endif
diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h
deleted file mode 100644
index c1826b95c5ca..000000000000
--- a/arch/h8300/include/asm/ptrace.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _H8300_PTRACE_H
-#define _H8300_PTRACE_H
-
-#include <uapi/asm/ptrace.h>
-
-#ifndef __ASSEMBLY__
-#if defined(CONFIG_CPU_H8S)
-#endif
-#ifndef PS_S
-#define PS_S  (0x10)
-#endif
-
-#if defined(__H8300H__)
-#define H8300_REGS_NO 11
-#endif
-#if defined(__H8300S__)
-#define H8300_REGS_NO 12
-#endif
-
-/* Find the stack offset for a register, relative to thread.esp0. */
-#define PT_REG(reg)	((long)&((struct pt_regs *)0)->reg)
-
-#define arch_has_single_step()	(1)
-
-#define user_mode(regs) (!((regs)->ccr & PS_S))
-#define instruction_pointer(regs) ((regs)->pc)
-#define profile_pc(regs) instruction_pointer(regs)
-#define current_pt_regs() ((struct pt_regs *) \
-	(THREAD_SIZE + (unsigned long)current_thread_info()) - 1)
-#define signal_pt_regs() ((struct pt_regs *)current->thread.esp0)
-#define current_user_stack_pointer() rdusp()
-#endif /* __ASSEMBLY__ */
-#endif /* _H8300_PTRACE_H */
diff --git a/arch/h8300/include/asm/regs267x.h b/arch/h8300/include/asm/regs267x.h
deleted file mode 100644
index 1bff731a9f77..000000000000
--- a/arch/h8300/include/asm/regs267x.h
+++ /dev/null
@@ -1,336 +0,0 @@
-/* internal Peripherals Register address define */
-/* CPU: H8/306x                                 */
-
-#if !defined(__REGS_H8S267x__)
-#define __REGS_H8S267x__ 
-
-#if defined(__KERNEL__)
-
-#define DASTCR 0xFEE01A
-#define DADR0  0xFFFFA4
-#define DADR1  0xFFFFA5
-#define DACR01 0xFFFFA6
-#define DADR2  0xFFFFA8
-#define DADR3  0xFFFFA9
-#define DACR23 0xFFFFAA
-
-#define ADDRA  0xFFFF90
-#define ADDRAH 0xFFFF90
-#define ADDRAL 0xFFFF91
-#define ADDRB  0xFFFF92
-#define ADDRBH 0xFFFF92
-#define ADDRBL 0xFFFF93
-#define ADDRC  0xFFFF94
-#define ADDRCH 0xFFFF94
-#define ADDRCL 0xFFFF95
-#define ADDRD  0xFFFF96
-#define ADDRDH 0xFFFF96
-#define ADDRDL 0xFFFF97
-#define ADDRE  0xFFFF98
-#define ADDREH 0xFFFF98
-#define ADDREL 0xFFFF99
-#define ADDRF  0xFFFF9A
-#define ADDRFH 0xFFFF9A
-#define ADDRFL 0xFFFF9B
-#define ADDRG  0xFFFF9C
-#define ADDRGH 0xFFFF9C
-#define ADDRGL 0xFFFF9D
-#define ADDRH  0xFFFF9E
-#define ADDRHH 0xFFFF9E
-#define ADDRHL 0xFFFF9F
-
-#define ADCSR  0xFFFFA0
-#define ADCR   0xFFFFA1
-
-#define ABWCR  0xFFFEC0
-#define ASTCR  0xFFFEC1
-#define WTCRAH 0xFFFEC2
-#define WTCRAL 0xFFFEC3
-#define WTCRBH 0xFFFEC4
-#define WTCRBL 0xFFFEC5
-#define RDNCR  0xFFFEC6
-#define CSACRH 0xFFFEC8
-#define CSACRL 0xFFFEC9
-#define BROMCRH 0xFFFECA
-#define BROMCRL 0xFFFECB
-#define BCR    0xFFFECC
-#define DRAMCR 0xFFFED0
-#define DRACCR 0xFFFED2
-#define REFCR  0xFFFED4
-#define RTCNT  0xFFFED6
-#define RTCOR  0xFFFED7
-
-#define MAR0AH  0xFFFEE0
-#define MAR0AL  0xFFFEE2
-#define IOAR0A  0xFFFEE4
-#define ETCR0A  0xFFFEE6
-#define MAR0BH  0xFFFEE8
-#define MAR0BL  0xFFFEEA
-#define IOAR0B  0xFFFEEC
-#define ETCR0B  0xFFFEEE
-#define MAR1AH  0xFFFEF0
-#define MAR1AL  0xFFFEF2
-#define IOAR1A  0xFFFEF4
-#define ETCR1A  0xFFFEF6
-#define MAR1BH  0xFFFEF8
-#define MAR1BL  0xFFFEFA
-#define IOAR1B  0xFFFEFC
-#define ETCR1B  0xFFFEFE
-#define DMAWER  0xFFFF20
-#define DMATCR  0xFFFF21
-#define DMACR0A 0xFFFF22
-#define DMACR0B 0xFFFF23
-#define DMACR1A 0xFFFF24
-#define DMACR1B 0xFFFF25
-#define DMABCRH 0xFFFF26
-#define DMABCRL 0xFFFF27
-
-#define EDSAR0  0xFFFDC0
-#define EDDAR0  0xFFFDC4
-#define EDTCR0  0xFFFDC8
-#define EDMDR0  0xFFFDCC
-#define EDMDR0H 0xFFFDCC
-#define EDMDR0L 0xFFFDCD
-#define EDACR0  0xFFFDCE
-#define EDSAR1  0xFFFDD0
-#define EDDAR1  0xFFFDD4
-#define EDTCR1  0xFFFDD8
-#define EDMDR1  0xFFFDDC
-#define EDMDR1H 0xFFFDDC
-#define EDMDR1L 0xFFFDDD
-#define EDACR1  0xFFFDDE
-#define EDSAR2  0xFFFDE0
-#define EDDAR2  0xFFFDE4
-#define EDTCR2  0xFFFDE8
-#define EDMDR2  0xFFFDEC
-#define EDMDR2H 0xFFFDEC
-#define EDMDR2L 0xFFFDED
-#define EDACR2  0xFFFDEE
-#define EDSAR3  0xFFFDF0
-#define EDDAR3  0xFFFDF4
-#define EDTCR3  0xFFFDF8
-#define EDMDR3  0xFFFDFC
-#define EDMDR3H 0xFFFDFC
-#define EDMDR3L 0xFFFDFD
-#define EDACR3  0xFFFDFE
-
-#define IPRA  0xFFFE00
-#define IPRB  0xFFFE02
-#define IPRC  0xFFFE04
-#define IPRD  0xFFFE06
-#define IPRE  0xFFFE08
-#define IPRF  0xFFFE0A
-#define IPRG  0xFFFE0C
-#define IPRH  0xFFFE0E
-#define IPRI  0xFFFE10
-#define IPRJ  0xFFFE12
-#define IPRK  0xFFFE14
-#define ITSR  0xFFFE16
-#define SSIER 0xFFFE18
-#define ISCRH 0xFFFE1A
-#define ISCRL 0xFFFE1C
-
-#define INTCR 0xFFFF31
-#define IER   0xFFFF32
-#define IERH  0xFFFF32
-#define IERL  0xFFFF33
-#define ISR   0xFFFF34
-#define ISRH  0xFFFF34
-#define ISRL  0xFFFF35
-
-#define P1DDR 0xFFFE20
-#define P2DDR 0xFFFE21
-#define P3DDR 0xFFFE22
-#define P4DDR 0xFFFE23
-#define P5DDR 0xFFFE24
-#define P6DDR 0xFFFE25
-#define P7DDR 0xFFFE26
-#define P8DDR 0xFFFE27
-#define P9DDR 0xFFFE28
-#define PADDR 0xFFFE29
-#define PBDDR 0xFFFE2A
-#define PCDDR 0xFFFE2B
-#define PDDDR 0xFFFE2C
-#define PEDDR 0xFFFE2D
-#define PFDDR 0xFFFE2E
-#define PGDDR 0xFFFE2F
-#define PHDDR 0xFFFF74
-
-#define PFCR0 0xFFFE32
-#define PFCR1 0xFFFE33
-#define PFCR2 0xFFFE34
-
-#define PAPCR 0xFFFE36
-#define PBPCR 0xFFFE37
-#define PCPCR 0xFFFE38
-#define PDPCR 0xFFFE39
-#define PEPCR 0xFFFE3A
-
-#define P3ODR 0xFFFE3C
-#define PAODR 0xFFFE3D
-
-#define P1DR  0xFFFF60
-#define P2DR  0xFFFF61
-#define P3DR  0xFFFF62
-#define P4DR  0xFFFF63
-#define P5DR  0xFFFF64
-#define P6DR  0xFFFF65
-#define P7DR  0xFFFF66
-#define P8DR  0xFFFF67
-#define P9DR  0xFFFF68
-#define PADR  0xFFFF69
-#define PBDR  0xFFFF6A
-#define PCDR  0xFFFF6B
-#define PDDR  0xFFFF6C
-#define PEDR  0xFFFF6D
-#define PFDR  0xFFFF6E
-#define PGDR  0xFFFF6F
-#define PHDR  0xFFFF72
-
-#define PORT1 0xFFFF50
-#define PORT2 0xFFFF51
-#define PORT3 0xFFFF52
-#define PORT4 0xFFFF53
-#define PORT5 0xFFFF54
-#define PORT6 0xFFFF55
-#define PORT7 0xFFFF56
-#define PORT8 0xFFFF57
-#define PORT9 0xFFFF58
-#define PORTA 0xFFFF59
-#define PORTB 0xFFFF5A
-#define PORTC 0xFFFF5B
-#define PORTD 0xFFFF5C
-#define PORTE 0xFFFF5D
-#define PORTF 0xFFFF5E
-#define PORTG 0xFFFF5F
-#define PORTH 0xFFFF70
-
-#define PCR   0xFFFF46
-#define PMR   0xFFFF47
-#define NDERH 0xFFFF48
-#define NDERL 0xFFFF49
-#define PODRH 0xFFFF4A
-#define PODRL 0xFFFF4B
-#define NDRH1 0xFFFF4C
-#define NDRL1 0xFFFF4D
-#define NDRH2 0xFFFF4E
-#define NDRL2 0xFFFF4F
-
-#define SMR0  0xFFFF78
-#define BRR0  0xFFFF79
-#define SCR0  0xFFFF7A
-#define TDR0  0xFFFF7B
-#define SSR0  0xFFFF7C
-#define RDR0  0xFFFF7D
-#define SCMR0 0xFFFF7E
-#define SMR1  0xFFFF80
-#define BRR1  0xFFFF81
-#define SCR1  0xFFFF82
-#define TDR1  0xFFFF83
-#define SSR1  0xFFFF84
-#define RDR1  0xFFFF85
-#define SCMR1 0xFFFF86
-#define SMR2  0xFFFF88
-#define BRR2  0xFFFF89
-#define SCR2  0xFFFF8A
-#define TDR2  0xFFFF8B
-#define SSR2  0xFFFF8C
-#define RDR2  0xFFFF8D
-#define SCMR2 0xFFFF8E
-
-#define IRCR0 0xFFFE1E
-#define SEMR  0xFFFDA8
-
-#define MDCR    0xFFFF3E
-#define SYSCR   0xFFFF3D
-#define MSTPCRH 0xFFFF40
-#define MSTPCRL 0xFFFF41
-#define FLMCR1  0xFFFFC8
-#define FLMCR2  0xFFFFC9
-#define EBR1    0xFFFFCA
-#define EBR2    0xFFFFCB
-#define CTGARC_RAMCR   0xFFFECE
-#define SBYCR   0xFFFF3A
-#define SCKCR   0xFFFF3B
-#define PLLCR   0xFFFF45
-
-#define TSTR   0xFFFFC0
-#define TSNC   0XFFFFC1
-
-#define TCR0   0xFFFFD0
-#define TMDR0  0xFFFFD1
-#define TIORH0 0xFFFFD2
-#define TIORL0 0xFFFFD3
-#define TIER0  0xFFFFD4
-#define TSR0   0xFFFFD5
-#define TCNT0  0xFFFFD6
-#define GRA0   0xFFFFD8
-#define GRB0   0xFFFFDA
-#define GRC0   0xFFFFDC
-#define GRD0   0xFFFFDE
-#define TCR1   0xFFFFE0
-#define TMDR1  0xFFFFE1
-#define TIORH1 0xFFFFE2
-#define TIORL1 0xFFFFE3
-#define TIER1  0xFFFFE4
-#define TSR1   0xFFFFE5
-#define TCNT1  0xFFFFE6
-#define GRA1   0xFFFFE8
-#define GRB1   0xFFFFEA
-#define TCR2   0xFFFFF0
-#define TMDR2  0xFFFFF1
-#define TIORH2 0xFFFFF2
-#define TIORL2 0xFFFFF3
-#define TIER2  0xFFFFF4
-#define TSR2   0xFFFFF5
-#define TCNT2  0xFFFFF6
-#define GRA2   0xFFFFF8
-#define GRB2   0xFFFFFA
-#define TCR3   0xFFFE80
-#define TMDR3  0xFFFE81
-#define TIORH3 0xFFFE82
-#define TIORL3 0xFFFE83
-#define TIER3  0xFFFE84
-#define TSR3   0xFFFE85
-#define TCNT3  0xFFFE86
-#define GRA3   0xFFFE88
-#define GRB3   0xFFFE8A
-#define GRC3   0xFFFE8C
-#define GRD3   0xFFFE8E
-#define TCR4   0xFFFE90
-#define TMDR4  0xFFFE91
-#define TIORH4 0xFFFE92
-#define TIORL4 0xFFFE93
-#define TIER4  0xFFFE94
-#define TSR4   0xFFFE95
-#define TCNT4  0xFFFE96
-#define GRA4   0xFFFE98
-#define GRB4   0xFFFE9A
-#define TCR5   0xFFFEA0
-#define TMDR5  0xFFFEA1
-#define TIORH5 0xFFFEA2
-#define TIORL5 0xFFFEA3
-#define TIER5  0xFFFEA4
-#define TSR5   0xFFFEA5
-#define TCNT5  0xFFFEA6
-#define GRA5   0xFFFEA8
-#define GRB5   0xFFFEAA
-
-#define _8TCR0   0xFFFFB0
-#define _8TCR1   0xFFFFB1
-#define _8TCSR0  0xFFFFB2
-#define _8TCSR1  0xFFFFB3
-#define _8TCORA0 0xFFFFB4
-#define _8TCORA1 0xFFFFB5
-#define _8TCORB0 0xFFFFB6
-#define _8TCORB1 0xFFFFB7
-#define _8TCNT0  0xFFFFB8
-#define _8TCNT1  0xFFFFB9
-
-#define TCSR    0xFFFFBC
-#define TCNT    0xFFFFBD
-#define RSTCSRW 0xFFFFBE
-#define RSTCSRR 0xFFFFBF
-
-#endif /* __KERNEL__ */
-#endif /* __REGS_H8S267x__ */
diff --git a/arch/h8300/include/asm/regs306x.h b/arch/h8300/include/asm/regs306x.h
deleted file mode 100644
index 027dd633fa25..000000000000
--- a/arch/h8300/include/asm/regs306x.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/* internal Peripherals Register address define */
-/* CPU: H8/306x                                 */
-
-#if !defined(__REGS_H8306x__)
-#define __REGS_H8306x__ 
-
-#if defined(__KERNEL__)
-
-#define DASTCR 0xFEE01A
-#define DADR0  0xFEE09C
-#define DADR1  0xFEE09D
-#define DACR   0xFEE09E
-
-#define ADDRAH 0xFFFFE0
-#define ADDRAL 0xFFFFE1
-#define ADDRBH 0xFFFFE2
-#define ADDRBL 0xFFFFE3
-#define ADDRCH 0xFFFFE4
-#define ADDRCL 0xFFFFE5
-#define ADDRDH 0xFFFFE6
-#define ADDRDL 0xFFFFE7
-#define ADCSR  0xFFFFE8
-#define ADCR   0xFFFFE9
-
-#define BRCR   0xFEE013
-#define ADRCR  0xFEE01E
-#define CSCR   0xFEE01F
-#define ABWCR  0xFEE020
-#define ASTCR  0xFEE021
-#define WCRH   0xFEE022
-#define WCRL   0xFEE023
-#define BCR    0xFEE024
-#define DRCRA  0xFEE026
-#define DRCRB  0xFEE027
-#define RTMCSR 0xFEE028
-#define RTCNT  0xFEE029
-#define RTCOR  0xFEE02A
-
-#define MAR0AR  0xFFFF20
-#define MAR0AE  0xFFFF21
-#define MAR0AH  0xFFFF22
-#define MAR0AL  0xFFFF23
-#define ETCR0AL 0xFFFF24
-#define ETCR0AH 0xFFFF25
-#define IOAR0A  0xFFFF26
-#define DTCR0A  0xFFFF27
-#define MAR0BR  0xFFFF28
-#define MAR0BE  0xFFFF29
-#define MAR0BH  0xFFFF2A
-#define MAR0BL  0xFFFF2B
-#define ETCR0BL 0xFFFF2C
-#define ETCR0BH 0xFFFF2D
-#define IOAR0B  0xFFFF2E
-#define DTCR0B  0xFFFF2F
-#define MAR1AR  0xFFFF30
-#define MAR1AE  0xFFFF31
-#define MAR1AH  0xFFFF32
-#define MAR1AL  0xFFFF33
-#define ETCR1AL 0xFFFF34
-#define ETCR1AH 0xFFFF35
-#define IOAR1A  0xFFFF36
-#define DTCR1A  0xFFFF37
-#define MAR1BR  0xFFFF38
-#define MAR1BE  0xFFFF39
-#define MAR1BH  0xFFFF3A
-#define MAR1BL  0xFFFF3B
-#define ETCR1BL 0xFFFF3C
-#define ETCR1BH 0xFFFF3D
-#define IOAR1B  0xFFFF3E
-#define DTCR1B  0xFFFF3F
-
-#define ISCR 0xFEE014
-#define IER  0xFEE015
-#define ISR  0xFEE016
-#define IPRA 0xFEE018
-#define IPRB 0xFEE019
-
-#define P1DDR 0xFEE000
-#define P2DDR 0xFEE001
-#define P3DDR 0xFEE002
-#define P4DDR 0xFEE003
-#define P5DDR 0xFEE004
-#define P6DDR 0xFEE005
-/*#define P7DDR 0xFEE006*/
-#define P8DDR 0xFEE007
-#define P9DDR 0xFEE008
-#define PADDR 0xFEE009
-#define PBDDR 0xFEE00A
-
-#define P1DR  0xFFFFD0
-#define P2DR  0xFFFFD1
-#define P3DR  0xFFFFD2
-#define P4DR  0xFFFFD3
-#define P5DR  0xFFFFD4
-#define P6DR  0xFFFFD5
-/*#define P7DR  0xFFFFD6*/
-#define P8DR  0xFFFFD7
-#define P9DR  0xFFFFD8
-#define PADR  0xFFFFD9
-#define PBDR  0xFFFFDA
-
-#define P2CR  0xFEE03C
-#define P4CR  0xFEE03E
-#define P5CR  0xFEE03F
-
-#define SMR0  0xFFFFB0
-#define BRR0  0xFFFFB1
-#define SCR0  0xFFFFB2
-#define TDR0  0xFFFFB3
-#define SSR0  0xFFFFB4
-#define RDR0  0xFFFFB5
-#define SCMR0 0xFFFFB6
-#define SMR1  0xFFFFB8
-#define BRR1  0xFFFFB9
-#define SCR1  0xFFFFBA
-#define TDR1  0xFFFFBB
-#define SSR1  0xFFFFBC
-#define RDR1  0xFFFFBD
-#define SCMR1 0xFFFFBE
-#define SMR2  0xFFFFC0
-#define BRR2  0xFFFFC1
-#define SCR2  0xFFFFC2
-#define TDR2  0xFFFFC3
-#define SSR2  0xFFFFC4
-#define RDR2  0xFFFFC5
-#define SCMR2 0xFFFFC6
-
-#define MDCR   0xFEE011
-#define SYSCR  0xFEE012
-#define DIVCR  0xFEE01B
-#define MSTCRH 0xFEE01C
-#define MSTCRL 0xFEE01D
-#define FLMCR1 0xFEE030
-#define FLMCR2 0xFEE031
-#define EBR1   0xFEE032
-#define EBR2   0xFEE033
-#define RAMCR  0xFEE077
-
-#define TSTR   0xFFFF60
-#define TSNC   0XFFFF61
-#define TMDR   0xFFFF62
-#define TOLR   0xFFFF63
-#define TISRA  0xFFFF64
-#define TISRB  0xFFFF65
-#define TISRC  0xFFFF66
-#define TCR0   0xFFFF68
-#define TIOR0  0xFFFF69
-#define TCNT0H 0xFFFF6A
-#define TCNT0L 0xFFFF6B
-#define GRA0H  0xFFFF6C
-#define GRA0L  0xFFFF6D
-#define GRB0H  0xFFFF6E
-#define GRB0L  0xFFFF6F
-#define TCR1   0xFFFF70
-#define TIOR1  0xFFFF71
-#define TCNT1H 0xFFFF72
-#define TCNT1L 0xFFFF73
-#define GRA1H  0xFFFF74
-#define GRA1L  0xFFFF75
-#define GRB1H  0xFFFF76
-#define GRB1L  0xFFFF77
-#define TCR3   0xFFFF78
-#define TIOR3  0xFFFF79
-#define TCNT3H 0xFFFF7A
-#define TCNT3L 0xFFFF7B
-#define GRA3H  0xFFFF7C
-#define GRA3L  0xFFFF7D
-#define GRB3H  0xFFFF7E
-#define GRB3L  0xFFFF7F
-
-#define _8TCR0  0xFFFF80
-#define _8TCR1  0xFFFF81
-#define _8TCSR0 0xFFFF82
-#define _8TCSR1 0xFFFF83
-#define TCORA0 0xFFFF84
-#define TCORA1 0xFFFF85
-#define TCORB0 0xFFFF86
-#define TCORB1 0xFFFF87
-#define _8TCNT0 0xFFFF88
-#define _8TCNT1 0xFFFF89
-
-#define _8TCR2  0xFFFF90
-#define _8TCR3  0xFFFF91
-#define _8TCSR2 0xFFFF92
-#define _8TCSR3 0xFFFF93
-#define TCORA2 0xFFFF94
-#define TCORA3 0xFFFF95
-#define TCORB2 0xFFFF96
-#define TCORB3 0xFFFF97
-#define _8TCNT2 0xFFFF98
-#define _8TCNT3 0xFFFF99
-
-#define TCSR   0xFFFF8C
-#define TCNT   0xFFFF8D
-#define RSTCSR 0xFFFF8F
-
-#define TPMR  0xFFFFA0
-#define TPCR  0xFFFFA1
-#define NDERB 0xFFFFA2
-#define NDERA 0xFFFFA3
-#define NDRB1 0xFFFFA4
-#define NDRA1 0xFFFFA5
-#define NDRB2 0xFFFFA6
-#define NDRA2 0xFFFFA7
-
-#define TCSR    0xFFFF8C
-#define TCNT    0xFFFF8D
-#define RSTCSRW 0xFFFF8E
-#define RSTCSRR 0xFFFF8F
-
-#endif /* __KERNEL__ */
-#endif /* __REGS_H8306x__ */
diff --git a/arch/h8300/include/asm/scatterlist.h b/arch/h8300/include/asm/scatterlist.h
deleted file mode 100644
index 82130eda0e5f..000000000000
--- a/arch/h8300/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_SCATTERLIST_H
-#define _H8300_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(_H8300_SCATTERLIST_H) */
diff --git a/arch/h8300/include/asm/sections.h b/arch/h8300/include/asm/sections.h
deleted file mode 100644
index a81743e8b743..000000000000
--- a/arch/h8300/include/asm/sections.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_SECTIONS_H_
-#define _H8300_SECTIONS_H_
-
-#include <asm-generic/sections.h>
-
-#endif
diff --git a/arch/h8300/include/asm/segment.h b/arch/h8300/include/asm/segment.h
deleted file mode 100644
index b79a82d0f99d..000000000000
--- a/arch/h8300/include/asm/segment.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef _H8300_SEGMENT_H
-#define _H8300_SEGMENT_H
-
-/* define constants */
-#define USER_DATA     (1)
-#ifndef __USER_DS
-#define __USER_DS     (USER_DATA)
-#endif
-#define USER_PROGRAM  (2)
-#define SUPER_DATA    (3)
-#ifndef __KERNEL_DS
-#define __KERNEL_DS   (SUPER_DATA)
-#endif
-#define SUPER_PROGRAM (4)
-
-#ifndef __ASSEMBLY__
-
-typedef struct {
-	unsigned long seg;
-} mm_segment_t;
-
-#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
-#define USER_DS		MAKE_MM_SEG(__USER_DS)
-#define KERNEL_DS	MAKE_MM_SEG(__KERNEL_DS)
-
-/*
- * Get/set the SFC/DFC registers for MOVES instructions
- */
-
-static inline mm_segment_t get_fs(void)
-{
-    return USER_DS;
-}
-
-static inline mm_segment_t get_ds(void)
-{
-    /* return the supervisor data space code */
-    return KERNEL_DS;
-}
-
-static inline void set_fs(mm_segment_t val)
-{
-}
-
-#define segment_eq(a,b)	((a).seg == (b).seg)
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _H8300_SEGMENT_H */
diff --git a/arch/h8300/include/asm/sh_bios.h b/arch/h8300/include/asm/sh_bios.h
deleted file mode 100644
index b6bb6e58295c..000000000000
--- a/arch/h8300/include/asm/sh_bios.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* eCos HAL interface header */
-
-#ifndef SH_BIOS_H
-#define SH_BIOS_H
-
-#define HAL_IF_VECTOR_TABLE 0xfffe20
-#define CALL_IF_SET_CONSOLE_COMM  13
-#define QUERY_CURRENT -1
-#define MANGLER       -3
-
-/* Checking for GDB stub active */
-/* suggestion Jonathan Larmour */
-static int sh_bios_in_gdb_mode(void)
-{
-	static int gdb_active = -1;
-	if (gdb_active == -1) {
-		int (*set_console_comm)(int);
-		set_console_comm = ((void **)HAL_IF_VECTOR_TABLE)[CALL_IF_SET_CONSOLE_COMM];
-		gdb_active = (set_console_comm(QUERY_CURRENT) == MANGLER);
-	}
-	return gdb_active;
-}
-
-static void sh_bios_gdb_detach(void)
-{
-
-}
-
-#endif
diff --git a/arch/h8300/include/asm/shm.h b/arch/h8300/include/asm/shm.h
deleted file mode 100644
index ed6623c0545d..000000000000
--- a/arch/h8300/include/asm/shm.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef _H8300_SHM_H
-#define _H8300_SHM_H
-
-
-/* format of page table entries that correspond to shared memory pages
-   currently out in swap space (see also mm/swap.c):
-   bits 0-1 (PAGE_PRESENT) is  = 0
-   bits 8..2 (SWP_TYPE) are = SHM_SWP_TYPE
-   bits 31..9 are used like this:
-   bits 15..9 (SHM_ID) the id of the shared memory segment
-   bits 30..16 (SHM_IDX) the index of the page within the shared memory segment
-                    (actually only bits 25..16 get used since SHMMAX is so low)
-   bit 31 (SHM_READ_ONLY) flag whether the page belongs to a read-only attach
-*/
-/* on the m68k both bits 0 and 1 must be zero */
-/* format on the sun3 is similar, but bits 30, 31 are set to zero and all
-   others are reduced by 2. --m */
-
-#ifndef CONFIG_SUN3
-#define SHM_ID_SHIFT	9
-#else
-#define SHM_ID_SHIFT	7
-#endif
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(SHM_ID_SHIFT+_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-#endif /* _H8300_SHM_H */
diff --git a/arch/h8300/include/asm/shmparam.h b/arch/h8300/include/asm/shmparam.h
deleted file mode 100644
index d1863953ec64..000000000000
--- a/arch/h8300/include/asm/shmparam.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_SHMPARAM_H
-#define _H8300_SHMPARAM_H
-
-#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
-
-#endif /* _H8300_SHMPARAM_H */
diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h
deleted file mode 100644
index 6341e36386f8..000000000000
--- a/arch/h8300/include/asm/signal.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _H8300_SIGNAL_H
-#define _H8300_SIGNAL_H
-
-#include <uapi/asm/signal.h>
-
-/* Most things should be clean enough to redefine this at will, if care
-   is taken to make libc match.  */
-
-#define _NSIG		64
-#define _NSIG_BPW	32
-#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
-
-typedef unsigned long old_sigset_t;		/* at least 32 bits */
-
-typedef struct {
-	unsigned long sig[_NSIG_WORDS];
-} sigset_t;
-
-#define __ARCH_HAS_SA_RESTORER
-
-#include <asm/sigcontext.h>
-#undef __HAVE_ARCH_SIG_BITOPS
-
-#endif /* _H8300_SIGNAL_H */
diff --git a/arch/h8300/include/asm/smp.h b/arch/h8300/include/asm/smp.h
deleted file mode 100644
index 9e9bd7e58922..000000000000
--- a/arch/h8300/include/asm/smp.h
+++ /dev/null
@@ -1 +0,0 @@
-/* nothing required here yet */
diff --git a/arch/h8300/include/asm/spinlock.h b/arch/h8300/include/asm/spinlock.h
deleted file mode 100644
index d5407fa173e4..000000000000
--- a/arch/h8300/include/asm/spinlock.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __H8300_SPINLOCK_H
-#define __H8300_SPINLOCK_H
-
-#error "H8/300 doesn't do SMP yet"
-
-#endif
diff --git a/arch/h8300/include/asm/string.h b/arch/h8300/include/asm/string.h
deleted file mode 100644
index ca5034897d87..000000000000
--- a/arch/h8300/include/asm/string.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef _H8300_STRING_H_
-#define _H8300_STRING_H_
-
-#ifdef __KERNEL__ /* only set these up for kernel code */
-
-#include <asm/setup.h>
-#include <asm/page.h>
-
-#define __HAVE_ARCH_MEMSET
-extern void * memset(void * s, int c, size_t count);
-
-#define __HAVE_ARCH_MEMCPY
-extern void * memcpy(void *d, const void *s, size_t count);
-
-#else /* KERNEL */
-
-/*
- *	let user libraries deal with these,
- *	IMHO the kernel has no place defining these functions for user apps
- */
-
-#define __HAVE_ARCH_STRCPY 1
-#define __HAVE_ARCH_STRNCPY 1
-#define __HAVE_ARCH_STRCAT 1
-#define __HAVE_ARCH_STRNCAT 1
-#define __HAVE_ARCH_STRCMP 1
-#define __HAVE_ARCH_STRNCMP 1
-#define __HAVE_ARCH_STRNICMP 1
-#define __HAVE_ARCH_STRCHR 1
-#define __HAVE_ARCH_STRRCHR 1
-#define __HAVE_ARCH_STRSTR 1
-#define __HAVE_ARCH_STRLEN 1
-#define __HAVE_ARCH_STRNLEN 1
-#define __HAVE_ARCH_MEMSET 1
-#define __HAVE_ARCH_MEMCPY 1
-#define __HAVE_ARCH_MEMMOVE 1
-#define __HAVE_ARCH_MEMSCAN 1
-#define __HAVE_ARCH_MEMCMP 1
-#define __HAVE_ARCH_MEMCHR 1
-#define __HAVE_ARCH_STRTOK 1
-
-#endif /* KERNEL */
-
-#endif /* _M68K_STRING_H_ */
diff --git a/arch/h8300/include/asm/switch_to.h b/arch/h8300/include/asm/switch_to.h
deleted file mode 100644
index cdd8731ce487..000000000000
--- a/arch/h8300/include/asm/switch_to.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef _H8300_SWITCH_TO_H
-#define _H8300_SWITCH_TO_H
-
-/*
- * switch_to(n) should switch tasks to task ptr, first checking that
- * ptr isn't the current task, in which case it does nothing.  This
- * also clears the TS-flag if the task we switched to has used the
- * math co-processor latest.
- */
-/*
- * switch_to() saves the extra registers, that are not saved
- * automatically by SAVE_SWITCH_STACK in resume(), ie. d0-d5 and
- * a0-a1. Some of these are used by schedule() and its predecessors
- * and so we might get see unexpected behaviors when a task returns
- * with unexpected register values.
- *
- * syscall stores these registers itself and none of them are used
- * by syscall after the function in the syscall has been called.
- *
- * Beware that resume now expects *next to be in d1 and the offset of
- * tss to be in a1. This saves a few instructions as we no longer have
- * to push them onto the stack and read them back right after.
- *
- * 02/17/96 - Jes Sorensen (jds@kom.auc.dk)
- *
- * Changed 96/09/19 by Andreas Schwab
- * pass prev in a0, next in a1, offset of tss in d1, and whether
- * the mm structures are shared in d2 (to avoid atc flushing).
- *
- * H8/300 Porting 2002/09/04 Yoshinori Sato
- */
-
-asmlinkage void resume(void);
-#define switch_to(prev,next,last) {                         \
-  void *_last;						    \
-  __asm__ __volatile__(					    \
-  			"mov.l	%1, er0\n\t"		    \
-			"mov.l	%2, er1\n\t"		    \
-                        "mov.l  %3, er2\n\t"                \
-			"jsr @_resume\n\t"                  \
-                        "mov.l  er2,%0\n\t"                 \
-		       : "=r" (_last)			    \
-		       : "r" (&(prev->thread)),		    \
-			 "r" (&(next->thread)),		    \
-                         "g" (prev)                         \
-		       : "cc", "er0", "er1", "er2", "er3"); \
-  (last) = _last; 					    \
-}
-
-#endif /* _H8300_SWITCH_TO_H */
diff --git a/arch/h8300/include/asm/target_time.h b/arch/h8300/include/asm/target_time.h
deleted file mode 100644
index 9f2a9aa1fe6f..000000000000
--- a/arch/h8300/include/asm/target_time.h
+++ /dev/null
@@ -1,4 +0,0 @@
-extern int platform_timer_setup(void (*timer_int)(int, void *, struct pt_regs *));
-extern void platform_timer_eoi(void);
-extern void platform_gettod(unsigned int *year, unsigned int *mon, unsigned int *day, 
-                            unsigned int *hour, unsigned int *min, unsigned int *sec);
diff --git a/arch/h8300/include/asm/termios.h b/arch/h8300/include/asm/termios.h
deleted file mode 100644
index 93a63df56247..000000000000
--- a/arch/h8300/include/asm/termios.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef _H8300_TERMIOS_H
-#define _H8300_TERMIOS_H
-
-#include <uapi/asm/termios.h>
-
-/*	intr=^C		quit=^|		erase=del	kill=^U
-	eof=^D		vtime=\0	vmin=\1		sxtc=\0
-	start=^Q	stop=^S		susp=^Z		eol=\0
-	reprint=^R	discard=^U	werase=^W	lnext=^V
-	eol2=\0
-*/
-#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-
-/*
- * Translate a "termio" structure into a "termios". Ugh.
- */
-#define user_termio_to_kernel_termios(termios, termio) \
-({ \
-	unsigned short tmp; \
-	get_user(tmp, &(termio)->c_iflag); \
-	(termios)->c_iflag = (0xffff0000 & ((termios)->c_iflag)) | tmp; \
-	get_user(tmp, &(termio)->c_oflag); \
-	(termios)->c_oflag = (0xffff0000 & ((termios)->c_oflag)) | tmp; \
-	get_user(tmp, &(termio)->c_cflag); \
-	(termios)->c_cflag = (0xffff0000 & ((termios)->c_cflag)) | tmp; \
-	get_user(tmp, &(termio)->c_lflag); \
-	(termios)->c_lflag = (0xffff0000 & ((termios)->c_lflag)) | tmp; \
-	get_user((termios)->c_line, &(termio)->c_line); \
-	copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
-})
-
-/*
- * Translate a "termios" structure into a "termio". Ugh.
- */
-#define kernel_termios_to_user_termio(termio, termios) \
-({ \
-	put_user((termios)->c_iflag, &(termio)->c_iflag); \
-	put_user((termios)->c_oflag, &(termio)->c_oflag); \
-	put_user((termios)->c_cflag, &(termio)->c_cflag); \
-	put_user((termios)->c_lflag, &(termio)->c_lflag); \
-	put_user((termios)->c_line,  &(termio)->c_line); \
-	copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
-})
-
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
-#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
-#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
-
-#endif /* _H8300_TERMIOS_H */
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
deleted file mode 100644
index ec2f7777c65a..000000000000
--- a/arch/h8300/include/asm/thread_info.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* thread_info.h: h8300 low-level thread information
- * adapted from the i386 and PPC versions by Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- * Copyright (C) 2002  David Howells (dhowells@redhat.com)
- * - Incorporating suggestions made by Linus Torvalds and Dave Miller
- */
-
-#ifndef _ASM_THREAD_INFO_H
-#define _ASM_THREAD_INFO_H
-
-#include <asm/page.h>
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-
-/*
- * low level task data.
- * If you change this, change the TI_* offsets below to match.
- */
-struct thread_info {
-	struct task_struct *task;		/* main task structure */
-	struct exec_domain *exec_domain;	/* execution domain */
-	unsigned long	   flags;		/* low level flags */
-	int		   cpu;			/* cpu we're on */
-	int		   preempt_count;	/* 0 => preemptable, <0 => BUG */
-	struct restart_block restart_block;
-};
-
-/*
- * macros/functions for gaining access to the thread information structure
- */
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.task =		&tsk,			\
-	.exec_domain =	&default_exec_domain,	\
-	.flags =	0,			\
-	.cpu =		0,			\
-	.preempt_count = INIT_PREEMPT_COUNT,	\
-	.restart_block	= {			\
-		.fn = do_no_restart_syscall,	\
-	},					\
-}
-
-#define init_thread_info	(init_thread_union.thread_info)
-#define init_stack		(init_thread_union.stack)
-
-
-/*
- * Size of kernel stack for each process. This must be a power of 2...
- */
-#define THREAD_SIZE_ORDER	1
-#define THREAD_SIZE		8192	/* 2 pages */
-
-
-/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
-{
-	struct thread_info *ti;
-	__asm__(
-		"mov.l	sp, %0 \n\t"
-		"and.l	%1, %0"
-		: "=&r"(ti)
-		: "i" (~(THREAD_SIZE-1))
-		);
-	return ti;
-}
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * Offsets in thread_info structure, used in assembly code
- */
-#define TI_TASK		0
-#define TI_EXECDOMAIN	4
-#define TI_FLAGS	8
-#define TI_CPU		12
-#define TI_PRE_COUNT	16
-
-#define	PREEMPT_ACTIVE	0x4000000
-
-/*
- * thread information flag bit numbers
- */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		1	/* signal pending */
-#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_MEMDIE		4	/* is terminating due to OOM killer */
-#define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
-#define TIF_NOTIFY_RESUME	6	/* callback before returning to user */
-
-/* as above, but as bit values */
-#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
-#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
-
-#define _TIF_WORK_MASK		(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
-				 _TIF_NOTIFY_RESUME)
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/h8300/include/asm/timer.h b/arch/h8300/include/asm/timer.h
deleted file mode 100644
index def80464d38f..000000000000
--- a/arch/h8300/include/asm/timer.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __H8300_TIMER_H
-#define __H8300_TIMER_H
-
-void h8300_timer_tick(void);
-void h8300_timer_setup(void);
-void h8300_gettod(unsigned int *year, unsigned int *mon, unsigned int *day,
-		   unsigned int *hour, unsigned int *min, unsigned int *sec);
-
-#define TIMER_FREQ (CONFIG_CPU_CLOCK*10000) /* Timer input freq. */
-
-#define calc_param(cnt, div, rate, limit)			\
-do {								\
-	cnt = TIMER_FREQ / HZ;					\
-	for (div = 0; div < ARRAY_SIZE(divide_rate); div++) {	\
-		if (rate[div] == 0)				\
-			continue;				\
-		if ((cnt / rate[div]) > limit)			\
-			break;					\
-	}							\
-	if (div == ARRAY_SIZE(divide_rate))			\
-		panic("Timer counter overflow");		\
-	cnt /= divide_rate[div];				\
-} while(0)
-
-#endif
diff --git a/arch/h8300/include/asm/timex.h b/arch/h8300/include/asm/timex.h
deleted file mode 100644
index 23e67013439f..000000000000
--- a/arch/h8300/include/asm/timex.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * linux/include/asm-h8300/timex.h
- *
- * H8/300 architecture timex specifications
- */
-#ifndef _ASM_H8300_TIMEX_H
-#define _ASM_H8300_TIMEX_H
-
-#define CLOCK_TICK_RATE (CONFIG_CPU_CLOCK*1000/8192) /* Timer input freq. */
-
-typedef unsigned long cycles_t;
-extern short h8300_timer_count;
-
-static inline cycles_t get_cycles(void)
-{
-	return 0;
-}
-
-#endif
diff --git a/arch/h8300/include/asm/tlb.h b/arch/h8300/include/asm/tlb.h
deleted file mode 100644
index 7f0743051ad5..000000000000
--- a/arch/h8300/include/asm/tlb.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __H8300_TLB_H__
-#define __H8300_TLB_H__
-
-#define tlb_flush(tlb)	do { } while(0)
-
-#include <asm-generic/tlb.h>
-
-#endif
diff --git a/arch/h8300/include/asm/tlbflush.h b/arch/h8300/include/asm/tlbflush.h
deleted file mode 100644
index 41c148a9208e..000000000000
--- a/arch/h8300/include/asm/tlbflush.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef _H8300_TLBFLUSH_H
-#define _H8300_TLBFLUSH_H
-
-/*
- * Copyright (C) 2000 Lineo, David McCullough <davidm@uclinux.org>
- * Copyright (C) 2000-2002, Greg Ungerer <gerg@snapgear.com>
- */
-
-#include <asm/setup.h>
-
-/*
- * flush all user-space atc entries.
- */
-static inline void __flush_tlb(void)
-{
-	BUG();
-}
-
-static inline void __flush_tlb_one(unsigned long addr)
-{
-	BUG();
-}
-
-#define flush_tlb() __flush_tlb()
-
-/*
- * flush all atc entries (both kernel and user-space entries).
- */
-static inline void flush_tlb_all(void)
-{
-	BUG();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	BUG();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	BUG();
-}
-
-static inline void flush_tlb_range(struct mm_struct *mm,
-				   unsigned long start, unsigned long end)
-{
-	BUG();
-}
-
-static inline void flush_tlb_kernel_page(unsigned long addr)
-{
-	BUG();
-}
-
-#endif /* _H8300_TLBFLUSH_H */
diff --git a/arch/h8300/include/asm/topology.h b/arch/h8300/include/asm/topology.h
deleted file mode 100644
index fdc121924d4c..000000000000
--- a/arch/h8300/include/asm/topology.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_H8300_TOPOLOGY_H
-#define _ASM_H8300_TOPOLOGY_H
-
-#include <asm-generic/topology.h>
-
-#endif /* _ASM_H8300_TOPOLOGY_H */
diff --git a/arch/h8300/include/asm/traps.h b/arch/h8300/include/asm/traps.h
deleted file mode 100644
index 41cf6be02f68..000000000000
--- a/arch/h8300/include/asm/traps.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- *  linux/include/asm-h8300/traps.h
- *
- *  Copyright (C) 2003 Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-#ifndef _H8300_TRAPS_H
-#define _H8300_TRAPS_H
-
-extern void system_call(void);
-extern void interrupt_entry(void);
-extern void trace_break(void);
-
-#define JMP_OP 0x5a000000
-#define JSR_OP 0x5e000000
-#define VECTOR(address) ((JMP_OP)|((unsigned long)address))
-#define REDIRECT(address) ((JSR_OP)|((unsigned long)address))
-
-#define TRACE_VEC 5
-
-#define TRAP0_VEC 8
-#define TRAP1_VEC 9
-#define TRAP2_VEC 10
-#define TRAP3_VEC 11
-
-#if defined(__H8300H__)
-#define NR_TRAPS 12
-#endif
-#if defined(__H8300S__)
-#define NR_TRAPS 16
-#endif
-
-#endif /* _H8300_TRAPS_H */
diff --git a/arch/h8300/include/asm/types.h b/arch/h8300/include/asm/types.h
deleted file mode 100644
index c012707f6037..000000000000
--- a/arch/h8300/include/asm/types.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _H8300_TYPES_H
-#define _H8300_TYPES_H
-
-#include <uapi/asm/types.h>
-
-
-#define BITS_PER_LONG 32
-
-#endif /* _H8300_TYPES_H */
diff --git a/arch/h8300/include/asm/uaccess.h b/arch/h8300/include/asm/uaccess.h
deleted file mode 100644
index 8725d1ad4272..000000000000
--- a/arch/h8300/include/asm/uaccess.h
+++ /dev/null
@@ -1,163 +0,0 @@
-#ifndef __H8300_UACCESS_H
-#define __H8300_UACCESS_H
-
-/*
- * User space memory access functions
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-
-#include <asm/segment.h>
-
-#define VERIFY_READ	0
-#define VERIFY_WRITE	1
-
-/* We let the MMU do all checking */
-#define access_ok(type, addr, size) __access_ok((unsigned long)addr,size)
-static inline int __access_ok(unsigned long addr, unsigned long size)
-{
-#define	RANGE_CHECK_OK(addr, size, lower, upper) \
-	(((addr) >= (lower)) && (((addr) + (size)) < (upper)))
-
-	extern unsigned long _ramend;
-	return(RANGE_CHECK_OK(addr, size, 0L, (unsigned long)&_ramend));
-}
-
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path.  This means when everything is well,
- * we don't even have to jump over them.  Further, they do not intrude
- * on our cache or tlb entries.
- */
-
-struct exception_table_entry
-{
-	unsigned long insn, fixup;
-};
-
-/* Returns 0 if exception not found and fixup otherwise.  */
-extern unsigned long search_exception_table(unsigned long);
-
-
-/*
- * These are the main single-value transfer routines.  They automatically
- * use the right size if we just have the right pointer type.
- */
-
-#define put_user(x, ptr)				\
-({							\
-    int __pu_err = 0;					\
-    typeof(*(ptr)) __pu_val = (x);			\
-    switch (sizeof (*(ptr))) {				\
-    case 1:						\
-    case 2:						\
-    case 4:						\
-	*(ptr) = (__pu_val);   	        		\
-	break;						\
-    case 8:						\
-	memcpy(ptr, &__pu_val, sizeof (*(ptr)));        \
-	break;						\
-    default:						\
-	__pu_err = __put_user_bad();			\
-	break;						\
-    }							\
-    __pu_err;						\
-})
-#define __put_user(x, ptr) put_user(x, ptr)
-
-extern int __put_user_bad(void);
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-
-#define __ptr(x) ((unsigned long *)(x))
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-
-#define get_user(x, ptr)					\
-({								\
-    int __gu_err = 0;						\
-    typeof(*(ptr)) __gu_val = *ptr;				\
-    switch (sizeof(*(ptr))) {					\
-    case 1:							\
-    case 2:							\
-    case 4:							\
-    case 8: 							\
-	break;							\
-    default:							\
-	__gu_err = __get_user_bad();				\
-	break;							\
-    }								\
-    (x) = __gu_val;						\
-    __gu_err;							\
-})
-#define __get_user(x, ptr) get_user(x, ptr)
-
-extern int __get_user_bad(void);
-
-#define copy_from_user(to, from, n)		(memcpy(to, from, n), 0)
-#define copy_to_user(to, from, n)		(memcpy(to, from, n), 0)
-
-#define __copy_from_user(to, from, n) copy_from_user(to, from, n)
-#define __copy_to_user(to, from, n) copy_to_user(to, from, n)
-#define __copy_to_user_inatomic __copy_to_user
-#define __copy_from_user_inatomic __copy_from_user
-
-#define copy_to_user_ret(to,from,n,retval) ({ if (copy_to_user(to,from,n)) return retval; })
-
-#define copy_from_user_ret(to,from,n,retval) ({ if (copy_from_user(to,from,n)) return retval; })
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-static inline long
-strncpy_from_user(char *dst, const char *src, long count)
-{
-	char *tmp;
-	strncpy(dst, src, count);
-	for (tmp = dst; *tmp && count > 0; tmp++, count--)
-		;
-	return(tmp - dst); /* DAVIDM should we count a NUL ?  check getname */
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-static inline long strnlen_user(const char *src, long n)
-{
-	return(strlen(src) + 1); /* DAVIDM make safer */
-}
-
-#define strlen_user(str) strnlen_user(str, 32767)
-
-/*
- * Zero Userspace
- */
-
-static inline unsigned long
-clear_user(void *to, unsigned long n)
-{
-	memset(to, 0, n);
-	return 0;
-}
-
-#define __clear_user	clear_user
-
-#endif /* _H8300_UACCESS_H */
diff --git a/arch/h8300/include/asm/ucontext.h b/arch/h8300/include/asm/ucontext.h
deleted file mode 100644
index 0bcf8f85fab9..000000000000
--- a/arch/h8300/include/asm/ucontext.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _H8300_UCONTEXT_H
-#define _H8300_UCONTEXT_H
-
-struct ucontext {
-	unsigned long	  uc_flags;
-	struct ucontext  *uc_link;
-	stack_t		  uc_stack;
-	struct sigcontext uc_mcontext;
-	sigset_t	  uc_sigmask;	/* mask last for extensibility */
-};
-
-#endif
diff --git a/arch/h8300/include/asm/unaligned.h b/arch/h8300/include/asm/unaligned.h
deleted file mode 100644
index b8d06c70c2da..000000000000
--- a/arch/h8300/include/asm/unaligned.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_H8300_UNALIGNED_H
-#define _ASM_H8300_UNALIGNED_H
-
-#include <linux/unaligned/be_memmove.h>
-#include <linux/unaligned/le_byteshift.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned	__get_unaligned_be
-#define put_unaligned	__put_unaligned_be
-
-#endif /* _ASM_H8300_UNALIGNED_H */
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
deleted file mode 100644
index ab671ecf5196..000000000000
--- a/arch/h8300/include/asm/unistd.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _ASM_H8300_UNISTD_H_
-#define _ASM_H8300_UNISTD_H_
-
-#include <uapi/asm/unistd.h>
-
-
-#define NR_syscalls 321
-
-#define __ARCH_WANT_OLD_READDIR
-#define __ARCH_WANT_OLD_STAT
-#define __ARCH_WANT_STAT64
-#define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_GETHOSTNAME
-#define __ARCH_WANT_SYS_IPC
-#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
-#define __ARCH_WANT_SYS_SIGNAL
-#define __ARCH_WANT_SYS_TIME
-#define __ARCH_WANT_SYS_UTIME
-#define __ARCH_WANT_SYS_WAITPID
-#define __ARCH_WANT_SYS_SOCKETCALL
-#define __ARCH_WANT_SYS_FADVISE64
-#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
-#define __ARCH_WANT_SYS_NICE
-#define __ARCH_WANT_SYS_OLD_GETRLIMIT
-#define __ARCH_WANT_SYS_OLD_MMAP
-#define __ARCH_WANT_SYS_OLD_SELECT
-#define __ARCH_WANT_SYS_OLDUMOUNT
-#define __ARCH_WANT_SYS_SIGPENDING
-#define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_FORK
-#define __ARCH_WANT_SYS_VFORK
-#define __ARCH_WANT_SYS_CLONE
-
-#endif /* _ASM_H8300_UNISTD_H_ */
diff --git a/arch/h8300/include/asm/user.h b/arch/h8300/include/asm/user.h
deleted file mode 100644
index 14a9e18950f1..000000000000
--- a/arch/h8300/include/asm/user.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef _H8300_USER_H
-#define _H8300_USER_H
-
-#include <asm/page.h>
-
-/* Core file format: The core file is written in such a way that gdb
-   can understand it and provide useful information to the user (under
-   linux we use the 'trad-core' bfd).  There are quite a number of
-   obstacles to being able to view the contents of the floating point
-   registers, and until these are solved you will not be able to view the
-   contents of them.  Actually, you can read in the core file and look at
-   the contents of the user struct to find out what the floating point
-   registers contain.
-   The actual file contents are as follows:
-   UPAGE: 1 page consisting of a user struct that tells gdb what is present
-   in the file.  Directly after this is a copy of the task_struct, which
-   is currently not used by gdb, but it may come in useful at some point.
-   All of the registers are stored as part of the upage.  The upage should
-   always be only one page.
-   DATA: The data area is stored.  We use current->end_text to
-   current->brk to pick up all of the user variables, plus any memory
-   that may have been malloced.  No attempt is made to determine if a page
-   is demand-zero or if a page is totally unused, we just cover the entire
-   range.  All of the addresses are rounded in such a way that an integral
-   number of pages is written.
-   STACK: We need the stack information in order to get a meaningful
-   backtrace.  We need to write the data from (esp) to
-   current->start_stack, so we round each of these off in order to be able
-   to write an integer number of pages.
-   The minimum core file size is 3 pages, or 12288 bytes.
-*/
-
-/* This is the old layout of "struct pt_regs" as of Linux 1.x, and
-   is still the layout used by user (the new pt_regs doesn't have
-   all registers). */
-struct user_regs_struct {
-	long er1,er2,er3,er4,er5,er6;
-	long er0;
-	long usp;
-	long orig_er0;
-	short ccr;
-	long pc;
-};
-
-	
-/* When the kernel dumps core, it starts by dumping the user struct -
-   this will be used by gdb to figure out where the data and stack segments
-   are within the file, and what virtual addresses to use. */
-struct user{
-/* We start with the registers, to mimic the way that "memory" is returned
-   from the ptrace(3,...) function.  */
-  struct user_regs_struct regs;	/* Where the registers are actually stored */
-/* ptrace does not yet supply these.  Someday.... */
-/* The rest of this junk is to help gdb figure out what goes where */
-  unsigned long int u_tsize;	/* Text segment size (pages). */
-  unsigned long int u_dsize;	/* Data segment size (pages). */
-  unsigned long int u_ssize;	/* Stack segment size (pages). */
-  unsigned long start_code;     /* Starting virtual address of text. */
-  unsigned long start_stack;	/* Starting virtual address of stack area.
-				   This is actually the bottom of the stack,
-				   the top of the stack is always found in the
-				   esp register.  */
-  long int signal;     		/* Signal that caused the core dump. */
-  int reserved;			/* No longer used */
-  unsigned long u_ar0;		/* Used by gdb to help find the values for */
-				/* the registers. */
-  unsigned long magic;		/* To uniquely identify a core file */
-  char u_comm[32];		/* User command that was responsible */
-};
-#define NBPG PAGE_SIZE
-#define UPAGES 1
-#define HOST_TEXT_START_ADDR (u.start_code)
-#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
-
-#endif
diff --git a/arch/h8300/include/asm/virtconvert.h b/arch/h8300/include/asm/virtconvert.h
deleted file mode 100644
index 19cfd62b11c3..000000000000
--- a/arch/h8300/include/asm/virtconvert.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __H8300_VIRT_CONVERT__
-#define __H8300_VIRT_CONVERT__
-
-/*
- * Macros used for converting between virtual and physical mappings.
- */
-
-#ifdef __KERNEL__
-
-#include <asm/setup.h>
-#include <asm/page.h>
-
-#define phys_to_virt(vaddr)	((void *) (vaddr))
-#define virt_to_phys(vaddr)	((unsigned long) (vaddr))
-
-#define virt_to_bus virt_to_phys
-#define bus_to_virt phys_to_virt
-
-#endif
-#endif
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
deleted file mode 100644
index 040178cdb3eb..000000000000
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,34 +0,0 @@
-# UAPI Header export list
-include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/h8300/include/uapi/asm/auxvec.h b/arch/h8300/include/uapi/asm/auxvec.h
deleted file mode 100644
index 1d36fe38b088..000000000000
--- a/arch/h8300/include/uapi/asm/auxvec.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __ASMH8300_AUXVEC_H
-#define __ASMH8300_AUXVEC_H
-
-#endif
diff --git a/arch/h8300/include/uapi/asm/bitsperlong.h b/arch/h8300/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 6dc0bb0c13b2..000000000000
--- a/arch/h8300/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitsperlong.h>
diff --git a/arch/h8300/include/uapi/asm/byteorder.h b/arch/h8300/include/uapi/asm/byteorder.h
deleted file mode 100644
index 13539da99efd..000000000000
--- a/arch/h8300/include/uapi/asm/byteorder.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_BYTEORDER_H
-#define _H8300_BYTEORDER_H
-
-#include <linux/byteorder/big_endian.h>
-
-#endif /* _H8300_BYTEORDER_H */
diff --git a/arch/h8300/include/uapi/asm/errno.h b/arch/h8300/include/uapi/asm/errno.h
deleted file mode 100644
index 0c2f5641fdcc..000000000000
--- a/arch/h8300/include/uapi/asm/errno.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_ERRNO_H
-#define _H8300_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif /* _H8300_ERRNO_H */
diff --git a/arch/h8300/include/uapi/asm/fcntl.h b/arch/h8300/include/uapi/asm/fcntl.h
deleted file mode 100644
index 1952cb2e3b06..000000000000
--- a/arch/h8300/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _H8300_FCNTL_H
-#define _H8300_FCNTL_H
-
-#define O_DIRECTORY	040000	/* must be a directory */
-#define O_NOFOLLOW	0100000	/* don't follow links */
-#define O_DIRECT	0200000	/* direct disk access hint - currently ignored */
-#define O_LARGEFILE	0400000
-
-#include <asm-generic/fcntl.h>
-
-#endif /* _H8300_FCNTL_H */
diff --git a/arch/h8300/include/uapi/asm/ioctl.h b/arch/h8300/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/h8300/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/h8300/include/uapi/asm/ioctls.h b/arch/h8300/include/uapi/asm/ioctls.h
deleted file mode 100644
index 30eaed2facdb..000000000000
--- a/arch/h8300/include/uapi/asm/ioctls.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ARCH_H8300_IOCTLS_H__
-#define __ARCH_H8300_IOCTLS_H__
-
-#define FIOQSIZE	0x545E
-
-#include <asm-generic/ioctls.h>
-
-#endif /* __ARCH_H8300_IOCTLS_H__ */
diff --git a/arch/h8300/include/uapi/asm/ipcbuf.h b/arch/h8300/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d0..000000000000
--- a/arch/h8300/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/h8300/include/uapi/asm/kvm_para.h b/arch/h8300/include/uapi/asm/kvm_para.h
deleted file mode 100644
index 14fab8f0b957..000000000000
--- a/arch/h8300/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kvm_para.h>
diff --git a/arch/h8300/include/uapi/asm/mman.h b/arch/h8300/include/uapi/asm/mman.h
deleted file mode 100644
index 8eebf89f5ab1..000000000000
--- a/arch/h8300/include/uapi/asm/mman.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mman.h>
diff --git a/arch/h8300/include/uapi/asm/msgbuf.h b/arch/h8300/include/uapi/asm/msgbuf.h
deleted file mode 100644
index 6b148cd09aa5..000000000000
--- a/arch/h8300/include/uapi/asm/msgbuf.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef _H8300_MSGBUF_H
-#define _H8300_MSGBUF_H
-
-/* 
- * The msqid64_ds structure for H8/300 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct msqid64_ds {
-	struct ipc64_perm msg_perm;
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	unsigned long	__unused1;
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	unsigned long	__unused2;
-	__kernel_time_t msg_ctime;	/* last change time */
-	unsigned long	__unused3;
-	unsigned long  msg_cbytes;	/* current number of bytes on queue */
-	unsigned long  msg_qnum;	/* number of messages in queue */
-	unsigned long  msg_qbytes;	/* max number of bytes on queue */
-	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
-	__kernel_pid_t msg_lrpid;	/* last receive pid */
-	unsigned long  __unused4;
-	unsigned long  __unused5;
-};
-
-#endif /* _H8300_MSGBUF_H */
diff --git a/arch/h8300/include/uapi/asm/param.h b/arch/h8300/include/uapi/asm/param.h
deleted file mode 100644
index 3dd18ae15f03..000000000000
--- a/arch/h8300/include/uapi/asm/param.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _UAPI_H8300_PARAM_H
-#define _UAPI_H8300_PARAM_H
-
-#ifndef __KERNEL__
-#define HZ		100
-#endif
-
-#define EXEC_PAGESIZE	4096
-
-#ifndef NOGROUP
-#define NOGROUP		(-1)
-#endif
-
-#define MAXHOSTNAMELEN	64	/* max length of hostname */
-
-#endif /* _UAPI_H8300_PARAM_H */
diff --git a/arch/h8300/include/uapi/asm/poll.h b/arch/h8300/include/uapi/asm/poll.h
deleted file mode 100644
index f61540c22d94..000000000000
--- a/arch/h8300/include/uapi/asm/poll.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __H8300_POLL_H
-#define __H8300_POLL_H
-
-#define POLLWRNORM	POLLOUT
-#define POLLWRBAND	256
-
-#include <asm-generic/poll.h>
-
-#undef POLLREMOVE
-
-#endif
diff --git a/arch/h8300/include/uapi/asm/posix_types.h b/arch/h8300/include/uapi/asm/posix_types.h
deleted file mode 100644
index 91e62ba4c7b0..000000000000
--- a/arch/h8300/include/uapi/asm/posix_types.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef __ARCH_H8300_POSIX_TYPES_H
-#define __ARCH_H8300_POSIX_TYPES_H
-
-/*
- * This file is generally used by user-level software, so you need to
- * be a little careful about namespace pollution etc.  Also, we cannot
- * assume GCC is being used.
- */
-
-typedef unsigned short	__kernel_mode_t;
-#define __kernel_mode_t __kernel_mode_t
-
-typedef unsigned short	__kernel_ipc_pid_t;
-#define __kernel_ipc_pid_t __kernel_ipc_pid_t
-
-typedef unsigned short	__kernel_uid_t;
-typedef unsigned short	__kernel_gid_t;
-#define __kernel_uid_t __kernel_uid_t
-
-typedef unsigned short	__kernel_old_uid_t;
-typedef unsigned short	__kernel_old_gid_t;
-#define __kernel_old_uid_t __kernel_old_uid_t
-
-#include <asm-generic/posix_types.h>
-
-#endif
diff --git a/arch/h8300/include/uapi/asm/ptrace.h b/arch/h8300/include/uapi/asm/ptrace.h
deleted file mode 100644
index ef39ec5977b6..000000000000
--- a/arch/h8300/include/uapi/asm/ptrace.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef _UAPI_H8300_PTRACE_H
-#define _UAPI_H8300_PTRACE_H
-
-#ifndef __ASSEMBLY__
-
-#define PT_ER1	   0
-#define PT_ER2	   1
-#define PT_ER3	   2
-#define PT_ER4	   3
-#define PT_ER5	   4
-#define PT_ER6	   5
-#define PT_ER0	   6
-#define PT_ORIG_ER0	   7
-#define PT_CCR	   8
-#define PT_PC	   9
-#define PT_USP	   10
-#define PT_EXR     12
-
-/* this struct defines the way the registers are stored on the
-   stack during a system call. */
-
-struct pt_regs {
-	long     retpc;
-	long     er4;
-	long     er5;
-	long     er6;
-	long     er3;
-	long     er2;
-	long     er1;
-	long     orig_er0;
-	unsigned short ccr;
-	long     er0;
-	long     vector;
-#if defined(CONFIG_CPU_H8S)
-	unsigned short exr;
-#endif
-	unsigned long  pc;
-} __attribute__((aligned(2),packed));
-
-#define PTRACE_GETREGS            12
-#define PTRACE_SETREGS            13
-
-#endif /* __ASSEMBLY__ */
-#endif /* _UAPI_H8300_PTRACE_H */
diff --git a/arch/h8300/include/uapi/asm/resource.h b/arch/h8300/include/uapi/asm/resource.h
deleted file mode 100644
index 46c5f4391607..000000000000
--- a/arch/h8300/include/uapi/asm/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_RESOURCE_H
-#define _H8300_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif /* _H8300_RESOURCE_H */
diff --git a/arch/h8300/include/uapi/asm/sembuf.h b/arch/h8300/include/uapi/asm/sembuf.h
deleted file mode 100644
index e04a3ec0cb92..000000000000
--- a/arch/h8300/include/uapi/asm/sembuf.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _H8300_SEMBUF_H
-#define _H8300_SEMBUF_H
-
-/* 
- * The semid64_ds structure for m68k architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct semid64_ds {
-	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-	__kernel_time_t	sem_otime;		/* last semop time */
-	unsigned long	__unused1;
-	__kernel_time_t	sem_ctime;		/* last change time */
-	unsigned long	__unused2;
-	unsigned long	sem_nsems;		/* no. of semaphores in array */
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* _H8300_SEMBUF_H */
diff --git a/arch/h8300/include/uapi/asm/setup.h b/arch/h8300/include/uapi/asm/setup.h
deleted file mode 100644
index e2c600e96733..000000000000
--- a/arch/h8300/include/uapi/asm/setup.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __H8300_SETUP_H
-#define __H8300_SETUP_H
-
-#define COMMAND_LINE_SIZE	512
-
-#endif
diff --git a/arch/h8300/include/uapi/asm/shmbuf.h b/arch/h8300/include/uapi/asm/shmbuf.h
deleted file mode 100644
index 64e77993a7a9..000000000000
--- a/arch/h8300/include/uapi/asm/shmbuf.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _H8300_SHMBUF_H
-#define _H8300_SHMBUF_H
-
-/* 
- * The shmid64_ds structure for m68k architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct shmid64_ds {
-	struct ipc64_perm	shm_perm;	/* operation perms */
-	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-	unsigned long		__unused1;
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	unsigned long		__unused2;
-	__kernel_time_t		shm_ctime;	/* last change time */
-	unsigned long		__unused3;
-	__kernel_pid_t		shm_cpid;	/* pid of creator */
-	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	unsigned long		shm_nattch;	/* no. of current attaches */
-	unsigned long		__unused4;
-	unsigned long		__unused5;
-};
-
-struct shminfo64 {
-	unsigned long	shmmax;
-	unsigned long	shmmin;
-	unsigned long	shmmni;
-	unsigned long	shmseg;
-	unsigned long	shmall;
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* _H8300_SHMBUF_H */
diff --git a/arch/h8300/include/uapi/asm/sigcontext.h b/arch/h8300/include/uapi/asm/sigcontext.h
deleted file mode 100644
index e4b81505f8f8..000000000000
--- a/arch/h8300/include/uapi/asm/sigcontext.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _ASM_H8300_SIGCONTEXT_H
-#define _ASM_H8300_SIGCONTEXT_H
-
-struct sigcontext {
-	unsigned long  sc_mask; 	/* old sigmask */
-	unsigned long  sc_usp;		/* old user stack pointer */
-	unsigned long  sc_er0;
-	unsigned long  sc_er1;
-	unsigned long  sc_er2;
-	unsigned long  sc_er3;
-	unsigned long  sc_er4;
-	unsigned long  sc_er5;
-	unsigned long  sc_er6;
-	unsigned short sc_ccr;
-	unsigned long  sc_pc;
-};
-
-#endif
diff --git a/arch/h8300/include/uapi/asm/siginfo.h b/arch/h8300/include/uapi/asm/siginfo.h
deleted file mode 100644
index bc8fbea931a5..000000000000
--- a/arch/h8300/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_SIGINFO_H
-#define _H8300_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-#endif
diff --git a/arch/h8300/include/uapi/asm/signal.h b/arch/h8300/include/uapi/asm/signal.h
deleted file mode 100644
index af3a6c37fee6..000000000000
--- a/arch/h8300/include/uapi/asm/signal.h
+++ /dev/null
@@ -1,115 +0,0 @@
-#ifndef _UAPI_H8300_SIGNAL_H
-#define _UAPI_H8300_SIGNAL_H
-
-#include <linux/types.h>
-
-/* Avoid too many header ordering problems.  */
-struct siginfo;
-
-#ifndef __KERNEL__
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-#define NSIG		32
-typedef unsigned long sigset_t;
-
-#endif /* __KERNEL__ */
-
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-#define SIGBUS		 7
-#define SIGFPE		 8
-#define SIGKILL		 9
-#define SIGUSR1		10
-#define SIGSEGV		11
-#define SIGUSR2		12
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGSTKFLT	16
-#define SIGCHLD		17
-#define SIGCONT		18
-#define SIGSTOP		19
-#define SIGTSTP		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGURG		23
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGIO		29
-#define SIGPOLL		SIGIO
-/*
-#define SIGLOST		29
-*/
-#define SIGPWR		30
-#define SIGSYS		31
-#define	SIGUNUSED	31
-
-/* These should not be considered constants from userland.  */
-#define SIGRTMIN	32
-#define SIGRTMAX	_NSIG
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002 /* not supported yet */
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
-#define SA_RESTORER	0x04000000
-
-#define MINSIGSTKSZ	2048
-#define SIGSTKSZ	8192
-
-#include <asm-generic/signal-defs.h>
-
-#ifndef __KERNEL__
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-struct sigaction {
-	union {
-	  __sighandler_t _sa_handler;
-	  void (*_sa_sigaction)(int, struct siginfo *, void *);
-	} _u;
-	sigset_t sa_mask;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-};
-
-#define sa_handler	_u._sa_handler
-#define sa_sigaction	_u._sa_sigaction
-
-#endif /* __KERNEL__ */
-
-typedef struct sigaltstack {
-	void *ss_sp;
-	int ss_flags;
-	size_t ss_size;
-} stack_t;
-
-
-#endif /* _UAPI_H8300_SIGNAL_H */
diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h
deleted file mode 100644
index 9490758c5e2b..000000000000
--- a/arch/h8300/include/uapi/asm/socket.h
+++ /dev/null
@@ -1,79 +0,0 @@
-#ifndef _ASM_SOCKET_H
-#define _ASM_SOCKET_H
-
-#include <asm/sockios.h>
-
-/* For setsockoptions(2) */
-#define SOL_SOCKET	1
-
-#define SO_DEBUG	1
-#define SO_REUSEADDR	2
-#define SO_TYPE		3
-#define SO_ERROR	4
-#define SO_DONTROUTE	5
-#define SO_BROADCAST	6
-#define SO_SNDBUF	7
-#define SO_RCVBUF	8
-#define SO_SNDBUFFORCE	32
-#define SO_RCVBUFFORCE	33
-#define SO_KEEPALIVE	9
-#define SO_OOBINLINE	10
-#define SO_NO_CHECK	11
-#define SO_PRIORITY	12
-#define SO_LINGER	13
-#define SO_BSDCOMPAT	14
-#define SO_REUSEPORT	15
-#define SO_PASSCRED	16
-#define SO_PEERCRED	17
-#define SO_RCVLOWAT	18
-#define SO_SNDLOWAT	19
-#define SO_RCVTIMEO	20
-#define SO_SNDTIMEO	21
-
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION		22
-#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
-#define SO_SECURITY_ENCRYPTION_NETWORK		24
-
-#define SO_BINDTODEVICE	25
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER        26
-#define SO_DETACH_FILTER        27
-#define SO_GET_FILTER		SO_ATTACH_FILTER
-
-#define SO_PEERNAME             28
-#define SO_TIMESTAMP		29
-#define SCM_TIMESTAMP		SO_TIMESTAMP
-
-#define SO_ACCEPTCONN		30
-
-#define SO_PEERSEC		31
-#define SO_PASSSEC		34
-#define SO_TIMESTAMPNS		35
-#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
-
-#define SO_MARK			36
-
-#define SO_TIMESTAMPING		37
-#define SCM_TIMESTAMPING	SO_TIMESTAMPING
-
-#define SO_PROTOCOL		38
-#define SO_DOMAIN		39
-
-#define SO_RXQ_OVFL             40
-
-#define SO_WIFI_STATUS		41
-#define SCM_WIFI_STATUS		SO_WIFI_STATUS
-#define SO_PEEK_OFF		42
-
-/* Instruct lower device to use last 4-bytes of skb data as FCS */
-#define SO_NOFCS		43
-
-#define SO_LOCK_FILTER		44
-
-#define SO_SELECT_ERR_QUEUE	45
-
-#define SO_BUSY_POLL		46
-
-#endif /* _ASM_SOCKET_H */
diff --git a/arch/h8300/include/uapi/asm/sockios.h b/arch/h8300/include/uapi/asm/sockios.h
deleted file mode 100644
index e9c7ec810c23..000000000000
--- a/arch/h8300/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ARCH_H8300_SOCKIOS__
-#define __ARCH_H8300_SOCKIOS__
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 	0x8901
-#define SIOCSPGRP	0x8902
-#define FIOGETOWN	0x8903
-#define SIOCGPGRP	0x8904
-#define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
-#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
-
-#endif /* __ARCH_H8300_SOCKIOS__ */
diff --git a/arch/h8300/include/uapi/asm/stat.h b/arch/h8300/include/uapi/asm/stat.h
deleted file mode 100644
index 62c3cc24dfe6..000000000000
--- a/arch/h8300/include/uapi/asm/stat.h
+++ /dev/null
@@ -1,78 +0,0 @@
-#ifndef _H8300_STAT_H
-#define _H8300_STAT_H
-
-struct __old_kernel_stat {
-	unsigned short st_dev;
-	unsigned short st_ino;
-	unsigned short st_mode;
-	unsigned short st_nlink;
-	unsigned short st_uid;
-	unsigned short st_gid;
-	unsigned short st_rdev;
-	unsigned long  st_size;
-	unsigned long  st_atime;
-	unsigned long  st_mtime;
-	unsigned long  st_ctime;
-};
-
-struct stat {
-	unsigned short st_dev;
-	unsigned short __pad1;
-	unsigned long st_ino;
-	unsigned short st_mode;
-	unsigned short st_nlink;
-	unsigned short st_uid;
-	unsigned short st_gid;
-	unsigned short st_rdev;
-	unsigned short __pad2;
-	unsigned long  st_size;
-	unsigned long  st_blksize;
-	unsigned long  st_blocks;
-	unsigned long  st_atime;
-	unsigned long  __unused1;
-	unsigned long  st_mtime;
-	unsigned long  __unused2;
-	unsigned long  st_ctime;
-	unsigned long  __unused3;
-	unsigned long  __unused4;
-	unsigned long  __unused5;
-};
-
-/* This matches struct stat64 in glibc2.1, hence the absolutely
- * insane amounts of padding around dev_t's.
- */
-struct stat64 {
-	unsigned long long	st_dev;
-	unsigned char	__pad1[2];
-
-#define STAT64_HAS_BROKEN_ST_INO	1
-	unsigned long	__st_ino;
-
-	unsigned int	st_mode;
-	unsigned int	st_nlink;
-
-	unsigned long	st_uid;
-	unsigned long	st_gid;
-
-	unsigned long long	st_rdev;
-	unsigned char	__pad3[2];
-
-	long long	st_size;
-	unsigned long	st_blksize;
-
-	unsigned long	__pad4;		/* future possible st_blocks high bits */
-	unsigned long	st_blocks;	/* Number 512-byte blocks allocated. */
-
-	unsigned long	st_atime;
-	unsigned long	st_atime_nsec;
-
-	unsigned long	st_mtime;
-	unsigned long	st_mtime_nsec;
-
-	unsigned long	st_ctime;
-	unsigned long	st_ctime_nsec;
-
-	unsigned long long	st_ino;
-};
-
-#endif /* _H8300_STAT_H */
diff --git a/arch/h8300/include/uapi/asm/statfs.h b/arch/h8300/include/uapi/asm/statfs.h
deleted file mode 100644
index b96efa712aac..000000000000
--- a/arch/h8300/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_STATFS_H
-#define _H8300_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif /* _H8300_STATFS_H */
diff --git a/arch/h8300/include/uapi/asm/swab.h b/arch/h8300/include/uapi/asm/swab.h
deleted file mode 100644
index 39abbf52807d..000000000000
--- a/arch/h8300/include/uapi/asm/swab.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _H8300_SWAB_H
-#define _H8300_SWAB_H
-
-#include <linux/types.h>
-
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
-#  define __SWAB_64_THRU_32__
-#endif
-
-#endif /* _H8300_SWAB_H */
diff --git a/arch/h8300/include/uapi/asm/termbits.h b/arch/h8300/include/uapi/asm/termbits.h
deleted file mode 100644
index 3287a6244d74..000000000000
--- a/arch/h8300/include/uapi/asm/termbits.h
+++ /dev/null
@@ -1,201 +0,0 @@
-#ifndef __ARCH_H8300_TERMBITS_H__
-#define __ARCH_H8300_TERMBITS_H__
-
-#include <linux/posix_types.h>
-
-typedef unsigned char	cc_t;
-typedef unsigned int	speed_t;
-typedef unsigned int	tcflag_t;
-
-#define NCCS 19
-struct termios {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-};
-
-struct termios2 {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-	speed_t c_ispeed;		/* input speed */
-	speed_t c_ospeed;		/* output speed */
-};
-
-struct ktermios {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-	speed_t c_ispeed;		/* input speed */
-	speed_t c_ospeed;		/* output speed */
-};
-
-/* c_cc characters */
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-
-/* c_iflag bits */
-#define IGNBRK	0000001
-#define BRKINT	0000002
-#define IGNPAR	0000004
-#define PARMRK	0000010
-#define INPCK	0000020
-#define ISTRIP	0000040
-#define INLCR	0000100
-#define IGNCR	0000200
-#define ICRNL	0000400
-#define IUCLC	0001000
-#define IXON	0002000
-#define IXANY	0004000
-#define IXOFF	0010000
-#define IMAXBEL	0020000
-#define IUTF8	0040000
-
-/* c_oflag bits */
-#define OPOST	0000001
-#define OLCUC	0000002
-#define ONLCR	0000004
-#define OCRNL	0000010
-#define ONOCR	0000020
-#define ONLRET	0000040
-#define OFILL	0000100
-#define OFDEL	0000200
-#define NLDLY	0000400
-#define   NL0	0000000
-#define   NL1	0000400
-#define CRDLY	0003000
-#define   CR0	0000000
-#define   CR1	0001000
-#define   CR2	0002000
-#define   CR3	0003000
-#define TABDLY	0014000
-#define   TAB0	0000000
-#define   TAB1	0004000
-#define   TAB2	0010000
-#define   TAB3	0014000
-#define   XTABS	0014000
-#define BSDLY	0020000
-#define   BS0	0000000
-#define   BS1	0020000
-#define VTDLY	0040000
-#define   VT0	0000000
-#define   VT1	0040000
-#define FFDLY	0100000
-#define   FF0	0000000
-#define   FF1	0100000
-
-/* c_cflag bit meaning */
-#define CBAUD	0010017
-#define  B0	0000000		/* hang up */
-#define  B50	0000001
-#define  B75	0000002
-#define  B110	0000003
-#define  B134	0000004
-#define  B150	0000005
-#define  B200	0000006
-#define  B300	0000007
-#define  B600	0000010
-#define  B1200	0000011
-#define  B1800	0000012
-#define  B2400	0000013
-#define  B4800	0000014
-#define  B9600	0000015
-#define  B19200	0000016
-#define  B38400	0000017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE	0000060
-#define   CS5	0000000
-#define   CS6	0000020
-#define   CS7	0000040
-#define   CS8	0000060
-#define CSTOPB	0000100
-#define CREAD	0000200
-#define PARENB	0000400
-#define PARODD	0001000
-#define HUPCL	0002000
-#define CLOCAL	0004000
-#define CBAUDEX 0010000
-#define    BOTHER 0010000
-#define    B57600 0010001
-#define   B115200 0010002
-#define   B230400 0010003
-#define   B460800 0010004
-#define   B500000 0010005
-#define   B576000 0010006
-#define   B921600 0010007
-#define  B1000000 0010010
-#define  B1152000 0010011
-#define  B1500000 0010012
-#define  B2000000 0010013
-#define  B2500000 0010014
-#define  B3000000 0010015
-#define  B3500000 0010016
-#define  B4000000 0010017
-#define CIBAUD	  002003600000		/* input baud rate */
-#define CMSPAR	  010000000000		/* mark or space (stick) parity */
-#define CRTSCTS	  020000000000		/* flow control */
-
-#define IBSHIFT	  16			/* shift from CBAUD to CIBAUD */
-
-/* c_lflag bits */
-#define ISIG	0000001
-#define ICANON	0000002
-#define XCASE	0000004
-#define ECHO	0000010
-#define ECHOE	0000020
-#define ECHOK	0000040
-#define ECHONL	0000100
-#define NOFLSH	0000200
-#define TOSTOP	0000400
-#define ECHOCTL	0001000
-#define ECHOPRT	0002000
-#define ECHOKE	0004000
-#define FLUSHO	0010000
-#define PENDIN	0040000
-#define IEXTEN	0100000
-#define EXTPROC	0200000
-
-
-/* tcflow() and TCXONC use these */
-#define	TCOOFF		0
-#define	TCOON		1
-#define	TCIOFF		2
-#define	TCION		3
-
-/* tcflush() and TCFLSH use these */
-#define	TCIFLUSH	0
-#define	TCOFLUSH	1
-#define	TCIOFLUSH	2
-
-/* tcsetattr uses these */
-#define	TCSANOW		0
-#define	TCSADRAIN	1
-#define	TCSAFLUSH	2
-
-#endif /* __ARCH_H8300_TERMBITS_H__ */
diff --git a/arch/h8300/include/uapi/asm/termios.h b/arch/h8300/include/uapi/asm/termios.h
deleted file mode 100644
index 5a67d7e38843..000000000000
--- a/arch/h8300/include/uapi/asm/termios.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef _UAPI_H8300_TERMIOS_H
-#define _UAPI_H8300_TERMIOS_H
-
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
- 
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
-	unsigned short c_iflag;		/* input mode flags */
-	unsigned short c_oflag;		/* output mode flags */
-	unsigned short c_cflag;		/* control mode flags */
-	unsigned short c_lflag;		/* local mode flags */
-	unsigned char c_line;		/* line discipline */
-	unsigned char c_cc[NCC];	/* control characters */
-};
-
-
-/* modem lines */
-#define TIOCM_LE	0x001
-#define TIOCM_DTR	0x002
-#define TIOCM_RTS	0x004
-#define TIOCM_ST	0x008
-#define TIOCM_SR	0x010
-#define TIOCM_CTS	0x020
-#define TIOCM_CAR	0x040
-#define TIOCM_RNG	0x080
-#define TIOCM_DSR	0x100
-#define TIOCM_CD	TIOCM_CAR
-#define TIOCM_RI	TIOCM_RNG
-#define TIOCM_OUT1	0x2000
-#define TIOCM_OUT2	0x4000
-#define TIOCM_LOOP	0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-
-#endif /* _UAPI_H8300_TERMIOS_H */
diff --git a/arch/h8300/include/uapi/asm/types.h b/arch/h8300/include/uapi/asm/types.h
deleted file mode 100644
index 9ec9d4c5ac4d..000000000000
--- a/arch/h8300/include/uapi/asm/types.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/int-ll64.h>
diff --git a/arch/h8300/include/uapi/asm/unistd.h b/arch/h8300/include/uapi/asm/unistd.h
deleted file mode 100644
index 8cb5d429f840..000000000000
--- a/arch/h8300/include/uapi/asm/unistd.h
+++ /dev/null
@@ -1,330 +0,0 @@
-#ifndef _UAPI_ASM_H8300_UNISTD_H_
-#define _UAPI_ASM_H8300_UNISTD_H_
-
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_restart_syscall      0
-#define __NR_exit		  1
-#define __NR_fork		  2
-#define __NR_read		  3
-#define __NR_write		  4
-#define __NR_open		  5
-#define __NR_close		  6
-#define __NR_waitpid		  7
-#define __NR_creat		  8
-#define __NR_link		  9
-#define __NR_unlink		 10
-#define __NR_execve		 11
-#define __NR_chdir		 12
-#define __NR_time		 13
-#define __NR_mknod		 14
-#define __NR_chmod		 15
-#define __NR_lchown		 16
-#define __NR_break		 17
-#define __NR_oldstat		 18
-#define __NR_lseek		 19
-#define __NR_getpid		 20
-#define __NR_mount		 21
-#define __NR_umount		 22
-#define __NR_setuid		 23
-#define __NR_getuid		 24
-#define __NR_stime		 25
-#define __NR_ptrace		 26
-#define __NR_alarm		 27
-#define __NR_oldfstat		 28
-#define __NR_pause		 29
-#define __NR_utime		 30
-#define __NR_stty		 31
-#define __NR_gtty		 32
-#define __NR_access		 33
-#define __NR_nice		 34
-#define __NR_ftime		 35
-#define __NR_sync		 36
-#define __NR_kill		 37
-#define __NR_rename		 38
-#define __NR_mkdir		 39
-#define __NR_rmdir		 40
-#define __NR_dup		 41
-#define __NR_pipe		 42
-#define __NR_times		 43
-#define __NR_prof		 44
-#define __NR_brk		 45
-#define __NR_setgid		 46
-#define __NR_getgid		 47
-#define __NR_signal		 48
-#define __NR_geteuid		 49
-#define __NR_getegid		 50
-#define __NR_acct		 51
-#define __NR_umount2		 52
-#define __NR_lock		 53
-#define __NR_ioctl		 54
-#define __NR_fcntl		 55
-#define __NR_mpx		 56
-#define __NR_setpgid		 57
-#define __NR_ulimit		 58
-#define __NR_oldolduname	 59
-#define __NR_umask		 60
-#define __NR_chroot		 61
-#define __NR_ustat		 62
-#define __NR_dup2		 63
-#define __NR_getppid		 64
-#define __NR_getpgrp		 65
-#define __NR_setsid		 66
-#define __NR_sigaction		 67
-#define __NR_sgetmask		 68
-#define __NR_ssetmask		 69
-#define __NR_setreuid		 70
-#define __NR_setregid		 71
-#define __NR_sigsuspend		 72
-#define __NR_sigpending		 73
-#define __NR_sethostname	 74
-#define __NR_setrlimit		 75
-#define __NR_getrlimit		 76
-#define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
-#define __NR_getgroups		 80
-#define __NR_setgroups		 81
-#define __NR_select		 82
-#define __NR_symlink		 83
-#define __NR_oldlstat		 84
-#define __NR_readlink		 85
-#define __NR_uselib		 86
-#define __NR_swapon		 87
-#define __NR_reboot		 88
-#define __NR_readdir		 89
-#define __NR_mmap		 90
-#define __NR_munmap		 91
-#define __NR_truncate		 92
-#define __NR_ftruncate		 93
-#define __NR_fchmod		 94
-#define __NR_fchown		 95
-#define __NR_getpriority	 96
-#define __NR_setpriority	 97
-#define __NR_profil		 98
-#define __NR_statfs		 99
-#define __NR_fstatfs		100
-#define __NR_ioperm		101
-#define __NR_socketcall		102
-#define __NR_syslog		103
-#define __NR_setitimer		104
-#define __NR_getitimer		105
-#define __NR_stat		106
-#define __NR_lstat		107
-#define __NR_fstat		108
-#define __NR_olduname		109
-#define __NR_iopl		110
-#define __NR_vhangup		111
-#define __NR_idle		112
-#define __NR_vm86old		113
-#define __NR_wait4		114
-#define __NR_swapoff		115
-#define __NR_sysinfo		116
-#define __NR_ipc		117
-#define __NR_fsync		118
-#define __NR_sigreturn		119
-#define __NR_clone		120
-#define __NR_setdomainname	121
-#define __NR_uname		122
-#define __NR_modify_ldt		123
-#define __NR_adjtimex		124
-#define __NR_mprotect		125
-#define __NR_sigprocmask	126
-#define __NR_create_module	127
-#define __NR_init_module	128
-#define __NR_delete_module	129
-#define __NR_get_kernel_syms	130
-#define __NR_quotactl		131
-#define __NR_getpgid		132
-#define __NR_fchdir		133
-#define __NR_bdflush		134
-#define __NR_sysfs		135
-#define __NR_personality	136
-#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
-#define __NR_setfsuid		138
-#define __NR_setfsgid		139
-#define __NR__llseek		140
-#define __NR_getdents		141
-#define __NR__newselect		142
-#define __NR_flock		143
-#define __NR_msync		144
-#define __NR_readv		145
-#define __NR_writev		146
-#define __NR_getsid		147
-#define __NR_fdatasync		148
-#define __NR__sysctl		149
-#define __NR_mlock		150
-#define __NR_munlock		151
-#define __NR_mlockall		152
-#define __NR_munlockall		153
-#define __NR_sched_setparam		154
-#define __NR_sched_getparam		155
-#define __NR_sched_setscheduler		156
-#define __NR_sched_getscheduler		157
-#define __NR_sched_yield		158
-#define __NR_sched_get_priority_max	159
-#define __NR_sched_get_priority_min	160
-#define __NR_sched_rr_get_interval	161
-#define __NR_nanosleep		162
-#define __NR_mremap		163
-#define __NR_setresuid		164
-#define __NR_getresuid		165
-#define __NR_vm86		166
-#define __NR_query_module	167
-#define __NR_poll		168
-#define __NR_nfsservctl		169
-#define __NR_setresgid		170
-#define __NR_getresgid		171
-#define __NR_prctl		172
-#define __NR_rt_sigreturn	173
-#define __NR_rt_sigaction	174
-#define __NR_rt_sigprocmask	175
-#define __NR_rt_sigpending	176
-#define __NR_rt_sigtimedwait	177
-#define __NR_rt_sigqueueinfo	178
-#define __NR_rt_sigsuspend	179
-#define __NR_pread64		180
-#define __NR_pwrite64		181
-#define __NR_chown		182
-#define __NR_getcwd		183
-#define __NR_capget		184
-#define __NR_capset		185
-#define __NR_sigaltstack	186
-#define __NR_sendfile		187
-#define __NR_getpmsg		188	/* some people actually want streams */
-#define __NR_putpmsg		189	/* some people actually want streams */
-#define __NR_vfork		190
-#define __NR_ugetrlimit		191
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_lchown32		198
-#define __NR_getuid32		199
-#define __NR_getgid32		200
-#define __NR_geteuid32		201
-#define __NR_getegid32		202
-#define __NR_setreuid32		203
-#define __NR_setregid32		204
-#define __NR_getgroups32	205
-#define __NR_setgroups32	206
-#define __NR_fchown32		207
-#define __NR_setresuid32	208
-#define __NR_getresuid32	209
-#define __NR_setresgid32	210
-#define __NR_getresgid32	211
-#define __NR_chown32		212
-#define __NR_setuid32		213
-#define __NR_setgid32		214
-#define __NR_setfsuid32		215
-#define __NR_setfsgid32		216
-#define __NR_pivot_root		217
-#define __NR_mincore		218
-#define __NR_madvise		219
-#define __NR_madvise1		219
-#define __NR_getdents64		220
-#define __NR_fcntl64		221
-/* 223 is unused */
-#define __NR_gettid		224
-#define __NR_readahead		225
-#define __NR_setxattr		226
-#define __NR_lsetxattr		227
-#define __NR_fsetxattr		228
-#define __NR_getxattr		229
-#define __NR_lgetxattr		230
-#define __NR_fgetxattr		231
-#define __NR_listxattr		232
-#define __NR_llistxattr		233
-#define __NR_flistxattr		234
-#define __NR_removexattr	235
-#define __NR_lremovexattr	236
-#define __NR_fremovexattr	237
-#define __NR_tkill		238
-#define __NR_sendfile64		239
-#define __NR_futex		240
-#define __NR_sched_setaffinity	241
-#define __NR_sched_getaffinity	242
-#define __NR_set_thread_area	243
-#define __NR_get_thread_area	244
-#define __NR_io_setup		245
-#define __NR_io_destroy		246
-#define __NR_io_getevents	247
-#define __NR_io_submit		248
-#define __NR_io_cancel		249
-#define __NR_fadvise64		250
-/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */
-#define __NR_exit_group		252
-#define __NR_lookup_dcookie	253
-#define __NR_epoll_create	254
-#define __NR_epoll_ctl		255
-#define __NR_epoll_wait		256
-#define __NR_remap_file_pages	257
-#define __NR_set_tid_address	258
-#define __NR_timer_create	259
-#define __NR_timer_settime	(__NR_timer_create+1)
-#define __NR_timer_gettime	(__NR_timer_create+2)
-#define __NR_timer_getoverrun	(__NR_timer_create+3)
-#define __NR_timer_delete	(__NR_timer_create+4)
-#define __NR_clock_settime	(__NR_timer_create+5)
-#define __NR_clock_gettime	(__NR_timer_create+6)
-#define __NR_clock_getres	(__NR_timer_create+7)
-#define __NR_clock_nanosleep	(__NR_timer_create+8)
-#define __NR_statfs64		268
-#define __NR_fstatfs64		269
-#define __NR_tgkill		270
-#define __NR_utimes		271
-#define __NR_fadvise64_64	272
-#define __NR_vserver		273
-#define __NR_mbind		274
-#define __NR_get_mempolicy	275
-#define __NR_set_mempolicy	276
-#define __NR_mq_open 		277
-#define __NR_mq_unlink		(__NR_mq_open+1)
-#define __NR_mq_timedsend	(__NR_mq_open+2)
-#define __NR_mq_timedreceive	(__NR_mq_open+3)
-#define __NR_mq_notify		(__NR_mq_open+4)
-#define __NR_mq_getsetattr	(__NR_mq_open+5)
-#define __NR_kexec_load		283
-#define __NR_waitid		284
-/* #define __NR_sys_setaltroot	285 */
-#define __NR_add_key		286
-#define __NR_request_key	287
-#define __NR_keyctl		288
-#define __NR_ioprio_set		289
-#define __NR_ioprio_get		290
-#define __NR_inotify_init	291
-#define __NR_inotify_add_watch	292
-#define __NR_inotify_rm_watch	293
-#define __NR_migrate_pages	294
-#define __NR_openat		295
-#define __NR_mkdirat		296
-#define __NR_mknodat		297
-#define __NR_fchownat		298
-#define __NR_futimesat		299
-#define __NR_fstatat64		300
-#define __NR_unlinkat		301
-#define __NR_renameat		302
-#define __NR_linkat		303
-#define __NR_symlinkat		304
-#define __NR_readlinkat		305
-#define __NR_fchmodat		306
-#define __NR_faccessat		307
-#define __NR_pselect6		308
-#define __NR_ppoll		309
-#define __NR_unshare		310
-#define __NR_set_robust_list	311
-#define __NR_get_robust_list	312
-#define __NR_splice		313
-#define __NR_sync_file_range	314
-#define __NR_tee		315
-#define __NR_vmsplice		316
-#define __NR_move_pages		317
-#define __NR_getcpu		318
-#define __NR_epoll_pwait	319
-#define __NR_setns		320
-
-#endif /* _UAPI_ASM_H8300_UNISTD_H_ */
diff --git a/arch/h8300/kernel/Makefile b/arch/h8300/kernel/Makefile
deleted file mode 100644
index 1cc57f872d34..000000000000
--- a/arch/h8300/kernel/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := vmlinux.lds
-
-obj-y := process.o traps.o ptrace.o irq.o \
-	 sys_h8300.o time.o signal.o \
-         setup.o gpio.o syscalls.o \
-	 entry.o timer/
-
-obj-$(CONFIG_MODULES) += module.o h8300_ksyms.o 
diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c
deleted file mode 100644
index fd961e0bd741..000000000000
--- a/arch/h8300/kernel/asm-offsets.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * This program is used to generate definitions needed by
- * assembly language modules.
- *
- * We use the technique used in the OSF Mach kernel code:
- * generate asm statements containing #defines,
- * compile this file to assembler, and then extract the
- * #defines from the assembly-language output.
- */
-
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/ptrace.h>
-#include <linux/hardirq.h>
-#include <linux/kbuild.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/ptrace.h>
-
-int main(void)
-{
-	/* offsets into the task struct */
-	DEFINE(TASK_STATE, offsetof(struct task_struct, state));
-	DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags));
-	DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
-	DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
-	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
-	DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack));
-	DEFINE(TASK_MM, offsetof(struct task_struct, mm));
-	DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
-
-	/* offsets into the irq_cpustat_t struct */
-	DEFINE(CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
-
-	/* offsets into the thread struct */
-	DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
-	DEFINE(THREAD_USP, offsetof(struct thread_struct, usp));
-	DEFINE(THREAD_CCR, offsetof(struct thread_struct, ccr));
-
-	/* offsets into the pt_regs struct */
-	DEFINE(LER0,  offsetof(struct pt_regs, er0)      - sizeof(long));
-	DEFINE(LER1,  offsetof(struct pt_regs, er1)      - sizeof(long));
-	DEFINE(LER2,  offsetof(struct pt_regs, er2)      - sizeof(long));
-	DEFINE(LER3,  offsetof(struct pt_regs, er3)      - sizeof(long));
-	DEFINE(LER4,  offsetof(struct pt_regs, er4)      - sizeof(long));
-	DEFINE(LER5,  offsetof(struct pt_regs, er5)      - sizeof(long));
-	DEFINE(LER6,  offsetof(struct pt_regs, er6)      - sizeof(long));
-	DEFINE(LORIG, offsetof(struct pt_regs, orig_er0) - sizeof(long));
-	DEFINE(LCCR,  offsetof(struct pt_regs, ccr)      - sizeof(long));
-	DEFINE(LVEC,  offsetof(struct pt_regs, vector)   - sizeof(long));
-#if defined(__H8300S__)
-	DEFINE(LEXR,  offsetof(struct pt_regs, exr)      - sizeof(long));
-#endif
-	DEFINE(LRET,  offsetof(struct pt_regs, pc)       - sizeof(long));
-
-	DEFINE(PT_PTRACED, PT_PTRACED);
-
-	return 0;
-}
diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S
deleted file mode 100644
index 94bd30f11df6..000000000000
--- a/arch/h8300/kernel/entry.S
+++ /dev/null
@@ -1,402 +0,0 @@
-/* -*- mode: asm -*-
- *
- *  linux/arch/h8300/platform/h8300h/entry.S
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *  David McCullough <davidm@snapgear.com>
- *
- */
-
-/*
- *  entry.S
- *  include exception/interrupt gateway
- *          system call entry
- */
-
-#include <linux/sys.h>
-#include <asm/unistd.h>
-#include <asm/setup.h>
-#include <asm/segment.h>
-#include <asm/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/errno.h>
-
-#if defined(CONFIG_CPU_H8300H)
-#define USERRET 8
-INTERRUPTS = 64
-	.h8300h
-	.macro	SHLL2 reg
-	shll.l	\reg
-	shll.l	\reg
-	.endm
-	.macro	SHLR2 reg
-	shlr.l	\reg
-	shlr.l	\reg
-	.endm
-	.macro	SAVEREGS
-	mov.l	er0,@-sp
-	mov.l	er1,@-sp
-	mov.l	er2,@-sp
-	mov.l	er3,@-sp
-	.endm
-	.macro	RESTOREREGS
-	mov.l	@sp+,er3
-	mov.l	@sp+,er2
-	.endm
-	.macro	SAVEEXR
-	.endm
-	.macro	RESTOREEXR
-	.endm
-#endif
-#if defined(CONFIG_CPU_H8S)
-#define USERRET 10
-#define USEREXR 8
-INTERRUPTS = 128
-	.h8300s
-	.macro	SHLL2 reg
-	shll.l	#2,\reg
-	.endm
-	.macro	SHLR2 reg
-	shlr.l	#2,\reg
-	.endm
-	.macro	SAVEREGS
-	stm.l	er0-er3,@-sp
-	.endm
-	.macro	RESTOREREGS
-	ldm.l	@sp+,er2-er3
-	.endm
-	.macro	SAVEEXR
-	mov.w	@(USEREXR:16,er0),r1
-	mov.w	r1,@(LEXR-LER3:16,sp)		/* copy EXR */
-	.endm
-	.macro	RESTOREEXR
-	mov.w	@(LEXR-LER1:16,sp),r1		/* restore EXR */
-	mov.b	r1l,r1h
-	mov.w	r1,@(USEREXR:16,er0)
-	.endm
-#endif
-
-
-/* CPU context save/restore macros. */
-
-	.macro	SAVE_ALL
-	mov.l	er0,@-sp
-	stc	ccr,r0l				/* check kernel mode */
-	btst	#4,r0l
-	bne	5f
-
-	/* user mode */
-	mov.l	sp,@_sw_usp
-	mov.l	@sp,er0				/* restore saved er0 */
-	orc	#0x10,ccr			/* switch kernel stack */
-	mov.l	@_sw_ksp,sp
-	sub.l	#(LRET-LORIG),sp		/* allocate LORIG - LRET */
-	SAVEREGS
-	mov.l   @_sw_usp,er0
-	mov.l   @(USERRET:16,er0),er1           /* copy the RET addr */
-	mov.l   er1,@(LRET-LER3:16,sp)
-	SAVEEXR
-
-	mov.l	@(LORIG-LER3:16,sp),er0
-	mov.l	er0,@(LER0-LER3:16,sp)		/* copy ER0 */
-	mov.w	e1,r1				/* e1 highbyte = ccr */
-	and	#0xef,r1h			/* mask mode? flag */
-	bra	6f
-5:
-	/* kernel mode */
-	mov.l	@sp,er0				/* restore saved er0 */
-	subs	#2,sp				/* set dummy ccr */
-	SAVEREGS
-	mov.w	@(LRET-LER3:16,sp),r1		/* copy old ccr */
-6:
-	mov.b	r1h,r1l
-	mov.b	#0,r1h
-	mov.w	r1,@(LCCR-LER3:16,sp)		/* set ccr */
-	mov.l	er6,@-sp			/* syscall arg #6 */
-	mov.l	er5,@-sp			/* syscall arg #5 */
-	mov.l	er4,@-sp			/* syscall arg #4 */
-	.endm					/* r1 = ccr */
-
-	.macro	RESTORE_ALL
-	mov.l	@sp+,er4
-	mov.l	@sp+,er5
-	mov.l	@sp+,er6
-	RESTOREREGS
-	mov.w	@(LCCR-LER1:16,sp),r0		/* check kernel mode */
-	btst	#4,r0l
-	bne	7f
-
-	orc	#0x80,ccr
-	mov.l	@_sw_usp,er0
-	mov.l	@(LER0-LER1:16,sp),er1		/* restore ER0 */
-	mov.l	er1,@er0
-	RESTOREEXR
-	mov.w	@(LCCR-LER1:16,sp),r1		/* restore the RET addr */
-	mov.b	r1l,r1h
-	mov.b	@(LRET+1-LER1:16,sp),r1l
-	mov.w	r1,e1
-	mov.w	@(LRET+2-LER1:16,sp),r1
-	mov.l	er1,@(USERRET:16,er0)
-
-	mov.l	@sp+,er1
-	add.l	#(LRET-LER1),sp			/* remove LORIG - LRET */
-	mov.l	sp,@_sw_ksp
-	andc	#0xef,ccr			/* switch to user mode */
-	mov.l	er0,sp
-	bra	8f
-7:
-	mov.l	@sp+,er1
-	adds	#4,sp
-	adds	#2,sp
-8:
-	mov.l	@sp+,er0
-	adds	#4,sp				/* remove the sw created LVEC */
-	rte
-	.endm
-
-.globl _system_call
-.globl _ret_from_exception
-.globl _ret_from_fork
-.globl _ret_from_kernel_thread
-.globl _ret_from_interrupt
-.globl _interrupt_redirect_table
-.globl _sw_ksp,_sw_usp
-.globl _resume
-.globl _interrupt_entry
-.globl _trace_break
-
-#if defined(CONFIG_ROMKERNEL)
-	.section .int_redirect,"ax"
-_interrupt_redirect_table:
-#if defined(CONFIG_CPU_H8300H)
-	.rept	7
-	.long	0
-	.endr
-#endif
-#if defined(CONFIG_CPU_H8S)
-	.rept	5
-	.long	0
-	.endr
-	jmp	@_trace_break
-	.long	0
-#endif
-
-	jsr	@_interrupt_entry		/* NMI */
-	jmp	@_system_call			/* TRAPA #0 (System call) */
-	.long	0
-	.long	0
-	jmp	@_trace_break			/* TRAPA #3 (breakpoint) */
-	.rept	INTERRUPTS-12
-	jsr	@_interrupt_entry
-	.endr
-#endif
-#if defined(CONFIG_RAMKERNEL)
-.globl _interrupt_redirect_table
-	.section .bss
-_interrupt_redirect_table:
-	.space	4
-#endif
-
-	.section .text
-	.align	2
-_interrupt_entry:
-	SAVE_ALL
-	mov.l	sp,er0
-	add.l	#LVEC,er0
-	btst	#4,r1l
-	bne	1f
-	/* user LVEC */
-	mov.l	@_sw_usp,er0
-	adds	#4,er0
-1:
-	mov.l	@er0,er0			/* LVEC address */
-#if defined(CONFIG_ROMKERNEL)
-	sub.l	#_interrupt_redirect_table,er0
-#endif
-#if defined(CONFIG_RAMKERNEL)
-	mov.l	@_interrupt_redirect_table,er1
-	sub.l	er1,er0
-#endif
-	SHLR2	er0
-	dec.l	#1,er0
-	mov.l	sp,er1
-	subs	#4,er1				/* adjust ret_pc */
-	jsr	@_do_IRQ
-	jmp	@_ret_from_interrupt
-
-_system_call:
-	subs	#4,sp				/* dummy LVEC */
-	SAVE_ALL
-	andc	#0x7f,ccr
-	mov.l	er0,er4
-
-	/* save top of frame */
-	mov.l	sp,er0
-	jsr	@_set_esp0
-	mov.l	sp,er2
-	and.w	#0xe000,r2
-	mov.b	@((TI_FLAGS+3-(TIF_SYSCALL_TRACE >> 3)):16,er2),r2l
-	btst	#(TIF_SYSCALL_TRACE & 7),r2l
-	beq	1f
-	jsr	@_do_syscall_trace
-1:
-	cmp.l	#NR_syscalls,er4
-	bcc	badsys
-	SHLL2	er4
-	mov.l	#_sys_call_table,er0
-	add.l	er4,er0
-	mov.l	@er0,er4
-	beq	_ret_from_exception:16
-	mov.l	@(LER1:16,sp),er0
-	mov.l	@(LER2:16,sp),er1
-	mov.l	@(LER3:16,sp),er2
-	jsr	@er4
-	mov.l	er0,@(LER0:16,sp)		/* save the return value */
-	mov.l	sp,er2
-	and.w	#0xe000,r2
-	mov.b	@((TI_FLAGS+3-(TIF_SYSCALL_TRACE >> 3)):16,er2),r2l
-	btst	#(TIF_SYSCALL_TRACE & 7),r2l
-	beq	2f
-	jsr	@_do_syscall_trace
-2:
-#if defined(CONFIG_SYSCALL_PRINT)
-	jsr	@_syscall_print
-#endif
-	orc	#0x80,ccr
-	bra	resume_userspace
-
-badsys:
-	mov.l	#-ENOSYS,er0
-	mov.l	er0,@(LER0:16,sp)
-	bra	resume_userspace
-
-#if !defined(CONFIG_PREEMPT)
-#define resume_kernel restore_all
-#endif
-
-_ret_from_exception:
-#if defined(CONFIG_PREEMPT)
-	orc	#0x80,ccr
-#endif
-_ret_from_interrupt:
-	mov.b	@(LCCR+1:16,sp),r0l
-	btst	#4,r0l
-	bne	resume_kernel:8		/* return from kernel */
-resume_userspace:
-	andc	#0x7f,ccr
-	mov.l	sp,er4
-	and.w	#0xe000,r4		/* er4 <- current thread info */
-	mov.l	@(TI_FLAGS:16,er4),er1
-	and.l	#_TIF_WORK_MASK,er1
-	beq	restore_all:8
-work_pending:
-	btst	#TIF_NEED_RESCHED,r1l
-	bne	work_resched:8
-	/* work notifysig */
-	mov.l	sp,er0
-	subs	#4,er0			/* er0: pt_regs */
-	jsr	@_do_notify_resume
-	bra	restore_all:8
-work_resched:
-	mov.l	sp,er0
-	jsr	@_set_esp0
-	jsr	@_schedule
-	bra	resume_userspace:8
-restore_all:
-	RESTORE_ALL			/* Does RTE */
-
-#if defined(CONFIG_PREEMPT)
-resume_kernel:
-	mov.l	@(TI_PRE_COUNT:16,er4),er0
-	bne	restore_all:8
-need_resched:
-	mov.l	@(TI_FLAGS:16,er4),er0
-	btst	#TIF_NEED_RESCHED,r0l
-	beq	restore_all:8
-	mov.b	@(LCCR+1:16,sp),r0l	/* Interrupt Enabled? */
-	bmi	restore_all:8
-	mov.l	#PREEMPT_ACTIVE,er0
-	mov.l	er0,@(TI_PRE_COUNT:16,er4)
-	andc	#0x7f,ccr
-	mov.l	sp,er0
-	jsr	@_set_esp0
-	jsr	@_schedule
-	orc	#0x80,ccr
-	bra	need_resched:8
-#endif
-
-_ret_from_fork:
-	mov.l	er2,er0
-	jsr	@_schedule_tail
-	jmp	@_ret_from_exception
-
-_ret_from_kernel_thread:
-	mov.l	er2,er0
-	jsr	@_schedule_tail
-	mov.l	@(LER4:16,sp),er0
-	mov.l	@(LER5:16,sp),er1
-	jsr	@er1
-	jmp	@_ret_from_exception
-
-_resume:
-	/*
-	 * Beware - when entering resume, offset of tss is in d1,
-	 * prev (the current task) is in a0, next (the new task)
-	 * is in a1 and d2.b is non-zero if the mm structure is
-	 * shared between the tasks, so don't change these
-	 * registers until their contents are no longer needed.
-	 */
-
-	/* save sr */
-	sub.w	r3,r3
-	stc	ccr,r3l
-	mov.w	r3,@(THREAD_CCR+2:16,er0)
-
-	/* disable interrupts */
-	orc	#0x80,ccr
-	mov.l	@_sw_usp,er3
-	mov.l	er3,@(THREAD_USP:16,er0)
-	mov.l	sp,@(THREAD_KSP:16,er0)
-
-	/* Skip address space switching if they are the same. */
-	/* FIXME: what did we hack out of here, this does nothing! */
-
-	mov.l	@(THREAD_USP:16,er1),er0
-	mov.l	er0,@_sw_usp
-	mov.l	@(THREAD_KSP:16,er1),sp
-
-	/* restore status register */
-	mov.w	@(THREAD_CCR+2:16,er1),r3
-
-	ldc	r3l,ccr
-	rts
-
-_trace_break:
-	subs	#4,sp
-	SAVE_ALL
-	sub.l	er1,er1
-	dec.l	#1,er1
-	mov.l	er1,@(LORIG,sp)
-	mov.l	sp,er0
-	jsr	@_set_esp0
-	mov.l	@_sw_usp,er0
-	mov.l	@er0,er1
-	mov.w	@(-2:16,er1),r2
-	cmp.w	#0x5730,r2
-	beq	1f
-	subs	#2,er1
-	mov.l	er1,@er0
-1:
-	and.w	#0xff,e1
-	mov.l	er1,er0
-	jsr	@_trace_trap
-	jmp	@_ret_from_exception
-
-	.section	.bss
-_sw_ksp:
-	.space	4
-_sw_usp:
-	.space	4
-
-	.end
diff --git a/arch/h8300/kernel/gpio.c b/arch/h8300/kernel/gpio.c
deleted file mode 100644
index 084bfd0c107e..000000000000
--- a/arch/h8300/kernel/gpio.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/gpio.c
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- */
-
-/*
- * Internal I/O Port Management
- */
-
-#include <linux/stddef.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-
-#define _(addr) (volatile unsigned char *)(addr)
-#if defined(CONFIG_H83007) || defined(CONFIG_H83068)
-#include <asm/regs306x.h>
-static volatile unsigned char *ddrs[] = {
-	_(P1DDR),_(P2DDR),_(P3DDR),_(P4DDR),_(P5DDR),_(P6DDR),
-	NULL,    _(P8DDR),_(P9DDR),_(PADDR),_(PBDDR),
-};
-#define MAX_PORT 11
-#endif
-
- #if defined(CONFIG_H83002) || defined(CONFIG_H8048)
-/* Fix me!! */
-#include <asm/regs306x.h>
-static volatile unsigned char *ddrs[] = {
-	_(P1DDR),_(P2DDR),_(P3DDR),_(P4DDR),_(P5DDR),_(P6DDR),
-	NULL,    _(P8DDR),_(P9DDR),_(PADDR),_(PBDDR),
-};
-#define MAX_PORT 11
-#endif
-
-#if defined(CONFIG_H8S2678)
-#include <asm/regs267x.h>
-static volatile unsigned char *ddrs[] = {
-	_(P1DDR),_(P2DDR),_(P3DDR),NULL    ,_(P5DDR),_(P6DDR),
-	_(P7DDR),_(P8DDR),NULL,    _(PADDR),_(PBDDR),_(PCDDR),
-	_(PDDDR),_(PEDDR),_(PFDDR),_(PGDDR),_(PHDDR),
-	_(PADDR),_(PBDDR),_(PCDDR),_(PDDDR),_(PEDDR),_(PFDDR),
-	_(PGDDR),_(PHDDR)
-};
-#define MAX_PORT 17
-#endif
-#undef _
- 
-#if !defined(P1DDR)
-#error Unsuppoted CPU Selection
-#endif
-
-static struct {
-	unsigned char used;
-	unsigned char ddr;
-} gpio_regs[MAX_PORT];
-
-extern char *_platform_gpio_table(int length);
-
-int h8300_reserved_gpio(int port, unsigned int bits)
-{
-	unsigned char *used;
-
-	if (port < 0 || port >= MAX_PORT)
-		return -1;
-	used = &(gpio_regs[port].used);
-	if ((*used & bits) != 0)
-		return 0;
-	*used |= bits;
-	return 1;
-}
-
-int h8300_free_gpio(int port, unsigned int bits)
-{
-	unsigned char *used;
-
-	if (port < 0 || port >= MAX_PORT)
-		return -1;
-	used = &(gpio_regs[port].used);
-	if ((*used & bits) != bits)
-		return 0;
-	*used &= (~bits);
-	return 1;
-}
-
-int h8300_set_gpio_dir(int port_bit,int dir)
-{
-	int port = (port_bit >> 8) & 0xff;
-	int bit  = port_bit & 0xff;
-
-	if (ddrs[port] == NULL)
-		return 0;
-	if (gpio_regs[port].used & bit) {
-		if (dir)
-			gpio_regs[port].ddr |= bit;
-		else
-			gpio_regs[port].ddr &= ~bit;
-		*ddrs[port] = gpio_regs[port].ddr;
-		return 1;
-	} else
-		return 0;
-}
-
-int h8300_get_gpio_dir(int port_bit)
-{
-	int port = (port_bit >> 8) & 0xff;
-	int bit  = port_bit & 0xff;
-
-	if (ddrs[port] == NULL)
-		return 0;
-	if (gpio_regs[port].used & bit) {
-		return (gpio_regs[port].ddr & bit) != 0;
-	} else
-		return -1;
-}
-
-#if defined(CONFIG_PROC_FS)
-static char *port_status(int portno)
-{
-	static char result[10];
-	static const char io[2]={'I','O'};
-	char *rp;
-	int c;
-	unsigned char used,ddr;
-	
-	used = gpio_regs[portno].used;
-	ddr  = gpio_regs[portno].ddr;
-	result[8]='\0';
-	rp = result + 7;
-	for (c = 8; c > 0; c--,rp--,used >>= 1, ddr >>= 1)
-		if (used & 0x01)
-			*rp = io[ ddr & 0x01];
-		else	
-			*rp = '-';
-	return result;
-}
-
-static int gpio_proc_show(struct seq_file *m, void *v)
-{
-	static const char port_name[]="123456789ABCDEFGH";
-	int c;
-
-	for (c = 0; c < MAX_PORT; c++) {
-		if (ddrs[c] == NULL)
-			continue;
-		seq_printf(m, "P%c: %s\n", port_name[c], port_status(c));
-	}
-	return 0;
-}
-
-static int gpio_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, gpio_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations gpio_proc_fops = {
-	.open		= gpio_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static __init int register_proc(void)
-{
-	return proc_create("gpio", S_IRUGO, NULL, &gpio_proc_fops) != NULL;
-}
-
-__initcall(register_proc);
-#endif
-
-void __init h8300_gpio_init(void)
-{
-	memcpy(gpio_regs,_platform_gpio_table(sizeof(gpio_regs)),sizeof(gpio_regs));
-}
diff --git a/arch/h8300/kernel/h8300_ksyms.c b/arch/h8300/kernel/h8300_ksyms.c
deleted file mode 100644
index 53d7c0e4bd83..000000000000
--- a/arch/h8300/kernel/h8300_ksyms.c
+++ /dev/null
@@ -1,100 +0,0 @@
-#include <linux/module.h>
-#include <linux/linkage.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/user.h>
-#include <linux/elfcore.h>
-#include <linux/in6.h>
-#include <linux/interrupt.h>
-
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/checksum.h>
-#include <asm/current.h>
-#include <asm/gpio.h>
-
-//asmlinkage long long __ashrdi3 (long long, int);
-//asmlinkage long long __lshrdi3 (long long, int);
-extern char h8300_debug_device[];
-
-/* platform dependent support */
-
-EXPORT_SYMBOL(strnlen);
-EXPORT_SYMBOL(strrchr);
-EXPORT_SYMBOL(strstr);
-EXPORT_SYMBOL(strchr);
-EXPORT_SYMBOL(strcat);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strcmp);
-EXPORT_SYMBOL(strncmp);
-
-EXPORT_SYMBOL(ip_fast_csum);
-
-EXPORT_SYMBOL(enable_irq);
-EXPORT_SYMBOL(disable_irq);
-
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
-
-/* The following are special because they're not called
-   explicitly (the C compiler generates them).  Fortunately,
-   their interface isn't gonna change any time soon now, so
-   it's OK to leave it out of version control.  */
-//EXPORT_SYMBOL(__ashrdi3);
-//EXPORT_SYMBOL(__lshrdi3);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcmp);
-EXPORT_SYMBOL(memscan);
-EXPORT_SYMBOL(memmove);
-
-/*
- * libgcc functions - functions that are used internally by the
- * compiler...  (prototypes are not correct though, but that
- * doesn't really matter since they're not versioned).
- */
-extern void __gcc_bcmp(void);
-extern void __ashldi3(void);
-extern void __ashrdi3(void);
-extern void __cmpdi2(void);
-extern void __divdi3(void);
-extern void __divsi3(void);
-extern void __lshrdi3(void);
-extern void __moddi3(void);
-extern void __modsi3(void);
-extern void __muldi3(void);
-extern void __mulsi3(void);
-extern void __negdi2(void);
-extern void __ucmpdi2(void);
-extern void __udivdi3(void);
-extern void __udivmoddi4(void);
-extern void __udivsi3(void);
-extern void __umoddi3(void);
-extern void __umodsi3(void);
-
-        /* gcc lib functions */
-EXPORT_SYMBOL(__gcc_bcmp);
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__cmpdi2);
-EXPORT_SYMBOL(__divdi3);
-EXPORT_SYMBOL(__divsi3);
-EXPORT_SYMBOL(__lshrdi3);
-EXPORT_SYMBOL(__moddi3);
-EXPORT_SYMBOL(__modsi3);
-EXPORT_SYMBOL(__muldi3);
-EXPORT_SYMBOL(__mulsi3);
-EXPORT_SYMBOL(__negdi2);
-EXPORT_SYMBOL(__ucmpdi2);
-EXPORT_SYMBOL(__udivdi3);
-EXPORT_SYMBOL(__udivmoddi4);
-EXPORT_SYMBOL(__udivsi3);
-EXPORT_SYMBOL(__umoddi3);
-EXPORT_SYMBOL(__umodsi3);
-
-EXPORT_SYMBOL(h8300_reserved_gpio);
-EXPORT_SYMBOL(h8300_free_gpio);
-EXPORT_SYMBOL(h8300_set_gpio_dir);
diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c
deleted file mode 100644
index 2fa8ac7b79b5..000000000000
--- a/arch/h8300/kernel/irq.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * linux/arch/h8300/kernel/irq.c
- *
- * Copyright 2007 Yoshinori Sato <ysato@users.sourceforge.jp>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-
-#include <asm/traps.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/errno.h>
-
-/*#define DEBUG*/
-
-extern unsigned long *interrupt_redirect_table;
-extern const int h8300_saved_vectors[];
-extern const h8300_vector h8300_trap_table[];
-int h8300_enable_irq_pin(unsigned int irq);
-void h8300_disable_irq_pin(unsigned int irq);
-
-#define CPU_VECTOR ((unsigned long *)0x000000)
-#define ADDR_MASK (0xffffff)
-
-static inline int is_ext_irq(unsigned int irq)
-{
-	return (irq >= EXT_IRQ0 && irq <= (EXT_IRQ0 + EXT_IRQS));
-}
-
-static void h8300_enable_irq(struct irq_data *data)
-{
-	if (is_ext_irq(data->irq))
-		IER_REGS |= 1 << (data->irq - EXT_IRQ0);
-}
-
-static void h8300_disable_irq(struct irq_data *data)
-{
-	if (is_ext_irq(data->irq))
-		IER_REGS &= ~(1 << (data->irq - EXT_IRQ0));
-}
-
-static unsigned int h8300_startup_irq(struct irq_data *data)
-{
-	if (is_ext_irq(data->irq))
-		return h8300_enable_irq_pin(data->irq);
-	else
-		return 0;
-}
-
-static void h8300_shutdown_irq(struct irq_data *data)
-{
-	if (is_ext_irq(data->irq))
-		h8300_disable_irq_pin(data->irq);
-}
-
-/*
- * h8300 interrupt controller implementation
- */
-struct irq_chip h8300irq_chip = {
-	.name		= "H8300-INTC",
-	.irq_startup	= h8300_startup_irq,
-	.irq_shutdown	= h8300_shutdown_irq,
-	.irq_enable	= h8300_enable_irq,
-	.irq_disable	= h8300_disable_irq,
-};
-
-#if defined(CONFIG_RAMKERNEL)
-static unsigned long __init *get_vector_address(void)
-{
-	unsigned long *rom_vector = CPU_VECTOR;
-	unsigned long base,tmp;
-	int vec_no;
-
-	base = rom_vector[EXT_IRQ0] & ADDR_MASK;
-
-	/* check romvector format */
-	for (vec_no = EXT_IRQ1; vec_no <= EXT_IRQ0+EXT_IRQS; vec_no++) {
-		if ((base+(vec_no - EXT_IRQ0)*4) != (rom_vector[vec_no] & ADDR_MASK))
-			return NULL;
-	}
-
-	/* ramvector base address */
-	base -= EXT_IRQ0*4;
-
-	/* writerble check */
-	tmp = ~(*(volatile unsigned long *)base);
-	(*(volatile unsigned long *)base) = tmp;
-	if ((*(volatile unsigned long *)base) != tmp)
-		return NULL;
-	return (unsigned long *)base;
-}
-
-static void __init setup_vector(void)
-{
-	int i;
-	unsigned long *ramvec,*ramvec_p;
-	const h8300_vector *trap_entry;
-	const int *saved_vector;
-
-	ramvec = get_vector_address();
-	if (ramvec == NULL)
-		panic("interrupt vector serup failed.");
-	else
-		printk(KERN_INFO "virtual vector at 0x%08lx\n",(unsigned long)ramvec);
-
-	/* create redirect table */
-	ramvec_p = ramvec;
-	trap_entry = h8300_trap_table;
-	saved_vector = h8300_saved_vectors;
-	for ( i = 0; i < NR_IRQS; i++) {
-		if (i == *saved_vector) {
-			ramvec_p++;
-			saved_vector++;
-		} else {
-			if ( i < NR_TRAPS ) {
-				if (*trap_entry)
-					*ramvec_p = VECTOR(*trap_entry);
-				ramvec_p++;
-				trap_entry++;
-			} else
-				*ramvec_p++ = REDIRECT(interrupt_entry);
-		}
-	}
-	interrupt_redirect_table = ramvec;
-#ifdef DEBUG
-	ramvec_p = ramvec;
-	for (i = 0; i < NR_IRQS; i++) {
-		if ((i % 8) == 0)
-			printk(KERN_DEBUG "\n%p: ",ramvec_p);
-		printk(KERN_DEBUG "%p ",*ramvec_p);
-		ramvec_p++;
-	}
-	printk(KERN_DEBUG "\n");
-#endif
-}
-#else
-#define setup_vector() do { } while(0)
-#endif
-
-void __init init_IRQ(void)
-{
-	int c;
-
-	setup_vector();
-
-	for (c = 0; c < NR_IRQS; c++)
-		irq_set_chip_and_handler(c, &h8300irq_chip, handle_simple_irq);
-}
-
-asmlinkage void do_IRQ(int irq)
-{
-	irq_enter();
-	generic_handle_irq(irq);
-	irq_exit();
-}
diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c
deleted file mode 100644
index 1d526e05db19..000000000000
--- a/arch/h8300/kernel/module.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <linux/moduleloader.h>
-#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt...)
-#endif
-
-int apply_relocate_add(Elf32_Shdr *sechdrs,
-		       const char *strtab,
-		       unsigned int symindex,
-		       unsigned int relsec,
-		       struct module *me)
-{
-	unsigned int i;
-	Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr;
-
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
-		/* This is where to make the change */
-		uint32_t *loc = (uint32_t *)(sechdrs[sechdrs[relsec].sh_info].sh_addr
-					     + rela[i].r_offset);
-		/* This is the symbol it is referring to.  Note that all
-		   undefined symbols have been resolved.  */
-		Elf32_Sym *sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
-			+ ELF32_R_SYM(rela[i].r_info);
-		uint32_t v = sym->st_value + rela[i].r_addend;
-
-		switch (ELF32_R_TYPE(rela[i].r_info)) {
-		case R_H8_DIR24R8:
-			loc = (uint32_t *)((uint32_t)loc - 1);
-			*loc = (*loc & 0xff000000) | ((*loc & 0xffffff) + v);
-			break;
-		case R_H8_DIR24A8:
-			if (ELF32_R_SYM(rela[i].r_info))
-				*loc += v;
-			break;
-		case R_H8_DIR32:
-		case R_H8_DIR32A16:
-			*loc += v;
-			break;
-		case R_H8_PCREL16:
-			v -= (unsigned long)loc + 2;
-			if ((Elf32_Sword)v > 0x7fff || 
-			    (Elf32_Sword)v < -(Elf32_Sword)0x8000)
-				goto overflow;
-			else 
-				*(unsigned short *)loc = v;
-			break;
-		case R_H8_PCREL8:
-			v -= (unsigned long)loc + 1;
-			if ((Elf32_Sword)v > 0x7f || 
-			    (Elf32_Sword)v < -(Elf32_Sword)0x80)
-				goto overflow;
-			else 
-				*(unsigned char *)loc = v;
-			break;
-		default:
-			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
-			       me->name, ELF32_R_TYPE(rela[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-	return 0;
- overflow:
-	printk(KERN_ERR "module %s: relocation offset overflow: %08x\n",
-	       me->name, rela[i].r_offset);
-	return -ENOEXEC;
-}
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
deleted file mode 100644
index 1a744ab7e7e5..000000000000
--- a/arch/h8300/kernel/process.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/process.c
- *
- * Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Based on:
- *
- *  linux/arch/m68knommu/kernel/process.c
- *
- *  Copyright (C) 1998  D. Jeff Dionne <jeff@ryeham.ee.ryerson.ca>,
- *                      Kenneth Albanowski <kjahds@kjahds.com>,
- *                      The Silver Hammer Group, Ltd.
- *
- *  linux/arch/m68k/kernel/process.c
- *
- *  Copyright (C) 1995  Hamish Macdonald
- *
- *  68060 fixes by Jesper Skov
- */
-
-/*
- * This file handles the architecture-dependent parts of process handling..
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/interrupt.h>
-#include <linux/reboot.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/rcupdate.h>
-
-#include <asm/uaccess.h>
-#include <asm/traps.h>
-#include <asm/setup.h>
-#include <asm/pgtable.h>
-
-void (*pm_power_off)(void) = NULL;
-EXPORT_SYMBOL(pm_power_off);
-
-asmlinkage void ret_from_fork(void);
-asmlinkage void ret_from_kernel_thread(void);
-
-/*
- * The idle loop on an H8/300..
- */
-#if !defined(CONFIG_H8300H_SIM) && !defined(CONFIG_H8S_SIM)
-void arch_cpu_idle(void)
-{
-	local_irq_enable();
-	/* XXX: race here! What if need_resched() gets set now? */
-	__asm__("sleep");
-}
-#endif
-
-void machine_restart(char * __unused)
-{
-	local_irq_disable();
-	__asm__("jmp @@0"); 
-}
-
-void machine_halt(void)
-{
-	local_irq_disable();
-	__asm__("sleep");
-	for (;;);
-}
-
-void machine_power_off(void)
-{
-	local_irq_disable();
-	__asm__("sleep");
-	for (;;);
-}
-
-void show_regs(struct pt_regs * regs)
-{
-	show_regs_print_info(KERN_DEFAULT);
-
-	printk("\nPC: %08lx  Status: %02x",
-	       regs->pc, regs->ccr);
-	printk("\nORIG_ER0: %08lx ER0: %08lx ER1: %08lx",
-	       regs->orig_er0, regs->er0, regs->er1);
-	printk("\nER2: %08lx ER3: %08lx ER4: %08lx ER5: %08lx",
-	       regs->er2, regs->er3, regs->er4, regs->er5);
-	printk("\nER6' %08lx ",regs->er6);
-	if (user_mode(regs))
-		printk("USP: %08lx\n", rdusp());
-	else
-		printk("\n");
-}
-
-void flush_thread(void)
-{
-}
-
-int copy_thread(unsigned long clone_flags,
-                unsigned long usp, unsigned long topstk,
-		 struct task_struct * p)
-{
-	struct pt_regs * childregs;
-
-	childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1;
-
-	if (unlikely(p->flags & PF_KTHREAD)) {
-		memset(childregs, 0, sizeof(struct pt_regs));
-		childregs->retpc = (unsigned long) ret_from_kernel_thread;
-		childregs->er4 = topstk; /* arg */
-		childregs->er5 = usp; /* fn */
-		p->thread.ksp = (unsigned long)childregs;
-	}
-	*childregs = *current_pt_regs();
-	childregs->retpc = (unsigned long) ret_from_fork;
-	childregs->er0 = 0;
-	p->thread.usp = usp ?: rdusp();
-	p->thread.ksp = (unsigned long)childregs;
-
-	return 0;
-}
-
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
-unsigned long get_wchan(struct task_struct *p)
-{
-	unsigned long fp, pc;
-	unsigned long stack_page;
-	int count = 0;
-	if (!p || p == current || p->state == TASK_RUNNING)
-		return 0;
-
-	stack_page = (unsigned long)p;
-	fp = ((struct pt_regs *)p->thread.ksp)->er6;
-	do {
-		if (fp < stack_page+sizeof(struct thread_info) ||
-		    fp >= 8184+stack_page)
-			return 0;
-		pc = ((unsigned long *)fp)[1];
-		if (!in_sched_functions(pc))
-			return pc;
-		fp = *(unsigned long *) fp;
-	} while (count++ < 16);
-	return 0;
-}
diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c
deleted file mode 100644
index 748cf6585aa4..000000000000
--- a/arch/h8300/kernel/ptrace.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/ptrace.c
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Based on:
- *  linux/arch/m68k/kernel/ptrace.c
- *
- *  Copyright (C) 1994 by Hamish Macdonald
- *  Taken from linux/kernel/ptrace.c and modified for M680x0.
- *  linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of
- * this archive for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/signal.h>
-
-#include <asm/uaccess.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/signal.h>
-
-/* cpu depend functions */
-extern long h8300_get_reg(struct task_struct *task, int regno);
-extern int  h8300_put_reg(struct task_struct *task, int regno, unsigned long data);
-
-
-void user_disable_single_step(struct task_struct *child)
-{
-}
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-void ptrace_disable(struct task_struct *child)
-{
-	user_disable_single_step(child);
-}
-
-long arch_ptrace(struct task_struct *child, long request,
-		 unsigned long addr, unsigned long data)
-{
-	int ret;
-	int regno = addr >> 2;
-	unsigned long __user *datap = (unsigned long __user *) data;
-
-	switch (request) {
-	/* read the word at location addr in the USER area. */
-		case PTRACE_PEEKUSR: {
-			unsigned long tmp = 0;
-			
-			if ((addr & 3) || addr >= sizeof(struct user)) {
-				ret = -EIO;
-				break ;
-			}
-			
-		        ret = 0;  /* Default return condition */
-
-			if (regno < H8300_REGS_NO)
-				tmp = h8300_get_reg(child, regno);
-			else {
-				switch (regno) {
-				case 49:
-					tmp = child->mm->start_code;
-					break ;
-				case 50:
-					tmp = child->mm->start_data;
-					break ;
-				case 51:
-					tmp = child->mm->end_code;
-					break ;
-				case 52:
-					tmp = child->mm->end_data;
-					break ;
-				default:
-					ret = -EIO;
-				}
-			}
-			if (!ret)
-				ret = put_user(tmp, datap);
-			break ;
-		}
-
-      /* when I and D space are separate, this will have to be fixed. */
-		case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
-			if ((addr & 3) || addr >= sizeof(struct user)) {
-				ret = -EIO;
-				break ;
-			}
-			    
-			if (regno == PT_ORIG_ER0) {
-				ret = -EIO;
-				break ;
-			}
-			if (regno < H8300_REGS_NO) {
-				ret = h8300_put_reg(child, regno, data);
-				break ;
-			}
-			ret = -EIO;
-			break ;
-
-		case PTRACE_GETREGS: { /* Get all gp regs from the child. */
-		  	int i;
-			unsigned long tmp;
-			for (i = 0; i < H8300_REGS_NO; i++) {
-			    tmp = h8300_get_reg(child, i);
-			    if (put_user(tmp, datap)) {
-				ret = -EFAULT;
-				break;
-			    }
-			    datap++;
-			}
-			ret = 0;
-			break;
-		}
-
-		case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-			int i;
-			unsigned long tmp;
-			for (i = 0; i < H8300_REGS_NO; i++) {
-			    if (get_user(tmp, datap)) {
-				ret = -EFAULT;
-				break;
-			    }
-			    h8300_put_reg(child, i, tmp);
-			    datap++;
-			}
-			ret = 0;
-			break;
-		}
-
-		default:
-			ret = ptrace_request(child, request, addr, data);
-			break;
-	}
-	return ret;
-}
-
-asmlinkage void do_syscall_trace(void)
-{
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return;
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				 ? 0x80 : 0));
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
deleted file mode 100644
index d0b1607f2711..000000000000
--- a/arch/h8300/kernel/setup.c
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/setup.c
- *
- *  Copyleft  ()) 2000       James D. Schettine {james@telos-systems.com}
- *  Copyright (C) 1999,2000  Greg Ungerer (gerg@snapgear.com)
- *  Copyright (C) 1998,1999  D. Jeff Dionne <jeff@lineo.ca>
- *  Copyright (C) 1998       Kenneth Albanowski <kjahds@kjahds.com>
- *  Copyright (C) 1995       Hamish Macdonald
- *  Copyright (C) 2000       Lineo Inc. (www.lineo.com) 
- *  Copyright (C) 2001 	     Lineo, Inc. <www.lineo.com>
- *
- *  H8/300 porting Yoshinori Sato <ysato@users.sourceforge.jp>
- */
-
-/*
- * This file handles the architecture-dependent parts of system setup
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/fb.h>
-#include <linux/console.h>
-#include <linux/genhd.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/major.h>
-#include <linux/bootmem.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-
-#include <asm/setup.h>
-#include <asm/irq.h>
-#include <asm/pgtable.h>
-#include <asm/sections.h>
-
-#if defined(__H8300H__)
-#define CPU "H8/300H"
-#include <asm/regs306x.h>
-#endif
-
-#if defined(__H8300S__)
-#define CPU "H8S"
-#include <asm/regs267x.h>
-#endif
-
-#define STUBSIZE 0xc000
-
-unsigned long rom_length;
-unsigned long memory_start;
-unsigned long memory_end;
-
-char __initdata command_line[COMMAND_LINE_SIZE];
-
-extern int _ramstart, _ramend;
-extern char _target_name[];
-extern void h8300_gpio_init(void);
-
-#if (defined(CONFIG_H8300H_SIM) || defined(CONFIG_H8S_SIM)) \
-    && defined(CONFIG_GDB_MAGICPRINT)
-/* printk with gdb service */
-static void gdb_console_output(struct console *c, const char *msg, unsigned len)
-{
-	for (; len > 0; len--) {
-		asm("mov.w %0,r2\n\t"
-                    "jsr @0xc4"::"r"(*msg++):"er2");
-	}
-}
-
-/*
- *	Setup initial baud/bits/parity. We do two things here:
- *	- construct a cflag setting for the first rs_open()
- *	- initialize the serial port
- *	Return non-zero if we didn't find a serial port.
- */
-static int __init gdb_console_setup(struct console *co, char *options)
-{
-	return 0;
-}
-
-static const struct console gdb_console = {
-	.name		= "gdb_con",
-	.write		= gdb_console_output,
-	.device		= NULL,
-	.setup		= gdb_console_setup,
-	.flags		= CON_PRINTBUFFER,
-	.index		= -1,
-};
-#endif
-
-void __init setup_arch(char **cmdline_p)
-{
-	int bootmap_size;
-
-	memory_start = (unsigned long) &_ramstart;
-
-	/* allow for ROMFS on the end of the kernel */
-	if (memcmp((void *)memory_start, "-rom1fs-", 8) == 0) {
-#if defined(CONFIG_BLK_DEV_INITRD)
-		initrd_start = memory_start;
-		initrd_end = memory_start += be32_to_cpu(((unsigned long *) (memory_start))[2]);
-#else
-		memory_start += be32_to_cpu(((unsigned long *) memory_start)[2]);
-#endif
-	}
-	memory_start = PAGE_ALIGN(memory_start);
-#if !defined(CONFIG_BLKDEV_RESERVE)
-	memory_end = (unsigned long) &_ramend; /* by now the stack is part of the init task */
-#if defined(CONFIG_GDB_DEBUG)
-	memory_end -= STUBSIZE;
-#endif
-#else
-	if ((memory_end < CONFIG_BLKDEV_RESERVE_ADDRESS) && 
-	    (memory_end > CONFIG_BLKDEV_RESERVE_ADDRESS))
-	    /* overlap userarea */
-	    memory_end = CONFIG_BLKDEV_RESERVE_ADDRESS; 
-#endif
-
-	init_mm.start_code = (unsigned long) _stext;
-	init_mm.end_code = (unsigned long) _etext;
-	init_mm.end_data = (unsigned long) _edata;
-	init_mm.brk = (unsigned long) 0; 
-
-#if (defined(CONFIG_H8300H_SIM) || defined(CONFIG_H8S_SIM)) && defined(CONFIG_GDB_MAGICPRINT)
-	register_console((struct console *)&gdb_console);
-#endif
-
-	printk(KERN_INFO "\r\n\nuClinux " CPU "\n");
-	printk(KERN_INFO "Target Hardware: %s\n",_target_name);
-	printk(KERN_INFO "Flat model support (C) 1998,1999 Kenneth Albanowski, D. Jeff Dionne\n");
-	printk(KERN_INFO "H8/300 series support by Yoshinori Sato <ysato@users.sourceforge.jp>\n");
-
-#ifdef DEBUG
-	printk(KERN_DEBUG "KERNEL -> TEXT=0x%p-0x%p DATA=0x%p-0x%p "
-		"BSS=0x%p-0x%p\n", _stext, _etext, _sdata, _edata, __bss_start,
-		__bss_stop);
-	printk(KERN_DEBUG "KERNEL -> ROMFS=0x%p-0x%06lx MEM=0x%06lx-0x%06lx "
-		"STACK=0x%06lx-0x%p\n", __bss_stop, memory_start, memory_start,
-		memory_end, memory_end, &_ramend);
-#endif
-
-#ifdef CONFIG_DEFAULT_CMDLINE
-	/* set from default command line */
-	if (*command_line == '\0')
-		strcpy(command_line,CONFIG_KERNEL_COMMAND);
-#endif
-	/* Keep a copy of command line */
-	*cmdline_p = &command_line[0];
-	memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
-	boot_command_line[COMMAND_LINE_SIZE-1] = 0;
-
-#ifdef DEBUG
-	if (strlen(*cmdline_p)) 
-		printk(KERN_DEBUG "Command line: '%s'\n", *cmdline_p);
-#endif
-
-	/*
-	 * give all the memory to the bootmap allocator,  tell it to put the
-	 * boot mem_map at the start of memory
-	 */
-	bootmap_size = init_bootmem_node(
-			NODE_DATA(0),
-			memory_start >> PAGE_SHIFT, /* map goes here */
-			PAGE_OFFSET >> PAGE_SHIFT,	/* 0 on coldfire */
-			memory_end >> PAGE_SHIFT);
-	/*
-	 * free the usable memory,  we have to make sure we do not free
-	 * the bootmem bitmap so we then reserve it after freeing it :-)
-	 */
-	free_bootmem(memory_start, memory_end - memory_start);
-	reserve_bootmem(memory_start, bootmap_size, BOOTMEM_DEFAULT);
-	/*
-	 * get kmalloc into gear
-	 */
-	paging_init();
-	h8300_gpio_init();
-#if defined(CONFIG_H8300_AKI3068NET) && defined(CONFIG_IDE)
-	{
-#define AREABIT(addr) (1 << (((addr) >> 21) & 7))
-		/* setup BSC */
-		volatile unsigned char *abwcr = (volatile unsigned char *)ABWCR;
-		volatile unsigned char *cscr = (volatile unsigned char *)CSCR;
-		*abwcr &= ~(AREABIT(CONFIG_H8300_IDE_BASE) | AREABIT(CONFIG_H8300_IDE_ALT));
-		*cscr  |= (AREABIT(CONFIG_H8300_IDE_BASE) | AREABIT(CONFIG_H8300_IDE_ALT)) | 0x0f;
-	}
-#endif
-#ifdef DEBUG
-	printk(KERN_DEBUG "Done setup_arch\n");
-#endif
-}
-
-/*
- *	Get CPU information for use by the procfs.
- */
-
-static int show_cpuinfo(struct seq_file *m, void *v)
-{
-    char *cpu;
-    int mode;
-    u_long clockfreq;
-
-    cpu = CPU;
-    mode = *(volatile unsigned char *)MDCR & 0x07;
-
-    clockfreq = CONFIG_CPU_CLOCK;
-
-    seq_printf(m,  "CPU:\t\t%s (mode:%d)\n"
-		   "Clock:\t\t%lu.%1luMHz\n"
-		   "BogoMips:\t%lu.%02lu\n"
-		   "Calibration:\t%lu loops\n",
-	           cpu,mode,
-		   clockfreq/1000,clockfreq%1000,
-		   (loops_per_jiffy*HZ)/500000,((loops_per_jiffy*HZ)/5000)%100,
-		   (loops_per_jiffy*HZ));
-
-    return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-	return *pos < NR_CPUS ? ((void *) 0x12345678) : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-	.start	= c_start,
-	.next	= c_next,
-	.stop	= c_stop,
-	.show	= show_cpuinfo,
-};
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
deleted file mode 100644
index a65ff3b76326..000000000000
--- a/arch/h8300/kernel/signal.c
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/signal.c
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-/*
- * uClinux H8/300 support by Yoshinori Sato <ysato@users.sourceforge.jp>
- *                and David McCullough <davidm@snapgear.com>
- *
- * Based on
- * Linux/m68k by Hamish Macdonald
- */
-
-/*
- * ++roman (07/09/96): implemented signal stacks (specially for tosemu on
- * Atari :-) Current limitation: Only one sigstack can be active at one time.
- * If a second signal with SA_ONSTACK set arrives while working on a sigstack,
- * SA_ONSTACK is ignored. This behaviour avoids lots of trouble with nested
- * signal handlers!
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/syscalls.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/highuid.h>
-#include <linux/personality.h>
-#include <linux/tty.h>
-#include <linux/binfmts.h>
-#include <linux/tracehook.h>
-
-#include <asm/setup.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/traps.h>
-#include <asm/ucontext.h>
-
-/*
- * Do a signal return; undo the signal stack.
- *
- * Keep the return code on the stack quadword aligned!
- * That makes the cache flush below easier.
- */
-
-struct sigframe
-{
-	long dummy_er0;
-	long dummy_vector;
-#if defined(CONFIG_CPU_H8S)
-	short dummy_exr;
-#endif
-	long dummy_pc;
-	char *pretcode;
-	unsigned char retcode[8];
-	unsigned long extramask[_NSIG_WORDS-1];
-	struct sigcontext sc;
-	int sig;
-} __attribute__((aligned(2),packed));
-
-struct rt_sigframe
-{
-	long dummy_er0;
-	long dummy_vector;
-#if defined(CONFIG_CPU_H8S)
-	short dummy_exr;
-#endif
-	long dummy_pc;
-	char *pretcode;
-	struct siginfo *pinfo;
-	void *puc;
-	unsigned char retcode[8];
-	struct siginfo info;
-	struct ucontext uc;
-	int sig;
-} __attribute__((aligned(2),packed));
-
-static inline int
-restore_sigcontext(struct sigcontext *usc, int *pd0)
-{
-	struct pt_regs *regs = current_pt_regs();
-	int err = 0;
-	unsigned int ccr;
-	unsigned int usp;
-	unsigned int er0;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-#define COPY(r) err |= __get_user(regs->r, &usc->sc_##r)    /* restore passed registers */
-	COPY(er1);
-	COPY(er2);
-	COPY(er3);
-	COPY(er5);
-	COPY(pc);
-	ccr = regs->ccr & 0x10;
-	COPY(ccr);
-#undef COPY
-	regs->ccr &= 0xef;
-	regs->ccr |= ccr;
-	regs->orig_er0 = -1;		/* disable syscall checks */
-	err |= __get_user(usp, &usc->sc_usp);
-	wrusp(usp);
-
-	err |= __get_user(er0, &usc->sc_er0);
-	*pd0 = er0;
-	return err;
-}
-
-asmlinkage int sys_sigreturn(void)
-{
-	unsigned long usp = rdusp();
-	struct sigframe *frame = (struct sigframe *)(usp - 4);
-	sigset_t set;
-	int er0;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__get_user(set.sig[0], &frame->sc.sc_mask) ||
-	    (_NSIG_WORDS > 1 &&
-	     __copy_from_user(&set.sig[1], &frame->extramask,
-			      sizeof(frame->extramask))))
-		goto badframe;
-
-	set_current_blocked(&set);
-	
-	if (restore_sigcontext(&frame->sc, &er0))
-		goto badframe;
-	return er0;
-
-badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}
-
-asmlinkage int sys_rt_sigreturn(void)
-{
-	unsigned long usp = rdusp();
-	struct rt_sigframe *frame = (struct rt_sigframe *)(usp - 4);
-	sigset_t set;
-	int er0;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
-
-	set_current_blocked(&set);
-	
-	if (restore_sigcontext(&frame->uc.uc_mcontext, &er0))
-		goto badframe;
-
-	if (restore_altstack(&frame->uc.uc_stack))
-		goto badframe;
-
-	return er0;
-
-badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}
-
-static int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
-			     unsigned long mask)
-{
-	int err = 0;
-
-	err |= __put_user(regs->er0, &sc->sc_er0);
-	err |= __put_user(regs->er1, &sc->sc_er1);
-	err |= __put_user(regs->er2, &sc->sc_er2);
-	err |= __put_user(regs->er3, &sc->sc_er3);
-	err |= __put_user(regs->er4, &sc->sc_er4);
-	err |= __put_user(regs->er5, &sc->sc_er5);
-	err |= __put_user(regs->er6, &sc->sc_er6);
-	err |= __put_user(rdusp(),   &sc->sc_usp);
-	err |= __put_user(regs->pc,  &sc->sc_pc);
-	err |= __put_user(regs->ccr, &sc->sc_ccr);
-	err |= __put_user(mask,      &sc->sc_mask);
-
-	return err;
-}
-
-static inline void *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
-{
-	unsigned long usp;
-
-	/* Default to using normal stack.  */
-	usp = rdusp();
-
-	/* This is the X/Open sanctioned signal stack switching.  */
-	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (!sas_ss_flags(usp))
-			usp = current->sas_ss_sp + current->sas_ss_size;
-	}
-	return (void *)((usp - frame_size) & -8UL);
-}
-
-static int setup_frame (int sig, struct k_sigaction *ka,
-			 sigset_t *set, struct pt_regs *regs)
-{
-	struct sigframe *frame;
-	int err = 0;
-	int usig;
-	unsigned char *ret;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	usig = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= __put_user(usig, &frame->sig);
-	if (err)
-		goto give_sigsegv;
-
-	err |= setup_sigcontext(&frame->sc, regs, set->sig[0]);
-	if (err)
-		goto give_sigsegv;
-
-	if (_NSIG_WORDS > 1) {
-		err |= copy_to_user(frame->extramask, &set->sig[1],
-				    sizeof(frame->extramask));
-		if (err)
-			goto give_sigsegv;
-	}
-
-	ret = frame->retcode;
-	if (ka->sa.sa_flags & SA_RESTORER)
-		ret = (unsigned char *)(ka->sa.sa_restorer);
-	else {
-		/* sub.l er0,er0; mov.b #__NR_sigreturn,r0l; trapa #0 */
-		err |= __put_user(0x1a80f800 + (__NR_sigreturn & 0xff),
-				  (unsigned long *)(frame->retcode + 0));
-		err |= __put_user(0x5700, (unsigned short *)(frame->retcode + 4));
-	}
-
-	/* Set up to return from userspace.  */
-	err |= __put_user(ret, &frame->pretcode);
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler */
-	wrusp ((unsigned long) frame);
-	regs->pc = (unsigned long) ka->sa.sa_handler;
-	regs->er0 = (current_thread_info()->exec_domain
-			   && current_thread_info()->exec_domain->signal_invmap
-			   && sig < 32
-			   ? current_thread_info()->exec_domain->signal_invmap[sig]
-		          : sig);
-	regs->er1 = (unsigned long)&(frame->sc);
-	regs->er5 = current->mm->start_data;	/* GOT base */
-
-	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
-}
-
-static int setup_rt_frame (int sig, struct k_sigaction *ka, siginfo_t *info,
-			    sigset_t *set, struct pt_regs *regs)
-{
-	struct rt_sigframe *frame;
-	int err = 0;
-	int usig;
-	unsigned char *ret;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	usig = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= __put_user(usig, &frame->sig);
-	if (err)
-		goto give_sigsegv;
-
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
-	err |= copy_siginfo_to_user(&frame->info, info);
-	if (err)
-		goto give_sigsegv;
-
-	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __save_altstack(&frame->uc.uc_stack, rdusp());
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0]);
-	err |= copy_to_user (&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up to return from userspace.  */
-	ret = frame->retcode;
-	if (ka->sa.sa_flags & SA_RESTORER)
-		ret = (unsigned char *)(ka->sa.sa_restorer);
-	else {
-		/* sub.l er0,er0; mov.b #__NR_sigreturn,r0l; trapa #0 */
-		err |= __put_user(0x1a80f800 + (__NR_sigreturn & 0xff),
-				  (unsigned long *)(frame->retcode + 0));
-		err |= __put_user(0x5700, (unsigned short *)(frame->retcode + 4));
-	}
-	err |= __put_user(ret, &frame->pretcode);
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler */
-	wrusp ((unsigned long) frame);
-	regs->pc  = (unsigned long) ka->sa.sa_handler;
-	regs->er0 = (current_thread_info()->exec_domain
-		     && current_thread_info()->exec_domain->signal_invmap
-		     && sig < 32
-		     ? current_thread_info()->exec_domain->signal_invmap[sig]
-		     : sig);
-	regs->er1 = (unsigned long)&(frame->info);
-	regs->er2 = (unsigned long)&frame->uc;
-	regs->er5 = current->mm->start_data;	/* GOT base */
-
-	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
-}
-
-/*
- * OK, we're invoking a handler
- */
-static void
-handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
-	      struct pt_regs * regs)
-{
-	sigset_t *oldset = sigmask_to_save();
-	int ret;
-	/* are we from a system call? */
-	if (regs->orig_er0 >= 0) {
-		switch (regs->er0) {
-		        case -ERESTART_RESTARTBLOCK:
-			case -ERESTARTNOHAND:
-				regs->er0 = -EINTR;
-				break;
-
-			case -ERESTARTSYS:
-				if (!(ka->sa.sa_flags & SA_RESTART)) {
-					regs->er0 = -EINTR;
-					break;
-				}
-			/* fallthrough */
-			case -ERESTARTNOINTR:
-				regs->er0 = regs->orig_er0;
-				regs->pc -= 2;
-		}
-	}
-
-	/* set up the stack frame */
-	if (ka->sa.sa_flags & SA_SIGINFO)
-		ret = setup_rt_frame(sig, ka, info, oldset, regs);
-	else
-		ret = setup_frame(sig, ka, oldset, regs);
-
-	if (!ret)
-		signal_delivered(sig, info, ka, regs, 0);
-}
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- */
-static void do_signal(struct pt_regs *regs)
-{
-	siginfo_t info;
-	int signr;
-	struct k_sigaction ka;
-
-	/*
-	 * We want the common case to go fast, which
-	 * is why we may in certain cases get here from
-	 * kernel mode. Just return without doing anything
-	 * if so.
-	 */
-	if ((regs->ccr & 0x10))
-		return;
-
-	current->thread.esp0 = (unsigned long) regs;
-
-	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-	if (signr > 0) {
-		/* Whee!  Actually deliver the signal.  */
-		handle_signal(signr, &info, &ka, regs);
-		return;
-	}
-	/* Did we come from a system call? */
-	if (regs->orig_er0 >= 0) {
-		/* Restart the system call - no handlers present */
-		if (regs->er0 == -ERESTARTNOHAND ||
-		    regs->er0 == -ERESTARTSYS ||
-		    regs->er0 == -ERESTARTNOINTR) {
-			regs->er0 = regs->orig_er0;
-			regs->pc -= 2;
-		}
-		if (regs->er0 == -ERESTART_RESTARTBLOCK){
-			regs->er0 = __NR_restart_syscall;
-			regs->pc -= 2;
-		}
-	}
-
-	/* If there's no signal to deliver, we just restore the saved mask.  */
-	restore_saved_sigmask();
-}
-
-asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
-{
-	if (thread_info_flags & _TIF_SIGPENDING)
-		do_signal(regs);
-
-	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
-		clear_thread_flag(TIF_NOTIFY_RESUME);
-		tracehook_notify_resume(regs);
-	}
-}
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
deleted file mode 100644
index bf350cb7f597..000000000000
--- a/arch/h8300/kernel/sys_h8300.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * linux/arch/h8300/kernel/sys_h8300.c
- *
- * This file contains various random system calls that
- * have a non-standard calling sequence on the H8/300
- * platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/ipc.h>
-
-#include <asm/setup.h>
-#include <asm/uaccess.h>
-#include <asm/cachectl.h>
-#include <asm/traps.h>
-#include <asm/unistd.h>
-
-/* sys_cacheflush -- no support.  */
-asmlinkage int
-sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
-{
-	return -EINVAL;
-}
-
-asmlinkage int sys_getpagesize(void)
-{
-	return PAGE_SIZE;
-}
-
-#if defined(CONFIG_SYSCALL_PRINT)
-asmlinkage void syscall_print(void *dummy,...)
-{
-	struct pt_regs *regs = (struct pt_regs *) ((unsigned char *)&dummy-4);
-	printk("call %06lx:%ld 1:%08lx,2:%08lx,3:%08lx,ret:%08lx\n",
-               ((regs->pc)&0xffffff)-2,regs->orig_er0,regs->er1,regs->er2,regs->er3,regs->er0);
-}
-#endif
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
deleted file mode 100644
index c55e0ed270d5..000000000000
--- a/arch/h8300/kernel/syscalls.S
+++ /dev/null
@@ -1,338 +0,0 @@
-/* Systemcall Entry Table */
-#include <linux/sys.h>
-#include <asm/linkage.h>
-#include <asm/unistd.h>
-
-#define CALL(x)	.long _ ## x
-
-.globl _sys_call_table
-
-#if defined(CONFIG_CPU_H8300H)
-	.h8300h
-#endif
-#if defined(CONFIG_CPU_H8S)
-	.h8300s
-#endif
-	.section .text
-	.align	2
-_sys_call_table:
-	CALL(sys_ni_syscall)		/* 0  -  old "setup()" system call*/
-	CALL(sys_exit)
-	CALL(sys_fork)
-	CALL(sys_read)
-	CALL(sys_write)
-	CALL(sys_open)			/* 5 */
-	CALL(sys_close)
-	CALL(sys_waitpid)
-	CALL(sys_creat)
-	CALL(sys_link)
-	CALL(sys_unlink)		/* 10 */
-	CALL(sys_execve)
-	CALL(sys_chdir)
-	CALL(sys_time)
-	CALL(sys_mknod)
-	CALL(sys_chmod)			/* 15 */
-	CALL(sys_chown16)
-	CALL(sys_ni_syscall)		/* old break syscall holder */
-	CALL(sys_stat)
-	CALL(sys_lseek)
-	CALL(sys_getpid)		/* 20 */
-	CALL(sys_mount)
-	CALL(sys_oldumount)
-	CALL(sys_setuid16)
-	CALL(sys_getuid16)
-	CALL(sys_stime)			/* 25 */
-	CALL(sys_ptrace)
-	CALL(sys_alarm)
-	CALL(sys_fstat)
-	CALL(sys_pause)
-	CALL(sys_utime)			/* 30 */
-	CALL(sys_ni_syscall)		/* old stty syscall holder */
-	CALL(sys_ni_syscall)		/* old gtty syscall holder */
-	CALL(sys_access)
-	CALL(sys_nice)
-	CALL(sys_ni_syscall)		/* 35 old ftime syscall holder */
-	CALL(sys_sync)
-	CALL(sys_kill)
-	CALL(sys_rename)
-	CALL(sys_mkdir)
-	CALL(sys_rmdir)			/* 40 */
-	CALL(sys_dup)
-	CALL(sys_pipe)
-	CALL(sys_times)
-	CALL(sys_ni_syscall)		/* old prof syscall holder */
-	CALL(sys_brk)			/* 45 */
-	CALL(sys_setgid16)
-	CALL(sys_getgid16)
-	CALL(sys_signal)
-	CALL(sys_geteuid16)
-	CALL(sys_getegid16)		/* 50 */
-	CALL(sys_acct)
-	CALL(sys_umount)		/* recycled never used phys() */
-	CALL(sys_ni_syscall)		/* old lock syscall holder */
-	CALL(sys_ioctl)
-	CALL(sys_fcntl)			/* 55 */
-	CALL(sys_ni_syscall)		/* old mpx syscall holder */
-	CALL(sys_setpgid)
-	CALL(sys_ni_syscall)		/* old ulimit syscall holder */
-	CALL(sys_ni_syscall)
-	CALL(sys_umask)			/* 60 */
-	CALL(sys_chroot)
-	CALL(sys_ustat)
-	CALL(sys_dup2)
-	CALL(sys_getppid)
-	CALL(sys_getpgrp)		/* 65 */
-	CALL(sys_setsid)
-	CALL(sys_sigaction)
-	CALL(sys_sgetmask)
-	CALL(sys_ssetmask)
-	CALL(sys_setreuid16)		/* 70 */
-	CALL(sys_setregid16)
-	CALL(sys_sigsuspend)
-	CALL(sys_sigpending)
-	CALL(sys_sethostname)
-	CALL(sys_setrlimit)		/* 75 */
-	CALL(sys_old_getrlimit)
-	CALL(sys_getrusage)
-	CALL(sys_gettimeofday)
-	CALL(sys_settimeofday)
-	CALL(sys_getgroups16)		/* 80 */
-	CALL(sys_setgroups16)
-	CALL(sys_old_select)
-	CALL(sys_symlink)
-	CALL(sys_lstat)
-	CALL(sys_readlink)		/* 85 */
-	CALL(sys_uselib)
-	CALL(sys_swapon)
-	CALL(sys_reboot)
-	CALL(sys_old_readdir)
-	CALL(sys_old_mmap)		/* 90 */
-	CALL(sys_munmap)
-	CALL(sys_truncate)
-	CALL(sys_ftruncate)
-	CALL(sys_fchmod)
-	CALL(sys_fchown16)		/* 95 */
-	CALL(sys_getpriority)
-	CALL(sys_setpriority)
-	CALL(sys_ni_syscall)		/* old profil syscall holder */
-	CALL(sys_statfs)
-	CALL(sys_fstatfs)		/* 100 */
-	CALL(sys_ni_syscall)		/* ioperm for i386 */
-	CALL(sys_socketcall)
-	CALL(sys_syslog)
-	CALL(sys_setitimer)
-	CALL(sys_getitimer)		/* 105 */
-	CALL(sys_newstat)
-	CALL(sys_newlstat)
-	CALL(sys_newfstat)
-	CALL(sys_ni_syscall)
-	CALL(sys_ni_syscall)		/* iopl for i386 */ /* 110 */
-	CALL(sys_vhangup)
-	CALL(sys_ni_syscall)		/* obsolete idle() syscall */
-	CALL(sys_ni_syscall)		/* vm86old for i386 */
-	CALL(sys_wait4)
-	CALL(sys_swapoff)		/* 115 */
-	CALL(sys_sysinfo)
-	CALL(sys_ipc)
-	CALL(sys_fsync)
-	CALL(sys_sigreturn)
-	CALL(sys_clone)			/* 120 */
-	CALL(sys_setdomainname)
-	CALL(sys_newuname)
-	CALL(sys_cacheflush)		/* modify_ldt for i386 */
-	CALL(sys_adjtimex)
-	CALL(sys_ni_syscall)		/* 125 sys_mprotect */
-	CALL(sys_sigprocmask)
-	CALL(sys_ni_syscall)		/* sys_create_module */
-	CALL(sys_init_module)
-	CALL(sys_delete_module)
-	CALL(sys_ni_syscall)		/* 130 sys_get_kernel_syms */
-	CALL(sys_quotactl)
-	CALL(sys_getpgid)
-	CALL(sys_fchdir)
-	CALL(sys_bdflush)
-	CALL(sys_sysfs)			/* 135 */
-	CALL(sys_personality)
-	CALL(sys_ni_syscall)		/* for afs_syscall */
-	CALL(sys_setfsuid16)
-	CALL(sys_setfsgid16)
-	CALL(sys_llseek)		/* 140 */
-	CALL(sys_getdents)
-	CALL(sys_select)
-	CALL(sys_flock)
-	CALL(sys_ni_syscall)		/* sys_msync */
-	CALL(sys_readv)			/* 145 */
-	CALL(sys_writev)
-	CALL(sys_getsid)
-	CALL(sys_fdatasync)
-	CALL(sys_sysctl)
-	CALL(sys_ni_syscall)		/* 150 sys_mlock */
-	CALL(sys_ni_syscall)		/* sys_munlock */
-	CALL(sys_ni_syscall)		/* sys_mlockall */
-	CALL(sys_ni_syscall)		/* sys_munlockall */
-	CALL(sys_sched_setparam)
-	CALL(sys_sched_getparam)	/* 155 */
-	CALL(sys_sched_setscheduler)
-	CALL(sys_sched_getscheduler)
-	CALL(sys_sched_yield)
-	CALL(sys_sched_get_priority_max)
-	CALL(sys_sched_get_priority_min)  /* 160 */
-	CALL(sys_sched_rr_get_interval)
-	CALL(sys_nanosleep)
-	CALL(sys_ni_syscall)		/* sys_mremap */
-	CALL(sys_setresuid16)
-	CALL(sys_getresuid16)		/* 165 */
-	CALL(sys_ni_syscall)		/* for vm86 */
-	CALL(sys_ni_syscall)		/* sys_query_module */
-	CALL(sys_poll)
-	CALL(sys_ni_syscall)		/* old nfsservctl */
-	CALL(sys_setresgid16)		/* 170 */
-	CALL(sys_getresgid16)
-	CALL(sys_prctl)
-	CALL(sys_rt_sigreturn)
-	CALL(sys_rt_sigaction)
-	CALL(sys_rt_sigprocmask)	/* 175 */
-	CALL(sys_rt_sigpending)
-	CALL(sys_rt_sigtimedwait)
-	CALL(sys_rt_sigqueueinfo)
-	CALL(sys_rt_sigsuspend)
-	CALL(sys_pread64)		/* 180 */
-	CALL(sys_pwrite64)
-	CALL(sys_lchown16);
-	CALL(sys_getcwd)
-	CALL(sys_capget)
-	CALL(sys_capset)		/* 185 */
-	CALL(sys_sigaltstack)
-	CALL(sys_sendfile)
-	CALL(sys_ni_syscall)		/* streams1 */
-	CALL(sys_ni_syscall)		/* streams2 */
-	CALL(sys_vfork)			/* 190 */
-	CALL(sys_getrlimit)
-	CALL(sys_mmap_pgoff)
-	CALL(sys_truncate64)
-	CALL(sys_ftruncate64)
-	CALL(sys_stat64)		/* 195 */
-	CALL(sys_lstat64)
-	CALL(sys_fstat64)
-	CALL(sys_chown)
-	CALL(sys_getuid)
-	CALL(sys_getgid)		/* 200 */
-	CALL(sys_geteuid)
-	CALL(sys_getegid)
-	CALL(sys_setreuid)
-	CALL(sys_setregid)
-	CALL(sys_getgroups)		/* 205 */
-	CALL(sys_setgroups)
-	CALL(sys_fchown)
-	CALL(sys_setresuid)
-	CALL(sys_getresuid)
-	CALL(sys_setresgid)		/* 210 */
-	CALL(sys_getresgid)
-	CALL(sys_lchown)
-	CALL(sys_setuid)
-	CALL(sys_setgid)
-	CALL(sys_setfsuid)		/* 215 */
-	CALL(sys_setfsgid)
-	CALL(sys_pivot_root)
-	CALL(sys_ni_syscall)
-	CALL(sys_ni_syscall)
-	CALL(sys_getdents64)		/* 220 */
-	CALL(sys_fcntl64)
-	CALL(sys_ni_syscall)		/* reserved TUX */
-	CALL(sys_ni_syscall)		/* reserved Security */
-	CALL(sys_gettid)
-	CALL(sys_readahead)		/* 225 */
-	CALL(sys_setxattr)
-	CALL(sys_lsetxattr)
-	CALL(sys_fsetxattr)
-	CALL(sys_getxattr)
-	CALL(sys_lgetxattr)		/* 230 */
-	CALL(sys_fgetxattr)
-	CALL(sys_listxattr)
-	CALL(sys_llistxattr)
-	CALL(sys_flistxattr)
-	CALL(sys_removexattr)		/* 235 */
-	CALL(sys_lremovexattr)
-	CALL(sys_fremovexattr)
-	CALL(sys_tkill)
-	CALL(sys_sendfile64)
-	CALL(sys_futex)			/* 240 */
-	CALL(sys_sched_setaffinity)
-	CALL(sys_sched_getaffinity)
-	CALL(sys_ni_syscall)
-	CALL(sys_ni_syscall)
-	CALL(sys_io_setup)		/* 245 */
-	CALL(sys_io_destroy)
-	CALL(sys_io_getevents)
-	CALL(sys_io_submit)
-	CALL(sys_io_cancel)
-	CALL(sys_fadvise64)		/* 250 */
-	CALL(sys_ni_syscall)
-	CALL(sys_exit_group)
-	CALL(sys_lookup_dcookie)
-	CALL(sys_epoll_create)
-	CALL(sys_epoll_ctl)		/* 255 */
-	CALL(sys_epoll_wait)
-	CALL(sys_ni_syscall)		/* sys_remap_file_pages */
-	CALL(sys_set_tid_address)
-	CALL(sys_timer_create)
-	CALL(sys_timer_settime)		/* 260 */
-	CALL(sys_timer_gettime)
-	CALL(sys_timer_getoverrun)
-	CALL(sys_timer_delete)
-	CALL(sys_clock_settime)
-	CALL(sys_clock_gettime)		/* 265 */
-	CALL(sys_clock_getres)
-	CALL(sys_clock_nanosleep)
-	CALL(sys_statfs64)
-	CALL(sys_fstatfs64)
-	CALL(sys_tgkill)		/* 270 */
-	CALL(sys_utimes)
-	CALL(sys_fadvise64_64)
-	CALL(sys_ni_syscall)		/* sys_vserver */
-	CALL(sys_ni_syscall)
-	CALL(sys_get_mempolicy)		/* 275 */
-	CALL(sys_set_mempolicy)
-	CALL(sys_mq_open)
-	CALL(sys_mq_unlink)
-	CALL(sys_mq_timedsend)
-	CALL(sys_mq_timedreceive)	/* 280 */
-	CALL(sys_mq_notify)
-	CALL(sys_mq_getsetattr)
-	CALL(sys_waitid)
-	CALL(sys_ni_syscall)		/* sys_kexec_load */
-	CALL(sys_add_key) 		/* 285 */
-	CALL(sys_request_key)
-	CALL(sys_keyctl)
-	CALL(sys_ioprio_set)
-	CALL(sys_ioprio_get)		/* 290 */
-	CALL(sys_inotify_init)
-	CALL(sys_inotify_add_watch)
-	CALL(sys_inotify_rm_watch)
-	CALL(sys_migrate_pages)
-	CALL(sys_openat)		/* 295 */
-	CALL(sys_mkdirat)
-	CALL(sys_mknodat)
-	CALL(sys_fchownat)
-	CALL(sys_futimesat)
-	CALL(sys_fstatat64)		/* 300 */
-	CALL(sys_unlinkat)
-	CALL(sys_renameat)
-	CALL(sys_linkat)
-	CALL(sys_symlinkat)
-	CALL(sys_readlinkat)		/* 305 */
-	CALL(sys_fchmodat)
-	CALL(sys_faccessat)
-	CALL(sys_ni_syscall)		/* sys_pselect6 */
-	CALL(sys_ni_syscall)		/* sys_ppoll */
-	CALL(sys_unshare)		/* 310 */
-	CALL(sys_set_robust_list)
-	CALL(sys_get_robust_list)
-	CALL(sys_splice)
-	CALL(sys_sync_file_range)
-	CALL(sys_tee)			/* 315 */
-	CALL(sys_vmsplice)
-	CALL(sys_ni_syscall)		/* sys_move_pages */
-	CALL(sys_getcpu)
-	CALL(sys_ni_syscall)		/* sys_epoll_pwait */
-	CALL(sys_setns)			/* 320 */
diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c
deleted file mode 100644
index e0f74191d553..000000000000
--- a/arch/h8300/kernel/time.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/time.c
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Copied/hacked from:
- *
- *  linux/arch/m68k/kernel/time.c
- *
- *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
- *
- * This file contains the m68k-specific time handling details.
- * Most of the stuff is located in the machine specific files.
- *
- * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
- *		"A Kernel Model for Precision Timekeeping" by Dave Mills
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/timex.h>
-#include <linux/profile.h>
-
-#include <asm/io.h>
-#include <asm/irq_regs.h>
-#include <asm/timer.h>
-
-#define	TICK_SIZE (tick_nsec / 1000)
-
-void h8300_timer_tick(void)
-{
-	if (current->pid)
-		profile_tick(CPU_PROFILING);
-	xtime_update(1);
-	update_process_times(user_mode(get_irq_regs()));
-}
-
-void read_persistent_clock(struct timespec *ts)
-{
-	unsigned int year, mon, day, hour, min, sec;
-
-	/* FIX by dqg : Set to zero for platforms that don't have tod */
-	/* without this time is undefined and can overflow time_t, causing  */
-	/* very strange errors */
-	year = 1980;
-	mon = day = 1;
-	hour = min = sec = 0;
-#ifdef CONFIG_H8300_GETTOD
-	h8300_gettod (&year, &mon, &day, &hour, &min, &sec);
-#endif
-	if ((year += 1900) < 1970)
-		year += 100;
-	ts->tv_sec = mktime(year, mon, day, hour, min, sec);
-	ts->tv_nsec = 0;
-}
-
-void __init time_init(void)
-{
-
-	h8300_timer_setup();
-}
diff --git a/arch/h8300/kernel/timer/Makefile b/arch/h8300/kernel/timer/Makefile
deleted file mode 100644
index bef0510ea6ad..000000000000
--- a/arch/h8300/kernel/timer/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# h8300 internal timer handler
-
-obj-$(CONFIG_H8300_TIMER8)  := timer8.o
-obj-$(CONFIG_H8300_TIMER16) := timer16.o
-obj-$(CONFIG_H8300_ITU)     := itu.o
-obj-$(CONFIG_H8300_TPU)     := tpu.o
diff --git a/arch/h8300/kernel/timer/itu.c b/arch/h8300/kernel/timer/itu.c
deleted file mode 100644
index 0a8b5cd5bf38..000000000000
--- a/arch/h8300/kernel/timer/itu.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/timer/itu.c
- *
- *  Yoshinori Sato <ysato@users.sourcefoge.jp>
- *
- *  ITU Timer Handler
- *
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-
-#include <asm/segment.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/regs306x.h>
-
-#if CONFIG_H8300_ITU_CH == 0
-#define ITUBASE	0xffff64
-#define ITUIRQ	24
-#elif CONFIG_H8300_ITU_CH == 1
-#define ITUBASE	0xffff6e
-#define ITUIRQ	28
-#elif CONFIG_H8300_ITU_CH == 2
-#define ITUBASE	0xffff78
-#define ITUIRQ	32
-#elif CONFIG_H8300_ITU_CH == 3
-#define ITUBASE	0xffff82
-#define ITUIRQ	36
-#elif CONFIG_H8300_ITU_CH == 4
-#define ITUBASE	0xffff92
-#define ITUIRQ	40
-#else
-#error Unknown timer channel.
-#endif
-
-#define TCR	0
-#define TIOR	1
-#define TIER	2
-#define TSR	3
-#define TCNT	4
-#define GRA	6
-#define GRB	8
-
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
-	h8300_timer_tick();
-	ctrl_bclr(IMFA, ITUBASE + TSR);
-	return IRQ_HANDLED;
-}
-
-static struct irqaction itu_irq = {
-	.name		= "itu",
-	.handler	= timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER,
-};
-
-static const int __initconst divide_rate[] = {1, 2, 4, 8};
-
-void __init h8300_timer_setup(void)
-{
-	unsigned int div;
-	unsigned int cnt;
-
-	calc_param(cnt, div, divide_rate, 0x10000);
-
-	setup_irq(ITUIRQ, &itu_irq);
-
-	/* initialize timer */
-	ctrl_outb(0, TSTR);
-	ctrl_outb(CCLR0 | div, ITUBASE + TCR);
-	ctrl_outb(0x01, ITUBASE + TIER);
-	ctrl_outw(cnt, ITUBASE + GRA);
-	ctrl_bset(CONFIG_H8300_ITU_CH, TSTR);
-}
diff --git a/arch/h8300/kernel/timer/timer16.c b/arch/h8300/kernel/timer/timer16.c
deleted file mode 100644
index 462d9f581719..000000000000
--- a/arch/h8300/kernel/timer/timer16.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/timer/timer16.c
- *
- *  Yoshinori Sato <ysato@users.sourcefoge.jp>
- *
- *  16bit Timer Handler
- *
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-
-#include <asm/segment.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/regs306x.h>
-
-/* 16bit timer */
-#if CONFIG_H8300_TIMER16_CH == 0
-#define _16BASE	0xffff78
-#define _16IRQ	24
-#elif CONFIG_H8300_TIMER16_CH == 1
-#define _16BASE	0xffff80
-#define _16IRQ	28
-#elif CONFIG_H8300_TIMER16_CH == 2
-#define _16BASE	0xffff88
-#define _16IRQ	32
-#else
-#error Unknown timer channel.
-#endif
-
-#define TCR	0
-#define TIOR	1
-#define TCNT	2
-#define GRA	4
-#define GRB	6
-
-#define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*10000 /* Timer input freq. */
-
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
-	h8300_timer_tick();
-	ctrl_bclr(CONFIG_H8300_TIMER16_CH, TISRA);
-	return IRQ_HANDLED;
-}
-
-static struct irqaction timer16_irq = {
-	.name		= "timer-16",
-	.handler	= timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER,
-};
-
-static const int __initconst divide_rate[] = {1, 2, 4, 8};
-
-void __init h8300_timer_setup(void)
-{
-	unsigned int div;
-	unsigned int cnt;
-
-	calc_param(cnt, div, divide_rate, 0x10000);
-
-	setup_irq(_16IRQ, &timer16_irq);
-
-	/* initialize timer */
-	ctrl_outb(0, TSTR);
-	ctrl_outb(CCLR0 | div, _16BASE + TCR);
-	ctrl_outw(cnt, _16BASE + GRA);
-	ctrl_bset(4 + CONFIG_H8300_TIMER16_CH, TISRA);
-	ctrl_bset(CONFIG_H8300_TIMER16_CH, TSTR);
-}
diff --git a/arch/h8300/kernel/timer/timer8.c b/arch/h8300/kernel/timer/timer8.c
deleted file mode 100644
index 505f3415b40f..000000000000
--- a/arch/h8300/kernel/timer/timer8.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/cpu/timer/timer8.c
- *
- *  Yoshinori Sato <ysato@users.sourcefoge.jp>
- *
- *  8bit Timer Handler
- *
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/profile.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/timer.h>
-#if defined(CONFIG_CPU_H8300H)
-#include <asm/regs306x.h>
-#endif
-#if defined(CONFIG_CPU_H8S)
-#include <asm/regs267x.h>
-#endif
-
-/* 8bit timer x2 */
-#define CMFA	6
-
-#if defined(CONFIG_H8300_TIMER8_CH0)
-#define _8BASE	_8TCR0
-#ifdef CONFIG_CPU_H8300H
-#define _8IRQ	36
-#endif
-#ifdef CONFIG_CPU_H8S
-#define _8IRQ	72
-#endif
-#elif defined(CONFIG_H8300_TIMER8_CH2)
-#ifdef CONFIG_CPU_H8300H
-#define _8BASE	_8TCR2
-#define _8IRQ	40
-#endif
-#endif
-
-#ifndef _8BASE
-#error Unknown timer channel.
-#endif
-
-#define _8TCR	0
-#define _8TCSR	2
-#define TCORA	4
-#define TCORB	6
-#define _8TCNT	8
-
-#define CMIEA	0x40
-#define CCLR_CMA 0x08
-#define CKS2	0x04
-
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "xtime_update()" routine every clocktick
- */
-
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
-	h8300_timer_tick();
-	ctrl_bclr(CMFA, _8BASE + _8TCSR);
-	return IRQ_HANDLED;
-}
-
-static struct irqaction timer8_irq = {
-	.name		= "timer-8",
-	.handler	= timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER,
-};
-
-static const int __initconst divide_rate[] = {8, 64, 8192};
-
-void __init h8300_timer_setup(void)
-{
-	unsigned int div;
-	unsigned int cnt;
-
-	calc_param(cnt, div, divide_rate, 0x10000);
-	div++;
-
-	setup_irq(_8IRQ, &timer8_irq);
-
-#if defined(CONFIG_CPU_H8S)
-	/* Timer module enable */
-	ctrl_bclr(0, MSTPCRL)
-#endif
-
-	/* initialize timer */
-	ctrl_outw(cnt, _8BASE + TCORA);
-	ctrl_outw(0x0000, _8BASE + _8TCSR);
-	ctrl_outw((CMIEA|CCLR_CMA|CKS2) << 8 | div,
-		  _8BASE + _8TCR);
-}
diff --git a/arch/h8300/kernel/timer/tpu.c b/arch/h8300/kernel/timer/tpu.c
deleted file mode 100644
index 0350f6204ecf..000000000000
--- a/arch/h8300/kernel/timer/tpu.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *  linux/arch/h8300/kernel/timer/tpu.c
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  TPU Timer Handler
- *
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-
-#include <asm/segment.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/regs267x.h>
-
-/* TPU */
-#if CONFIG_H8300_TPU_CH == 0
-#define TPUBASE	0xffffd0
-#define TPUIRQ	40
-#elif CONFIG_H8300_TPU_CH == 1
-#define TPUBASE	0xffffe0
-#define TPUIRQ	48
-#elif CONFIG_H8300_TPU_CH == 2
-#define TPUBASE	0xfffff0
-#define TPUIRQ	52
-#elif CONFIG_H8300_TPU_CH == 3
-#define TPUBASE	0xfffe80
-#define TPUIRQ	56
-#elif CONFIG_H8300_TPU_CH == 4
-#define TPUBASE	0xfffe90
-#define TPUIRQ	64
-#else
-#error Unknown timer channel.
-#endif
-
-#define _TCR	0
-#define _TMDR	1
-#define _TIOR	2
-#define _TIER	4
-#define _TSR	5
-#define _TCNT	6
-#define _GRA	8
-#define _GRB	10
-
-#define CCLR0	0x20
-
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
-	h8300_timer_tick();
-	ctrl_bclr(0, TPUBASE + _TSR);
-	return IRQ_HANDLED;
-}
-
-static struct irqaction tpu_irq = {
-	.name		= "tpu",
-	.handler	= timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER,
-};
-
-static const int __initconst divide_rate[] = {
-#if CONFIG_H8300_TPU_CH == 0
-	1,4,16,64,0,0,0,0,
-#elif (CONFIG_H8300_TPU_CH == 1) || (CONFIG_H8300_TPU_CH == 5)
-	1,4,16,64,0,0,256,0,
-#elif (CONFIG_H8300_TPU_CH == 2) || (CONFIG_H8300_TPU_CH == 4)
-	1,4,16,64,0,0,0,1024,
-#elif CONFIG_H8300_TPU_CH == 3
-	1,4,16,64,0,1024,256,4096,
-#endif
-};
-
-void __init h8300_timer_setup(void)
-{
-	unsigned int cnt;
-	unsigned int div;
-
-	calc_param(cnt, div, divide_rate, 0x10000);
-
-	setup_irq(TPUIRQ, &tpu_irq);
-
-	/* TPU module enabled */
-	ctrl_bclr(3, MSTPCRH);
-
-	ctrl_outb(0, TSTR);
-	ctrl_outb(CCLR0 | div, TPUBASE + _TCR);
-	ctrl_outb(0, TPUBASE + _TMDR);
-	ctrl_outw(0, TPUBASE + _TIOR);
-	ctrl_outb(0x01, TPUBASE + _TIER);
-	ctrl_outw(cnt, TPUBASE + _GRA);
-	ctrl_bset(CONFIG_H8300_TPU_CH, TSTR);
-}
diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c
deleted file mode 100644
index cfe494dbe3da..000000000000
--- a/arch/h8300/kernel/traps.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * linux/arch/h8300/boot/traps.c -- general exception handling code
- * H8/300 support Yoshinori Sato <ysato@users.sourceforge.jp>
- * 
- * Cloned from Linux/m68k.
- *
- * No original Copyright holder listed,
- * Probable original (C) Roman Zippel (assigned DJD, 1999)
- *
- * Copyright 1999-2000 D. Jeff Dionne, <jeff@rt-control.com>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/bug.h>
-
-#include <asm/irq.h>
-#include <asm/traps.h>
-#include <asm/page.h>
-
-static DEFINE_SPINLOCK(die_lock);
-
-/*
- * this must be called very early as the kernel might
- * use some instruction that are emulated on the 060
- */
-
-void __init base_trap_init(void)
-{
-}
-
-void __init trap_init (void)
-{
-}
-
-asmlinkage void set_esp0 (unsigned long ssp)
-{
-	current->thread.esp0 = ssp;
-}
-
-/*
- *	Generic dumping code. Used for panic and debug.
- */
-
-static void dump(struct pt_regs *fp)
-{
-	unsigned long	*sp;
-	unsigned char	*tp;
-	int		i;
-
-	printk("\nCURRENT PROCESS:\n\n");
-	printk("COMM=%s PID=%d\n", current->comm, current->pid);
-	if (current->mm) {
-		printk("TEXT=%08x-%08x DATA=%08x-%08x BSS=%08x-%08x\n",
-			(int) current->mm->start_code,
-			(int) current->mm->end_code,
-			(int) current->mm->start_data,
-			(int) current->mm->end_data,
-			(int) current->mm->end_data,
-			(int) current->mm->brk);
-		printk("USER-STACK=%08x  KERNEL-STACK=%08lx\n\n",
-			(int) current->mm->start_stack,
-			(int) PAGE_SIZE+(unsigned long)current);
-	}
-
-	show_regs(fp);
-	printk("\nCODE:");
-	tp = ((unsigned char *) fp->pc) - 0x20;
-	for (sp = (unsigned long *) tp, i = 0; (i < 0x40);  i += 4) {
-		if ((i % 0x10) == 0)
-			printk("\n%08x: ", (int) (tp + i));
-		printk("%08x ", (int) *sp++);
-	}
-	printk("\n");
-
-	printk("\nKERNEL STACK:");
-	tp = ((unsigned char *) fp) - 0x40;
-	for (sp = (unsigned long *) tp, i = 0; (i < 0xc0); i += 4) {
-		if ((i % 0x10) == 0)
-			printk("\n%08x: ", (int) (tp + i));
-		printk("%08x ", (int) *sp++);
-	}
-	printk("\n");
-	if (STACK_MAGIC != *(unsigned long *)((unsigned long)current+PAGE_SIZE))
-                printk("(Possibly corrupted stack page??)\n");
-
-	printk("\n\n");
-}
-
-void die(const char *str, struct pt_regs *fp, unsigned long err)
-{
-	static int diecount;
-
-	oops_enter();
-
-	console_verbose();
-	spin_lock_irq(&die_lock);
-	report_bug(fp->pc, fp);
-	printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++diecount);
-	dump(fp);
-
-	spin_unlock_irq(&die_lock);
-	do_exit(SIGSEGV);
-}
-
-extern char _start, _etext;
-#define check_kernel_text(addr) \
-        ((addr >= (unsigned long)(&_start)) && \
-         (addr <  (unsigned long)(&_etext))) 
-
-static int kstack_depth_to_print = 24;
-
-void show_stack(struct task_struct *task, unsigned long *esp)
-{
-	unsigned long *stack,  addr;
-	int i;
-
-	if (esp == NULL)
-		esp = (unsigned long *) &esp;
-
-	stack = esp;
-
-	printk("Stack from %08lx:", (unsigned long)stack);
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (((unsigned long)stack & (THREAD_SIZE - 1)) == 0)
-			break;
-		if (i % 8 == 0)
-			printk("\n       ");
-		printk(" %08lx", *stack++);
-	}
-
-	printk("\nCall Trace:");
-	i = 0;
-	stack = esp;
-	while (((unsigned long)stack & (THREAD_SIZE - 1)) != 0) {
-		addr = *stack++;
-		/*
-		 * If the address is either in the text segment of the
-		 * kernel, or in the region which contains vmalloc'ed
-		 * memory, it *may* be the address of a calling
-		 * routine; if so, print it so that someone tracing
-		 * down the cause of the crash will be able to figure
-		 * out the call path that was taken.
-		 */
-		if (check_kernel_text(addr)) {
-			if (i % 4 == 0)
-				printk("\n       ");
-			printk(" [<%08lx>]", addr);
-			i++;
-		}
-	}
-	printk("\n");
-}
-
-void show_trace_task(struct task_struct *tsk)
-{
-	show_stack(tsk,(unsigned long *)tsk->thread.esp0);
-}
diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S
deleted file mode 100644
index 3253fed42ac1..000000000000
--- a/arch/h8300/kernel/vmlinux.lds.S
+++ /dev/null
@@ -1,157 +0,0 @@
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/page.h>
-
-/* target memory map */
-#ifdef CONFIG_H8300H_GENERIC
-#define ROMTOP  0x000000
-#define ROMSIZE 0x400000
-#define RAMTOP  0x400000
-#define RAMSIZE 0x400000
-#endif
-
-#ifdef CONFIG_H8300H_AKI3068NET
-#define ROMTOP  0x000000
-#define ROMSIZE 0x080000
-#define RAMTOP  0x400000
-#define RAMSIZE 0x200000
-#endif
-
-#ifdef CONFIG_H8300H_H8MAX
-#define ROMTOP  0x000000
-#define ROMSIZE 0x080000
-#define RAMTOP  0x400000
-#define RAMSIZE 0x200000
-#endif
-
-#ifdef CONFIG_H8300H_SIM
-#define ROMTOP  0x000000
-#define ROMSIZE 0x400000
-#define RAMTOP  0x400000
-#define RAMSIZE 0x400000
-#endif
-
-#ifdef CONFIG_H8S_SIM
-#define ROMTOP  0x000000
-#define ROMSIZE 0x400000
-#define RAMTOP  0x400000
-#define RAMSIZE 0x800000
-#endif
-
-#ifdef CONFIG_H8S_EDOSK2674
-#define ROMTOP  0x000000
-#define ROMSIZE 0x400000
-#define RAMTOP  0x400000
-#define RAMSIZE 0x800000
-#endif
-
-#if defined(CONFIG_H8300H_SIM) || defined(CONFIG_H8S_SIM)
-INPUT(romfs.o)
-#endif
-
-_jiffies = _jiffies_64 + 4;
-
-ENTRY(__start)
-
-SECTIONS
-{
-#if defined(CONFIG_ROMKERNEL)
-	. = ROMTOP; 
-	.vectors :
-	{
-	__vector = . ;
-		*(.vectors*)
-	}
-#else
-	. = RAMTOP; 
-	.bootvec :	
-	{
-		*(.bootvec)
-	}
-#endif
-        .text :
-	{
-	_text = .;
-#if defined(CONFIG_ROMKERNEL)
-	*(.int_redirect)
-#endif
-	__stext = . ;
-	TEXT_TEXT
-	SCHED_TEXT
-	LOCK_TEXT
-	__etext = . ;
-	}
-	EXCEPTION_TABLE(16)
-
-	RODATA
-#if defined(CONFIG_ROMKERNEL)
-	SECURITY_INIT
-#endif
-	ROEND = .; 
-#if defined(CONFIG_ROMKERNEL)
-	. = RAMTOP;
-	.data :	AT(ROEND)
-#else
-	.data :	
-#endif
-	{
-	__sdata = . ;
-	___data_start = . ;
-
-	INIT_TASK_DATA(0x2000)
-	. = ALIGN(0x4) ;
-		DATA_DATA
-	. = ALIGN(0x4) ;
-		*(.data.*)	
-
-	. = ALIGN(0x4) ;
-	___init_begin = .;
-	__sinittext = .; 
-		INIT_TEXT
-	__einittext = .; 
-		INIT_DATA
-	. = ALIGN(0x4) ;
-	INIT_SETUP(0x4)
-	___setup_start = .;
-		*(.init.setup)
-	. = ALIGN(0x4) ;
-	___setup_end = .;
-	INIT_CALLS
-	CON_INITCALL
-		EXIT_TEXT
-		EXIT_DATA
-	INIT_RAM_FS
-	. = ALIGN(0x4) ;
-	___init_end = .;
-	__edata = . ;
-	}
-#if defined(CONFIG_RAMKERNEL)
-	SECURITY_INIT
-#endif
-	__begin_data = LOADADDR(.data);
-        .bss : 
-        {
-	. = ALIGN(0x4) ;
-	__sbss = . ;
-	___bss_start = . ;
-		*(.bss*)
-	. = ALIGN(0x4) ;
-		*(COMMON)
-	. = ALIGN(0x4) ;
-	___bss_stop = . ;
-	__ebss = . ;
-	__end = . ;
-	__ramstart = .;
-	}
-        .romfs :	
-	{
-		*(.romfs*)
-	}
-	. = RAMTOP+RAMSIZE;
-        .dummy :
-        {
-	COMMAND_START = . - 0x200 ;
-	__ramend = . ;
-	}
-
-	DISCARDS
-}
diff --git a/arch/h8300/lib/Makefile b/arch/h8300/lib/Makefile
deleted file mode 100644
index 1577f5075b10..000000000000
--- a/arch/h8300/lib/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for H8/300-specific library files..
-#
-
-lib-y  = ashrdi3.o checksum.o memcpy.o memset.o abs.o romfs.o
diff --git a/arch/h8300/lib/abs.S b/arch/h8300/lib/abs.S
deleted file mode 100644
index ddd1fb3d01ad..000000000000
--- a/arch/h8300/lib/abs.S
+++ /dev/null
@@ -1,21 +0,0 @@
-;;; abs.S
-
-#include <asm/linkage.h>
-
-#if defined(__H8300H__) 
-	.h8300h
-#endif
-#if defined(__H8300S__) 
-	.h8300s
-#endif
-	.text
-.global _abs
-
-;;; int abs(int n)
-_abs:
-	mov.l	er0,er0
-	bpl	1f
-	neg.l	er0
-1:
-	rts
-	
diff --git a/arch/h8300/lib/ashrdi3.c b/arch/h8300/lib/ashrdi3.c
deleted file mode 100644
index 78efb65e315a..000000000000
--- a/arch/h8300/lib/ashrdi3.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/* ashrdi3.c extracted from gcc-2.7.2/libgcc2.c which is: */
-/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__ashrdi3 (DItype u, word_type b)
-{
-  DIunion w;
-  word_type bm;
-  DIunion uu;
-
-  if (b == 0)
-    return u;
-
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      /* w.s.high = 1..1 or 0..0 */
-      w.s.high = uu.s.high >> (sizeof (SItype) * BITS_PER_UNIT - 1);
-      w.s.low = uu.s.high >> -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.high << bm;
-      w.s.high = uu.s.high >> b;
-      w.s.low = ((USItype)uu.s.low >> b) | carries;
-    }
-
-  return w.ll;
-}
diff --git a/arch/h8300/lib/checksum.c b/arch/h8300/lib/checksum.c
deleted file mode 100644
index bdc5b032acd6..000000000000
--- a/arch/h8300/lib/checksum.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * INET		An implementation of the TCP/IP protocol suite for the LINUX
- *		operating system.  INET is implemented using the  BSD Socket
- *		interface as the means of communication with the user level.
- *
- *		IP/TCP/UDP checksumming routines
- *
- * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
- *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- *		Tom May, <ftom@netcom.com>
- *		Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
- *		Lots of code moved from tcp.c and ip.c; see those files
- *		for more names.
- *
- * 03/02/96	Jes Sorensen, Andreas Schwab, Roman Hodek:
- *		Fixed some nasty bugs, causing some horrible crashes.
- *		A: At some points, the sum (%0) was used as
- *		length-counter instead of the length counter
- *		(%1). Thanks to Roman Hodek for pointing this out.
- *		B: GCC seems to mess up if one uses too many
- *		data-registers to hold input values and one tries to
- *		specify d0 and d1 as scratch registers. Letting gcc choose these
- *      registers itself solves the problem.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
- 
-/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most
-   of the assembly has to go. */
-
-#include <net/checksum.h>
-#include <linux/module.h>
-
-static inline unsigned short from32to16(unsigned long x)
-{
-	/* add up 16-bit and 16-bit for 16+c bit */
-	x = (x & 0xffff) + (x >> 16);
-	/* add up carry.. */
-	x = (x & 0xffff) + (x >> 16);
-	return x;
-}
-
-static unsigned long do_csum(const unsigned char * buff, int len)
-{
-	int odd, count;
-	unsigned long result = 0;
-
-	if (len <= 0)
-		goto out;
-	odd = 1 & (unsigned long) buff;
-	if (odd) {
-		result = *buff;
-		len--;
-		buff++;
-	}
-	count = len >> 1;		/* nr of 16-bit words.. */
-	if (count) {
-		if (2 & (unsigned long) buff) {
-			result += *(unsigned short *) buff;
-			count--;
-			len -= 2;
-			buff += 2;
-		}
-		count >>= 1;		/* nr of 32-bit words.. */
-		if (count) {
-		        unsigned long carry = 0;
-			do {
-				unsigned long w = *(unsigned long *) buff;
-				count--;
-				buff += 4;
-				result += carry;
-				result += w;
-				carry = (w > result);
-			} while (count);
-			result += carry;
-			result = (result & 0xffff) + (result >> 16);
-		}
-		if (len & 2) {
-			result += *(unsigned short *) buff;
-			buff += 2;
-		}
-	}
-	if (len & 1)
-		result += (*buff << 8);
-	result = from32to16(result);
-	if (odd)
-		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-out:
-	return result;
-}
-
-/*
- *	This is a version of ip_compute_csum() optimized for IP headers,
- *	which always checksum on 4 octet boundaries.
- */
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-	return (__force __sum16)~do_csum(iph,ihl*4);
-}
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-/*
- * Egads...  That thing apparently assumes that *all* checksums it ever sees will
- * be folded.  Very likely a bug.
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
-{
-	unsigned int result = do_csum(buff, len);
-
-	/* add in old sum, and carry.. */
-	result += (__force u32)sum;
-	/* 16+c bits -> 16 bits */
-	result = (result & 0xffff) + (result >> 16);
-	return (__force __wsum)result;
-}
-
-EXPORT_SYMBOL(csum_partial);
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-__sum16 ip_compute_csum(const void *buff, int len)
-{
-	return (__force __sum16)~do_csum(buff,len);
-}
-
-/*
- * copy from fs while checksumming, otherwise like csum_partial
- */
-
-__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst, int len,
-			    __wsum sum, int *csum_err)
-{
-	if (csum_err) *csum_err = 0;
-	memcpy(dst, (__force const void *)src, len);
-	return csum_partial(dst, len, sum);
-}
-
-/*
- * copy from ds while checksumming, otherwise like csum_partial
- */
-
-__wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
-{
-	memcpy(dst, src, len);
-	return csum_partial(dst, len, sum);
-}
diff --git a/arch/h8300/lib/memcpy.S b/arch/h8300/lib/memcpy.S
deleted file mode 100644
index cad325e2c0e8..000000000000
--- a/arch/h8300/lib/memcpy.S
+++ /dev/null
@@ -1,84 +0,0 @@
-;;; memcpy.S
-
-#include <asm/linkage.h>
-
-#if defined(__H8300H__) 
-	.h8300h
-#endif
-#if defined(__H8300S__) 
-	.h8300s
-#endif
-
-	.text
-.global _memcpy
-
-;;; void *memcpy(void *to, void *from, size_t n)
-_memcpy:
-	mov.l	er2,er2
-	bne	1f
-	rts	
-1:	
-	;; address check
-	bld	#0,r0l
-	bxor	#0,r1l
-	bcs	4f
-	mov.l	er4,@-sp
-	mov.l	er0,@-sp
-	btst	#0,r0l
-	beq	1f
-	;; (aligned even) odd address
-	mov.b	@er1,r3l
-	mov.b	r3l,@er0
-	adds	#1,er1
-	adds	#1,er0
-	dec.l	#1,er2
-	beq	3f
-1:	
-	;; n < sizeof(unsigned long) check
-	sub.l	er4,er4
-	adds	#4,er4		; loop count check value
-	cmp.l	er4,er2
-	blo	2f
-	;; unsigned long copy
-1:	
-	mov.l	@er1,er3
-	mov.l	er3,@er0
-	adds	#4,er0
-	adds	#4,er1
-	subs	#4,er2
-	cmp.l	er4,er2
-	bcc	1b	
-	;; rest
-2:	
-	mov.l	er2,er2
-	beq	3f
-1:	
-	mov.b	@er1,r3l
-	mov.b	r3l,@er0
-	adds	#1,er1
-	adds	#1,er0
-	dec.l	#1,er2
-	bne	1b
-3:
-	mov.l	@sp+,er0
-	mov.l	@sp+,er4
-	rts
-
-	;; odd <- even / even <- odd
-4:	
-	mov.l	er4,er3
-	mov.l	er2,er4
-	mov.l	er5,er2
-	mov.l	er1,er5
-	mov.l	er6,er1
-	mov.l	er0,er6
-1:
-	eepmov.w
-	mov.w	r4,r4
-	bne	1b
-	dec.w	#1,e4
-	bpl	1b
-	mov.l	er1,er6
-	mov.l	er2,er5
-	mov.l	er3,er4
-	rts
diff --git a/arch/h8300/lib/memset.S b/arch/h8300/lib/memset.S
deleted file mode 100644
index 4549a64c5b79..000000000000
--- a/arch/h8300/lib/memset.S
+++ /dev/null
@@ -1,61 +0,0 @@
-/* memset.S */
-
-#include <asm/linkage.h>
-
-#if defined(__H8300H__) 
-	.h8300h
-#endif
-#if defined(__H8300S__) 
-	.h8300s
-#endif
-	.text
-
-.global	_memset
-
-;;void *memset(*ptr, int c, size_t count)
-;; ptr = er0
-;; c   = er1(r1l)
-;; count = er2
-_memset:
-	btst	#0,r0l
-	beq	2f
-
-	;; odd address
-1:
-	mov.b	r1l,@er0
-	adds	#1,er0
-	dec.l	#1,er2
-	beq	6f
-
-	;; even address
-2:
-	mov.l	er2,er3
-	cmp.l	#4,er2
-	blo	4f
-	;; count>=4 -> count/4
-#if defined(__H8300H__)
-	shlr.l	er2
-	shlr.l	er2
-#endif
-#if defined(__H8300S__)
-	shlr.l	#2,er2
-#endif
-	;; byte -> long
-	mov.b	r1l,r1h
-	mov.w	r1,e1
-3:
-	mov.l	er1,@er0
-	adds	#4,er0
-	dec.l	#1,er2
-	bne	3b
-4:
-	;; count % 4
-	and.b	#3,r3l
-	beq	6f
-5:
-	mov.b	r1l,@er0
-	adds	#1,er0
-	dec.b	r3l
-	bne	5b
-6:
-	rts
diff --git a/arch/h8300/lib/romfs.S b/arch/h8300/lib/romfs.S
deleted file mode 100644
index 68910d8e1ff4..000000000000
--- a/arch/h8300/lib/romfs.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* romfs move to __ebss */
-
-#include <asm/linkage.h>
-
-#if defined(__H8300H__) 
-	.h8300h
-#endif
-#if defined(__H8300S__) 
-	.h8300s
-#endif
-
-#define BLKOFFSET 512
-
-	.text
-.globl	__move_romfs
-_romfs_sig_len = 8
-
-__move_romfs:	
-	mov.l	#__sbss,er0
-	mov.l	#_romfs_sig,er1
-	mov.b	#_romfs_sig_len,r3l
-1:					/* check romfs image */
-	mov.b	@er0+,r2l
-	mov.b	@er1+,r2h
-	cmp.b	r2l,r2h
-	bne	2f
-	dec.b	r3l
-	bne	1b
-
-	/* find romfs image */
-	mov.l	@__sbss+8,er0		/* romfs length(be) */
-	mov.l	#__sbss,er1
-	add.l	er0,er1			/* romfs image end */
-	mov.l	#__ebss,er2
-	add.l	er0,er2			/* distination address */
-#if defined(CONFIG_INTELFLASH)
-	add.l	#BLKOFFSET,er2
-#endif
-	adds	#2,er0
-	adds	#1,er0
-	shlr	er0
-	shlr	er0			/* transfer length */
-1:
-	mov.l	@er1,er3		/* copy image */
-	mov.l	er3,@er2
-	subs	#4,er1
-	subs	#4,er2
-	dec.l	#1,er0
-	bpl	1b
-2:
-	rts
-
-	.section	.rodata
-_romfs_sig:	
-	.ascii	"-rom1fs-"
-
-	.end
diff --git a/arch/h8300/mm/Makefile b/arch/h8300/mm/Makefile
deleted file mode 100644
index 5f4bc42b6453..000000000000
--- a/arch/h8300/mm/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the linux m68k-specific parts of the memory manager.
-#
-
-obj-y	 := init.o fault.o memory.o kmap.o
diff --git a/arch/h8300/mm/fault.c b/arch/h8300/mm/fault.c
deleted file mode 100644
index 472535977006..000000000000
--- a/arch/h8300/mm/fault.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  linux/arch/h8300/mm/fault.c
- *
- *  Copyright (C) 1998  D. Jeff Dionne <jeff@lineo.ca>,
- *  Copyright (C) 2000  Lineo, Inc.  (www.lineo.com) 
- *
- *  Based on:
- *
- *  linux/arch/m68knommu/mm/fault.c
- *  linux/arch/m68k/mm/fault.c
- *
- *  Copyright (C) 1995  Hamish Macdonald
- */
-
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-
-#include <asm/pgtable.h>
-
-/*
- * This routine handles page faults.  It determines the problem, and
- * then passes it off to one of the appropriate routines.
- *
- * error_code:
- *	bit 0 == 0 means no page found, 1 means protection fault
- *	bit 1 == 0 means read, 1 means write
- *
- * If this routine detects a bad access, it returns 1, otherwise it
- * returns 0.
- */
-asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
-			      unsigned long error_code)
-{
-#ifdef DEBUG
-	printk ("regs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld\n",
-		regs->sr, regs->pc, address, error_code);
-#endif
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-	if ((unsigned long) address < PAGE_SIZE) {
-		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
-	} else
-		printk(KERN_ALERT "Unable to handle kernel access");
-	printk(" at virtual address %08lx\n",address);
-	if (!user_mode(regs))
-		die("Oops", regs, error_code);
-	do_exit(SIGKILL);
-
-	return 1;
-}
-
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
deleted file mode 100644
index 6c1251e491af..000000000000
--- a/arch/h8300/mm/init.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- *  linux/arch/h8300/mm/init.c
- *
- *  Copyright (C) 1998  D. Jeff Dionne <jeff@lineo.ca>,
- *                      Kenneth Albanowski <kjahds@kjahds.com>,
- *  Copyright (C) 2000  Lineo, Inc.  (www.lineo.com) 
- *
- *  Based on:
- *
- *  linux/arch/m68knommu/mm/init.c
- *  linux/arch/m68k/mm/init.c
- *
- *  Copyright (C) 1995  Hamish Macdonald
- *
- *  JAN/1999 -- hacked to support ColdFire (gerg@snapgear.com)
- *  DEC/2000 -- linux 2.4 support <davidm@snapgear.com>
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/init.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/bootmem.h>
-#include <linux/gfp.h>
-
-#include <asm/setup.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/sections.h>
-
-#undef DEBUG
-
-/*
- * BAD_PAGE is the page that is used for page faults when linux
- * is out-of-memory. Older versions of linux just did a
- * do_exit(), but using this instead means there is less risk
- * for a process dying in kernel mode, possibly leaving a inode
- * unused etc..
- *
- * BAD_PAGETABLE is the accompanying page-table: it is initialized
- * to point to BAD_PAGE entries.
- *
- * ZERO_PAGE is a special page that is used for zero-initialized
- * data and COW.
- */
-static unsigned long empty_bad_page_table;
-
-static unsigned long empty_bad_page;
-
-unsigned long empty_zero_page;
-
-extern unsigned long rom_length;
-
-extern unsigned long memory_start;
-extern unsigned long memory_end;
-
-/*
- * paging_init() continues the virtual memory environment setup which
- * was begun by the code in arch/head.S.
- * The parameters are pointers to where to stick the starting and ending
- * addresses of available kernel virtual memory.
- */
-void __init paging_init(void)
-{
-	/*
-	 * Make sure start_mem is page aligned,  otherwise bootmem and
-	 * page_alloc get different views og the world.
-	 */
-#ifdef DEBUG
-	unsigned long start_mem = PAGE_ALIGN(memory_start);
-#endif
-	unsigned long end_mem   = memory_end & PAGE_MASK;
-
-#ifdef DEBUG
-	printk ("start_mem is %#lx\nvirtual_end is %#lx\n",
-		start_mem, end_mem);
-#endif
-
-	/*
-	 * Initialize the bad page table and bad page to point
-	 * to a couple of allocated pages.
-	 */
-	empty_bad_page_table = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
-	empty_bad_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
-	empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
-	memset((void *)empty_zero_page, 0, PAGE_SIZE);
-
-	/*
-	 * Set up SFC/DFC registers (user data space).
-	 */
-	set_fs (USER_DS);
-
-#ifdef DEBUG
-	printk ("before free_area_init\n");
-
-	printk ("free_area_init -> start_mem is %#lx\nvirtual_end is %#lx\n",
-		start_mem, end_mem);
-#endif
-
-	{
-		unsigned long zones_size[MAX_NR_ZONES] = {0, };
-
-		zones_size[ZONE_DMA]     = 0 >> PAGE_SHIFT;
-		zones_size[ZONE_NORMAL]  = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
-#ifdef CONFIG_HIGHMEM
-		zones_size[ZONE_HIGHMEM] = 0;
-#endif
-		free_area_init(zones_size);
-	}
-}
-
-void __init mem_init(void)
-{
-	unsigned long codesize = _etext - _stext;
-
-	pr_devel("Mem_init: start=%lx, end=%lx\n", memory_start, memory_end);
-
-	high_memory = (void *) (memory_end & PAGE_MASK);
-	max_mapnr = MAP_NR(high_memory);
-
-	/* this will put all low memory onto the freelists */
-	free_all_bootmem();
-
-	mem_init_print_info(NULL);
-	if (rom_length > 0 && rom_length > codesize)
-		pr_info("Memory available: %luK/%luK ROM\n",
-			(rom_length - codesize) >> 10, rom_length >> 10);
-}
-
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
-	free_reserved_area((void *)start, (void *)end, -1, "initrd");
-}
-#endif
-
-void
-free_initmem(void)
-{
-#ifdef CONFIG_RAMKERNEL
-	free_initmem_default(-1);
-#endif
-}
-
diff --git a/arch/h8300/mm/kmap.c b/arch/h8300/mm/kmap.c
deleted file mode 100644
index f79edcdadf39..000000000000
--- a/arch/h8300/mm/kmap.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- *  linux/arch/h8300/mm/kmap.c
- *  
- *  Based on
- *  linux/arch/m68knommu/mm/kmap.c
- *
- *  Copyright (C) 2000 Lineo, <davidm@snapgear.com>
- *  Copyright (C) 2000-2002 David McCullough <davidm@snapgear.com>
- */
-
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/vmalloc.h>
-
-#include <asm/setup.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/io.h>
-
-#undef DEBUG
-
-#define VIRT_OFFSET (0x01000000)
-
-/*
- * Map some physical address range into the kernel address space.
- */
-void *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag)
-{
-	return (void *)(physaddr + VIRT_OFFSET);
-}
-
-/*
- * Unmap a ioremap()ed region again.
- */
-void iounmap(void *addr)
-{
-}
-
-/*
- * __iounmap unmaps nearly everything, so be careful
- * it doesn't free currently pointer/page tables anymore but it
- * wans't used anyway and might be added later.
- */
-void __iounmap(void *addr, unsigned long size)
-{
-}
-
-/*
- * Set new cache mode for some kernel address space.
- * The caller must push data for that range itself, if such data may already
- * be in the cache.
- */
-void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
-{
-}
diff --git a/arch/h8300/mm/memory.c b/arch/h8300/mm/memory.c
deleted file mode 100644
index 06e364641392..000000000000
--- a/arch/h8300/mm/memory.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  linux/arch/h8300/mm/memory.c
- *
- *  Copyright (C) 2002  Yoshinori Sato <ysato@users.sourceforge.jp>,
- *
- *  Based on:
- *
- *  linux/arch/m68knommu/mm/memory.c
- *
- *  Copyright (C) 1998  Kenneth Albanowski <kjahds@kjahds.com>,
- *  Copyright (C) 1999-2002, Greg Ungerer (gerg@snapgear.com)
- *
- *  Based on:
- *
- *  linux/arch/m68k/mm/memory.c
- *
- *  Copyright (C) 1995  Hamish Macdonald
- */
-
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-#include <asm/setup.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/traps.h>
-#include <asm/io.h>
-
-void cache_clear (unsigned long paddr, int len)
-{
-}
-
-
-void cache_push (unsigned long paddr, int len)
-{
-}
-
-void cache_push_v (unsigned long vaddr, int len)
-{
-}
-
-/*
- * Map some physical address range into the kernel address space.
- */
-
-unsigned long kernel_map(unsigned long paddr, unsigned long size,
-			 int nocacheflag, unsigned long *memavailp )
-{
-	return paddr;
-}
-
diff --git a/arch/h8300/platform/h8300h/Makefile b/arch/h8300/platform/h8300h/Makefile
deleted file mode 100644
index 420f73b0d962..000000000000
--- a/arch/h8300/platform/h8300h/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-# Reuse any files we can from the H8/300H
-#
-
-obj-y := irq.o ptrace_h8300h.o
diff --git a/arch/h8300/platform/h8300h/aki3068net/Makefile b/arch/h8300/platform/h8300h/aki3068net/Makefile
deleted file mode 100644
index b7ff78050b7f..000000000000
--- a/arch/h8300/platform/h8300h/aki3068net/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := crt0_ram.o
diff --git a/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S b/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S
deleted file mode 100644
index b2ad0f2d0417..000000000000
--- a/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Platform depend startup
- *  Target Archtecture:	AE-3068 (aka. aki3068net)
- *  Memory Layout     :	RAM
- */
-
-#define ASSEMBLY
-
-#include <asm/linkage.h>
-	
-#if !defined(CONFIG_BLKDEV_RESERVE)
-#if defined(CONFIG_GDB_DEBUG)
-#define RAMEND (__ramend - 0xc000)
-#else
-#define RAMEND __ramend
-#endif
-#else
-#define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
-#endif
-	
-	.global __start
-	.global _command_line
-	.global __platform_gpio_table
-	.global __target_name
-	
-	.h8300h
-
-	.section .text
-	.file	"crt0_ram.S"
-
-	/* CPU Reset entry */
-__start:
-	mov.l	#RAMEND,sp
-	ldc	#0x80,ccr
-
-	/* Peripheral Setup */
-	
-#if defined(CONFIG_MTD_UCLINUX)
-	/* move romfs image */
-	jsr	@__move_romfs	
-#endif
-	
-	/* .bss clear */
-	mov.l	#__sbss,er5
-	mov.l	#__ebss,er4
-	sub.l	er5,er4
-	shlr	er4
-	shlr	er4
-	sub.l	er0,er0
-1:	
-	mov.l	er0,@er5
-	adds	#4,er5
-	dec.l	#1,er4
-	bne	1b
-
-	/* copy kernel commandline */
-	mov.l	#COMMAND_START,er5
-	mov.l	#_command_line,er6
-	mov.w	#512,r4
-	eepmov.w
-
-	/* uClinux kernel start */
-	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#_init_thread_union,sp
-	add.l	#0x2000,sp
-	jsr	@_start_kernel
-_exit:
-
-	jmp	_exit
-
-	rts
-
-	/* I/O port assign information */
-__platform_gpio_table:	
-	mov.l	#gpio_table,er0
-	rts
-
-gpio_table:
-	;; P1DDR
-	.byte	0xff,0xff
-	;; P2DDR
-	.byte	0xff,0xff
-	;; P3DDR
-	.byte	0xff,0x00
-	;; P4DDR
-	.byte	0x00,0x00
-	;; P5DDR
-	.byte	0x01,0x01
-	;; P6DDR
-	.byte	0x00,0x00
-	;; dummy
-	.byte	0x00,0x00
-	;; P8DDR
-	.byte	0x0c,0x0c
-	;; P9DDR
-	.byte	0x00,0x00
-	;; PADDR
-	.byte	0x00,0x00
-	;; PBDDR
-	.byte	0x30,0x30
-
-__target_name:	
-	.asciz	"AE-3068"
-	
-	.section .bootvec,"ax"
-	jmp	@__start
diff --git a/arch/h8300/platform/h8300h/generic/Makefile b/arch/h8300/platform/h8300h/generic/Makefile
deleted file mode 100644
index 2b12a170209e..000000000000
--- a/arch/h8300/platform/h8300h/generic/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y :=  crt0_$(MODEL).o
diff --git a/arch/h8300/platform/h8300h/generic/crt0_ram.S b/arch/h8300/platform/h8300h/generic/crt0_ram.S
deleted file mode 100644
index 5ab7d9c12910..000000000000
--- a/arch/h8300/platform/h8300h/generic/crt0_ram.S
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8300h/generic/crt0_ram.S
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Platform depend startup
- *  Target Archtecture:	AE-3068 (aka. aki3068net)
- *  Memory Layout     :	RAM
- */
-
-#define ASSEMBLY
-
-#include <asm/linkage.h>
-	
-#if !defined(CONFIG_BLKDEV_RESERVE)
-#if defined(CONFIG_GDB_DEBUG)
-#define RAMEND (__ramend - 0xc000)
-#else
-#define RAMEND __ramend
-#endif
-#else
-#define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
-#endif
-	
-	.global __start
-	.global _command_line
-	.global __platform_gpio_table
-	.global __target_name
-	
-	.h8300h
-
-	.section .text
-	.file	"crt0_ram.S"
-
-	/* CPU Reset entry */
-__start:
-	mov.l	#RAMEND,sp
-	ldc	#0x80,ccr
-
-	/* Peripheral Setup */
-	
-#if defined(CONFIG_BLK_DEV_BLKMEM)
-	/* move romfs image */
-	jsr	@__move_romfs	
-#endif
-	
-	/* .bss clear */
-	mov.l	#__sbss,er5
-	mov.l	#__ebss,er4
-	sub.l	er5,er4
-	shlr	er4
-	shlr	er4
-	sub.l	er0,er0
-1:	
-	mov.l	er0,@er5
-	adds	#4,er5
-	dec.l	#1,er4
-	bne	1b
-
-	/* copy kernel commandline */
-	mov.l	#COMMAND_START,er5
-	mov.l	#_command_line,er6
-	mov.w	#512,r4
-	eepmov.w
-
-	/* uClinux kernel start */
-	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#_init_thread_union,sp
-	add.l	#0x2000,sp
-	jsr	@_start_kernel
-_exit:
-
-	jmp	_exit
-
-	rts
-
-	/* I/O port assign information */
-__platform_gpio_table:	
-	mov.l	#gpio_table,er0
-	rts
-
-gpio_table:
-	;; P1DDR
-	.byte	0x00,0x00
-	;; P2DDR
-	.byte	0x00,0x00
-	;; P3DDR
-	.byte	0x00,0x00
-	;; P4DDR
-	.byte	0x00,0x00
-	;; P5DDR
-	.byte	0x00,0x00
-	;; P6DDR
-	.byte	0x00,0x00
-	;; dummy
-	.byte	0x00,0x00
-	;; P8DDR
-	.byte	0x00,0x00
-	;; P9DDR
-	.byte	0x00,0x00
-	;; PADDR
-	.byte	0x00,0x00
-	;; PBDDR
-	.byte	0x00,0x00
-
-__target_name:	
-	.asciz	"generic"
diff --git a/arch/h8300/platform/h8300h/generic/crt0_rom.S b/arch/h8300/platform/h8300h/generic/crt0_rom.S
deleted file mode 100644
index dda1dfa15a5e..000000000000
--- a/arch/h8300/platform/h8300h/generic/crt0_rom.S
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8300h/generic/crt0_rom.S
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Platform depend startup
- *  Target Archtecture:	generic
- *  Memory Layout     :	ROM
- */
-
-#define ASSEMBLY
-
-#include <asm/linkage.h>
-	
-	.global __start
-	.global __command_line
-	.global __platform_gpio_table
-	.global __target_name
-	
-	.h8300h
-	.section .text
-	.file	"crt0_rom.S"
-
-	/* CPU Reset entry */
-__start:
-	mov.l	#__ramend,sp
-	ldc	#0x80,ccr
-
-	/* Peripheral Setup */
-	
-	/* .bss clear */
-	mov.l	#__sbss,er5
-	mov.l	#__ebss,er4
-	sub.l	er5,er4
-	shlr	er4
-	shlr	er4
-	sub.l	er0,er0
-1:	
-	mov.l	er0,@er5
-	adds	#4,er5
-	dec.l	#1,er4
-	bne	1b
-
-	/* copy .data */
-#if !defined(CONFIG_H8300H_SIM)
-	/* copy .data */
-	mov.l	#__begin_data,er5
-	mov.l	#__sdata,er6
-	mov.l	#__edata,er4
-	sub.l	er6,er4
-	shlr.l	er4
-	shlr.l	er4
-1:	
-	mov.l	@er5+,er0
-	mov.l	er0,@er6
-	adds	#4,er6
-	dec.l	#1,er4
-	bne	1b	
-#endif
-
-	/* copy kernel commandline */
-	mov.l	#COMMAND_START,er5
-	mov.l	#__command_line,er6
-	mov.w	#512,r4
-	eepmov.w
-
-	/* linux kernel start */
-	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#_init_thread_union,sp
-	add.l	#0x2000,sp
-	jsr	@_start_kernel
-_exit:
-
-	jmp	_exit
-
-	rts
-
-	/* I/O port assign information */
-__platform_gpio_table:	
-	mov.l	#gpio_table,er0
-	rts
-
-gpio_table:
-	;; P1DDR
-	.byte	0x00,0x00
-	;; P2DDR
-	.byte	0x00,0x00
-	;; P3DDR
-	.byte	0x00,0x00
-	;; P4DDR
-	.byte	0x00,0x00
-	;; P5DDR
-	.byte	0x00,0x00
-	;; P6DDR
-	.byte	0x00,0x00
-	;; dummy
-	.byte	0x00,0x00
-	;; P8DDR
-	.byte	0x00,0x00
-	;; P9DDR
-	.byte	0x00,0x00
-	;; PADDR
-	.byte	0x00,0x00
-	;; PBDDR
-	.byte	0x00,0x00
-
-	.section .rodata
-__target_name:	
-	.asciz	"generic"
-	
-	.section .bss
-__command_line:	
-	.space	512
-
-	/* interrupt vector */
-	.section .vectors,"ax"
-	.long	__start
-vector	=	1
-	.rept	64-1
-	.long	_interrupt_redirect_table+vector*4
-vector	=	vector + 1
-	.endr
diff --git a/arch/h8300/platform/h8300h/h8max/Makefile b/arch/h8300/platform/h8300h/h8max/Makefile
deleted file mode 100644
index b7ff78050b7f..000000000000
--- a/arch/h8300/platform/h8300h/h8max/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := crt0_ram.o
diff --git a/arch/h8300/platform/h8300h/h8max/crt0_ram.S b/arch/h8300/platform/h8300h/h8max/crt0_ram.S
deleted file mode 100644
index 6a0d4e2d9ec6..000000000000
--- a/arch/h8300/platform/h8300h/h8max/crt0_ram.S
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8300h/h8max/crt0_ram.S
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Platform depend startup
- *  Target Archtecture:	H8MAX
- *  Memory Layout     :	RAM
- */
-
-#define ASSEMBLY
-
-#include <asm/linkage.h>
-	
-#if !defined(CONFIG_BLKDEV_RESERVE)
-#if defined(CONFIG_GDB_DEBUG)
-#define RAMEND (__ramend - 0xc000)
-#else
-#define RAMEND __ramend
-#endif
-#else
-#define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
-#endif
-	
-	.global __start
-	.global _command_line
-	.global __platform_gpio_table
-	.global __target_name
-	
-	.h8300h
-
-	.section .text
-	.file	"crt0_ram.S"
-
-	/* CPU Reset entry */
-__start:
-	mov.l	#RAMEND,sp
-	ldc	#0x80,ccr
-
-	/* Peripheral Setup */
-	
-#if defined(CONFIG_MTD_UCLINUX)
-	/* move romfs image */
-	jsr	@__move_romfs	
-#endif
-	
-	/* .bss clear */
-	mov.l	#__sbss,er5
-	mov.l	#__ebss,er4
-	sub.l	er5,er4
-	shlr	er4
-	shlr	er4
-	sub.l	er0,er0
-1:	
-	mov.l	er0,@er5
-	adds	#4,er5
-	dec.l	#1,er4
-	bne	1b
-
-	/* copy kernel commandline */
-	mov.l	#COMMAND_START,er5
-	mov.l	#_command_line,er6
-	mov.w	#512,r4
-	eepmov.w
-
-	/* uClinux kernel start */
-	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#_init_thread_union,sp
-	add.l	#0x2000,sp
-	jsr	@_start_kernel
-_exit:
-
-	jmp	_exit
-
-	rts
-
-	/* I/O port assign information */
-__platform_gpio_table:	
-	mov.l	#gpio_table,er0
-	rts
-
-gpio_table:
-	;; P1DDR
-	.byte	0xff,0xff
-	;; P2DDR
-	.byte	0xff,0xff
-	;; P3DDR
-	.byte	0x00,0x00
-	;; P4DDR
-	.byte	0x00,0x00
-	;; P5DDR
-	.byte	0x01,0x01
-	;; P6DDR
-	.byte	0xf6,0xf6
-	;; dummy
-	.byte	0x00,0x00
-	;; P8DDR
-	.byte	0xee,0xee
-	;; P9DDR
-	.byte	0x00,0x00
-	;; PADDR
-	.byte	0x00,0x00
-	;; PBDDR
-	.byte	0x30,0x30
-
-__target_name:	
-	.asciz	"H8MAX"
-	
-	.section .bootvec,"ax"
-	jmp	@__start
diff --git a/arch/h8300/platform/h8300h/irq.c b/arch/h8300/platform/h8300h/irq.c
deleted file mode 100644
index 0a50353e09d5..000000000000
--- a/arch/h8300/platform/h8300h/irq.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Interrupt handling H8/300H depend.
- * Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- */
-
-#include <linux/init.h>
-#include <linux/errno.h>
-
-#include <asm/ptrace.h>
-#include <asm/traps.h>
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/gpio-internal.h>
-#include <asm/regs306x.h>
-
-const int __initconst h8300_saved_vectors[] = {
-#if defined(CONFIG_GDB_DEBUG)
-	TRAP3_VEC,	/* TRAPA #3 is GDB breakpoint */
-#endif
-	-1,
-};
-
-const h8300_vector __initconst h8300_trap_table[] = {
-	0, 0, 0, 0, 0, 0, 0, 0,
-	system_call,
-	0,
-	0,
-	trace_break,
-};
-
-int h8300_enable_irq_pin(unsigned int irq)
-{
-	int bitmask;
-	if (irq < EXT_IRQ0 || irq > EXT_IRQ5)
-		return 0;
-
-	/* initialize IRQ pin */
-	bitmask = 1 << (irq - EXT_IRQ0);
-	switch(irq) {
-	case EXT_IRQ0:
-	case EXT_IRQ1:
-	case EXT_IRQ2:
-	case EXT_IRQ3:
-		if (H8300_GPIO_RESERVE(H8300_GPIO_P8, bitmask) == 0)
-			return -EBUSY;
-		H8300_GPIO_DDR(H8300_GPIO_P8, bitmask, H8300_GPIO_INPUT);
-		break;
-	case EXT_IRQ4:
-	case EXT_IRQ5:
-		if (H8300_GPIO_RESERVE(H8300_GPIO_P9, bitmask) == 0)
-			return -EBUSY;
-		H8300_GPIO_DDR(H8300_GPIO_P9, bitmask, H8300_GPIO_INPUT);
-		break;
-	}
-
-	return 0;
-}
-
-void h8300_disable_irq_pin(unsigned int irq)
-{
-	int bitmask;
-	if (irq < EXT_IRQ0 || irq > EXT_IRQ5)
-		return;
-
-	/* disable interrupt & release IRQ pin */
-	bitmask = 1 << (irq - EXT_IRQ0);
-	switch(irq) {
-	case EXT_IRQ0:
-	case EXT_IRQ1:
-	case EXT_IRQ2:
-	case EXT_IRQ3:
-		*(volatile unsigned char *)IER &= ~bitmask;
-		H8300_GPIO_FREE(H8300_GPIO_P8, bitmask);
-		break ;
-	case EXT_IRQ4:
-	case EXT_IRQ5:
-		*(volatile unsigned char *)IER &= ~bitmask;
-		H8300_GPIO_FREE(H8300_GPIO_P9, bitmask);
-		break;
-	}
-}
diff --git a/arch/h8300/platform/h8300h/ptrace_h8300h.c b/arch/h8300/platform/h8300h/ptrace_h8300h.c
deleted file mode 100644
index 4f1ed0279633..000000000000
--- a/arch/h8300/platform/h8300h/ptrace_h8300h.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8300h/ptrace_h8300h.c
- *    ptrace cpu depend helper functions
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of
- * this archive for more details.
- */
-
-#include <linux/linkage.h>
-#include <linux/sched.h>
-#include <asm/ptrace.h>
-
-#define CCR_MASK 0x6f    /* mode/imask not set */
-#define BREAKINST 0x5730 /* trapa #3 */
-
-/* Mapping from PT_xxx to the stack offset at which the register is
-   saved.  Notice that usp has no stack-slot and needs to be treated
-   specially (see get_reg/put_reg below). */
-static const int h8300_register_offset[] = {
-	PT_REG(er1), PT_REG(er2), PT_REG(er3), PT_REG(er4),
-	PT_REG(er5), PT_REG(er6), PT_REG(er0), PT_REG(orig_er0),
-	PT_REG(ccr), PT_REG(pc)
-};
-
-/* read register */
-long h8300_get_reg(struct task_struct *task, int regno)
-{
-	switch (regno) {
-	case PT_USP:
-		return task->thread.usp + sizeof(long)*2;
-	case PT_CCR:
-	    return *(unsigned short *)(task->thread.esp0 + h8300_register_offset[regno]);
-	default:
-	    return *(unsigned long *)(task->thread.esp0 + h8300_register_offset[regno]);
-	}
-}
-
-/* write register */
-int h8300_put_reg(struct task_struct *task, int regno, unsigned long data)
-{
-	unsigned short oldccr;
-	switch (regno) {
-	case PT_USP:
-		task->thread.usp = data - sizeof(long)*2;
-	case PT_CCR:
-		oldccr = *(unsigned short *)(task->thread.esp0 + h8300_register_offset[regno]);
-		oldccr &= ~CCR_MASK;
-		data &= CCR_MASK;
-		data |= oldccr;
-		*(unsigned short *)(task->thread.esp0 + h8300_register_offset[regno]) = data;
-		break;
-	default:
-		*(unsigned long *)(task->thread.esp0 + h8300_register_offset[regno]) = data;
-		break;
-	}
-	return 0;
-}
-
-/* disable singlestep */
-void user_disable_single_step(struct task_struct *child)
-{
-	if((long)child->thread.breakinfo.addr != -1L) {
-		*child->thread.breakinfo.addr = child->thread.breakinfo.inst;
-		child->thread.breakinfo.addr = (unsigned short *)-1L;
-	}
-}
-
-/* calculate next pc */
-enum jump_type {none,    /* normal instruction */
-		jabs,    /* absolute address jump */
-		ind,     /* indirect address jump */
-		ret,     /* return to subrutine */
-		reg,     /* register indexed jump */
-		relb,    /* pc relative jump (byte offset) */
-		relw,    /* pc relative jump (word offset) */
-               };
-
-/* opcode decode table define
-   ptn: opcode pattern
-   msk: opcode bitmask
-   len: instruction length (<0 next table index)
-   jmp: jump operation mode */
-struct optable {
-	unsigned char bitpattern;
-	unsigned char bitmask;
-	signed char length;
-	signed char type;
-} __attribute__((aligned(1),packed));
-
-#define OPTABLE(ptn,msk,len,jmp)   \
-        {                          \
-		.bitpattern = ptn, \
-		.bitmask    = msk, \
-		.length	    = len, \
-		.type       = jmp, \
-	}
-
-static const struct optable optable_0[] = {
-	OPTABLE(0x00,0xff, 1,none), /* 0x00 */
-	OPTABLE(0x01,0xff,-1,none), /* 0x01 */
-	OPTABLE(0x02,0xfe, 1,none), /* 0x02-0x03 */
-	OPTABLE(0x04,0xee, 1,none), /* 0x04-0x05/0x14-0x15 */
-	OPTABLE(0x06,0xfe, 1,none), /* 0x06-0x07 */
-	OPTABLE(0x08,0xea, 1,none), /* 0x08-0x09/0x0c-0x0d/0x18-0x19/0x1c-0x1d */
-	OPTABLE(0x0a,0xee, 1,none), /* 0x0a-0x0b/0x1a-0x1b */
-	OPTABLE(0x0e,0xee, 1,none), /* 0x0e-0x0f/0x1e-0x1f */
-	OPTABLE(0x10,0xfc, 1,none), /* 0x10-0x13 */
-	OPTABLE(0x16,0xfe, 1,none), /* 0x16-0x17 */
-	OPTABLE(0x20,0xe0, 1,none), /* 0x20-0x3f */
-	OPTABLE(0x40,0xf0, 1,relb), /* 0x40-0x4f */
-	OPTABLE(0x50,0xfc, 1,none), /* 0x50-0x53 */
-	OPTABLE(0x54,0xfd, 1,ret ), /* 0x54/0x56 */
-	OPTABLE(0x55,0xff, 1,relb), /* 0x55 */
-	OPTABLE(0x57,0xff, 1,none), /* 0x57 */
-	OPTABLE(0x58,0xfb, 2,relw), /* 0x58/0x5c */
-	OPTABLE(0x59,0xfb, 1,reg ), /* 0x59/0x5b */
-	OPTABLE(0x5a,0xfb, 2,jabs), /* 0x5a/0x5e */
-	OPTABLE(0x5b,0xfb, 2,ind ), /* 0x5b/0x5f */
-	OPTABLE(0x60,0xe8, 1,none), /* 0x60-0x67/0x70-0x77 */
-	OPTABLE(0x68,0xfa, 1,none), /* 0x68-0x69/0x6c-0x6d */
-	OPTABLE(0x6a,0xfe,-2,none), /* 0x6a-0x6b */
-	OPTABLE(0x6e,0xfe, 2,none), /* 0x6e-0x6f */
-	OPTABLE(0x78,0xff, 4,none), /* 0x78 */
-	OPTABLE(0x79,0xff, 2,none), /* 0x79 */
-	OPTABLE(0x7a,0xff, 3,none), /* 0x7a */
-	OPTABLE(0x7b,0xff, 2,none), /* 0x7b */
-	OPTABLE(0x7c,0xfc, 2,none), /* 0x7c-0x7f */
-	OPTABLE(0x80,0x80, 1,none), /* 0x80-0xff */
-};
-
-static const struct optable optable_1[] = {
-	OPTABLE(0x00,0xff,-3,none), /* 0x0100 */
-	OPTABLE(0x40,0xf0,-3,none), /* 0x0140-0x14f */
-	OPTABLE(0x80,0xf0, 1,none), /* 0x0180-0x018f */
-	OPTABLE(0xc0,0xc0, 2,none), /* 0x01c0-0x01ff */
-};
-
-static const struct optable optable_2[] = {
-	OPTABLE(0x00,0x20, 2,none), /* 0x6a0?/0x6a8?/0x6b0?/0x6b8? */
-	OPTABLE(0x20,0x20, 3,none), /* 0x6a2?/0x6aa?/0x6b2?/0x6ba? */
-};
-
-static const struct optable optable_3[] = {
-	OPTABLE(0x69,0xfb, 2,none), /* 0x010069/0x01006d/014069/0x01406d */
-	OPTABLE(0x6b,0xff,-4,none), /* 0x01006b/0x01406b */
-	OPTABLE(0x6f,0xff, 3,none), /* 0x01006f/0x01406f */
-	OPTABLE(0x78,0xff, 5,none), /* 0x010078/0x014078 */
-};
-
-static const struct optable optable_4[] = {
-	OPTABLE(0x00,0x78, 3,none), /* 0x0100690?/0x01006d0?/0140690/0x01406d0?/0x0100698?/0x01006d8?/0140698?/0x01406d8? */
-	OPTABLE(0x20,0x78, 4,none), /* 0x0100692?/0x01006d2?/0140692/0x01406d2?/0x010069a?/0x01006da?/014069a?/0x01406da? */
-};
-
-static const struct optables_list {
-	const struct optable *ptr;
-	int size;
-} optables[] = {
-#define OPTABLES(no)                                                   \
-        {                                                              \
-		.ptr  = optable_##no,                                  \
-		.size = sizeof(optable_##no) / sizeof(struct optable), \
-	}
-	OPTABLES(0),
-	OPTABLES(1),
-	OPTABLES(2),
-	OPTABLES(3),
-	OPTABLES(4),
-
-};
-
-const unsigned char condmask[] = {
-	0x00,0x40,0x01,0x04,0x02,0x08,0x10,0x20
-};
-
-static int isbranch(struct task_struct *task,int reson)
-{
-	unsigned char cond = h8300_get_reg(task, PT_CCR);
-	/* encode complex conditions */
-	/* B4: N^V
-	   B5: Z|(N^V)
-	   B6: C|Z */
-	__asm__("bld #3,%w0\n\t"
-		"bxor #1,%w0\n\t"
-		"bst #4,%w0\n\t"
-		"bor #2,%w0\n\t"
-		"bst #5,%w0\n\t"
-		"bld #2,%w0\n\t"
-		"bor #0,%w0\n\t"
-		"bst #6,%w0\n\t"
-		:"=&r"(cond)::"cc");
-	cond &= condmask[reson >> 1];
-	if (!(reson & 1))
-		return cond == 0;
-	else
-		return cond != 0;
-}
-
-static unsigned short *getnextpc(struct task_struct *child, unsigned short *pc)
-{
-	const struct optable *op;
-	unsigned char *fetch_p;
-	unsigned char inst;
-	unsigned long addr;
-	unsigned long *sp;
-	int op_len,regno;
-	op = optables[0].ptr;
-	op_len = optables[0].size;
-	fetch_p = (unsigned char *)pc;
-	inst = *fetch_p++;
-	do {
-		if ((inst & op->bitmask) == op->bitpattern) {
-			if (op->length < 0) {
-				op = optables[-op->length].ptr;
-				op_len = optables[-op->length].size + 1;
-				inst = *fetch_p++;
-			} else {
-				switch (op->type) {
-				case none:
-					return pc + op->length;
-				case jabs:
-					addr = *(unsigned long *)pc;
-					return (unsigned short *)(addr & 0x00ffffff);
-				case ind:
-					addr = *pc & 0xff;
-					return (unsigned short *)(*(unsigned long *)addr);
-				case ret:
-					sp = (unsigned long *)h8300_get_reg(child, PT_USP);
-					/* user stack frames
-					   |   er0  | temporary saved
-					   +--------+
-					   |   exp  | exception stack frames
-					   +--------+
-					   | ret pc | userspace return address
-					*/
-					return (unsigned short *)(*(sp+2) & 0x00ffffff);
-				case reg:
-					regno = (*pc >> 4) & 0x07;
-					if (regno == 0)
-						addr = h8300_get_reg(child, PT_ER0);
-					else
-						addr = h8300_get_reg(child, regno-1+PT_ER1);
-					return (unsigned short *)addr;
-				case relb:
-					if (inst == 0x55 || isbranch(child,inst & 0x0f))
-						pc = (unsigned short *)((unsigned long)pc +
-								       ((signed char)(*fetch_p)));
-					return pc+1; /* skip myself */
-				case relw:
-					if (inst == 0x5c || isbranch(child,(*fetch_p & 0xf0) >> 4))
-						pc = (unsigned short *)((unsigned long)pc +
-								       ((signed short)(*(pc+1))));
-					return pc+2; /* skip myself */
-				}
-			}
-		} else
-			op++;
-	} while(--op_len > 0);
-	return NULL;
-}
-
-/* Set breakpoint(s) to simulate a single step from the current PC.  */
-
-void user_enable_single_step(struct task_struct *child)
-{
-	unsigned short *nextpc;
-	nextpc = getnextpc(child,(unsigned short *)h8300_get_reg(child, PT_PC));
-	child->thread.breakinfo.addr = nextpc;
-	child->thread.breakinfo.inst = *nextpc;
-	*nextpc = BREAKINST;
-}
-
-asmlinkage void trace_trap(unsigned long bp)
-{
-	if ((unsigned long)current->thread.breakinfo.addr == bp) {
-		user_disable_single_step(current);
-		force_sig(SIGTRAP,current);
-	} else
-	        force_sig(SIGILL,current);
-}
-
diff --git a/arch/h8300/platform/h8s/Makefile b/arch/h8300/platform/h8s/Makefile
deleted file mode 100644
index bf1241883766..000000000000
--- a/arch/h8300/platform/h8s/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-# Reuse any files we can from the H8S
-#
-
-obj-y := ints_h8s.o ptrace_h8s.o
diff --git a/arch/h8300/platform/h8s/edosk2674/Makefile b/arch/h8300/platform/h8s/edosk2674/Makefile
deleted file mode 100644
index 8e349723bb4f..000000000000
--- a/arch/h8300/platform/h8s/edosk2674/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := crt0_$(MODEL).o
diff --git a/arch/h8300/platform/h8s/edosk2674/crt0_ram.S b/arch/h8300/platform/h8s/edosk2674/crt0_ram.S
deleted file mode 100644
index 5ed191b37cde..000000000000
--- a/arch/h8300/platform/h8s/edosk2674/crt0_ram.S
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8s/edosk2674/crt0_ram.S
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Platform depend startup
- *  Target Archtecture:	EDOSK-2674
- *  Memory Layout     :	RAM
- */
-
-#define ASSEMBLY
-
-#include <asm/linkage.h>
-#include <asm/regs267x.h>
-			
-#if !defined(CONFIG_BLKDEV_RESERVE)
-#if defined(CONFIG_GDB_DEBUG)
-#define RAMEND (__ramend - 0xc000)
-#else
-#define RAMEND __ramend
-#endif
-#else
-#define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
-#endif
-	
-	.global __start
-	.global __command_line
-	.global __platform_gpio_table
-	.global __target_name
-	
-	.h8300s
-
-	.section .text
-	.file	"crt0_ram.S"
-
-	/* CPU Reset entry */
-__start:
-	mov.l	#RAMEND,sp
-	ldc	#0x80,ccr
-	ldc	#0x00,exr
-
-	/* Peripheral Setup */
-	bclr	#4,@INTCR:8	/* interrupt mode 2 */
-	bset	#5,@INTCR:8
-	bclr	#0,@IER+1:16
-	bset	#1,@ISCRL+1:16	/* IRQ0 Positive Edge */
-	bclr	#0,@ISCRL+1:16
-
-#if defined(CONFIG_MTD_UCLINUX)
-	/* move romfs image */
-	jsr	@__move_romfs	
-#endif
-	
-	/* .bss clear */
-	mov.l	#__sbss,er5
-	mov.l	er5,er6
-	mov.l	#__ebss,er4
-	sub.l	er5,er4
-	shlr	#2,er4
-	sub.l	er0,er0
-1:	
-	mov.l	er0,@er5
-	adds	#4,er5
-	dec.l	#1,er4
-	bne	1b
-
-	/* copy kernel commandline */
-	mov.l	#COMMAND_START,er5
-	mov.l	#_command_line,er6
-	mov.w	#512,r4
-	eepmov.w
-
-	/* uClinux kernel start */
-	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#_init_thread_union,sp
-	add.l	#0x2000,sp
-	jsr	@_start_kernel
-_exit:
-
-	jmp	_exit
-
-	rts
-
-	/* I/O port assign information */
-__platform_gpio_table:	
-	mov.l	#gpio_table,er0
-	rts
-
-gpio_table:
-	;; P1DDR
-	;;      used,ddr
-	.byte	0x00,0x00
-	;; P2DDR
-	.byte	0x00,0x00
-	;; P3DDR
-	.byte	0x3f,0x3a
-	;; dummy
-	.byte	0x00,0x00
-	;; P5DDR
-	.byte	0x00,0x00
-	;; P6DDR
-	.byte	0x00,0x00
-	;; P7DDR
-	.byte	0x00,0x00
-	;; P8DDR
-	.byte	0x00,0x00
-	;; dummy
-	.byte	0x00,0x00
-	;; PADDR
-	.byte	0xff,0xff
-	;; PBDDR
-	.byte	0xff,0x00
-	;; PCDDR
-	.byte	0xff,0x00
-	;; PDDDR
-	.byte	0xff,0x00
-	;; PEDDR
-	.byte	0xff,0x00
-	;; PFDDR
-	.byte	0xff,0xff
-	;; PGDDR
-	.byte	0x0f,0x0f
-	;; PHDDR
-	.byte	0x0f,0x0f
-
-__target_name:	
-	.asciz	"EDOSK-2674"
-	
-	.section .bootvec,"ax"
-	jmp	@__start
diff --git a/arch/h8300/platform/h8s/edosk2674/crt0_rom.S b/arch/h8300/platform/h8s/edosk2674/crt0_rom.S
deleted file mode 100644
index 06d1d7f324ca..000000000000
--- a/arch/h8300/platform/h8s/edosk2674/crt0_rom.S
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8s/edosk2674/crt0_rom.S
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Platform depend startup
- *  Target Archtecture:	EDOSK-2674
- *  Memory Layout     :	ROM
- */
-
-#define ASSEMBLY
-
-#include <asm/linkage.h>
-#include <asm/regs267x.h>
-		
-	.global __start
-	.global __command_line
-	.global __platform_gpio_table
-	.global __target_name
-	
-	.h8300s
-	.section .text
-	.file	"crt0_rom.S"
-
-	/* CPU Reset entry */
-__start:
-	mov.l	#__ramend,sp
-	ldc	#0x80,ccr
-	ldc	#0,exr
-	
-	/* Peripheral Setup */
-;BSC/GPIO setup
-	mov.l	#init_regs,er0
-	mov.w	#0xffff,e2
-1:
-	mov.w	@er0+,r2
-	beq	2f
-	mov.w	@er0+,r1
-	mov.b	r1l,@er2
-	bra	1b
-
-2:
-;SDRAM setup
-#define SDRAM_SMR 0x400040
-
-	mov.b	#0,r0l
-	mov.b	r0l,@DRACCR:16
-	mov.w	#0x188,r0
-	mov.w	r0,@REFCR:16
-	mov.w	#0x85b4,r0
-	mov.w	r0,@DRAMCR:16
-	mov.b	#0,r1l
-	mov.b	r1l,@SDRAM_SMR
-	mov.w	#0x84b4,r0
-	mov.w	r0,@DRAMCR:16
-;special thanks to Arizona Cooperative Power
-	
-	/* copy .data */
-	mov.l	#__begin_data,er5
-	mov.l	#__sdata,er6
-	mov.l	#__edata,er4
-	sub.l	er6,er4
-	shlr.l	#2,er4
-1:	
-	mov.l	@er5+,er0
-	mov.l	er0,@er6
-	adds	#4,er6
-	dec.l	#1,er4
-	bne	1b	
-
-	/* .bss clear */
-	mov.l	#__sbss,er5
-	mov.l	#__ebss,er4
-	sub.l	er5,er4
-	shlr.l	#2,er4		
-	sub.l	er0,er0
-1:
-	mov.l	er0,@er5
-	adds	#4,er5
-	dec.l	#1,er4
-	bne	1b
-
-	/* copy kernel commandline */
-	mov.l	#COMMAND_START,er5
-	mov.l	#__command_line,er6
-	mov.w	#512,r4
-	eepmov.w
-
-	/* linux kernel start */
-	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#_init_thread_union,sp
-	add.l	#0x2000,sp
-	jsr	@_start_kernel
-_exit:
-
-	jmp	_exit
-
-	rts
-
-	/* I/O port assign information */
-__platform_gpio_table:	
-	mov.l	#gpio_table,er0
-	rts
-
-#define INIT_REGS_DATA(REGS,DATA) \
-	.word	((REGS) & 0xffff),DATA
-
-init_regs:
-INIT_REGS_DATA(ASTCR,0xff)
-INIT_REGS_DATA(RDNCR,0x00)
-INIT_REGS_DATA(ABWCR,0x80)
-INIT_REGS_DATA(WTCRAH,0x27)
-INIT_REGS_DATA(WTCRAL,0x77)
-INIT_REGS_DATA(WTCRBH,0x71)
-INIT_REGS_DATA(WTCRBL,0x22)
-INIT_REGS_DATA(CSACRH,0x80)
-INIT_REGS_DATA(CSACRL,0x80)
-INIT_REGS_DATA(BROMCRH,0xa0)
-INIT_REGS_DATA(BROMCRL,0xa0)
-INIT_REGS_DATA(P3DDR,0x3a)
-INIT_REGS_DATA(P3ODR,0x06)
-INIT_REGS_DATA(PADDR,0xff)
-INIT_REGS_DATA(PFDDR,0xfe)
-INIT_REGS_DATA(PGDDR,0x0f)
-INIT_REGS_DATA(PHDDR,0x0f)
-INIT_REGS_DATA(PFCR0,0xff)
-INIT_REGS_DATA(PFCR2,0x0d)
-INIT_REGS_DATA(ITSR, 0x00)
-INIT_REGS_DATA(ITSR+1,0x3f)
-INIT_REGS_DATA(INTCR,0x20)
-		
-	.word	0
-
-gpio_table:
-	;; P1DDR
-	.byte	0x00,0x00
-	;; P2DDR
-	.byte	0x00,0x00
-	;; P3DDR
-	.byte	0x00,0x00
-	;; dummy
-	.byte	0x00,0x00
-	;; P5DDR
-	.byte	0x00,0x00
-	;; P6DDR
-	.byte	0x00,0x00
-	;; P7DDR
-	.byte	0x00,0x00
-	;; P8DDR
-	.byte	0x00,0x00
-	;; dummy
-	.byte	0x00,0x00
-	;; PADDR
-	.byte	0x00,0x00
-	;; PBDDR
-	.byte	0x00,0x00
-	;; PCDDR
-	.byte	0x00,0x00
-	;; PDDDR
-	.byte	0x00,0x00
-	;; PEDDR
-	.byte	0x00,0x00
-	;; PFDDR
-	.byte	0x00,0x00
-	;; PGDDR
-	.byte	0x00,0x00
-	;; PHDDR
-	.byte	0x00,0x00
-
-	.section .rodata
-__target_name:	
-	.asciz	"EDOSK-2674"
-	
-	.section .bss
-__command_line:	
-	.space	512
-
-	/* interrupt vector */
-	.section .vectors,"ax"
-	.long	__start
-	.long	__start
-vector	=	2
-	.rept	126
-	.long	_interrupt_redirect_table+vector*4
-vector	=	vector + 1
-	.endr
diff --git a/arch/h8300/platform/h8s/generic/Makefile b/arch/h8300/platform/h8s/generic/Makefile
deleted file mode 100644
index 44b4685c664c..000000000000
--- a/arch/h8300/platform/h8s/generic/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y =  crt0_$(MODEL).o
diff --git a/arch/h8300/platform/h8s/generic/crt0_ram.S b/arch/h8300/platform/h8s/generic/crt0_ram.S
deleted file mode 100644
index 7018915de74f..000000000000
--- a/arch/h8300/platform/h8s/generic/crt0_ram.S
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8s/edosk2674/crt0_ram.S
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Platform depend startup
- *  Target Archtecture:	generic
- *  Memory Layout     :	RAM
- */
-
-#define ASSEMBLY
-
-#include <asm/linkage.h>
-#include <asm/regs267x.h>
-			
-#if !defined(CONFIG_BLKDEV_RESERVE)
-#if defined(CONFIG_GDB_DEBUG)
-#define RAMEND (__ramend - 0xc000)
-#else
-#define RAMEND __ramend
-#endif
-#else
-#define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
-#endif
-	
-	.global __start
-	.global __command_line
-	.global __platform_gpio_table
-	.global __target_name
-	
-	.h8300s
-
-	.section .text
-	.file	"crt0_ram.S"
-
-	/* CPU Reset entry */
-__start:
-	mov.l	#RAMEND,sp
-	ldc	#0x80,ccr
-	ldc	#0x00,exr
-
-	/* Peripheral Setup */
-	bclr	#4,@INTCR:8	/* interrupt mode 2 */
-	bset	#5,@INTCR:8
-
-#if defined(CONFIG_MTD_UCLINUX)
-	/* move romfs image */
-	jsr	@__move_romfs	
-#endif
-	
-	/* .bss clear */
-	mov.l	#__sbss,er5
-	mov.l	er5,er6
-	mov.l	#__ebss,er4
-	sub.l	er5,er4
-	shlr	#2,er4
-	sub.l	er0,er0
-1:	
-	mov.l	er0,@er5
-	adds	#4,er5
-	dec.l	#1,er4
-	bne	1b
-
-	/* copy kernel commandline */
-	mov.l	#COMMAND_START,er5
-	mov.l	#_command_line,er6
-	mov.w	#512,r4
-	eepmov.w
-
-	/* uClinux kernel start */
-	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#_init_thread_union,sp
-	add.l	#0x2000,sp
-	jsr	@_start_kernel
-_exit:
-
-	jmp	_exit
-
-	rts
-
-	/* I/O port assign information */
-__platform_gpio_table:	
-	mov.l	#gpio_table,er0
-	rts
-
-gpio_table:
-	;; P1DDR
-	;;      used,ddr
-	.byte	0x00,0x00
-	;; P2DDR
-	.byte	0x00,0x00
-	;; P3DDR
-	.byte	0x00,0x00
-	;; dummy
-	.byte	0x00,0x00
-	;; P5DDR
-	.byte	0x00,0x00
-	;; P6DDR
-	.byte	0x00,0x00
-	;; P7DDR
-	.byte	0x00,0x00
-	;; P8DDR
-	.byte	0x00,0x00
-	;; dummy
-	.byte	0x00,0x00
-	;; PADDR
-	.byte	0x00,0x00
-	;; PBDDR
-	.byte	0x00,0x00
-	;; PCDDR
-	.byte	0x00,0x00
-	;; PDDDR
-	.byte	0x00,0x00
-	;; PEDDR
-	.byte	0x00,0x00
-	;; PFDDR
-	.byte	0x00,0x00
-	;; PGDDR
-	.byte	0x00,0x00
-	;; PHDDR
-	.byte	0x00,0x00
-
-__target_name:	
-	.asciz	"generic"
-	
-	.section .bootvec,"ax"
-	jmp	@__start
diff --git a/arch/h8300/platform/h8s/generic/crt0_rom.S b/arch/h8300/platform/h8s/generic/crt0_rom.S
deleted file mode 100644
index 623ba7828193..000000000000
--- a/arch/h8300/platform/h8s/generic/crt0_rom.S
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8s/generic/crt0_rom.S
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- *  Platform depend startup 
- *  Target Archtecture:	generic
- *  Memory Layout     :	ROM
- */
-
-#define ASSEMBLY
-
-#include <asm/linkage.h>
-#include <asm/regs267x.h>
-	
-	.global __start
-	.global __command_line
-	.global __platform_gpio_table
-	.global __target_name
-	
-	.h8300s
-	.section .text
-	.file	"crt0_rom.S"
-
-	/* CPU Reset entry */
-__start:
-	mov.l	#__ramend,sp
-	ldc	#0x80,ccr
-	ldc	#0,exr
-	bclr	#4,@INTCR:8
-	bset	#5,@INTCR:8	/* Interrupt mode 2 */
-	
-	/* Peripheral Setup */
-	
-	/* copy .data */
-#if !defined(CONFIG_H8S_SIM)
-	mov.l	#__begin_data,er5
-	mov.l	#__sdata,er6
-	mov.l	#__edata,er4
-	sub.l	er6,er4
-	shlr.l	#2,er4
-1:	
-	mov.l	@er5+,er0
-	mov.l	er0,@er6
-	adds	#4,er6
-	dec.l	#1,er4
-	bne	1b	
-#endif
-
-	/* .bss clear */
-	mov.l	#__sbss,er5
-	mov.l	#__ebss,er4
-	sub.l	er5,er4
-	shlr.l	#2,er4		
-	sub.l	er0,er0
-1:
-	mov.l	er0,@er5
-	adds	#4,er5
-	dec.l	#1,er4
-	bne	1b
-
-	/* linux kernel start */
-	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#_init_thread_union,sp
-	add.l	#0x2000,sp
-	jsr	@_start_kernel
-_exit:
-
-	jmp	_exit
-
-	rts
-
-	/* I/O port assign information */
-__platform_gpio_table:	
-	mov.l	#gpio_table,er0
-	rts
-
-gpio_table:
-	;; P1DDR
-	.byte	0x00,0x00
-	;; P2DDR
-	.byte	0x00,0x00
-	;; P3DDR
-	.byte	0x00,0x00
-	;; P4DDR
-	.byte	0x00,0x00
-	;; P5DDR
-	.byte	0x00,0x00
-	;; P6DDR
-	.byte	0x00,0x00
-	;; dummy
-	.byte	0x00,0x00
-	;; P8DDR
-	.byte	0x00,0x00
-	;; PADDR
-	.byte	0x00,0x00
-	;; PBDDR
-	.byte	0x00,0x00
-	;; PCDDR
-	.byte	0x00,0x00
-	;; PDDDR
-	.byte	0x00,0x00
-	;; PEDDR
-	.byte	0x00,0x00
-	;; PFDDR
-	.byte	0x00,0x00
-	;; PGDDR
-	.byte	0x00,0x00
-	;; PHDDR
-	.byte	0x00,0x00
-
-	.section .rodata
-__target_name:	
-	.asciz	"generic"
-	
-	.section .bss
-__command_line:	
-	.space	512
-
-	/* interrupt vector */
-	.section .vectors,"ax"
-	.long	__start
-	.long	__start
-vector	=	2
-	.rept	126-1
-	.long	_interrupt_redirect_table+vector*4
-vector	=	vector + 1
-	.endr
diff --git a/arch/h8300/platform/h8s/irq.c b/arch/h8300/platform/h8s/irq.c
deleted file mode 100644
index f3a5511c16b1..000000000000
--- a/arch/h8300/platform/h8s/irq.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * linux/arch/h8300/platform/h8s/ints_h8s.c
- * Interrupt handling CPU variants
- *
- * Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- */
-
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-
-#include <asm/ptrace.h>
-#include <asm/traps.h>
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/gpio-internal.h>
-#include <asm/regs267x.h>
-
-/* saved vector list */
-const int __initconst h8300_saved_vectors[] = {
-#if defined(CONFIG_GDB_DEBUG)
-	TRACE_VEC,
-	TRAP3_VEC,
-#endif
-	-1
-};
-
-/* trap entry table */
-const H8300_VECTOR __initconst h8300_trap_table[] = {
-	0,0,0,0,0,
-	trace_break,  /* TRACE */
-	0,0,
-	system_call,  /* TRAPA #0 */
-	0,0,0,0,0,0,0
-};
-
-/* IRQ pin assignment */
-struct irq_pins {
-	unsigned char port_no;
-	unsigned char bit_no;
-} __attribute__((aligned(1),packed));
-/* ISTR = 0 */
-static const struct irq_pins irq_assign_table0[16]={
-        {H8300_GPIO_P5,H8300_GPIO_B0},{H8300_GPIO_P5,H8300_GPIO_B1},
-	{H8300_GPIO_P5,H8300_GPIO_B2},{H8300_GPIO_P5,H8300_GPIO_B3},
-	{H8300_GPIO_P5,H8300_GPIO_B4},{H8300_GPIO_P5,H8300_GPIO_B5},
-	{H8300_GPIO_P5,H8300_GPIO_B6},{H8300_GPIO_P5,H8300_GPIO_B7},
-	{H8300_GPIO_P6,H8300_GPIO_B0},{H8300_GPIO_P6,H8300_GPIO_B1},
-	{H8300_GPIO_P6,H8300_GPIO_B2},{H8300_GPIO_P6,H8300_GPIO_B3},
-	{H8300_GPIO_P6,H8300_GPIO_B4},{H8300_GPIO_P6,H8300_GPIO_B5},
-	{H8300_GPIO_PF,H8300_GPIO_B1},{H8300_GPIO_PF,H8300_GPIO_B2},
-};
-/* ISTR = 1 */
-static const struct irq_pins irq_assign_table1[16]={
-	{H8300_GPIO_P8,H8300_GPIO_B0},{H8300_GPIO_P8,H8300_GPIO_B1},
-	{H8300_GPIO_P8,H8300_GPIO_B2},{H8300_GPIO_P8,H8300_GPIO_B3},
-	{H8300_GPIO_P8,H8300_GPIO_B4},{H8300_GPIO_P8,H8300_GPIO_B5},
-	{H8300_GPIO_PH,H8300_GPIO_B2},{H8300_GPIO_PH,H8300_GPIO_B3},
-	{H8300_GPIO_P2,H8300_GPIO_B0},{H8300_GPIO_P2,H8300_GPIO_B1},
-	{H8300_GPIO_P2,H8300_GPIO_B2},{H8300_GPIO_P2,H8300_GPIO_B3},
-	{H8300_GPIO_P2,H8300_GPIO_B4},{H8300_GPIO_P2,H8300_GPIO_B5},
-	{H8300_GPIO_P2,H8300_GPIO_B6},{H8300_GPIO_P2,H8300_GPIO_B7},
-};
-
-/* IRQ to GPIO pin translation */
-#define IRQ_GPIO_MAP(irqbit,irq,port,bit)			  \
-do {								  \
-	if (*(volatile unsigned short *)ITSR & irqbit) {	  \
-		port = irq_assign_table1[irq - EXT_IRQ0].port_no; \
-		bit  = irq_assign_table1[irq - EXT_IRQ0].bit_no;  \
-	} else {						  \
-		port = irq_assign_table0[irq - EXT_IRQ0].port_no; \
-		bit  = irq_assign_table0[irq - EXT_IRQ0].bit_no;  \
-	}							  \
-} while(0)
-
-int h8300_enable_irq_pin(unsigned int irq)
-{
-	if (irq >= EXT_IRQ0 && irq <= EXT_IRQ15) {
-		unsigned short ptn = 1 << (irq - EXT_IRQ0);
-		unsigned int port_no,bit_no;
-		IRQ_GPIO_MAP(ptn, irq, port_no, bit_no);
-		if (H8300_GPIO_RESERVE(port_no, bit_no) == 0)
-			return -EBUSY;                   /* pin already use */
-		H8300_GPIO_DDR(port_no, bit_no, H8300_GPIO_INPUT);
-		*(volatile unsigned short *)ISR &= ~ptn; /* ISR clear */
-	}
-
-	return 0;
-}
-
-void h8300_disable_irq_pin(unsigned int irq)
-{
-	if (irq >= EXT_IRQ0 && irq <= EXT_IRQ15) {
-		/* disable interrupt & release IRQ pin */
-		unsigned short ptn = 1 << (irq - EXT_IRQ0);
-		unsigned short port_no,bit_no;
-		*(volatile unsigned short *)ISR &= ~ptn;
-		*(volatile unsigned short *)IER &= ~ptn;
-		IRQ_GPIO_MAP(ptn, irq, port_no, bit_no);
-		H8300_GPIO_FREE(port_no, bit_no);
-	}
-}
diff --git a/arch/h8300/platform/h8s/ptrace_h8s.c b/arch/h8300/platform/h8s/ptrace_h8s.c
deleted file mode 100644
index c058ab1a8495..000000000000
--- a/arch/h8300/platform/h8s/ptrace_h8s.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- *  linux/arch/h8300/platform/h8s/ptrace_h8s.c
- *    ptrace cpu depend helper functions
- *
- *  Yoshinori Sato <ysato@users.sourceforge.jp>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of
- * this archive for more details.
- */
-
-#include <linux/linkage.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <asm/ptrace.h>
-
-#define CCR_MASK  0x6f
-#define EXR_TRACE 0x80
-
-/* Mapping from PT_xxx to the stack offset at which the register is
-   saved.  Notice that usp has no stack-slot and needs to be treated
-   specially (see get_reg/put_reg below). */
-static const int h8300_register_offset[] = {
-	PT_REG(er1), PT_REG(er2), PT_REG(er3), PT_REG(er4),
-	PT_REG(er5), PT_REG(er6), PT_REG(er0), PT_REG(orig_er0),
-	PT_REG(ccr), PT_REG(pc),  0,           PT_REG(exr)
-};
-
-/* read register */
-long h8300_get_reg(struct task_struct *task, int regno)
-{
-	switch (regno) {
-	case PT_USP:
-		return task->thread.usp + sizeof(long)*2 + 2;
-	case PT_CCR:
-	case PT_EXR:
-	    return *(unsigned short *)(task->thread.esp0 + h8300_register_offset[regno]);
-	default:
-	    return *(unsigned long *)(task->thread.esp0 + h8300_register_offset[regno]);
-	}
-}
-
-/* write register */
-int h8300_put_reg(struct task_struct *task, int regno, unsigned long data)
-{
-	unsigned short oldccr;
-	switch (regno) {
-	case PT_USP:
-		task->thread.usp = data - sizeof(long)*2 - 2;
-	case PT_CCR:
-		oldccr = *(unsigned short *)(task->thread.esp0 + h8300_register_offset[regno]);
-		oldccr &= ~CCR_MASK;
-		data &= CCR_MASK;
-		data |= oldccr;
-		*(unsigned short *)(task->thread.esp0 + h8300_register_offset[regno]) = data;
-		break;
-	case PT_EXR:
-		/* exr modify not support */
-		return -EIO;
-	default:
-		*(unsigned long *)(task->thread.esp0 + h8300_register_offset[regno]) = data;
-		break;
-	}
-	return 0;
-}
-
-/* disable singlestep */
-void user_disable_single_step(struct task_struct *child)
-{
-	*(unsigned short *)(child->thread.esp0 + h8300_register_offset[PT_EXR]) &= ~EXR_TRACE;
-}
-
-/* enable singlestep */
-void user_enable_single_step(struct task_struct *child)
-{
-	*(unsigned short *)(child->thread.esp0 + h8300_register_offset[PT_EXR]) |= EXR_TRACE;
-}
-
-asmlinkage void trace_trap(unsigned long bp)
-{
-	(void)bp;
-	force_sig(SIGTRAP,current);
-}
-
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index d34049712a4d..50fe651da965 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -5,7 +5,7 @@
 #include <linux/sh_dma.h>
 
 /*
- * Generic header for SuperH (H)SCI(F) (used by sh/sh64/h8300 and related parts)
+ * Generic header for SuperH (H)SCI(F) (used by sh/sh64 and related parts)
  */
 
 #define SCIx_NOT_SUPPORTED	(-1)
diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf
index 46736604c26c..a1203148dfa1 100644
--- a/tools/testing/ktest/examples/crosstests.conf
+++ b/tools/testing/ktest/examples/crosstests.conf
@@ -133,12 +133,6 @@ CROSS = frv-linux
 ARCH = frv
 GCC_VER = 4.5.1
 
-# h8300 - failed make defconfig??
-TEST_START IF ${RUN} == h8300 || ${DO_FAILED}
-CROSS = h8300-elf
-ARCH = h8300
-GCC_VER = 4.5.1
-
 # m68k fails with error?
 TEST_START IF ${RUN} == m68k || ${DO_DEFAULT}
 CROSS = m68k-linux
-- 
cgit v1.2.3


From a0102375ee82db1e08324b1a21484854cf2c1677 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Sun, 1 Sep 2013 20:30:50 -0700
Subject: regmap: Add regmap_fields APIs

Current Linux kernel is supporting regmap_field method
and it is very useful feature.
It needs one regmap_filed for one register access.

OTOH, there is multi port device which
has many same registers in the market.
The difference for each register access is
only its address offset.

Current API needs many regmap_field for such device,
but it is not good.
This patch adds new regmap_fileds API which can care
about multi port/offset access via regmap.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/base/regmap/internal.h |  3 ++
 drivers/base/regmap/regmap.c   | 83 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/regmap.h         | 11 ++++++
 3 files changed, 97 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 57f777835d97..9010614f7793 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -179,6 +179,9 @@ struct regmap_field {
 	/* lsb */
 	unsigned int shift;
 	unsigned int reg;
+
+	unsigned int id_size;
+	unsigned int id_offset;
 };
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 285afa7470c0..00152bf7ab12 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -821,6 +821,8 @@ static void regmap_field_init(struct regmap_field *rm_field,
 	rm_field->reg = reg_field.reg;
 	rm_field->shift = reg_field.lsb;
 	rm_field->mask = ((BIT(field_bits) - 1) << reg_field.lsb);
+	rm_field->id_size = reg_field.id_size;
+	rm_field->id_offset = reg_field.id_offset;
 }
 
 /**
@@ -1389,6 +1391,54 @@ int regmap_field_update_bits(struct regmap_field *field, unsigned int mask, unsi
 }
 EXPORT_SYMBOL_GPL(regmap_field_update_bits);
 
+/**
+ * regmap_fields_write(): Write a value to a single register field with port ID
+ *
+ * @field: Register field to write to
+ * @id: port ID
+ * @val: Value to be written
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_fields_write(struct regmap_field *field, unsigned int id,
+			unsigned int val)
+{
+	if (id >= field->id_size)
+		return -EINVAL;
+
+	return regmap_update_bits(field->regmap,
+				  field->reg + (field->id_offset * id),
+				  field->mask, val << field->shift);
+}
+EXPORT_SYMBOL_GPL(regmap_fields_write);
+
+/**
+ * regmap_fields_update_bits():	Perform a read/modify/write cycle
+ *                              on the register field
+ *
+ * @field: Register field to write to
+ * @id: port ID
+ * @mask: Bitmask to change
+ * @val: Value to be written
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_fields_update_bits(struct regmap_field *field,  unsigned int id,
+			      unsigned int mask, unsigned int val)
+{
+	if (id >= field->id_size)
+		return -EINVAL;
+
+	mask = (mask << field->shift) & field->mask;
+
+	return regmap_update_bits(field->regmap,
+				  field->reg + (field->id_offset * id),
+				  mask, val << field->shift);
+}
+EXPORT_SYMBOL_GPL(regmap_fields_update_bits);
+
 /*
  * regmap_bulk_write(): Write multiple registers to the device
  *
@@ -1696,6 +1746,39 @@ int regmap_field_read(struct regmap_field *field, unsigned int *val)
 }
 EXPORT_SYMBOL_GPL(regmap_field_read);
 
+/**
+ * regmap_fields_read(): Read a value to a single register field with port ID
+ *
+ * @field: Register field to read from
+ * @id: port ID
+ * @val: Pointer to store read value
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_fields_read(struct regmap_field *field, unsigned int id,
+		       unsigned int *val)
+{
+	int ret;
+	unsigned int reg_val;
+
+	if (id >= field->id_size)
+		return -EINVAL;
+
+	ret = regmap_read(field->regmap,
+			  field->reg + (field->id_offset * id),
+			  &reg_val);
+	if (ret != 0)
+		return ret;
+
+	reg_val &= field->mask;
+	reg_val >>= field->shift;
+	*val = reg_val;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_fields_read);
+
 /**
  * regmap_bulk_read(): Read multiple registers from the device
  *
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 4c8c20a7a75d..a12bea07f79e 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -425,11 +425,15 @@ bool regmap_reg_in_ranges(unsigned int reg,
  * @reg: Offset of the register within the regmap bank
  * @lsb: lsb of the register field.
  * @reg: msb of the register field.
+ * @id_size: port size if it has some ports
+ * @id_offset: address offset for each ports
  */
 struct reg_field {
 	unsigned int reg;
 	unsigned int lsb;
 	unsigned int msb;
+	unsigned int id_size;
+	unsigned int id_offset;
 };
 
 #define REG_FIELD(_reg, _lsb, _msb) {		\
@@ -451,6 +455,13 @@ int regmap_field_write(struct regmap_field *field, unsigned int val);
 int regmap_field_update_bits(struct regmap_field *field,
 			     unsigned int mask, unsigned int val);
 
+int regmap_fields_write(struct regmap_field *field, unsigned int id,
+			unsigned int val);
+int regmap_fields_read(struct regmap_field *field, unsigned int id,
+		       unsigned int *val);
+int regmap_fields_update_bits(struct regmap_field *field,  unsigned int id,
+			      unsigned int mask, unsigned int val);
+
 /**
  * Description of an IRQ for the generic regmap irq_chip.
  *
-- 
cgit v1.2.3


From 4aa806b771d16b810771d86ce23c4c3160888db3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 26 Jun 2013 13:49:44 +0100
Subject: DMA-API: provide a helper to set both DMA and coherent DMA masks

Provide a helper to set both the DMA and coherent DMA masks to the
same value - this avoids duplicated code in a number of drivers,
sometimes with buggy error handling, and also allows us identify
which drivers do things differently.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 Documentation/DMA-API-HOWTO.txt | 37 ++++++++++++++++++++++---------------
 Documentation/DMA-API.txt       |  8 ++++++++
 include/linux/dma-mapping.h     | 14 ++++++++++++++
 3 files changed, 44 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index 14129f149a75..5e983031cc11 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -101,14 +101,23 @@ style to do this even if your device holds the default setting,
 because this shows that you did think about these issues wrt. your
 device.
 
-The query is performed via a call to dma_set_mask():
+The query is performed via a call to dma_set_mask_and_coherent():
 
-	int dma_set_mask(struct device *dev, u64 mask);
+	int dma_set_mask_and_coherent(struct device *dev, u64 mask);
 
-The query for consistent allocations is performed via a call to
-dma_set_coherent_mask():
+which will query the mask for both streaming and coherent APIs together.
+If you have some special requirements, then the following two separate
+queries can be used instead:
 
-	int dma_set_coherent_mask(struct device *dev, u64 mask);
+	The query for streaming mappings is performed via a call to
+	dma_set_mask():
+
+		int dma_set_mask(struct device *dev, u64 mask);
+
+	The query for consistent allocations is performed via a call
+	to dma_set_coherent_mask():
+
+		int dma_set_coherent_mask(struct device *dev, u64 mask);
 
 Here, dev is a pointer to the device struct of your device, and mask
 is a bit mask describing which bits of an address your device
@@ -137,7 +146,7 @@ exactly why.
 
 The standard 32-bit addressing device would do something like this:
 
-	if (dma_set_mask(dev, DMA_BIT_MASK(32))) {
+	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
 		printk(KERN_WARNING
 		       "mydev: No suitable DMA available.\n");
 		goto ignore_this_device;
@@ -171,22 +180,20 @@ the case would look like this:
 
 	int using_dac, consistent_using_dac;
 
-	if (!dma_set_mask(dev, DMA_BIT_MASK(64))) {
+	if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) {
 		using_dac = 1;
 	   	consistent_using_dac = 1;
-		dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
-	} else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) {
+	} else if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
 		using_dac = 0;
 		consistent_using_dac = 0;
-		dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
 	} else {
 		printk(KERN_WARNING
 		       "mydev: No suitable DMA available.\n");
 		goto ignore_this_device;
 	}
 
-dma_set_coherent_mask() will always be able to set the same or a
-smaller mask as dma_set_mask(). However for the rare case that a
+The coherent coherent mask will always be able to set the same or a
+smaller mask as the streaming mask. However for the rare case that a
 device driver only uses consistent allocations, one would have to
 check the return value from dma_set_coherent_mask().
 
@@ -199,9 +206,9 @@ address you might do something like:
 		goto ignore_this_device;
 	}
 
-When dma_set_mask() is successful, and returns zero, the kernel saves
-away this mask you have provided.  The kernel will use this
-information later when you make DMA mappings.
+When dma_set_mask() or dma_set_mask_and_coherent() is successful, and
+returns zero, the kernel saves away this mask you have provided.  The
+kernel will use this information later when you make DMA mappings.
 
 There is a case which we are aware of at this time, which is worth
 mentioning in this documentation.  If your device supports multiple
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 78a6c569d204..e865279cec58 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -141,6 +141,14 @@ won't change the current mask settings.  It is more intended as an
 internal API for use by the platform than an external API for use by
 driver writers.
 
+int
+dma_set_mask_and_coherent(struct device *dev, u64 mask)
+
+Checks to see if the mask is possible and updates the device
+streaming and coherent DMA mask parameters if it is.
+
+Returns: 0 if successful and a negative error if not.
+
 int
 dma_set_mask(struct device *dev, u64 mask)
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 3a8d0a2af607..ec951f98e3d9 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -97,6 +97,20 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
 }
 #endif
 
+/*
+ * Set both the DMA mask and the coherent DMA mask to the same thing.
+ * Note that we don't check the return value from dma_set_coherent_mask()
+ * as the DMA API guarantees that the coherent DMA mask can be set to
+ * the same or smaller than the streaming DMA mask.
+ */
+static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask)
+{
+	int rc = dma_set_mask(dev, mask);
+	if (rc == 0)
+		dma_set_coherent_mask(dev, mask);
+	return rc;
+}
+
 extern u64 dma_get_required_mask(struct device *dev);
 
 static inline unsigned int dma_get_max_seg_size(struct device *dev)
-- 
cgit v1.2.3


From c7ccde6eac6d3c4bc6110cc3fd76ef3823bc0831 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 3 Sep 2013 13:58:43 -0400
Subject: USB: see if URB comes from a completion handler

Now that URBs can be completed inside tasklets, we need a way of
determining whether a completion handler for a given endpoint is
currently running.  Otherwise it's not possible to maintain the API
guarantee about keeping isochronous streams synchronous when an
underrun occurs.

This patch adds a field and a routine to check whether a completion
handler for a periodic endpoint is running.  At the moment no
analogous routine appears to be necessary for async endpoints, but one
can always be added.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c  | 2 ++
 include/linux/usb/hcd.h | 7 +++++++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index d6a8d23f047b..3a2e82a9c115 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1703,7 +1703,9 @@ static void usb_giveback_urb_bh(unsigned long param)
 
 		urb = list_entry(local_list.next, struct urb, urb_list);
 		list_del_init(&urb->urb_list);
+		bh->completing_ep = urb->ep;
 		__usb_hcd_giveback_urb(urb);
+		bh->completing_ep = NULL;
 	}
 
 	/* check if there are new URBs to giveback */
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 75efc45eaa2f..8c865134c881 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -73,6 +73,7 @@ struct giveback_urb_bh {
 	spinlock_t lock;
 	struct list_head  head;
 	struct tasklet_struct bh;
+	struct usb_host_endpoint *completing_ep;
 };
 
 struct usb_hcd {
@@ -378,6 +379,12 @@ static inline int hcd_giveback_urb_in_bh(struct usb_hcd *hcd)
 	return hcd->driver->flags & HCD_BH;
 }
 
+static inline bool hcd_periodic_completion_in_progress(struct usb_hcd *hcd,
+		struct usb_host_endpoint *ep)
+{
+	return hcd->high_prio_bh.completing_ep == ep;
+}
+
 extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb);
 extern int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb,
 		int status);
-- 
cgit v1.2.3


From 6c74dada4f5a37037dee1da6a8a7aeb56c558bca Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 30 Aug 2013 14:03:59 +0200
Subject: usb-core: Make usb_free_streams return an error

The hcd-driver free_streams method can return an error, so lets properly
propagate that.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c | 11 +++++++----
 include/linux/usb.h    |  2 +-
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 3a2e82a9c115..ab54999da2bf 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2075,8 +2075,11 @@ EXPORT_SYMBOL_GPL(usb_alloc_streams);
  *
  * Reverts a group of bulk endpoints back to not using stream IDs.
  * Can fail if we are given bad arguments, or HCD is broken.
+ *
+ * Return: On success, the number of allocated streams. On failure, a negative
+ * error code.
  */
-void usb_free_streams(struct usb_interface *interface,
+int usb_free_streams(struct usb_interface *interface,
 		struct usb_host_endpoint **eps, unsigned int num_eps,
 		gfp_t mem_flags)
 {
@@ -2087,14 +2090,14 @@ void usb_free_streams(struct usb_interface *interface,
 	dev = interface_to_usbdev(interface);
 	hcd = bus_to_hcd(dev->bus);
 	if (dev->speed != USB_SPEED_SUPER)
-		return;
+		return -EINVAL;
 
 	/* Streams only apply to bulk endpoints. */
 	for (i = 0; i < num_eps; i++)
 		if (!eps[i] || !usb_endpoint_xfer_bulk(&eps[i]->desc))
-			return;
+			return -EINVAL;
 
-	hcd->driver->free_streams(hcd, dev, eps, num_eps, mem_flags);
+	return hcd->driver->free_streams(hcd, dev, eps, num_eps, mem_flags);
 }
 EXPORT_SYMBOL_GPL(usb_free_streams);
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 001629cd1a97..f726c39097e0 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -702,7 +702,7 @@ extern int usb_alloc_streams(struct usb_interface *interface,
 		unsigned int num_streams, gfp_t mem_flags);
 
 /* Reverts a group of bulk endpoints back to not using stream IDs. */
-extern void usb_free_streams(struct usb_interface *interface,
+extern int usb_free_streams(struct usb_interface *interface,
 		struct usb_host_endpoint **eps, unsigned int num_eps,
 		gfp_t mem_flags);
 
-- 
cgit v1.2.3


From 00c877c69ba315d6c565a4df51c71b11e82cdeb8 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Wed, 18 Sep 2013 18:18:02 +0530
Subject: regulator: core: add support for configuring turn-on time through
 constraints

The turn-on time of the regulator depends on the regulator device's
electrical characteristics. Sometimes regulator turn-on time also
depends on the capacitive load on the given platform and it can be
more than the datasheet value.

The driver provides the enable-time as per datasheet.

Add support for configure the enable ramp time through regulator
constraints so that regulator core can take this value for enable
time for that regulator.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Acked-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 Documentation/devicetree/bindings/regulator/regulator.txt | 5 +++++
 drivers/regulator/core.c                                  | 2 ++
 drivers/regulator/of_regulator.c                          | 6 ++++++
 include/linux/regulator/machine.h                         | 2 ++
 4 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt
index 2bd8f0978765..e2c7f1e7251a 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -14,6 +14,11 @@ Optional properties:
 - regulator-ramp-delay: ramp delay for regulator(in uV/uS)
   For hardwares which support disabling ramp rate, it should be explicitly
   intialised to zero (regulator-ramp-delay = <0>) for disabling ramp delay.
+- regulator-enable-ramp-delay: The time taken, in microseconds, for the supply
+  rail to reach the target voltage, plus/minus whatever tolerance the board
+  design requires. This property describes the total system ramp time
+  required due to the combination of internal ramping of the regulator itself,
+  and board design issues such as trace capacitance and load on the supply.
 
 Deprecated properties:
 - regulator-compatible: If a regulator chip contains multiple
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a01b8b3b70ca..5217c1964c32 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1186,6 +1186,8 @@ overflow_err:
 
 static int _regulator_get_enable_time(struct regulator_dev *rdev)
 {
+	if (rdev->constraints && rdev->constraints->enable_time)
+		return rdev->constraints->enable_time;
 	if (!rdev->desc->ops->enable_time)
 		return rdev->desc->enable_time;
 	return rdev->desc->ops->enable_time(rdev);
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 7827384680d6..ea4f36f2cbe2 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -23,6 +23,8 @@ static void of_get_regulation_constraints(struct device_node *np,
 	const __be32 *min_uA, *max_uA, *ramp_delay;
 	struct property *prop;
 	struct regulation_constraints *constraints = &(*init_data)->constraints;
+	int ret;
+	u32 pval;
 
 	constraints->name = of_get_property(np, "regulator-name", NULL);
 
@@ -73,6 +75,10 @@ static void of_get_regulation_constraints(struct device_node *np,
 		else
 			constraints->ramp_disable = true;
 	}
+
+	ret = of_property_read_u32(np, "regulator-enable-ramp-delay", &pval);
+	if (!ret)
+		constraints->enable_time = pval;
 }
 
 /**
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 999b20ce06cf..8108751acb86 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -95,6 +95,7 @@ struct regulator_state {
  * @initial_state: Suspend state to set by default.
  * @initial_mode: Mode to set at startup.
  * @ramp_delay: Time to settle down after voltage change (unit: uV/us)
+ * @enable_time: Turn-on time of the rails (unit: microseconds)
  */
 struct regulation_constraints {
 
@@ -129,6 +130,7 @@ struct regulation_constraints {
 	unsigned int initial_mode;
 
 	unsigned int ramp_delay;
+	unsigned int enable_time;
 
 	/* constraint flags */
 	unsigned always_on:1;	/* regulator never off when system is on */
-- 
cgit v1.2.3


From e9a03add0c6ed5341fc59ff9c76843c2888a33fa Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Tue, 3 Sep 2013 16:28:59 +0800
Subject: pinctrl: ADI PIN control driver for the GPIO controller on bf54x and
 bf60x.

The new ADI GPIO2 controller was introduced since the BF548 and BF60x
processors. It differs a lot from the old one on BF5xx processors. So,
create a pinctrl driver under the pinctrl framework.

- Define gpio ports and pin interrupt controllers as individual platform
  devices.
- Register a pinctrl driver for the whole GPIO ports and pin interrupt
  devices.
- Probe pint devices before port devices. Put device instances into
  the global gpio and pint lists.
- Define peripheral, irq and gpio reservation bit masks for each gpio
  port as runtime resources.
- Save and restore gpio port and pint status MMRs in syscore PM functions.
- Create the plug-in subdrivers to hold the pinctrl soc data for bf54x
  and bf60x. Add soc data into struct adi_pinctrl. Initialize the soc data
  in pin controller probe function. Get the pin groups and functions via
  the soc data reference.
- Call gpiochip_add_pin_range() in gpio device probe function to register
  range cross reference between gpio device and pin control device.
- Get range by pinctrl_find_gpio_range_from_pin(), find gpio_port object
  by container_of() and find adi_pinctrl by pin control device name.
- Handle peripheral and gpio requests in pinctrl operation functions.
- Demux gpio IRQs via the irq_domain created by each GPIO port.

v2-changes:
- Remove unlinke() directive.

v3-changes:
- Rename struct adi_pmx to adi_pinctrl.
- Fix the comments of struct gpio_pint.
- Remove unused pin_base in struct gpio_port.
- Change pint_assign into bool type.
- Add comments about the relationship between pint device and port device
to the driver header.
- Use BIT macro to shift bit.
- Remove all bitmap reservation help functions. Inline reservation functions
into the actual code.
- Remove gpio and offset mutual reference help functions.
- Remove all help functions to find gpio_port and adi_pinctrl structs. Get
range by pinctrl_find_gpio_range_from_pin(), find gpio_port object by
container_of() and find adi_pinctrl by pin control device name.
- Pass bool type usage variable to port_setup help function.
- Separate long bit operations into several lines and add comments.
- Use debugfs to output all GPIO request information.
- Avoid to set drvdata to NULL
- Add explanation to function adi_gpio_init_int()
- Call gpiochip_add_pin_range() in gpio device probe function to register
range cross reference between gpio device and pin control device.
- Remove the reference to pin control device from the gpio_port struct.
Remove the reference list to gpio device from the adi_pinctrl struct.
Replace the global adi_pinctrl list with adi_gpio_port_list. Walk through
the gpio list to do power suspend and resume operations.
- Remove the global GPIO base from struct adi_pinctrl, define pin base in
the platform data for each GPIO port device.
- Initialize adi_pinctrl_setup in arch_initcall().
- print the status of triggers, whether it is in GPIO mode, if it is
flagged to be used as IRQ, etc in adi_pin_dbg_show().
- Create the plug-in subdrivers to hold the pinctrl soc data for bf54x
and bf60x. Add soc data into struct adi_pinctrl. Initialize the soc data
in pin controller probe function. Get the pin groups and functions via
the soc data reference.

v4-changes:
- remove useless system_state checking.
- replace dev_err with dev_warn in both irq and gpio pin cases.
- comment on relationship between irq type and invert operation.
- It is not necessary to check the reservation mode of the requested
pin in IRQ chip operation. Remove the reservation map.
- Use existing gpio/pinctrl subsystem debugfs files. Remove pinctrl-adi2
driver specific debugfs output.
- Add linkport group and function information for bf60x.
- Separate uart and ctsrts pins into 2 groups.
- Separate APAPI and alternative ATAPI pins into 2 groups.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/Kconfig                    |   17 +
 drivers/pinctrl/Makefile                   |    3 +
 drivers/pinctrl/pinctrl-adi2-bf54x.c       |  592 ++++++++++++++
 drivers/pinctrl/pinctrl-adi2-bf60x.c       |  522 ++++++++++++
 drivers/pinctrl/pinctrl-adi2.c             | 1183 ++++++++++++++++++++++++++++
 drivers/pinctrl/pinctrl-adi2.h             |   75 ++
 include/linux/platform_data/pinctrl-adi2.h |   40 +
 7 files changed, 2432 insertions(+)
 create mode 100644 drivers/pinctrl/pinctrl-adi2-bf54x.c
 create mode 100644 drivers/pinctrl/pinctrl-adi2-bf60x.c
 create mode 100644 drivers/pinctrl/pinctrl-adi2.c
 create mode 100644 drivers/pinctrl/pinctrl-adi2.h
 create mode 100644 include/linux/platform_data/pinctrl-adi2.h

(limited to 'include/linux')

diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index b6e864e8c9e8..f45a21c57e0e 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -49,6 +49,23 @@ config PINCTRL_AB8505
 	bool "AB8505 pin controller driver"
 	depends on PINCTRL_ABX500 && ARCH_U8500
 
+config PINCTRL_ADI2
+	bool "ADI pin controller driver"
+	select PINMUX
+	select IRQ_DOMAIN
+	help
+	  This is the pin controller and gpio driver for ADI BF54x, BF60x and
+	  future processors. This option is selected automatically when specific
+	  machine and arch are selected to build.
+
+config PINCTRL_BF54x
+	def_bool y if BF54x
+	select PINCTRL_ADI2
+
+config PINCTRL_BF60x
+	def_bool y if BF60x
+	select PINCTRL_ADI2
+
 config PINCTRL_AT91
 	bool "AT91 pinctrl driver"
 	depends on OF
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index 496d9bf9e1b9..bbeb98086495 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -14,6 +14,9 @@ obj-$(CONFIG_PINCTRL_AB8500)	+= pinctrl-ab8500.o
 obj-$(CONFIG_PINCTRL_AB8540)	+= pinctrl-ab8540.o
 obj-$(CONFIG_PINCTRL_AB9540)	+= pinctrl-ab9540.o
 obj-$(CONFIG_PINCTRL_AB8505)	+= pinctrl-ab8505.o
+obj-$(CONFIG_PINCTRL_ADI2)	+= pinctrl-adi2.o
+obj-$(CONFIG_PINCTRL_BF54x)	+= pinctrl-adi2-bf54x.o
+obj-$(CONFIG_PINCTRL_BF60x)	+= pinctrl-adi2-bf60x.o
 obj-$(CONFIG_PINCTRL_AT91)	+= pinctrl-at91.o
 obj-$(CONFIG_PINCTRL_BCM2835)	+= pinctrl-bcm2835.o
 obj-$(CONFIG_PINCTRL_BAYTRAIL)	+= pinctrl-baytrail.o
diff --git a/drivers/pinctrl/pinctrl-adi2-bf54x.c b/drivers/pinctrl/pinctrl-adi2-bf54x.c
new file mode 100644
index 000000000000..ea9d9ab9cda1
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-adi2-bf54x.c
@@ -0,0 +1,592 @@
+/*
+ * Pinctrl Driver for ADI GPIO2 controller
+ *
+ * Copyright 2007-2013 Analog Devices Inc.
+ *
+ * Licensed under the GPLv2 or later
+ */
+
+#include <asm/portmux.h>
+#include "pinctrl-adi2.h"
+
+static const struct pinctrl_pin_desc adi_pads[] = {
+	PINCTRL_PIN(0, "PA0"),
+	PINCTRL_PIN(1, "PA1"),
+	PINCTRL_PIN(2, "PA2"),
+	PINCTRL_PIN(3, "PG3"),
+	PINCTRL_PIN(4, "PA4"),
+	PINCTRL_PIN(5, "PA5"),
+	PINCTRL_PIN(6, "PA6"),
+	PINCTRL_PIN(7, "PA7"),
+	PINCTRL_PIN(8, "PA8"),
+	PINCTRL_PIN(9, "PA9"),
+	PINCTRL_PIN(10, "PA10"),
+	PINCTRL_PIN(11, "PA11"),
+	PINCTRL_PIN(12, "PA12"),
+	PINCTRL_PIN(13, "PA13"),
+	PINCTRL_PIN(14, "PA14"),
+	PINCTRL_PIN(15, "PA15"),
+	PINCTRL_PIN(16, "PB0"),
+	PINCTRL_PIN(17, "PB1"),
+	PINCTRL_PIN(18, "PB2"),
+	PINCTRL_PIN(19, "PB3"),
+	PINCTRL_PIN(20, "PB4"),
+	PINCTRL_PIN(21, "PB5"),
+	PINCTRL_PIN(22, "PB6"),
+	PINCTRL_PIN(23, "PB7"),
+	PINCTRL_PIN(24, "PB8"),
+	PINCTRL_PIN(25, "PB9"),
+	PINCTRL_PIN(26, "PB10"),
+	PINCTRL_PIN(27, "PB11"),
+	PINCTRL_PIN(28, "PB12"),
+	PINCTRL_PIN(29, "PB13"),
+	PINCTRL_PIN(30, "PB14"),
+	PINCTRL_PIN(32, "PC0"),
+	PINCTRL_PIN(33, "PC1"),
+	PINCTRL_PIN(34, "PC2"),
+	PINCTRL_PIN(35, "PC3"),
+	PINCTRL_PIN(36, "PC4"),
+	PINCTRL_PIN(37, "PC5"),
+	PINCTRL_PIN(38, "PC6"),
+	PINCTRL_PIN(39, "PC7"),
+	PINCTRL_PIN(40, "PC8"),
+	PINCTRL_PIN(41, "PC9"),
+	PINCTRL_PIN(42, "PC10"),
+	PINCTRL_PIN(43, "PC11"),
+	PINCTRL_PIN(44, "PC12"),
+	PINCTRL_PIN(45, "PC13"),
+	PINCTRL_PIN(48, "PD0"),
+	PINCTRL_PIN(49, "PD1"),
+	PINCTRL_PIN(50, "PD2"),
+	PINCTRL_PIN(51, "PD3"),
+	PINCTRL_PIN(52, "PD4"),
+	PINCTRL_PIN(53, "PD5"),
+	PINCTRL_PIN(54, "PD6"),
+	PINCTRL_PIN(55, "PD7"),
+	PINCTRL_PIN(56, "PD8"),
+	PINCTRL_PIN(57, "PD9"),
+	PINCTRL_PIN(58, "PD10"),
+	PINCTRL_PIN(59, "PD11"),
+	PINCTRL_PIN(60, "PD12"),
+	PINCTRL_PIN(61, "PD13"),
+	PINCTRL_PIN(62, "PD14"),
+	PINCTRL_PIN(63, "PD15"),
+	PINCTRL_PIN(64, "PE0"),
+	PINCTRL_PIN(65, "PE1"),
+	PINCTRL_PIN(66, "PE2"),
+	PINCTRL_PIN(67, "PE3"),
+	PINCTRL_PIN(68, "PE4"),
+	PINCTRL_PIN(69, "PE5"),
+	PINCTRL_PIN(70, "PE6"),
+	PINCTRL_PIN(71, "PE7"),
+	PINCTRL_PIN(72, "PE8"),
+	PINCTRL_PIN(73, "PE9"),
+	PINCTRL_PIN(74, "PE10"),
+	PINCTRL_PIN(75, "PE11"),
+	PINCTRL_PIN(76, "PE12"),
+	PINCTRL_PIN(77, "PE13"),
+	PINCTRL_PIN(78, "PE14"),
+	PINCTRL_PIN(79, "PE15"),
+	PINCTRL_PIN(80, "PF0"),
+	PINCTRL_PIN(81, "PF1"),
+	PINCTRL_PIN(82, "PF2"),
+	PINCTRL_PIN(83, "PF3"),
+	PINCTRL_PIN(84, "PF4"),
+	PINCTRL_PIN(85, "PF5"),
+	PINCTRL_PIN(86, "PF6"),
+	PINCTRL_PIN(87, "PF7"),
+	PINCTRL_PIN(88, "PF8"),
+	PINCTRL_PIN(89, "PF9"),
+	PINCTRL_PIN(90, "PF10"),
+	PINCTRL_PIN(91, "PF11"),
+	PINCTRL_PIN(92, "PF12"),
+	PINCTRL_PIN(93, "PF13"),
+	PINCTRL_PIN(94, "PF14"),
+	PINCTRL_PIN(95, "PF15"),
+	PINCTRL_PIN(96, "PG0"),
+	PINCTRL_PIN(97, "PG1"),
+	PINCTRL_PIN(98, "PG2"),
+	PINCTRL_PIN(99, "PG3"),
+	PINCTRL_PIN(100, "PG4"),
+	PINCTRL_PIN(101, "PG5"),
+	PINCTRL_PIN(102, "PG6"),
+	PINCTRL_PIN(103, "PG7"),
+	PINCTRL_PIN(104, "PG8"),
+	PINCTRL_PIN(105, "PG9"),
+	PINCTRL_PIN(106, "PG10"),
+	PINCTRL_PIN(107, "PG11"),
+	PINCTRL_PIN(108, "PG12"),
+	PINCTRL_PIN(109, "PG13"),
+	PINCTRL_PIN(110, "PG14"),
+	PINCTRL_PIN(111, "PG15"),
+	PINCTRL_PIN(112, "PH0"),
+	PINCTRL_PIN(113, "PH1"),
+	PINCTRL_PIN(114, "PH2"),
+	PINCTRL_PIN(115, "PH3"),
+	PINCTRL_PIN(116, "PH4"),
+	PINCTRL_PIN(117, "PH5"),
+	PINCTRL_PIN(118, "PH6"),
+	PINCTRL_PIN(119, "PH7"),
+	PINCTRL_PIN(120, "PH8"),
+	PINCTRL_PIN(121, "PH9"),
+	PINCTRL_PIN(122, "PH10"),
+	PINCTRL_PIN(123, "PH11"),
+	PINCTRL_PIN(124, "PH12"),
+	PINCTRL_PIN(125, "PH13"),
+	PINCTRL_PIN(128, "PI0"),
+	PINCTRL_PIN(129, "PI1"),
+	PINCTRL_PIN(130, "PI2"),
+	PINCTRL_PIN(131, "PI3"),
+	PINCTRL_PIN(132, "PI4"),
+	PINCTRL_PIN(133, "PI5"),
+	PINCTRL_PIN(134, "PI6"),
+	PINCTRL_PIN(135, "PI7"),
+	PINCTRL_PIN(136, "PI8"),
+	PINCTRL_PIN(137, "PI9"),
+	PINCTRL_PIN(138, "PI10"),
+	PINCTRL_PIN(139, "PI11"),
+	PINCTRL_PIN(140, "PI12"),
+	PINCTRL_PIN(141, "PI13"),
+	PINCTRL_PIN(142, "PI14"),
+	PINCTRL_PIN(143, "PI15"),
+	PINCTRL_PIN(144, "PJ0"),
+	PINCTRL_PIN(145, "PJ1"),
+	PINCTRL_PIN(146, "PJ2"),
+	PINCTRL_PIN(147, "PJ3"),
+	PINCTRL_PIN(148, "PJ4"),
+	PINCTRL_PIN(149, "PJ5"),
+	PINCTRL_PIN(150, "PJ6"),
+	PINCTRL_PIN(151, "PJ7"),
+	PINCTRL_PIN(152, "PJ8"),
+	PINCTRL_PIN(153, "PJ9"),
+	PINCTRL_PIN(154, "PJ10"),
+	PINCTRL_PIN(155, "PJ11"),
+	PINCTRL_PIN(156, "PJ12"),
+	PINCTRL_PIN(157, "PJ13"),
+};
+
+static const unsigned uart0_pins[] = {
+	GPIO_PE7, GPIO_PE8,
+};
+
+static const unsigned uart1_pins[] = {
+	GPIO_PH0, GPIO_PH1,
+};
+
+static const unsigned uart1_ctsrts_pins[] = {
+	GPIO_PE9, GPIO_PE10,
+};
+
+static const unsigned uart2_pins[] = {
+	GPIO_PB4, GPIO_PB5,
+};
+
+static const unsigned uart3_pins[] = {
+	GPIO_PB6, GPIO_PB7,
+};
+
+static const unsigned uart3_ctsrts_pins[] = {
+	GPIO_PB2, GPIO_PB3,
+};
+
+static const unsigned rsi0_pins[] = {
+	GPIO_PC8, GPIO_PC9, GPIO_PC10, GPIO_PC11, GPIO_PC12, GPIO_PC13,
+};
+
+static const unsigned spi0_pins[] = {
+	GPIO_PE0, GPIO_PE1, GPIO_PE2,
+};
+
+static const unsigned spi1_pins[] = {
+	GPIO_PG8, GPIO_PG9, GPIO_PG10,
+};
+
+static const unsigned twi0_pins[] = {
+	GPIO_PE14, GPIO_PE15,
+};
+
+static const unsigned twi1_pins[] = {
+	GPIO_PB0, GPIO_PB1,
+};
+
+static const unsigned rotary_pins[] = {
+	GPIO_PH4, GPIO_PH3, GPIO_PH5,
+};
+
+static const unsigned can0_pins[] = {
+	GPIO_PG13, GPIO_PG12,
+};
+
+static const unsigned can1_pins[] = {
+	GPIO_PG14, GPIO_PG15,
+};
+
+static const unsigned smc0_pins[] = {
+	GPIO_PH8, GPIO_PH9, GPIO_PH10, GPIO_PH11, GPIO_PH12, GPIO_PH13,
+	GPIO_PI0, GPIO_PI1, GPIO_PI2, GPIO_PI3, GPIO_PI4, GPIO_PI5, GPIO_PI6,
+	GPIO_PI7, GPIO_PI8, GPIO_PI9, GPIO_PI10, GPIO_PI11,
+	GPIO_PI12, GPIO_PI13, GPIO_PI14, GPIO_PI15,
+};
+
+static const unsigned sport0_pins[] = {
+	GPIO_PC0, GPIO_PC2, GPIO_PC3, GPIO_PC4, GPIO_PC6, GPIO_PC7,
+};
+
+static const unsigned sport1_pins[] = {
+	GPIO_PD0, GPIO_PD2, GPIO_PD3, GPIO_PD4, GPIO_PD6, GPIO_PD7,
+};
+
+static const unsigned sport2_pins[] = {
+	GPIO_PA0, GPIO_PA2, GPIO_PA3, GPIO_PA4, GPIO_PA6, GPIO_PA7,
+};
+
+static const unsigned sport3_pins[] = {
+	GPIO_PA8, GPIO_PA10, GPIO_PA11, GPIO_PA12, GPIO_PA14, GPIO_PA15,
+};
+
+static const unsigned ppi0_8b_pins[] = {
+	GPIO_PF0, GPIO_PF1, GPIO_PF2, GPIO_PF3, GPIO_PF4, GPIO_PF5, GPIO_PF6,
+	GPIO_PF7, GPIO_PF13, GPIO_PG0, GPIO_PG1, GPIO_PG2,
+};
+
+static const unsigned ppi0_16b_pins[] = {
+	GPIO_PF0, GPIO_PF1, GPIO_PF2, GPIO_PF3, GPIO_PF4, GPIO_PF5, GPIO_PF6,
+	GPIO_PF7, GPIO_PF9, GPIO_PF10, GPIO_PF11, GPIO_PF12,
+	GPIO_PF13, GPIO_PF14, GPIO_PF15,
+	GPIO_PG0, GPIO_PG1, GPIO_PG2,
+};
+
+static const unsigned ppi0_24b_pins[] = {
+	GPIO_PF0, GPIO_PF1, GPIO_PF2, GPIO_PF3, GPIO_PF4, GPIO_PF5, GPIO_PF6,
+	GPIO_PF7, GPIO_PF8, GPIO_PF9, GPIO_PF10, GPIO_PF11, GPIO_PF12,
+	GPIO_PF13, GPIO_PF14, GPIO_PF15, GPIO_PD0, GPIO_PD1, GPIO_PD2,
+	GPIO_PD3, GPIO_PD4, GPIO_PD5, GPIO_PG3, GPIO_PG4,
+	GPIO_PG0, GPIO_PG1, GPIO_PG2,
+};
+
+static const unsigned ppi1_8b_pins[] = {
+	GPIO_PD0, GPIO_PD1, GPIO_PD2, GPIO_PD3, GPIO_PD4, GPIO_PD5, GPIO_PD6,
+	GPIO_PD7, GPIO_PE11, GPIO_PE12, GPIO_PE13,
+};
+
+static const unsigned ppi1_16b_pins[] = {
+	GPIO_PD0, GPIO_PD1, GPIO_PD2, GPIO_PD3, GPIO_PD4, GPIO_PD5, GPIO_PD6,
+	GPIO_PD7, GPIO_PD8, GPIO_PD9, GPIO_PD10, GPIO_PD11, GPIO_PD12,
+	GPIO_PD13, GPIO_PD14, GPIO_PD15,
+	GPIO_PE11, GPIO_PE12, GPIO_PE13,
+};
+
+static const unsigned ppi2_8b_pins[] = {
+	GPIO_PD8, GPIO_PD9, GPIO_PD10, GPIO_PD11, GPIO_PD12,
+	GPIO_PD13, GPIO_PD14, GPIO_PD15,
+	GPIO_PA7, GPIO_PB0, GPIO_PB1, GPIO_PB2, GPIO_PB3,
+};
+
+static const unsigned atapi_pins[] = {
+	GPIO_PH2, GPIO_PJ3, GPIO_PJ4, GPIO_PJ5, GPIO_PJ6,
+	GPIO_PJ7, GPIO_PJ8, GPIO_PJ9, GPIO_PJ10,
+};
+
+static const unsigned atapi_alter_pins[] = {
+	GPIO_PF0, GPIO_PF1, GPIO_PF2, GPIO_PF3, GPIO_PF4, GPIO_PF5, GPIO_PF6,
+	GPIO_PF7, GPIO_PF8, GPIO_PF9, GPIO_PF10, GPIO_PF11, GPIO_PF12,
+	GPIO_PF13, GPIO_PF14, GPIO_PF15, GPIO_PG2, GPIO_PG3, GPIO_PG4,
+};
+
+static const unsigned nfc0_pins[] = {
+	GPIO_PJ1, GPIO_PJ2,
+};
+
+static const unsigned keys_4x4_pins[] = {
+	GPIO_PD8, GPIO_PD9, GPIO_PD10, GPIO_PD11,
+	GPIO_PD12, GPIO_PD13, GPIO_PD14, GPIO_PD15,
+};
+
+static const unsigned keys_8x8_pins[] = {
+	GPIO_PD8, GPIO_PD9, GPIO_PD10, GPIO_PD11,
+	GPIO_PD12, GPIO_PD13, GPIO_PD14, GPIO_PD15,
+	GPIO_PE0, GPIO_PE1, GPIO_PE2, GPIO_PE3,
+	GPIO_PE4, GPIO_PE5, GPIO_PE6, GPIO_PE7,
+};
+
+static const struct adi_pin_group adi_pin_groups[] = {
+	ADI_PIN_GROUP("uart0grp", uart0_pins),
+	ADI_PIN_GROUP("uart1grp", uart1_pins),
+	ADI_PIN_GROUP("uart1ctsrtsgrp", uart1_ctsrts_pins),
+	ADI_PIN_GROUP("uart2grp", uart2_pins),
+	ADI_PIN_GROUP("uart3grp", uart3_pins),
+	ADI_PIN_GROUP("uart3ctsrtsgrp", uart3_ctsrts_pins),
+	ADI_PIN_GROUP("rsi0grp", rsi0_pins),
+	ADI_PIN_GROUP("spi0grp", spi0_pins),
+	ADI_PIN_GROUP("spi1grp", spi1_pins),
+	ADI_PIN_GROUP("twi0grp", twi0_pins),
+	ADI_PIN_GROUP("twi1grp", twi1_pins),
+	ADI_PIN_GROUP("rotarygrp", rotary_pins),
+	ADI_PIN_GROUP("can0grp", can0_pins),
+	ADI_PIN_GROUP("can1grp", can1_pins),
+	ADI_PIN_GROUP("smc0grp", smc0_pins),
+	ADI_PIN_GROUP("sport0grp", sport0_pins),
+	ADI_PIN_GROUP("sport1grp", sport1_pins),
+	ADI_PIN_GROUP("sport2grp", sport2_pins),
+	ADI_PIN_GROUP("sport3grp", sport3_pins),
+	ADI_PIN_GROUP("ppi0_8bgrp", ppi0_8b_pins),
+	ADI_PIN_GROUP("ppi0_16bgrp", ppi0_16b_pins),
+	ADI_PIN_GROUP("ppi0_24bgrp", ppi0_24b_pins),
+	ADI_PIN_GROUP("ppi1_8bgrp", ppi1_8b_pins),
+	ADI_PIN_GROUP("ppi1_16bgrp", ppi1_16b_pins),
+	ADI_PIN_GROUP("ppi2_8bgrp", ppi2_8b_pins),
+	ADI_PIN_GROUP("atapigrp", atapi_pins),
+	ADI_PIN_GROUP("atapialtergrp", atapi_alter_pins),
+	ADI_PIN_GROUP("nfc0grp", nfc0_pins),
+	ADI_PIN_GROUP("keys_4x4grp", keys_4x4_pins),
+	ADI_PIN_GROUP("keys_8x8grp", keys_8x8_pins),
+};
+
+static const unsigned short uart0_mux[] = {
+	P_UART0_TX, P_UART0_RX,
+	0
+};
+
+static const unsigned short uart1_mux[] = {
+	P_UART1_TX, P_UART1_RX,
+	0
+};
+
+static const unsigned short uart1_ctsrts_mux[] = {
+	P_UART1_RTS, P_UART1_CTS,
+	0
+};
+
+static const unsigned short uart2_mux[] = {
+	P_UART2_TX, P_UART2_RX,
+	0
+};
+
+static const unsigned short uart3_mux[] = {
+	P_UART3_TX, P_UART3_RX,
+	0
+};
+
+static const unsigned short uart3_ctsrts_mux[] = {
+	P_UART3_RTS, P_UART3_CTS,
+	0
+};
+
+static const unsigned short rsi0_mux[] = {
+	P_SD_D0, P_SD_D1, P_SD_D2, P_SD_D3, P_SD_CLK, P_SD_CMD,
+	0
+};
+
+static const unsigned short spi0_mux[] = {
+	P_SPI0_SCK, P_SPI0_MISO, P_SPI0_MOSI, 0
+};
+
+static const unsigned short spi1_mux[] = {
+	P_SPI1_SCK, P_SPI1_MISO, P_SPI1_MOSI, 0
+};
+
+static const unsigned short twi0_mux[] = {
+	P_TWI0_SCL, P_TWI0_SDA, 0
+};
+
+static const unsigned short twi1_mux[] = {
+	P_TWI1_SCL, P_TWI1_SDA, 0
+};
+
+static const unsigned short rotary_mux[] = {
+	P_CNT_CUD, P_CNT_CDG, P_CNT_CZM, 0
+};
+
+static const unsigned short sport0_mux[] = {
+	P_SPORT0_TFS, P_SPORT0_DTPRI, P_SPORT0_TSCLK, P_SPORT0_RFS,
+	P_SPORT0_DRPRI, P_SPORT0_RSCLK, 0
+};
+
+static const unsigned short sport1_mux[] = {
+	P_SPORT1_TFS, P_SPORT1_DTPRI, P_SPORT1_TSCLK, P_SPORT1_RFS,
+	P_SPORT1_DRPRI, P_SPORT1_RSCLK, 0
+};
+
+static const unsigned short sport2_mux[] = {
+	P_SPORT2_TFS, P_SPORT2_DTPRI, P_SPORT2_TSCLK, P_SPORT2_RFS,
+	P_SPORT2_DRPRI, P_SPORT2_RSCLK, 0
+};
+
+static const unsigned short sport3_mux[] = {
+	P_SPORT3_TFS, P_SPORT3_DTPRI, P_SPORT3_TSCLK, P_SPORT3_RFS,
+	P_SPORT3_DRPRI, P_SPORT3_RSCLK, 0
+};
+
+static const unsigned short can0_mux[] = {
+	P_CAN0_RX, P_CAN0_TX, 0
+};
+
+static const unsigned short can1_mux[] = {
+	P_CAN1_RX, P_CAN1_TX, 0
+};
+
+static const unsigned short smc0_mux[] = {
+	P_A4, P_A5, P_A6, P_A7, P_A8, P_A9, P_A10, P_A11, P_A12,
+	P_A13, P_A14, P_A15, P_A16, P_A17, P_A18, P_A19, P_A20, P_A21,
+	P_A22, P_A23, P_A24, P_A25, P_NOR_CLK, 0,
+};
+
+static const unsigned short ppi0_8b_mux[] = {
+	P_PPI0_D0, P_PPI0_D1, P_PPI0_D2, P_PPI0_D3,
+	P_PPI0_D4, P_PPI0_D5, P_PPI0_D6, P_PPI0_D7,
+	P_PPI0_CLK, P_PPI0_FS1, P_PPI0_FS2,
+	0,
+};
+
+static const unsigned short ppi0_16b_mux[] = {
+	P_PPI0_D0, P_PPI0_D1, P_PPI0_D2, P_PPI0_D3,
+	P_PPI0_D4, P_PPI0_D5, P_PPI0_D6, P_PPI0_D7,
+	P_PPI0_D8, P_PPI0_D9, P_PPI0_D10, P_PPI0_D11,
+	P_PPI0_D12, P_PPI0_D13, P_PPI0_D14, P_PPI0_D15,
+	P_PPI0_CLK, P_PPI0_FS1, P_PPI0_FS2,
+	0,
+};
+
+static const unsigned short ppi0_24b_mux[] = {
+	P_PPI0_D0, P_PPI0_D1, P_PPI0_D2, P_PPI0_D3,
+	P_PPI0_D4, P_PPI0_D5, P_PPI0_D6, P_PPI0_D7,
+	P_PPI0_D8, P_PPI0_D9, P_PPI0_D10, P_PPI0_D11,
+	P_PPI0_D12, P_PPI0_D13, P_PPI0_D14, P_PPI0_D15,
+	P_PPI0_D16, P_PPI0_D17, P_PPI0_D18, P_PPI0_D19,
+	P_PPI0_D20, P_PPI0_D21, P_PPI0_D22, P_PPI0_D23,
+	P_PPI0_CLK, P_PPI0_FS1, P_PPI0_FS2,
+	0,
+};
+
+static const unsigned short ppi1_8b_mux[] = {
+	P_PPI1_D0, P_PPI1_D1, P_PPI1_D2, P_PPI1_D3,
+	P_PPI1_D4, P_PPI1_D5, P_PPI1_D6, P_PPI1_D7,
+	P_PPI1_CLK, P_PPI1_FS1, P_PPI1_FS2,
+	0,
+};
+
+static const unsigned short ppi1_16b_mux[] = {
+	P_PPI1_D0, P_PPI1_D1, P_PPI1_D2, P_PPI1_D3,
+	P_PPI1_D4, P_PPI1_D5, P_PPI1_D6, P_PPI1_D7,
+	P_PPI1_D8, P_PPI1_D9, P_PPI1_D10, P_PPI1_D11,
+	P_PPI1_D12, P_PPI1_D13, P_PPI1_D14, P_PPI1_D15,
+	P_PPI1_CLK, P_PPI1_FS1, P_PPI1_FS2,
+	0,
+};
+
+static const unsigned short ppi2_8b_mux[] = {
+	P_PPI2_D0, P_PPI2_D1, P_PPI2_D2, P_PPI2_D3,
+	P_PPI2_D4, P_PPI2_D5, P_PPI2_D6, P_PPI2_D7,
+	P_PPI2_CLK, P_PPI2_FS1, P_PPI2_FS2,
+	0,
+};
+
+static const unsigned short atapi_mux[] = {
+	P_ATAPI_RESET, P_ATAPI_DIOR, P_ATAPI_DIOW, P_ATAPI_CS0, P_ATAPI_CS1,
+	P_ATAPI_DMACK, P_ATAPI_DMARQ, P_ATAPI_INTRQ, P_ATAPI_IORDY,
+};
+
+static const unsigned short atapi_alter_mux[] = {
+	P_ATAPI_D0A, P_ATAPI_D1A, P_ATAPI_D2A, P_ATAPI_D3A, P_ATAPI_D4A,
+	P_ATAPI_D5A, P_ATAPI_D6A, P_ATAPI_D7A, P_ATAPI_D8A, P_ATAPI_D9A,
+	P_ATAPI_D10A, P_ATAPI_D11A, P_ATAPI_D12A, P_ATAPI_D13A, P_ATAPI_D14A,
+	P_ATAPI_D15A, P_ATAPI_A0A, P_ATAPI_A1A, P_ATAPI_A2A,
+	0
+};
+
+static const unsigned short nfc0_mux[] = {
+	P_NAND_CE, P_NAND_RB,
+	0
+};
+
+static const unsigned short keys_4x4_mux[] = {
+	P_KEY_ROW3, P_KEY_ROW2, P_KEY_ROW1, P_KEY_ROW0,
+	P_KEY_COL3, P_KEY_COL2, P_KEY_COL1, P_KEY_COL0,
+	0
+};
+
+static const unsigned short keys_8x8_mux[] = {
+	P_KEY_ROW7, P_KEY_ROW6, P_KEY_ROW5, P_KEY_ROW4,
+	P_KEY_ROW3, P_KEY_ROW2, P_KEY_ROW1, P_KEY_ROW0,
+	P_KEY_COL7, P_KEY_COL6, P_KEY_COL5, P_KEY_COL4,
+	P_KEY_COL3, P_KEY_COL2, P_KEY_COL1, P_KEY_COL0,
+	0
+};
+
+static const char * const uart0grp[] = { "uart0grp" };
+static const char * const uart1grp[] = { "uart1grp" };
+static const char * const uart1ctsrtsgrp[] = { "uart1ctsrtsgrp" };
+static const char * const uart2grp[] = { "uart2grp" };
+static const char * const uart3grp[] = { "uart3grp" };
+static const char * const uart3ctsrtsgrp[] = { "uart3ctsrtsgrp" };
+static const char * const rsi0grp[] = { "rsi0grp" };
+static const char * const spi0grp[] = { "spi0grp" };
+static const char * const spi1grp[] = { "spi1grp" };
+static const char * const twi0grp[] = { "twi0grp" };
+static const char * const twi1grp[] = { "twi1grp" };
+static const char * const rotarygrp[] = { "rotarygrp" };
+static const char * const can0grp[] = { "can0grp" };
+static const char * const can1grp[] = { "can1grp" };
+static const char * const smc0grp[] = { "smc0grp" };
+static const char * const sport0grp[] = { "sport0grp" };
+static const char * const sport1grp[] = { "sport1grp" };
+static const char * const sport2grp[] = { "sport2grp" };
+static const char * const sport3grp[] = { "sport3grp" };
+static const char * const ppi0_8bgrp[] = { "ppi0_8bgrp" };
+static const char * const ppi0_16bgrp[] = { "ppi0_16bgrp" };
+static const char * const ppi0_24bgrp[] = { "ppi0_24bgrp" };
+static const char * const ppi1_8bgrp[] = { "ppi1_8bgrp" };
+static const char * const ppi1_16bgrp[] = { "ppi1_16bgrp" };
+static const char * const ppi2_8bgrp[] = { "ppi2_8bgrp" };
+static const char * const atapigrp[] = { "atapigrp" };
+static const char * const atapialtergrp[] = { "atapialtergrp" };
+static const char * const nfc0grp[] = { "nfc0grp" };
+static const char * const keys_4x4grp[] = { "keys_4x4grp" };
+static const char * const keys_8x8grp[] = { "keys_8x8grp" };
+
+static const struct adi_pmx_func adi_pmx_functions[] = {
+	ADI_PMX_FUNCTION("uart0", uart0grp, uart0_mux),
+	ADI_PMX_FUNCTION("uart1", uart1grp, uart1_mux),
+	ADI_PMX_FUNCTION("uart1_ctsrts", uart1ctsrtsgrp, uart1_ctsrts_mux),
+	ADI_PMX_FUNCTION("uart2", uart2grp, uart2_mux),
+	ADI_PMX_FUNCTION("uart3", uart3grp, uart3_mux),
+	ADI_PMX_FUNCTION("uart3_ctsrts", uart3ctsrtsgrp, uart3_ctsrts_mux),
+	ADI_PMX_FUNCTION("rsi0", rsi0grp, rsi0_mux),
+	ADI_PMX_FUNCTION("spi0", spi0grp, spi0_mux),
+	ADI_PMX_FUNCTION("spi1", spi1grp, spi1_mux),
+	ADI_PMX_FUNCTION("twi0", twi0grp, twi0_mux),
+	ADI_PMX_FUNCTION("twi1", twi1grp, twi1_mux),
+	ADI_PMX_FUNCTION("rotary", rotarygrp, rotary_mux),
+	ADI_PMX_FUNCTION("can0", can0grp, can0_mux),
+	ADI_PMX_FUNCTION("can1", can1grp, can1_mux),
+	ADI_PMX_FUNCTION("smc0", smc0grp, smc0_mux),
+	ADI_PMX_FUNCTION("sport0", sport0grp, sport0_mux),
+	ADI_PMX_FUNCTION("sport1", sport1grp, sport1_mux),
+	ADI_PMX_FUNCTION("sport2", sport2grp, sport2_mux),
+	ADI_PMX_FUNCTION("sport3", sport3grp, sport3_mux),
+	ADI_PMX_FUNCTION("ppi0_8b", ppi0_8bgrp, ppi0_8b_mux),
+	ADI_PMX_FUNCTION("ppi0_16b", ppi0_16bgrp, ppi0_16b_mux),
+	ADI_PMX_FUNCTION("ppi0_24b", ppi0_24bgrp, ppi0_24b_mux),
+	ADI_PMX_FUNCTION("ppi1_8b", ppi1_8bgrp, ppi1_8b_mux),
+	ADI_PMX_FUNCTION("ppi1_16b", ppi1_16bgrp, ppi1_16b_mux),
+	ADI_PMX_FUNCTION("ppi2_8b", ppi2_8bgrp, ppi2_8b_mux),
+	ADI_PMX_FUNCTION("atapi", atapigrp, atapi_mux),
+	ADI_PMX_FUNCTION("atapi_alter", atapialtergrp, atapi_alter_mux),
+	ADI_PMX_FUNCTION("nfc0", nfc0grp, nfc0_mux),
+	ADI_PMX_FUNCTION("keys_4x4", keys_4x4grp, keys_4x4_mux),
+	ADI_PMX_FUNCTION("keys_8x8", keys_8x8grp, keys_8x8_mux),
+};
+
+static const struct adi_pinctrl_soc_data adi_bf54x_soc = {
+	.functions = adi_pmx_functions,
+	.nfunctions = ARRAY_SIZE(adi_pmx_functions),
+	.groups = adi_pin_groups,
+	.ngroups = ARRAY_SIZE(adi_pin_groups),
+	.pins = adi_pads,
+	.npins = ARRAY_SIZE(adi_pads),
+};
+
+void adi_pinctrl_soc_init(const struct adi_pinctrl_soc_data **soc)
+{
+	*soc = &adi_bf54x_soc;
+}
diff --git a/drivers/pinctrl/pinctrl-adi2-bf60x.c b/drivers/pinctrl/pinctrl-adi2-bf60x.c
new file mode 100644
index 000000000000..e90cb80c8d0b
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-adi2-bf60x.c
@@ -0,0 +1,522 @@
+/*
+ * Pinctrl Driver for ADI GPIO2 controller
+ *
+ * Copyright 2007-2013 Analog Devices Inc.
+ *
+ * Licensed under the GPLv2 or later
+ */
+
+#include <asm/portmux.h>
+#include "pinctrl-adi2.h"
+
+static const struct pinctrl_pin_desc adi_pads[] = {
+	PINCTRL_PIN(0, "PA0"),
+	PINCTRL_PIN(1, "PA1"),
+	PINCTRL_PIN(2, "PA2"),
+	PINCTRL_PIN(3, "PG3"),
+	PINCTRL_PIN(4, "PA4"),
+	PINCTRL_PIN(5, "PA5"),
+	PINCTRL_PIN(6, "PA6"),
+	PINCTRL_PIN(7, "PA7"),
+	PINCTRL_PIN(8, "PA8"),
+	PINCTRL_PIN(9, "PA9"),
+	PINCTRL_PIN(10, "PA10"),
+	PINCTRL_PIN(11, "PA11"),
+	PINCTRL_PIN(12, "PA12"),
+	PINCTRL_PIN(13, "PA13"),
+	PINCTRL_PIN(14, "PA14"),
+	PINCTRL_PIN(15, "PA15"),
+	PINCTRL_PIN(16, "PB0"),
+	PINCTRL_PIN(17, "PB1"),
+	PINCTRL_PIN(18, "PB2"),
+	PINCTRL_PIN(19, "PB3"),
+	PINCTRL_PIN(20, "PB4"),
+	PINCTRL_PIN(21, "PB5"),
+	PINCTRL_PIN(22, "PB6"),
+	PINCTRL_PIN(23, "PB7"),
+	PINCTRL_PIN(24, "PB8"),
+	PINCTRL_PIN(25, "PB9"),
+	PINCTRL_PIN(26, "PB10"),
+	PINCTRL_PIN(27, "PB11"),
+	PINCTRL_PIN(28, "PB12"),
+	PINCTRL_PIN(29, "PB13"),
+	PINCTRL_PIN(30, "PB14"),
+	PINCTRL_PIN(31, "PB15"),
+	PINCTRL_PIN(32, "PC0"),
+	PINCTRL_PIN(33, "PC1"),
+	PINCTRL_PIN(34, "PC2"),
+	PINCTRL_PIN(35, "PC3"),
+	PINCTRL_PIN(36, "PC4"),
+	PINCTRL_PIN(37, "PC5"),
+	PINCTRL_PIN(38, "PC6"),
+	PINCTRL_PIN(39, "PC7"),
+	PINCTRL_PIN(40, "PC8"),
+	PINCTRL_PIN(41, "PC9"),
+	PINCTRL_PIN(42, "PC10"),
+	PINCTRL_PIN(43, "PC11"),
+	PINCTRL_PIN(44, "PC12"),
+	PINCTRL_PIN(45, "PC13"),
+	PINCTRL_PIN(46, "PC14"),
+	PINCTRL_PIN(47, "PC15"),
+	PINCTRL_PIN(48, "PD0"),
+	PINCTRL_PIN(49, "PD1"),
+	PINCTRL_PIN(50, "PD2"),
+	PINCTRL_PIN(51, "PD3"),
+	PINCTRL_PIN(52, "PD4"),
+	PINCTRL_PIN(53, "PD5"),
+	PINCTRL_PIN(54, "PD6"),
+	PINCTRL_PIN(55, "PD7"),
+	PINCTRL_PIN(56, "PD8"),
+	PINCTRL_PIN(57, "PD9"),
+	PINCTRL_PIN(58, "PD10"),
+	PINCTRL_PIN(59, "PD11"),
+	PINCTRL_PIN(60, "PD12"),
+	PINCTRL_PIN(61, "PD13"),
+	PINCTRL_PIN(62, "PD14"),
+	PINCTRL_PIN(63, "PD15"),
+	PINCTRL_PIN(64, "PE0"),
+	PINCTRL_PIN(65, "PE1"),
+	PINCTRL_PIN(66, "PE2"),
+	PINCTRL_PIN(67, "PE3"),
+	PINCTRL_PIN(68, "PE4"),
+	PINCTRL_PIN(69, "PE5"),
+	PINCTRL_PIN(70, "PE6"),
+	PINCTRL_PIN(71, "PE7"),
+	PINCTRL_PIN(72, "PE8"),
+	PINCTRL_PIN(73, "PE9"),
+	PINCTRL_PIN(74, "PE10"),
+	PINCTRL_PIN(75, "PE11"),
+	PINCTRL_PIN(76, "PE12"),
+	PINCTRL_PIN(77, "PE13"),
+	PINCTRL_PIN(78, "PE14"),
+	PINCTRL_PIN(79, "PE15"),
+	PINCTRL_PIN(80, "PF0"),
+	PINCTRL_PIN(81, "PF1"),
+	PINCTRL_PIN(82, "PF2"),
+	PINCTRL_PIN(83, "PF3"),
+	PINCTRL_PIN(84, "PF4"),
+	PINCTRL_PIN(85, "PF5"),
+	PINCTRL_PIN(86, "PF6"),
+	PINCTRL_PIN(87, "PF7"),
+	PINCTRL_PIN(88, "PF8"),
+	PINCTRL_PIN(89, "PF9"),
+	PINCTRL_PIN(90, "PF10"),
+	PINCTRL_PIN(91, "PF11"),
+	PINCTRL_PIN(92, "PF12"),
+	PINCTRL_PIN(93, "PF13"),
+	PINCTRL_PIN(94, "PF14"),
+	PINCTRL_PIN(95, "PF15"),
+	PINCTRL_PIN(96, "PG0"),
+	PINCTRL_PIN(97, "PG1"),
+	PINCTRL_PIN(98, "PG2"),
+	PINCTRL_PIN(99, "PG3"),
+	PINCTRL_PIN(100, "PG4"),
+	PINCTRL_PIN(101, "PG5"),
+	PINCTRL_PIN(102, "PG6"),
+	PINCTRL_PIN(103, "PG7"),
+	PINCTRL_PIN(104, "PG8"),
+	PINCTRL_PIN(105, "PG9"),
+	PINCTRL_PIN(106, "PG10"),
+	PINCTRL_PIN(107, "PG11"),
+	PINCTRL_PIN(108, "PG12"),
+	PINCTRL_PIN(109, "PG13"),
+	PINCTRL_PIN(110, "PG14"),
+	PINCTRL_PIN(111, "PG15"),
+};
+
+static const unsigned uart0_pins[] = {
+	GPIO_PD7, GPIO_PD8,
+};
+
+static const unsigned uart0_ctsrts_pins[] = {
+	GPIO_PD9, GPIO_PD10,
+};
+
+static const unsigned uart1_pins[] = {
+	GPIO_PG15, GPIO_PG14,
+};
+
+static const unsigned uart1_ctsrts_pins[] = {
+	GPIO_PG10, GPIO_PG13,
+};
+
+static const unsigned rsi0_pins[] = {
+	GPIO_PG3, GPIO_PG2, GPIO_PG0, GPIO_PE15, GPIO_PG5, GPIO_PG6,
+};
+
+static const unsigned eth0_pins[] = {
+	GPIO_PC6, GPIO_PC7, GPIO_PC2, GPIO_PC0, GPIO_PC3, GPIO_PC1,
+	GPIO_PB13, GPIO_PD6, GPIO_PC5, GPIO_PC4, GPIO_PB14, GPIO_PB15,
+};
+
+static const unsigned eth1_pins[] = {
+	GPIO_PE10, GPIO_PE11, GPIO_PG3, GPIO_PG0, GPIO_PG2, GPIO_PE15,
+	GPIO_PG5, GPIO_PE12, GPIO_PE13, GPIO_PE14, GPIO_PG6, GPIO_PC9,
+};
+
+static const unsigned spi0_pins[] = {
+	GPIO_PD4, GPIO_PD2, GPIO_PD3,
+};
+
+static const unsigned spi1_pins[] = {
+	GPIO_PD5, GPIO_PD14, GPIO_PD13,
+};
+
+static const unsigned twi0_pins[] = {
+};
+
+static const unsigned twi1_pins[] = {
+};
+
+static const unsigned rotary_pins[] = {
+	GPIO_PG7, GPIO_PG11, GPIO_PG12,
+};
+
+static const unsigned can0_pins[] = {
+	GPIO_PG1, GPIO_PG4,
+};
+
+static const unsigned smc0_pins[] = {
+	GPIO_PA0, GPIO_PA1, GPIO_PA2, GPIO_PA3, GPIO_PA4, GPIO_PA5, GPIO_PA6,
+	GPIO_PA7, GPIO_PA8, GPIO_PA9, GPIO_PB2, GPIO_PA10, GPIO_PA11,
+	GPIO_PB3, GPIO_PA12, GPIO_PA13, GPIO_PA14, GPIO_PA15, GPIO_PB6,
+	GPIO_PB7, GPIO_PB8, GPIO_PB10, GPIO_PB11, GPIO_PB0,
+};
+
+static const unsigned sport0_pins[] = {
+	GPIO_PB5, GPIO_PB4, GPIO_PB9, GPIO_PB8, GPIO_PB7, GPIO_PB11,
+};
+
+static const unsigned sport1_pins[] = {
+	GPIO_PE2, GPIO_PE5, GPIO_PD15, GPIO_PE4, GPIO_PE3, GPIO_PE1,
+};
+
+static const unsigned sport2_pins[] = {
+	GPIO_PG4, GPIO_PG1, GPIO_PG9, GPIO_PG10, GPIO_PG7, GPIO_PB12,
+};
+
+static const unsigned ppi0_8b_pins[] = {
+	GPIO_PF0, GPIO_PF1, GPIO_PF2, GPIO_PF3, GPIO_PF4, GPIO_PF5, GPIO_PF6,
+	GPIO_PF7, GPIO_PF13, GPIO_PF14, GPIO_PF15,
+	GPIO_PE6, GPIO_PE7, GPIO_PE8, GPIO_PE9,
+};
+
+static const unsigned ppi0_16b_pins[] = {
+	GPIO_PF0, GPIO_PF1, GPIO_PF2, GPIO_PF3, GPIO_PF4, GPIO_PF5, GPIO_PF6,
+	GPIO_PF7, GPIO_PF9, GPIO_PF10, GPIO_PF11, GPIO_PF12,
+	GPIO_PF13, GPIO_PF14, GPIO_PF15,
+	GPIO_PE6, GPIO_PE7, GPIO_PE8, GPIO_PE9,
+};
+
+static const unsigned ppi0_24b_pins[] = {
+	GPIO_PF0, GPIO_PF1, GPIO_PF2, GPIO_PF3, GPIO_PF4, GPIO_PF5, GPIO_PF6,
+	GPIO_PF7, GPIO_PF8, GPIO_PF9, GPIO_PF10, GPIO_PF11, GPIO_PF12,
+	GPIO_PF13, GPIO_PF14, GPIO_PF15, GPIO_PE0, GPIO_PE1, GPIO_PE2,
+	GPIO_PE3, GPIO_PE4, GPIO_PE5, GPIO_PE6, GPIO_PE7, GPIO_PE8,
+	GPIO_PE9, GPIO_PD12, GPIO_PD15,
+};
+
+static const unsigned ppi1_8b_pins[] = {
+	GPIO_PC0, GPIO_PC1, GPIO_PC2, GPIO_PC3, GPIO_PC4, GPIO_PC5, GPIO_PC6,
+	GPIO_PC7, GPIO_PC8, GPIO_PB13, GPIO_PB14, GPIO_PB15, GPIO_PD6,
+};
+
+static const unsigned ppi1_16b_pins[] = {
+	GPIO_PC0, GPIO_PC1, GPIO_PC2, GPIO_PC3, GPIO_PC4, GPIO_PC5, GPIO_PC6,
+	GPIO_PC7, GPIO_PC9, GPIO_PC10, GPIO_PC11, GPIO_PC12,
+	GPIO_PC13, GPIO_PC14, GPIO_PC15,
+	GPIO_PB13, GPIO_PB14, GPIO_PB15, GPIO_PD6,
+};
+
+static const unsigned ppi2_8b_pins[] = {
+	GPIO_PA0, GPIO_PA1, GPIO_PA2, GPIO_PA3, GPIO_PA4, GPIO_PA5, GPIO_PA6,
+	GPIO_PA7, GPIO_PB0, GPIO_PB1, GPIO_PB2, GPIO_PB3,
+};
+
+static const unsigned ppi2_16b_pins[] = {
+	GPIO_PA0, GPIO_PA1, GPIO_PA2, GPIO_PA3, GPIO_PA4, GPIO_PA5, GPIO_PA6,
+	GPIO_PA7, GPIO_PA8, GPIO_PA9, GPIO_PA10, GPIO_PA11, GPIO_PA12,
+	GPIO_PA13, GPIO_PA14, GPIO_PA15,
+	GPIO_PA7, GPIO_PB0, GPIO_PB1, GPIO_PB2, GPIO_PB3,
+};
+
+static const unsigned lp0_pins[] = {
+	GPIO_PB0, GPIO_PB1, GPIO_PA0, GPIO_PA1, GPIO_PA2, GPIO_PA3,
+	GPIO_PA4, GPIO_PA5, GPIO_PA6, GPIO_PA7,
+};
+
+static const unsigned lp1_pins[] = {
+	GPIO_PB3, GPIO_PB2, GPIO_PA8, GPIO_PA9, GPIO_PA10, GPIO_PA11,
+	GPIO_PA12, GPIO_PA13, GPIO_PA14, GPIO_PA15,
+};
+
+static const unsigned lp2_pins[] = {
+	GPIO_PE6, GPIO_PE7, GPIO_PF0, GPIO_PF1, GPIO_PF2, GPIO_PF3,
+	GPIO_PF4, GPIO_PF5, GPIO_PF6, GPIO_PF7,
+};
+
+static const unsigned lp3_pins[] = {
+	GPIO_PE9, GPIO_PE8, GPIO_PF8, GPIO_PF9, GPIO_PF10, GPIO_PF11,
+	GPIO_PF12, GPIO_PF13, GPIO_PF14, GPIO_PF15,
+};
+
+static const struct adi_pin_group adi_pin_groups[] = {
+	ADI_PIN_GROUP("uart0grp", uart0_pins),
+	ADI_PIN_GROUP("uart0ctsrtsgrp", uart0_ctsrts_pins),
+	ADI_PIN_GROUP("uart1grp", uart1_pins),
+	ADI_PIN_GROUP("uart1ctsrtsgrp", uart1_ctsrts_pins),
+	ADI_PIN_GROUP("rsi0grp", rsi0_pins),
+	ADI_PIN_GROUP("eth0grp", eth0_pins),
+	ADI_PIN_GROUP("eth1grp", eth1_pins),
+	ADI_PIN_GROUP("spi0grp", spi0_pins),
+	ADI_PIN_GROUP("spi1grp", spi1_pins),
+	ADI_PIN_GROUP("twi0grp", twi0_pins),
+	ADI_PIN_GROUP("twi1grp", twi1_pins),
+	ADI_PIN_GROUP("rotarygrp", rotary_pins),
+	ADI_PIN_GROUP("can0grp", can0_pins),
+	ADI_PIN_GROUP("smc0grp", smc0_pins),
+	ADI_PIN_GROUP("sport0grp", sport0_pins),
+	ADI_PIN_GROUP("sport1grp", sport1_pins),
+	ADI_PIN_GROUP("sport2grp", sport2_pins),
+	ADI_PIN_GROUP("ppi0_8bgrp", ppi0_8b_pins),
+	ADI_PIN_GROUP("ppi0_16bgrp", ppi0_16b_pins),
+	ADI_PIN_GROUP("ppi0_24bgrp", ppi0_24b_pins),
+	ADI_PIN_GROUP("ppi1_8bgrp", ppi1_8b_pins),
+	ADI_PIN_GROUP("ppi1_16bgrp", ppi1_16b_pins),
+	ADI_PIN_GROUP("ppi2_8bgrp", ppi2_8b_pins),
+	ADI_PIN_GROUP("ppi2_16bgrp", ppi2_16b_pins),
+	ADI_PIN_GROUP("lp0grp", lp0_pins),
+	ADI_PIN_GROUP("lp1grp", lp1_pins),
+	ADI_PIN_GROUP("lp2grp", lp2_pins),
+	ADI_PIN_GROUP("lp3grp", lp3_pins),
+};
+
+static const unsigned short uart0_mux[] = {
+	P_UART0_TX, P_UART0_RX,
+	0
+};
+
+static const unsigned short uart0_ctsrts_mux[] = {
+	P_UART0_RTS, P_UART0_CTS,
+	0
+};
+
+static const unsigned short uart1_mux[] = {
+	P_UART1_TX, P_UART1_RX,
+	0
+};
+
+static const unsigned short uart1_ctsrts_mux[] = {
+	P_UART1_RTS, P_UART1_CTS,
+	0
+};
+
+static const unsigned short rsi0_mux[] = {
+	P_RSI_DATA0, P_RSI_DATA1, P_RSI_DATA2, P_RSI_DATA3,
+	P_RSI_CMD, P_RSI_CLK, 0
+};
+
+static const unsigned short eth0_mux[] = P_RMII0;
+static const unsigned short eth1_mux[] = P_RMII1;
+
+static const unsigned short spi0_mux[] = {
+	P_SPI0_SCK, P_SPI0_MISO, P_SPI0_MOSI, 0
+};
+
+static const unsigned short spi1_mux[] = {
+	P_SPI1_SCK, P_SPI1_MISO, P_SPI1_MOSI, 0
+};
+
+static const unsigned short twi0_mux[] = {
+	P_TWI0_SCL, P_TWI0_SDA, 0
+};
+
+static const unsigned short twi1_mux[] = {
+	P_TWI1_SCL, P_TWI1_SDA, 0
+};
+
+static const unsigned short rotary_mux[] = {
+	P_CNT_CUD, P_CNT_CDG, P_CNT_CZM, 0
+};
+
+static const unsigned short sport0_mux[] = {
+	P_SPORT0_ACLK, P_SPORT0_AFS, P_SPORT0_AD0, P_SPORT0_BCLK,
+	P_SPORT0_BFS, P_SPORT0_BD0, 0,
+};
+
+static const unsigned short sport1_mux[] = {
+	P_SPORT1_ACLK, P_SPORT1_AFS, P_SPORT1_AD0, P_SPORT1_BCLK,
+	P_SPORT1_BFS, P_SPORT1_BD0, 0,
+};
+
+static const unsigned short sport2_mux[] = {
+	P_SPORT2_ACLK, P_SPORT2_AFS, P_SPORT2_AD0, P_SPORT2_BCLK,
+	P_SPORT2_BFS, P_SPORT2_BD0, 0,
+};
+
+static const unsigned short can0_mux[] = {
+	P_CAN0_RX, P_CAN0_TX, 0
+};
+
+static const unsigned short smc0_mux[] = {
+	P_A3, P_A4, P_A5, P_A6, P_A7, P_A8, P_A9, P_A10, P_A11, P_A12,
+	P_A13, P_A14, P_A15, P_A16, P_A17, P_A18, P_A19, P_A20, P_A21,
+	P_A22, P_A23, P_A24, P_A25, P_NORCK, 0,
+};
+
+static const unsigned short ppi0_8b_mux[] = {
+	P_PPI0_D0, P_PPI0_D1, P_PPI0_D2, P_PPI0_D3,
+	P_PPI0_D4, P_PPI0_D5, P_PPI0_D6, P_PPI0_D7,
+	P_PPI0_CLK, P_PPI0_FS1, P_PPI0_FS2,
+	0,
+};
+
+static const unsigned short ppi0_16b_mux[] = {
+	P_PPI0_D0, P_PPI0_D1, P_PPI0_D2, P_PPI0_D3,
+	P_PPI0_D4, P_PPI0_D5, P_PPI0_D6, P_PPI0_D7,
+	P_PPI0_D8, P_PPI0_D9, P_PPI0_D10, P_PPI0_D11,
+	P_PPI0_D12, P_PPI0_D13, P_PPI0_D14, P_PPI0_D15,
+	P_PPI0_CLK, P_PPI0_FS1, P_PPI0_FS2,
+	0,
+};
+
+static const unsigned short ppi0_24b_mux[] = {
+	P_PPI0_D0, P_PPI0_D1, P_PPI0_D2, P_PPI0_D3,
+	P_PPI0_D4, P_PPI0_D5, P_PPI0_D6, P_PPI0_D7,
+	P_PPI0_D8, P_PPI0_D9, P_PPI0_D10, P_PPI0_D11,
+	P_PPI0_D12, P_PPI0_D13, P_PPI0_D14, P_PPI0_D15,
+	P_PPI0_D16, P_PPI0_D17, P_PPI0_D18, P_PPI0_D19,
+	P_PPI0_D20, P_PPI0_D21, P_PPI0_D22, P_PPI0_D23,
+	P_PPI0_CLK, P_PPI0_FS1, P_PPI0_FS2,
+	0,
+};
+
+static const unsigned short ppi1_8b_mux[] = {
+	P_PPI1_D0, P_PPI1_D1, P_PPI1_D2, P_PPI1_D3,
+	P_PPI1_D4, P_PPI1_D5, P_PPI1_D6, P_PPI1_D7,
+	P_PPI1_CLK, P_PPI1_FS1, P_PPI1_FS2,
+	0,
+};
+
+static const unsigned short ppi1_16b_mux[] = {
+	P_PPI1_D0, P_PPI1_D1, P_PPI1_D2, P_PPI1_D3,
+	P_PPI1_D4, P_PPI1_D5, P_PPI1_D6, P_PPI1_D7,
+	P_PPI1_D8, P_PPI1_D9, P_PPI1_D10, P_PPI1_D11,
+	P_PPI1_D12, P_PPI1_D13, P_PPI1_D14, P_PPI1_D15,
+	P_PPI1_CLK, P_PPI1_FS1, P_PPI1_FS2,
+	0,
+};
+
+static const unsigned short ppi2_8b_mux[] = {
+	P_PPI2_D0, P_PPI2_D1, P_PPI2_D2, P_PPI2_D3,
+	P_PPI2_D4, P_PPI2_D5, P_PPI2_D6, P_PPI2_D7,
+	P_PPI2_CLK, P_PPI2_FS1, P_PPI2_FS2,
+	0,
+};
+
+static const unsigned short ppi2_16b_mux[] = {
+	P_PPI2_D0, P_PPI2_D1, P_PPI2_D2, P_PPI2_D3,
+	P_PPI2_D4, P_PPI2_D5, P_PPI2_D6, P_PPI2_D7,
+	P_PPI2_D8, P_PPI2_D9, P_PPI2_D10, P_PPI2_D11,
+	P_PPI2_D12, P_PPI2_D13, P_PPI2_D14, P_PPI2_D15,
+	P_PPI2_CLK, P_PPI2_FS1, P_PPI2_FS2,
+	0,
+};
+
+static const unsigned short lp0_mux[] = {
+	P_LP0_CLK, P_LP0_ACK, P_LP0_D0, P_LP0_D1, P_LP0_D2,
+	P_LP0_D3, P_LP0_D4, P_LP0_D5, P_LP0_D6, P_LP0_D7,
+        0
+};
+
+static const unsigned short lp1_mux[] = {
+	P_LP1_CLK, P_LP1_ACK, P_LP1_D0, P_LP1_D1, P_LP1_D2,
+	P_LP1_D3, P_LP1_D4, P_LP1_D5, P_LP1_D6, P_LP1_D7,
+        0
+};
+
+static const unsigned short lp2_mux[] = {
+	P_LP2_CLK, P_LP2_ACK, P_LP2_D0, P_LP2_D1, P_LP2_D2,
+	P_LP2_D3, P_LP2_D4, P_LP2_D5, P_LP2_D6, P_LP2_D7,
+        0
+};
+
+static const unsigned short lp3_mux[] = {
+	P_LP3_CLK, P_LP3_ACK, P_LP3_D0, P_LP3_D1, P_LP3_D2,
+	P_LP3_D3, P_LP3_D4, P_LP3_D5, P_LP3_D6, P_LP3_D7,
+        0
+};
+
+static const char * const uart0grp[] = { "uart0grp" };
+static const char * const uart0ctsrtsgrp[] = { "uart0ctsrtsgrp" };
+static const char * const uart1grp[] = { "uart1grp" };
+static const char * const uart1ctsrtsgrp[] = { "uart1ctsrtsgrp" };
+static const char * const rsi0grp[] = { "rsi0grp" };
+static const char * const eth0grp[] = { "eth0grp" };
+static const char * const eth1grp[] = { "eth1grp" };
+static const char * const spi0grp[] = { "spi0grp" };
+static const char * const spi1grp[] = { "spi1grp" };
+static const char * const twi0grp[] = { "twi0grp" };
+static const char * const twi1grp[] = { "twi1grp" };
+static const char * const rotarygrp[] = { "rotarygrp" };
+static const char * const can0grp[] = { "can0grp" };
+static const char * const smc0grp[] = { "smc0grp" };
+static const char * const sport0grp[] = { "sport0grp" };
+static const char * const sport1grp[] = { "sport1grp" };
+static const char * const sport2grp[] = { "sport2grp" };
+static const char * const ppi0_8bgrp[] = { "ppi0_8bgrp" };
+static const char * const ppi0_16bgrp[] = { "ppi0_16bgrp" };
+static const char * const ppi0_24bgrp[] = { "ppi0_24bgrp" };
+static const char * const ppi1_8bgrp[] = { "ppi1_8bgrp" };
+static const char * const ppi1_16bgrp[] = { "ppi1_16bgrp" };
+static const char * const ppi2_8bgrp[] = { "ppi2_8bgrp" };
+static const char * const ppi2_16bgrp[] = { "ppi2_16bgrp" };
+static const char * const lp0grp[] = { "lp0grp" };
+static const char * const lp1grp[] = { "lp1grp" };
+static const char * const lp2grp[] = { "lp2grp" };
+static const char * const lp3grp[] = { "lp3grp" };
+
+static const struct adi_pmx_func adi_pmx_functions[] = {
+	ADI_PMX_FUNCTION("uart0", uart0grp, uart0_mux),
+	ADI_PMX_FUNCTION("uart0_ctsrts", uart0ctsrtsgrp, uart0_ctsrts_mux),
+	ADI_PMX_FUNCTION("uart1", uart1grp, uart1_mux),
+	ADI_PMX_FUNCTION("uart1_ctsrts", uart1ctsrtsgrp, uart1_ctsrts_mux),
+	ADI_PMX_FUNCTION("rsi0", rsi0grp, rsi0_mux),
+	ADI_PMX_FUNCTION("eth0", eth0grp, eth0_mux),
+	ADI_PMX_FUNCTION("eth1", eth1grp, eth1_mux),
+	ADI_PMX_FUNCTION("spi0", spi0grp, spi0_mux),
+	ADI_PMX_FUNCTION("spi1", spi1grp, spi1_mux),
+	ADI_PMX_FUNCTION("twi0", twi0grp, twi0_mux),
+	ADI_PMX_FUNCTION("twi1", twi1grp, twi1_mux),
+	ADI_PMX_FUNCTION("rotary", rotarygrp, rotary_mux),
+	ADI_PMX_FUNCTION("can0", can0grp, can0_mux),
+	ADI_PMX_FUNCTION("smc0", smc0grp, smc0_mux),
+	ADI_PMX_FUNCTION("sport0", sport0grp, sport0_mux),
+	ADI_PMX_FUNCTION("sport1", sport1grp, sport1_mux),
+	ADI_PMX_FUNCTION("sport2", sport2grp, sport2_mux),
+	ADI_PMX_FUNCTION("ppi0_8b", ppi0_8bgrp, ppi0_8b_mux),
+	ADI_PMX_FUNCTION("ppi0_16b", ppi0_16bgrp, ppi0_16b_mux),
+	ADI_PMX_FUNCTION("ppi0_24b", ppi0_24bgrp, ppi0_24b_mux),
+	ADI_PMX_FUNCTION("ppi1_8b", ppi1_8bgrp, ppi1_8b_mux),
+	ADI_PMX_FUNCTION("ppi1_16b", ppi1_16bgrp, ppi1_16b_mux),
+	ADI_PMX_FUNCTION("ppi2_8b", ppi2_8bgrp, ppi2_8b_mux),
+	ADI_PMX_FUNCTION("ppi2_16b", ppi2_16bgrp, ppi2_16b_mux),
+	ADI_PMX_FUNCTION("lp0", lp0grp, lp0_mux),
+	ADI_PMX_FUNCTION("lp1", lp1grp, lp1_mux),
+	ADI_PMX_FUNCTION("lp2", lp2grp, lp2_mux),
+	ADI_PMX_FUNCTION("lp3", lp3grp, lp3_mux),
+};
+
+static const struct adi_pinctrl_soc_data adi_bf60x_soc = {
+	.functions = adi_pmx_functions,
+	.nfunctions = ARRAY_SIZE(adi_pmx_functions),
+	.groups = adi_pin_groups,
+	.ngroups = ARRAY_SIZE(adi_pin_groups),
+	.pins = adi_pads,
+	.npins = ARRAY_SIZE(adi_pads),
+};
+
+void adi_pinctrl_soc_init(const struct adi_pinctrl_soc_data **soc)
+{
+	*soc = &adi_bf60x_soc;
+}
diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c
new file mode 100644
index 000000000000..7a24e59b4138
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-adi2.c
@@ -0,0 +1,1183 @@
+/*
+ * Pinctrl Driver for ADI GPIO2 controller
+ *
+ * Copyright 2007-2013 Analog Devices Inc.
+ *
+ * Licensed under the GPLv2 or later
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/irq.h>
+#include <linux/platform_data/pinctrl-adi2.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pinctrl/machine.h>
+#include <linux/syscore_ops.h>
+#include <linux/gpio.h>
+#include <asm/portmux.h>
+#include "pinctrl-adi2.h"
+#include "core.h"
+
+/*
+According to the BF54x HRM, pint means "pin interrupt".
+http://www.analog.com/static/imported-files/processor_manuals/ADSP-BF54x_hwr_rev1.2.pdf
+
+ADSP-BF54x processor Blackfin processors have four SIC interrupt chan-
+nels dedicated to pin interrupt purposes. These channels are managed by
+four hardware blocks, called PINT0, PINT1, PINT2, and PINT3. Every PINTx
+block can sense to up to 32 pins. While PINT0 and PINT1 can sense the
+pins of port A and port B, PINT2 and PINT3 manage all the pins from port
+C to port J as shown in Figure 9-2.
+
+n BF54x HRM:
+The ten GPIO ports are subdivided into 8-bit half ports, resulting in lower and
+upper half 8-bit units. The PINTx_ASSIGN registers control the 8-bit multi-
+plexers shown in Figure 9-3. Lower half units of eight pins can be
+forwarded to either byte 0 or byte 2 of either associated PINTx block.
+Upper half units can be forwarded to either byte 1 or byte 3 of the pin
+interrupt blocks, without further restrictions.
+
+All MMR registers in the pin interrupt module are 32 bits wide. To simply the
+mapping logic, this driver only maps a 16-bit gpio port to the upper or lower
+16 bits of a PINTx block. You can find the Figure 9-3 on page 583.
+
+Each IRQ domain is binding to a GPIO bank device. 2 GPIO bank devices can map
+to one PINT device. Two in "struct gpio_pint" are used to ease the PINT
+interrupt handler.
+
+The GPIO bank mapping to the lower 16 bits of the PINT device set its IRQ
+domain pointer in domain[0]. The IRQ domain pointer of the other bank is set
+to domain[1]. PINT interrupt handler adi_gpio_handle_pint_irq() finds out
+the current domain pointer according to whether the interrupt request mask
+is in lower 16 bits (domain[0]) or upper 16bits (domain[1]).
+
+A PINT device is not part of a GPIO port device in Blackfin. Multiple GPIO
+port devices can be mapped to the same PINT device.
+
+*/
+
+static LIST_HEAD(adi_pint_list);
+static LIST_HEAD(adi_gpio_port_list);
+
+#define DRIVER_NAME "pinctrl-adi2"
+
+#define PINT_HI_OFFSET		16
+
+/**
+ * struct gpio_port_saved - GPIO port registers that should be saved between
+ * power suspend and resume operations.
+ *
+ * @fer: PORTx_FER register
+ * @data: PORTx_DATA register
+ * @dir: PORTx_DIR register
+ * @inen: PORTx_INEN register
+ * @mux: PORTx_MUX register
+ */
+struct gpio_port_saved {
+	u16 fer;
+	u16 data;
+	u16 dir;
+	u16 inen;
+	u32 mux;
+};
+
+/**
+ * struct gpio_pint - Pin interrupt controller device. Multiple ADI GPIO
+ * banks can be mapped into one Pin interrupt controller.
+ *
+ * @node: All gpio_pint instances are added to a global list.
+ * @base: PINT device register base address
+ * @irq: IRQ of the PINT device, it is the parent IRQ of all
+ *       GPIO IRQs mapping to this device.
+ * @domain: [0] irq domain of the gpio port, whose hardware interrupts are
+ *		mapping to the low 16-bit of the pint registers.
+ *          [1] irq domain of the gpio port, whose hardware interrupts are
+ *		mapping to the high 16-bit of the pint registers.
+ * @regs: address pointer to the PINT device
+ * @map_count: No more than 2 GPIO banks can be mapped to this PINT device.
+ * @lock: This lock make sure the irq_chip operations to one PINT device
+ *        for different GPIO interrrupts are atomic.
+ * @pint_map_port: Set up the mapping between one PINT device and
+ *                 multiple GPIO banks.
+ */
+struct gpio_pint {
+	struct list_head node;
+	void __iomem *base;
+	int irq;
+	struct irq_domain *domain[2];
+	struct gpio_pint_regs *regs;
+	struct adi_pm_pint_save saved_data;
+	int map_count;
+	spinlock_t lock;
+
+	int (*pint_map_port)(struct gpio_pint *pint, bool assign,
+				u8 map, struct irq_domain *domain);
+};
+
+/**
+ * ADI pin controller
+ *
+ * @dev: a pointer back to containing device
+ * @pctl: the pinctrl device
+ * @soc: SoC data for this specific chip
+ */
+struct adi_pinctrl {
+	struct device *dev;
+	struct pinctrl_dev *pctl;
+	const struct adi_pinctrl_soc_data *soc;
+};
+
+/**
+ * struct gpio_port - GPIO bank device. Multiple ADI GPIO banks can be mapped
+ * into one pin interrupt controller.
+ *
+ * @node: All gpio_port instances are added to a list.
+ * @base: GPIO bank device register base address
+ * @irq_base: base IRQ of the GPIO bank device
+ * @width: PIN number of the GPIO bank device
+ * @regs: address pointer to the GPIO bank device
+ * @saved_data: registers that should be saved between PM operations.
+ * @dev: device structure of this GPIO bank
+ * @pint: GPIO PINT device that this GPIO bank mapped to
+ * @pint_map: GIOP bank mapping code in PINT device
+ * @pint_assign: The 32-bit PINT registers can be divided into 2 parts. A
+ *               GPIO bank can be mapped into either low 16 bits[0] or high 16
+ *               bits[1] of each PINT register.
+ * @lock: This lock make sure the irq_chip operations to one PINT device
+ *        for different GPIO interrrupts are atomic.
+ * @chip: abstract a GPIO controller
+ * @domain: The irq domain owned by the GPIO port.
+ * @rsvmap: Reservation map array for each pin in the GPIO bank
+ */
+struct gpio_port {
+	struct list_head node;
+	void __iomem *base;
+	unsigned int irq_base;
+	unsigned int width;
+	struct gpio_port_t *regs;
+	struct gpio_port_saved saved_data;
+	struct device *dev;
+
+	struct gpio_pint *pint;
+	u8 pint_map;
+	bool pint_assign;
+
+	spinlock_t lock;
+	struct gpio_chip chip;
+	struct irq_domain *domain;
+};
+
+static inline u8 pin_to_offset(struct pinctrl_gpio_range *range, unsigned pin)
+{
+	return pin - range->pin_base;
+}
+
+static inline u32 hwirq_to_pintbit(struct gpio_port *port, int hwirq)
+{
+	return port->pint_assign ? BIT(hwirq) << PINT_HI_OFFSET : BIT(hwirq);
+}
+
+static struct gpio_pint *find_gpio_pint(unsigned id)
+{
+	struct gpio_pint *pint;
+	int i = 0;
+
+	list_for_each_entry(pint, &adi_pint_list, node) {
+		if (id == i)
+			return pint;
+		i++;
+	}
+
+	return NULL;
+}
+
+static inline void port_setup(struct gpio_port *port, unsigned offset,
+	bool use_for_gpio)
+{
+	struct gpio_port_t *regs = port->regs;
+
+	if (use_for_gpio)
+		writew(readw(&regs->port_fer) & ~BIT(offset),
+			&regs->port_fer);
+	else
+		writew(readw(&regs->port_fer) | BIT(offset), &regs->port_fer);
+}
+
+static inline void portmux_setup(struct gpio_port *port, unsigned offset,
+	unsigned short function)
+{
+	struct gpio_port_t *regs = port->regs;
+	u32 pmux;
+
+	pmux = readl(&regs->port_mux);
+
+	/* The function field of each pin has 2 consecutive bits in
+	 * the mux register.
+	 */
+	pmux &= ~(0x3 << (2 * offset));
+	pmux |= (function & 0x3) << (2 * offset);
+
+	writel(pmux, &regs->port_mux);
+}
+
+static inline u16 get_portmux(struct gpio_port *port, unsigned offset)
+{
+	struct gpio_port_t *regs = port->regs;
+	u32 pmux = readl(&regs->port_mux);
+
+	/* The function field of each pin has 2 consecutive bits in
+	 * the mux register.
+	 */
+	return pmux >> (2 * offset) & 0x3;
+}
+
+static void adi_gpio_ack_irq(struct irq_data *d)
+{
+	unsigned long flags;
+	struct gpio_port *port = irq_data_get_irq_chip_data(d);
+	struct gpio_pint_regs *regs = port->pint->regs;
+	unsigned pintbit = hwirq_to_pintbit(port, d->hwirq);
+
+	spin_lock_irqsave(&port->lock, flags);
+	spin_lock_irqsave(&port->pint->lock, flags);
+
+	if (irqd_get_trigger_type(d) == IRQ_TYPE_EDGE_BOTH) {
+		if (readl(&regs->invert_set) & pintbit)
+			writel(pintbit, &regs->invert_clear);
+		else
+			writel(pintbit, &regs->invert_set);
+	}
+
+	writel(pintbit, &regs->request);
+
+	spin_unlock_irqrestore(&port->pint->lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void adi_gpio_mask_ack_irq(struct irq_data *d)
+{
+	unsigned long flags;
+	struct gpio_port *port = irq_data_get_irq_chip_data(d);
+	struct gpio_pint_regs *regs = port->pint->regs;
+	unsigned pintbit = hwirq_to_pintbit(port, d->hwirq);
+
+	spin_lock_irqsave(&port->lock, flags);
+	spin_lock_irqsave(&port->pint->lock, flags);
+
+	if (irqd_get_trigger_type(d) == IRQ_TYPE_EDGE_BOTH) {
+		if (readl(&regs->invert_set) & pintbit)
+			writel(pintbit, &regs->invert_clear);
+		else
+			writel(pintbit, &regs->invert_set);
+	}
+
+	writel(pintbit, &regs->request);
+	writel(pintbit, &regs->mask_clear);
+
+	spin_unlock_irqrestore(&port->pint->lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void adi_gpio_mask_irq(struct irq_data *d)
+{
+	unsigned long flags;
+	struct gpio_port *port = irq_data_get_irq_chip_data(d);
+	struct gpio_pint_regs *regs = port->pint->regs;
+
+	spin_lock_irqsave(&port->lock, flags);
+	spin_lock_irqsave(&port->pint->lock, flags);
+
+	writel(hwirq_to_pintbit(port, d->hwirq), &regs->mask_clear);
+
+	spin_unlock_irqrestore(&port->pint->lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void adi_gpio_unmask_irq(struct irq_data *d)
+{
+	unsigned long flags;
+	struct gpio_port *port = irq_data_get_irq_chip_data(d);
+	struct gpio_pint_regs *regs = port->pint->regs;
+
+	spin_lock_irqsave(&port->lock, flags);
+	spin_lock_irqsave(&port->pint->lock, flags);
+
+	writel(hwirq_to_pintbit(port, d->hwirq), &regs->mask_set);
+
+	spin_unlock_irqrestore(&port->pint->lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static unsigned int adi_gpio_irq_startup(struct irq_data *d)
+{
+	unsigned long flags;
+	struct gpio_port *port = irq_data_get_irq_chip_data(d);
+	struct gpio_pint_regs *regs = port->pint->regs;
+
+	if (!port) {
+		dev_err(port->dev, "GPIO IRQ %d :Not exist\n", d->irq);
+		return -ENODEV;
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+	spin_lock_irqsave(&port->pint->lock, flags);
+
+	port_setup(port, d->hwirq, true);
+	writew(BIT(d->hwirq), &port->regs->dir_clear);
+	writew(readw(&port->regs->inen) | BIT(d->hwirq), &port->regs->inen);
+
+	writel(hwirq_to_pintbit(port, d->hwirq), &regs->mask_set);
+
+	spin_unlock_irqrestore(&port->pint->lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return 0;
+}
+
+static void adi_gpio_irq_shutdown(struct irq_data *d)
+{
+	unsigned long flags;
+	struct gpio_port *port = irq_data_get_irq_chip_data(d);
+	struct gpio_pint_regs *regs = port->pint->regs;
+
+	spin_lock_irqsave(&port->lock, flags);
+	spin_lock_irqsave(&port->pint->lock, flags);
+
+	writel(hwirq_to_pintbit(port, d->hwirq), &regs->mask_clear);
+
+	spin_unlock_irqrestore(&port->pint->lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static int adi_gpio_irq_type(struct irq_data *d, unsigned int type)
+{
+	unsigned long flags;
+	struct gpio_port *port = irq_data_get_irq_chip_data(d);
+	struct gpio_pint_regs *pint_regs = port->pint->regs;
+	unsigned pintmask;
+	unsigned int irq = d->irq;
+	int ret = 0;
+	char buf[16];
+
+	if (!port) {
+		dev_err(port->dev, "GPIO IRQ %d :Not exist\n", irq);
+		return -ENODEV;
+	}
+
+	pintmask = hwirq_to_pintbit(port, d->hwirq);
+
+	spin_lock_irqsave(&port->lock, flags);
+	spin_lock_irqsave(&port->pint->lock, flags);
+
+	/* In case of interrupt autodetect, set irq type to edge sensitive. */
+	if (type == IRQ_TYPE_PROBE)
+		type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING;
+
+	if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING |
+		    IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW)) {
+		snprintf(buf, 16, "gpio-irq%d", irq);
+		port_setup(port, d->hwirq, true);
+	} else
+		goto out;
+
+	/* The GPIO interrupt is triggered only when its input value
+	 * transfer from 0 to 1. So, invert the input value if the
+	 * irq type is low or falling
+	 */
+	if ((type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_LEVEL_LOW)))
+		writel(pintmask, &pint_regs->invert_set);
+	else
+		writel(pintmask, &pint_regs->invert_clear);
+
+	/* In edge sensitive case, if the input value of the requested irq
+	 * is already 1, invert it.
+	 */
+	if ((type & IRQ_TYPE_EDGE_BOTH) == IRQ_TYPE_EDGE_BOTH) {
+		if (gpio_get_value(port->chip.base + d->hwirq))
+			writel(pintmask, &pint_regs->invert_set);
+		else
+			writel(pintmask, &pint_regs->invert_clear);
+	}
+
+	if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING)) {
+		writel(pintmask, &pint_regs->edge_set);
+		__irq_set_handler_locked(irq, handle_edge_irq);
+	} else {
+		writel(pintmask, &pint_regs->edge_clear);
+		__irq_set_handler_locked(irq, handle_level_irq);
+	}
+
+out:
+	spin_unlock_irqrestore(&port->pint->lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int adi_gpio_set_wake(struct irq_data *d, unsigned int state)
+{
+	struct gpio_port *port = irq_data_get_irq_chip_data(d);
+
+	if (!port || !port->pint || port->pint->irq != d->irq)
+		return -EINVAL;
+
+#ifndef SEC_GCTL
+	adi_internal_set_wake(port->pint->irq, state);
+#endif
+
+	return 0;
+}
+
+static int adi_pint_suspend(void)
+{
+	struct gpio_pint *pint;
+
+	list_for_each_entry(pint, &adi_pint_list, node) {
+		writel(0xffffffff, &pint->regs->mask_clear);
+		pint->saved_data.assign = readl(&pint->regs->assign);
+		pint->saved_data.edge_set = readl(&pint->regs->edge_set);
+		pint->saved_data.invert_set = readl(&pint->regs->invert_set);
+	}
+
+	return 0;
+}
+
+static void adi_pint_resume(void)
+{
+	struct gpio_pint *pint;
+
+	list_for_each_entry(pint, &adi_pint_list, node) {
+		writel(pint->saved_data.assign, &pint->regs->assign);
+		writel(pint->saved_data.edge_set, &pint->regs->edge_set);
+		writel(pint->saved_data.invert_set, &pint->regs->invert_set);
+	}
+}
+
+static int adi_gpio_suspend(void)
+{
+	struct gpio_port *port;
+
+	list_for_each_entry(port, &adi_gpio_port_list, node) {
+		port->saved_data.fer = readw(&port->regs->port_fer);
+		port->saved_data.mux = readl(&port->regs->port_mux);
+		port->saved_data.data = readw(&port->regs->data);
+		port->saved_data.inen = readw(&port->regs->inen);
+		port->saved_data.dir = readw(&port->regs->dir_set);
+	}
+
+	return adi_pint_suspend();
+}
+
+static void adi_gpio_resume(void)
+{
+	struct gpio_port *port;
+
+	adi_pint_resume();
+
+	list_for_each_entry(port, &adi_gpio_port_list, node) {
+		writel(port->saved_data.mux, &port->regs->port_mux);
+		writew(port->saved_data.fer, &port->regs->port_fer);
+		writew(port->saved_data.inen, &port->regs->inen);
+		writew(port->saved_data.data & port->saved_data.dir,
+					&port->regs->data_set);
+		writew(port->saved_data.dir, &port->regs->dir_set);
+	}
+
+}
+
+static struct syscore_ops gpio_pm_syscore_ops = {
+	.suspend = adi_gpio_suspend,
+	.resume = adi_gpio_resume,
+};
+#else /* CONFIG_PM */
+#define adi_gpio_set_wake NULL
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
+static inline void preflow_handler(struct irq_desc *desc)
+{
+	if (desc->preflow_handler)
+		desc->preflow_handler(&desc->irq_data);
+}
+#else
+static inline void preflow_handler(struct irq_desc *desc) { }
+#endif
+
+static void adi_gpio_handle_pint_irq(unsigned int inta_irq,
+			struct irq_desc *desc)
+{
+	u32 request;
+	u32 level_mask, hwirq;
+	bool umask = false;
+	struct gpio_pint *pint = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct gpio_pint_regs *regs = pint->regs;
+	struct irq_domain *domain;
+
+	preflow_handler(desc);
+	chained_irq_enter(chip, desc);
+
+	request = readl(&regs->request);
+	level_mask = readl(&regs->edge_set) & request;
+
+	hwirq = 0;
+	domain = pint->domain[0];
+	while (request) {
+		/* domain pointer need to be changed only once at IRQ 16 when
+		 * we go through IRQ requests from bit 0 to bit 31.
+		 */
+		if (hwirq == PINT_HI_OFFSET)
+			domain = pint->domain[1];
+
+		if (request & 1) {
+			if (level_mask & BIT(hwirq)) {
+				umask = true;
+				chained_irq_exit(chip, desc);
+			}
+			generic_handle_irq(irq_find_mapping(domain,
+					hwirq % PINT_HI_OFFSET));
+		}
+
+		hwirq++;
+		request >>= 1;
+	}
+
+	if (!umask)
+		chained_irq_exit(chip, desc);
+}
+
+static struct irq_chip adi_gpio_irqchip = {
+	.name = "GPIO",
+	.irq_ack = adi_gpio_ack_irq,
+	.irq_mask = adi_gpio_mask_irq,
+	.irq_mask_ack = adi_gpio_mask_ack_irq,
+	.irq_unmask = adi_gpio_unmask_irq,
+	.irq_disable = adi_gpio_mask_irq,
+	.irq_enable = adi_gpio_unmask_irq,
+	.irq_set_type = adi_gpio_irq_type,
+	.irq_startup = adi_gpio_irq_startup,
+	.irq_shutdown = adi_gpio_irq_shutdown,
+	.irq_set_wake = adi_gpio_set_wake,
+};
+
+static int adi_get_groups_count(struct pinctrl_dev *pctldev)
+{
+	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
+
+	return pinctrl->soc->ngroups;
+}
+
+static const char *adi_get_group_name(struct pinctrl_dev *pctldev,
+				       unsigned selector)
+{
+	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
+
+	return pinctrl->soc->groups[selector].name;
+}
+
+static int adi_get_group_pins(struct pinctrl_dev *pctldev, unsigned selector,
+			       const unsigned **pins,
+			       unsigned *num_pins)
+{
+	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
+
+	*pins = pinctrl->soc->groups[selector].pins;
+	*num_pins = pinctrl->soc->groups[selector].num;
+	return 0;
+}
+
+static struct pinctrl_ops adi_pctrl_ops = {
+	.get_groups_count = adi_get_groups_count,
+	.get_group_name = adi_get_group_name,
+	.get_group_pins = adi_get_group_pins,
+};
+
+static int adi_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector,
+	unsigned group)
+{
+	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
+	struct gpio_port *port;
+	struct pinctrl_gpio_range *range;
+	unsigned long flags;
+	unsigned short *mux, pin;
+
+	mux = (unsigned short *)pinctrl->soc->functions[selector].mux;
+
+	while (*mux) {
+		pin = P_IDENT(*mux);
+
+		range = pinctrl_find_gpio_range_from_pin(pctldev, pin);
+		if (range == NULL) /* should not happen */
+			return -ENODEV;
+
+		port = container_of(range->gc, struct gpio_port, chip);
+
+		spin_lock_irqsave(&port->lock, flags);
+
+		portmux_setup(port, pin_to_offset(range, pin),
+				 P_FUNCT2MUX(*mux));
+		port_setup(port, pin_to_offset(range, pin), false);
+		mux++;
+
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	return 0;
+}
+
+static void adi_pinmux_disable(struct pinctrl_dev *pctldev, unsigned selector,
+	unsigned group)
+{
+	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
+	struct gpio_port *port;
+	struct pinctrl_gpio_range *range;
+	unsigned long flags;
+	unsigned short *mux, pin;
+
+	mux = (unsigned short *)pinctrl->soc->functions[selector].mux;
+
+	while (*mux) {
+		pin = P_IDENT(*mux);
+
+		range = pinctrl_find_gpio_range_from_pin(pctldev, pin);
+		if (range == NULL) /* should not happen */
+			return;
+
+		port = container_of(range->gc, struct gpio_port, chip);
+
+		spin_lock_irqsave(&port->lock, flags);
+
+		port_setup(port, pin_to_offset(range, pin), true);
+		mux++;
+
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+}
+
+static int adi_pinmux_get_funcs_count(struct pinctrl_dev *pctldev)
+{
+	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
+
+	return pinctrl->soc->nfunctions;
+}
+
+static const char *adi_pinmux_get_func_name(struct pinctrl_dev *pctldev,
+					  unsigned selector)
+{
+	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
+
+	return pinctrl->soc->functions[selector].name;
+}
+
+static int adi_pinmux_get_groups(struct pinctrl_dev *pctldev, unsigned selector,
+			       const char * const **groups,
+			       unsigned * const num_groups)
+{
+	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
+
+	*groups = pinctrl->soc->functions[selector].groups;
+	*num_groups = pinctrl->soc->functions[selector].num_groups;
+	return 0;
+}
+
+static int adi_pinmux_request_gpio(struct pinctrl_dev *pctldev,
+	struct pinctrl_gpio_range *range, unsigned pin)
+{
+	struct gpio_port *port;
+	unsigned long flags;
+	u8 offset;
+
+	port = container_of(range->gc, struct gpio_port, chip);
+	offset = pin_to_offset(range, pin);
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	port_setup(port, offset, true);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return 0;
+}
+
+static struct pinmux_ops adi_pinmux_ops = {
+	.enable = adi_pinmux_enable,
+	.disable = adi_pinmux_disable,
+	.get_functions_count = adi_pinmux_get_funcs_count,
+	.get_function_name = adi_pinmux_get_func_name,
+	.get_function_groups = adi_pinmux_get_groups,
+	.gpio_request_enable = adi_pinmux_request_gpio,
+};
+
+
+static struct pinctrl_desc adi_pinmux_desc = {
+	.name = DRIVER_NAME,
+	.pctlops = &adi_pctrl_ops,
+	.pmxops = &adi_pinmux_ops,
+	.owner = THIS_MODULE,
+};
+
+static int adi_gpio_request(struct gpio_chip *chip, unsigned offset)
+{
+	return pinctrl_request_gpio(chip->base + offset);
+}
+
+static void adi_gpio_free(struct gpio_chip *chip, unsigned offset)
+{
+	pinctrl_free_gpio(chip->base + offset);
+}
+
+static int adi_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct gpio_port *port;
+	unsigned long flags;
+
+	port = container_of(chip, struct gpio_port, chip);
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	writew(BIT(offset), &port->regs->dir_clear);
+	writew(readw(&port->regs->inen) | BIT(offset), &port->regs->inen);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return 0;
+}
+
+static void adi_gpio_set_value(struct gpio_chip *chip, unsigned offset,
+	int value)
+{
+	struct gpio_port *port = container_of(chip, struct gpio_port, chip);
+	struct gpio_port_t *regs = port->regs;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	if (value)
+		writew(1 << offset, &regs->data_set);
+	else
+		writew(1 << offset, &regs->data_clear);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static int adi_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
+	int value)
+{
+	struct gpio_port *port = container_of(chip, struct gpio_port, chip);
+	struct gpio_port_t *regs = port->regs;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	writew(readw(&regs->inen) & ~(1 << offset), &regs->inen);
+	adi_gpio_set_value(chip, offset, value);
+	writew(1 << offset, &regs->dir_set);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return 0;
+}
+
+static int adi_gpio_get_value(struct gpio_chip *chip, unsigned offset)
+{
+	struct gpio_port *port = container_of(chip, struct gpio_port, chip);
+	struct gpio_port_t *regs = port->regs;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	ret = !!(readw(&regs->data) & BIT(offset));
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return ret;
+}
+
+static int adi_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct gpio_port *port = container_of(chip, struct gpio_port, chip);
+
+	if (port->irq_base >= 0)
+		return irq_find_mapping(port->domain, offset);
+	else
+		return irq_create_mapping(port->domain, offset);
+}
+
+static int adi_pint_map_port(struct gpio_pint *pint, bool assign, u8 map,
+	struct irq_domain *domain)
+{
+	struct gpio_pint_regs *regs = pint->regs;
+	u32 map_mask;
+
+	if (pint->map_count > 1)
+		return -EINVAL;
+
+	pint->map_count++;
+
+	/* The map_mask of each gpio port is a 16-bit duplicate
+	 * of the 8-bit map. It can be set to either high 16 bits or low
+	 * 16 bits of the pint assignment register.
+	 */
+	map_mask = (map << 8) | map;
+	if (assign) {
+		map_mask <<= PINT_HI_OFFSET;
+		writel((readl(&regs->assign) & 0xFFFF) | map_mask,
+			&regs->assign);
+	} else
+		writel((readl(&regs->assign) & 0xFFFF0000) | map_mask,
+			&regs->assign);
+
+	pint->domain[assign] = domain;
+
+	return 0;
+}
+
+static int adi_gpio_pint_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	struct gpio_pint *pint;
+
+	pint = devm_kzalloc(dev, sizeof(struct gpio_pint), GFP_KERNEL);
+	if (!pint) {
+		dev_err(dev, "Memory alloc failed\n");
+		return -ENOMEM;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "Invalid mem resource\n");
+		return -ENODEV;
+	}
+
+	if (!devm_request_mem_region(dev, res->start, resource_size(res),
+				     pdev->name)) {
+		dev_err(dev, "Region already claimed\n");
+		return -EBUSY;
+	}
+
+	pint->base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!pint->base) {
+		dev_err(dev, "Could not ioremap\n");
+		return -ENOMEM;
+	}
+
+	pint->regs = (struct gpio_pint_regs *)pint->base;
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(dev, "Invalid IRQ resource\n");
+		return -ENODEV;
+	}
+
+	spin_lock_init(&pint->lock);
+
+	pint->irq = res->start;
+	pint->pint_map_port = adi_pint_map_port;
+	platform_set_drvdata(pdev, pint);
+
+	irq_set_chained_handler(pint->irq, adi_gpio_handle_pint_irq);
+	irq_set_handler_data(pint->irq, pint);
+
+	list_add_tail(&pint->node, &adi_pint_list);
+
+	return 0;
+}
+
+static int adi_gpio_pint_remove(struct platform_device *pdev)
+{
+	struct gpio_pint *pint = platform_get_drvdata(pdev);
+
+	list_del(&pint->node);
+	irq_set_handler(pint->irq, handle_simple_irq);
+
+	return 0;
+}
+
+static int adi_gpio_irq_map(struct irq_domain *d, unsigned int irq,
+				irq_hw_number_t hwirq)
+{
+	struct gpio_port *port = d->host_data;
+
+	if (!port)
+		return -EINVAL;
+
+	irq_set_chip_data(irq, port);
+	irq_set_chip_and_handler(irq, &adi_gpio_irqchip,
+				handle_level_irq);
+
+	return 0;
+}
+
+const struct irq_domain_ops adi_gpio_irq_domain_ops = {
+	.map = adi_gpio_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static int adi_gpio_init_int(struct gpio_port *port)
+{
+	struct device_node *node = port->dev->of_node;
+	struct gpio_pint *pint = port->pint;
+	int ret;
+
+	port->domain = irq_domain_add_linear(node, port->width,
+				&adi_gpio_irq_domain_ops, port);
+	if (!port->domain) {
+		dev_err(port->dev, "Failed to create irqdomain\n");
+		return -ENOSYS;
+	}
+
+	/* According to BF54x and BF60x HRM, pin interrupt devices are not
+	 * part of the GPIO port device. in GPIO interrupt mode, the GPIO
+	 * pins of multiple port devices can be routed into one pin interrupt
+	 * device. The mapping can be configured by setting pint assignment
+	 * register with the mapping value of different GPIO port. This is
+	 * done via function pint_map_port().
+	 */
+	ret = pint->pint_map_port(port->pint, port->pint_assign,
+			port->pint_map,	port->domain);
+	if (ret)
+		return ret;
+
+	if (port->irq_base >= 0) {
+		ret = irq_create_strict_mappings(port->domain, port->irq_base,
+					0, port->width);
+		if (ret) {
+			dev_err(port->dev, "Couldn't associate to domain\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+#define DEVNAME_SIZE 16
+
+static int adi_gpio_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct adi_pinctrl_gpio_platform_data *pdata;
+	struct resource *res;
+	struct gpio_port *port;
+	char pinctrl_devname[DEVNAME_SIZE];
+	static int gpio;
+	int ret = 0, ret1;
+
+	pdata = dev->platform_data;
+	if (!pdata)
+		return -EINVAL;
+
+	port = devm_kzalloc(dev, sizeof(struct gpio_port), GFP_KERNEL);
+	if (!port) {
+		dev_err(dev, "Memory alloc failed\n");
+		return -ENOMEM;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "Invalid mem resource\n");
+		return -ENODEV;
+	}
+
+	if (!devm_request_mem_region(dev, res->start, resource_size(res),
+				     pdev->name)) {
+		dev_err(dev, "Region already claimed\n");
+		return -EBUSY;
+	}
+
+	port->base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!port->base) {
+		dev_err(dev, "Could not ioremap\n");
+		return -ENOMEM;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res)
+		port->irq_base = -1;
+	else
+		port->irq_base = res->start;
+
+	port->width = pdata->port_width;
+	port->dev = dev;
+	port->regs = (struct gpio_port_t *)port->base;
+	port->pint_assign = pdata->pint_assign;
+	port->pint_map = pdata->pint_map;
+
+	port->pint = find_gpio_pint(pdata->pint_id);
+	if (port->pint) {
+		ret = adi_gpio_init_int(port);
+		if (ret)
+			return ret;
+	}
+
+	spin_lock_init(&port->lock);
+
+	platform_set_drvdata(pdev, port);
+
+	port->chip.label		= "adi-gpio";
+	port->chip.direction_input	= adi_gpio_direction_input;
+	port->chip.get			= adi_gpio_get_value;
+	port->chip.direction_output	= adi_gpio_direction_output;
+	port->chip.set			= adi_gpio_set_value;
+	port->chip.request		= adi_gpio_request;
+	port->chip.free			= adi_gpio_free;
+	port->chip.to_irq		= adi_gpio_to_irq;
+	if (pdata->port_gpio_base > 0)
+		port->chip.base		= pdata->port_gpio_base;
+	else
+		port->chip.base		= gpio;
+	port->chip.ngpio		= port->width;
+	gpio = port->chip.base + port->width;
+
+	ret = gpiochip_add(&port->chip);
+	if (ret) {
+		dev_err(&pdev->dev, "Fail to add GPIO chip.\n");
+		goto out_remove_domain;
+	}
+
+	/* Add gpio pin range */
+	snprintf(pinctrl_devname, DEVNAME_SIZE, "pinctrl-adi2.%d",
+		pdata->pinctrl_id);
+	pinctrl_devname[DEVNAME_SIZE - 1] = 0;
+	ret = gpiochip_add_pin_range(&port->chip, pinctrl_devname,
+		0, pdata->port_pin_base, port->width);
+	if (ret) {
+		dev_err(&pdev->dev, "Fail to add pin range to %s.\n",
+				pinctrl_devname);
+		goto out_remove_gpiochip;
+	}
+
+	list_add_tail(&port->node, &adi_gpio_port_list);
+
+	return 0;
+
+out_remove_gpiochip:
+	ret1 = gpiochip_remove(&port->chip);
+out_remove_domain:
+	if (port->pint)
+		irq_domain_remove(port->domain);
+
+	return ret;
+}
+
+static int adi_gpio_remove(struct platform_device *pdev)
+{
+	struct gpio_port *port = platform_get_drvdata(pdev);
+	int ret;
+	u8 offset;
+
+	list_del(&port->node);
+	gpiochip_remove_pin_ranges(&port->chip);
+	ret = gpiochip_remove(&port->chip);
+	if (port->pint) {
+		for (offset = 0; offset < port->width; offset++)
+			irq_dispose_mapping(irq_find_mapping(port->domain,
+				offset));
+		irq_domain_remove(port->domain);
+	}
+
+	return ret;
+}
+
+static int adi_pinctrl_probe(struct platform_device *pdev)
+{
+	struct adi_pinctrl *pinctrl;
+
+	pinctrl = devm_kzalloc(&pdev->dev, sizeof(*pinctrl), GFP_KERNEL);
+	if (!pinctrl)
+		return -ENOMEM;
+
+	pinctrl->dev = &pdev->dev;
+
+	adi_pinctrl_soc_init(&pinctrl->soc);
+
+	adi_pinmux_desc.pins = pinctrl->soc->pins;
+	adi_pinmux_desc.npins = pinctrl->soc->npins;
+
+	/* Now register the pin controller and all pins it handles */
+	pinctrl->pctl = pinctrl_register(&adi_pinmux_desc, &pdev->dev, pinctrl);
+	if (!pinctrl->pctl) {
+		dev_err(&pdev->dev, "could not register pinctrl ADI2 driver\n");
+		return -EINVAL;
+	}
+
+	platform_set_drvdata(pdev, pinctrl);
+
+	return 0;
+}
+
+static int adi_pinctrl_remove(struct platform_device *pdev)
+{
+	struct adi_pinctrl *pinctrl = platform_get_drvdata(pdev);
+
+	pinctrl_unregister(pinctrl->pctl);
+
+	return 0;
+}
+
+static struct platform_driver adi_pinctrl_driver = {
+	.probe		= adi_pinctrl_probe,
+	.remove		= adi_pinctrl_remove,
+	.driver		= {
+		.name	= DRIVER_NAME,
+	},
+};
+
+static struct platform_driver adi_gpio_pint_driver = {
+	.probe		= adi_gpio_pint_probe,
+	.remove		= adi_gpio_pint_remove,
+	.driver		= {
+		.name	= "adi-gpio-pint",
+	},
+};
+
+static struct platform_driver adi_gpio_driver = {
+	.probe		= adi_gpio_probe,
+	.remove		= adi_gpio_remove,
+	.driver		= {
+		.name	= "adi-gpio",
+	},
+};
+
+static int __init adi_pinctrl_setup(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&adi_pinctrl_driver);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&adi_gpio_pint_driver);
+	if (ret)
+		goto pint_error;
+
+	ret = platform_driver_register(&adi_gpio_driver);
+	if (ret)
+		goto gpio_error;
+
+#ifdef CONFIG_PM
+	register_syscore_ops(&gpio_pm_syscore_ops);
+#endif
+	return ret;
+gpio_error:
+	platform_driver_unregister(&adi_gpio_pint_driver);
+pint_error:
+	platform_driver_unregister(&adi_pinctrl_driver);
+
+	return ret;
+}
+arch_initcall(adi_pinctrl_setup);
+
+MODULE_AUTHOR("Sonic Zhang <sonic.zhang@analog.com>");
+MODULE_DESCRIPTION("ADI gpio2 pin control driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pinctrl/pinctrl-adi2.h b/drivers/pinctrl/pinctrl-adi2.h
new file mode 100644
index 000000000000..1f06f8df1fa3
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-adi2.h
@@ -0,0 +1,75 @@
+/*
+ * Pinctrl Driver for ADI GPIO2 controller
+ *
+ * Copyright 2007-2013 Analog Devices Inc.
+ *
+ * Licensed under the GPLv2 or later
+ */
+
+#ifndef PINCTRL_PINCTRL_ADI2_H
+#define PINCTRL_PINCTRL_ADI2_H
+
+#include <linux/pinctrl/pinctrl.h>
+
+ /**
+ * struct adi_pin_group - describes a pin group
+ * @name: the name of this pin group
+ * @pins: an array of pins
+ * @num: the number of pins in this array
+ */
+struct adi_pin_group {
+	const char *name;
+	const unsigned *pins;
+	const unsigned num;
+};
+
+#define ADI_PIN_GROUP(n, p)  \
+	{			\
+		.name = n,	\
+		.pins = p,	\
+		.num = ARRAY_SIZE(p),	\
+	}
+
+ /**
+ * struct adi_pmx_func - describes function mux setting of pin groups
+ * @name: the name of this function mux setting
+ * @groups: an array of pin groups
+ * @num_groups: the number of pin groups in this array
+ * @mux: the function mux setting array, end by zero
+ */
+struct adi_pmx_func {
+	const char *name;
+	const char * const *groups;
+	const unsigned num_groups;
+	const unsigned short *mux;
+};
+
+#define ADI_PMX_FUNCTION(n, g, m)		\
+	{					\
+		.name = n,			\
+		.groups = g,			\
+		.num_groups = ARRAY_SIZE(g),	\
+		.mux = m,			\
+	}
+
+/**
+ * struct adi_pinctrl_soc_data - ADI pin controller per-SoC configuration
+ * @functions:  The functions supported on this SoC.
+ * @nfunction:  The number of entries in @functions.
+ * @groups:     An array describing all pin groups the pin SoC supports.
+ * @ngroups:    The number of entries in @groups.
+ * @pins:       An array describing all pins the pin controller affects.
+ * @npins:      The number of entries in @pins.
+ */
+struct adi_pinctrl_soc_data {
+	const struct adi_pmx_func *functions;
+	int nfunctions;
+	const struct adi_pin_group *groups;
+	int ngroups;
+	const struct pinctrl_pin_desc *pins;
+	int npins;
+};
+
+void adi_pinctrl_soc_init(const struct adi_pinctrl_soc_data **soc);
+
+#endif /* PINCTRL_PINCTRL_ADI2_H */
diff --git a/include/linux/platform_data/pinctrl-adi2.h b/include/linux/platform_data/pinctrl-adi2.h
new file mode 100644
index 000000000000..8f91300617ec
--- /dev/null
+++ b/include/linux/platform_data/pinctrl-adi2.h
@@ -0,0 +1,40 @@
+/*
+ * Pinctrl Driver for ADI GPIO2 controller
+ *
+ * Copyright 2007-2013 Analog Devices Inc.
+ *
+ * Licensed under the GPLv2 or later
+ */
+
+
+#ifndef PINCTRL_ADI2_H
+#define PINCTRL_ADI2_H
+
+#include <linux/io.h>
+#include <linux/platform_device.h>
+
+/**
+ * struct adi_pinctrl_gpio_platform_data - Pinctrl gpio platform data
+ * for ADI GPIO2 device.
+ *
+ * @port_gpio_base: Optional global GPIO index of the GPIO bank.
+ *                 0 means driver decides.
+ * @port_pin_base: Pin index of the pin controller device.
+ * @port_width: PIN number of the GPIO bank device
+ * @pint_id: GPIO PINT device id that this GPIO bank should map to.
+ * @pint_assign: The 32-bit GPIO PINT registers can be divided into 2 parts. A
+ *               GPIO bank can be mapped into either low 16 bits[0] or high 16
+ *               bits[1] of each PINT register.
+ * @pint_map: GIOP bank mapping code in PINT device
+ */
+struct adi_pinctrl_gpio_platform_data {
+	unsigned int port_gpio_base;
+	unsigned int port_pin_base;
+	unsigned int port_width;
+	u8 pinctrl_id;
+	u8 pint_id;
+	bool pint_assign;
+	u8 pint_map;
+};
+
+#endif
-- 
cgit v1.2.3


From 9bd721c55c8a886b938a45198aab0ccb52f1f7fa Mon Sep 17 00:00:00 2001
From: Jason Low <jason.low2@hp.com>
Date: Fri, 13 Sep 2013 11:26:52 -0700
Subject: sched/balancing: Consider max cost of idle balance per sched domain

In this patch, we keep track of the max cost we spend doing idle load balancing
for each sched domain. If the avg time the CPU remains idle is less then the
time we have already spent on idle balancing + the max cost of idle balancing
in the sched domain, then we don't continue to attempt the balance. We also
keep a per rq variable, max_idle_balance_cost, which keeps track of the max
time spent on newidle load balances throughout all its domains so that we can
determine the avg_idle's max value.

By using the max, we avoid overrunning the average. This further reduces the
chance we attempt balancing when the CPU is not idle for longer than the cost
to balance.

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-3-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/metag/include/asm/topology.h |  1 +
 include/linux/sched.h             |  1 +
 include/linux/topology.h          |  3 +++
 kernel/sched/core.c               |  3 ++-
 kernel/sched/fair.c               | 16 ++++++++++++++++
 kernel/sched/sched.h              |  3 +++
 6 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h
index 23f5118f58db..db192924f4b0 100644
--- a/arch/metag/include/asm/topology.h
+++ b/arch/metag/include/asm/topology.h
@@ -26,6 +26,7 @@
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
+	.max_newidle_lb_cost	= 0,			\
 }
 
 #define cpu_to_node(cpu)	((void)(cpu), 0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6682da36b293..be078ff9157f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -810,6 +810,7 @@ struct sched_domain {
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
 	u64 last_update;
+	u64 max_newidle_lb_cost;
 
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index d3cf0d6e7712..e2a2c3da2929 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -106,6 +106,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.smt_gain		= 1178,	/* 15% */			\
+	.max_newidle_lb_cost	= 0,					\
 }
 #endif
 #endif /* CONFIG_SCHED_SMT */
@@ -135,6 +136,7 @@ int arch_update_cpu_topology(void);
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
+	.max_newidle_lb_cost	= 0,					\
 }
 #endif
 #endif /* CONFIG_SCHED_MC */
@@ -166,6 +168,7 @@ int arch_update_cpu_topology(void);
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
+	.max_newidle_lb_cost	= 0,					\
 }
 #endif
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 048f39e45761..c2283c54aed0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1330,7 +1330,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 
 	if (rq->idle_stamp) {
 		u64 delta = rq_clock(rq) - rq->idle_stamp;
-		u64 max = 2*sysctl_sched_migration_cost;
+		u64 max = 2*rq->max_idle_balance_cost;
 
 		update_avg(&rq->avg_idle, delta);
 
@@ -6506,6 +6506,7 @@ void __init sched_init(void)
 		rq->online = 0;
 		rq->idle_stamp = 0;
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
+		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 
 		INIT_LIST_HEAD(&rq->cfs_tasks);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0784ab6fcc59..ffc99d8f0a95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5396,6 +5396,7 @@ void idle_balance(int this_cpu, struct rq *this_rq)
 	struct sched_domain *sd;
 	int pulled_task = 0;
 	unsigned long next_balance = jiffies + HZ;
+	u64 curr_cost = 0;
 
 	this_rq->idle_stamp = rq_clock(this_rq);
 
@@ -5412,15 +5413,27 @@ void idle_balance(int this_cpu, struct rq *this_rq)
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
 		int continue_balancing = 1;
+		u64 t0, domain_cost;
 
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
+		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+			break;
+
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
+			t0 = sched_clock_cpu(this_cpu);
+
 			/* If we've pulled tasks over stop searching: */
 			pulled_task = load_balance(this_cpu, this_rq,
 						   sd, CPU_NEWLY_IDLE,
 						   &continue_balancing);
+
+			domain_cost = sched_clock_cpu(this_cpu) - t0;
+			if (domain_cost > sd->max_newidle_lb_cost)
+				sd->max_newidle_lb_cost = domain_cost;
+
+			curr_cost += domain_cost;
 		}
 
 		interval = msecs_to_jiffies(sd->balance_interval);
@@ -5442,6 +5455,9 @@ void idle_balance(int this_cpu, struct rq *this_rq)
 		 */
 		this_rq->next_balance = next_balance;
 	}
+
+	if (curr_cost > this_rq->max_idle_balance_cost)
+		this_rq->max_idle_balance_cost = curr_cost;
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0d7544c3dba7..e82484db7699 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -476,6 +476,9 @@ struct rq {
 	u64 age_stamp;
 	u64 idle_stamp;
 	u64 avg_idle;
+
+	/* This is used to determine avg_idle's max value */
+	u64 max_idle_balance_cost;
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-- 
cgit v1.2.3


From f48627e686a69f5215cb0761e731edb3d9859dd9 Mon Sep 17 00:00:00 2001
From: Jason Low <jason.low2@hp.com>
Date: Fri, 13 Sep 2013 11:26:53 -0700
Subject: sched/balancing: Periodically decay max cost of idle balance

This patch builds on patch 2 and periodically decays that max value to
do idle balancing per sched domain by approximately 1% per second. Also
decay the rq's max_idle_balance_cost value.

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-4-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/metag/include/asm/topology.h |  1 +
 include/linux/sched.h             |  3 +++
 include/linux/topology.h          |  3 +++
 kernel/sched/fair.c               | 38 +++++++++++++++++++++++++++++++-------
 4 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h
index db192924f4b0..8e9c0b3b9691 100644
--- a/arch/metag/include/asm/topology.h
+++ b/arch/metag/include/asm/topology.h
@@ -27,6 +27,7 @@
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
 	.max_newidle_lb_cost	= 0,			\
+	.next_decay_max_lb_cost	= jiffies,		\
 }
 
 #define cpu_to_node(cpu)	((void)(cpu), 0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index be078ff9157f..b5344de1658b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -810,7 +810,10 @@ struct sched_domain {
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
 	u64 last_update;
+
+	/* idle_balance() stats */
 	u64 max_newidle_lb_cost;
+	unsigned long next_decay_max_lb_cost;
 
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index e2a2c3da2929..12ae6ce997d6 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -107,6 +107,7 @@ int arch_update_cpu_topology(void);
 	.balance_interval	= 1,					\
 	.smt_gain		= 1178,	/* 15% */			\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 #endif /* CONFIG_SCHED_SMT */
@@ -137,6 +138,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 #endif /* CONFIG_SCHED_MC */
@@ -169,6 +171,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ffc99d8f0a95..2b89cd244b0d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5681,15 +5681,39 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
-	int need_serialize;
+	int need_serialize, need_decay = 0;
+	u64 max_cost = 0;
 
 	update_blocked_averages(cpu);
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
+		/*
+		 * Decay the newidle max times here because this is a regular
+		 * visit to all the domains. Decay ~1% per second.
+		 */
+		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
+			sd->max_newidle_lb_cost =
+				(sd->max_newidle_lb_cost * 253) / 256;
+			sd->next_decay_max_lb_cost = jiffies + HZ;
+			need_decay = 1;
+		}
+		max_cost += sd->max_newidle_lb_cost;
+
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
+		/*
+		 * Stop the load balance at this level. There is another
+		 * CPU in our sched group which is doing load balancing more
+		 * actively.
+		 */
+		if (!continue_balancing) {
+			if (need_decay)
+				continue;
+			break;
+		}
+
 		interval = sd->balance_interval;
 		if (idle != CPU_IDLE)
 			interval *= sd->busy_factor;
@@ -5723,14 +5747,14 @@ out:
 			next_balance = sd->last_balance + interval;
 			update_next_balance = 1;
 		}
-
+	}
+	if (need_decay) {
 		/*
-		 * Stop the load balance at this level. There is another
-		 * CPU in our sched group which is doing load balancing more
-		 * actively.
+		 * Ensure the rq-wide value also decays but keep it at a
+		 * reasonable floor to avoid funnies with rq->avg_idle.
 		 */
-		if (!continue_balancing)
-			break;
+		rq->max_idle_balance_cost =
+			max((u64)sysctl_sched_migration_cost, max_cost);
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From 6112fe60ac1bd1e68da8cc4248289d6e48015f9b Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Fri, 20 Sep 2013 18:00:10 +0530
Subject: regmap: add helper macro to set min/max range of register

Add helper macro to set the min and max value of the register range.

This is useful when initialising the register ranges of the device like

static const struct regmap_range readable_ranges[] = {
	regmap_reg_range(DEVICE_REG0, DEVICE_REG10),
};

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regmap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..7d3ae2be6869 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -70,6 +70,8 @@ struct regmap_range {
 	unsigned int range_max;
 };
 
+#define regmap_reg_range(low, high) { .range_min = low, .range_max = high, }
+
 /*
  * A table of ranges including some yes ranges and some no ranges.
  * If a register belongs to a no_range, the corresponding check function
-- 
cgit v1.2.3


From ca9a563805f7ae821e3303b1bdbc65d3a3c783ff Mon Sep 17 00:00:00 2001
From: Zubair Lutfullah <zubair.lutfullah@gmail.com>
Date: Thu, 19 Sep 2013 07:24:00 +0100
Subject: iio: ti_am335x_adc: Add continuous sampling support

Previously the driver had only one-shot reading functionality.
This patch adds continuous sampling support to the driver.

Continuous sampling starts when buffer is enabled.
HW IRQ wakes worker thread that pushes samples to userspace.
Sampling stops when buffer is disabled by userspace.

Patil Rachna (TI) laid the ground work for ADC HW register access.
Russ Dill (TI) fixed bugs in the driver relevant to FIFOs and IRQs.

I fixed channel scanning so multiple ADC channels can be read
simultaneously and pushed to userspace.
Restructured the driver to fit IIO ABI.
And added INDIO_BUFFER_HARDWARE mode.

Signed-off-by: Zubair Lutfullah <zubair.lutfullah@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Russ Dill <Russ.Dill@ti.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/Kconfig              |   1 +
 drivers/iio/adc/ti_am335x_adc.c      | 213 ++++++++++++++++++++++++++++++++++-
 include/linux/mfd/ti_am335x_tscadc.h |   9 ++
 3 files changed, 218 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 0cf5c611b4c6..8b885188b839 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -177,6 +177,7 @@ config TI_ADC081C
 config TI_AM335X_ADC
 	tristate "TI's AM335X ADC driver"
 	depends on MFD_TI_AM335X_TSCADC
+	select IIO_KFIFO_BUF
 	help
 	  Say yes here to build support for Texas Instruments ADC
 	  driver which is also a MFD client.
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index ebe93eba3ec8..5287bffd8a75 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -28,12 +28,20 @@
 #include <linux/iio/driver.h>
 
 #include <linux/mfd/ti_am335x_tscadc.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/kfifo_buf.h>
+#include <linux/iio/trigger.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
 
 struct tiadc_device {
 	struct ti_tscadc_dev *mfd_tscadc;
 	int channels;
 	u8 channel_line[8];
 	u8 channel_step[8];
+	int buffer_en_ch_steps;
+	struct iio_trigger *trig;
+	u16 data[8];
 };
 
 static unsigned int tiadc_readl(struct tiadc_device *adc, unsigned int reg)
@@ -56,8 +64,14 @@ static u32 get_adc_step_mask(struct tiadc_device *adc_dev)
 	return step_en;
 }
 
-static void tiadc_step_config(struct tiadc_device *adc_dev)
+static u32 get_adc_step_bit(struct tiadc_device *adc_dev, int chan)
 {
+	return 1 << adc_dev->channel_step[chan];
+}
+
+static void tiadc_step_config(struct iio_dev *indio_dev)
+{
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
 	unsigned int stepconfig;
 	int i, steps;
 
@@ -72,7 +86,11 @@ static void tiadc_step_config(struct tiadc_device *adc_dev)
 	 */
 
 	steps = TOTAL_STEPS - adc_dev->channels;
-	stepconfig = STEPCONFIG_AVG_16 | STEPCONFIG_FIFO1;
+	if (iio_buffer_enabled(indio_dev))
+		stepconfig = STEPCONFIG_AVG_16 | STEPCONFIG_FIFO1
+					| STEPCONFIG_MODE_SWCNT;
+	else
+		stepconfig = STEPCONFIG_AVG_16 | STEPCONFIG_FIFO1;
 
 	for (i = 0; i < adc_dev->channels; i++) {
 		int chan;
@@ -85,9 +103,175 @@ static void tiadc_step_config(struct tiadc_device *adc_dev)
 		adc_dev->channel_step[i] = steps;
 		steps++;
 	}
+}
+
+static irqreturn_t tiadc_irq_h(int irq, void *private)
+{
+	struct iio_dev *indio_dev = private;
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	unsigned int status, config;
+	status = tiadc_readl(adc_dev, REG_IRQSTATUS);
+
+	/*
+	 * ADC and touchscreen share the IRQ line.
+	 * FIFO0 interrupts are used by TSC. Handle FIFO1 IRQs here only
+	 */
+	if (status & IRQENB_FIFO1OVRRUN) {
+		/* FIFO Overrun. Clear flag. Disable/Enable ADC to recover */
+		config = tiadc_readl(adc_dev, REG_CTRL);
+		config &= ~(CNTRLREG_TSCSSENB);
+		tiadc_writel(adc_dev, REG_CTRL, config);
+		tiadc_writel(adc_dev, REG_IRQSTATUS, IRQENB_FIFO1OVRRUN
+				| IRQENB_FIFO1UNDRFLW | IRQENB_FIFO1THRES);
+		tiadc_writel(adc_dev, REG_CTRL, (config | CNTRLREG_TSCSSENB));
+		return IRQ_HANDLED;
+	} else if (status & IRQENB_FIFO1THRES) {
+		/* Disable irq and wake worker thread */
+		tiadc_writel(adc_dev, REG_IRQCLR, IRQENB_FIFO1THRES);
+		return IRQ_WAKE_THREAD;
+	}
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t tiadc_worker_h(int irq, void *private)
+{
+	struct iio_dev *indio_dev = private;
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	int i, k, fifo1count, read;
+	u16 *data = adc_dev->data;
+
+	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
+	for (k = 0; k < fifo1count; k = k + i) {
+		for (i = 0; i < (indio_dev->scan_bytes)/2; i++) {
+			read = tiadc_readl(adc_dev, REG_FIFO1);
+			data[i] = read & FIFOREAD_DATA_MASK;
+		}
+		iio_push_to_buffers(indio_dev, (u8 *) data);
+	}
+
+	tiadc_writel(adc_dev, REG_IRQSTATUS, IRQENB_FIFO1THRES);
+	tiadc_writel(adc_dev, REG_IRQENABLE, IRQENB_FIFO1THRES);
+
+	return IRQ_HANDLED;
+}
+
+static int tiadc_buffer_preenable(struct iio_dev *indio_dev)
+{
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	int i, fifo1count, read;
+
+	tiadc_writel(adc_dev, REG_IRQCLR, (IRQENB_FIFO1THRES |
+				IRQENB_FIFO1OVRRUN |
+				IRQENB_FIFO1UNDRFLW));
+
+	/* Flush FIFO. Needed in corner cases in simultaneous tsc/adc use */
+	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
+	for (i = 0; i < fifo1count; i++)
+		read = tiadc_readl(adc_dev, REG_FIFO1);
+
+	return iio_sw_buffer_preenable(indio_dev);
+}
+
+static int tiadc_buffer_postenable(struct iio_dev *indio_dev)
+{
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	struct iio_buffer *buffer = indio_dev->buffer;
+	unsigned int enb = 0;
+	u8 bit;
+
+	tiadc_step_config(indio_dev);
+	for_each_set_bit(bit, buffer->scan_mask, adc_dev->channels)
+		enb |= (get_adc_step_bit(adc_dev, bit) << 1);
+	adc_dev->buffer_en_ch_steps = enb;
+
+	am335x_tsc_se_set(adc_dev->mfd_tscadc, enb);
+
+	tiadc_writel(adc_dev,  REG_IRQSTATUS, IRQENB_FIFO1THRES
+				| IRQENB_FIFO1OVRRUN | IRQENB_FIFO1UNDRFLW);
+	tiadc_writel(adc_dev,  REG_IRQENABLE, IRQENB_FIFO1THRES
+				| IRQENB_FIFO1OVRRUN);
+
+	return 0;
+}
+
+static int tiadc_buffer_predisable(struct iio_dev *indio_dev)
+{
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	int fifo1count, i, read;
+
+	tiadc_writel(adc_dev, REG_IRQCLR, (IRQENB_FIFO1THRES |
+				IRQENB_FIFO1OVRRUN | IRQENB_FIFO1UNDRFLW));
+	am335x_tsc_se_clr(adc_dev->mfd_tscadc, adc_dev->buffer_en_ch_steps);
 
+	/* Flush FIFO of leftover data in the time it takes to disable adc */
+	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
+	for (i = 0; i < fifo1count; i++)
+		read = tiadc_readl(adc_dev, REG_FIFO1);
+
+	return 0;
 }
 
+static int tiadc_buffer_postdisable(struct iio_dev *indio_dev)
+{
+	tiadc_step_config(indio_dev);
+
+	return 0;
+}
+
+static const struct iio_buffer_setup_ops tiadc_buffer_setup_ops = {
+	.preenable = &tiadc_buffer_preenable,
+	.postenable = &tiadc_buffer_postenable,
+	.predisable = &tiadc_buffer_predisable,
+	.postdisable = &tiadc_buffer_postdisable,
+};
+
+int tiadc_iio_buffered_hardware_setup(struct iio_dev *indio_dev,
+	irqreturn_t (*pollfunc_bh)(int irq, void *p),
+	irqreturn_t (*pollfunc_th)(int irq, void *p),
+	int irq,
+	unsigned long flags,
+	const struct iio_buffer_setup_ops *setup_ops)
+{
+	int ret;
+
+	indio_dev->buffer = iio_kfifo_allocate(indio_dev);
+	if (!indio_dev->buffer)
+		return -ENOMEM;
+
+	ret = request_threaded_irq(irq,	pollfunc_th, pollfunc_bh,
+				flags, indio_dev->name, indio_dev);
+	if (ret)
+		goto error_kfifo_free;
+
+	indio_dev->setup_ops = setup_ops;
+	indio_dev->modes |= INDIO_BUFFER_HARDWARE;
+
+	ret = iio_buffer_register(indio_dev,
+				  indio_dev->channels,
+				  indio_dev->num_channels);
+	if (ret)
+		goto error_free_irq;
+
+	return 0;
+
+error_free_irq:
+	free_irq(irq, indio_dev);
+error_kfifo_free:
+	iio_kfifo_free(indio_dev->buffer);
+	return ret;
+}
+
+static void tiadc_iio_buffered_hardware_remove(struct iio_dev *indio_dev)
+{
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+
+	free_irq(adc_dev->mfd_tscadc->irq, indio_dev);
+	iio_kfifo_free(indio_dev->buffer);
+	iio_buffer_unregister(indio_dev);
+}
+
+
 static const char * const chan_name_ain[] = {
 	"AIN0",
 	"AIN1",
@@ -120,6 +304,7 @@ static int tiadc_channel_init(struct iio_dev *indio_dev, int channels)
 		chan->channel = adc_dev->channel_line[i];
 		chan->info_mask_separate = BIT(IIO_CHAN_INFO_RAW);
 		chan->datasheet_name = chan_name_ain[chan->channel];
+		chan->scan_index = i;
 		chan->scan_type.sign = 'u';
 		chan->scan_type.realbits = 12;
 		chan->scan_type.storagebits = 16;
@@ -147,6 +332,10 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
 	u32 step_en;
 	unsigned long timeout = jiffies + usecs_to_jiffies
 				(IDLE_TIMEOUT * adc_dev->channels);
+
+	if (iio_buffer_enabled(indio_dev))
+		return -EBUSY;
+
 	step_en = get_adc_step_mask(adc_dev);
 	am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en);
 
@@ -237,20 +426,33 @@ static int tiadc_probe(struct platform_device *pdev)
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &tiadc_info;
 
-	tiadc_step_config(adc_dev);
+	tiadc_step_config(indio_dev);
+	tiadc_writel(adc_dev, REG_FIFO1THR, FIFO1_THRESHOLD);
 
 	err = tiadc_channel_init(indio_dev, adc_dev->channels);
 	if (err < 0)
 		return err;
 
-	err = iio_device_register(indio_dev);
+	err = tiadc_iio_buffered_hardware_setup(indio_dev,
+		&tiadc_worker_h,
+		&tiadc_irq_h,
+		adc_dev->mfd_tscadc->irq,
+		IRQF_SHARED,
+		&tiadc_buffer_setup_ops);
+
 	if (err)
 		goto err_free_channels;
 
+	err = iio_device_register(indio_dev);
+	if (err)
+		goto err_buffer_unregister;
+
 	platform_set_drvdata(pdev, indio_dev);
 
 	return 0;
 
+err_buffer_unregister:
+	tiadc_iio_buffered_hardware_remove(indio_dev);
 err_free_channels:
 	tiadc_channels_remove(indio_dev);
 	return err;
@@ -263,6 +465,7 @@ static int tiadc_remove(struct platform_device *pdev)
 	u32 step_en;
 
 	iio_device_unregister(indio_dev);
+	tiadc_iio_buffered_hardware_remove(indio_dev);
 	tiadc_channels_remove(indio_dev);
 
 	step_en = get_adc_step_mask(adc_dev);
@@ -301,7 +504,7 @@ static int tiadc_resume(struct device *dev)
 	restore &= ~(CNTRLREG_POWERDOWN);
 	tiadc_writel(adc_dev, REG_CTRL, restore);
 
-	tiadc_step_config(adc_dev);
+	tiadc_step_config(indio_dev);
 
 	return 0;
 }
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index db1791bb997a..7d98562b3b51 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -46,16 +46,24 @@
 /* Step Enable */
 #define STEPENB_MASK		(0x1FFFF << 0)
 #define STEPENB(val)		((val) << 0)
+#define ENB(val)			(1 << (val))
+#define STPENB_STEPENB		STEPENB(0x1FFFF)
+#define STPENB_STEPENB_TC	STEPENB(0x1FFF)
 
 /* IRQ enable */
 #define IRQENB_HW_PEN		BIT(0)
 #define IRQENB_FIFO0THRES	BIT(2)
+#define IRQENB_FIFO0OVRRUN	BIT(3)
+#define IRQENB_FIFO0UNDRFLW	BIT(4)
 #define IRQENB_FIFO1THRES	BIT(5)
+#define IRQENB_FIFO1OVRRUN	BIT(6)
+#define IRQENB_FIFO1UNDRFLW	BIT(7)
 #define IRQENB_PENUP		BIT(9)
 
 /* Step Configuration */
 #define STEPCONFIG_MODE_MASK	(3 << 0)
 #define STEPCONFIG_MODE(val)	((val) << 0)
+#define STEPCONFIG_MODE_SWCNT	STEPCONFIG_MODE(1)
 #define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
 #define STEPCONFIG_AVG_MASK	(7 << 2)
 #define STEPCONFIG_AVG(val)	((val) << 2)
@@ -124,6 +132,7 @@
 #define	MAX_CLK_DIV		7
 #define TOTAL_STEPS		16
 #define TOTAL_CHANNELS		8
+#define FIFO1_THRESHOLD		19
 
 /*
 * ADC runs at 3MHz, and it takes
-- 
cgit v1.2.3


From d2c3d072c4aded65f0632223cc0d3a8a2e577b3a Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 19 Sep 2013 13:59:00 +0100
Subject: iio: Add iio_push_buffers_with_timestamp() helper

Drivers using software buffers often store the timestamp in their data buffer
before calling iio_push_to_buffers() with that data buffer. Storing the
timestamp in the buffer usually involves some ugly pointer arithmetic. This
patch adds a new helper function called iio_push_buffers_with_timestamp() which
is similar to iio_push_to_buffers but takes an additional timestamp parameter.
The function will help to hide to uglyness in one central place instead of
exposing it in every driver. If timestamps are enabled for the IIO device
iio_push_buffers_with_timestamp() will store the timestamp as the last element
in buffer, before passing the buffer on to iio_push_buffers(). The buffer needs
large enough to hold the timestamp in this case. If timestamps are disabled
iio_push_buffers_with_timestamp() will behave just like iio_push_buffers().

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Cc: Oleksandr Kravchenko <o.v.kravchenko@globallogic.com>
Cc: Josh Wu <josh.wu@atmel.com>
Cc: Denis Ciocca <denis.ciocca@gmail.com>
Cc: Manuel Stahl <manuel.stahl@iis.fraunhofer.de>
Cc: Ge Gao <ggao@invensense.com>
Cc: Peter Meerwald <pmeerw@pmeerw.net>
Cc: Jacek Anaszewski <j.anaszewski@samsung.com>
Cc: Fabio Estevam <fabio.estevam@freescale.com>
Cc: Marek Vasut <marex@denx.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/buffer.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index e5507e999ed1..a1124bdc4cac 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -122,6 +122,31 @@ int iio_scan_mask_set(struct iio_dev *indio_dev,
  */
 int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data);
 
+/*
+ * iio_push_to_buffers_with_timestamp() - push data and timestamp to buffers
+ * @indio_dev:		iio_dev structure for device.
+ * @data:		sample data
+ * @timestamp:		timestamp for the sample data
+ *
+ * Pushes data to the IIO device's buffers. If timestamps are enabled for the
+ * device the function will store the supplied timestamp as the last element in
+ * the sample data buffer before pushing it to the device buffers. The sample
+ * data buffer needs to be large enough to hold the additional timestamp
+ * (usually the buffer should be indio->scan_bytes bytes large).
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev,
+	void *data, int64_t timestamp)
+{
+	if (indio_dev->scan_timestamp) {
+		size_t ts_offset = indio_dev->scan_bytes / sizeof(int64_t) - 1;
+		((int64_t *)data)[ts_offset] = timestamp;
+	}
+
+	return iio_push_to_buffers(indio_dev, data);
+}
+
 int iio_update_demux(struct iio_dev *indio_dev);
 
 /**
-- 
cgit v1.2.3


From 3f2b9c9cdf389e303b2273679af08aab5f153517 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 17 Sep 2013 05:48:51 +0930
Subject: module: remove rmmod --wait option.

The option to wait for a module reference count to reach zero was in
the initial module implementation, but it was never supported in
modprobe (you had to use rmmod --wait).  After discussion with Lucas,
It has been deprecated (with a 10 second sleep) in kmod for the last
year.

This finally removes it: the flag will evoke a printk warning and a
normal (non-blocking) remove attempt.

Cc: Lucas De Marchi <lucas.de.marchi@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h |  3 ---
 kernel/module.c        | 46 ++++++++--------------------------------------
 2 files changed, 8 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 05f2447f8c15..15cd6b1b211e 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -367,9 +367,6 @@ struct module
 	/* What modules do I depend on? */
 	struct list_head target_list;
 
-	/* Who is waiting for us to be unloaded */
-	struct task_struct *waiter;
-
 	/* Destruction function. */
 	void (*exit)(void);
 
diff --git a/kernel/module.c b/kernel/module.c
index dc582749fa13..947105fd4cab 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -644,8 +644,6 @@ static int module_unload_init(struct module *mod)
 
 	/* Hold reference count during initialization. */
 	__this_cpu_write(mod->refptr->incs, 1);
-	/* Backwards compatibility macros put refcount during init. */
-	mod->waiter = current;
 
 	return 0;
 }
@@ -771,16 +769,9 @@ static int __try_stop_module(void *_sref)
 
 static int try_stop_module(struct module *mod, int flags, int *forced)
 {
-	if (flags & O_NONBLOCK) {
-		struct stopref sref = { mod, flags, forced };
+	struct stopref sref = { mod, flags, forced };
 
-		return stop_machine(__try_stop_module, &sref, NULL);
-	} else {
-		/* We don't need to stop the machine for this. */
-		mod->state = MODULE_STATE_GOING;
-		synchronize_sched();
-		return 0;
-	}
+	return stop_machine(__try_stop_module, &sref, NULL);
 }
 
 unsigned long module_refcount(struct module *mod)
@@ -813,21 +804,6 @@ EXPORT_SYMBOL(module_refcount);
 /* This exists whether we can unload or not */
 static void free_module(struct module *mod);
 
-static void wait_for_zero_refcount(struct module *mod)
-{
-	/* Since we might sleep for some time, release the mutex first */
-	mutex_unlock(&module_mutex);
-	for (;;) {
-		pr_debug("Looking at refcount...\n");
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (module_refcount(mod) == 0)
-			break;
-		schedule();
-	}
-	current->state = TASK_RUNNING;
-	mutex_lock(&module_mutex);
-}
-
 SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		unsigned int, flags)
 {
@@ -842,6 +818,11 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		return -EFAULT;
 	name[MODULE_NAME_LEN-1] = '\0';
 
+	if (!(flags & O_NONBLOCK)) {
+		printk(KERN_WARNING
+		       "waiting module removal not supported: please upgrade");
+	}
+
 	if (mutex_lock_interruptible(&module_mutex) != 0)
 		return -EINTR;
 
@@ -859,8 +840,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 
 	/* Doing init or already dying? */
 	if (mod->state != MODULE_STATE_LIVE) {
-		/* FIXME: if (force), slam module count and wake up
-                   waiter --RR */
+		/* FIXME: if (force), slam module count damn the torpedoes */
 		pr_debug("%s already dying\n", mod->name);
 		ret = -EBUSY;
 		goto out;
@@ -876,18 +856,11 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		}
 	}
 
-	/* Set this up before setting mod->state */
-	mod->waiter = current;
-
 	/* Stop the machine so refcounts can't move and disable module. */
 	ret = try_stop_module(mod, flags, &forced);
 	if (ret != 0)
 		goto out;
 
-	/* Never wait if forced. */
-	if (!forced && module_refcount(mod) != 0)
-		wait_for_zero_refcount(mod);
-
 	mutex_unlock(&module_mutex);
 	/* Final destruction now no one is using it. */
 	if (mod->exit != NULL)
@@ -1005,9 +978,6 @@ void module_put(struct module *module)
 		__this_cpu_inc(module->refptr->decs);
 
 		trace_module_put(module, _RET_IP_);
-		/* Maybe they're waiting for us to drop reference? */
-		if (unlikely(!module_is_live(module)))
-			wake_up_process(module->waiter);
 		preempt_enable();
 	}
 }
-- 
cgit v1.2.3


From cd64647f043e3fd3569bcf068f47f030198ff93a Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Mon, 23 Sep 2013 16:43:58 +0800
Subject: hung_task: Change sysctl_hung_task_check_count to 'int'

As 'sysctl_hung_task_check_count' is 'unsigned long' when this
value is assigned to max_count in check_hung_uninterruptible_tasks(),
it's truncated to 'int' type.

This causes a minor artifact: if we write 2^32 to sysctl.hung_task_check_count,
hung task detection will be effectively disabled.

With this fix, it will still truncate the user input to 32 bits, but
reading sysctl.hung_task_check_count reflects the actual truncated value.

Signed-off-by: Li Zefan <lizefan@huawei.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/523FFF4E.9050401@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/sysctl.h | 2 +-
 kernel/hung_task.c           | 2 +-
 kernel/sysctl.c              | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index bf8086b2506e..9552afa733d8 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -2,8 +2,8 @@
 #define _SCHED_SYSCTL_H
 
 #ifdef CONFIG_DETECT_HUNG_TASK
+extern int	     sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
-extern unsigned long sysctl_hung_task_check_count;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_warnings;
 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 3e97fb126e6b..042252383fd2 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -20,7 +20,7 @@
 /*
  * The number of tasks checked:
  */
-unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
 
 /*
  * Limit number of tasks checked in a batch.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b2f06f3c6a3f..b24ed7f87a14 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -962,9 +962,10 @@ static struct ctl_table kern_table[] = {
 	{
 		.procname	= "hung_task_check_count",
 		.data		= &sysctl_hung_task_check_count,
-		.maxlen		= sizeof(unsigned long),
+		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
 	},
 	{
 		.procname	= "hung_task_timeout_secs",
-- 
cgit v1.2.3


From 47d06e532e95b71c0db3839ebdef3fe8812fca2c Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 10 Sep 2013 10:52:35 -0400
Subject: random: run random_int_secret_init() run after all late_initcalls

The some platforms (e.g., ARM) initializes their clocks as
late_initcalls for some unknown reason.  So make sure
random_int_secret_init() is run after all of the late_initcalls are
run.

Cc: stable@vger.kernel.org
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 drivers/char/random.c  | 3 +--
 include/linux/random.h | 1 +
 init/main.c            | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0d91fe52f3f5..92e6c67e1ae6 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1462,12 +1462,11 @@ struct ctl_table random_table[] = {
 
 static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
 
-static int __init random_int_secret_init(void)
+int random_int_secret_init(void)
 {
 	get_random_bytes(random_int_secret, sizeof(random_int_secret));
 	return 0;
 }
-late_initcall(random_int_secret_init);
 
 /*
  * Get a random word for internal kernel use only. Similar to urandom but
diff --git a/include/linux/random.h b/include/linux/random.h
index 3b9377d6b7a5..6312dd9ba449 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -17,6 +17,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags);
 extern void get_random_bytes(void *buf, int nbytes);
 extern void get_random_bytes_arch(void *buf, int nbytes);
 void generate_random_uuid(unsigned char uuid_out[16]);
+extern int random_int_secret_init(void);
 
 #ifndef MODULE
 extern const struct file_operations random_fops, urandom_fops;
diff --git a/init/main.c b/init/main.c
index d03d2ec2eacf..586cd3359c02 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,6 +75,7 @@
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/sched_clock.h>
+#include <linux/random.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -778,6 +779,7 @@ static void __init do_basic_setup(void)
 	do_ctors();
 	usermodehelper_enable();
 	do_initcalls();
+	random_int_secret_init();
 }
 
 static void __init do_pre_smp_initcalls(void)
-- 
cgit v1.2.3


From 2a855b644c310d5db5a80b8816c0c7748c167977 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 23 Aug 2013 09:40:42 -0700
Subject: rcu: Make list_splice_init_rcu() account for RCU readers

The list_splice_init_rcu() function allows a list visible to RCU readers
to be spliced into another list visible to RCU readers.  This is OK,
except for the use of INIT_LIST_HEAD(), which does pointer updates
without doing anything to make those updates safe for concurrent readers.

Of course, most of the time INIT_LIST_HEAD() is being used in reader-free
contexts, such as initialization or cleanup, so it is OK for it to update
pointers in an unsafe-for-RCU-readers manner.  This commit therefore
creates an INIT_LIST_HEAD_RCU() that uses ACCESS_ONCE() to make the updates
reader-safe.  The reason that we can use ACCESS_ONCE() instead of the more
typical rcu_assign_pointer() is that list_splice_init_rcu() is updating the
pointers to reference something that is already visible to readers, so
that there is no problem with pre-initialized values.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 4106721c4e5e..45a0a9e81478 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -18,6 +18,21 @@
  * be used anywhere you would want to use a list_empty_rcu().
  */
 
+/*
+ * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers
+ * @list: list to be initialized
+ *
+ * You should instead use INIT_LIST_HEAD() for normal initialization and
+ * cleanup tasks, when readers have no access to the list being initialized.
+ * However, if the list being initialized is visible to readers, you
+ * need to keep the compiler from being too mischievous.
+ */
+static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
+{
+	ACCESS_ONCE(list->next) = list;
+	ACCESS_ONCE(list->prev) = list;
+}
+
 /*
  * return the ->next pointer of a list_head in an rcu safe
  * way, we must not access it directly
@@ -191,9 +206,13 @@ static inline void list_splice_init_rcu(struct list_head *list,
 	if (list_empty(list))
 		return;
 
-	/* "first" and "last" tracking list, so initialize it. */
+	/*
+	 * "first" and "last" tracking list, so initialize it.  RCU readers
+	 * have access to this list, so we must use INIT_LIST_HEAD_RCU()
+	 * instead of INIT_LIST_HEAD().
+	 */
 
-	INIT_LIST_HEAD(list);
+	INIT_LIST_HEAD_RCU(list);
 
 	/*
 	 * At this point, the list body still points to the source list.
-- 
cgit v1.2.3


From 774487611c949e6d194877e7147f6eeeec092b53 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 16 Sep 2013 17:02:00 +0100
Subject: iio: pressure-core: st: Provide support for the Vdd power supply

The power to some of the sensors are controlled by regulators. In most
cases these are 'always on', but if not they will fail to work until
the regulator is enabled using the relevant APIs. This patch allows for
the Vdd power supply to be specified by either platform data or Device
Tree.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/pressure/st_pressure_core.c | 28 ++++++++++++++++++++++++++++
 include/linux/iio/common/st_sensors.h   |  3 +++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 2da411b6925b..baeab109f41d 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -23,6 +23,7 @@
 #include <linux/iio/sysfs.h>
 #include <linux/iio/trigger.h>
 #include <linux/iio/buffer.h>
+#include <linux/regulator/consumer.h>
 #include <asm/unaligned.h>
 
 #include <linux/iio/common/st_sensors.h>
@@ -313,6 +314,29 @@ static const struct iio_trigger_ops st_press_trigger_ops = {
 #define ST_PRESS_TRIGGER_OPS NULL
 #endif
 
+static void st_press_power_enable(struct iio_dev *indio_dev)
+{
+	struct st_sensor_data *pdata = iio_priv(indio_dev);
+	int err;
+
+	/* Regulators not mandatory, but if requested we should enable it. */
+	pdata->vdd = devm_regulator_get_optional(&indio_dev->dev, "vdd");
+	if (!IS_ERR(pdata->vdd)) {
+		err = regulator_enable(pdata->vdd);
+		if (err != 0)
+			dev_warn(&indio_dev->dev,
+				 "Failed to enable specified Vdd supply\n");
+	}
+}
+
+static void st_press_power_disable(struct iio_dev *indio_dev)
+{
+	struct st_sensor_data *pdata = iio_priv(indio_dev);
+
+	if (!IS_ERR(pdata->vdd))
+		regulator_disable(pdata->vdd);
+}
+
 int st_press_common_probe(struct iio_dev *indio_dev,
 				struct st_sensors_platform_data *plat_data)
 {
@@ -323,6 +347,8 @@ int st_press_common_probe(struct iio_dev *indio_dev,
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &press_info;
 
+	st_press_power_enable(indio_dev);
+
 	err = st_sensors_check_device_support(indio_dev,
 					      ARRAY_SIZE(st_press_sensors),
 					      st_press_sensors);
@@ -380,6 +406,8 @@ void st_press_common_remove(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *pdata = iio_priv(indio_dev);
 
+	st_press_power_disable(indio_dev);
+
 	iio_device_unregister(indio_dev);
 	if (pdata->get_irq_data_ready(indio_dev) > 0)
 		st_sensors_deallocate_trigger(indio_dev);
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index e732fda6c8e6..968b84e5f380 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -16,6 +16,7 @@
 #include <linux/irqreturn.h>
 #include <linux/iio/trigger.h>
 #include <linux/bitops.h>
+#include <linux/regulator/consumer.h>
 
 #include <linux/platform_data/st_sensors_pdata.h>
 
@@ -201,6 +202,7 @@ struct st_sensors {
  * @trig: The trigger in use by the core driver.
  * @sensor: Pointer to the current sensor struct in use.
  * @current_fullscale: Maximum range of measure by the sensor.
+ * @vdd: Pointer to sensor's Vdd power supply
  * @enabled: Status of the sensor (false->off, true->on).
  * @multiread_bit: Use or not particular bit for [I2C/SPI] multiread.
  * @buffer_data: Data used by buffer part.
@@ -216,6 +218,7 @@ struct st_sensor_data {
 	struct iio_trigger *trig;
 	struct st_sensors *sensor;
 	struct st_sensor_fullscale_avl *current_fullscale;
+	struct regulator *vdd;
 
 	bool enabled;
 	bool multiread_bit;
-- 
cgit v1.2.3


From 71e1980c8d465fd304d867d36f2246b72513efed Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 16 Sep 2013 17:02:00 +0100
Subject: iio: pressure-core: st: Provide support for the Vdd_IO power supply

The power to some of the sensors are controlled by regulators. In most
cases these are 'always on', but if not they will fail to work until
the regulator is enabled using the relevant APIs. This patch allows for
the Vdd_IO power supply to be specified by either platform data or
Device Tree.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/pressure/st_pressure_core.c | 13 ++++++++++++-
 include/linux/iio/common/st_sensors.h   |  2 ++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index baeab109f41d..58083f9d51c5 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -319,7 +319,7 @@ static void st_press_power_enable(struct iio_dev *indio_dev)
 	struct st_sensor_data *pdata = iio_priv(indio_dev);
 	int err;
 
-	/* Regulators not mandatory, but if requested we should enable it. */
+	/* Regulators not mandatory, but if requested we should enable them. */
 	pdata->vdd = devm_regulator_get_optional(&indio_dev->dev, "vdd");
 	if (!IS_ERR(pdata->vdd)) {
 		err = regulator_enable(pdata->vdd);
@@ -327,6 +327,14 @@ static void st_press_power_enable(struct iio_dev *indio_dev)
 			dev_warn(&indio_dev->dev,
 				 "Failed to enable specified Vdd supply\n");
 	}
+
+	pdata->vdd_io = devm_regulator_get_optional(&indio_dev->dev, "vddio");
+	if (!IS_ERR(pdata->vdd_io)) {
+		err = regulator_enable(pdata->vdd_io);
+		if (err != 0)
+			dev_warn(&indio_dev->dev,
+				 "Failed to enable specified Vdd_IO supply\n");
+	}
 }
 
 static void st_press_power_disable(struct iio_dev *indio_dev)
@@ -335,6 +343,9 @@ static void st_press_power_disable(struct iio_dev *indio_dev)
 
 	if (!IS_ERR(pdata->vdd))
 		regulator_disable(pdata->vdd);
+
+	if (!IS_ERR(pdata->vdd_io))
+		regulator_disable(pdata->vdd_io);
 }
 
 int st_press_common_probe(struct iio_dev *indio_dev,
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 968b84e5f380..3c005eb3a0a4 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -203,6 +203,7 @@ struct st_sensors {
  * @sensor: Pointer to the current sensor struct in use.
  * @current_fullscale: Maximum range of measure by the sensor.
  * @vdd: Pointer to sensor's Vdd power supply
+ * @vdd_io: Pointer to sensor's Vdd-IO power supply
  * @enabled: Status of the sensor (false->off, true->on).
  * @multiread_bit: Use or not particular bit for [I2C/SPI] multiread.
  * @buffer_data: Data used by buffer part.
@@ -219,6 +220,7 @@ struct st_sensor_data {
 	struct st_sensors *sensor;
 	struct st_sensor_fullscale_avl *current_fullscale;
 	struct regulator *vdd;
+	struct regulator *vdd_io;
 
 	bool enabled;
 	bool multiread_bit;
-- 
cgit v1.2.3


From eeb446581ba23a5a36b4f5c7bfa2b1f8f7c9fb66 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Wed, 28 Nov 2012 18:17:25 -0500
Subject: ARM: GIC: function to retrieve the physical address of the SGIR

In order to have early assembly code signal other CPUs in the system,
we need to get the physical address for the SGIR register used to
send IPIs.  Because the register will be used with a precomputed CPU
interface ID number, there is no need for any locking in the assembly
code where this register is written to.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 29 +++++++++++++++++++++++++++++
 include/linux/irqchip/arm-gic.h |  1 +
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 6365b59181ee..09fdf3d574cd 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -769,6 +769,33 @@ void gic_migrate_target(unsigned int new_cpu_id)
 		}
 	}
 }
+
+/*
+ * gic_get_sgir_physaddr - get the physical address for the SGI register
+ *
+ * REturn the physical address of the SGI register to be used
+ * by some early assembly code when the kernel is not yet available.
+ */
+static unsigned long gic_dist_physaddr;
+
+unsigned long gic_get_sgir_physaddr(void)
+{
+	if (!gic_dist_physaddr)
+		return 0;
+	return gic_dist_physaddr + GIC_DIST_SOFTINT;
+}
+
+void __init gic_init_physaddr(struct device_node *node)
+{
+	struct resource res;
+	if (of_address_to_resource(node, 0, &res) == 0) {
+		gic_dist_physaddr = res.start;
+		pr_info("GIC physical location is %#lx\n", gic_dist_physaddr);
+	}
+}
+
+#else
+#define gic_init_physaddr(node)  do { } while (0)
 #endif
 
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
@@ -952,6 +979,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent)
 		percpu_offset = 0;
 
 	gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node);
+	if (!gic_cnt)
+		gic_init_physaddr(node);
 
 	if (parent) {
 		irq = irq_of_parse_and_map(node, 0);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 46544e381bf9..dc30835099e3 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -78,6 +78,7 @@ static inline void gic_init(unsigned int nr, int start,
 
 int gic_get_cpu_id(unsigned int cpu);
 void gic_migrate_target(unsigned int new_cpu_id);
+unsigned long gic_get_sgir_physaddr(void);
 
 #endif /* __ASSEMBLY */
 
-- 
cgit v1.2.3


From 14d2ca615a85e2dbc744c12c296affd35f119fa7 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Wed, 28 Nov 2012 18:48:19 -0500
Subject: ARM: GIC: interface to send a SGI directly

The regular gic_raise_softirq() takes as input a CPU mask which is not
adequate when we need to send an IPI to a CPU which is not represented
in the kernel to GIC mapping.  That is the case with the b.L switcher
when GIC migration to the inbound CPU has not yet occurred.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 14 ++++++++++++++
 include/linux/irqchip/arm-gic.h |  1 +
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 09fdf3d574cd..9031171c141b 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -673,6 +673,20 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 #endif
 
 #ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_send_sgi - send a SGI directly to given CPU interface number
+ *
+ * cpu_id: the ID for the destination CPU interface
+ * irq: the IPI number to send a SGI for
+ */
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq)
+{
+	BUG_ON(cpu_id >= NR_GIC_CPU_IF);
+	cpu_id = 1 << cpu_id;
+	/* this always happens on GIC0 */
+	writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+}
+
 /*
  * gic_get_cpu_id - get the CPU interface ID for the specified CPU
  *
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index dc30835099e3..cac496b1e279 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -76,6 +76,7 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
 int gic_get_cpu_id(unsigned int cpu);
 void gic_migrate_target(unsigned int new_cpu_id);
 unsigned long gic_get_sgir_physaddr(void);
-- 
cgit v1.2.3


From d536bf3dc97417471e2c5098837a1cddd7fbb3c7 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@huawei.com>
Date: Mon, 2 Sep 2013 11:57:35 +0800
Subject: ACPI / processor: use apic_id and remove duplicated _MAT evaluation

Since APIC id is saved in processor struct, just use it and
remove the duplicated _MAT evaluation.

Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/ia64/kernel/acpi.c       | 38 ++++----------------------------------
 arch/x86/kernel/acpi/boot.c   | 38 +++-----------------------------------
 drivers/acpi/acpi_processor.c |  2 +-
 include/linux/acpi.h          |  2 +-
 4 files changed, 9 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 5eb71d22c3d5..59d52e3aef12 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -882,40 +882,10 @@ __init void prefill_possible_map(void)
 		set_cpu_possible(i, true);
 }
 
-static int _acpi_map_lsapic(acpi_handle handle, int *pcpu)
+static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 {
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *obj;
-	struct acpi_madt_local_sapic *lsapic;
 	cpumask_t tmp_map;
-	int cpu, physid;
-
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
-		return -EINVAL;
-
-	if (!buffer.length || !buffer.pointer)
-		return -EINVAL;
-
-	obj = buffer.pointer;
-	if (obj->type != ACPI_TYPE_BUFFER)
-	{
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
-
-	lsapic = (struct acpi_madt_local_sapic *)obj->buffer.pointer;
-
-	if ((lsapic->header.type != ACPI_MADT_TYPE_LOCAL_SAPIC) ||
-	    (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))) {
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
-
-	physid = ((lsapic->id << 8) | (lsapic->eid));
-
-	kfree(buffer.pointer);
-	buffer.length = ACPI_ALLOCATE_BUFFER;
-	buffer.pointer = NULL;
+	int cpu;
 
 	cpumask_complement(&tmp_map, cpu_present_mask);
 	cpu = cpumask_first(&tmp_map);
@@ -934,9 +904,9 @@ static int _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 }
 
 /* wrapper to silence section mismatch warning */
-int __ref acpi_map_lsapic(acpi_handle handle, int *pcpu)
+int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 {
-	return _acpi_map_lsapic(handle, pcpu);
+	return _acpi_map_lsapic(handle, physid, pcpu);
 }
 EXPORT_SYMBOL(acpi_map_lsapic);
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 40c76604199f..f6dbb2f5e39f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -614,44 +614,12 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 #endif
 }
 
-static int _acpi_map_lsapic(acpi_handle handle, int *pcpu)
+static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 {
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *obj;
-	struct acpi_madt_local_apic *lapic;
 	cpumask_var_t tmp_map, new_map;
-	u8 physid;
 	int cpu;
 	int retval = -ENOMEM;
 
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
-		return -EINVAL;
-
-	if (!buffer.length || !buffer.pointer)
-		return -EINVAL;
-
-	obj = buffer.pointer;
-	if (obj->type != ACPI_TYPE_BUFFER ||
-	    obj->buffer.length < sizeof(*lapic)) {
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
-
-	lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer;
-
-	if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC ||
-	    !(lapic->lapic_flags & ACPI_MADT_ENABLED)) {
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
-
-	physid = lapic->id;
-
-	kfree(buffer.pointer);
-	buffer.length = ACPI_ALLOCATE_BUFFER;
-	buffer.pointer = NULL;
-	lapic = NULL;
-
 	if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
 		goto out;
 
@@ -689,9 +657,9 @@ out:
 }
 
 /* wrapper to silence section mismatch warning */
-int __ref acpi_map_lsapic(acpi_handle handle, int *pcpu)
+int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 {
-	return _acpi_map_lsapic(handle, pcpu);
+	return _acpi_map_lsapic(handle, physid, pcpu);
 }
 EXPORT_SYMBOL(acpi_map_lsapic);
 
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index f89f914cb97e..66c9b702894b 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -181,7 +181,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr)
 	cpu_maps_update_begin();
 	cpu_hotplug_begin();
 
-	ret = acpi_map_lsapic(pr->handle, &pr->id);
+	ret = acpi_map_lsapic(pr->handle, pr->apic_id, &pr->id);
 	if (ret)
 		goto out;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a5db4aeefa36..3bc74141453f 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -116,7 +116,7 @@ void acpi_numa_arch_fixup(void);
 
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
-int acpi_map_lsapic(acpi_handle handle, int *pcpu);
+int acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu);
 int acpi_unmap_lsapic(int cpu);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
-- 
cgit v1.2.3


From dedf1e4dfd5477b4315ad451b4be0ff8d9f7e85f Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 5 Sep 2013 15:05:53 -0600
Subject: ACPI: Write _OSC bit field definitions in hex

Update _OSC definition comments to correspond to the 1-based spec wording
(DWORD 1, etc.)  Write _OSC field #defines as hex to make clear that they
are bits in a 32-bit DWORD, not arbitrary values.  No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 54 ++++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a5db4aeefa36..164ba10ddcb3 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -304,39 +304,39 @@ struct acpi_osc_context {
 #define OSC_SUPPORT_TYPE 		1
 #define OSC_CONTROL_TYPE		2
 
-/* _OSC DW0 Definition */
-#define OSC_QUERY_ENABLE		1
-#define OSC_REQUEST_ERROR		2
-#define OSC_INVALID_UUID_ERROR		4
-#define OSC_INVALID_REVISION_ERROR	8
-#define OSC_CAPABILITIES_MASK_ERROR	16
+/* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */
+#define OSC_QUERY_ENABLE			0x00000001  /* input */
+#define OSC_REQUEST_ERROR			0x00000002  /* return */
+#define OSC_INVALID_UUID_ERROR			0x00000004  /* return */
+#define OSC_INVALID_REVISION_ERROR		0x00000008  /* return */
+#define OSC_CAPABILITIES_MASK_ERROR		0x00000010  /* return */
 
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
-/* platform-wide _OSC bits */
-#define OSC_SB_PAD_SUPPORT		1
-#define OSC_SB_PPC_OST_SUPPORT		2
-#define OSC_SB_PR3_SUPPORT		4
-#define OSC_SB_HOTPLUG_OST_SUPPORT	8
-#define OSC_SB_APEI_SUPPORT		16
+/* Platform-Wide Capabilities _OSC: Capabilities DWORD 2: Support Field */
+#define OSC_SB_PAD_SUPPORT			0x00000001
+#define OSC_SB_PPC_OST_SUPPORT			0x00000002
+#define OSC_SB_PR3_SUPPORT			0x00000004
+#define OSC_SB_HOTPLUG_OST_SUPPORT		0x00000008
+#define OSC_SB_APEI_SUPPORT			0x00000010
+#define OSC_SB_CPC_SUPPORT			0x00000020
 
 extern bool osc_sb_apei_support_acked;
 
-/* PCI defined _OSC bits */
-/* _OSC DW1 Definition (OS Support Fields) */
-#define OSC_EXT_PCI_CONFIG_SUPPORT		1
-#define OSC_ACTIVE_STATE_PWR_SUPPORT 		2
-#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT	4
-#define OSC_PCI_SEGMENT_GROUPS_SUPPORT		8
-#define OSC_MSI_SUPPORT				16
-#define OSC_PCI_SUPPORT_MASKS			0x1f
-
-/* _OSC DW1 Definition (OS Control Fields) */
-#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL	1
-#define OSC_SHPC_NATIVE_HP_CONTROL 		2
-#define OSC_PCI_EXPRESS_PME_CONTROL		4
-#define OSC_PCI_EXPRESS_AER_CONTROL		8
-#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL	16
+/* PCI Host Bridge _OSC: Capabilities DWORD 2: Support Field */
+#define OSC_EXT_PCI_CONFIG_SUPPORT		0x00000001
+#define OSC_ACTIVE_STATE_PWR_SUPPORT		0x00000002
+#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT	0x00000004
+#define OSC_PCI_SEGMENT_GROUPS_SUPPORT		0x00000008
+#define OSC_MSI_SUPPORT				0x00000010
+#define OSC_PCI_SUPPORT_MASKS			0x0000001f
+
+/* PCI Host Bridge _OSC: Capabilities DWORD 3: Control Field */
+#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL	0x00000001
+#define OSC_SHPC_NATIVE_HP_CONTROL		0x00000002
+#define OSC_PCI_EXPRESS_PME_CONTROL		0x00000004
+#define OSC_PCI_EXPRESS_AER_CONTROL		0x00000008
+#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL	0x00000010
 
 #define OSC_PCI_CONTROL_MASKS 	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | 	\
 				OSC_SHPC_NATIVE_HP_CONTROL | 		\
-- 
cgit v1.2.3


From b938a229c85a567de7dba2d806d9f63a7c90483e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 5 Sep 2013 15:05:54 -0600
Subject: ACPI: Rename OSC_QUERY_TYPE to OSC_QUERY_DWORD

OSC_QUERY_TYPE isn't a "type"; it's an index into the _OSC Capabilities
Buffer of DWORDs.  Rename OSC_QUERY_TYPE, OSC_SUPPORT_TYPE, and
OSC_CONTROL_TYPE to OSC_QUERY_DWORD, etc., to make this clear.
No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/apei-base.c |  6 +++---
 drivers/acpi/bus.c            | 18 +++++++++---------
 drivers/acpi/pci_root.c       | 14 +++++++-------
 include/linux/acpi.h          |  7 ++++---
 4 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 46f80e2c92f7..6d2c49b86b7f 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -758,9 +758,9 @@ int apei_osc_setup(void)
 		.cap.pointer	= capbuf,
 	};
 
-	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
-	capbuf[OSC_SUPPORT_TYPE] = 1;
-	capbuf[OSC_CONTROL_TYPE] = 0;
+	capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_DWORD] = 1;
+	capbuf[OSC_CONTROL_DWORD] = 0;
 
 	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
 	    || ACPI_FAILURE(acpi_run_osc(handle, &context)))
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index b587ec8257b2..fbcfaa682c15 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -255,7 +255,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
 			acpi_print_osc_error(handle, context,
 				"_OSC invalid revision");
 		if (errors & OSC_CAPABILITIES_MASK_ERROR) {
-			if (((u32 *)context->cap.pointer)[OSC_QUERY_TYPE]
+			if (((u32 *)context->cap.pointer)[OSC_QUERY_DWORD]
 			    & OSC_QUERY_ENABLE)
 				goto out_success;
 			status = AE_SUPPORT;
@@ -295,30 +295,30 @@ static void acpi_bus_osc_support(void)
 	};
 	acpi_handle handle;
 
-	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
-	capbuf[OSC_SUPPORT_TYPE] = OSC_SB_PR3_SUPPORT; /* _PR3 is in use */
+	capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_DWORD] = OSC_SB_PR3_SUPPORT; /* _PR3 is in use */
 #if defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) ||\
 			defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE)
-	capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PAD_SUPPORT;
+	capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PAD_SUPPORT;
 #endif
 
 #if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE)
-	capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT;
+	capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PPC_OST_SUPPORT;
 #endif
 
 #ifdef ACPI_HOTPLUG_OST
-	capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_HOTPLUG_OST_SUPPORT;
+	capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_OST_SUPPORT;
 #endif
 
 	if (!ghes_disable)
-		capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT;
+		capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_APEI_SUPPORT;
 	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
 		return;
 	if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) {
 		u32 *capbuf_ret = context.ret.pointer;
-		if (context.ret.length > OSC_SUPPORT_TYPE)
+		if (context.ret.length > OSC_SUPPORT_DWORD)
 			osc_sb_apei_support_acked =
-				capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT;
+				capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
 		kfree(context.ret.pointer);
 	}
 	/* do we need to check other returned cap? Sounds no */
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index d3874f425653..323afd7ccfbf 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -158,14 +158,14 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root,
 	support &= OSC_PCI_SUPPORT_MASKS;
 	support |= root->osc_support_set;
 
-	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
-	capbuf[OSC_SUPPORT_TYPE] = support;
+	capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_DWORD] = support;
 	if (control) {
 		*control &= OSC_PCI_CONTROL_MASKS;
-		capbuf[OSC_CONTROL_TYPE] = *control | root->osc_control_set;
+		capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set;
 	} else {
 		/* Run _OSC query only with existing controls. */
-		capbuf[OSC_CONTROL_TYPE] = root->osc_control_set;
+		capbuf[OSC_CONTROL_DWORD] = root->osc_control_set;
 	}
 
 	status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
@@ -357,9 +357,9 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req)
 		goto out;
 	}
 
-	capbuf[OSC_QUERY_TYPE] = 0;
-	capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set;
-	capbuf[OSC_CONTROL_TYPE] = ctrl;
+	capbuf[OSC_QUERY_DWORD] = 0;
+	capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set;
+	capbuf[OSC_CONTROL_DWORD] = ctrl;
 	status = acpi_pci_run_osc(handle, capbuf, mask);
 	if (ACPI_SUCCESS(status))
 		root->osc_control_set = *mask;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 164ba10ddcb3..d220d1465d9b 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -300,9 +300,10 @@ struct acpi_osc_context {
 	struct acpi_buffer ret; /* free by caller if success */
 };
 
-#define OSC_QUERY_TYPE			0
-#define OSC_SUPPORT_TYPE 		1
-#define OSC_CONTROL_TYPE		2
+/* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */
+#define OSC_QUERY_DWORD				0	/* DWORD 1 */
+#define OSC_SUPPORT_DWORD			1	/* DWORD 2 */
+#define OSC_CONTROL_DWORD			2	/* DWORD 3 */
 
 /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */
 #define OSC_QUERY_ENABLE			0x00000001  /* input */
-- 
cgit v1.2.3


From c8678473609b0271ffa0963af82e575312e882cb Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 5 Sep 2013 15:05:55 -0600
Subject: ACPI: Tidy acpi_run_osc() declarations

Move the acpi_run_osc() prototype next to the related structure and
update comments.  No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d220d1465d9b..a2f501c0a4f4 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -294,12 +294,14 @@ void __init acpi_nvs_nosave_s3(void);
 #endif /* CONFIG_PM_SLEEP */
 
 struct acpi_osc_context {
-	char *uuid_str; /* uuid string */
+	char *uuid_str;			/* UUID string */
 	int rev;
-	struct acpi_buffer cap; /* arg2/arg3 */
-	struct acpi_buffer ret; /* free by caller if success */
+	struct acpi_buffer cap;		/* list of DWORD capabilities */
+	struct acpi_buffer ret;		/* free by caller if success */
 };
 
+acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
+
 /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */
 #define OSC_QUERY_DWORD				0	/* DWORD 1 */
 #define OSC_SUPPORT_DWORD			1	/* DWORD 2 */
@@ -312,8 +314,6 @@ struct acpi_osc_context {
 #define OSC_INVALID_REVISION_ERROR		0x00000008  /* return */
 #define OSC_CAPABILITIES_MASK_ERROR		0x00000010  /* return */
 
-acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
-
 /* Platform-Wide Capabilities _OSC: Capabilities DWORD 2: Support Field */
 #define OSC_SB_PAD_SUPPORT			0x00000001
 #define OSC_SB_PPC_OST_SUPPORT			0x00000002
-- 
cgit v1.2.3


From b4481934167430d82ea9c857cf95276460e4b607 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 5 Sep 2013 15:22:11 -0600
Subject: ACPI: Remove unused OSC_PCI_NATIVE_HOTPLUG

OSC_PCI_NATIVE_HOTPLUG is completely unused, so remove it.  No functional
change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a2f501c0a4f4..ca072e3927a2 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -345,9 +345,6 @@ extern bool osc_sb_apei_support_acked;
 				OSC_PCI_EXPRESS_AER_CONTROL |		\
 				OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
 
-#define OSC_PCI_NATIVE_HOTPLUG	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |	\
-				OSC_SHPC_NATIVE_HP_CONTROL)
-
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 					     u32 *mask, u32 req);
 
-- 
cgit v1.2.3


From 335b15097d571007b125eb9fe4ef1f84e61bd31d Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 5 Sep 2013 15:24:24 -0600
Subject: ACPI: Write OSC_PCI_CONTROL_MASKS like OSC_PCI_SUPPORT_MASKS

We write OSC_PCI_SUPPORT_MASKS as a simple 0x1f, so do the same
for OSC_PCI_CONTROL_MASKS.  No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ca072e3927a2..bb4e7701b26b 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -338,12 +338,7 @@ extern bool osc_sb_apei_support_acked;
 #define OSC_PCI_EXPRESS_PME_CONTROL		0x00000004
 #define OSC_PCI_EXPRESS_AER_CONTROL		0x00000008
 #define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL	0x00000010
-
-#define OSC_PCI_CONTROL_MASKS 	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | 	\
-				OSC_SHPC_NATIVE_HP_CONTROL | 		\
-				OSC_PCI_EXPRESS_PME_CONTROL |		\
-				OSC_PCI_EXPRESS_AER_CONTROL |		\
-				OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
+#define OSC_PCI_CONTROL_MASKS			0x0000001f
 
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 					     u32 *mask, u32 req);
-- 
cgit v1.2.3


From 7dab9ef4f0823072a3c9afdb3b373c9f2f38848b Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 5 Sep 2013 15:07:39 -0600
Subject: PCI/ACPI: Name _OSC #defines more consistently

Make PCI Host Bridge _OSC #defines more consistent.  No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/pci_root.c          | 19 +++++++++----------
 drivers/pci/hotplug/acpi_pcihp.c |  2 +-
 drivers/pci/hotplug/shpchp.h     |  2 +-
 include/linux/acpi.h             | 12 ++++++------
 4 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 323afd7ccfbf..28dd55509789 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -49,10 +49,10 @@ static int acpi_pci_root_add(struct acpi_device *device,
 			     const struct acpi_device_id *not_used);
 static void acpi_pci_root_remove(struct acpi_device *device);
 
-#define ACPI_PCIE_REQ_SUPPORT (OSC_EXT_PCI_CONFIG_SUPPORT \
-				| OSC_ACTIVE_STATE_PWR_SUPPORT \
-				| OSC_CLOCK_PWR_CAPABILITY_SUPPORT \
-				| OSC_MSI_SUPPORT)
+#define ACPI_PCIE_REQ_SUPPORT (OSC_PCI_EXT_CONFIG_SUPPORT \
+				| OSC_PCI_ASPM_SUPPORT \
+				| OSC_PCI_CLOCK_PM_SUPPORT \
+				| OSC_PCI_MSI_SUPPORT)
 
 static const struct acpi_device_id root_device_ids[] = {
 	{"PNP0A03", 0},
@@ -439,13 +439,12 @@ static int acpi_pci_root_add(struct acpi_device *device,
 	acpi_pci_osc_support(root, flags);
 
 	if (pci_ext_cfg_avail())
-		flags |= OSC_EXT_PCI_CONFIG_SUPPORT;
+		flags |= OSC_PCI_EXT_CONFIG_SUPPORT;
 	if (pcie_aspm_support_enabled()) {
-		flags |= OSC_ACTIVE_STATE_PWR_SUPPORT |
-		OSC_CLOCK_PWR_CAPABILITY_SUPPORT;
+		flags |= OSC_PCI_ASPM_SUPPORT | OSC_PCI_CLOCK_PM_SUPPORT;
 	}
 	if (pci_msi_enabled())
-		flags |= OSC_MSI_SUPPORT;
+		flags |= OSC_PCI_MSI_SUPPORT;
 	if (flags != base_flags) {
 		status = acpi_pci_osc_support(root, flags);
 		if (ACPI_FAILURE(status)) {
@@ -458,7 +457,7 @@ static int acpi_pci_root_add(struct acpi_device *device,
 
 	if (!pcie_ports_disabled
 	    && (flags & ACPI_PCIE_REQ_SUPPORT) == ACPI_PCIE_REQ_SUPPORT) {
-		flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL
+		flags = OSC_PCI_EXPRESS_CAPABILITY_CONTROL
 			| OSC_PCI_EXPRESS_NATIVE_HP_CONTROL
 			| OSC_PCI_EXPRESS_PME_CONTROL;
 
@@ -474,7 +473,7 @@ static int acpi_pci_root_add(struct acpi_device *device,
 			"Requesting ACPI _OSC control (0x%02x)\n", flags);
 
 		status = acpi_pci_osc_control_set(handle, &flags,
-				       OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
+				       OSC_PCI_EXPRESS_CAPABILITY_CONTROL);
 		if (ACPI_SUCCESS(status)) {
 			dev_info(&device->dev,
 				"ACPI _OSC control (0x%02x) granted\n", flags);
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 2a47e82821da..f8140164ec0b 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -338,7 +338,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
 	acpi_handle chandle, handle;
 	struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
 
-	flags &= OSC_SHPC_NATIVE_HP_CONTROL;
+	flags &= OSC_PCI_SHPC_NATIVE_HP_CONTROL;
 	if (!flags) {
 		err("Invalid flags %u specified!\n", flags);
 		return -EINVAL;
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index e260f207a90e..d876e4b3c6a9 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -191,7 +191,7 @@ static inline const char *slot_name(struct slot *slot)
 #include <linux/pci-acpi.h>
 static inline int get_hp_hw_control_from_firmware(struct pci_dev *dev)
 {
-	u32 flags = OSC_SHPC_NATIVE_HP_CONTROL;
+	u32 flags = OSC_PCI_SHPC_NATIVE_HP_CONTROL;
 	return acpi_get_hp_hw_control_from_firmware(dev, flags);
 }
 #else
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index bb4e7701b26b..e2e52cf53224 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -325,19 +325,19 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 extern bool osc_sb_apei_support_acked;
 
 /* PCI Host Bridge _OSC: Capabilities DWORD 2: Support Field */
-#define OSC_EXT_PCI_CONFIG_SUPPORT		0x00000001
-#define OSC_ACTIVE_STATE_PWR_SUPPORT		0x00000002
-#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT	0x00000004
+#define OSC_PCI_EXT_CONFIG_SUPPORT		0x00000001
+#define OSC_PCI_ASPM_SUPPORT			0x00000002
+#define OSC_PCI_CLOCK_PM_SUPPORT		0x00000004
 #define OSC_PCI_SEGMENT_GROUPS_SUPPORT		0x00000008
-#define OSC_MSI_SUPPORT				0x00000010
+#define OSC_PCI_MSI_SUPPORT			0x00000010
 #define OSC_PCI_SUPPORT_MASKS			0x0000001f
 
 /* PCI Host Bridge _OSC: Capabilities DWORD 3: Control Field */
 #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL	0x00000001
-#define OSC_SHPC_NATIVE_HP_CONTROL		0x00000002
+#define OSC_PCI_SHPC_NATIVE_HP_CONTROL		0x00000002
 #define OSC_PCI_EXPRESS_PME_CONTROL		0x00000004
 #define OSC_PCI_EXPRESS_AER_CONTROL		0x00000008
-#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL	0x00000010
+#define OSC_PCI_EXPRESS_CAPABILITY_CONTROL	0x00000010
 #define OSC_PCI_CONTROL_MASKS			0x0000001f
 
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
-- 
cgit v1.2.3


From 2ff2a7d03bbe472ed44a8380dbdbea490d81c59d Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Mon, 23 Sep 2013 16:57:03 +0800
Subject: cgroup: kill css_id

The only user of css_id was memcg, and it has been convered to use
cgroup->id, so kill css_id.

Signed-off-by: Li Zefan <lizefan@huwei.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h |  37 --------
 kernel/cgroup.c        | 248 +------------------------------------------------
 2 files changed, 1 insertion(+), 284 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 3561d305b1e0..39c1d9469677 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -612,11 +612,6 @@ struct cgroup_subsys {
 	int subsys_id;
 	int disabled;
 	int early_init;
-	/*
-	 * True if this subsys uses ID. ID is not available before cgroup_init()
-	 * (not available in early_init time.)
-	 */
-	bool use_id;
 
 	/*
 	 * If %false, this subsystem is properly hierarchical -
@@ -642,9 +637,6 @@ struct cgroup_subsys {
 	 */
 	struct cgroupfs_root *root;
 	struct list_head sibling;
-	/* used when use_id == true */
-	struct idr idr;
-	spinlock_t id_lock;
 
 	/* list of cftype_sets */
 	struct list_head cftsets;
@@ -875,35 +867,6 @@ int css_scan_tasks(struct cgroup_subsys_state *css,
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
-/*
- * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
- * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
- * CSS ID is assigned at cgroup allocation (create) automatically
- * and removed when subsys calls free_css_id() function. This is because
- * the lifetime of cgroup_subsys_state is subsys's matter.
- *
- * Looking up and scanning function should be called under rcu_read_lock().
- * Taking cgroup_mutex is not necessary for following calls.
- * But the css returned by this routine can be "not populated yet" or "being
- * destroyed". The caller should check css and cgroup's status.
- */
-
-/*
- * Typically Called at ->destroy(), or somewhere the subsys frees
- * cgroup_subsys_state.
- */
-void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
-
-/* Find a cgroup_subsys_state which has given ID */
-
-struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
-
-/* Returns true if root is ancestor of cg */
-bool css_is_ancestor(struct cgroup_subsys_state *cg,
-		     const struct cgroup_subsys_state *root);
-
-/* Get id and depth of css */
-unsigned short css_id(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
 					 struct cgroup_subsys *ss);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2418b6e71a85..a5629f1df13a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -124,38 +124,6 @@ struct cfent {
 	struct simple_xattrs		xattrs;
 };
 
-/*
- * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
- * cgroup_subsys->use_id != 0.
- */
-#define CSS_ID_MAX	(65535)
-struct css_id {
-	/*
-	 * The css to which this ID points. This pointer is set to valid value
-	 * after cgroup is populated. If cgroup is removed, this will be NULL.
-	 * This pointer is expected to be RCU-safe because destroy()
-	 * is called after synchronize_rcu(). But for safe use, css_tryget()
-	 * should be used for avoiding race.
-	 */
-	struct cgroup_subsys_state __rcu *css;
-	/*
-	 * ID of this css.
-	 */
-	unsigned short id;
-	/*
-	 * Depth in hierarchy which this ID belongs to.
-	 */
-	unsigned short depth;
-	/*
-	 * ID is freed by RCU. (and lookup routine is RCU safe.)
-	 */
-	struct rcu_head rcu_head;
-	/*
-	 * Hierarchy of CSS ID belongs to.
-	 */
-	unsigned short stack[0]; /* Array of Length (depth+1) */
-};
-
 /*
  * cgroup_event represents events which userspace want to receive.
  */
@@ -387,9 +355,6 @@ struct cgrp_cset_link {
 static struct css_set init_css_set;
 static struct cgrp_cset_link init_cgrp_cset_link;
 
-static int cgroup_init_idr(struct cgroup_subsys *ss,
-			   struct cgroup_subsys_state *css);
-
 /*
  * css_set_lock protects the list of css_set objects, and the chain of
  * tasks off each css_set.  Nests outside task->alloc_lock due to
@@ -841,8 +806,6 @@ static struct backing_dev_info cgroup_backing_dev_info = {
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
-static int alloc_css_id(struct cgroup_subsys_state *child_css);
-
 static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 {
 	struct inode *inode = new_inode(sb);
@@ -4242,21 +4205,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
 				goto err;
 		}
 	}
-
-	/* This cgroup is ready now */
-	for_each_root_subsys(cgrp->root, ss) {
-		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
-		struct css_id *id = rcu_dereference_protected(css->id, true);
-
-		/*
-		 * Update id->css pointer and make this css visible from
-		 * CSS ID functions. This pointer will be dereferened
-		 * from RCU-read-side without locks.
-		 */
-		if (id)
-			rcu_assign_pointer(id->css, css);
-	}
-
 	return 0;
 err:
 	cgroup_clear_dir(cgrp, subsys_mask);
@@ -4325,7 +4273,6 @@ static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
 	css->cgroup = cgrp;
 	css->ss = ss;
 	css->flags = 0;
-	css->id = NULL;
 
 	if (cgrp->parent)
 		css->parent = cgroup_css(cgrp->parent, ss);
@@ -4457,12 +4404,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 			goto err_free_all;
 
 		init_css(css, ss, cgrp);
-
-		if (ss->use_id) {
-			err = alloc_css_id(css);
-			if (err)
-				goto err_free_all;
-		}
 	}
 
 	/*
@@ -4927,12 +4868,6 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 
 	/* our new subsystem will be attached to the dummy hierarchy. */
 	init_css(css, ss, cgroup_dummy_top);
-	/* init_idr must be after init_css() because it sets css->id. */
-	if (ss->use_id) {
-		ret = cgroup_init_idr(ss, css);
-		if (ret)
-			goto err_unload;
-	}
 
 	/*
 	 * Now we need to entangle the css into the existing css_sets. unlike
@@ -4998,9 +4933,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 
 	offline_css(cgroup_css(cgroup_dummy_top, ss));
 
-	if (ss->use_id)
-		idr_destroy(&ss->idr);
-
 	/* deassign the subsys_id */
 	cgroup_subsys[ss->subsys_id] = NULL;
 
@@ -5027,8 +4959,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	/*
 	 * remove subsystem's css from the cgroup_dummy_top and free it -
 	 * need to free before marking as null because ss->css_free needs
-	 * the cgrp->subsys pointer to find their state. note that this
-	 * also takes care of freeing the css_id.
+	 * the cgrp->subsys pointer to find their state.
 	 */
 	ss->css_free(cgroup_css(cgroup_dummy_top, ss));
 	RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
@@ -5099,8 +5030,6 @@ int __init cgroup_init(void)
 	for_each_builtin_subsys(ss, i) {
 		if (!ss->early_init)
 			cgroup_init_subsys(ss);
-		if (ss->use_id)
-			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
 	}
 
 	/* allocate id for the dummy hierarchy */
@@ -5520,181 +5449,6 @@ static int __init cgroup_disable(char *str)
 }
 __setup("cgroup_disable=", cgroup_disable);
 
-/*
- * Functons for CSS ID.
- */
-
-/* to get ID other than 0, this should be called when !cgroup_is_dead() */
-unsigned short css_id(struct cgroup_subsys_state *css)
-{
-	struct css_id *cssid;
-
-	/*
-	 * This css_id() can return correct value when somone has refcnt
-	 * on this or this is under rcu_read_lock(). Once css->id is allocated,
-	 * it's unchanged until freed.
-	 */
-	cssid = rcu_dereference_raw(css->id);
-
-	if (cssid)
-		return cssid->id;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(css_id);
-
-/**
- *  css_is_ancestor - test "root" css is an ancestor of "child"
- * @child: the css to be tested.
- * @root: the css supporsed to be an ancestor of the child.
- *
- * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
- * this function reads css->id, the caller must hold rcu_read_lock().
- * But, considering usual usage, the csses should be valid objects after test.
- * Assuming that the caller will do some action to the child if this returns
- * returns true, the caller must take "child";s reference count.
- * If "child" is valid object and this returns true, "root" is valid, too.
- */
-
-bool css_is_ancestor(struct cgroup_subsys_state *child,
-		    const struct cgroup_subsys_state *root)
-{
-	struct css_id *child_id;
-	struct css_id *root_id;
-
-	child_id  = rcu_dereference(child->id);
-	if (!child_id)
-		return false;
-	root_id = rcu_dereference(root->id);
-	if (!root_id)
-		return false;
-	if (child_id->depth < root_id->depth)
-		return false;
-	if (child_id->stack[root_id->depth] != root_id->id)
-		return false;
-	return true;
-}
-
-void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
-{
-	struct css_id *id = rcu_dereference_protected(css->id, true);
-
-	/* When this is called before css_id initialization, id can be NULL */
-	if (!id)
-		return;
-
-	BUG_ON(!ss->use_id);
-
-	rcu_assign_pointer(id->css, NULL);
-	rcu_assign_pointer(css->id, NULL);
-	spin_lock(&ss->id_lock);
-	idr_remove(&ss->idr, id->id);
-	spin_unlock(&ss->id_lock);
-	kfree_rcu(id, rcu_head);
-}
-EXPORT_SYMBOL_GPL(free_css_id);
-
-/*
- * This is called by init or create(). Then, calls to this function are
- * always serialized (By cgroup_mutex() at create()).
- */
-
-static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
-{
-	struct css_id *newid;
-	int ret, size;
-
-	BUG_ON(!ss->use_id);
-
-	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
-	newid = kzalloc(size, GFP_KERNEL);
-	if (!newid)
-		return ERR_PTR(-ENOMEM);
-
-	idr_preload(GFP_KERNEL);
-	spin_lock(&ss->id_lock);
-	/* Don't use 0. allocates an ID of 1-65535 */
-	ret = idr_alloc(&ss->idr, newid, 1, CSS_ID_MAX + 1, GFP_NOWAIT);
-	spin_unlock(&ss->id_lock);
-	idr_preload_end();
-
-	/* Returns error when there are no free spaces for new ID.*/
-	if (ret < 0)
-		goto err_out;
-
-	newid->id = ret;
-	newid->depth = depth;
-	return newid;
-err_out:
-	kfree(newid);
-	return ERR_PTR(ret);
-
-}
-
-static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
-					    struct cgroup_subsys_state *rootcss)
-{
-	struct css_id *newid;
-
-	spin_lock_init(&ss->id_lock);
-	idr_init(&ss->idr);
-
-	newid = get_new_cssid(ss, 0);
-	if (IS_ERR(newid))
-		return PTR_ERR(newid);
-
-	newid->stack[0] = newid->id;
-	RCU_INIT_POINTER(newid->css, rootcss);
-	RCU_INIT_POINTER(rootcss->id, newid);
-	return 0;
-}
-
-static int alloc_css_id(struct cgroup_subsys_state *child_css)
-{
-	struct cgroup_subsys_state *parent_css = css_parent(child_css);
-	struct css_id *child_id, *parent_id;
-	int i, depth;
-
-	parent_id = rcu_dereference_protected(parent_css->id, true);
-	depth = parent_id->depth + 1;
-
-	child_id = get_new_cssid(child_css->ss, depth);
-	if (IS_ERR(child_id))
-		return PTR_ERR(child_id);
-
-	for (i = 0; i < depth; i++)
-		child_id->stack[i] = parent_id->stack[i];
-	child_id->stack[depth] = child_id->id;
-	/*
-	 * child_id->css pointer will be set after this cgroup is available
-	 * see cgroup_populate_dir()
-	 */
-	rcu_assign_pointer(child_css->id, child_id);
-
-	return 0;
-}
-
-/**
- * css_lookup - lookup css by id
- * @ss: cgroup subsys to be looked into.
- * @id: the id
- *
- * Returns pointer to cgroup_subsys_state if there is valid one with id.
- * NULL if not. Should be called under rcu_read_lock()
- */
-struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
-{
-	struct css_id *cssid = NULL;
-
-	BUG_ON(!ss->use_id);
-	cssid = idr_find(&ss->idr, id);
-
-	if (unlikely(!cssid))
-		return NULL;
-
-	return rcu_dereference(cssid->css);
-}
-EXPORT_SYMBOL_GPL(css_lookup);
-
 /**
  * css_from_dir - get corresponding css from the dentry of a cgroup dir
  * @dentry: directory dentry of interest
-- 
cgit v1.2.3


From 118150f22d6b4431a1fe2e715de314a5d93836f5 Mon Sep 17 00:00:00 2001
From: KV Sujith <sujithkv@ti.com>
Date: Sun, 18 Aug 2013 10:48:58 +0530
Subject: gpio: davinci: move to platform device

Modify DaVinci GPIO driver to become a platform device
driver.

The driver does not have platform driver structure or
a probe. Instead, it has pure_initcall function for
initialization. The platform specific informaiton is
obtained using the DaVinci specific davinci_soc_info
structure. This is a problem for Device Tree (DT)
implementation.

As a first stage of DT conversion, we implement a probe.

Additional notes:

- The driver registration happens as  postcore_initcall.
  This is required since machine init functions like
  da850_lcd_hw_init() make use of GPIO.
- Start using devres APIs for simpler error handling.

Signed-off-by: KV Sujith <sujithkv@ti.com>
[avinashphilip@ti.com: Move global definition of
		       "davinci_gpio_controller" to local]
Signed-off-by: Philip Avinash <avinashphilip@ti.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
[nsekhar@ti.com: drop unused structure member, rebase to new
		 clean-up patch and fix error messages]
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
---
 arch/arm/mach-davinci/include/mach/gpio-davinci.h |   1 +
 drivers/gpio/gpio-davinci.c                       | 119 +++++++++++++++-------
 include/linux/platform_data/gpio-davinci.h        |  25 +++++
 3 files changed, 107 insertions(+), 38 deletions(-)
 create mode 100644 include/linux/platform_data/gpio-davinci.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-davinci/include/mach/gpio-davinci.h b/arch/arm/mach-davinci/include/mach/gpio-davinci.h
index 1fdd1fd35448..551ba43a763e 100644
--- a/arch/arm/mach-davinci/include/mach/gpio-davinci.h
+++ b/arch/arm/mach-davinci/include/mach/gpio-davinci.h
@@ -60,6 +60,7 @@ struct davinci_gpio_controller {
 	void __iomem		*set_data;
 	void __iomem		*clr_data;
 	void __iomem		*in_data;
+	unsigned		gpio_irq;
 };
 
 /* The __gpio_to_controller() and __gpio_mask() functions inline to constants
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index cb947a1266ae..8847adf392b7 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -15,8 +15,9 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
-
-#include <asm/mach/irq.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/gpio-davinci.h>
 
 struct davinci_gpio_regs {
 	u32	dir;
@@ -36,10 +37,9 @@ struct davinci_gpio_regs {
 #define chip2controller(chip)	\
 	container_of(chip, struct davinci_gpio_controller, chip)
 
-static struct davinci_gpio_controller chips[DIV_ROUND_UP(DAVINCI_N_GPIO, 32)];
 static void __iomem *gpio_base;
 
-static struct davinci_gpio_regs __iomem __init *gpio2regs(unsigned gpio)
+static struct davinci_gpio_regs __iomem *gpio2regs(unsigned gpio)
 {
 	void __iomem *ptr;
 
@@ -67,7 +67,7 @@ static inline struct davinci_gpio_regs __iomem *irq2regs(int irq)
 	return g;
 }
 
-static int __init davinci_gpio_irq_setup(void);
+static int davinci_gpio_irq_setup(struct platform_device *pdev);
 
 /*--------------------------------------------------------------------------*/
 
@@ -133,33 +133,53 @@ davinci_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 	__raw_writel((1 << offset), value ? &g->set_data : &g->clr_data);
 }
 
-static int __init davinci_gpio_setup(void)
+static int davinci_gpio_probe(struct platform_device *pdev)
 {
 	int i, base;
 	unsigned ngpio;
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
-	struct davinci_gpio_regs *regs;
-
-	if (soc_info->gpio_type != GPIO_TYPE_DAVINCI)
-		return 0;
+	struct davinci_gpio_controller *chips;
+	struct davinci_gpio_platform_data *pdata;
+	struct davinci_gpio_regs __iomem *regs;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+
+	pdata = dev->platform_data;
+	if (!pdata) {
+		dev_err(dev, "No platform data found\n");
+		return -EINVAL;
+	}
 
 	/*
 	 * The gpio banks conceptually expose a segmented bitmap,
 	 * and "ngpio" is one more than the largest zero-based
 	 * bit index that's valid.
 	 */
-	ngpio = soc_info->gpio_num;
+	ngpio = pdata->ngpio;
 	if (ngpio == 0) {
-		pr_err("GPIO setup:  how many GPIOs?\n");
+		dev_err(dev, "How many GPIOs?\n");
 		return -EINVAL;
 	}
 
 	if (WARN_ON(DAVINCI_N_GPIO < ngpio))
 		ngpio = DAVINCI_N_GPIO;
 
-	gpio_base = ioremap(soc_info->gpio_base, SZ_4K);
-	if (WARN_ON(!gpio_base))
+	chips = devm_kzalloc(dev,
+			     ngpio * sizeof(struct davinci_gpio_controller),
+			     GFP_KERNEL);
+	if (!chips) {
+		dev_err(dev, "Memory allocation failed\n");
 		return -ENOMEM;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "Invalid memory resource\n");
+		return -EBUSY;
+	}
+
+	gpio_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(gpio_base))
+		return PTR_ERR(gpio_base);
 
 	for (i = 0, base = 0; base < ngpio; i++, base += 32) {
 		chips[i].chip.label = "DaVinci";
@@ -185,13 +205,10 @@ static int __init davinci_gpio_setup(void)
 		gpiochip_add(&chips[i].chip);
 	}
 
-	soc_info->gpio_ctlrs = chips;
-	soc_info->gpio_ctlrs_num = DIV_ROUND_UP(ngpio, 32);
-
-	davinci_gpio_irq_setup();
+	platform_set_drvdata(pdev, chips);
+	davinci_gpio_irq_setup(pdev);
 	return 0;
 }
-pure_initcall(davinci_gpio_setup);
 
 /*--------------------------------------------------------------------------*/
 /*
@@ -304,14 +321,14 @@ static int gpio_to_irq_banked(struct gpio_chip *chip, unsigned offset)
 
 static int gpio_to_irq_unbanked(struct gpio_chip *chip, unsigned offset)
 {
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
+	struct davinci_gpio_controller *d = chip2controller(chip);
 
 	/*
 	 * NOTE:  we assume for now that only irqs in the first gpio_chip
 	 * can provide direct-mapped IRQs to AINTC (up to 32 GPIOs).
 	 */
-	if (offset < soc_info->gpio_unbanked)
-		return soc_info->gpio_irq + offset;
+	if (offset < d->irq_base)
+		return d->gpio_irq + offset;
 	else
 		return -ENODEV;
 }
@@ -320,12 +337,11 @@ static int gpio_irq_type_unbanked(struct irq_data *data, unsigned trigger)
 {
 	struct davinci_gpio_controller *d;
 	struct davinci_gpio_regs __iomem *g;
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
 	u32 mask;
 
 	d = (struct davinci_gpio_controller *)data->handler_data;
 	g = (struct davinci_gpio_regs __iomem *)d->regs;
-	mask = __gpio_mask(data->irq - soc_info->gpio_irq);
+	mask = __gpio_mask(data->irq - d->gpio_irq);
 
 	if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
 		return -EINVAL;
@@ -346,24 +362,33 @@ static int gpio_irq_type_unbanked(struct irq_data *data, unsigned trigger)
  * (dm6446) can be set appropriately for GPIOV33 pins.
  */
 
-static int __init davinci_gpio_irq_setup(void)
+static int davinci_gpio_irq_setup(struct platform_device *pdev)
 {
 	unsigned	gpio, irq, bank;
 	struct clk	*clk;
 	u32		binten = 0;
 	unsigned	ngpio, bank_irq;
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
-	struct davinci_gpio_regs	__iomem *g;
+	struct device *dev = &pdev->dev;
+	struct resource	*res;
+	struct davinci_gpio_controller *chips = platform_get_drvdata(pdev);
+	struct davinci_gpio_platform_data *pdata = dev->platform_data;
+	struct davinci_gpio_regs __iomem *g;
 
-	ngpio = soc_info->gpio_num;
+	ngpio = pdata->ngpio;
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(dev, "Invalid IRQ resource\n");
+		return -EBUSY;
+	}
 
-	bank_irq = soc_info->gpio_irq;
-	if (bank_irq == 0) {
-		printk(KERN_ERR "Don't know first GPIO bank IRQ.\n");
-		return -EINVAL;
+	bank_irq = res->start;
+
+	if (!bank_irq) {
+		dev_err(dev, "Invalid IRQ resource\n");
+		return -ENODEV;
 	}
 
-	clk = clk_get(NULL, "gpio");
+	clk = devm_clk_get(dev, "gpio");
 	if (IS_ERR(clk)) {
 		printk(KERN_ERR "Error %ld getting gpio clock?\n",
 		       PTR_ERR(clk));
@@ -379,9 +404,9 @@ static int __init davinci_gpio_irq_setup(void)
 	 */
 	for (gpio = 0, bank = 0; gpio < ngpio; bank++, gpio += 32) {
 		chips[bank].chip.to_irq = gpio_to_irq_banked;
-		chips[bank].irq_base = soc_info->gpio_unbanked
+		chips[bank].irq_base = pdata->gpio_unbanked
 			? -EINVAL
-			: (soc_info->intc_irq_num + gpio);
+			: (pdata->intc_irq_num + gpio);
 	}
 
 	/*
@@ -389,7 +414,7 @@ static int __init davinci_gpio_irq_setup(void)
 	 * controller only handling trigger modes.  We currently assume no
 	 * IRQ mux conflicts; gpio_irq_type_unbanked() is only for GPIOs.
 	 */
-	if (soc_info->gpio_unbanked) {
+	if (pdata->gpio_unbanked) {
 		static struct irq_chip_type gpio_unbanked;
 
 		/* pass "bank 0" GPIO IRQs to AINTC */
@@ -409,7 +434,7 @@ static int __init davinci_gpio_irq_setup(void)
 		__raw_writel(~0, &g->set_rising);
 
 		/* set the direct IRQs up to use that irqchip */
-		for (gpio = 0; gpio < soc_info->gpio_unbanked; gpio++, irq++) {
+		for (gpio = 0; gpio < pdata->gpio_unbanked; gpio++, irq++) {
 			irq_set_chip(irq, &gpio_unbanked.chip);
 			irq_set_handler_data(irq, &chips[gpio / 32]);
 			irq_set_status_flags(irq, IRQ_TYPE_EDGE_BOTH);
@@ -464,3 +489,21 @@ done:
 
 	return 0;
 }
+
+static struct platform_driver davinci_gpio_driver = {
+	.probe		= davinci_gpio_probe,
+	.driver		= {
+		.name	= "davinci_gpio",
+		.owner	= THIS_MODULE,
+	},
+};
+
+/**
+ * GPIO driver registration needs to be done before machine_init functions
+ * access GPIO. Hence davinci_gpio_drv_reg() is a postcore_initcall.
+ */
+static int __init davinci_gpio_drv_reg(void)
+{
+	return platform_driver_register(&davinci_gpio_driver);
+}
+postcore_initcall(davinci_gpio_drv_reg);
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
new file mode 100644
index 000000000000..2fcc125af1aa
--- /dev/null
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -0,0 +1,25 @@
+/*
+ * DaVinci GPIO Platform Related Defines
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __DAVINCI_GPIO_PLATFORM_H
+#define __DAVINCI_GPIO_PLATFORM_H
+
+struct davinci_gpio_platform_data {
+	u32	ngpio;
+	u32	gpio_unbanked;
+	u32	intc_irq_num;
+};
+
+#endif
-- 
cgit v1.2.3


From a5b4bd2874d9032b42db8cc4880058576c561b06 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 24 Sep 2013 10:35:14 +0100
Subject: KEYS: Use bool in make_key_ref() and is_key_possessed()

Make make_key_ref() take a bool possession parameter and make
is_key_possessed() return a bool.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys.txt | 7 +++----
 include/linux/key.h             | 4 ++--
 security/keys/keyring.c         | 5 +++--
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
index 7b4145d00452..9ede67084f0b 100644
--- a/Documentation/security/keys.txt
+++ b/Documentation/security/keys.txt
@@ -865,15 +865,14 @@ encountered:
      calling processes has a searchable link to the key from one of its
      keyrings. There are three functions for dealing with these:
 
-	key_ref_t make_key_ref(const struct key *key,
-			       unsigned long possession);
+	key_ref_t make_key_ref(const struct key *key, bool possession);
 
 	struct key *key_ref_to_ptr(const key_ref_t key_ref);
 
-	unsigned long is_key_possessed(const key_ref_t key_ref);
+	bool is_key_possessed(const key_ref_t key_ref);
 
      The first function constructs a key reference from a key pointer and
-     possession information (which must be 0 or 1 and not any other value).
+     possession information (which must be true or false).
 
      The second function retrieves the key pointer from a reference and the
      third retrieves the possession flag.
diff --git a/include/linux/key.h b/include/linux/key.h
index 4dfde1161c5e..51bce2950de4 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -99,7 +99,7 @@ struct keyring_name;
 typedef struct __key_reference_with_attributes *key_ref_t;
 
 static inline key_ref_t make_key_ref(const struct key *key,
-				     unsigned long possession)
+				     bool possession)
 {
 	return (key_ref_t) ((unsigned long) key | possession);
 }
@@ -109,7 +109,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref)
 	return (struct key *) ((unsigned long) key_ref & ~1UL);
 }
 
-static inline unsigned long is_key_possessed(const key_ref_t key_ref)
+static inline bool is_key_possessed(const key_ref_t key_ref)
 {
 	return (unsigned long) key_ref & 1UL;
 }
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 6ece7f2e5707..f78406372ebe 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -329,9 +329,10 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 
 	struct keyring_list *keylist;
 	struct timespec now;
-	unsigned long possessed, kflags;
+	unsigned long kflags;
 	struct key *keyring, *key;
 	key_ref_t key_ref;
+	bool possessed;
 	long err;
 	int sp, nkeys, kix;
 
@@ -542,8 +543,8 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
 			       key_perm_t perm)
 {
 	struct keyring_list *klist;
-	unsigned long possessed;
 	struct key *keyring, *key;
+	bool possessed;
 	int nkeys, loop;
 
 	keyring = key_ref_to_ptr(keyring_ref);
-- 
cgit v1.2.3


From 16feef4340172b7dbb9cba60850e78fa6388adf1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 24 Sep 2013 10:35:15 +0100
Subject: KEYS: Consolidate the concept of an 'index key' for key access

Consolidate the concept of an 'index key' for accessing keys.  The index key
is the search term needed to find a key directly - basically the key type and
the key description.  We can add to that the description length.

This will be useful when turning a keyring into an associative array rather
than just a pointer block.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/key.h         | 21 +++++++++----
 security/keys/internal.h    |  8 ++---
 security/keys/key.c         | 72 +++++++++++++++++++++++----------------------
 security/keys/keyring.c     | 37 +++++++++++------------
 security/keys/request_key.c | 12 +++++---
 5 files changed, 83 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 51bce2950de4..d573e820a23d 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -82,6 +82,12 @@ struct key_owner;
 struct keyring_list;
 struct keyring_name;
 
+struct keyring_index_key {
+	struct key_type		*type;
+	const char		*description;
+	size_t			desc_len;
+};
+
 /*****************************************************************************/
 /*
  * key reference with possession attribute handling
@@ -129,7 +135,6 @@ struct key {
 		struct list_head graveyard_link;
 		struct rb_node	serial_node;
 	};
-	struct key_type		*type;		/* type of key */
 	struct rw_semaphore	sem;		/* change vs change sem */
 	struct key_user		*user;		/* owner of this key */
 	void			*security;	/* security data for this key */
@@ -163,12 +168,18 @@ struct key {
 #define KEY_FLAG_ROOT_CAN_CLEAR	6	/* set if key can be cleared by root without permission */
 #define KEY_FLAG_INVALIDATED	7	/* set if key has been invalidated */
 
-	/* the description string
-	 * - this is used to match a key against search criteria
-	 * - this should be a printable string
+	/* the key type and key description string
+	 * - the desc is used to match a key against search criteria
+	 * - it should be a printable string
 	 * - eg: for krb5 AFS, this might be "afs@REDHAT.COM"
 	 */
-	char			*description;
+	union {
+		struct keyring_index_key index_key;
+		struct {
+			struct key_type	*type;		/* type of key */
+			char		*description;
+		};
+	};
 
 	/* type specific data
 	 * - this is used by the keyring type to index the name
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 490aef5ba34b..77441dd1f9d4 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -89,19 +89,17 @@ extern struct key_type *key_type_lookup(const char *type);
 extern void key_type_put(struct key_type *ktype);
 
 extern int __key_link_begin(struct key *keyring,
-			    const struct key_type *type,
-			    const char *description,
+			    const struct keyring_index_key *index_key,
 			    unsigned long *_prealloc);
 extern int __key_link_check_live_key(struct key *keyring, struct key *key);
 extern void __key_link(struct key *keyring, struct key *key,
 		       unsigned long *_prealloc);
 extern void __key_link_end(struct key *keyring,
-			   struct key_type *type,
+			   const struct keyring_index_key *index_key,
 			   unsigned long prealloc);
 
 extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-				      const struct key_type *type,
-				      const char *description,
+				      const struct keyring_index_key *index_key,
 				      key_perm_t perm);
 
 extern struct key *keyring_search_instkey(struct key *keyring,
diff --git a/security/keys/key.c b/security/keys/key.c
index 8fb7c7bd4657..7e6bc396bb23 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 		}
 	}
 
-	desclen = strlen(desc) + 1;
-	quotalen = desclen + type->def_datalen;
+	desclen = strlen(desc);
+	quotalen = desclen + 1 + type->def_datalen;
 
 	/* get hold of the key tracking for this user */
 	user = key_user_lookup(uid);
@@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 		goto no_memory_2;
 
 	if (desc) {
-		key->description = kmemdup(desc, desclen, GFP_KERNEL);
+		key->index_key.desc_len = desclen;
+		key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
 		if (!key->description)
 			goto no_memory_3;
 	}
@@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 	atomic_set(&key->usage, 1);
 	init_rwsem(&key->sem);
 	lockdep_set_class(&key->sem, &type->lock_class);
-	key->type = type;
+	key->index_key.type = type;
 	key->user = user;
 	key->quotalen = quotalen;
 	key->datalen = type->def_datalen;
@@ -489,8 +490,7 @@ int key_instantiate_and_link(struct key *key,
 	}
 
 	if (keyring) {
-		ret = __key_link_begin(keyring, key->type, key->description,
-				       &prealloc);
+		ret = __key_link_begin(keyring, &key->index_key, &prealloc);
 		if (ret < 0)
 			goto error_free_preparse;
 	}
@@ -499,7 +499,7 @@ int key_instantiate_and_link(struct key *key,
 					 &prealloc);
 
 	if (keyring)
-		__key_link_end(keyring, key->type, prealloc);
+		__key_link_end(keyring, &key->index_key, prealloc);
 
 error_free_preparse:
 	if (key->type->preparse)
@@ -548,8 +548,7 @@ int key_reject_and_link(struct key *key,
 	ret = -EBUSY;
 
 	if (keyring)
-		link_ret = __key_link_begin(keyring, key->type,
-					    key->description, &prealloc);
+		link_ret = __key_link_begin(keyring, &key->index_key, &prealloc);
 
 	mutex_lock(&key_construction_mutex);
 
@@ -581,7 +580,7 @@ int key_reject_and_link(struct key *key,
 	mutex_unlock(&key_construction_mutex);
 
 	if (keyring)
-		__key_link_end(keyring, key->type, prealloc);
+		__key_link_end(keyring, &key->index_key, prealloc);
 
 	/* wake up anyone waiting for a key to be constructed */
 	if (awaken)
@@ -780,25 +779,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 			       key_perm_t perm,
 			       unsigned long flags)
 {
-	unsigned long prealloc;
+	struct keyring_index_key index_key = {
+		.description	= description,
+	};
 	struct key_preparsed_payload prep;
 	const struct cred *cred = current_cred();
-	struct key_type *ktype;
+	unsigned long prealloc;
 	struct key *keyring, *key = NULL;
 	key_ref_t key_ref;
 	int ret;
 
 	/* look up the key type to see if it's one of the registered kernel
 	 * types */
-	ktype = key_type_lookup(type);
-	if (IS_ERR(ktype)) {
+	index_key.type = key_type_lookup(type);
+	if (IS_ERR(index_key.type)) {
 		key_ref = ERR_PTR(-ENODEV);
 		goto error;
 	}
 
 	key_ref = ERR_PTR(-EINVAL);
-	if (!ktype->match || !ktype->instantiate ||
-	    (!description && !ktype->preparse))
+	if (!index_key.type->match || !index_key.type->instantiate ||
+	    (!index_key.description && !index_key.type->preparse))
 		goto error_put_type;
 
 	keyring = key_ref_to_ptr(keyring_ref);
@@ -812,21 +813,22 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	memset(&prep, 0, sizeof(prep));
 	prep.data = payload;
 	prep.datalen = plen;
-	prep.quotalen = ktype->def_datalen;
-	if (ktype->preparse) {
-		ret = ktype->preparse(&prep);
+	prep.quotalen = index_key.type->def_datalen;
+	if (index_key.type->preparse) {
+		ret = index_key.type->preparse(&prep);
 		if (ret < 0) {
 			key_ref = ERR_PTR(ret);
 			goto error_put_type;
 		}
-		if (!description)
-			description = prep.description;
+		if (!index_key.description)
+			index_key.description = prep.description;
 		key_ref = ERR_PTR(-EINVAL);
-		if (!description)
+		if (!index_key.description)
 			goto error_free_prep;
 	}
+	index_key.desc_len = strlen(index_key.description);
 
-	ret = __key_link_begin(keyring, ktype, description, &prealloc);
+	ret = __key_link_begin(keyring, &index_key, &prealloc);
 	if (ret < 0) {
 		key_ref = ERR_PTR(ret);
 		goto error_free_prep;
@@ -844,9 +846,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	 * key of the same type and description in the destination keyring and
 	 * update that instead if possible
 	 */
-	if (ktype->update) {
-		key_ref = __keyring_search_one(keyring_ref, ktype, description,
-					       0);
+	if (index_key.type->update) {
+		key_ref = __keyring_search_one(keyring_ref, &index_key, 0);
 		if (!IS_ERR(key_ref))
 			goto found_matching_key;
 	}
@@ -856,16 +857,17 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 		perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
 		perm |= KEY_USR_VIEW;
 
-		if (ktype->read)
+		if (index_key.type->read)
 			perm |= KEY_POS_READ;
 
-		if (ktype == &key_type_keyring || ktype->update)
+		if (index_key.type == &key_type_keyring ||
+		    index_key.type->update)
 			perm |= KEY_POS_WRITE;
 	}
 
 	/* allocate a new key */
-	key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred,
-			perm, flags);
+	key = key_alloc(index_key.type, index_key.description,
+			cred->fsuid, cred->fsgid, cred, perm, flags);
 	if (IS_ERR(key)) {
 		key_ref = ERR_CAST(key);
 		goto error_link_end;
@@ -882,12 +884,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
 
 error_link_end:
-	__key_link_end(keyring, ktype, prealloc);
+	__key_link_end(keyring, &index_key, prealloc);
 error_free_prep:
-	if (ktype->preparse)
-		ktype->free_preparse(&prep);
+	if (index_key.type->preparse)
+		index_key.type->free_preparse(&prep);
 error_put_type:
-	key_type_put(ktype);
+	key_type_put(index_key.type);
 error:
 	return key_ref;
 
@@ -895,7 +897,7 @@ error:
 	/* we found a matching key, so we're going to try to update it
 	 * - we can drop the locks first as we have the key pinned
 	 */
-	__key_link_end(keyring, ktype, prealloc);
+	__key_link_end(keyring, &index_key, prealloc);
 
 	key_ref = __key_update(key_ref, &prep);
 	goto error_free_prep;
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index f78406372ebe..c7f59f9dd7b6 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -538,8 +538,7 @@ EXPORT_SYMBOL(keyring_search);
  * to the returned key reference.
  */
 key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-			       const struct key_type *ktype,
-			       const char *description,
+			       const struct keyring_index_key *index_key,
 			       key_perm_t perm)
 {
 	struct keyring_list *klist;
@@ -558,9 +557,9 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
 		smp_rmb();
 		for (loop = 0; loop < nkeys ; loop++) {
 			key = rcu_dereference(klist->keys[loop]);
-			if (key->type == ktype &&
+			if (key->type == index_key->type &&
 			    (!key->type->match ||
-			     key->type->match(key, description)) &&
+			     key->type->match(key, index_key->description)) &&
 			    key_permission(make_key_ref(key, possessed),
 					   perm) == 0 &&
 			    !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
@@ -747,8 +746,8 @@ static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
 /*
  * Preallocate memory so that a key can be linked into to a keyring.
  */
-int __key_link_begin(struct key *keyring, const struct key_type *type,
-		     const char *description, unsigned long *_prealloc)
+int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key,
+		     unsigned long *_prealloc)
 	__acquires(&keyring->sem)
 	__acquires(&keyring_serialise_link_sem)
 {
@@ -759,7 +758,8 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
 	size_t size;
 	int loop, lru, ret;
 
-	kenter("%d,%s,%s,", key_serial(keyring), type->name, description);
+	kenter("%d,%s,%s,",
+	       key_serial(keyring), index_key->type->name, index_key->description);
 
 	if (keyring->type != &key_type_keyring)
 		return -ENOTDIR;
@@ -772,7 +772,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
 
 	/* serialise link/link calls to prevent parallel calls causing a cycle
 	 * when linking two keyring in opposite orders */
-	if (type == &key_type_keyring)
+	if (index_key->type == &key_type_keyring)
 		down_write(&keyring_serialise_link_sem);
 
 	klist = rcu_dereference_locked_keyring(keyring);
@@ -784,8 +784,8 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
 		for (loop = klist->nkeys - 1; loop >= 0; loop--) {
 			struct key *key = rcu_deref_link_locked(klist, loop,
 								keyring);
-			if (key->type == type &&
-			    strcmp(key->description, description) == 0) {
+			if (key->type == index_key->type &&
+			    strcmp(key->description, index_key->description) == 0) {
 				/* Found a match - we'll replace the link with
 				 * one to the new key.  We record the slot
 				 * position.
@@ -865,7 +865,7 @@ error_quota:
 	key_payload_reserve(keyring,
 			    keyring->datalen - KEYQUOTA_LINK_BYTES);
 error_sem:
-	if (type == &key_type_keyring)
+	if (index_key->type == &key_type_keyring)
 		up_write(&keyring_serialise_link_sem);
 error_krsem:
 	up_write(&keyring->sem);
@@ -957,16 +957,17 @@ void __key_link(struct key *keyring, struct key *key,
  *
  * Must be called with __key_link_begin() having being called.
  */
-void __key_link_end(struct key *keyring, struct key_type *type,
+void __key_link_end(struct key *keyring,
+		    const struct keyring_index_key *index_key,
 		    unsigned long prealloc)
 	__releases(&keyring->sem)
 	__releases(&keyring_serialise_link_sem)
 {
-	BUG_ON(type == NULL);
-	BUG_ON(type->name == NULL);
-	kenter("%d,%s,%lx", keyring->serial, type->name, prealloc);
+	BUG_ON(index_key->type == NULL);
+	BUG_ON(index_key->type->name == NULL);
+	kenter("%d,%s,%lx", keyring->serial, index_key->type->name, prealloc);
 
-	if (type == &key_type_keyring)
+	if (index_key->type == &key_type_keyring)
 		up_write(&keyring_serialise_link_sem);
 
 	if (prealloc) {
@@ -1007,12 +1008,12 @@ int key_link(struct key *keyring, struct key *key)
 	key_check(keyring);
 	key_check(key);
 
-	ret = __key_link_begin(keyring, key->type, key->description, &prealloc);
+	ret = __key_link_begin(keyring, &key->index_key, &prealloc);
 	if (ret == 0) {
 		ret = __key_link_check_live_key(keyring, key);
 		if (ret == 0)
 			__key_link(keyring, key, &prealloc);
-		__key_link_end(keyring, key->type, prealloc);
+		__key_link_end(keyring, &key->index_key, prealloc);
 	}
 
 	return ret;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 172115b38054..586cb79ee82d 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -352,6 +352,11 @@ static int construct_alloc_key(struct key_type *type,
 			       struct key_user *user,
 			       struct key **_key)
 {
+	const struct keyring_index_key index_key = {
+		.type		= type,
+		.description	= description,
+		.desc_len	= strlen(description),
+	};
 	const struct cred *cred = current_cred();
 	unsigned long prealloc;
 	struct key *key;
@@ -379,8 +384,7 @@ static int construct_alloc_key(struct key_type *type,
 	set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
 
 	if (dest_keyring) {
-		ret = __key_link_begin(dest_keyring, type, description,
-				       &prealloc);
+		ret = __key_link_begin(dest_keyring, &index_key, &prealloc);
 		if (ret < 0)
 			goto link_prealloc_failed;
 	}
@@ -400,7 +404,7 @@ static int construct_alloc_key(struct key_type *type,
 
 	mutex_unlock(&key_construction_mutex);
 	if (dest_keyring)
-		__key_link_end(dest_keyring, type, prealloc);
+		__key_link_end(dest_keyring, &index_key, prealloc);
 	mutex_unlock(&user->cons_lock);
 	*_key = key;
 	kleave(" = 0 [%d]", key_serial(key));
@@ -416,7 +420,7 @@ key_already_present:
 		ret = __key_link_check_live_key(dest_keyring, key);
 		if (ret == 0)
 			__key_link(dest_keyring, key, &prealloc);
-		__key_link_end(dest_keyring, type, prealloc);
+		__key_link_end(dest_keyring, &index_key, prealloc);
 		if (ret < 0)
 			goto link_check_failed;
 	}
-- 
cgit v1.2.3


From 4bdf0bc300314141e5475e145acb8b5ad846f00d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 24 Sep 2013 10:35:15 +0100
Subject: KEYS: Introduce a search context structure

Search functions pass around a bunch of arguments, each of which gets copied
with each call.  Introduce a search context structure to hold these.

Whilst we're at it, create a search flag that indicates whether the search
should be directly to the description or whether it should iterate through all
keys looking for a non-description match.

This will be useful when keyrings use a generic data struct with generic
routines to manage their content as the search terms can just be passed
through to the iterator callback function.

Also, for future use, the data to be supplied to the match function is
separated from the description pointer in the search context.  This makes it
clear which is being supplied.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/key-type.h         |   5 ++
 security/keys/internal.h         |  40 +++++++------
 security/keys/keyring.c          |  70 +++++++++++------------
 security/keys/proc.c             |  17 ++++--
 security/keys/process_keys.c     | 117 +++++++++++++++++++--------------------
 security/keys/request_key.c      |  56 +++++++++----------
 security/keys/request_key_auth.c |  14 +++--
 security/keys/user_defined.c     |  18 +++---
 8 files changed, 179 insertions(+), 158 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 518a53afb9ea..f58737bcb050 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -63,6 +63,11 @@ struct key_type {
 	 */
 	size_t def_datalen;
 
+	/* Default key search algorithm. */
+	unsigned def_lookup_type;
+#define KEYRING_SEARCH_LOOKUP_DIRECT	0x0000	/* Direct lookup by description. */
+#define KEYRING_SEARCH_LOOKUP_ITERATE	0x0001	/* Iterative search. */
+
 	/* vet a description */
 	int (*vet_description)(const char *description);
 
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 77441dd1f9d4..f4bf938b68b4 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -107,23 +107,31 @@ extern struct key *keyring_search_instkey(struct key *keyring,
 
 typedef int (*key_match_func_t)(const struct key *, const void *);
 
+struct keyring_search_context {
+	struct keyring_index_key index_key;
+	const struct cred	*cred;
+	key_match_func_t	match;
+	const void		*match_data;
+	unsigned		flags;
+#define KEYRING_SEARCH_LOOKUP_TYPE	0x0001	/* [as type->def_lookup_type] */
+#define KEYRING_SEARCH_NO_STATE_CHECK	0x0002	/* Skip state checks */
+#define KEYRING_SEARCH_DO_STATE_CHECK	0x0004	/* Override NO_STATE_CHECK */
+#define KEYRING_SEARCH_NO_UPDATE_TIME	0x0008	/* Don't update times */
+#define KEYRING_SEARCH_NO_CHECK_PERM	0x0010	/* Don't check permissions */
+#define KEYRING_SEARCH_DETECT_TOO_DEEP	0x0020	/* Give an error on excessive depth */
+
+	/* Internal stuff */
+	int			skipped_ret;
+	bool			possessed;
+	key_ref_t		result;
+	struct timespec		now;
+};
+
 extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-				    const struct cred *cred,
-				    struct key_type *type,
-				    const void *description,
-				    key_match_func_t match,
-				    bool no_state_check);
-
-extern key_ref_t search_my_process_keyrings(struct key_type *type,
-					    const void *description,
-					    key_match_func_t match,
-					    bool no_state_check,
-					    const struct cred *cred);
-extern key_ref_t search_process_keyrings(struct key_type *type,
-					 const void *description,
-					 key_match_func_t match,
-					 bool no_state_check,
-					 const struct cred *cred);
+				    struct keyring_search_context *ctx);
+
+extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
+extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
 
 extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
 
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index c7f59f9dd7b6..b42f2d4f7f83 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -280,11 +280,7 @@ EXPORT_SYMBOL(keyring_alloc);
 /**
  * keyring_search_aux - Search a keyring tree for a key matching some criteria
  * @keyring_ref: A pointer to the keyring with possession indicator.
- * @cred: The credentials to use for permissions checks.
- * @type: The type of key to search for.
- * @description: Parameter for @match.
- * @match: Function to rule on whether or not a key is the one required.
- * @no_state_check: Don't check if a matching key is bad
+ * @ctx: The keyring search context.
  *
  * Search the supplied keyring tree for a key that matches the criteria given.
  * The root keyring and any linked keyrings must grant Search permission to the
@@ -314,11 +310,7 @@ EXPORT_SYMBOL(keyring_alloc);
  * @keyring_ref is propagated to the returned key reference.
  */
 key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-			     const struct cred *cred,
-			     struct key_type *type,
-			     const void *description,
-			     key_match_func_t match,
-			     bool no_state_check)
+			     struct keyring_search_context *ctx)
 {
 	struct {
 		/* Need a separate keylist pointer for RCU purposes */
@@ -328,20 +320,18 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 	} stack[KEYRING_SEARCH_MAX_DEPTH];
 
 	struct keyring_list *keylist;
-	struct timespec now;
 	unsigned long kflags;
 	struct key *keyring, *key;
 	key_ref_t key_ref;
-	bool possessed;
 	long err;
 	int sp, nkeys, kix;
 
 	keyring = key_ref_to_ptr(keyring_ref);
-	possessed = is_key_possessed(keyring_ref);
+	ctx->possessed = is_key_possessed(keyring_ref);
 	key_check(keyring);
 
 	/* top keyring must have search permission to begin the search */
-	err = key_task_permission(keyring_ref, cred, KEY_SEARCH);
+	err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
 	if (err < 0) {
 		key_ref = ERR_PTR(err);
 		goto error;
@@ -353,7 +343,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 
 	rcu_read_lock();
 
-	now = current_kernel_time();
+	ctx->now = current_kernel_time();
 	err = -EAGAIN;
 	sp = 0;
 
@@ -361,16 +351,17 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 	 * are looking for */
 	key_ref = ERR_PTR(-EAGAIN);
 	kflags = keyring->flags;
-	if (keyring->type == type && match(keyring, description)) {
+	if (keyring->type == ctx->index_key.type &&
+	    ctx->match(keyring, ctx->match_data)) {
 		key = keyring;
-		if (no_state_check)
+		if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
 			goto found;
 
 		/* check it isn't negative and hasn't expired or been
 		 * revoked */
 		if (kflags & (1 << KEY_FLAG_REVOKED))
 			goto error_2;
-		if (key->expiry && now.tv_sec >= key->expiry)
+		if (key->expiry && ctx->now.tv_sec >= key->expiry)
 			goto error_2;
 		key_ref = ERR_PTR(key->type_data.reject_error);
 		if (kflags & (1 << KEY_FLAG_NEGATIVE))
@@ -384,7 +375,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 	if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
 		      (1 << KEY_FLAG_REVOKED) |
 		      (1 << KEY_FLAG_NEGATIVE)) ||
-	    (keyring->expiry && now.tv_sec >= keyring->expiry))
+	    (keyring->expiry && ctx->now.tv_sec >= keyring->expiry))
 		goto error_2;
 
 	/* start processing a new keyring */
@@ -406,29 +397,29 @@ descend:
 		kflags = key->flags;
 
 		/* ignore keys not of this type */
-		if (key->type != type)
+		if (key->type != ctx->index_key.type)
 			continue;
 
 		/* skip invalidated, revoked and expired keys */
-		if (!no_state_check) {
+		if (!(ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)) {
 			if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
 				      (1 << KEY_FLAG_REVOKED)))
 				continue;
 
-			if (key->expiry && now.tv_sec >= key->expiry)
+			if (key->expiry && ctx->now.tv_sec >= key->expiry)
 				continue;
 		}
 
 		/* keys that don't match */
-		if (!match(key, description))
+		if (!ctx->match(key, ctx->match_data))
 			continue;
 
 		/* key must have search permissions */
-		if (key_task_permission(make_key_ref(key, possessed),
-					cred, KEY_SEARCH) < 0)
+		if (key_task_permission(make_key_ref(key, ctx->possessed),
+					ctx->cred, KEY_SEARCH) < 0)
 			continue;
 
-		if (no_state_check)
+		if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
 			goto found;
 
 		/* we set a different error code if we pass a negative key */
@@ -456,8 +447,8 @@ ascend:
 		if (sp >= KEYRING_SEARCH_MAX_DEPTH)
 			continue;
 
-		if (key_task_permission(make_key_ref(key, possessed),
-					cred, KEY_SEARCH) < 0)
+		if (key_task_permission(make_key_ref(key, ctx->possessed),
+					ctx->cred, KEY_SEARCH) < 0)
 			continue;
 
 		/* stack the current position */
@@ -489,12 +480,12 @@ not_this_keyring:
 	/* we found a viable match */
 found:
 	atomic_inc(&key->usage);
-	key->last_used_at = now.tv_sec;
-	keyring->last_used_at = now.tv_sec;
+	key->last_used_at = ctx->now.tv_sec;
+	keyring->last_used_at = ctx->now.tv_sec;
 	while (sp > 0)
-		stack[--sp].keyring->last_used_at = now.tv_sec;
+		stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
 	key_check(key);
-	key_ref = make_key_ref(key, possessed);
+	key_ref = make_key_ref(key, ctx->possessed);
 error_2:
 	rcu_read_unlock();
 error:
@@ -514,11 +505,20 @@ key_ref_t keyring_search(key_ref_t keyring,
 			 struct key_type *type,
 			 const char *description)
 {
-	if (!type->match)
+	struct keyring_search_context ctx = {
+		.index_key.type		= type,
+		.index_key.description	= description,
+		.cred			= current_cred(),
+		.match			= type->match,
+		.match_data		= description,
+		.flags			= (type->def_lookup_type |
+					   KEYRING_SEARCH_DO_STATE_CHECK),
+	};
+
+	if (!ctx.match)
 		return ERR_PTR(-ENOKEY);
 
-	return keyring_search_aux(keyring, current->cred,
-				  type, description, type->match, false);
+	return keyring_search_aux(keyring, &ctx);
 }
 EXPORT_SYMBOL(keyring_search);
 
diff --git a/security/keys/proc.c b/security/keys/proc.c
index 217b6855e815..88e9a466940f 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file *p, void *v)
 
 static int proc_keys_show(struct seq_file *m, void *v)
 {
-	const struct cred *cred = current_cred();
 	struct rb_node *_p = v;
 	struct key *key = rb_entry(_p, struct key, serial_node);
 	struct timespec now;
@@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v)
 	char xbuf[12];
 	int rc;
 
+	struct keyring_search_context ctx = {
+		.index_key.type		= key->type,
+		.index_key.description	= key->description,
+		.cred			= current_cred(),
+		.match			= lookup_user_key_possessed,
+		.match_data		= key,
+		.flags			= (KEYRING_SEARCH_NO_STATE_CHECK |
+					   KEYRING_SEARCH_LOOKUP_DIRECT),
+	};
+
 	key_ref = make_key_ref(key, 0);
 
 	/* determine if the key is possessed by this process (a test we can
 	 * skip if the key does not indicate the possessor can view it
 	 */
 	if (key->perm & KEY_POS_VIEW) {
-		skey_ref = search_my_process_keyrings(key->type, key,
-						      lookup_user_key_possessed,
-						      true, cred);
+		skey_ref = search_my_process_keyrings(&ctx);
 		if (!IS_ERR(skey_ref)) {
 			key_ref_put(skey_ref);
 			key_ref = make_key_ref(key, 1);
@@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
 	 * - the caller holds a spinlock, and thus the RCU read lock, making our
 	 *   access to __current_cred() safe
 	 */
-	rc = key_task_permission(key_ref, cred, KEY_VIEW);
+	rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW);
 	if (rc < 0)
 		return 0;
 
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index a3410d605849..e68a3e0e7aa0 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk)
  * In the case of a successful return, the possession attribute is set on the
  * returned key reference.
  */
-key_ref_t search_my_process_keyrings(struct key_type *type,
-				     const void *description,
-				     key_match_func_t match,
-				     bool no_state_check,
-				     const struct cred *cred)
+key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
 {
 	key_ref_t key_ref, ret, err;
 
@@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
 	err = ERR_PTR(-EAGAIN);
 
 	/* search the thread keyring first */
-	if (cred->thread_keyring) {
+	if (ctx->cred->thread_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(cred->thread_keyring, 1),
-			cred, type, description, match, no_state_check);
+			make_key_ref(ctx->cred->thread_keyring, 1), ctx);
 		if (!IS_ERR(key_ref))
 			goto found;
 
@@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
 	}
 
 	/* search the process keyring second */
-	if (cred->process_keyring) {
+	if (ctx->cred->process_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(cred->process_keyring, 1),
-			cred, type, description, match, no_state_check);
+			make_key_ref(ctx->cred->process_keyring, 1), ctx);
 		if (!IS_ERR(key_ref))
 			goto found;
 
@@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
 	}
 
 	/* search the session keyring */
-	if (cred->session_keyring) {
+	if (ctx->cred->session_keyring) {
 		rcu_read_lock();
 		key_ref = keyring_search_aux(
-			make_key_ref(rcu_dereference(cred->session_keyring), 1),
-			cred, type, description, match, no_state_check);
+			make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1),
+			ctx);
 		rcu_read_unlock();
 
 		if (!IS_ERR(key_ref))
@@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
 		}
 	}
 	/* or search the user-session keyring */
-	else if (cred->user->session_keyring) {
+	else if (ctx->cred->user->session_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(cred->user->session_keyring, 1),
-			cred, type, description, match, no_state_check);
+			make_key_ref(ctx->cred->user->session_keyring, 1),
+			ctx);
 		if (!IS_ERR(key_ref))
 			goto found;
 
@@ -437,19 +431,14 @@ found:
  *
  * Return same as search_my_process_keyrings().
  */
-key_ref_t search_process_keyrings(struct key_type *type,
-				  const void *description,
-				  key_match_func_t match,
-				  bool no_state_check,
-				  const struct cred *cred)
+key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
 {
 	struct request_key_auth *rka;
 	key_ref_t key_ref, ret = ERR_PTR(-EACCES), err;
 
 	might_sleep();
 
-	key_ref = search_my_process_keyrings(type, description, match,
-					     no_state_check, cred);
+	key_ref = search_my_process_keyrings(ctx);
 	if (!IS_ERR(key_ref))
 		goto found;
 	err = key_ref;
@@ -458,19 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	 * search the keyrings of the process mentioned there
 	 * - we don't permit access to request_key auth keys via this method
 	 */
-	if (cred->request_key_auth &&
-	    cred == current_cred() &&
-	    type != &key_type_request_key_auth
+	if (ctx->cred->request_key_auth &&
+	    ctx->cred == current_cred() &&
+	    ctx->index_key.type != &key_type_request_key_auth
 	    ) {
+		const struct cred *cred = ctx->cred;
+
 		/* defend against the auth key being revoked */
 		down_read(&cred->request_key_auth->sem);
 
-		if (key_validate(cred->request_key_auth) == 0) {
-			rka = cred->request_key_auth->payload.data;
+		if (key_validate(ctx->cred->request_key_auth) == 0) {
+			rka = ctx->cred->request_key_auth->payload.data;
 
-			key_ref = search_process_keyrings(type, description,
-							  match, no_state_check,
-							  rka->cred);
+			ctx->cred = rka->cred;
+			key_ref = search_process_keyrings(ctx);
+			ctx->cred = cred;
 
 			up_read(&cred->request_key_auth->sem);
 
@@ -524,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target)
 key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
 			  key_perm_t perm)
 {
+	struct keyring_search_context ctx = {
+		.match	= lookup_user_key_possessed,
+		.flags	= (KEYRING_SEARCH_NO_STATE_CHECK |
+			   KEYRING_SEARCH_LOOKUP_DIRECT),
+	};
 	struct request_key_auth *rka;
-	const struct cred *cred;
 	struct key *key;
 	key_ref_t key_ref, skey_ref;
 	int ret;
 
 try_again:
-	cred = get_current_cred();
+	ctx.cred = get_current_cred();
 	key_ref = ERR_PTR(-ENOKEY);
 
 	switch (id) {
 	case KEY_SPEC_THREAD_KEYRING:
-		if (!cred->thread_keyring) {
+		if (!ctx.cred->thread_keyring) {
 			if (!(lflags & KEY_LOOKUP_CREATE))
 				goto error;
 
@@ -548,13 +543,13 @@ try_again:
 			goto reget_creds;
 		}
 
-		key = cred->thread_keyring;
+		key = ctx.cred->thread_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_PROCESS_KEYRING:
-		if (!cred->process_keyring) {
+		if (!ctx.cred->process_keyring) {
 			if (!(lflags & KEY_LOOKUP_CREATE))
 				goto error;
 
@@ -566,13 +561,13 @@ try_again:
 			goto reget_creds;
 		}
 
-		key = cred->process_keyring;
+		key = ctx.cred->process_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_SESSION_KEYRING:
-		if (!cred->session_keyring) {
+		if (!ctx.cred->session_keyring) {
 			/* always install a session keyring upon access if one
 			 * doesn't exist yet */
 			ret = install_user_keyrings();
@@ -582,13 +577,13 @@ try_again:
 				ret = join_session_keyring(NULL);
 			else
 				ret = install_session_keyring(
-					cred->user->session_keyring);
+					ctx.cred->user->session_keyring);
 
 			if (ret < 0)
 				goto error;
 			goto reget_creds;
-		} else if (cred->session_keyring ==
-			   cred->user->session_keyring &&
+		} else if (ctx.cred->session_keyring ==
+			   ctx.cred->user->session_keyring &&
 			   lflags & KEY_LOOKUP_CREATE) {
 			ret = join_session_keyring(NULL);
 			if (ret < 0)
@@ -597,32 +592,32 @@ try_again:
 		}
 
 		rcu_read_lock();
-		key = rcu_dereference(cred->session_keyring);
+		key = rcu_dereference(ctx.cred->session_keyring);
 		atomic_inc(&key->usage);
 		rcu_read_unlock();
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_USER_KEYRING:
-		if (!cred->user->uid_keyring) {
+		if (!ctx.cred->user->uid_keyring) {
 			ret = install_user_keyrings();
 			if (ret < 0)
 				goto error;
 		}
 
-		key = cred->user->uid_keyring;
+		key = ctx.cred->user->uid_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_USER_SESSION_KEYRING:
-		if (!cred->user->session_keyring) {
+		if (!ctx.cred->user->session_keyring) {
 			ret = install_user_keyrings();
 			if (ret < 0)
 				goto error;
 		}
 
-		key = cred->user->session_keyring;
+		key = ctx.cred->user->session_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
@@ -633,7 +628,7 @@ try_again:
 		goto error;
 
 	case KEY_SPEC_REQKEY_AUTH_KEY:
-		key = cred->request_key_auth;
+		key = ctx.cred->request_key_auth;
 		if (!key)
 			goto error;
 
@@ -642,20 +637,20 @@ try_again:
 		break;
 
 	case KEY_SPEC_REQUESTOR_KEYRING:
-		if (!cred->request_key_auth)
+		if (!ctx.cred->request_key_auth)
 			goto error;
 
-		down_read(&cred->request_key_auth->sem);
+		down_read(&ctx.cred->request_key_auth->sem);
 		if (test_bit(KEY_FLAG_REVOKED,
-			     &cred->request_key_auth->flags)) {
+			     &ctx.cred->request_key_auth->flags)) {
 			key_ref = ERR_PTR(-EKEYREVOKED);
 			key = NULL;
 		} else {
-			rka = cred->request_key_auth->payload.data;
+			rka = ctx.cred->request_key_auth->payload.data;
 			key = rka->dest_keyring;
 			atomic_inc(&key->usage);
 		}
-		up_read(&cred->request_key_auth->sem);
+		up_read(&ctx.cred->request_key_auth->sem);
 		if (!key)
 			goto error;
 		key_ref = make_key_ref(key, 1);
@@ -675,9 +670,13 @@ try_again:
 		key_ref = make_key_ref(key, 0);
 
 		/* check to see if we possess the key */
-		skey_ref = search_process_keyrings(key->type, key,
-						   lookup_user_key_possessed,
-						   true, cred);
+		ctx.index_key.type		= key->type;
+		ctx.index_key.description	= key->description;
+		ctx.index_key.desc_len		= strlen(key->description);
+		ctx.match_data			= key;
+		kdebug("check possessed");
+		skey_ref = search_process_keyrings(&ctx);
+		kdebug("possessed=%p", skey_ref);
 
 		if (!IS_ERR(skey_ref)) {
 			key_put(key);
@@ -717,14 +716,14 @@ try_again:
 		goto invalid_key;
 
 	/* check the permissions */
-	ret = key_task_permission(key_ref, cred, perm);
+	ret = key_task_permission(key_ref, ctx.cred, perm);
 	if (ret < 0)
 		goto invalid_key;
 
 	key->last_used_at = current_kernel_time().tv_sec;
 
 error:
-	put_cred(cred);
+	put_cred(ctx.cred);
 	return key_ref;
 
 invalid_key:
@@ -735,7 +734,7 @@ invalid_key:
 	/* if we attempted to install a keyring, then it may have caused new
 	 * creds to be installed */
 reget_creds:
-	put_cred(cred);
+	put_cred(ctx.cred);
 	goto try_again;
 }
 
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 586cb79ee82d..ab75df4745af 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -345,38 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
  * May return a key that's already under construction instead if there was a
  * race between two thread calling request_key().
  */
-static int construct_alloc_key(struct key_type *type,
-			       const char *description,
+static int construct_alloc_key(struct keyring_search_context *ctx,
 			       struct key *dest_keyring,
 			       unsigned long flags,
 			       struct key_user *user,
 			       struct key **_key)
 {
-	const struct keyring_index_key index_key = {
-		.type		= type,
-		.description	= description,
-		.desc_len	= strlen(description),
-	};
-	const struct cred *cred = current_cred();
 	unsigned long prealloc;
 	struct key *key;
 	key_perm_t perm;
 	key_ref_t key_ref;
 	int ret;
 
-	kenter("%s,%s,,,", type->name, description);
+	kenter("%s,%s,,,",
+	       ctx->index_key.type->name, ctx->index_key.description);
 
 	*_key = NULL;
 	mutex_lock(&user->cons_lock);
 
 	perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
 	perm |= KEY_USR_VIEW;
-	if (type->read)
+	if (ctx->index_key.type->read)
 		perm |= KEY_POS_READ;
-	if (type == &key_type_keyring || type->update)
+	if (ctx->index_key.type == &key_type_keyring ||
+	    ctx->index_key.type->update)
 		perm |= KEY_POS_WRITE;
 
-	key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
+	key = key_alloc(ctx->index_key.type, ctx->index_key.description,
+			ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
 			perm, flags);
 	if (IS_ERR(key))
 		goto alloc_failed;
@@ -384,7 +380,7 @@ static int construct_alloc_key(struct key_type *type,
 	set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
 
 	if (dest_keyring) {
-		ret = __key_link_begin(dest_keyring, &index_key, &prealloc);
+		ret = __key_link_begin(dest_keyring, &ctx->index_key, &prealloc);
 		if (ret < 0)
 			goto link_prealloc_failed;
 	}
@@ -394,8 +390,7 @@ static int construct_alloc_key(struct key_type *type,
 	 * waited for locks */
 	mutex_lock(&key_construction_mutex);
 
-	key_ref = search_process_keyrings(type, description, type->match,
-					  false, cred);
+	key_ref = search_process_keyrings(ctx);
 	if (!IS_ERR(key_ref))
 		goto key_already_present;
 
@@ -404,7 +399,7 @@ static int construct_alloc_key(struct key_type *type,
 
 	mutex_unlock(&key_construction_mutex);
 	if (dest_keyring)
-		__key_link_end(dest_keyring, &index_key, prealloc);
+		__key_link_end(dest_keyring, &ctx->index_key, prealloc);
 	mutex_unlock(&user->cons_lock);
 	*_key = key;
 	kleave(" = 0 [%d]", key_serial(key));
@@ -420,7 +415,7 @@ key_already_present:
 		ret = __key_link_check_live_key(dest_keyring, key);
 		if (ret == 0)
 			__key_link(dest_keyring, key, &prealloc);
-		__key_link_end(dest_keyring, &index_key, prealloc);
+		__key_link_end(dest_keyring, &ctx->index_key, prealloc);
 		if (ret < 0)
 			goto link_check_failed;
 	}
@@ -449,8 +444,7 @@ alloc_failed:
 /*
  * Commence key construction.
  */
-static struct key *construct_key_and_link(struct key_type *type,
-					  const char *description,
+static struct key *construct_key_and_link(struct keyring_search_context *ctx,
 					  const char *callout_info,
 					  size_t callout_len,
 					  void *aux,
@@ -469,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type,
 
 	construct_get_dest_keyring(&dest_keyring);
 
-	ret = construct_alloc_key(type, description, dest_keyring, flags, user,
-				  &key);
+	ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
 	key_user_put(user);
 
 	if (ret == 0) {
@@ -534,18 +527,24 @@ struct key *request_key_and_link(struct key_type *type,
 				 struct key *dest_keyring,
 				 unsigned long flags)
 {
-	const struct cred *cred = current_cred();
+	struct keyring_search_context ctx = {
+		.index_key.type		= type,
+		.index_key.description	= description,
+		.cred			= current_cred(),
+		.match			= type->match,
+		.match_data		= description,
+		.flags			= KEYRING_SEARCH_LOOKUP_DIRECT,
+	};
 	struct key *key;
 	key_ref_t key_ref;
 	int ret;
 
 	kenter("%s,%s,%p,%zu,%p,%p,%lx",
-	       type->name, description, callout_info, callout_len, aux,
-	       dest_keyring, flags);
+	       ctx.index_key.type->name, ctx.index_key.description,
+	       callout_info, callout_len, aux, dest_keyring, flags);
 
 	/* search all the process keyrings for a key */
-	key_ref = search_process_keyrings(type, description, type->match,
-					  false, cred);
+	key_ref = search_process_keyrings(&ctx);
 
 	if (!IS_ERR(key_ref)) {
 		key = key_ref_to_ptr(key_ref);
@@ -568,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type,
 		if (!callout_info)
 			goto error;
 
-		key = construct_key_and_link(type, description, callout_info,
-					     callout_len, aux, dest_keyring,
-					     flags);
+		key = construct_key_and_link(&ctx, callout_info, callout_len,
+					     aux, dest_keyring, flags);
 	}
 
 error:
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 92077de555df..8d09852854ca 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -239,15 +239,17 @@ static int key_get_instantiation_authkey_match(const struct key *key,
  */
 struct key *key_get_instantiation_authkey(key_serial_t target_id)
 {
-	const struct cred *cred = current_cred();
+	struct keyring_search_context ctx = {
+		.index_key.type		= &key_type_request_key_auth,
+		.cred			= current_cred(),
+		.match			= key_get_instantiation_authkey_match,
+		.match_data		= (void *)(unsigned long)target_id,
+		.flags			= KEYRING_SEARCH_LOOKUP_DIRECT,
+	};
 	struct key *authkey;
 	key_ref_t authkey_ref;
 
-	authkey_ref = search_process_keyrings(
-		&key_type_request_key_auth,
-		(void *) (unsigned long) target_id,
-		key_get_instantiation_authkey_match,
-		false, cred);
+	authkey_ref = search_process_keyrings(&ctx);
 
 	if (IS_ERR(authkey_ref)) {
 		authkey = ERR_CAST(authkey_ref);
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 55dc88939185..faa2caeb593f 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc);
  * arbitrary blob of data as the payload
  */
 struct key_type key_type_user = {
-	.name		= "user",
-	.instantiate	= user_instantiate,
-	.update		= user_update,
-	.match		= user_match,
-	.revoke		= user_revoke,
-	.destroy	= user_destroy,
-	.describe	= user_describe,
-	.read		= user_read,
+	.name			= "user",
+	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
+	.instantiate		= user_instantiate,
+	.update			= user_update,
+	.match			= user_match,
+	.revoke			= user_revoke,
+	.destroy		= user_destroy,
+	.describe		= user_describe,
+	.read			= user_read,
 };
 
 EXPORT_SYMBOL_GPL(key_type_user);
@@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user);
  */
 struct key_type key_type_logon = {
 	.name			= "logon",
+	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 	.instantiate		= user_instantiate,
 	.update			= user_update,
 	.match			= user_match,
-- 
cgit v1.2.3


From ccc3e6d9c9aea07a0b60b2b0bfc5b05a704b66d5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 24 Sep 2013 10:35:16 +0100
Subject: KEYS: Define a __key_get() wrapper to use rather than atomic_inc()

Define a __key_get() wrapper to use rather than atomic_inc() on the key usage
count as this makes it easier to hook in refcount error debugging.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys.txt | 13 ++++++++-----
 include/linux/key.h             | 10 +++++++---
 security/keys/key.c             |  2 +-
 security/keys/keyring.c         |  6 +++---
 security/keys/process_keys.c    | 16 ++++++++--------
 5 files changed, 27 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
index 9ede67084f0b..a4c33f1a7c6d 100644
--- a/Documentation/security/keys.txt
+++ b/Documentation/security/keys.txt
@@ -960,14 +960,17 @@ payload contents" for more information.
     the argument will not be parsed.
 
 
-(*) Extra references can be made to a key by calling the following function:
+(*) Extra references can be made to a key by calling one of the following
+    functions:
 
+	struct key *__key_get(struct key *key);
 	struct key *key_get(struct key *key);
 
-    These need to be disposed of by calling key_put() when they've been
-    finished with. The key pointer passed in will be returned. If the pointer
-    is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and
-    no increment will take place.
+    Keys so references will need to be disposed of by calling key_put() when
+    they've been finished with.  The key pointer passed in will be returned.
+
+    In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
+    then the key will not be dereferenced and no increment will take place.
 
 
 (*) A key's serial number can be obtained by calling:
diff --git a/include/linux/key.h b/include/linux/key.h
index d573e820a23d..ef596c7af585 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -219,13 +219,17 @@ extern void key_revoke(struct key *key);
 extern void key_invalidate(struct key *key);
 extern void key_put(struct key *key);
 
-static inline struct key *key_get(struct key *key)
+static inline struct key *__key_get(struct key *key)
 {
-	if (key)
-		atomic_inc(&key->usage);
+	atomic_inc(&key->usage);
 	return key;
 }
 
+static inline struct key *key_get(struct key *key)
+{
+	return key ? __key_get(key) : key;
+}
+
 static inline void key_ref_put(key_ref_t key_ref)
 {
 	key_put(key_ref_to_ptr(key_ref));
diff --git a/security/keys/key.c b/security/keys/key.c
index 7e6bc396bb23..1e23cc288106 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -644,7 +644,7 @@ found:
 	/* this races with key_put(), but that doesn't matter since key_put()
 	 * doesn't actually change the key
 	 */
-	atomic_inc(&key->usage);
+	__key_get(key);
 
 error:
 	spin_unlock(&key_serial_lock);
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index b42f2d4f7f83..87eff32b53f4 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -479,7 +479,7 @@ not_this_keyring:
 
 	/* we found a viable match */
 found:
-	atomic_inc(&key->usage);
+	__key_get(key);
 	key->last_used_at = ctx->now.tv_sec;
 	keyring->last_used_at = ctx->now.tv_sec;
 	while (sp > 0)
@@ -573,7 +573,7 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
 	return ERR_PTR(-ENOKEY);
 
 found:
-	atomic_inc(&key->usage);
+	__key_get(key);
 	keyring->last_used_at = key->last_used_at =
 		current_kernel_time().tv_sec;
 	rcu_read_unlock();
@@ -909,7 +909,7 @@ void __key_link(struct key *keyring, struct key *key,
 
 	klist = rcu_dereference_locked_keyring(keyring);
 
-	atomic_inc(&key->usage);
+	__key_get(key);
 	keyring->last_used_at = key->last_used_at =
 		current_kernel_time().tv_sec;
 
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index e68a3e0e7aa0..68548ea6fe01 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 		if (IS_ERR(keyring))
 			return PTR_ERR(keyring);
 	} else {
-		atomic_inc(&keyring->usage);
+		__key_get(keyring);
 	}
 
 	/* install the keyring */
@@ -544,7 +544,7 @@ try_again:
 		}
 
 		key = ctx.cred->thread_keyring;
-		atomic_inc(&key->usage);
+		__key_get(key);
 		key_ref = make_key_ref(key, 1);
 		break;
 
@@ -562,7 +562,7 @@ try_again:
 		}
 
 		key = ctx.cred->process_keyring;
-		atomic_inc(&key->usage);
+		__key_get(key);
 		key_ref = make_key_ref(key, 1);
 		break;
 
@@ -593,7 +593,7 @@ try_again:
 
 		rcu_read_lock();
 		key = rcu_dereference(ctx.cred->session_keyring);
-		atomic_inc(&key->usage);
+		__key_get(key);
 		rcu_read_unlock();
 		key_ref = make_key_ref(key, 1);
 		break;
@@ -606,7 +606,7 @@ try_again:
 		}
 
 		key = ctx.cred->user->uid_keyring;
-		atomic_inc(&key->usage);
+		__key_get(key);
 		key_ref = make_key_ref(key, 1);
 		break;
 
@@ -618,7 +618,7 @@ try_again:
 		}
 
 		key = ctx.cred->user->session_keyring;
-		atomic_inc(&key->usage);
+		__key_get(key);
 		key_ref = make_key_ref(key, 1);
 		break;
 
@@ -632,7 +632,7 @@ try_again:
 		if (!key)
 			goto error;
 
-		atomic_inc(&key->usage);
+		__key_get(key);
 		key_ref = make_key_ref(key, 1);
 		break;
 
@@ -648,7 +648,7 @@ try_again:
 		} else {
 			rka = ctx.cred->request_key_auth->payload.data;
 			key = rka->dest_keyring;
-			atomic_inc(&key->usage);
+			__key_get(key);
 		}
 		up_read(&ctx.cred->request_key_auth->sem);
 		if (!key)
-- 
cgit v1.2.3


From 3cb989501c2688cacbb7dc4b0d353faf838f53a1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 24 Sep 2013 10:35:17 +0100
Subject: Add a generic associative array implementation.

Add a generic associative array implementation that can be used as the
container for keyrings, thereby massively increasing the capacity available
whilst also speeding up searching in keyrings that contain a lot of keys.

This may also be useful in FS-Cache for tracking cookies.

Documentation is added into Documentation/associative_array.txt

Some of the properties of the implementation are:

 (1) Objects are opaque pointers.  The implementation does not care where they
     point (if anywhere) or what they point to (if anything).

     [!] NOTE: Pointers to objects _must_ be zero in the two least significant
     	       bits.

 (2) Objects do not need to contain linkage blocks for use by the array.  This
     permits an object to be located in multiple arrays simultaneously.
     Rather, the array is made up of metadata blocks that point to objects.

 (3) Objects are labelled as being one of two types (the type is a bool value).
     This information is stored in the array, but has no consequence to the
     array itself or its algorithms.

 (4) Objects require index keys to locate them within the array.

 (5) Index keys must be unique.  Inserting an object with the same key as one
     already in the array will replace the old object.

 (6) Index keys can be of any length and can be of different lengths.

 (7) Index keys should encode the length early on, before any variation due to
     length is seen.

 (8) Index keys can include a hash to scatter objects throughout the array.

 (9) The array can iterated over.  The objects will not necessarily come out in
     key order.

(10) The array can be iterated whilst it is being modified, provided the RCU
     readlock is being held by the iterator.  Note, however, under these
     circumstances, some objects may be seen more than once.  If this is a
     problem, the iterator should lock against modification.  Objects will not
     be missed, however, unless deleted.

(11) Objects in the array can be looked up by means of their index key.

(12) Objects can be looked up whilst the array is being modified, provided the
     RCU readlock is being held by the thread doing the look up.

The implementation uses a tree of 16-pointer nodes internally that are indexed
on each level by nibbles from the index key.  To improve memory efficiency,
shortcuts can be emplaced to skip over what would otherwise be a series of
single-occupancy nodes.  Further, nodes pack leaf object pointers into spare
space in the node rather than making an extra branch until as such time an
object needs to be added to a full node.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/assoc_array.txt    |  574 +++++++++++++
 include/linux/assoc_array.h      |   92 ++
 include/linux/assoc_array_priv.h |  182 ++++
 lib/Kconfig                      |   14 +
 lib/Makefile                     |    1 +
 lib/assoc_array.c                | 1745 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 2608 insertions(+)
 create mode 100644 Documentation/assoc_array.txt
 create mode 100644 include/linux/assoc_array.h
 create mode 100644 include/linux/assoc_array_priv.h
 create mode 100644 lib/assoc_array.c

(limited to 'include/linux')

diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
new file mode 100644
index 000000000000..f4faec0f66e4
--- /dev/null
+++ b/Documentation/assoc_array.txt
@@ -0,0 +1,574 @@
+		   ========================================
+		   GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION
+		   ========================================
+
+Contents:
+
+ - Overview.
+
+ - The public API.
+   - Edit script.
+   - Operations table.
+   - Manipulation functions.
+   - Access functions.
+   - Index key form.
+
+ - Internal workings.
+   - Basic internal tree layout.
+   - Shortcuts.
+   - Splitting and collapsing nodes.
+   - Non-recursive iteration.
+   - Simultaneous alteration and iteration.
+
+
+========
+OVERVIEW
+========
+
+This associative array implementation is an object container with the following
+properties:
+
+ (1) Objects are opaque pointers.  The implementation does not care where they
+     point (if anywhere) or what they point to (if anything).
+
+     [!] NOTE: Pointers to objects _must_ be zero in the least significant bit.
+
+ (2) Objects do not need to contain linkage blocks for use by the array.  This
+     permits an object to be located in multiple arrays simultaneously.
+     Rather, the array is made up of metadata blocks that point to objects.
+
+ (3) Objects require index keys to locate them within the array.
+
+ (4) Index keys must be unique.  Inserting an object with the same key as one
+     already in the array will replace the old object.
+
+ (5) Index keys can be of any length and can be of different lengths.
+
+ (6) Index keys should encode the length early on, before any variation due to
+     length is seen.
+
+ (7) Index keys can include a hash to scatter objects throughout the array.
+
+ (8) The array can iterated over.  The objects will not necessarily come out in
+     key order.
+
+ (9) The array can be iterated over whilst it is being modified, provided the
+     RCU readlock is being held by the iterator.  Note, however, under these
+     circumstances, some objects may be seen more than once.  If this is a
+     problem, the iterator should lock against modification.  Objects will not
+     be missed, however, unless deleted.
+
+(10) Objects in the array can be looked up by means of their index key.
+
+(11) Objects can be looked up whilst the array is being modified, provided the
+     RCU readlock is being held by the thread doing the look up.
+
+The implementation uses a tree of 16-pointer nodes internally that are indexed
+on each level by nibbles from the index key in the same manner as in a radix
+tree.  To improve memory efficiency, shortcuts can be emplaced to skip over
+what would otherwise be a series of single-occupancy nodes.  Further, nodes
+pack leaf object pointers into spare space in the node rather than making an
+extra branch until as such time an object needs to be added to a full node.
+
+
+==============
+THE PUBLIC API
+==============
+
+The public API can be found in <linux/assoc_array.h>.  The associative array is
+rooted on the following structure:
+
+	struct assoc_array {
+		...
+	};
+
+The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY.
+
+
+EDIT SCRIPT
+-----------
+
+The insertion and deletion functions produce an 'edit script' that can later be
+applied to effect the changes without risking ENOMEM.  This retains the
+preallocated metadata blocks that will be installed in the internal tree and
+keeps track of the metadata blocks that will be removed from the tree when the
+script is applied.
+
+This is also used to keep track of dead blocks and dead objects after the
+script has been applied so that they can be freed later.  The freeing is done
+after an RCU grace period has passed - thus allowing access functions to
+proceed under the RCU read lock.
+
+The script appears as outside of the API as a pointer of the type:
+
+	struct assoc_array_edit;
+
+There are two functions for dealing with the script:
+
+ (1) Apply an edit script.
+
+	void assoc_array_apply_edit(struct assoc_array_edit *edit);
+
+     This will perform the edit functions, interpolating various write barriers
+     to permit accesses under the RCU read lock to continue.  The edit script
+     will then be passed to call_rcu() to free it and any dead stuff it points
+     to.
+
+ (2) Cancel an edit script.
+
+	void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+
+     This frees the edit script and all preallocated memory immediately.  If
+     this was for insertion, the new object is _not_ released by this function,
+     but must rather be released by the caller.
+
+These functions are guaranteed not to fail.
+
+
+OPERATIONS TABLE
+----------------
+
+Various functions take a table of operations:
+
+	struct assoc_array_ops {
+		...
+	};
+
+This points to a number of methods, all of which need to be provided:
+
+ (1) Get a chunk of index key from caller data:
+
+	unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+     This should return a chunk of caller-supplied index key starting at the
+     *bit* position given by the level argument.  The level argument will be a
+     multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return
+     ASSOC_ARRAY_KEY_CHUNK_SIZE bits.  No error is possible.
+
+
+ (2) Get a chunk of an object's index key.
+
+	unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+     As the previous function, but gets its data from an object in the array
+     rather than from a caller-supplied index key.
+
+
+ (3) See if this is the object we're looking for.
+
+	bool (*compare_object)(const void *object, const void *index_key);
+
+     Compare the object against an index key and return true if it matches and
+     false if it doesn't.
+
+
+ (4) Diff the index keys of two objects.
+
+	int (*diff_objects)(const void *a, const void *b);
+
+     Return the bit position at which the index keys of two objects differ or
+     -1 if they are the same.
+
+
+ (5) Free an object.
+
+	void (*free_object)(void *object);
+
+     Free the specified object.  Note that this may be called an RCU grace
+     period after assoc_array_apply_edit() was called, so synchronize_rcu() may
+     be necessary on module unloading.
+
+
+MANIPULATION FUNCTIONS
+----------------------
+
+There are a number of functions for manipulating an associative array:
+
+ (1) Initialise an associative array.
+
+	void assoc_array_init(struct assoc_array *array);
+
+     This initialises the base structure for an associative array.  It can't
+     fail.
+
+
+ (2) Insert/replace an object in an associative array.
+
+	struct assoc_array_edit *
+	assoc_array_insert(struct assoc_array *array,
+			   const struct assoc_array_ops *ops,
+			   const void *index_key,
+			   void *object);
+
+     This inserts the given object into the array.  Note that the least
+     significant bit of the pointer must be zero as it's used to type-mark
+     pointers internally.
+
+     If an object already exists for that key then it will be replaced with the
+     new object and the old one will be freed automatically.
+
+     The index_key argument should hold index key information and is
+     passed to the methods in the ops table when they are called.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (3) Delete an object from an associative array.
+
+	struct assoc_array_edit *
+	assoc_array_delete(struct assoc_array *array,
+			   const struct assoc_array_ops *ops,
+			   const void *index_key);
+
+     This deletes an object that matches the specified data from the array.
+
+     The index_key argument should hold index key information and is
+     passed to the methods in the ops table when they are called.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.  NULL will be returned if the specified object is
+     not found within the array.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (4) Delete all objects from an associative array.
+
+	struct assoc_array_edit *
+	assoc_array_clear(struct assoc_array *array,
+			  const struct assoc_array_ops *ops);
+
+     This deletes all the objects from an associative array and leaves it
+     completely empty.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (5) Destroy an associative array, deleting all objects.
+
+	void assoc_array_destroy(struct assoc_array *array,
+				 const struct assoc_array_ops *ops);
+
+     This destroys the contents of the associative array and leaves it
+     completely empty.  It is not permitted for another thread to be traversing
+     the array under the RCU read lock at the same time as this function is
+     destroying it as no RCU deferral is performed on memory release -
+     something that would require memory to be allocated.
+
+     The caller should lock exclusively against other modifiers and accessors
+     of the array.
+
+
+ (6) Garbage collect an associative array.
+
+	int assoc_array_gc(struct assoc_array *array,
+			   const struct assoc_array_ops *ops,
+			   bool (*iterator)(void *object, void *iterator_data),
+			   void *iterator_data);
+
+     This iterates over the objects in an associative array and passes each one
+     to iterator().  If iterator() returns true, the object is kept.  If it
+     returns false, the object will be freed.  If the iterator() function
+     returns true, it must perform any appropriate refcount incrementing on the
+     object before returning.
+
+     The internal tree will be packed down if possible as part of the iteration
+     to reduce the number of nodes in it.
+
+     The iterator_data is passed directly to iterator() and is otherwise
+     ignored by the function.
+
+     The function will return 0 if successful and -ENOMEM if there wasn't
+     enough memory.
+
+     It is possible for other threads to iterate over or search the array under
+     the RCU read lock whilst this function is in progress.  The caller should
+     lock exclusively against other modifiers of the array.
+
+
+ACCESS FUNCTIONS
+----------------
+
+There are two functions for accessing an associative array:
+
+ (1) Iterate over all the objects in an associative array.
+
+	int assoc_array_iterate(const struct assoc_array *array,
+				int (*iterator)(const void *object,
+						void *iterator_data),
+				void *iterator_data);
+
+     This passes each object in the array to the iterator callback function.
+     iterator_data is private data for that function.
+
+     This may be used on an array at the same time as the array is being
+     modified, provided the RCU read lock is held.  Under such circumstances,
+     it is possible for the iteration function to see some objects twice.  If
+     this is a problem, then modification should be locked against.  The
+     iteration algorithm should not, however, miss any objects.
+
+     The function will return 0 if no objects were in the array or else it will
+     return the result of the last iterator function called.  Iteration stops
+     immediately if any call to the iteration function results in a non-zero
+     return.
+
+
+ (2) Find an object in an associative array.
+
+	void *assoc_array_find(const struct assoc_array *array,
+			       const struct assoc_array_ops *ops,
+			       const void *index_key);
+
+     This walks through the array's internal tree directly to the object
+     specified by the index key..
+
+     This may be used on an array at the same time as the array is being
+     modified, provided the RCU read lock is held.
+
+     The function will return the object if found (and set *_type to the object
+     type) or will return NULL if the object was not found.
+
+
+INDEX KEY FORM
+--------------
+
+The index key can be of any form, but since the algorithms aren't told how long
+the key is, it is strongly recommended that the index key includes its length
+very early on before any variation due to the length would have an effect on
+comparisons.
+
+This will cause leaves with different length keys to scatter away from each
+other - and those with the same length keys to cluster together.
+
+It is also recommended that the index key begin with a hash of the rest of the
+key to maximise scattering throughout keyspace.
+
+The better the scattering, the wider and lower the internal tree will be.
+
+Poor scattering isn't too much of a problem as there are shortcuts and nodes
+can contain mixtures of leaves and metadata pointers.
+
+The index key is read in chunks of machine word.  Each chunk is subdivided into
+one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and
+on a 64-bit CPU, 16 levels.  Unless the scattering is really poor, it is
+unlikely that more than one word of any particular index key will have to be
+used.
+
+
+=================
+INTERNAL WORKINGS
+=================
+
+The associative array data structure has an internal tree.  This tree is
+constructed of two types of metadata blocks: nodes and shortcuts.
+
+A node is an array of slots.  Each slot can contain one of four things:
+
+ (*) A NULL pointer, indicating that the slot is empty.
+
+ (*) A pointer to an object (a leaf).
+
+ (*) A pointer to a node at the next level.
+
+ (*) A pointer to a shortcut.
+
+
+BASIC INTERNAL TREE LAYOUT
+--------------------------
+
+Ignoring shortcuts for the moment, the nodes form a multilevel tree.  The index
+key space is strictly subdivided by the nodes in the tree and nodes occur on
+fixed levels.  For example:
+
+ Level:	0		1		2		3
+	===============	===============	===============	===============
+							NODE D
+			NODE B		NODE C	+------>+---+
+		+------>+---+	+------>+---+	|	| 0 |
+	NODE A	|	| 0 |	|	| 0 |	|	+---+
+	+---+	|	+---+	|	+---+	|	:   :
+	| 0 |	|	:   :	|	:   :	|	+---+
+	+---+	|	+---+	|	+---+	|	| f |
+	| 1 |---+	| 3 |---+	| 7 |---+	+---+
+	+---+		+---+		+---+
+	:   :		:   :		| 8 |---+
+	+---+		+---+		+---+	|	NODE E
+	| e |---+	| f |		:   :   +------>+---+
+	+---+	|	+---+		+---+		| 0 |
+	| f |	|			| f |		+---+
+	+---+	|			+---+		:   :
+		|	NODE F				+---+
+		+------>+---+				| f |
+			| 0 |		NODE G		+---+
+			+---+	+------>+---+
+			:   :	|	| 0 |
+			+---+	|	+---+
+			| 6 |---+	:   :
+			+---+		+---+
+			:   :		| f |
+			+---+		+---+
+			| f |
+			+---+
+
+In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
+Assuming no other meta data nodes in the tree, the key space is divided thusly:
+
+	KEY PREFIX	NODE
+	==========	====
+	137*		D
+	138*		E
+	13[0-69-f]*	C
+	1[0-24-f]*	B
+	e6*		G
+	e[0-57-f]*	F
+	[02-df]*	A
+
+So, for instance, keys with the following example index keys will be found in
+the appropriate nodes:
+
+	INDEX KEY	PREFIX	NODE
+	===============	=======	====
+	13694892892489	13	C
+	13795289025897	137	D
+	13889dde88793	138	E
+	138bbb89003093	138	E
+	1394879524789	12	C
+	1458952489	1	B
+	9431809de993ba	-	A
+	b4542910809cd	-	A
+	e5284310def98	e	F
+	e68428974237	e6	G
+	e7fffcbd443	e	F
+	f3842239082	-	A
+
+To save memory, if a node can hold all the leaves in its portion of keyspace,
+then the node will have all those leaves in it and will not have any metadata
+pointers - even if some of those leaves would like to be in the same slot.
+
+A node can contain a heterogeneous mix of leaves and metadata pointers.
+Metadata pointers must be in the slots that match their subdivisions of key
+space.  The leaves can be in any slot not occupied by a metadata pointer.  It
+is guaranteed that none of the leaves in a node will match a slot occupied by a
+metadata pointer.  If the metadata pointer is there, any leaf whose key matches
+the metadata key prefix must be in the subtree that the metadata pointer points
+to.
+
+In the above example list of index keys, node A will contain:
+
+	SLOT	CONTENT		INDEX KEY (PREFIX)
+	====	===============	==================
+	1	PTR TO NODE B	1*
+	any	LEAF		9431809de993ba
+	any	LEAF		b4542910809cd
+	e	PTR TO NODE F	e*
+	any	LEAF		f3842239082
+
+and node B:
+
+	3	PTR TO NODE C	13*
+	any	LEAF		1458952489
+
+
+SHORTCUTS
+---------
+
+Shortcuts are metadata records that jump over a piece of keyspace.  A shortcut
+is a replacement for a series of single-occupancy nodes ascending through the
+levels.  Shortcuts exist to save memory and to speed up traversal.
+
+It is possible for the root of the tree to be a shortcut - say, for example,
+the tree contains at least 17 nodes all with key prefix '1111'.  The insertion
+algorithm will insert a shortcut to skip over the '1111' keyspace in a single
+bound and get to the fourth level where these actually become different.
+
+
+SPLITTING AND COLLAPSING NODES
+------------------------------
+
+Each node has a maximum capacity of 16 leaves and metadata pointers.  If the
+insertion algorithm finds that it is trying to insert a 17th object into a
+node, that node will be split such that at least two leaves that have a common
+key segment at that level end up in a separate node rooted on that slot for
+that common key segment.
+
+If the leaves in a full node and the leaf that is being inserted are
+sufficiently similar, then a shortcut will be inserted into the tree.
+
+When the number of objects in the subtree rooted at a node falls to 16 or
+fewer, then the subtree will be collapsed down to a single node - and this will
+ripple towards the root if possible.
+
+
+NON-RECURSIVE ITERATION
+-----------------------
+
+Each node and shortcut contains a back pointer to its parent and the number of
+slot in that parent that points to it.  None-recursive iteration uses these to
+proceed rootwards through the tree, going to the parent node, slot N + 1 to
+make sure progress is made without the need for a stack.
+
+The backpointers, however, make simultaneous alteration and iteration tricky.
+
+
+SIMULTANEOUS ALTERATION AND ITERATION
+-------------------------------------
+
+There are a number of cases to consider:
+
+ (1) Simple insert/replace.  This involves simply replacing a NULL or old
+     matching leaf pointer with the pointer to the new leaf after a barrier.
+     The metadata blocks don't change otherwise.  An old leaf won't be freed
+     until after the RCU grace period.
+
+ (2) Simple delete.  This involves just clearing an old matching leaf.  The
+     metadata blocks don't change otherwise.  The old leaf won't be freed until
+     after the RCU grace period.
+
+ (3) Insertion replacing part of a subtree that we haven't yet entered.  This
+     may involve replacement of part of that subtree - but that won't affect
+     the iteration as we won't have reached the pointer to it yet and the
+     ancestry blocks are not replaced (the layout of those does not change).
+
+ (4) Insertion replacing nodes that we're actively processing.  This isn't a
+     problem as we've passed the anchoring pointer and won't switch onto the
+     new layout until we follow the back pointers - at which point we've
+     already examined the leaves in the replaced node (we iterate over all the
+     leaves in a node before following any of its metadata pointers).
+
+     We might, however, re-see some leaves that have been split out into a new
+     branch that's in a slot further along than we were at.
+
+ (5) Insertion replacing nodes that we're processing a dependent branch of.
+     This won't affect us until we follow the back pointers.  Similar to (4).
+
+ (6) Deletion collapsing a branch under us.  This doesn't affect us because the
+     back pointers will get us back to the parent of the new node before we
+     could see the new node.  The entire collapsed subtree is thrown away
+     unchanged - and will still be rooted on the same slot, so we shouldn't
+     process it a second time as we'll go back to slot + 1.
+
+Note:
+
+ (*) Under some circumstances, we need to simultaneously change the parent
+     pointer and the parent slot pointer on a node (say, for example, we
+     inserted another node before it and moved it up a level).  We cannot do
+     this without locking against a read - so we have to replace that node too.
+
+     However, when we're changing a shortcut into a node this isn't a problem
+     as shortcuts only have one slot and so the parent slot number isn't used
+     when traversing backwards over one.  This means that it's okay to change
+     the slot number first - provided suitable barriers are used to make sure
+     the parent slot number is read after the back pointer.
+
+Obsolete blocks and leaves are freed up after an RCU grace period has passed,
+so as long as anyone doing walking or iteration holds the RCU read lock, the
+old superstructure should not go away on them.
diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
new file mode 100644
index 000000000000..9a193b84238a
--- /dev/null
+++ b/include/linux/assoc_array.h
@@ -0,0 +1,92 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_H
+#define _LINUX_ASSOC_ARRAY_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/types.h>
+
+#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */
+
+/*
+ * Generic associative array.
+ */
+struct assoc_array {
+	struct assoc_array_ptr	*root;		/* The node at the root of the tree */
+	unsigned long		nr_leaves_on_tree;
+};
+
+/*
+ * Operations on objects and index keys for use by array manipulation routines.
+ */
+struct assoc_array_ops {
+	/* Method to get a chunk of an index key from caller-supplied data */
+	unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+	/* Method to get a piece of an object's index key */
+	unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+	/* Is this the object we're looking for? */
+	bool (*compare_object)(const void *object, const void *index_key);
+
+	/* How different are two objects, to a bit position in their keys? (or
+	 * -1 if they're the same)
+	 */
+	int (*diff_objects)(const void *a, const void *b);
+
+	/* Method to free an object. */
+	void (*free_object)(void *object);
+};
+
+/*
+ * Access and manipulation functions.
+ */
+struct assoc_array_edit;
+
+static inline void assoc_array_init(struct assoc_array *array)
+{
+	array->root = NULL;
+	array->nr_leaves_on_tree = 0;
+}
+
+extern int assoc_array_iterate(const struct assoc_array *array,
+			       int (*iterator)(const void *object,
+					       void *iterator_data),
+			       void *iterator_data);
+extern void *assoc_array_find(const struct assoc_array *array,
+			      const struct assoc_array_ops *ops,
+			      const void *index_key);
+extern void assoc_array_destroy(struct assoc_array *array,
+				const struct assoc_array_ops *ops);
+extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+						   const struct assoc_array_ops *ops,
+						   const void *index_key,
+						   void *object);
+extern void assoc_array_insert_set_object(struct assoc_array_edit *edit,
+					  void *object);
+extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+						   const struct assoc_array_ops *ops,
+						   const void *index_key);
+extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+						  const struct assoc_array_ops *ops);
+extern void assoc_array_apply_edit(struct assoc_array_edit *edit);
+extern void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+extern int assoc_array_gc(struct assoc_array *array,
+			  const struct assoc_array_ops *ops,
+			  bool (*iterator)(void *object, void *iterator_data),
+			  void *iterator_data);
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_H */
diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h
new file mode 100644
index 000000000000..711275e6681c
--- /dev/null
+++ b/include/linux/assoc_array_priv.h
@@ -0,0 +1,182 @@
+/* Private definitions for the generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_PRIV_H
+#define _LINUX_ASSOC_ARRAY_PRIV_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/assoc_array.h>
+
+#define ASSOC_ARRAY_FAN_OUT		16	/* Number of slots per node */
+#define ASSOC_ARRAY_FAN_MASK		(ASSOC_ARRAY_FAN_OUT - 1)
+#define ASSOC_ARRAY_LEVEL_STEP		(ilog2(ASSOC_ARRAY_FAN_OUT))
+#define ASSOC_ARRAY_LEVEL_STEP_MASK	(ASSOC_ARRAY_LEVEL_STEP - 1)
+#define ASSOC_ARRAY_KEY_CHUNK_MASK	(ASSOC_ARRAY_KEY_CHUNK_SIZE - 1)
+#define ASSOC_ARRAY_KEY_CHUNK_SHIFT	(ilog2(BITS_PER_LONG))
+
+/*
+ * Undefined type representing a pointer with type information in the bottom
+ * two bits.
+ */
+struct assoc_array_ptr;
+
+/*
+ * An N-way node in the tree.
+ *
+ * Each slot contains one of four things:
+ *
+ *	(1) Nothing (NULL).
+ *
+ *	(2) A leaf object (pointer types 0).
+ *
+ *	(3) A next-level node (pointer type 1, subtype 0).
+ *
+ *	(4) A shortcut (pointer type 1, subtype 1).
+ *
+ * The tree is optimised for search-by-ID, but permits reasonable iteration
+ * also.
+ *
+ * The tree is navigated by constructing an index key consisting of an array of
+ * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size.
+ *
+ * The segments correspond to levels of the tree (the first segment is used at
+ * level 0, the second at level 1, etc.).
+ */
+struct assoc_array_node {
+	struct assoc_array_ptr	*back_pointer;
+	u8			parent_slot;
+	struct assoc_array_ptr	*slots[ASSOC_ARRAY_FAN_OUT];
+	unsigned long		nr_leaves_on_branch;
+};
+
+/*
+ * A shortcut through the index space out to where a collection of nodes/leaves
+ * with the same IDs live.
+ */
+struct assoc_array_shortcut {
+	struct assoc_array_ptr	*back_pointer;
+	int			parent_slot;
+	int			skip_to_level;
+	struct assoc_array_ptr	*next_node;
+	unsigned long		index_key[];
+};
+
+/*
+ * Preallocation cache.
+ */
+struct assoc_array_edit {
+	struct rcu_head			rcu;
+	struct assoc_array		*array;
+	const struct assoc_array_ops	*ops;
+	const struct assoc_array_ops	*ops_for_excised_subtree;
+	struct assoc_array_ptr		*leaf;
+	struct assoc_array_ptr		**leaf_p;
+	struct assoc_array_ptr		*dead_leaf;
+	struct assoc_array_ptr		*new_meta[3];
+	struct assoc_array_ptr		*excised_meta[1];
+	struct assoc_array_ptr		*excised_subtree;
+	struct assoc_array_ptr		**set_backpointers[ASSOC_ARRAY_FAN_OUT];
+	struct assoc_array_ptr		*set_backpointers_to;
+	struct assoc_array_node		*adjust_count_on;
+	long				adjust_count_by;
+	struct {
+		struct assoc_array_ptr	**ptr;
+		struct assoc_array_ptr	*to;
+	} set[2];
+	struct {
+		u8			*p;
+		u8			to;
+	} set_parent_slot[1];
+	u8				segment_cache[ASSOC_ARRAY_FAN_OUT + 1];
+};
+
+/*
+ * Internal tree member pointers are marked in the bottom one or two bits to
+ * indicate what type they are so that we don't have to look behind every
+ * pointer to see what it points to.
+ *
+ * We provide functions to test type annotations and to create and translate
+ * the annotated pointers.
+ */
+#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL
+#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL	/* Points to leaf (or nowhere) */
+#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL	/* Points to node or shortcut */
+#define ASSOC_ARRAY_PTR_SUBTYPE_MASK	0x2UL
+#define ASSOC_ARRAY_PTR_NODE_SUBTYPE	0x0UL
+#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL
+
+static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x)
+{
+	return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK;
+}
+static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x)
+{
+	return !assoc_array_ptr_is_meta(x);
+}
+static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x)
+{
+	return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK;
+}
+static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x)
+{
+	return !assoc_array_ptr_is_shortcut(x);
+}
+
+static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x)
+{
+	return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK);
+}
+
+static inline
+unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x)
+{
+	return (unsigned long)x &
+		~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK);
+}
+static inline
+struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x)
+{
+	return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x);
+}
+static inline
+struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x)
+{
+	return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x);
+}
+
+static inline
+struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t)
+{
+	return (struct assoc_array_ptr *)((unsigned long)p | t);
+}
+static inline
+struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p)
+{
+	return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE);
+}
+static inline
+struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p)
+{
+	return __assoc_array_x_to_ptr(
+		p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE);
+}
+static inline
+struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p)
+{
+	return __assoc_array_x_to_ptr(
+		p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE);
+}
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index b3c8be0da17f..3cb879b1f282 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
 config BTREE
 	boolean
 
+config ASSOCIATIVE_ARRAY
+	bool
+	help
+	  Generic associative array.  Can be searched and iterated over whilst
+	  it is being modified.  It is also reasonably quick to search and
+	  modify.  The algorithms are non-recursive, and the trees are highly
+	  capacious.
+
+	  See:
+
+		Documentation/assoc_array.txt
+
+	  for more information.
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile
index f3bb2cb98adf..1e806477e472 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -51,6 +51,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
new file mode 100644
index 000000000000..a0952818f938
--- /dev/null
+++ b/lib/assoc_array.c
@@ -0,0 +1,1745 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+//#define DEBUG
+#include <linux/slab.h>
+#include <linux/assoc_array_priv.h>
+
+/*
+ * Iterate over an associative array.  The caller must hold the RCU read lock
+ * or better.
+ */
+static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
+				       const struct assoc_array_ptr *stop,
+				       int (*iterator)(const void *leaf,
+						       void *iterator_data),
+				       void *iterator_data)
+{
+	const struct assoc_array_shortcut *shortcut;
+	const struct assoc_array_node *node;
+	const struct assoc_array_ptr *cursor, *ptr, *parent;
+	unsigned long has_meta;
+	int slot, ret;
+
+	cursor = root;
+
+begin_node:
+	if (assoc_array_ptr_is_shortcut(cursor)) {
+		/* Descend through a shortcut */
+		shortcut = assoc_array_ptr_to_shortcut(cursor);
+		smp_read_barrier_depends();
+		cursor = ACCESS_ONCE(shortcut->next_node);
+	}
+
+	node = assoc_array_ptr_to_node(cursor);
+	smp_read_barrier_depends();
+	slot = 0;
+
+	/* We perform two passes of each node.
+	 *
+	 * The first pass does all the leaves in this node.  This means we
+	 * don't miss any leaves if the node is split up by insertion whilst
+	 * we're iterating over the branches rooted here (we may, however, see
+	 * some leaves twice).
+	 */
+	has_meta = 0;
+	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = ACCESS_ONCE(node->slots[slot]);
+		has_meta |= (unsigned long)ptr;
+		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+			/* We need a barrier between the read of the pointer
+			 * and dereferencing the pointer - but only if we are
+			 * actually going to dereference it.
+			 */
+			smp_read_barrier_depends();
+
+			/* Invoke the callback */
+			ret = iterator(assoc_array_ptr_to_leaf(ptr),
+				       iterator_data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* The second pass attends to all the metadata pointers.  If we follow
+	 * one of these we may find that we don't come back here, but rather go
+	 * back to a replacement node with the leaves in a different layout.
+	 *
+	 * We are guaranteed to make progress, however, as the slot number for
+	 * a particular portion of the key space cannot change - and we
+	 * continue at the back pointer + 1.
+	 */
+	if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
+		goto finished_node;
+	slot = 0;
+
+continue_node:
+	node = assoc_array_ptr_to_node(cursor);
+	smp_read_barrier_depends();
+
+	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = ACCESS_ONCE(node->slots[slot]);
+		if (assoc_array_ptr_is_meta(ptr)) {
+			cursor = ptr;
+			goto begin_node;
+		}
+	}
+
+finished_node:
+	/* Move up to the parent (may need to skip back over a shortcut) */
+	parent = ACCESS_ONCE(node->back_pointer);
+	slot = node->parent_slot;
+	if (parent == stop)
+		return 0;
+
+	if (assoc_array_ptr_is_shortcut(parent)) {
+		shortcut = assoc_array_ptr_to_shortcut(parent);
+		smp_read_barrier_depends();
+		cursor = parent;
+		parent = ACCESS_ONCE(shortcut->back_pointer);
+		slot = shortcut->parent_slot;
+		if (parent == stop)
+			return 0;
+	}
+
+	/* Ascend to next slot in parent node */
+	cursor = parent;
+	slot++;
+	goto continue_node;
+}
+
+/**
+ * assoc_array_iterate - Pass all objects in the array to a callback
+ * @array: The array to iterate over.
+ * @iterator: The callback function.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Iterate over all the objects in an associative array.  Each one will be
+ * presented to the iterator function.
+ *
+ * If the array is being modified concurrently with the iteration then it is
+ * possible that some objects in the array will be passed to the iterator
+ * callback more than once - though every object should be passed at least
+ * once.  If this is undesirable then the caller must lock against modification
+ * for the duration of this function.
+ *
+ * The function will return 0 if no objects were in the array or else it will
+ * return the result of the last iterator function called.  Iteration stops
+ * immediately if any call to the iteration function results in a non-zero
+ * return.
+ *
+ * The caller should hold the RCU read lock or better if concurrent
+ * modification is possible.
+ */
+int assoc_array_iterate(const struct assoc_array *array,
+			int (*iterator)(const void *object,
+					void *iterator_data),
+			void *iterator_data)
+{
+	struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
+
+	if (!root)
+		return 0;
+	return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
+}
+
+enum assoc_array_walk_status {
+	assoc_array_walk_tree_empty,
+	assoc_array_walk_found_terminal_node,
+	assoc_array_walk_found_wrong_shortcut,
+} status;
+
+struct assoc_array_walk_result {
+	struct {
+		struct assoc_array_node	*node;	/* Node in which leaf might be found */
+		int		level;
+		int		slot;
+	} terminal_node;
+	struct {
+		struct assoc_array_shortcut *shortcut;
+		int		level;
+		int		sc_level;
+		unsigned long	sc_segments;
+		unsigned long	dissimilarity;
+	} wrong_shortcut;
+};
+
+/*
+ * Navigate through the internal tree looking for the closest node to the key.
+ */
+static enum assoc_array_walk_status
+assoc_array_walk(const struct assoc_array *array,
+		 const struct assoc_array_ops *ops,
+		 const void *index_key,
+		 struct assoc_array_walk_result *result)
+{
+	struct assoc_array_shortcut *shortcut;
+	struct assoc_array_node *node;
+	struct assoc_array_ptr *cursor, *ptr;
+	unsigned long sc_segments, dissimilarity;
+	unsigned long segments;
+	int level, sc_level, next_sc_level;
+	int slot;
+
+	pr_devel("-->%s()\n", __func__);
+
+	cursor = ACCESS_ONCE(array->root);
+	if (!cursor)
+		return assoc_array_walk_tree_empty;
+
+	level = 0;
+
+	/* Use segments from the key for the new leaf to navigate through the
+	 * internal tree, skipping through nodes and shortcuts that are on
+	 * route to the destination.  Eventually we'll come to a slot that is
+	 * either empty or contains a leaf at which point we've found a node in
+	 * which the leaf we're looking for might be found or into which it
+	 * should be inserted.
+	 */
+jumped:
+	segments = ops->get_key_chunk(index_key, level);
+	pr_devel("segments[%d]: %lx\n", level, segments);
+
+	if (assoc_array_ptr_is_shortcut(cursor))
+		goto follow_shortcut;
+
+consider_node:
+	node = assoc_array_ptr_to_node(cursor);
+	smp_read_barrier_depends();
+
+	slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+	slot &= ASSOC_ARRAY_FAN_MASK;
+	ptr = ACCESS_ONCE(node->slots[slot]);
+
+	pr_devel("consider slot %x [ix=%d type=%lu]\n",
+		 slot, level, (unsigned long)ptr & 3);
+
+	if (!assoc_array_ptr_is_meta(ptr)) {
+		/* The node doesn't have a node/shortcut pointer in the slot
+		 * corresponding to the index key that we have to follow.
+		 */
+		result->terminal_node.node = node;
+		result->terminal_node.level = level;
+		result->terminal_node.slot = slot;
+		pr_devel("<--%s() = terminal_node\n", __func__);
+		return assoc_array_walk_found_terminal_node;
+	}
+
+	if (assoc_array_ptr_is_node(ptr)) {
+		/* There is a pointer to a node in the slot corresponding to
+		 * this index key segment, so we need to follow it.
+		 */
+		cursor = ptr;
+		level += ASSOC_ARRAY_LEVEL_STEP;
+		if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
+			goto consider_node;
+		goto jumped;
+	}
+
+	/* There is a shortcut in the slot corresponding to the index key
+	 * segment.  We follow the shortcut if its partial index key matches
+	 * this leaf's.  Otherwise we need to split the shortcut.
+	 */
+	cursor = ptr;
+follow_shortcut:
+	shortcut = assoc_array_ptr_to_shortcut(cursor);
+	smp_read_barrier_depends();
+	pr_devel("shortcut to %d\n", shortcut->skip_to_level);
+	sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
+	BUG_ON(sc_level > shortcut->skip_to_level);
+
+	do {
+		/* Check the leaf against the shortcut's index key a word at a
+		 * time, trimming the final word (the shortcut stores the index
+		 * key completely from the root to the shortcut's target).
+		 */
+		if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
+			segments = ops->get_key_chunk(index_key, sc_level);
+
+		sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
+		dissimilarity = segments ^ sc_segments;
+
+		if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
+			/* Trim segments that are beyond the shortcut */
+			int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+			dissimilarity &= ~(ULONG_MAX << shift);
+			next_sc_level = shortcut->skip_to_level;
+		} else {
+			next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
+			next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+		}
+
+		if (dissimilarity != 0) {
+			/* This shortcut points elsewhere */
+			result->wrong_shortcut.shortcut = shortcut;
+			result->wrong_shortcut.level = level;
+			result->wrong_shortcut.sc_level = sc_level;
+			result->wrong_shortcut.sc_segments = sc_segments;
+			result->wrong_shortcut.dissimilarity = dissimilarity;
+			return assoc_array_walk_found_wrong_shortcut;
+		}
+
+		sc_level = next_sc_level;
+	} while (sc_level < shortcut->skip_to_level);
+
+	/* The shortcut matches the leaf's index to this point. */
+	cursor = ACCESS_ONCE(shortcut->next_node);
+	if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
+		level = sc_level;
+		goto jumped;
+	} else {
+		level = sc_level;
+		goto consider_node;
+	}
+}
+
+/**
+ * assoc_array_find - Find an object by index key
+ * @array: The associative array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Find an object in an associative array by walking through the internal tree
+ * to the node that should contain the object and then searching the leaves
+ * there.  NULL is returned if the requested object was not found in the array.
+ *
+ * The caller must hold the RCU read lock or better.
+ */
+void *assoc_array_find(const struct assoc_array *array,
+		       const struct assoc_array_ops *ops,
+		       const void *index_key)
+{
+	struct assoc_array_walk_result result;
+	const struct assoc_array_node *node;
+	const struct assoc_array_ptr *ptr;
+	const void *leaf;
+	int slot;
+
+	if (assoc_array_walk(array, ops, index_key, &result) !=
+	    assoc_array_walk_found_terminal_node)
+		return NULL;
+
+	node = result.terminal_node.node;
+	smp_read_barrier_depends();
+
+	/* If the target key is available to us, it's has to be pointed to by
+	 * the terminal node.
+	 */
+	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = ACCESS_ONCE(node->slots[slot]);
+		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+			/* We need a barrier between the read of the pointer
+			 * and dereferencing the pointer - but only if we are
+			 * actually going to dereference it.
+			 */
+			leaf = assoc_array_ptr_to_leaf(ptr);
+			smp_read_barrier_depends();
+			if (ops->compare_object(leaf, index_key))
+				return (void *)leaf;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Destructively iterate over an associative array.  The caller must prevent
+ * other simultaneous accesses.
+ */
+static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
+					const struct assoc_array_ops *ops)
+{
+	struct assoc_array_shortcut *shortcut;
+	struct assoc_array_node *node;
+	struct assoc_array_ptr *cursor, *parent = NULL;
+	int slot = -1;
+
+	pr_devel("-->%s()\n", __func__);
+
+	cursor = root;
+	if (!cursor) {
+		pr_devel("empty\n");
+		return;
+	}
+
+move_to_meta:
+	if (assoc_array_ptr_is_shortcut(cursor)) {
+		/* Descend through a shortcut */
+		pr_devel("[%d] shortcut\n", slot);
+		BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
+		shortcut = assoc_array_ptr_to_shortcut(cursor);
+		BUG_ON(shortcut->back_pointer != parent);
+		BUG_ON(slot != -1 && shortcut->parent_slot != slot);
+		parent = cursor;
+		cursor = shortcut->next_node;
+		slot = -1;
+		BUG_ON(!assoc_array_ptr_is_node(cursor));
+	}
+
+	pr_devel("[%d] node\n", slot);
+	node = assoc_array_ptr_to_node(cursor);
+	BUG_ON(node->back_pointer != parent);
+	BUG_ON(slot != -1 && node->parent_slot != slot);
+	slot = 0;
+
+continue_node:
+	pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
+	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		struct assoc_array_ptr *ptr = node->slots[slot];
+		if (!ptr)
+			continue;
+		if (assoc_array_ptr_is_meta(ptr)) {
+			parent = cursor;
+			cursor = ptr;
+			goto move_to_meta;
+		}
+
+		if (ops) {
+			pr_devel("[%d] free leaf\n", slot);
+			ops->free_object(assoc_array_ptr_to_leaf(ptr));
+		}
+	}
+
+	parent = node->back_pointer;
+	slot = node->parent_slot;
+	pr_devel("free node\n");
+	kfree(node);
+	if (!parent)
+		return; /* Done */
+
+	/* Move back up to the parent (may need to free a shortcut on
+	 * the way up) */
+	if (assoc_array_ptr_is_shortcut(parent)) {
+		shortcut = assoc_array_ptr_to_shortcut(parent);
+		BUG_ON(shortcut->next_node != cursor);
+		cursor = parent;
+		parent = shortcut->back_pointer;
+		slot = shortcut->parent_slot;
+		pr_devel("free shortcut\n");
+		kfree(shortcut);
+		if (!parent)
+			return;
+
+		BUG_ON(!assoc_array_ptr_is_node(parent));
+	}
+
+	/* Ascend to next slot in parent node */
+	pr_devel("ascend to %p[%d]\n", parent, slot);
+	cursor = parent;
+	node = assoc_array_ptr_to_node(cursor);
+	slot++;
+	goto continue_node;
+}
+
+/**
+ * assoc_array_destroy - Destroy an associative array
+ * @array: The array to destroy.
+ * @ops: The operations to use.
+ *
+ * Discard all metadata and free all objects in an associative array.  The
+ * array will be empty and ready to use again upon completion.  This function
+ * cannot fail.
+ *
+ * The caller must prevent all other accesses whilst this takes place as no
+ * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
+ * accesses to continue.  On the other hand, no memory allocation is required.
+ */
+void assoc_array_destroy(struct assoc_array *array,
+			 const struct assoc_array_ops *ops)
+{
+	assoc_array_destroy_subtree(array->root, ops);
+	array->root = NULL;
+}
+
+/*
+ * Handle insertion into an empty tree.
+ */
+static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
+{
+	struct assoc_array_node *new_n0;
+
+	pr_devel("-->%s()\n", __func__);
+
+	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n0)
+		return false;
+
+	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+	edit->leaf_p = &new_n0->slots[0];
+	edit->adjust_count_on = new_n0;
+	edit->set[0].ptr = &edit->array->root;
+	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+
+	pr_devel("<--%s() = ok [no root]\n", __func__);
+	return true;
+}
+
+/*
+ * Handle insertion into a terminal node.
+ */
+static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
+						  const struct assoc_array_ops *ops,
+						  const void *index_key,
+						  struct assoc_array_walk_result *result)
+{
+	struct assoc_array_shortcut *shortcut, *new_s0;
+	struct assoc_array_node *node, *new_n0, *new_n1, *side;
+	struct assoc_array_ptr *ptr;
+	unsigned long dissimilarity, base_seg, blank;
+	size_t keylen;
+	bool have_meta;
+	int level, diff;
+	int slot, next_slot, free_slot, i, j;
+
+	node	= result->terminal_node.node;
+	level	= result->terminal_node.level;
+	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
+
+	pr_devel("-->%s()\n", __func__);
+
+	/* We arrived at a node which doesn't have an onward node or shortcut
+	 * pointer that we have to follow.  This means that (a) the leaf we
+	 * want must go here (either by insertion or replacement) or (b) we
+	 * need to split this node and insert in one of the fragments.
+	 */
+	free_slot = -1;
+
+	/* Firstly, we have to check the leaves in this node to see if there's
+	 * a matching one we should replace in place.
+	 */
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		ptr = node->slots[i];
+		if (!ptr) {
+			free_slot = i;
+			continue;
+		}
+		if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
+			pr_devel("replace in slot %d\n", i);
+			edit->leaf_p = &node->slots[i];
+			edit->dead_leaf = node->slots[i];
+			pr_devel("<--%s() = ok [replace]\n", __func__);
+			return true;
+		}
+	}
+
+	/* If there is a free slot in this node then we can just insert the
+	 * leaf here.
+	 */
+	if (free_slot >= 0) {
+		pr_devel("insert in free slot %d\n", free_slot);
+		edit->leaf_p = &node->slots[free_slot];
+		edit->adjust_count_on = node;
+		pr_devel("<--%s() = ok [insert]\n", __func__);
+		return true;
+	}
+
+	/* The node has no spare slots - so we're either going to have to split
+	 * it or insert another node before it.
+	 *
+	 * Whatever, we're going to need at least two new nodes - so allocate
+	 * those now.  We may also need a new shortcut, but we deal with that
+	 * when we need it.
+	 */
+	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n0)
+		return false;
+	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+	new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n1)
+		return false;
+	edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
+
+	/* We need to find out how similar the leaves are. */
+	pr_devel("no spare slots\n");
+	have_meta = false;
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		ptr = node->slots[i];
+		if (assoc_array_ptr_is_meta(ptr)) {
+			edit->segment_cache[i] = 0xff;
+			have_meta = true;
+			continue;
+		}
+		base_seg = ops->get_object_key_chunk(
+			assoc_array_ptr_to_leaf(ptr), level);
+		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+	}
+
+	if (have_meta) {
+		pr_devel("have meta\n");
+		goto split_node;
+	}
+
+	/* The node contains only leaves */
+	dissimilarity = 0;
+	base_seg = edit->segment_cache[0];
+	for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
+		dissimilarity |= edit->segment_cache[i] ^ base_seg;
+
+	pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
+
+	if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
+		/* The old leaves all cluster in the same slot.  We will need
+		 * to insert a shortcut if the new node wants to cluster with them.
+		 */
+		if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
+			goto all_leaves_cluster_together;
+
+		/* Otherwise we can just insert a new node ahead of the old
+		 * one.
+		 */
+		goto present_leaves_cluster_but_not_new_leaf;
+	}
+
+split_node:
+	pr_devel("split node\n");
+
+	/* We need to split the current node; we know that the node doesn't
+	 * simply contain a full set of leaves that cluster together (it
+	 * contains meta pointers and/or non-clustering leaves).
+	 *
+	 * We need to expel at least two leaves out of a set consisting of the
+	 * leaves in the node and the new leaf.
+	 *
+	 * We need a new node (n0) to replace the current one and a new node to
+	 * take the expelled nodes (n1).
+	 */
+	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+	new_n0->back_pointer = node->back_pointer;
+	new_n0->parent_slot = node->parent_slot;
+	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+	new_n1->parent_slot = -1; /* Need to calculate this */
+
+do_split_node:
+	pr_devel("do_split_node\n");
+
+	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+	new_n1->nr_leaves_on_branch = 0;
+
+	/* Begin by finding two matching leaves.  There have to be at least two
+	 * that match - even if there are meta pointers - because any leaf that
+	 * would match a slot with a meta pointer in it must be somewhere
+	 * behind that meta pointer and cannot be here.  Further, given N
+	 * remaining leaf slots, we now have N+1 leaves to go in them.
+	 */
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		slot = edit->segment_cache[i];
+		if (slot != 0xff)
+			for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
+				if (edit->segment_cache[j] == slot)
+					goto found_slot_for_multiple_occupancy;
+	}
+found_slot_for_multiple_occupancy:
+	pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
+	BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
+	BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
+	BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
+
+	new_n1->parent_slot = slot;
+
+	/* Metadata pointers cannot change slot */
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+		if (assoc_array_ptr_is_meta(node->slots[i]))
+			new_n0->slots[i] = node->slots[i];
+		else
+			new_n0->slots[i] = NULL;
+	BUG_ON(new_n0->slots[slot] != NULL);
+	new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
+
+	/* Filter the leaf pointers between the new nodes */
+	free_slot = -1;
+	next_slot = 0;
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		if (assoc_array_ptr_is_meta(node->slots[i]))
+			continue;
+		if (edit->segment_cache[i] == slot) {
+			new_n1->slots[next_slot++] = node->slots[i];
+			new_n1->nr_leaves_on_branch++;
+		} else {
+			do {
+				free_slot++;
+			} while (new_n0->slots[free_slot] != NULL);
+			new_n0->slots[free_slot] = node->slots[i];
+		}
+	}
+
+	pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
+
+	if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
+		do {
+			free_slot++;
+		} while (new_n0->slots[free_slot] != NULL);
+		edit->leaf_p = &new_n0->slots[free_slot];
+		edit->adjust_count_on = new_n0;
+	} else {
+		edit->leaf_p = &new_n1->slots[next_slot++];
+		edit->adjust_count_on = new_n1;
+	}
+
+	BUG_ON(next_slot <= 1);
+
+	edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		if (edit->segment_cache[i] == 0xff) {
+			ptr = node->slots[i];
+			BUG_ON(assoc_array_ptr_is_leaf(ptr));
+			if (assoc_array_ptr_is_node(ptr)) {
+				side = assoc_array_ptr_to_node(ptr);
+				edit->set_backpointers[i] = &side->back_pointer;
+			} else {
+				shortcut = assoc_array_ptr_to_shortcut(ptr);
+				edit->set_backpointers[i] = &shortcut->back_pointer;
+			}
+		}
+	}
+
+	ptr = node->back_pointer;
+	if (!ptr)
+		edit->set[0].ptr = &edit->array->root;
+	else if (assoc_array_ptr_is_node(ptr))
+		edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
+	else
+		edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
+	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+	pr_devel("<--%s() = ok [split node]\n", __func__);
+	return true;
+
+present_leaves_cluster_but_not_new_leaf:
+	/* All the old leaves cluster in the same slot, but the new leaf wants
+	 * to go into a different slot, so we create a new node to hold the new
+	 * leaf and a pointer to a new node holding all the old leaves.
+	 */
+	pr_devel("present leaves cluster but not new leaf\n");
+
+	new_n0->back_pointer = node->back_pointer;
+	new_n0->parent_slot = node->parent_slot;
+	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+	new_n1->parent_slot = edit->segment_cache[0];
+	new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
+	edit->adjust_count_on = new_n0;
+
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+		new_n1->slots[i] = node->slots[i];
+
+	new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0);
+	edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
+
+	edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
+	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+	pr_devel("<--%s() = ok [insert node before]\n", __func__);
+	return true;
+
+all_leaves_cluster_together:
+	/* All the leaves, new and old, want to cluster together in this node
+	 * in the same slot, so we have to replace this node with a shortcut to
+	 * skip over the identical parts of the key and then place a pair of
+	 * nodes, one inside the other, at the end of the shortcut and
+	 * distribute the keys between them.
+	 *
+	 * Firstly we need to work out where the leaves start diverging as a
+	 * bit position into their keys so that we know how big the shortcut
+	 * needs to be.
+	 *
+	 * We only need to make a single pass of N of the N+1 leaves because if
+	 * any keys differ between themselves at bit X then at least one of
+	 * them must also differ with the base key at bit X or before.
+	 */
+	pr_devel("all leaves cluster together\n");
+	diff = INT_MAX;
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
+					  assoc_array_ptr_to_leaf(node->slots[i]));
+		if (x < diff) {
+			BUG_ON(x < 0);
+			diff = x;
+		}
+	}
+	BUG_ON(diff == INT_MAX);
+	BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
+
+	keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+	keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+	new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+			 keylen * sizeof(unsigned long), GFP_KERNEL);
+	if (!new_s0)
+		return false;
+	edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
+
+	edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+	new_s0->back_pointer = node->back_pointer;
+	new_s0->parent_slot = node->parent_slot;
+	new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+	new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+	new_n0->parent_slot = 0;
+	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+	new_n1->parent_slot = -1; /* Need to calculate this */
+
+	new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+	pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
+	BUG_ON(level <= 0);
+
+	for (i = 0; i < keylen; i++)
+		new_s0->index_key[i] =
+			ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
+
+	blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+	pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
+	new_s0->index_key[keylen - 1] &= ~blank;
+
+	/* This now reduces to a node splitting exercise for which we'll need
+	 * to regenerate the disparity table.
+	 */
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		ptr = node->slots[i];
+		base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
+						     level);
+		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+	}
+
+	base_seg = ops->get_key_chunk(index_key, level);
+	base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
+	goto do_split_node;
+}
+
+/*
+ * Handle insertion into the middle of a shortcut.
+ */
+static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
+					    const struct assoc_array_ops *ops,
+					    struct assoc_array_walk_result *result)
+{
+	struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
+	struct assoc_array_node *node, *new_n0, *side;
+	unsigned long sc_segments, dissimilarity, blank;
+	size_t keylen;
+	int level, sc_level, diff;
+	int sc_slot;
+
+	shortcut	= result->wrong_shortcut.shortcut;
+	level		= result->wrong_shortcut.level;
+	sc_level	= result->wrong_shortcut.sc_level;
+	sc_segments	= result->wrong_shortcut.sc_segments;
+	dissimilarity	= result->wrong_shortcut.dissimilarity;
+
+	pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
+		 __func__, level, dissimilarity, sc_level);
+
+	/* We need to split a shortcut and insert a node between the two
+	 * pieces.  Zero-length pieces will be dispensed with entirely.
+	 *
+	 * First of all, we need to find out in which level the first
+	 * difference was.
+	 */
+	diff = __ffs(dissimilarity);
+	diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+	diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
+	pr_devel("diff=%d\n", diff);
+
+	if (!shortcut->back_pointer) {
+		edit->set[0].ptr = &edit->array->root;
+	} else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
+		node = assoc_array_ptr_to_node(shortcut->back_pointer);
+		edit->set[0].ptr = &node->slots[shortcut->parent_slot];
+	} else {
+		BUG();
+	}
+
+	edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
+
+	/* Create a new node now since we're going to need it anyway */
+	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n0)
+		return false;
+	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+	edit->adjust_count_on = new_n0;
+
+	/* Insert a new shortcut before the new node if this segment isn't of
+	 * zero length - otherwise we just connect the new node directly to the
+	 * parent.
+	 */
+	level += ASSOC_ARRAY_LEVEL_STEP;
+	if (diff > level) {
+		pr_devel("pre-shortcut %d...%d\n", level, diff);
+		keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+		new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+				 keylen * sizeof(unsigned long), GFP_KERNEL);
+		if (!new_s0)
+			return false;
+		edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
+		edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+		new_s0->back_pointer = shortcut->back_pointer;
+		new_s0->parent_slot = shortcut->parent_slot;
+		new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+		new_s0->skip_to_level = diff;
+
+		new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+		new_n0->parent_slot = 0;
+
+		memcpy(new_s0->index_key, shortcut->index_key,
+		       keylen * sizeof(unsigned long));
+
+		blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+		pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
+		new_s0->index_key[keylen - 1] &= ~blank;
+	} else {
+		pr_devel("no pre-shortcut\n");
+		edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+		new_n0->back_pointer = shortcut->back_pointer;
+		new_n0->parent_slot = shortcut->parent_slot;
+	}
+
+	side = assoc_array_ptr_to_node(shortcut->next_node);
+	new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
+
+	/* We need to know which slot in the new node is going to take a
+	 * metadata pointer.
+	 */
+	sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+	sc_slot &= ASSOC_ARRAY_FAN_MASK;
+
+	pr_devel("new slot %lx >> %d -> %d\n",
+		 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
+
+	/* Determine whether we need to follow the new node with a replacement
+	 * for the current shortcut.  We could in theory reuse the current
+	 * shortcut if its parent slot number doesn't change - but that's a
+	 * 1-in-16 chance so not worth expending the code upon.
+	 */
+	level = diff + ASSOC_ARRAY_LEVEL_STEP;
+	if (level < shortcut->skip_to_level) {
+		pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
+		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+		new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
+				 keylen * sizeof(unsigned long), GFP_KERNEL);
+		if (!new_s1)
+			return false;
+		edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
+
+		new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
+		new_s1->parent_slot = sc_slot;
+		new_s1->next_node = shortcut->next_node;
+		new_s1->skip_to_level = shortcut->skip_to_level;
+
+		new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
+
+		memcpy(new_s1->index_key, shortcut->index_key,
+		       keylen * sizeof(unsigned long));
+
+		edit->set[1].ptr = &side->back_pointer;
+		edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
+	} else {
+		pr_devel("no post-shortcut\n");
+
+		/* We don't have to replace the pointed-to node as long as we
+		 * use memory barriers to make sure the parent slot number is
+		 * changed before the back pointer (the parent slot number is
+		 * irrelevant to the old parent shortcut).
+		 */
+		new_n0->slots[sc_slot] = shortcut->next_node;
+		edit->set_parent_slot[0].p = &side->parent_slot;
+		edit->set_parent_slot[0].to = sc_slot;
+		edit->set[1].ptr = &side->back_pointer;
+		edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+	}
+
+	/* Install the new leaf in a spare slot in the new node. */
+	if (sc_slot == 0)
+		edit->leaf_p = &new_n0->slots[1];
+	else
+		edit->leaf_p = &new_n0->slots[0];
+
+	pr_devel("<--%s() = ok [split shortcut]\n", __func__);
+	return edit;
+}
+
+/**
+ * assoc_array_insert - Script insertion of an object into an associative array
+ * @array: The array to insert into.
+ * @ops: The operations to use.
+ * @index_key: The key to insert at.
+ * @object: The object to insert.
+ *
+ * Precalculate and preallocate a script for the insertion or replacement of an
+ * object in an associative array.  This results in an edit script that can
+ * either be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+					    const struct assoc_array_ops *ops,
+					    const void *index_key,
+					    void *object)
+{
+	struct assoc_array_walk_result result;
+	struct assoc_array_edit *edit;
+
+	pr_devel("-->%s()\n", __func__);
+
+	/* The leaf pointer we're given must not have the bottom bit set as we
+	 * use those for type-marking the pointer.  NULL pointers are also not
+	 * allowed as they indicate an empty slot but we have to allow them
+	 * here as they can be updated later.
+	 */
+	BUG_ON(assoc_array_ptr_is_meta(object));
+
+	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+	if (!edit)
+		return ERR_PTR(-ENOMEM);
+	edit->array = array;
+	edit->ops = ops;
+	edit->leaf = assoc_array_leaf_to_ptr(object);
+	edit->adjust_count_by = 1;
+
+	switch (assoc_array_walk(array, ops, index_key, &result)) {
+	case assoc_array_walk_tree_empty:
+		/* Allocate a root node if there isn't one yet */
+		if (!assoc_array_insert_in_empty_tree(edit))
+			goto enomem;
+		return edit;
+
+	case assoc_array_walk_found_terminal_node:
+		/* We found a node that doesn't have a node/shortcut pointer in
+		 * the slot corresponding to the index key that we have to
+		 * follow.
+		 */
+		if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
+							   &result))
+			goto enomem;
+		return edit;
+
+	case assoc_array_walk_found_wrong_shortcut:
+		/* We found a shortcut that didn't match our key in a slot we
+		 * needed to follow.
+		 */
+		if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
+			goto enomem;
+		return edit;
+	}
+
+enomem:
+	/* Clean up after an out of memory error */
+	pr_devel("enomem\n");
+	assoc_array_cancel_edit(edit);
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_insert_set_object - Set the new object pointer in an edit script
+ * @edit: The edit script to modify.
+ * @object: The object pointer to set.
+ *
+ * Change the object to be inserted in an edit script.  The object pointed to
+ * by the old object is not freed.  This must be done prior to applying the
+ * script.
+ */
+void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
+{
+	BUG_ON(!object);
+	edit->leaf = assoc_array_leaf_to_ptr(object);
+}
+
+struct assoc_array_delete_collapse_context {
+	struct assoc_array_node	*node;
+	const void		*skip_leaf;
+	int			slot;
+};
+
+/*
+ * Subtree collapse to node iterator.
+ */
+static int assoc_array_delete_collapse_iterator(const void *leaf,
+						void *iterator_data)
+{
+	struct assoc_array_delete_collapse_context *collapse = iterator_data;
+
+	if (leaf == collapse->skip_leaf)
+		return 0;
+
+	BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
+
+	collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
+	return 0;
+}
+
+/**
+ * assoc_array_delete - Script deletion of an object from an associative array
+ * @array: The array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Precalculate and preallocate a script for the deletion of an object from an
+ * associative array.  This results in an edit script that can either be
+ * applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if the object was found,
+ * NULL if the object was not found or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+					    const struct assoc_array_ops *ops,
+					    const void *index_key)
+{
+	struct assoc_array_delete_collapse_context collapse;
+	struct assoc_array_walk_result result;
+	struct assoc_array_node *node, *new_n0;
+	struct assoc_array_edit *edit;
+	struct assoc_array_ptr *ptr;
+	bool has_meta;
+	int slot, i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+	if (!edit)
+		return ERR_PTR(-ENOMEM);
+	edit->array = array;
+	edit->ops = ops;
+	edit->adjust_count_by = -1;
+
+	switch (assoc_array_walk(array, ops, index_key, &result)) {
+	case assoc_array_walk_found_terminal_node:
+		/* We found a node that should contain the leaf we've been
+		 * asked to remove - *if* it's in the tree.
+		 */
+		pr_devel("terminal_node\n");
+		node = result.terminal_node.node;
+
+		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+			ptr = node->slots[slot];
+			if (ptr &&
+			    assoc_array_ptr_is_leaf(ptr) &&
+			    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+						index_key))
+				goto found_leaf;
+		}
+	case assoc_array_walk_tree_empty:
+	case assoc_array_walk_found_wrong_shortcut:
+	default:
+		assoc_array_cancel_edit(edit);
+		pr_devel("not found\n");
+		return NULL;
+	}
+
+found_leaf:
+	BUG_ON(array->nr_leaves_on_tree <= 0);
+
+	/* In the simplest form of deletion we just clear the slot and release
+	 * the leaf after a suitable interval.
+	 */
+	edit->dead_leaf = node->slots[slot];
+	edit->set[0].ptr = &node->slots[slot];
+	edit->set[0].to = NULL;
+	edit->adjust_count_on = node;
+
+	/* If that concludes erasure of the last leaf, then delete the entire
+	 * internal array.
+	 */
+	if (array->nr_leaves_on_tree == 1) {
+		edit->set[1].ptr = &array->root;
+		edit->set[1].to = NULL;
+		edit->adjust_count_on = NULL;
+		edit->excised_subtree = array->root;
+		pr_devel("all gone\n");
+		return edit;
+	}
+
+	/* However, we'd also like to clear up some metadata blocks if we
+	 * possibly can.
+	 *
+	 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
+	 * leaves in it, then attempt to collapse it - and attempt to
+	 * recursively collapse up the tree.
+	 *
+	 * We could also try and collapse in partially filled subtrees to take
+	 * up space in this node.
+	 */
+	if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+		struct assoc_array_node *parent, *grandparent;
+		struct assoc_array_ptr *ptr;
+
+		/* First of all, we need to know if this node has metadata so
+		 * that we don't try collapsing if all the leaves are already
+		 * here.
+		 */
+		has_meta = false;
+		for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+			ptr = node->slots[i];
+			if (assoc_array_ptr_is_meta(ptr)) {
+				has_meta = true;
+				break;
+			}
+		}
+
+		pr_devel("leaves: %ld [m=%d]\n",
+			 node->nr_leaves_on_branch - 1, has_meta);
+
+		/* Look further up the tree to see if we can collapse this node
+		 * into a more proximal node too.
+		 */
+		parent = node;
+	collapse_up:
+		pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
+
+		ptr = parent->back_pointer;
+		if (!ptr)
+			goto do_collapse;
+		if (assoc_array_ptr_is_shortcut(ptr)) {
+			struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
+			ptr = s->back_pointer;
+			if (!ptr)
+				goto do_collapse;
+		}
+
+		grandparent = assoc_array_ptr_to_node(ptr);
+		if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+			parent = grandparent;
+			goto collapse_up;
+		}
+
+	do_collapse:
+		/* There's no point collapsing if the original node has no meta
+		 * pointers to discard and if we didn't merge into one of that
+		 * node's ancestry.
+		 */
+		if (has_meta || parent != node) {
+			node = parent;
+
+			/* Create a new node to collapse into */
+			new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+			if (!new_n0)
+				goto enomem;
+			edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+
+			new_n0->back_pointer = node->back_pointer;
+			new_n0->parent_slot = node->parent_slot;
+			new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+			edit->adjust_count_on = new_n0;
+
+			collapse.node = new_n0;
+			collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
+			collapse.slot = 0;
+			assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
+						    node->back_pointer,
+						    assoc_array_delete_collapse_iterator,
+						    &collapse);
+			pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
+			BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
+
+			if (!node->back_pointer) {
+				edit->set[1].ptr = &array->root;
+			} else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
+				BUG();
+			} else if (assoc_array_ptr_is_node(node->back_pointer)) {
+				struct assoc_array_node *p =
+					assoc_array_ptr_to_node(node->back_pointer);
+				edit->set[1].ptr = &p->slots[node->parent_slot];
+			} else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
+				struct assoc_array_shortcut *s =
+					assoc_array_ptr_to_shortcut(node->back_pointer);
+				edit->set[1].ptr = &s->next_node;
+			}
+			edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+			edit->excised_subtree = assoc_array_node_to_ptr(node);
+		}
+	}
+
+	return edit;
+
+enomem:
+	/* Clean up after an out of memory error */
+	pr_devel("enomem\n");
+	assoc_array_cancel_edit(edit);
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_clear - Script deletion of all objects from an associative array
+ * @array: The array to clear.
+ * @ops: The operations to use.
+ *
+ * Precalculate and preallocate a script for the deletion of all the objects
+ * from an associative array.  This results in an edit script that can either
+ * be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if there are objects to be
+ * deleted, NULL if there are no objects in the array or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+					   const struct assoc_array_ops *ops)
+{
+	struct assoc_array_edit *edit;
+
+	pr_devel("-->%s()\n", __func__);
+
+	if (!array->root)
+		return NULL;
+
+	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+	if (!edit)
+		return ERR_PTR(-ENOMEM);
+	edit->array = array;
+	edit->ops = ops;
+	edit->set[1].ptr = &array->root;
+	edit->set[1].to = NULL;
+	edit->excised_subtree = array->root;
+	edit->ops_for_excised_subtree = ops;
+	pr_devel("all gone\n");
+	return edit;
+}
+
+/*
+ * Handle the deferred destruction after an applied edit.
+ */
+static void assoc_array_rcu_cleanup(struct rcu_head *head)
+{
+	struct assoc_array_edit *edit =
+		container_of(head, struct assoc_array_edit, rcu);
+	int i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	if (edit->dead_leaf)
+		edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
+	for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
+		if (edit->excised_meta[i])
+			kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
+
+	if (edit->excised_subtree) {
+		BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
+		if (assoc_array_ptr_is_node(edit->excised_subtree)) {
+			struct assoc_array_node *n =
+				assoc_array_ptr_to_node(edit->excised_subtree);
+			n->back_pointer = NULL;
+		} else {
+			struct assoc_array_shortcut *s =
+				assoc_array_ptr_to_shortcut(edit->excised_subtree);
+			s->back_pointer = NULL;
+		}
+		assoc_array_destroy_subtree(edit->excised_subtree,
+					    edit->ops_for_excised_subtree);
+	}
+
+	kfree(edit);
+}
+
+/**
+ * assoc_array_apply_edit - Apply an edit script to an associative array
+ * @edit: The script to apply.
+ *
+ * Apply an edit script to an associative array to effect an insertion,
+ * deletion or clearance.  As the edit script includes preallocated memory,
+ * this is guaranteed not to fail.
+ *
+ * The edit script, dead objects and dead metadata will be scheduled for
+ * destruction after an RCU grace period to permit those doing read-only
+ * accesses on the array to continue to do so under the RCU read lock whilst
+ * the edit is taking place.
+ */
+void assoc_array_apply_edit(struct assoc_array_edit *edit)
+{
+	struct assoc_array_shortcut *shortcut;
+	struct assoc_array_node *node;
+	struct assoc_array_ptr *ptr;
+	int i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	smp_wmb();
+	if (edit->leaf_p)
+		*edit->leaf_p = edit->leaf;
+
+	smp_wmb();
+	for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
+		if (edit->set_parent_slot[i].p)
+			*edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
+
+	smp_wmb();
+	for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
+		if (edit->set_backpointers[i])
+			*edit->set_backpointers[i] = edit->set_backpointers_to;
+
+	smp_wmb();
+	for (i = 0; i < ARRAY_SIZE(edit->set); i++)
+		if (edit->set[i].ptr)
+			*edit->set[i].ptr = edit->set[i].to;
+
+	if (edit->array->root == NULL) {
+		edit->array->nr_leaves_on_tree = 0;
+	} else if (edit->adjust_count_on) {
+		node = edit->adjust_count_on;
+		for (;;) {
+			node->nr_leaves_on_branch += edit->adjust_count_by;
+
+			ptr = node->back_pointer;
+			if (!ptr)
+				break;
+			if (assoc_array_ptr_is_shortcut(ptr)) {
+				shortcut = assoc_array_ptr_to_shortcut(ptr);
+				ptr = shortcut->back_pointer;
+				if (!ptr)
+					break;
+			}
+			BUG_ON(!assoc_array_ptr_is_node(ptr));
+			node = assoc_array_ptr_to_node(ptr);
+		}
+
+		edit->array->nr_leaves_on_tree += edit->adjust_count_by;
+	}
+
+	call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
+}
+
+/**
+ * assoc_array_cancel_edit - Discard an edit script.
+ * @edit: The script to discard.
+ *
+ * Free an edit script and all the preallocated data it holds without making
+ * any changes to the associative array it was intended for.
+ *
+ * NOTE!  In the case of an insertion script, this does _not_ release the leaf
+ * that was to be inserted.  That is left to the caller.
+ */
+void assoc_array_cancel_edit(struct assoc_array_edit *edit)
+{
+	struct assoc_array_ptr *ptr;
+	int i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	/* Clean up after an out of memory error */
+	for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
+		ptr = edit->new_meta[i];
+		if (ptr) {
+			if (assoc_array_ptr_is_node(ptr))
+				kfree(assoc_array_ptr_to_node(ptr));
+			else
+				kfree(assoc_array_ptr_to_shortcut(ptr));
+		}
+	}
+	kfree(edit);
+}
+
+/**
+ * assoc_array_gc - Garbage collect an associative array.
+ * @array: The array to clean.
+ * @ops: The operations to use.
+ * @iterator: A callback function to pass judgement on each object.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Collect garbage from an associative array and pack down the internal tree to
+ * save memory.
+ *
+ * The iterator function is asked to pass judgement upon each object in the
+ * array.  If it returns false, the object is discard and if it returns true,
+ * the object is kept.  If it returns true, it must increment the object's
+ * usage count (or whatever it needs to do to retain it) before returning.
+ *
+ * This function returns 0 if successful or -ENOMEM if out of memory.  In the
+ * latter case, the array is not changed.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+int assoc_array_gc(struct assoc_array *array,
+		   const struct assoc_array_ops *ops,
+		   bool (*iterator)(void *object, void *iterator_data),
+		   void *iterator_data)
+{
+	struct assoc_array_shortcut *shortcut, *new_s;
+	struct assoc_array_node *node, *new_n;
+	struct assoc_array_edit *edit;
+	struct assoc_array_ptr *cursor, *ptr;
+	struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
+	unsigned long nr_leaves_on_tree;
+	int keylen, slot, nr_free, next_slot, i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	if (!array->root)
+		return 0;
+
+	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+	if (!edit)
+		return -ENOMEM;
+	edit->array = array;
+	edit->ops = ops;
+	edit->ops_for_excised_subtree = ops;
+	edit->set[0].ptr = &array->root;
+	edit->excised_subtree = array->root;
+
+	new_root = new_parent = NULL;
+	new_ptr_pp = &new_root;
+	cursor = array->root;
+
+descend:
+	/* If this point is a shortcut, then we need to duplicate it and
+	 * advance the target cursor.
+	 */
+	if (assoc_array_ptr_is_shortcut(cursor)) {
+		shortcut = assoc_array_ptr_to_shortcut(cursor);
+		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+		new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
+				keylen * sizeof(unsigned long), GFP_KERNEL);
+		if (!new_s)
+			goto enomem;
+		pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
+		memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
+					 keylen * sizeof(unsigned long)));
+		new_s->back_pointer = new_parent;
+		new_s->parent_slot = shortcut->parent_slot;
+		*new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
+		new_ptr_pp = &new_s->next_node;
+		cursor = shortcut->next_node;
+	}
+
+	/* Duplicate the node at this position */
+	node = assoc_array_ptr_to_node(cursor);
+	new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n)
+		goto enomem;
+	pr_devel("dup node %p -> %p\n", node, new_n);
+	new_n->back_pointer = new_parent;
+	new_n->parent_slot = node->parent_slot;
+	*new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
+	new_ptr_pp = NULL;
+	slot = 0;
+
+continue_node:
+	/* Filter across any leaves and gc any subtrees */
+	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = node->slots[slot];
+		if (!ptr)
+			continue;
+
+		if (assoc_array_ptr_is_leaf(ptr)) {
+			if (iterator(assoc_array_ptr_to_leaf(ptr),
+				     iterator_data))
+				/* The iterator will have done any reference
+				 * counting on the object for us.
+				 */
+				new_n->slots[slot] = ptr;
+			continue;
+		}
+
+		new_ptr_pp = &new_n->slots[slot];
+		cursor = ptr;
+		goto descend;
+	}
+
+	pr_devel("-- compress node %p --\n", new_n);
+
+	/* Count up the number of empty slots in this node and work out the
+	 * subtree leaf count.
+	 */
+	new_n->nr_leaves_on_branch = 0;
+	nr_free = 0;
+	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = new_n->slots[slot];
+		if (!ptr)
+			nr_free++;
+		else if (assoc_array_ptr_is_leaf(ptr))
+			new_n->nr_leaves_on_branch++;
+	}
+	pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
+
+	/* See what we can fold in */
+	next_slot = 0;
+	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		struct assoc_array_shortcut *s;
+		struct assoc_array_node *child;
+
+		ptr = new_n->slots[slot];
+		if (!ptr || assoc_array_ptr_is_leaf(ptr))
+			continue;
+
+		s = NULL;
+		if (assoc_array_ptr_is_shortcut(ptr)) {
+			s = assoc_array_ptr_to_shortcut(ptr);
+			ptr = s->next_node;
+		}
+
+		child = assoc_array_ptr_to_node(ptr);
+		new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
+
+		if (child->nr_leaves_on_branch <= nr_free + 1) {
+			/* Fold the child node into this one */
+			pr_devel("[%d] fold node %lu/%d [nx %d]\n",
+				 slot, child->nr_leaves_on_branch, nr_free + 1,
+				 next_slot);
+
+			/* We would already have reaped an intervening shortcut
+			 * on the way back up the tree.
+			 */
+			BUG_ON(s);
+
+			new_n->slots[slot] = NULL;
+			nr_free++;
+			if (slot < next_slot)
+				next_slot = slot;
+			for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+				struct assoc_array_ptr *p = child->slots[i];
+				if (!p)
+					continue;
+				BUG_ON(assoc_array_ptr_is_meta(p));
+				while (new_n->slots[next_slot])
+					next_slot++;
+				BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
+				new_n->slots[next_slot++] = p;
+				nr_free--;
+			}
+			kfree(child);
+		} else {
+			pr_devel("[%d] retain node %lu/%d [nx %d]\n",
+				 slot, child->nr_leaves_on_branch, nr_free + 1,
+				 next_slot);
+		}
+	}
+
+	pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
+
+	nr_leaves_on_tree = new_n->nr_leaves_on_branch;
+
+	/* Excise this node if it is singly occupied by a shortcut */
+	if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
+		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
+			if ((ptr = new_n->slots[slot]))
+				break;
+
+		if (assoc_array_ptr_is_meta(ptr) &&
+		    assoc_array_ptr_is_shortcut(ptr)) {
+			pr_devel("excise node %p with 1 shortcut\n", new_n);
+			new_s = assoc_array_ptr_to_shortcut(ptr);
+			new_parent = new_n->back_pointer;
+			slot = new_n->parent_slot;
+			kfree(new_n);
+			if (!new_parent) {
+				new_s->back_pointer = NULL;
+				new_s->parent_slot = 0;
+				new_root = ptr;
+				goto gc_complete;
+			}
+
+			if (assoc_array_ptr_is_shortcut(new_parent)) {
+				/* We can discard any preceding shortcut also */
+				struct assoc_array_shortcut *s =
+					assoc_array_ptr_to_shortcut(new_parent);
+
+				pr_devel("excise preceding shortcut\n");
+
+				new_parent = new_s->back_pointer = s->back_pointer;
+				slot = new_s->parent_slot = s->parent_slot;
+				kfree(s);
+				if (!new_parent) {
+					new_s->back_pointer = NULL;
+					new_s->parent_slot = 0;
+					new_root = ptr;
+					goto gc_complete;
+				}
+			}
+
+			new_s->back_pointer = new_parent;
+			new_s->parent_slot = slot;
+			new_n = assoc_array_ptr_to_node(new_parent);
+			new_n->slots[slot] = ptr;
+			goto ascend_old_tree;
+		}
+	}
+
+	/* Excise any shortcuts we might encounter that point to nodes that
+	 * only contain leaves.
+	 */
+	ptr = new_n->back_pointer;
+	if (!ptr)
+		goto gc_complete;
+
+	if (assoc_array_ptr_is_shortcut(ptr)) {
+		new_s = assoc_array_ptr_to_shortcut(ptr);
+		new_parent = new_s->back_pointer;
+		slot = new_s->parent_slot;
+
+		if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
+			struct assoc_array_node *n;
+
+			pr_devel("excise shortcut\n");
+			new_n->back_pointer = new_parent;
+			new_n->parent_slot = slot;
+			kfree(new_s);
+			if (!new_parent) {
+				new_root = assoc_array_node_to_ptr(new_n);
+				goto gc_complete;
+			}
+
+			n = assoc_array_ptr_to_node(new_parent);
+			n->slots[slot] = assoc_array_node_to_ptr(new_n);
+		}
+	} else {
+		new_parent = ptr;
+	}
+	new_n = assoc_array_ptr_to_node(new_parent);
+
+ascend_old_tree:
+	ptr = node->back_pointer;
+	if (assoc_array_ptr_is_shortcut(ptr)) {
+		shortcut = assoc_array_ptr_to_shortcut(ptr);
+		slot = shortcut->parent_slot;
+		cursor = shortcut->back_pointer;
+	} else {
+		slot = node->parent_slot;
+		cursor = ptr;
+	}
+	BUG_ON(!ptr);
+	node = assoc_array_ptr_to_node(cursor);
+	slot++;
+	goto continue_node;
+
+gc_complete:
+	edit->set[0].to = new_root;
+	assoc_array_apply_edit(edit);
+	edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
+	return 0;
+
+enomem:
+	pr_devel("enomem\n");
+	assoc_array_destroy_subtree(new_root, edit->ops);
+	kfree(edit);
+	return -ENOMEM;
+}
-- 
cgit v1.2.3


From b2a4df200d570b2c33a57e1ebfa5896e4bc81b69 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 24 Sep 2013 10:35:18 +0100
Subject: KEYS: Expand the capacity of a keyring

Expand the capacity of a keyring to be able to hold a lot more keys by using
the previously added associative array implementation.  Currently the maximum
capacity is:

	(PAGE_SIZE - sizeof(header)) / sizeof(struct key *)

which, on a 64-bit system, is a little more 500.  However, since this is being
used for the NFS uid mapper, we need more than that.  The new implementation
gives us effectively unlimited capacity.

With some alterations, the keyutils testsuite runs successfully to completion
after this patch is applied.  The alterations are because (a) keyrings that
are simply added to no longer appear ordered and (b) some of the errors have
changed a bit.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/keys/keyring-type.h |   17 +-
 include/linux/key.h         |   13 +-
 lib/assoc_array.c           |    1 +
 security/keys/Kconfig       |    1 +
 security/keys/gc.c          |   33 +-
 security/keys/internal.h    |   17 +-
 security/keys/key.c         |   35 +-
 security/keys/keyring.c     | 1436 ++++++++++++++++++++++---------------------
 security/keys/request_key.c |   12 +-
 9 files changed, 803 insertions(+), 762 deletions(-)

(limited to 'include/linux')

diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h
index cf49159b0e3a..fca5c62340a4 100644
--- a/include/keys/keyring-type.h
+++ b/include/keys/keyring-type.h
@@ -1,6 +1,6 @@
 /* Keyring key type
  *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -13,19 +13,6 @@
 #define _KEYS_KEYRING_TYPE_H
 
 #include <linux/key.h>
-#include <linux/rcupdate.h>
-
-/*
- * the keyring payload contains a list of the keys to which the keyring is
- * subscribed
- */
-struct keyring_list {
-	struct rcu_head	rcu;		/* RCU deletion hook */
-	unsigned short	maxkeys;	/* max keys this list can hold */
-	unsigned short	nkeys;		/* number of keys currently held */
-	unsigned short	delkey;		/* key to be unlinked by RCU */
-	struct key __rcu *keys[0];
-};
-
+#include <linux/assoc_array.h>
 
 #endif /* _KEYS_KEYRING_TYPE_H */
diff --git a/include/linux/key.h b/include/linux/key.h
index ef596c7af585..2417f789d29b 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -22,6 +22,7 @@
 #include <linux/sysctl.h>
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
+#include <linux/assoc_array.h>
 
 #ifdef __KERNEL__
 #include <linux/uidgid.h>
@@ -196,11 +197,13 @@ struct key {
 	 *   whatever
 	 */
 	union {
-		unsigned long		value;
-		void __rcu		*rcudata;
-		void			*data;
-		struct keyring_list __rcu *subscriptions;
-	} payload;
+		union {
+			unsigned long		value;
+			void __rcu		*rcudata;
+			void			*data;
+		} payload;
+		struct assoc_array keys;
+	};
 };
 
 extern struct key *key_alloc(struct key_type *type,
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
index a0952818f938..17edeaf19180 100644
--- a/lib/assoc_array.c
+++ b/lib/assoc_array.c
@@ -12,6 +12,7 @@
  */
 //#define DEBUG
 #include <linux/slab.h>
+#include <linux/err.h>
 #include <linux/assoc_array_priv.h>
 
 /*
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index a90d6d300dbd..15e0dfe8c80f 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -4,6 +4,7 @@
 
 config KEYS
 	bool "Enable access key retention support"
+	select ASSOCIATIVE_ARRAY
 	help
 	  This option provides support for retaining authentication tokens and
 	  access keys in the kernel.
diff --git a/security/keys/gc.c b/security/keys/gc.c
index d67c97bb1025..cce621c33dce 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -130,6 +130,13 @@ void key_gc_keytype(struct key_type *ktype)
 	kleave("");
 }
 
+static int key_gc_keyring_func(const void *object, void *iterator_data)
+{
+	const struct key *key = object;
+	time_t *limit = iterator_data;
+	return key_is_dead(key, *limit);
+}
+
 /*
  * Garbage collect pointers from a keyring.
  *
@@ -138,10 +145,9 @@ void key_gc_keytype(struct key_type *ktype)
  */
 static void key_gc_keyring(struct key *keyring, time_t limit)
 {
-	struct keyring_list *klist;
-	int loop;
+	int result;
 
-	kenter("%x", key_serial(keyring));
+	kenter("%x{%s}", keyring->serial, keyring->description ?: "");
 
 	if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
 			      (1 << KEY_FLAG_REVOKED)))
@@ -149,27 +155,17 @@ static void key_gc_keyring(struct key *keyring, time_t limit)
 
 	/* scan the keyring looking for dead keys */
 	rcu_read_lock();
-	klist = rcu_dereference(keyring->payload.subscriptions);
-	if (!klist)
-		goto unlock_dont_gc;
-
-	loop = klist->nkeys;
-	smp_rmb();
-	for (loop--; loop >= 0; loop--) {
-		struct key *key = rcu_dereference(klist->keys[loop]);
-		if (key_is_dead(key, limit))
-			goto do_gc;
-	}
-
-unlock_dont_gc:
+	result = assoc_array_iterate(&keyring->keys,
+				     key_gc_keyring_func, &limit);
 	rcu_read_unlock();
+	if (result == true)
+		goto do_gc;
+
 dont_gc:
 	kleave(" [no gc]");
 	return;
 
 do_gc:
-	rcu_read_unlock();
-
 	keyring_gc(keyring, limit);
 	kleave(" [gc]");
 }
@@ -392,7 +388,6 @@ found_unreferenced_key:
 	 */
 found_keyring:
 	spin_unlock(&key_serial_lock);
-	kdebug("scan keyring %d", key->serial);
 	key_gc_keyring(key, limit);
 	goto maybe_resched;
 
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 73950bf8f875..581c6f688352 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -90,20 +90,23 @@ extern void key_type_put(struct key_type *ktype);
 
 extern int __key_link_begin(struct key *keyring,
 			    const struct keyring_index_key *index_key,
-			    unsigned long *_prealloc);
+			    struct assoc_array_edit **_edit);
 extern int __key_link_check_live_key(struct key *keyring, struct key *key);
-extern void __key_link(struct key *keyring, struct key *key,
-		       unsigned long *_prealloc);
+extern void __key_link(struct key *key, struct assoc_array_edit **_edit);
 extern void __key_link_end(struct key *keyring,
 			   const struct keyring_index_key *index_key,
-			   unsigned long prealloc);
+			   struct assoc_array_edit *edit);
 
-extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-				      const struct keyring_index_key *index_key);
+extern key_ref_t find_key_to_update(key_ref_t keyring_ref,
+				    const struct keyring_index_key *index_key);
 
 extern struct key *keyring_search_instkey(struct key *keyring,
 					  key_serial_t target_id);
 
+extern int iterate_over_keyring(const struct key *keyring,
+				int (*func)(const struct key *key, void *data),
+				void *data);
+
 typedef int (*key_match_func_t)(const struct key *, const void *);
 
 struct keyring_search_context {
@@ -119,6 +122,8 @@ struct keyring_search_context {
 #define KEYRING_SEARCH_NO_CHECK_PERM	0x0010	/* Don't check permissions */
 #define KEYRING_SEARCH_DETECT_TOO_DEEP	0x0020	/* Give an error on excessive depth */
 
+	int (*iterator)(const void *object, void *iterator_data);
+
 	/* Internal stuff */
 	int			skipped_ret;
 	bool			possessed;
diff --git a/security/keys/key.c b/security/keys/key.c
index 7d716b82a61e..a819b5c7d4ec 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -409,7 +409,7 @@ static int __key_instantiate_and_link(struct key *key,
 				      struct key_preparsed_payload *prep,
 				      struct key *keyring,
 				      struct key *authkey,
-				      unsigned long *_prealloc)
+				      struct assoc_array_edit **_edit)
 {
 	int ret, awaken;
 
@@ -436,7 +436,7 @@ static int __key_instantiate_and_link(struct key *key,
 
 			/* and link it into the destination keyring */
 			if (keyring)
-				__key_link(keyring, key, _prealloc);
+				__key_link(key, _edit);
 
 			/* disable the authorisation key */
 			if (authkey)
@@ -476,7 +476,7 @@ int key_instantiate_and_link(struct key *key,
 			     struct key *authkey)
 {
 	struct key_preparsed_payload prep;
-	unsigned long prealloc;
+	struct assoc_array_edit *edit;
 	int ret;
 
 	memset(&prep, 0, sizeof(prep));
@@ -490,16 +490,15 @@ int key_instantiate_and_link(struct key *key,
 	}
 
 	if (keyring) {
-		ret = __key_link_begin(keyring, &key->index_key, &prealloc);
+		ret = __key_link_begin(keyring, &key->index_key, &edit);
 		if (ret < 0)
 			goto error_free_preparse;
 	}
 
-	ret = __key_instantiate_and_link(key, &prep, keyring, authkey,
-					 &prealloc);
+	ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit);
 
 	if (keyring)
-		__key_link_end(keyring, &key->index_key, prealloc);
+		__key_link_end(keyring, &key->index_key, edit);
 
 error_free_preparse:
 	if (key->type->preparse)
@@ -537,7 +536,7 @@ int key_reject_and_link(struct key *key,
 			struct key *keyring,
 			struct key *authkey)
 {
-	unsigned long prealloc;
+	struct assoc_array_edit *edit;
 	struct timespec now;
 	int ret, awaken, link_ret = 0;
 
@@ -548,7 +547,7 @@ int key_reject_and_link(struct key *key,
 	ret = -EBUSY;
 
 	if (keyring)
-		link_ret = __key_link_begin(keyring, &key->index_key, &prealloc);
+		link_ret = __key_link_begin(keyring, &key->index_key, &edit);
 
 	mutex_lock(&key_construction_mutex);
 
@@ -570,7 +569,7 @@ int key_reject_and_link(struct key *key,
 
 		/* and link it into the destination keyring */
 		if (keyring && link_ret == 0)
-			__key_link(keyring, key, &prealloc);
+			__key_link(key, &edit);
 
 		/* disable the authorisation key */
 		if (authkey)
@@ -580,7 +579,7 @@ int key_reject_and_link(struct key *key,
 	mutex_unlock(&key_construction_mutex);
 
 	if (keyring)
-		__key_link_end(keyring, &key->index_key, prealloc);
+		__key_link_end(keyring, &key->index_key, edit);
 
 	/* wake up anyone waiting for a key to be constructed */
 	if (awaken)
@@ -783,8 +782,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 		.description	= description,
 	};
 	struct key_preparsed_payload prep;
+	struct assoc_array_edit *edit;
 	const struct cred *cred = current_cred();
-	unsigned long prealloc;
 	struct key *keyring, *key = NULL;
 	key_ref_t key_ref;
 	int ret;
@@ -828,7 +827,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	}
 	index_key.desc_len = strlen(index_key.description);
 
-	ret = __key_link_begin(keyring, &index_key, &prealloc);
+	ret = __key_link_begin(keyring, &index_key, &edit);
 	if (ret < 0) {
 		key_ref = ERR_PTR(ret);
 		goto error_free_prep;
@@ -847,8 +846,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	 * update that instead if possible
 	 */
 	if (index_key.type->update) {
-		key_ref = __keyring_search_one(keyring_ref, &index_key);
-		if (!IS_ERR(key_ref))
+		key_ref = find_key_to_update(keyring_ref, &index_key);
+		if (key_ref)
 			goto found_matching_key;
 	}
 
@@ -874,7 +873,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	}
 
 	/* instantiate it and link it into the target keyring */
-	ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc);
+	ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit);
 	if (ret < 0) {
 		key_put(key);
 		key_ref = ERR_PTR(ret);
@@ -884,7 +883,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
 
 error_link_end:
-	__key_link_end(keyring, &index_key, prealloc);
+	__key_link_end(keyring, &index_key, edit);
 error_free_prep:
 	if (index_key.type->preparse)
 		index_key.type->free_preparse(&prep);
@@ -897,7 +896,7 @@ error:
 	/* we found a matching key, so we're going to try to update it
 	 * - we can drop the locks first as we have the key pinned
 	 */
-	__key_link_end(keyring, &index_key, prealloc);
+	__key_link_end(keyring, &index_key, edit);
 
 	key_ref = __key_update(key_ref, &prep);
 	goto error_free_prep;
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index eeef1a073db4..f7cdea22214f 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1,6 +1,6 @@
 /* Keyring handling
  *
- * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -17,25 +17,11 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <keys/keyring-type.h>
+#include <keys/user-type.h>
+#include <linux/assoc_array_priv.h>
 #include <linux/uaccess.h>
 #include "internal.h"
 
-#define rcu_dereference_locked_keyring(keyring)				\
-	(rcu_dereference_protected(					\
-		(keyring)->payload.subscriptions,			\
-		rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define rcu_deref_link_locked(klist, index, keyring)			\
-	(rcu_dereference_protected(					\
-		(klist)->keys[index],					\
-		rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define MAX_KEYRING_LINKS						\
-	min_t(size_t, USHRT_MAX - 1,					\
-	      ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *)))
-
-#define KEY_LINK_FIXQUOTA 1UL
-
 /*
  * When plumbing the depths of the key tree, this sets a hard limit
  * set on how deep we're willing to go.
@@ -47,6 +33,28 @@
  */
 #define KEYRING_NAME_HASH_SIZE	(1 << 5)
 
+/*
+ * We mark pointers we pass to the associative array with bit 1 set if
+ * they're keyrings and clear otherwise.
+ */
+#define KEYRING_PTR_SUBTYPE	0x2UL
+
+static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x)
+{
+	return (unsigned long)x & KEYRING_PTR_SUBTYPE;
+}
+static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x)
+{
+	void *object = assoc_array_ptr_to_leaf(x);
+	return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE);
+}
+static inline void *keyring_key_to_ptr(struct key *key)
+{
+	if (key->type == &key_type_keyring)
+		return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE);
+	return key;
+}
+
 static struct list_head	keyring_name_hash[KEYRING_NAME_HASH_SIZE];
 static DEFINE_RWLOCK(keyring_name_lock);
 
@@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc)
  */
 static int keyring_instantiate(struct key *keyring,
 			       struct key_preparsed_payload *prep);
-static int keyring_match(const struct key *keyring, const void *criterion);
 static void keyring_revoke(struct key *keyring);
 static void keyring_destroy(struct key *keyring);
 static void keyring_describe(const struct key *keyring, struct seq_file *m);
@@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring,
 
 struct key_type key_type_keyring = {
 	.name		= "keyring",
-	.def_datalen	= sizeof(struct keyring_list),
+	.def_datalen	= 0,
 	.instantiate	= keyring_instantiate,
-	.match		= keyring_match,
+	.match		= user_match,
 	.revoke		= keyring_revoke,
 	.destroy	= keyring_destroy,
 	.describe	= keyring_describe,
@@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring,
 
 	ret = -EINVAL;
 	if (prep->datalen == 0) {
+		assoc_array_init(&keyring->keys);
 		/* make the keyring available by name if it has one */
 		keyring_publish_name(keyring);
 		ret = 0;
@@ -136,14 +144,225 @@ static int keyring_instantiate(struct key *keyring,
 }
 
 /*
- * Match keyrings on their name
+ * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit.  Ideally we'd
+ * fold the carry back too, but that requires inline asm.
+ */
+static u64 mult_64x32_and_fold(u64 x, u32 y)
+{
+	u64 hi = (u64)(u32)(x >> 32) * y;
+	u64 lo = (u64)(u32)(x) * y;
+	return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32);
+}
+
+/*
+ * Hash a key type and description.
+ */
+static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key)
+{
+	const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
+	const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK;
+	const char *description = index_key->description;
+	unsigned long hash, type;
+	u32 piece;
+	u64 acc;
+	int n, desc_len = index_key->desc_len;
+
+	type = (unsigned long)index_key->type;
+
+	acc = mult_64x32_and_fold(type, desc_len + 13);
+	acc = mult_64x32_and_fold(acc, 9207);
+	for (;;) {
+		n = desc_len;
+		if (n <= 0)
+			break;
+		if (n > 4)
+			n = 4;
+		piece = 0;
+		memcpy(&piece, description, n);
+		description += n;
+		desc_len -= n;
+		acc = mult_64x32_and_fold(acc, piece);
+		acc = mult_64x32_and_fold(acc, 9207);
+	}
+
+	/* Fold the hash down to 32 bits if need be. */
+	hash = acc;
+	if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32)
+		hash ^= acc >> 32;
+
+	/* Squidge all the keyrings into a separate part of the tree to
+	 * ordinary keys by making sure the lowest level segment in the hash is
+	 * zero for keyrings and non-zero otherwise.
+	 */
+	if (index_key->type != &key_type_keyring && (hash & level_mask) == 0)
+		return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
+	if (index_key->type == &key_type_keyring && (hash & level_mask) != 0)
+		return (hash + (hash << level_shift)) & ~level_mask;
+	return hash;
+}
+
+/*
+ * Build the next index key chunk.
+ *
+ * On 32-bit systems the index key is laid out as:
+ *
+ *	0	4	5	9...
+ *	hash	desclen	typeptr	desc[]
+ *
+ * On 64-bit systems:
+ *
+ *	0	8	9	17...
+ *	hash	desclen	typeptr	desc[]
+ *
+ * We return it one word-sized chunk at a time.
  */
-static int keyring_match(const struct key *keyring, const void *description)
+static unsigned long keyring_get_key_chunk(const void *data, int level)
+{
+	const struct keyring_index_key *index_key = data;
+	unsigned long chunk = 0;
+	long offset = 0;
+	int desc_len = index_key->desc_len, n = sizeof(chunk);
+
+	level /= ASSOC_ARRAY_KEY_CHUNK_SIZE;
+	switch (level) {
+	case 0:
+		return hash_key_type_and_desc(index_key);
+	case 1:
+		return ((unsigned long)index_key->type << 8) | desc_len;
+	case 2:
+		if (desc_len == 0)
+			return (u8)((unsigned long)index_key->type >>
+				    (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
+		n--;
+		offset = 1;
+	default:
+		offset += sizeof(chunk) - 1;
+		offset += (level - 3) * sizeof(chunk);
+		if (offset >= desc_len)
+			return 0;
+		desc_len -= offset;
+		if (desc_len > n)
+			desc_len = n;
+		offset += desc_len;
+		do {
+			chunk <<= 8;
+			chunk |= ((u8*)index_key->description)[--offset];
+		} while (--desc_len > 0);
+
+		if (level == 2) {
+			chunk <<= 8;
+			chunk |= (u8)((unsigned long)index_key->type >>
+				      (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
+		}
+		return chunk;
+	}
+}
+
+static unsigned long keyring_get_object_key_chunk(const void *object, int level)
+{
+	const struct key *key = keyring_ptr_to_key(object);
+	return keyring_get_key_chunk(&key->index_key, level);
+}
+
+static bool keyring_compare_object(const void *object, const void *data)
 {
-	return keyring->description &&
-		strcmp(keyring->description, description) == 0;
+	const struct keyring_index_key *index_key = data;
+	const struct key *key = keyring_ptr_to_key(object);
+
+	return key->index_key.type == index_key->type &&
+		key->index_key.desc_len == index_key->desc_len &&
+		memcmp(key->index_key.description, index_key->description,
+		       index_key->desc_len) == 0;
 }
 
+/*
+ * Compare the index keys of a pair of objects and determine the bit position
+ * at which they differ - if they differ.
+ */
+static int keyring_diff_objects(const void *_a, const void *_b)
+{
+	const struct key *key_a = keyring_ptr_to_key(_a);
+	const struct key *key_b = keyring_ptr_to_key(_b);
+	const struct keyring_index_key *a = &key_a->index_key;
+	const struct keyring_index_key *b = &key_b->index_key;
+	unsigned long seg_a, seg_b;
+	int level, i;
+
+	level = 0;
+	seg_a = hash_key_type_and_desc(a);
+	seg_b = hash_key_type_and_desc(b);
+	if ((seg_a ^ seg_b) != 0)
+		goto differ;
+
+	/* The number of bits contributed by the hash is controlled by a
+	 * constant in the assoc_array headers.  Everything else thereafter we
+	 * can deal with as being machine word-size dependent.
+	 */
+	level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;
+	seg_a = a->desc_len;
+	seg_b = b->desc_len;
+	if ((seg_a ^ seg_b) != 0)
+		goto differ;
+
+	/* The next bit may not work on big endian */
+	level++;
+	seg_a = (unsigned long)a->type;
+	seg_b = (unsigned long)b->type;
+	if ((seg_a ^ seg_b) != 0)
+		goto differ;
+
+	level += sizeof(unsigned long);
+	if (a->desc_len == 0)
+		goto same;
+
+	i = 0;
+	if (((unsigned long)a->description | (unsigned long)b->description) &
+	    (sizeof(unsigned long) - 1)) {
+		do {
+			seg_a = *(unsigned long *)(a->description + i);
+			seg_b = *(unsigned long *)(b->description + i);
+			if ((seg_a ^ seg_b) != 0)
+				goto differ_plus_i;
+			i += sizeof(unsigned long);
+		} while (i < (a->desc_len & (sizeof(unsigned long) - 1)));
+	}
+
+	for (; i < a->desc_len; i++) {
+		seg_a = *(unsigned char *)(a->description + i);
+		seg_b = *(unsigned char *)(b->description + i);
+		if ((seg_a ^ seg_b) != 0)
+			goto differ_plus_i;
+	}
+
+same:
+	return -1;
+
+differ_plus_i:
+	level += i;
+differ:
+	i = level * 8 + __ffs(seg_a ^ seg_b);
+	return i;
+}
+
+/*
+ * Free an object after stripping the keyring flag off of the pointer.
+ */
+static void keyring_free_object(void *object)
+{
+	key_put(keyring_ptr_to_key(object));
+}
+
+/*
+ * Operations for keyring management by the index-tree routines.
+ */
+static const struct assoc_array_ops keyring_assoc_array_ops = {
+	.get_key_chunk		= keyring_get_key_chunk,
+	.get_object_key_chunk	= keyring_get_object_key_chunk,
+	.compare_object		= keyring_compare_object,
+	.diff_objects		= keyring_diff_objects,
+	.free_object		= keyring_free_object,
+};
+
 /*
  * Clean up a keyring when it is destroyed.  Unpublish its name if it had one
  * and dispose of its data.
@@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description)
  */
 static void keyring_destroy(struct key *keyring)
 {
-	struct keyring_list *klist;
-	int loop;
-
 	if (keyring->description) {
 		write_lock(&keyring_name_lock);
 
@@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring)
 		write_unlock(&keyring_name_lock);
 	}
 
-	klist = rcu_access_pointer(keyring->payload.subscriptions);
-	if (klist) {
-		for (loop = klist->nkeys - 1; loop >= 0; loop--)
-			key_put(rcu_access_pointer(klist->keys[loop]));
-		kfree(klist);
-	}
+	assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops);
 }
 
 /*
@@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring)
  */
 static void keyring_describe(const struct key *keyring, struct seq_file *m)
 {
-	struct keyring_list *klist;
-
 	if (keyring->description)
 		seq_puts(m, keyring->description);
 	else
 		seq_puts(m, "[anon]");
 
 	if (key_is_instantiated(keyring)) {
-		rcu_read_lock();
-		klist = rcu_dereference(keyring->payload.subscriptions);
-		if (klist)
-			seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys);
+		if (keyring->keys.nr_leaves_on_tree != 0)
+			seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree);
 		else
 			seq_puts(m, ": empty");
-		rcu_read_unlock();
 	}
 }
 
+struct keyring_read_iterator_context {
+	size_t			qty;
+	size_t			count;
+	key_serial_t __user	*buffer;
+};
+
+static int keyring_read_iterator(const void *object, void *data)
+{
+	struct keyring_read_iterator_context *ctx = data;
+	const struct key *key = keyring_ptr_to_key(object);
+	int ret;
+
+	kenter("{%s,%d},,{%zu/%zu}",
+	       key->type->name, key->serial, ctx->count, ctx->qty);
+
+	if (ctx->count >= ctx->qty)
+		return 1;
+
+	ret = put_user(key->serial, ctx->buffer);
+	if (ret < 0)
+		return ret;
+	ctx->buffer++;
+	ctx->count += sizeof(key->serial);
+	return 0;
+}
+
 /*
  * Read a list of key IDs from the keyring's contents in binary form
  *
- * The keyring's semaphore is read-locked by the caller.
+ * The keyring's semaphore is read-locked by the caller.  This prevents someone
+ * from modifying it under us - which could cause us to read key IDs multiple
+ * times.
  */
 static long keyring_read(const struct key *keyring,
 			 char __user *buffer, size_t buflen)
 {
-	struct keyring_list *klist;
-	struct key *key;
-	size_t qty, tmp;
-	int loop, ret;
+	struct keyring_read_iterator_context ctx;
+	unsigned long nr_keys;
+	int ret;
 
-	ret = 0;
-	klist = rcu_dereference_locked_keyring(keyring);
-	if (klist) {
-		/* calculate how much data we could return */
-		qty = klist->nkeys * sizeof(key_serial_t);
-
-		if (buffer && buflen > 0) {
-			if (buflen > qty)
-				buflen = qty;
-
-			/* copy the IDs of the subscribed keys into the
-			 * buffer */
-			ret = -EFAULT;
-
-			for (loop = 0; loop < klist->nkeys; loop++) {
-				key = rcu_deref_link_locked(klist, loop,
-							    keyring);
-
-				tmp = sizeof(key_serial_t);
-				if (tmp > buflen)
-					tmp = buflen;
-
-				if (copy_to_user(buffer,
-						 &key->serial,
-						 tmp) != 0)
-					goto error;
-
-				buflen -= tmp;
-				if (buflen == 0)
-					break;
-				buffer += tmp;
-			}
-		}
+	kenter("{%d},,%zu", key_serial(keyring), buflen);
+
+	if (buflen & (sizeof(key_serial_t) - 1))
+		return -EINVAL;
+
+	nr_keys = keyring->keys.nr_leaves_on_tree;
+	if (nr_keys == 0)
+		return 0;
 
-		ret = qty;
+	/* Calculate how much data we could return */
+	ctx.qty = nr_keys * sizeof(key_serial_t);
+
+	if (!buffer || !buflen)
+		return ctx.qty;
+
+	if (buflen > ctx.qty)
+		ctx.qty = buflen;
+
+	/* Copy the IDs of the subscribed keys into the buffer */
+	ctx.buffer = (key_serial_t __user *)buffer;
+	ctx.count = 0;
+	ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
+	if (ret < 0) {
+		kleave(" = %d [iterate]", ret);
+		return ret;
 	}
 
-error:
-	return ret;
+	kleave(" = %zu [ok]", ctx.count);
+	return ctx.count;
 }
 
 /*
@@ -277,219 +500,360 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
 }
 EXPORT_SYMBOL(keyring_alloc);
 
-/**
- * keyring_search_aux - Search a keyring tree for a key matching some criteria
- * @keyring_ref: A pointer to the keyring with possession indicator.
- * @ctx: The keyring search context.
- *
- * Search the supplied keyring tree for a key that matches the criteria given.
- * The root keyring and any linked keyrings must grant Search permission to the
- * caller to be searchable and keys can only be found if they too grant Search
- * to the caller. The possession flag on the root keyring pointer controls use
- * of the possessor bits in permissions checking of the entire tree.  In
- * addition, the LSM gets to forbid keyring searches and key matches.
- *
- * The search is performed as a breadth-then-depth search up to the prescribed
- * limit (KEYRING_SEARCH_MAX_DEPTH).
- *
- * Keys are matched to the type provided and are then filtered by the match
- * function, which is given the description to use in any way it sees fit.  The
- * match function may use any attributes of a key that it wishes to to
- * determine the match.  Normally the match function from the key type would be
- * used.
- *
- * RCU is used to prevent the keyring key lists from disappearing without the
- * need to take lots of locks.
- *
- * Returns a pointer to the found key and increments the key usage count if
- * successful; -EAGAIN if no matching keys were found, or if expired or revoked
- * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
- * specified keyring wasn't a keyring.
- *
- * In the case of a successful return, the possession attribute from
- * @keyring_ref is propagated to the returned key reference.
+/*
+ * Iteration function to consider each key found.
  */
-key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-			     struct keyring_search_context *ctx)
+static int keyring_search_iterator(const void *object, void *iterator_data)
 {
-	struct {
-		/* Need a separate keylist pointer for RCU purposes */
-		struct key *keyring;
-		struct keyring_list *keylist;
-		int kix;
-	} stack[KEYRING_SEARCH_MAX_DEPTH];
-
-	struct keyring_list *keylist;
-	unsigned long kflags;
-	struct key *keyring, *key;
-	key_ref_t key_ref;
-	long err;
-	int sp, nkeys, kix;
+	struct keyring_search_context *ctx = iterator_data;
+	const struct key *key = keyring_ptr_to_key(object);
+	unsigned long kflags = key->flags;
 
-	keyring = key_ref_to_ptr(keyring_ref);
-	ctx->possessed = is_key_possessed(keyring_ref);
-	key_check(keyring);
+	kenter("{%d}", key->serial);
 
-	/* top keyring must have search permission to begin the search */
-	err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
-	if (err < 0) {
-		key_ref = ERR_PTR(err);
-		goto error;
+	/* ignore keys not of this type */
+	if (key->type != ctx->index_key.type) {
+		kleave(" = 0 [!type]");
+		return 0;
 	}
 
-	key_ref = ERR_PTR(-ENOTDIR);
-	if (keyring->type != &key_type_keyring)
-		goto error;
+	/* skip invalidated, revoked and expired keys */
+	if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+		if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+			      (1 << KEY_FLAG_REVOKED))) {
+			ctx->result = ERR_PTR(-EKEYREVOKED);
+			kleave(" = %d [invrev]", ctx->skipped_ret);
+			goto skipped;
+		}
 
-	rcu_read_lock();
+		if (key->expiry && ctx->now.tv_sec >= key->expiry) {
+			ctx->result = ERR_PTR(-EKEYEXPIRED);
+			kleave(" = %d [expire]", ctx->skipped_ret);
+			goto skipped;
+		}
+	}
 
-	ctx->now = current_kernel_time();
-	err = -EAGAIN;
-	sp = 0;
-
-	/* firstly we should check to see if this top-level keyring is what we
-	 * are looking for */
-	key_ref = ERR_PTR(-EAGAIN);
-	kflags = keyring->flags;
-	if (keyring->type == ctx->index_key.type &&
-	    ctx->match(keyring, ctx->match_data)) {
-		key = keyring;
-		if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
-			goto found;
+	/* keys that don't match */
+	if (!ctx->match(key, ctx->match_data)) {
+		kleave(" = 0 [!match]");
+		return 0;
+	}
 
-		/* check it isn't negative and hasn't expired or been
-		 * revoked */
-		if (kflags & (1 << KEY_FLAG_REVOKED))
-			goto error_2;
-		if (key->expiry && ctx->now.tv_sec >= key->expiry)
-			goto error_2;
-		key_ref = ERR_PTR(key->type_data.reject_error);
-		if (kflags & (1 << KEY_FLAG_NEGATIVE))
-			goto error_2;
-		goto found;
+	/* key must have search permissions */
+	if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+	    key_task_permission(make_key_ref(key, ctx->possessed),
+				ctx->cred, KEY_SEARCH) < 0) {
+		ctx->result = ERR_PTR(-EACCES);
+		kleave(" = %d [!perm]", ctx->skipped_ret);
+		goto skipped;
 	}
 
-	/* otherwise, the top keyring must not be revoked, expired, or
-	 * negatively instantiated if we are to search it */
-	key_ref = ERR_PTR(-EAGAIN);
-	if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-		      (1 << KEY_FLAG_REVOKED) |
-		      (1 << KEY_FLAG_NEGATIVE)) ||
-	    (keyring->expiry && ctx->now.tv_sec >= keyring->expiry))
-		goto error_2;
-
-	/* start processing a new keyring */
-descend:
-	kflags = keyring->flags;
-	if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-		      (1 << KEY_FLAG_REVOKED)))
-		goto not_this_keyring;
+	if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+		/* we set a different error code if we pass a negative key */
+		if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
+			ctx->result = ERR_PTR(key->type_data.reject_error);
+			kleave(" = %d [neg]", ctx->skipped_ret);
+			goto skipped;
+		}
+	}
 
-	keylist = rcu_dereference(keyring->payload.subscriptions);
-	if (!keylist)
-		goto not_this_keyring;
+	/* Found */
+	ctx->result = make_key_ref(key, ctx->possessed);
+	kleave(" = 1 [found]");
+	return 1;
 
-	/* iterate through the keys in this keyring first */
-	nkeys = keylist->nkeys;
-	smp_rmb();
-	for (kix = 0; kix < nkeys; kix++) {
-		key = rcu_dereference(keylist->keys[kix]);
-		kflags = key->flags;
+skipped:
+	return ctx->skipped_ret;
+}
 
-		/* ignore keys not of this type */
-		if (key->type != ctx->index_key.type)
-			continue;
+/*
+ * Search inside a keyring for a key.  We can search by walking to it
+ * directly based on its index-key or we can iterate over the entire
+ * tree looking for it, based on the match function.
+ */
+static int search_keyring(struct key *keyring, struct keyring_search_context *ctx)
+{
+	if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) ==
+	    KEYRING_SEARCH_LOOKUP_DIRECT) {
+		const void *object;
+
+		object = assoc_array_find(&keyring->keys,
+					  &keyring_assoc_array_ops,
+					  &ctx->index_key);
+		return object ? ctx->iterator(object, ctx) : 0;
+	}
+	return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx);
+}
 
-		/* skip invalidated, revoked and expired keys */
-		if (!(ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)) {
-			if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-				      (1 << KEY_FLAG_REVOKED)))
-				continue;
+/*
+ * Search a tree of keyrings that point to other keyrings up to the maximum
+ * depth.
+ */
+static bool search_nested_keyrings(struct key *keyring,
+				   struct keyring_search_context *ctx)
+{
+	struct {
+		struct key *keyring;
+		struct assoc_array_node *node;
+		int slot;
+	} stack[KEYRING_SEARCH_MAX_DEPTH];
 
-			if (key->expiry && ctx->now.tv_sec >= key->expiry)
-				continue;
-		}
+	struct assoc_array_shortcut *shortcut;
+	struct assoc_array_node *node;
+	struct assoc_array_ptr *ptr;
+	struct key *key;
+	int sp = 0, slot;
 
-		/* keys that don't match */
-		if (!ctx->match(key, ctx->match_data))
-			continue;
+	kenter("{%d},{%s,%s}",
+	       keyring->serial,
+	       ctx->index_key.type->name,
+	       ctx->index_key.description);
 
-		/* key must have search permissions */
-		if (key_task_permission(make_key_ref(key, ctx->possessed),
-					ctx->cred, KEY_SEARCH) < 0)
-			continue;
+	if (ctx->index_key.description)
+		ctx->index_key.desc_len = strlen(ctx->index_key.description);
 
-		if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+	/* Check to see if this top-level keyring is what we are looking for
+	 * and whether it is valid or not.
+	 */
+	if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE ||
+	    keyring_compare_object(keyring, &ctx->index_key)) {
+		ctx->skipped_ret = 2;
+		ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK;
+		switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) {
+		case 1:
 			goto found;
-
-		/* we set a different error code if we pass a negative key */
-		if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
-			err = key->type_data.reject_error;
-			continue;
+		case 2:
+			return false;
+		default:
+			break;
 		}
+	}
 
+	ctx->skipped_ret = 0;
+	if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+		ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK;
+
+	/* Start processing a new keyring */
+descend_to_keyring:
+	kdebug("descend to %d", keyring->serial);
+	if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+			      (1 << KEY_FLAG_REVOKED)))
+		goto not_this_keyring;
+
+	/* Search through the keys in this keyring before its searching its
+	 * subtrees.
+	 */
+	if (search_keyring(keyring, ctx))
 		goto found;
-	}
 
-	/* search through the keyrings nested in this one */
-	kix = 0;
-ascend:
-	nkeys = keylist->nkeys;
-	smp_rmb();
-	for (; kix < nkeys; kix++) {
-		key = rcu_dereference(keylist->keys[kix]);
-		if (key->type != &key_type_keyring)
-			continue;
+	/* Then manually iterate through the keyrings nested in this one.
+	 *
+	 * Start from the root node of the index tree.  Because of the way the
+	 * hash function has been set up, keyrings cluster on the leftmost
+	 * branch of the root node (root slot 0) or in the root node itself.
+	 * Non-keyrings avoid the leftmost branch of the root entirely (root
+	 * slots 1-15).
+	 */
+	ptr = ACCESS_ONCE(keyring->keys.root);
+	if (!ptr)
+		goto not_this_keyring;
 
-		/* recursively search nested keyrings
-		 * - only search keyrings for which we have search permission
+	if (assoc_array_ptr_is_shortcut(ptr)) {
+		/* If the root is a shortcut, either the keyring only contains
+		 * keyring pointers (everything clusters behind root slot 0) or
+		 * doesn't contain any keyring pointers.
 		 */
-		if (sp >= KEYRING_SEARCH_MAX_DEPTH)
+		shortcut = assoc_array_ptr_to_shortcut(ptr);
+		smp_read_barrier_depends();
+		if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
+			goto not_this_keyring;
+
+		ptr = ACCESS_ONCE(shortcut->next_node);
+		node = assoc_array_ptr_to_node(ptr);
+		goto begin_node;
+	}
+
+	node = assoc_array_ptr_to_node(ptr);
+	smp_read_barrier_depends();
+
+	ptr = node->slots[0];
+	if (!assoc_array_ptr_is_meta(ptr))
+		goto begin_node;
+
+descend_to_node:
+	/* Descend to a more distal node in this keyring's content tree and go
+	 * through that.
+	 */
+	kdebug("descend");
+	if (assoc_array_ptr_is_shortcut(ptr)) {
+		shortcut = assoc_array_ptr_to_shortcut(ptr);
+		smp_read_barrier_depends();
+		ptr = ACCESS_ONCE(shortcut->next_node);
+		BUG_ON(!assoc_array_ptr_is_node(ptr));
+		node = assoc_array_ptr_to_node(ptr);
+	}
+
+begin_node:
+	kdebug("begin_node");
+	smp_read_barrier_depends();
+	slot = 0;
+ascend_to_node:
+	/* Go through the slots in a node */
+	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = ACCESS_ONCE(node->slots[slot]);
+
+		if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
+			goto descend_to_node;
+
+		if (!keyring_ptr_is_keyring(ptr))
 			continue;
 
-		if (key_task_permission(make_key_ref(key, ctx->possessed),
+		key = keyring_ptr_to_key(ptr);
+
+		if (sp >= KEYRING_SEARCH_MAX_DEPTH) {
+			if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) {
+				ctx->result = ERR_PTR(-ELOOP);
+				return false;
+			}
+			goto not_this_keyring;
+		}
+
+		/* Search a nested keyring */
+		if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+		    key_task_permission(make_key_ref(key, ctx->possessed),
 					ctx->cred, KEY_SEARCH) < 0)
 			continue;
 
 		/* stack the current position */
 		stack[sp].keyring = keyring;
-		stack[sp].keylist = keylist;
-		stack[sp].kix = kix;
+		stack[sp].node = node;
+		stack[sp].slot = slot;
 		sp++;
 
 		/* begin again with the new keyring */
 		keyring = key;
-		goto descend;
+		goto descend_to_keyring;
+	}
+
+	/* We've dealt with all the slots in the current node, so now we need
+	 * to ascend to the parent and continue processing there.
+	 */
+	ptr = ACCESS_ONCE(node->back_pointer);
+	slot = node->parent_slot;
+
+	if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
+		shortcut = assoc_array_ptr_to_shortcut(ptr);
+		smp_read_barrier_depends();
+		ptr = ACCESS_ONCE(shortcut->back_pointer);
+		slot = shortcut->parent_slot;
+	}
+	if (!ptr)
+		goto not_this_keyring;
+	node = assoc_array_ptr_to_node(ptr);
+	smp_read_barrier_depends();
+	slot++;
+
+	/* If we've ascended to the root (zero backpointer), we must have just
+	 * finished processing the leftmost branch rather than the root slots -
+	 * so there can't be any more keyrings for us to find.
+	 */
+	if (node->back_pointer) {
+		kdebug("ascend %d", slot);
+		goto ascend_to_node;
 	}
 
-	/* the keyring we're looking at was disqualified or didn't contain a
-	 * matching key */
+	/* The keyring we're looking at was disqualified or didn't contain a
+	 * matching key.
+	 */
 not_this_keyring:
-	if (sp > 0) {
-		/* resume the processing of a keyring higher up in the tree */
-		sp--;
-		keyring = stack[sp].keyring;
-		keylist = stack[sp].keylist;
-		kix = stack[sp].kix + 1;
-		goto ascend;
+	kdebug("not_this_keyring %d", sp);
+	if (sp <= 0) {
+		kleave(" = false");
+		return false;
 	}
 
-	key_ref = ERR_PTR(err);
-	goto error_2;
+	/* Resume the processing of a keyring higher up in the tree */
+	sp--;
+	keyring = stack[sp].keyring;
+	node = stack[sp].node;
+	slot = stack[sp].slot + 1;
+	kdebug("ascend to %d [%d]", keyring->serial, slot);
+	goto ascend_to_node;
 
-	/* we found a viable match */
+	/* We found a viable match */
 found:
-	__key_get(key);
-	key->last_used_at = ctx->now.tv_sec;
-	keyring->last_used_at = ctx->now.tv_sec;
-	while (sp > 0)
-		stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
+	key = key_ref_to_ptr(ctx->result);
 	key_check(key);
-	key_ref = make_key_ref(key, ctx->possessed);
-error_2:
+	if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) {
+		key->last_used_at = ctx->now.tv_sec;
+		keyring->last_used_at = ctx->now.tv_sec;
+		while (sp > 0)
+			stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
+	}
+	kleave(" = true");
+	return true;
+}
+
+/**
+ * keyring_search_aux - Search a keyring tree for a key matching some criteria
+ * @keyring_ref: A pointer to the keyring with possession indicator.
+ * @ctx: The keyring search context.
+ *
+ * Search the supplied keyring tree for a key that matches the criteria given.
+ * The root keyring and any linked keyrings must grant Search permission to the
+ * caller to be searchable and keys can only be found if they too grant Search
+ * to the caller. The possession flag on the root keyring pointer controls use
+ * of the possessor bits in permissions checking of the entire tree.  In
+ * addition, the LSM gets to forbid keyring searches and key matches.
+ *
+ * The search is performed as a breadth-then-depth search up to the prescribed
+ * limit (KEYRING_SEARCH_MAX_DEPTH).
+ *
+ * Keys are matched to the type provided and are then filtered by the match
+ * function, which is given the description to use in any way it sees fit.  The
+ * match function may use any attributes of a key that it wishes to to
+ * determine the match.  Normally the match function from the key type would be
+ * used.
+ *
+ * RCU can be used to prevent the keyring key lists from disappearing without
+ * the need to take lots of locks.
+ *
+ * Returns a pointer to the found key and increments the key usage count if
+ * successful; -EAGAIN if no matching keys were found, or if expired or revoked
+ * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
+ * specified keyring wasn't a keyring.
+ *
+ * In the case of a successful return, the possession attribute from
+ * @keyring_ref is propagated to the returned key reference.
+ */
+key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+			     struct keyring_search_context *ctx)
+{
+	struct key *keyring;
+	long err;
+
+	ctx->iterator = keyring_search_iterator;
+	ctx->possessed = is_key_possessed(keyring_ref);
+	ctx->result = ERR_PTR(-EAGAIN);
+
+	keyring = key_ref_to_ptr(keyring_ref);
+	key_check(keyring);
+
+	if (keyring->type != &key_type_keyring)
+		return ERR_PTR(-ENOTDIR);
+
+	if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) {
+		err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+		if (err < 0)
+			return ERR_PTR(err);
+	}
+
+	rcu_read_lock();
+	ctx->now = current_kernel_time();
+	if (search_nested_keyrings(keyring, ctx))
+		__key_get(key_ref_to_ptr(ctx->result));
 	rcu_read_unlock();
-error:
-	return key_ref;
+	return ctx->result;
 }
 
 /**
@@ -499,7 +863,7 @@ error:
  * @description: The name of the keyring we want to find.
  *
  * As keyring_search_aux() above, but using the current task's credentials and
- * type's default matching function.
+ * type's default matching function and preferred search method.
  */
 key_ref_t keyring_search(key_ref_t keyring,
 			 struct key_type *type,
@@ -523,58 +887,49 @@ key_ref_t keyring_search(key_ref_t keyring,
 EXPORT_SYMBOL(keyring_search);
 
 /*
- * Search the given keyring only (no recursion).
+ * Search the given keyring for a key that might be updated.
  *
  * The caller must guarantee that the keyring is a keyring and that the
- * permission is granted to search the keyring as no check is made here.
- *
- * RCU is used to make it unnecessary to lock the keyring key list here.
+ * permission is granted to modify the keyring as no check is made here.  The
+ * caller must also hold a lock on the keyring semaphore.
  *
  * Returns a pointer to the found key with usage count incremented if
- * successful and returns -ENOKEY if not found.  Revoked and invalidated keys
- * are skipped over.
+ * successful and returns NULL if not found.  Revoked and invalidated keys are
+ * skipped over.
  *
  * If successful, the possession indicator is propagated from the keyring ref
  * to the returned key reference.
  */
-key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-			       const struct keyring_index_key *index_key)
+key_ref_t find_key_to_update(key_ref_t keyring_ref,
+			     const struct keyring_index_key *index_key)
 {
-	struct keyring_list *klist;
 	struct key *keyring, *key;
-	bool possessed;
-	int nkeys, loop;
+	const void *object;
 
 	keyring = key_ref_to_ptr(keyring_ref);
-	possessed = is_key_possessed(keyring_ref);
 
-	rcu_read_lock();
+	kenter("{%d},{%s,%s}",
+	       keyring->serial, index_key->type->name, index_key->description);
 
-	klist = rcu_dereference(keyring->payload.subscriptions);
-	if (klist) {
-		nkeys = klist->nkeys;
-		smp_rmb();
-		for (loop = 0; loop < nkeys ; loop++) {
-			key = rcu_dereference(klist->keys[loop]);
-			if (key->type == index_key->type &&
-			    (!key->type->match ||
-			     key->type->match(key, index_key->description)) &&
-			    !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-					    (1 << KEY_FLAG_REVOKED)))
-			    )
-				goto found;
-		}
-	}
+	object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops,
+				  index_key);
 
-	rcu_read_unlock();
-	return ERR_PTR(-ENOKEY);
+	if (object)
+		goto found;
+
+	kleave(" = NULL");
+	return NULL;
 
 found:
+	key = keyring_ptr_to_key(object);
+	if (key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+			  (1 << KEY_FLAG_REVOKED))) {
+		kleave(" = NULL [x]");
+		return NULL;
+	}
 	__key_get(key);
-	keyring->last_used_at = key->last_used_at =
-		current_kernel_time().tv_sec;
-	rcu_read_unlock();
-	return make_key_ref(key, possessed);
+	kleave(" = {%d}", key->serial);
+	return make_key_ref(key, is_key_possessed(keyring_ref));
 }
 
 /*
@@ -637,6 +992,19 @@ out:
 	return keyring;
 }
 
+static int keyring_detect_cycle_iterator(const void *object,
+					 void *iterator_data)
+{
+	struct keyring_search_context *ctx = iterator_data;
+	const struct key *key = keyring_ptr_to_key(object);
+
+	kenter("{%d}", key->serial);
+
+	BUG_ON(key != ctx->match_data);
+	ctx->result = ERR_PTR(-EDEADLK);
+	return 1;
+}
+
 /*
  * See if a cycle will will be created by inserting acyclic tree B in acyclic
  * tree A at the topmost level (ie: as a direct child of A).
@@ -646,117 +1014,39 @@ out:
  */
 static int keyring_detect_cycle(struct key *A, struct key *B)
 {
-	struct {
-		struct keyring_list *keylist;
-		int kix;
-	} stack[KEYRING_SEARCH_MAX_DEPTH];
-
-	struct keyring_list *keylist;
-	struct key *subtree, *key;
-	int sp, nkeys, kix, ret;
+	struct keyring_search_context ctx = {
+		.index_key	= A->index_key,
+		.match_data	= A,
+		.iterator	= keyring_detect_cycle_iterator,
+		.flags		= (KEYRING_SEARCH_LOOKUP_DIRECT |
+				   KEYRING_SEARCH_NO_STATE_CHECK |
+				   KEYRING_SEARCH_NO_UPDATE_TIME |
+				   KEYRING_SEARCH_NO_CHECK_PERM |
+				   KEYRING_SEARCH_DETECT_TOO_DEEP),
+	};
 
 	rcu_read_lock();
-
-	ret = -EDEADLK;
-	if (A == B)
-		goto cycle_detected;
-
-	subtree = B;
-	sp = 0;
-
-	/* start processing a new keyring */
-descend:
-	if (test_bit(KEY_FLAG_REVOKED, &subtree->flags))
-		goto not_this_keyring;
-
-	keylist = rcu_dereference(subtree->payload.subscriptions);
-	if (!keylist)
-		goto not_this_keyring;
-	kix = 0;
-
-ascend:
-	/* iterate through the remaining keys in this keyring */
-	nkeys = keylist->nkeys;
-	smp_rmb();
-	for (; kix < nkeys; kix++) {
-		key = rcu_dereference(keylist->keys[kix]);
-
-		if (key == A)
-			goto cycle_detected;
-
-		/* recursively check nested keyrings */
-		if (key->type == &key_type_keyring) {
-			if (sp >= KEYRING_SEARCH_MAX_DEPTH)
-				goto too_deep;
-
-			/* stack the current position */
-			stack[sp].keylist = keylist;
-			stack[sp].kix = kix;
-			sp++;
-
-			/* begin again with the new keyring */
-			subtree = key;
-			goto descend;
-		}
-	}
-
-	/* the keyring we're looking at was disqualified or didn't contain a
-	 * matching key */
-not_this_keyring:
-	if (sp > 0) {
-		/* resume the checking of a keyring higher up in the tree */
-		sp--;
-		keylist = stack[sp].keylist;
-		kix = stack[sp].kix + 1;
-		goto ascend;
-	}
-
-	ret = 0; /* no cycles detected */
-
-error:
+	search_nested_keyrings(B, &ctx);
 	rcu_read_unlock();
-	return ret;
-
-too_deep:
-	ret = -ELOOP;
-	goto error;
-
-cycle_detected:
-	ret = -EDEADLK;
-	goto error;
-}
-
-/*
- * Dispose of a keyring list after the RCU grace period, freeing the unlinked
- * key
- */
-static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
-{
-	struct keyring_list *klist =
-		container_of(rcu, struct keyring_list, rcu);
-
-	if (klist->delkey != USHRT_MAX)
-		key_put(rcu_access_pointer(klist->keys[klist->delkey]));
-	kfree(klist);
+	return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result);
 }
 
 /*
  * Preallocate memory so that a key can be linked into to a keyring.
  */
-int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key,
-		     unsigned long *_prealloc)
+int __key_link_begin(struct key *keyring,
+		     const struct keyring_index_key *index_key,
+		     struct assoc_array_edit **_edit)
 	__acquires(&keyring->sem)
 	__acquires(&keyring_serialise_link_sem)
 {
-	struct keyring_list *klist, *nklist;
-	unsigned long prealloc;
-	unsigned max;
-	time_t lowest_lru;
-	size_t size;
-	int loop, lru, ret;
+	struct assoc_array_edit *edit;
+	int ret;
 
 	kenter("%d,%s,%s,",
-	       key_serial(keyring), index_key->type->name, index_key->description);
+	       keyring->serial, index_key->type->name, index_key->description);
+
+	BUG_ON(index_key->desc_len == 0);
 
 	if (keyring->type != &key_type_keyring)
 		return -ENOTDIR;
@@ -772,88 +1062,25 @@ int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_
 	if (index_key->type == &key_type_keyring)
 		down_write(&keyring_serialise_link_sem);
 
-	klist = rcu_dereference_locked_keyring(keyring);
-
-	/* see if there's a matching key we can displace */
-	lru = -1;
-	if (klist && klist->nkeys > 0) {
-		lowest_lru = TIME_T_MAX;
-		for (loop = klist->nkeys - 1; loop >= 0; loop--) {
-			struct key *key = rcu_deref_link_locked(klist, loop,
-								keyring);
-			if (key->type == index_key->type &&
-			    strcmp(key->description, index_key->description) == 0) {
-				/* Found a match - we'll replace the link with
-				 * one to the new key.  We record the slot
-				 * position.
-				 */
-				klist->delkey = loop;
-				prealloc = 0;
-				goto done;
-			}
-			if (key->last_used_at < lowest_lru) {
-				lowest_lru = key->last_used_at;
-				lru = loop;
-			}
-		}
-	}
-
-	/* If the keyring is full then do an LRU discard */
-	if (klist &&
-	    klist->nkeys == klist->maxkeys &&
-	    klist->maxkeys >= MAX_KEYRING_LINKS) {
-		kdebug("LRU discard %d\n", lru);
-		klist->delkey = lru;
-		prealloc = 0;
-		goto done;
-	}
-
 	/* check that we aren't going to overrun the user's quota */
 	ret = key_payload_reserve(keyring,
 				  keyring->datalen + KEYQUOTA_LINK_BYTES);
 	if (ret < 0)
 		goto error_sem;
 
-	if (klist && klist->nkeys < klist->maxkeys) {
-		/* there's sufficient slack space to append directly */
-		klist->delkey = klist->nkeys;
-		prealloc = KEY_LINK_FIXQUOTA;
-	} else {
-		/* grow the key list */
-		max = 4;
-		if (klist) {
-			max += klist->maxkeys;
-			if (max > MAX_KEYRING_LINKS)
-				max = MAX_KEYRING_LINKS;
-			BUG_ON(max <= klist->maxkeys);
-		}
-
-		size = sizeof(*klist) + sizeof(struct key *) * max;
-
-		ret = -ENOMEM;
-		nklist = kmalloc(size, GFP_KERNEL);
-		if (!nklist)
-			goto error_quota;
-
-		nklist->maxkeys = max;
-		if (klist) {
-			memcpy(nklist->keys, klist->keys,
-			       sizeof(struct key *) * klist->nkeys);
-			nklist->delkey = klist->nkeys;
-			nklist->nkeys = klist->nkeys + 1;
-			klist->delkey = USHRT_MAX;
-		} else {
-			nklist->nkeys = 1;
-			nklist->delkey = 0;
-		}
-
-		/* add the key into the new space */
-		RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL);
-		prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA;
+	/* Create an edit script that will insert/replace the key in the
+	 * keyring tree.
+	 */
+	edit = assoc_array_insert(&keyring->keys,
+				  &keyring_assoc_array_ops,
+				  index_key,
+				  NULL);
+	if (IS_ERR(edit)) {
+		ret = PTR_ERR(edit);
+		goto error_quota;
 	}
 
-done:
-	*_prealloc = prealloc;
+	*_edit = edit;
 	kleave(" = 0");
 	return 0;
 
@@ -893,60 +1120,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key)
  * holds at most one link to any given key of a particular type+description
  * combination.
  */
-void __key_link(struct key *keyring, struct key *key,
-		unsigned long *_prealloc)
+void __key_link(struct key *key, struct assoc_array_edit **_edit)
 {
-	struct keyring_list *klist, *nklist;
-	struct key *discard;
-
-	nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA);
-	*_prealloc = 0;
-
-	kenter("%d,%d,%p", keyring->serial, key->serial, nklist);
-
-	klist = rcu_dereference_locked_keyring(keyring);
-
 	__key_get(key);
-	keyring->last_used_at = key->last_used_at =
-		current_kernel_time().tv_sec;
-
-	/* there's a matching key we can displace or an empty slot in a newly
-	 * allocated list we can fill */
-	if (nklist) {
-		kdebug("reissue %hu/%hu/%hu",
-		       nklist->delkey, nklist->nkeys, nklist->maxkeys);
-
-		RCU_INIT_POINTER(nklist->keys[nklist->delkey], key);
-
-		rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
-		/* dispose of the old keyring list and, if there was one, the
-		 * displaced key */
-		if (klist) {
-			kdebug("dispose %hu/%hu/%hu",
-			       klist->delkey, klist->nkeys, klist->maxkeys);
-			call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
-		}
-	} else if (klist->delkey < klist->nkeys) {
-		kdebug("replace %hu/%hu/%hu",
-		       klist->delkey, klist->nkeys, klist->maxkeys);
-
-		discard = rcu_dereference_protected(
-			klist->keys[klist->delkey],
-			rwsem_is_locked(&keyring->sem));
-		rcu_assign_pointer(klist->keys[klist->delkey], key);
-		/* The garbage collector will take care of RCU
-		 * synchronisation */
-		key_put(discard);
-	} else {
-		/* there's sufficient slack space to append directly */
-		kdebug("append %hu/%hu/%hu",
-		       klist->delkey, klist->nkeys, klist->maxkeys);
-
-		RCU_INIT_POINTER(klist->keys[klist->delkey], key);
-		smp_wmb();
-		klist->nkeys++;
-	}
+	assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key));
+	assoc_array_apply_edit(*_edit);
+	*_edit = NULL;
 }
 
 /*
@@ -956,23 +1135,20 @@ void __key_link(struct key *keyring, struct key *key,
  */
 void __key_link_end(struct key *keyring,
 		    const struct keyring_index_key *index_key,
-		    unsigned long prealloc)
+		    struct assoc_array_edit *edit)
 	__releases(&keyring->sem)
 	__releases(&keyring_serialise_link_sem)
 {
 	BUG_ON(index_key->type == NULL);
-	BUG_ON(index_key->type->name == NULL);
-	kenter("%d,%s,%lx", keyring->serial, index_key->type->name, prealloc);
+	kenter("%d,%s,", keyring->serial, index_key->type->name);
 
 	if (index_key->type == &key_type_keyring)
 		up_write(&keyring_serialise_link_sem);
 
-	if (prealloc) {
-		if (prealloc & KEY_LINK_FIXQUOTA)
-			key_payload_reserve(keyring,
-					    keyring->datalen -
-					    KEYQUOTA_LINK_BYTES);
-		kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA));
+	if (edit) {
+		key_payload_reserve(keyring,
+				    keyring->datalen - KEYQUOTA_LINK_BYTES);
+		assoc_array_cancel_edit(edit);
 	}
 	up_write(&keyring->sem);
 }
@@ -999,20 +1175,24 @@ void __key_link_end(struct key *keyring,
  */
 int key_link(struct key *keyring, struct key *key)
 {
-	unsigned long prealloc;
+	struct assoc_array_edit *edit;
 	int ret;
 
+	kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage));
+
 	key_check(keyring);
 	key_check(key);
 
-	ret = __key_link_begin(keyring, &key->index_key, &prealloc);
+	ret = __key_link_begin(keyring, &key->index_key, &edit);
 	if (ret == 0) {
+		kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage));
 		ret = __key_link_check_live_key(keyring, key);
 		if (ret == 0)
-			__key_link(keyring, key, &prealloc);
-		__key_link_end(keyring, &key->index_key, prealloc);
+			__key_link(key, &edit);
+		__key_link_end(keyring, &key->index_key, edit);
 	}
 
+	kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage));
 	return ret;
 }
 EXPORT_SYMBOL(key_link);
@@ -1036,90 +1216,36 @@ EXPORT_SYMBOL(key_link);
  */
 int key_unlink(struct key *keyring, struct key *key)
 {
-	struct keyring_list *klist, *nklist;
-	int loop, ret;
+	struct assoc_array_edit *edit;
+	int ret;
 
 	key_check(keyring);
 	key_check(key);
 
-	ret = -ENOTDIR;
 	if (keyring->type != &key_type_keyring)
-		goto error;
+		return -ENOTDIR;
 
 	down_write(&keyring->sem);
 
-	klist = rcu_dereference_locked_keyring(keyring);
-	if (klist) {
-		/* search the keyring for the key */
-		for (loop = 0; loop < klist->nkeys; loop++)
-			if (rcu_access_pointer(klist->keys[loop]) == key)
-				goto key_is_present;
+	edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
+				  &key->index_key);
+	if (IS_ERR(edit)) {
+		ret = PTR_ERR(edit);
+		goto error;
 	}
-
-	up_write(&keyring->sem);
 	ret = -ENOENT;
-	goto error;
-
-key_is_present:
-	/* we need to copy the key list for RCU purposes */
-	nklist = kmalloc(sizeof(*klist) +
-			 sizeof(struct key *) * klist->maxkeys,
-			 GFP_KERNEL);
-	if (!nklist)
-		goto nomem;
-	nklist->maxkeys = klist->maxkeys;
-	nklist->nkeys = klist->nkeys - 1;
-
-	if (loop > 0)
-		memcpy(&nklist->keys[0],
-		       &klist->keys[0],
-		       loop * sizeof(struct key *));
-
-	if (loop < nklist->nkeys)
-		memcpy(&nklist->keys[loop],
-		       &klist->keys[loop + 1],
-		       (nklist->nkeys - loop) * sizeof(struct key *));
-
-	/* adjust the user's quota */
-	key_payload_reserve(keyring,
-			    keyring->datalen - KEYQUOTA_LINK_BYTES);
-
-	rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
-	up_write(&keyring->sem);
-
-	/* schedule for later cleanup */
-	klist->delkey = loop;
-	call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
+	if (edit == NULL)
+		goto error;
 
+	assoc_array_apply_edit(edit);
 	ret = 0;
 
 error:
-	return ret;
-nomem:
-	ret = -ENOMEM;
 	up_write(&keyring->sem);
-	goto error;
+	return ret;
 }
 EXPORT_SYMBOL(key_unlink);
 
-/*
- * Dispose of a keyring list after the RCU grace period, releasing the keys it
- * links to.
- */
-static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
-{
-	struct keyring_list *klist;
-	int loop;
-
-	klist = container_of(rcu, struct keyring_list, rcu);
-
-	for (loop = klist->nkeys - 1; loop >= 0; loop--)
-		key_put(rcu_access_pointer(klist->keys[loop]));
-
-	kfree(klist);
-}
-
 /**
  * keyring_clear - Clear a keyring
  * @keyring: The keyring to clear.
@@ -1130,33 +1256,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
  */
 int keyring_clear(struct key *keyring)
 {
-	struct keyring_list *klist;
+	struct assoc_array_edit *edit;
 	int ret;
 
-	ret = -ENOTDIR;
-	if (keyring->type == &key_type_keyring) {
-		/* detach the pointer block with the locks held */
-		down_write(&keyring->sem);
-
-		klist = rcu_dereference_locked_keyring(keyring);
-		if (klist) {
-			/* adjust the quota */
-			key_payload_reserve(keyring,
-					    sizeof(struct keyring_list));
-
-			rcu_assign_pointer(keyring->payload.subscriptions,
-					   NULL);
-		}
-
-		up_write(&keyring->sem);
+	if (keyring->type != &key_type_keyring)
+		return -ENOTDIR;
 
-		/* free the keys after the locks have been dropped */
-		if (klist)
-			call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+	down_write(&keyring->sem);
 
+	edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+	if (IS_ERR(edit)) {
+		ret = PTR_ERR(edit);
+	} else {
+		if (edit)
+			assoc_array_apply_edit(edit);
+		key_payload_reserve(keyring, 0);
 		ret = 0;
 	}
 
+	up_write(&keyring->sem);
 	return ret;
 }
 EXPORT_SYMBOL(keyring_clear);
@@ -1168,17 +1286,25 @@ EXPORT_SYMBOL(keyring_clear);
  */
 static void keyring_revoke(struct key *keyring)
 {
-	struct keyring_list *klist;
+	struct assoc_array_edit *edit;
 
-	klist = rcu_dereference_locked_keyring(keyring);
+	edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+	if (!IS_ERR(edit)) {
+		if (edit)
+			assoc_array_apply_edit(edit);
+		key_payload_reserve(keyring, 0);
+	}
+}
 
-	/* adjust the quota */
-	key_payload_reserve(keyring, 0);
+static bool gc_iterator(void *object, void *iterator_data)
+{
+	struct key *key = keyring_ptr_to_key(object);
+	time_t *limit = iterator_data;
 
-	if (klist) {
-		rcu_assign_pointer(keyring->payload.subscriptions, NULL);
-		call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
-	}
+	if (key_is_dead(key, *limit))
+		return false;
+	key_get(key);
+	return true;
 }
 
 /*
@@ -1191,88 +1317,12 @@ static void keyring_revoke(struct key *keyring)
  */
 void keyring_gc(struct key *keyring, time_t limit)
 {
-	struct keyring_list *klist, *new;
-	struct key *key;
-	int loop, keep, max;
-
 	kenter("{%x,%s}", key_serial(keyring), keyring->description);
 
 	down_write(&keyring->sem);
-
-	klist = rcu_dereference_locked_keyring(keyring);
-	if (!klist)
-		goto no_klist;
-
-	/* work out how many subscriptions we're keeping */
-	keep = 0;
-	for (loop = klist->nkeys - 1; loop >= 0; loop--)
-		if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring),
-				 limit))
-			keep++;
-
-	if (keep == klist->nkeys)
-		goto just_return;
-
-	/* allocate a new keyring payload */
-	max = roundup(keep, 4);
-	new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
-		      GFP_KERNEL);
-	if (!new)
-		goto nomem;
-	new->maxkeys = max;
-	new->nkeys = 0;
-	new->delkey = 0;
-
-	/* install the live keys
-	 * - must take care as expired keys may be updated back to life
-	 */
-	keep = 0;
-	for (loop = klist->nkeys - 1; loop >= 0; loop--) {
-		key = rcu_deref_link_locked(klist, loop, keyring);
-		if (!key_is_dead(key, limit)) {
-			if (keep >= max)
-				goto discard_new;
-			RCU_INIT_POINTER(new->keys[keep++], key_get(key));
-		}
-	}
-	new->nkeys = keep;
-
-	/* adjust the quota */
-	key_payload_reserve(keyring,
-			    sizeof(struct keyring_list) +
-			    KEYQUOTA_LINK_BYTES * keep);
-
-	if (keep == 0) {
-		rcu_assign_pointer(keyring->payload.subscriptions, NULL);
-		kfree(new);
-	} else {
-		rcu_assign_pointer(keyring->payload.subscriptions, new);
-	}
-
-	up_write(&keyring->sem);
-
-	call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
-	kleave(" [yes]");
-	return;
-
-discard_new:
-	new->nkeys = keep;
-	keyring_clear_rcu_disposal(&new->rcu);
+	assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops,
+		       gc_iterator, &limit);
 	up_write(&keyring->sem);
-	kleave(" [discard]");
-	return;
 
-just_return:
-	up_write(&keyring->sem);
-	kleave(" [no dead]");
-	return;
-
-no_klist:
-	up_write(&keyring->sem);
-	kleave(" [no_klist]");
-	return;
-
-nomem:
-	up_write(&keyring->sem);
-	kleave(" [oom]");
+	kleave("");
 }
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index ab75df4745af..df94827103d0 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -351,7 +351,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
 			       struct key_user *user,
 			       struct key **_key)
 {
-	unsigned long prealloc;
+	struct assoc_array_edit *edit;
 	struct key *key;
 	key_perm_t perm;
 	key_ref_t key_ref;
@@ -380,7 +380,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
 	set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
 
 	if (dest_keyring) {
-		ret = __key_link_begin(dest_keyring, &ctx->index_key, &prealloc);
+		ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
 		if (ret < 0)
 			goto link_prealloc_failed;
 	}
@@ -395,11 +395,11 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
 		goto key_already_present;
 
 	if (dest_keyring)
-		__key_link(dest_keyring, key, &prealloc);
+		__key_link(key, &edit);
 
 	mutex_unlock(&key_construction_mutex);
 	if (dest_keyring)
-		__key_link_end(dest_keyring, &ctx->index_key, prealloc);
+		__key_link_end(dest_keyring, &ctx->index_key, edit);
 	mutex_unlock(&user->cons_lock);
 	*_key = key;
 	kleave(" = 0 [%d]", key_serial(key));
@@ -414,8 +414,8 @@ key_already_present:
 	if (dest_keyring) {
 		ret = __key_link_check_live_key(dest_keyring, key);
 		if (ret == 0)
-			__key_link(dest_keyring, key, &prealloc);
-		__key_link_end(dest_keyring, &ctx->index_key, prealloc);
+			__key_link(key, &edit);
+		__key_link_end(dest_keyring, &ctx->index_key, edit);
 		if (ret < 0)
 			goto link_check_failed;
 	}
-- 
cgit v1.2.3


From ab3c3587f8cda9083209a61dbe3a4407d3cada10 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 24 Sep 2013 10:35:18 +0100
Subject: KEYS: Implement a big key type that can save to tmpfs

Implement a big key type that can save its contents to tmpfs and thus
swapspace when memory is tight.  This is useful for Kerberos ticket caches.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Simo Sorce <simo@redhat.com>
---
 include/keys/big_key-type.h |  25 ++++++
 include/linux/key.h         |   1 +
 security/keys/Kconfig       |  11 +++
 security/keys/Makefile      |   1 +
 security/keys/big_key.c     | 204 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 242 insertions(+)
 create mode 100644 include/keys/big_key-type.h
 create mode 100644 security/keys/big_key.c

(limited to 'include/linux')

diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h
new file mode 100644
index 000000000000..d69bc8af3292
--- /dev/null
+++ b/include/keys/big_key-type.h
@@ -0,0 +1,25 @@
+/* Big capacity key type.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_BIG_KEY_TYPE_H
+#define _KEYS_BIG_KEY_TYPE_H
+
+#include <linux/key-type.h>
+
+extern struct key_type key_type_big_key;
+
+extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep);
+extern void big_key_revoke(struct key *key);
+extern void big_key_destroy(struct key *key);
+extern void big_key_describe(const struct key *big_key, struct seq_file *m);
+extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen);
+
+#endif /* _KEYS_BIG_KEY_TYPE_H */
diff --git a/include/linux/key.h b/include/linux/key.h
index 2417f789d29b..010dbb618aca 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -201,6 +201,7 @@ struct key {
 			unsigned long		value;
 			void __rcu		*rcudata;
 			void			*data;
+			void			*data2[2];
 		} payload;
 		struct assoc_array keys;
 	};
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index 15e0dfe8c80f..b56362275ec8 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -20,6 +20,17 @@ config KEYS
 
 	  If you are unsure as to whether this is required, answer N.
 
+config BIG_KEYS
+	tristate "Large payload keys"
+	depends on KEYS
+	depends on TMPFS
+	help
+	  This option provides support for holding large keys within the kernel
+	  (for example Kerberos ticket caches).  The data may be stored out to
+	  swapspace by tmpfs.
+
+	  If you are unsure as to whether this is required, answer N.
+
 config TRUSTED_KEYS
 	tristate "TRUSTED KEYS"
 	depends on KEYS && TCG_TPM
diff --git a/security/keys/Makefile b/security/keys/Makefile
index 504aaa008388..c487c77a00be 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -22,5 +22,6 @@ obj-$(CONFIG_SYSCTL) += sysctl.o
 #
 # Key types
 #
+obj-$(CONFIG_BIG_KEYS) += big_key.o
 obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
 obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
new file mode 100644
index 000000000000..5f9defc4a807
--- /dev/null
+++ b/security/keys/big_key.c
@@ -0,0 +1,204 @@
+/* Large capacity key type
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/file.h>
+#include <linux/shmem_fs.h>
+#include <linux/err.h>
+#include <keys/user-type.h>
+#include <keys/big_key-type.h>
+
+MODULE_LICENSE("GPL");
+
+/*
+ * If the data is under this limit, there's no point creating a shm file to
+ * hold it as the permanently resident metadata for the shmem fs will be at
+ * least as large as the data.
+ */
+#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry))
+
+/*
+ * big_key defined keys take an arbitrary string as the description and an
+ * arbitrary blob of data as the payload
+ */
+struct key_type key_type_big_key = {
+	.name			= "big_key",
+	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
+	.instantiate		= big_key_instantiate,
+	.match			= user_match,
+	.revoke			= big_key_revoke,
+	.destroy		= big_key_destroy,
+	.describe		= big_key_describe,
+	.read			= big_key_read,
+};
+
+/*
+ * Instantiate a big key
+ */
+int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
+{
+	struct path *path = (struct path *)&key->payload.data2;
+	struct file *file;
+	ssize_t written;
+	size_t datalen = prep->datalen;
+	int ret;
+
+	ret = -EINVAL;
+	if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
+		goto error;
+
+	/* Set an arbitrary quota */
+	ret = key_payload_reserve(key, 16);
+	if (ret < 0)
+		goto error;
+
+	key->type_data.x[1] = datalen;
+
+	if (datalen > BIG_KEY_FILE_THRESHOLD) {
+		/* Create a shmem file to store the data in.  This will permit the data
+		 * to be swapped out if needed.
+		 *
+		 * TODO: Encrypt the stored data with a temporary key.
+		 */
+		file = shmem_file_setup("", datalen, 0);
+		if (IS_ERR(file))
+			goto err_quota;
+
+		written = kernel_write(file, prep->data, prep->datalen, 0);
+		if (written != datalen) {
+			if (written >= 0)
+				ret = -ENOMEM;
+			goto err_fput;
+		}
+
+		/* Pin the mount and dentry to the key so that we can open it again
+		 * later
+		 */
+		*path = file->f_path;
+		path_get(path);
+		fput(file);
+	} else {
+		/* Just store the data in a buffer */
+		void *data = kmalloc(datalen, GFP_KERNEL);
+		if (!data) {
+			ret = -ENOMEM;
+			goto err_quota;
+		}
+
+		key->payload.data = memcpy(data, prep->data, prep->datalen);
+	}
+	return 0;
+
+err_fput:
+	fput(file);
+err_quota:
+	key_payload_reserve(key, 0);
+error:
+	return ret;
+}
+
+/*
+ * dispose of the links from a revoked keyring
+ * - called with the key sem write-locked
+ */
+void big_key_revoke(struct key *key)
+{
+	struct path *path = (struct path *)&key->payload.data2;
+
+	/* clear the quota */
+	key_payload_reserve(key, 0);
+	if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD)
+		vfs_truncate(path, 0);
+}
+
+/*
+ * dispose of the data dangling from the corpse of a big_key key
+ */
+void big_key_destroy(struct key *key)
+{
+	if (key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) {
+		struct path *path = (struct path *)&key->payload.data2;
+		path_put(path);
+		path->mnt = NULL;
+		path->dentry = NULL;
+	} else {
+		kfree(key->payload.data);
+		key->payload.data = NULL;
+	}
+}
+
+/*
+ * describe the big_key key
+ */
+void big_key_describe(const struct key *key, struct seq_file *m)
+{
+	unsigned long datalen = key->type_data.x[1];
+
+	seq_puts(m, key->description);
+
+	if (key_is_instantiated(key))
+		seq_printf(m, ": %lu [%s]",
+			   datalen,
+			   datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff");
+}
+
+/*
+ * read the key data
+ * - the key's semaphore is read-locked
+ */
+long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
+{
+	unsigned long datalen = key->type_data.x[1];
+	long ret;
+
+	if (!buffer || buflen < datalen)
+		return datalen;
+
+	if (datalen > BIG_KEY_FILE_THRESHOLD) {
+		struct path *path = (struct path *)&key->payload.data2;
+		struct file *file;
+		loff_t pos;
+
+		file = dentry_open(path, O_RDONLY, current_cred());
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		pos = 0;
+		ret = vfs_read(file, buffer, datalen, &pos);
+		fput(file);
+		if (ret >= 0 && ret != datalen)
+			ret = -EIO;
+	} else {
+		ret = datalen;
+		if (copy_to_user(buffer, key->payload.data, datalen) != 0)
+			ret = -EFAULT;
+	}
+
+	return ret;
+}
+
+/*
+ * Module stuff
+ */
+static int __init big_key_init(void)
+{
+	return register_key_type(&key_type_big_key);
+}
+
+static void __exit big_key_cleanup(void)
+{
+	unregister_key_type(&key_type_big_key);
+}
+
+module_init(big_key_init);
+module_exit(big_key_cleanup);
-- 
cgit v1.2.3


From f36f8c75ae2e7d4da34f4c908cebdb4aa42c977e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 24 Sep 2013 10:35:19 +0100
Subject: KEYS: Add per-user_namespace registers for persistent per-UID
 kerberos caches

Add support for per-user_namespace registers of persistent per-UID kerberos
caches held within the kernel.

This allows the kerberos cache to be retained beyond the life of all a user's
processes so that the user's cron jobs can work.

The kerberos cache is envisioned as a keyring/key tree looking something like:

	struct user_namespace
	  \___ .krb_cache keyring		- The register
		\___ _krb.0 keyring		- Root's Kerberos cache
		\___ _krb.5000 keyring		- User 5000's Kerberos cache
		\___ _krb.5001 keyring		- User 5001's Kerberos cache
			\___ tkt785 big_key	- A ccache blob
			\___ tkt12345 big_key	- Another ccache blob

Or possibly:

	struct user_namespace
	  \___ .krb_cache keyring		- The register
		\___ _krb.0 keyring		- Root's Kerberos cache
		\___ _krb.5000 keyring		- User 5000's Kerberos cache
		\___ _krb.5001 keyring		- User 5001's Kerberos cache
			\___ tkt785 keyring	- A ccache
				\___ krbtgt/REDHAT.COM@REDHAT.COM big_key
				\___ http/REDHAT.COM@REDHAT.COM user
				\___ afs/REDHAT.COM@REDHAT.COM user
				\___ nfs/REDHAT.COM@REDHAT.COM user
				\___ krbtgt/KERNEL.ORG@KERNEL.ORG big_key
				\___ http/KERNEL.ORG@KERNEL.ORG big_key

What goes into a particular Kerberos cache is entirely up to userspace.  Kernel
support is limited to giving you the Kerberos cache keyring that you want.

The user asks for their Kerberos cache by:

	krb_cache = keyctl_get_krbcache(uid, dest_keyring);

The uid is -1 or the user's own UID for the user's own cache or the uid of some
other user's cache (requires CAP_SETUID).  This permits rpc.gssd or whatever to
mess with the cache.

The cache returned is a keyring named "_krb.<uid>" that the possessor can read,
search, clear, invalidate, unlink from and add links to.  Active LSMs get a
chance to rule on whether the caller is permitted to make a link.

Each uid's cache keyring is created when it first accessed and is given a
timeout that is extended each time this function is called so that the keyring
goes away after a while.  The timeout is configurable by sysctl but defaults to
three days.

Each user_namespace struct gets a lazily-created keyring that serves as the
register.  The cache keyrings are added to it.  This means that standard key
search and garbage collection facilities are available.

The user_namespace struct's register goes away when it does and anything left
in it is then automatically gc'd.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Simo Sorce <simo@redhat.com>
cc: Serge E. Hallyn <serge.hallyn@ubuntu.com>
cc: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |   6 ++
 include/uapi/linux/keyctl.h    |   1 +
 kernel/user.c                  |   4 +
 kernel/user_namespace.c        |   6 ++
 security/keys/Kconfig          |  17 +++++
 security/keys/Makefile         |   1 +
 security/keys/compat.c         |   3 +
 security/keys/internal.h       |   9 +++
 security/keys/keyctl.c         |   3 +
 security/keys/persistent.c     | 169 +++++++++++++++++++++++++++++++++++++++++
 security/keys/sysctl.c         |  11 +++
 11 files changed, 230 insertions(+)
 create mode 100644 security/keys/persistent.c

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4db29859464f..4836ba3c1cd8 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,12 @@ struct user_namespace {
 	kuid_t			owner;
 	kgid_t			group;
 	unsigned int		proc_inum;
+
+	/* Register of per-UID persistent keyrings for this namespace */
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+	struct key		*persistent_keyring_register;
+	struct rw_semaphore	persistent_keyring_register_sem;
+#endif
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index c9b7f4faf97a..840cb990abe2 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -56,5 +56,6 @@
 #define KEYCTL_REJECT			19	/* reject a partially constructed key */
 #define KEYCTL_INSTANTIATE_IOV		20	/* instantiate a partially constructed key */
 #define KEYCTL_INVALIDATE		21	/* invalidate a key */
+#define KEYCTL_GET_PERSISTENT		22	/* get a user's persistent keyring */
 
 #endif /*  _LINUX_KEYCTL_H */
diff --git a/kernel/user.c b/kernel/user.c
index 5bbb91988e69..a3a0dbfda329 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,6 +51,10 @@ struct user_namespace init_user_ns = {
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
 	.proc_inum = PROC_USER_INIT_INO,
+#ifdef CONFIG_KEYS_KERBEROS_CACHE
+	.krb_cache_register_sem =
+	__RWSEM_INITIALIZER(init_user_ns.krb_cache_register_sem),
+#endif
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 13fb1134ba58..240fb62cf394 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -101,6 +101,9 @@ int create_user_ns(struct cred *new)
 
 	set_cred_user_ns(new, ns);
 
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+	init_rwsem(&ns->persistent_keyring_register_sem);
+#endif
 	return 0;
 }
 
@@ -130,6 +133,9 @@ void free_user_ns(struct user_namespace *ns)
 
 	do {
 		parent = ns->parent;
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+		key_put(ns->persistent_keyring_register);
+#endif
 		proc_free_inum(ns->proc_inum);
 		kmem_cache_free(user_ns_cachep, ns);
 		ns = parent;
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index b56362275ec8..53d8748c9564 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -20,6 +20,23 @@ config KEYS
 
 	  If you are unsure as to whether this is required, answer N.
 
+config PERSISTENT_KEYRINGS
+	bool "Enable register of persistent per-UID keyrings"
+	depends on KEYS
+	help
+	  This option provides a register of persistent per-UID keyrings,
+	  primarily aimed at Kerberos key storage.  The keyrings are persistent
+	  in the sense that they stay around after all processes of that UID
+	  have exited, not that they survive the machine being rebooted.
+
+	  A particular keyring may be accessed by either the user whose keyring
+	  it is or by a process with administrative privileges.  The active
+	  LSMs gets to rule on which admin-level processes get to access the
+	  cache.
+
+	  Keyrings are created and added into the register upon demand and get
+	  removed if they expire (a default timeout is set upon creation).
+
 config BIG_KEYS
 	tristate "Large payload keys"
 	depends on KEYS
diff --git a/security/keys/Makefile b/security/keys/Makefile
index c487c77a00be..dfb3a7bededf 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -18,6 +18,7 @@ obj-y := \
 obj-$(CONFIG_KEYS_COMPAT) += compat.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSCTL) += sysctl.o
+obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
 
 #
 # Key types
diff --git a/security/keys/compat.c b/security/keys/compat.c
index d65fa7fa29ba..bbd32c729dbb 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option,
 	case KEYCTL_INVALIDATE:
 		return keyctl_invalidate_key(arg2);
 
+	case KEYCTL_GET_PERSISTENT:
+		return keyctl_get_persistent(arg2, arg3);
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 581c6f688352..80b2aac4f50c 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -255,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t);
 extern long keyctl_instantiate_key_common(key_serial_t,
 					  const struct iovec *,
 					  unsigned, size_t, key_serial_t);
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+extern long keyctl_get_persistent(uid_t, key_serial_t);
+extern unsigned persistent_keyring_expiry;
+#else
+static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring)
+{
+	return -EOPNOTSUPP;
+}
+#endif
 
 /*
  * Debugging key validation
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 33cfd27b4de2..cee72ce64222 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case KEYCTL_INVALIDATE:
 		return keyctl_invalidate_key((key_serial_t) arg2);
 
+	case KEYCTL_GET_PERSISTENT:
+		return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3);
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/security/keys/persistent.c b/security/keys/persistent.c
new file mode 100644
index 000000000000..82f4957a7acf
--- /dev/null
+++ b/security/keys/persistent.c
@@ -0,0 +1,169 @@
+/* General persistent per-UID keyrings register
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/user_namespace.h>
+#include "internal.h"
+
+unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */
+
+/*
+ * Create the persistent keyring register for the current user namespace.
+ *
+ * Called with the namespace's sem locked for writing.
+ */
+static int key_create_persistent_register(struct user_namespace *ns)
+{
+	struct key *reg = keyring_alloc(".persistent_register",
+					KUIDT_INIT(0), KGIDT_INIT(0),
+					current_cred(),
+					((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+					 KEY_USR_VIEW | KEY_USR_READ),
+					KEY_ALLOC_NOT_IN_QUOTA, NULL);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	ns->persistent_keyring_register = reg;
+	return 0;
+}
+
+/*
+ * Create the persistent keyring for the specified user.
+ *
+ * Called with the namespace's sem locked for writing.
+ */
+static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid,
+				       struct keyring_index_key *index_key)
+{
+	struct key *persistent;
+	key_ref_t reg_ref, persistent_ref;
+
+	if (!ns->persistent_keyring_register) {
+		long err = key_create_persistent_register(ns);
+		if (err < 0)
+			return ERR_PTR(err);
+	} else {
+		reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+		persistent_ref = find_key_to_update(reg_ref, index_key);
+		if (persistent_ref)
+			return persistent_ref;
+	}
+
+	persistent = keyring_alloc(index_key->description,
+				   uid, INVALID_GID, current_cred(),
+				   ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+				    KEY_USR_VIEW | KEY_USR_READ),
+				   KEY_ALLOC_NOT_IN_QUOTA,
+				   ns->persistent_keyring_register);
+	if (IS_ERR(persistent))
+		return ERR_CAST(persistent);
+
+	return make_key_ref(persistent, true);
+}
+
+/*
+ * Get the persistent keyring for a specific UID and link it to the nominated
+ * keyring.
+ */
+static long key_get_persistent(struct user_namespace *ns, kuid_t uid,
+			       key_ref_t dest_ref)
+{
+	struct keyring_index_key index_key;
+	struct key *persistent;
+	key_ref_t reg_ref, persistent_ref;
+	char buf[32];
+	long ret;
+
+	/* Look in the register if it exists */
+	index_key.type = &key_type_keyring;
+	index_key.description = buf;
+	index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid));
+
+	if (ns->persistent_keyring_register) {
+		reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+		down_read(&ns->persistent_keyring_register_sem);
+		persistent_ref = find_key_to_update(reg_ref, &index_key);
+		up_read(&ns->persistent_keyring_register_sem);
+
+		if (persistent_ref)
+			goto found;
+	}
+
+	/* It wasn't in the register, so we'll need to create it.  We might
+	 * also need to create the register.
+	 */
+	down_write(&ns->persistent_keyring_register_sem);
+	persistent_ref = key_create_persistent(ns, uid, &index_key);
+	up_write(&ns->persistent_keyring_register_sem);
+	if (!IS_ERR(persistent_ref))
+		goto found;
+
+	return PTR_ERR(persistent_ref);
+
+found:
+	ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK);
+	if (ret == 0) {
+		persistent = key_ref_to_ptr(persistent_ref);
+		ret = key_link(key_ref_to_ptr(dest_ref), persistent);
+		if (ret == 0) {
+			key_set_timeout(persistent, persistent_keyring_expiry);
+			ret = persistent->serial;		
+		}
+	}
+
+	key_ref_put(persistent_ref);
+	return ret;
+}
+
+/*
+ * Get the persistent keyring for a specific UID and link it to the nominated
+ * keyring.
+ */
+long keyctl_get_persistent(uid_t _uid, key_serial_t destid)
+{
+	struct user_namespace *ns = current_user_ns();
+	key_ref_t dest_ref;
+	kuid_t uid;
+	long ret;
+
+	/* -1 indicates the current user */
+	if (_uid == (uid_t)-1) {
+		uid = current_uid();
+	} else {
+		uid = make_kuid(ns, _uid);
+		if (!uid_valid(uid))
+			return -EINVAL;
+
+		/* You can only see your own persistent cache if you're not
+		 * sufficiently privileged.
+		 */
+		if (uid_eq(uid, current_uid()) &&
+		    uid_eq(uid, current_suid()) &&
+		    uid_eq(uid, current_euid()) &&
+		    uid_eq(uid, current_fsuid()) &&
+		    !ns_capable(ns, CAP_SETUID))
+			return -EPERM;
+	}
+
+	/* There must be a destination keyring */
+	dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE);
+	if (IS_ERR(dest_ref))
+		return PTR_ERR(dest_ref);
+	if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) {
+		ret = -ENOTDIR;
+		goto out_put_dest;
+	}
+
+	ret = key_get_persistent(ns, uid, dest_ref);
+
+out_put_dest:
+	key_ref_put(dest_ref);
+	return ret;
+}
diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c
index ee32d181764a..8c0af08760c8 100644
--- a/security/keys/sysctl.c
+++ b/security/keys/sysctl.c
@@ -61,5 +61,16 @@ ctl_table key_sysctls[] = {
 		.extra1 = (void *) &zero,
 		.extra2 = (void *) &max,
 	},
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+	{
+		.procname = "persistent_keyring_expiry",
+		.data = &persistent_keyring_expiry,
+		.maxlen = sizeof(unsigned),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = (void *) &zero,
+		.extra2 = (void *) &max,
+	},
+#endif
 	{ }
 };
-- 
cgit v1.2.3


From 98fda169290b3b28c0f2db2b8f02290c13da50ef Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Wed, 4 Sep 2013 22:32:24 +0200
Subject: kvm: remove .done from struct kvm_async_pf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'.done' is used to mark the completion of 'async_pf_execute()', but
'cancel_work_sync()' returns true when the work was canceled, so we
use it instead.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 -
 virt/kvm/async_pf.c      | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0fbbc7aa02cb..749bdb12cd15 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -190,7 +190,6 @@ struct kvm_async_pf {
 	unsigned long addr;
 	struct kvm_arch_async_pf arch;
 	struct page *page;
-	bool done;
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8a39dda7a325..b197950ac4d5 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -75,7 +75,6 @@ static void async_pf_execute(struct work_struct *work)
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);
 	apf->page = page;
-	apf->done = true;
 	spin_unlock(&vcpu->async_pf.lock);
 
 	/*
@@ -99,9 +98,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 		struct kvm_async_pf *work =
 			list_entry(vcpu->async_pf.queue.next,
 				   typeof(*work), queue);
-		cancel_work_sync(&work->work);
 		list_del(&work->queue);
-		if (!work->done) { /* work was canceled */
+		if (cancel_work_sync(&work->work)) {
 			mmdrop(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
@@ -166,7 +164,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 		return 0;
 
 	work->page = NULL;
-	work->done = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
 	work->addr = gfn_to_hva(vcpu->kvm, gfn);
-- 
cgit v1.2.3


From f1a4c52ff5913378b7baf05ac71f10282b341cf7 Mon Sep 17 00:00:00 2001
From: Philip Avinash <avinashphilip@ti.com>
Date: Sun, 18 Aug 2013 10:49:03 +0530
Subject: ARM: davinci: gpio: use gpiolib API instead of inline functions

Remove NEED_MACH_GPIO_H config select option for ARCH_DAVINCI
to start using gpiolib interface for davinci platforms. This makes
it easier to use the gpio driver on other platforms as it breaks
dependency on mach-davinci.

Latencies for gpio_get/set APIs will increase. On measurement,
latency was found to have increased by 18 microsecond with
gpiolib API as compared to inline APIs.

Measurement was done on DA850 EVM for gpio_get_value() API by
taking the printk timing across the call with interrupts disabled.

  inline gpio API with interrupt disabled
  [   29.734337] before gpio_get
  [   29.736847] after gpio_get

  Time difference 0.00251

  gpio library with interrupt disabled
  [  272.876763] before gpio_get
  [  272.879291] after gpio_get

  Time difference 0.002528
  Latency increased by (0.002528 -  0.00251) = 18 microsecond.

While at it, remove GPIO_TYPE_DAVINCI enum definition as
gpio-davinci.c is converted to Linux device driver model.

Signed-off-by: Philip Avinash <avinashphilip@ti.com>
Signed-off-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
[nsekhar@ti.com: minor edits to commit message]
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/Kconfig                                  |  1 -
 arch/arm/mach-davinci/include/mach/gpio-davinci.h | 90 -----------------------
 arch/arm/mach-davinci/include/mach/gpio.h         | 88 ----------------------
 drivers/gpio/gpio-tnetv107x.c                     |  1 +
 include/linux/platform_data/gpio-davinci.h        | 35 +++++++++
 5 files changed, 36 insertions(+), 179 deletions(-)
 delete mode 100644 arch/arm/mach-davinci/include/mach/gpio-davinci.h
 delete mode 100644 arch/arm/mach-davinci/include/mach/gpio.h

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 3f7714d8d2d2..ad3767095422 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -847,7 +847,6 @@ config ARCH_DAVINCI
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IRQ_CHIP
 	select HAVE_IDE
-	select NEED_MACH_GPIO_H
 	select TI_PRIV_EDMA
 	select USE_OF
 	select ZONE_DMA
diff --git a/arch/arm/mach-davinci/include/mach/gpio-davinci.h b/arch/arm/mach-davinci/include/mach/gpio-davinci.h
deleted file mode 100644
index 0d63b24cefc9..000000000000
--- a/arch/arm/mach-davinci/include/mach/gpio-davinci.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * TI DaVinci GPIO Support
- *
- * Copyright (c) 2006 David Brownell
- * Copyright (c) 2007, MontaVista Software, Inc. <source@mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef	__DAVINCI_DAVINCI_GPIO_H
-#define	__DAVINCI_DAVINCI_GPIO_H
-
-#include <linux/io.h>
-#include <linux/spinlock.h>
-
-#include <asm-generic/gpio.h>
-
-#include <mach/irqs.h>
-#include <mach/common.h>
-
-enum davinci_gpio_type {
-	GPIO_TYPE_DAVINCI = 0,
-	GPIO_TYPE_TNETV107X,
-};
-
-/*
- * basic gpio routines
- *
- * board-specific init should be done by arch/.../.../board-XXX.c (maybe
- * initializing banks together) rather than boot loaders; kexec() won't
- * go through boot loaders.
- *
- * the gpio clock will be turned on when gpios are used, and you may also
- * need to pay attention to PINMUX registers to be sure those pins are
- * used as gpios, not with other peripherals.
- *
- * On-chip GPIOs are numbered 0..(DAVINCI_N_GPIO-1).  For documentation,
- * and maybe for later updates, code may write GPIO(N).  These may be
- * all 1.8V signals, all 3.3V ones, or a mix of the two.  A given chip
- * may not support all the GPIOs in that range.
- *
- * GPIOs can also be on external chips, numbered after the ones built-in
- * to the DaVinci chip.  For now, they won't be usable as IRQ sources.
- */
-#define	GPIO(X)		(X)		/* 0 <= X <= (DAVINCI_N_GPIO - 1) */
-
-/* Convert GPIO signal to GPIO pin number */
-#define GPIO_TO_PIN(bank, gpio)	(16 * (bank) + (gpio))
-
-struct davinci_gpio_controller {
-	struct gpio_chip	chip;
-	int			irq_base;
-	spinlock_t		lock;
-	void __iomem		*regs;
-	void __iomem		*set_data;
-	void __iomem		*clr_data;
-	void __iomem		*in_data;
-	unsigned		gpio_irq;
-};
-
-/* The __gpio_to_controller() and __gpio_mask() functions inline to constants
- * with constant parameters; or in outlined code they execute at runtime.
- *
- * You'd access the controller directly when reading or writing more than
- * one gpio value at a time, and to support wired logic where the value
- * being driven by the cpu need not match the value read back.
- *
- * These are NOT part of the cross-platform GPIO interface
- */
-static inline struct davinci_gpio_controller *
-__gpio_to_controller(unsigned gpio)
-{
-	struct davinci_gpio_controller *ctlrs = davinci_soc_info.gpio_ctlrs;
-	int index = gpio / 32;
-
-	if (!ctlrs || index >= davinci_soc_info.gpio_ctlrs_num)
-		return NULL;
-
-	return ctlrs + index;
-}
-
-static inline u32 __gpio_mask(unsigned gpio)
-{
-	return 1 << (gpio % 32);
-}
-
-#endif	/* __DAVINCI_DAVINCI_GPIO_H */
diff --git a/arch/arm/mach-davinci/include/mach/gpio.h b/arch/arm/mach-davinci/include/mach/gpio.h
deleted file mode 100644
index 960e9de47e1e..000000000000
--- a/arch/arm/mach-davinci/include/mach/gpio.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * TI DaVinci GPIO Support
- *
- * Copyright (c) 2006 David Brownell
- * Copyright (c) 2007, MontaVista Software, Inc. <source@mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef	__DAVINCI_GPIO_H
-#define	__DAVINCI_GPIO_H
-
-#include <asm-generic/gpio.h>
-
-#define __ARM_GPIOLIB_COMPLEX
-
-/* The inline versions use the static inlines in the driver header */
-#include "gpio-davinci.h"
-
-/*
- * The get/set/clear functions will inline when called with constant
- * parameters referencing built-in GPIOs, for low-overhead bitbanging.
- *
- * gpio_set_value() will inline only on traditional Davinci style controllers
- * with distinct set/clear registers.
- *
- * Otherwise, calls with variable parameters or referencing external
- * GPIOs (e.g. on GPIO expander chips) use outlined functions.
- */
-static inline void gpio_set_value(unsigned gpio, int value)
-{
-	if (__builtin_constant_p(value) && gpio < davinci_soc_info.gpio_num) {
-		struct davinci_gpio_controller *ctlr;
-		u32				mask;
-
-		ctlr = __gpio_to_controller(gpio);
-
-		if (ctlr->set_data != ctlr->clr_data) {
-			mask = __gpio_mask(gpio);
-			if (value)
-				__raw_writel(mask, ctlr->set_data);
-			else
-				__raw_writel(mask, ctlr->clr_data);
-			return;
-		}
-	}
-
-	__gpio_set_value(gpio, value);
-}
-
-/* Returns zero or nonzero; works for gpios configured as inputs OR
- * as outputs, at least for built-in GPIOs.
- *
- * NOTE: for built-in GPIOs, changes in reported values are synchronized
- * to the GPIO clock.  This is easily seen after calling gpio_set_value()
- * and then immediately gpio_get_value(), where the gpio_get_value() will
- * return the old value until the GPIO clock ticks and the new value gets
- * latched.
- */
-static inline int gpio_get_value(unsigned gpio)
-{
-	struct davinci_gpio_controller *ctlr;
-
-	if (!__builtin_constant_p(gpio) || gpio >= davinci_soc_info.gpio_num)
-		return __gpio_get_value(gpio);
-
-	ctlr = __gpio_to_controller(gpio);
-	return __gpio_mask(gpio) & __raw_readl(ctlr->in_data);
-}
-
-static inline int gpio_cansleep(unsigned gpio)
-{
-	if (__builtin_constant_p(gpio) && gpio < davinci_soc_info.gpio_num)
-		return 0;
-	else
-		return __gpio_cansleep(gpio);
-}
-
-static inline int irq_to_gpio(unsigned irq)
-{
-	/* don't support the reverse mapping */
-	return -ENOSYS;
-}
-
-#endif				/* __DAVINCI_GPIO_H */
diff --git a/drivers/gpio/gpio-tnetv107x.c b/drivers/gpio/gpio-tnetv107x.c
index 3fa3e2867e19..58445bb69106 100644
--- a/drivers/gpio/gpio-tnetv107x.c
+++ b/drivers/gpio/gpio-tnetv107x.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
+#include <linux/platform_data/gpio-davinci.h>
 
 #include <mach/common.h>
 #include <mach/tnetv107x.h>
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index 2fcc125af1aa..6efd20264585 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -16,10 +16,45 @@
 #ifndef __DAVINCI_GPIO_PLATFORM_H
 #define __DAVINCI_GPIO_PLATFORM_H
 
+#include <linux/io.h>
+#include <linux/spinlock.h>
+
+#include <asm-generic/gpio.h>
+
+enum davinci_gpio_type {
+	GPIO_TYPE_TNETV107X = 0,
+};
+
 struct davinci_gpio_platform_data {
 	u32	ngpio;
 	u32	gpio_unbanked;
 	u32	intc_irq_num;
 };
 
+
+struct davinci_gpio_controller {
+	struct gpio_chip	chip;
+	int			irq_base;
+	/* Serialize access to GPIO registers */
+	spinlock_t		lock;
+	void __iomem		*regs;
+	void __iomem		*set_data;
+	void __iomem		*clr_data;
+	void __iomem		*in_data;
+	int			gpio_unbanked;
+	unsigned		gpio_irq;
+};
+
+/*
+ * basic gpio routines
+ */
+#define	GPIO(X)		(X)	/* 0 <= X <= (DAVINCI_N_GPIO - 1) */
+
+/* Convert GPIO signal to GPIO pin number */
+#define GPIO_TO_PIN(bank, gpio)	(16 * (bank) + (gpio))
+
+static inline u32 __gpio_mask(unsigned gpio)
+{
+	return 1 << (gpio % 32);
+}
 #endif
-- 
cgit v1.2.3


From 56fa484969c367e3ae43a012a7b99f75bb4f3bdb Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuah.kh@samsung.com>
Date: Tue, 24 Sep 2013 15:21:20 -0600
Subject: iommu: Change iommu driver to call io_page_fault trace event

Change iommu driver call io_page_fault trace event. This iommu_error class
event can be enabled to trigger when an iommu error occurs. Trace information
includes driver name, device name, iova, and flags.

Testing:
Added trace calls to iommu_prepare_identity_map() for testing some of the
conditions that are hard to trigger. Here is the trace from the testing:

       swapper/0-1     [003] ....     2.003774: io_page_fault: IOMMU:pci 0000:00:02.0 iova=0x00000000cb800000 flags=0x0002
       swapper/0-1     [003] ....     2.004098: io_page_fault: IOMMU:pci 0000:00:1d.0 iova=0x00000000cadc6000 flags=0x0002
       swapper/0-1     [003] ....     2.004115: io_page_fault: IOMMU:pci 0000:00:1a.0 iova=0x00000000cadc6000 flags=0x0002
       swapper/0-1     [003] ....     2.004129: io_page_fault: IOMMU:pci 0000:00:1f.0 iova=0x0000000000000000 flags=0x0002

Signed-off-by: Shuah Khan <shuah.kh@samsung.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 include/linux/iommu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7ea319e95b47..a444c790fa72 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -22,6 +22,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/types.h>
+#include <trace/events/iommu.h>
 
 #define IOMMU_READ	(1)
 #define IOMMU_WRITE	(2)
@@ -227,6 +228,7 @@ static inline int report_iommu_fault(struct iommu_domain *domain,
 		ret = domain->handler(domain, dev, iova, flags,
 						domain->handler_token);
 
+	trace_io_page_fault(dev, iova, flags);
 	return ret;
 }
 
-- 
cgit v1.2.3


From ed37ddffe201bfad7be3c45bc08bd65b5298adca Mon Sep 17 00:00:00 2001
From: Roy Franz <roy.franz@linaro.org>
Date: Sun, 22 Sep 2013 15:45:26 -0700
Subject: efi: Add proper definitions for some EFI function pointers.

The x86/AMD64 EFI stubs must use a call wrapper to convert between
the Linux and EFI ABIs, so void pointers are sufficient.  For ARM,
the ABIs are compatible, so we can directly invoke the function
pointers.  The functions that are used by the ARM stub are updated
to match the EFI definitions.
Also add some EFI types used by EFI functions.

Signed-off-by: Roy Franz <roy.franz@linaro.org>
Acked-by: Mark Salter <msalter@redhat.com>
Reviewed-by: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/eboot.h |  8 -------
 include/linux/efi.h              | 50 +++++++++++++++++++++++++++-------------
 2 files changed, 34 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index e5b0a8f91c5f..02265107cce3 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -10,8 +10,6 @@
 #define SEG_GRANULARITY_4KB	(1 << 0)
 
 #define DESC_TYPE_CODE_DATA	(1 << 0)
-
-#define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
 #define EFI_READ_CHUNK_SIZE	(1024 * 1024)
 
 #define EFI_CONSOLE_OUT_DEVICE_GUID    \
@@ -62,10 +60,4 @@ struct efi_uga_draw_protocol {
 	void *blt;
 };
 
-struct efi_simple_text_output_protocol {
-	void *reset;
-	void *output_string;
-	void *test_string;
-};
-
 #endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index c084b6d942c3..bc5687d0f315 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -39,6 +39,8 @@
 typedef unsigned long efi_status_t;
 typedef u8 efi_bool_t;
 typedef u16 efi_char16_t;		/* UNICODE character */
+typedef u64 efi_physical_addr_t;
+typedef void *efi_handle_t;
 
 
 typedef struct {
@@ -96,6 +98,7 @@ typedef	struct {
 #define EFI_MEMORY_DESCRIPTOR_VERSION	1
 
 #define EFI_PAGE_SHIFT		12
+#define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
 
 typedef struct {
 	u32 type;
@@ -157,11 +160,13 @@ typedef struct {
 	efi_table_hdr_t hdr;
 	void *raise_tpl;
 	void *restore_tpl;
-	void *allocate_pages;
-	void *free_pages;
-	void *get_memory_map;
-	void *allocate_pool;
-	void *free_pool;
+	efi_status_t (*allocate_pages)(int, int, unsigned long,
+				       efi_physical_addr_t *);
+	efi_status_t (*free_pages)(efi_physical_addr_t, unsigned long);
+	efi_status_t (*get_memory_map)(unsigned long *, void *, unsigned long *,
+				       unsigned long *, u32 *);
+	efi_status_t (*allocate_pool)(int, unsigned long, void **);
+	efi_status_t (*free_pool)(void *);
 	void *create_event;
 	void *set_timer;
 	void *wait_for_event;
@@ -171,7 +176,7 @@ typedef struct {
 	void *install_protocol_interface;
 	void *reinstall_protocol_interface;
 	void *uninstall_protocol_interface;
-	void *handle_protocol;
+	efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **);
 	void *__reserved;
 	void *register_protocol_notify;
 	void *locate_handle;
@@ -181,7 +186,7 @@ typedef struct {
 	void *start_image;
 	void *exit;
 	void *unload_image;
-	void *exit_boot_services;
+	efi_status_t (*exit_boot_services)(efi_handle_t, unsigned long);
 	void *get_next_monotonic_count;
 	void *stall;
 	void *set_watchdog_timer;
@@ -494,10 +499,6 @@ typedef struct {
 	unsigned long unload;
 } efi_loaded_image_t;
 
-typedef struct {
-	u64 revision;
-	void *open_volume;
-} efi_file_io_interface_t;
 
 typedef struct {
 	u64 size;
@@ -510,20 +511,30 @@ typedef struct {
 	efi_char16_t filename[1];
 } efi_file_info_t;
 
-typedef struct {
+typedef struct _efi_file_handle {
 	u64 revision;
-	void *open;
-	void *close;
+	efi_status_t (*open)(struct _efi_file_handle *,
+			     struct _efi_file_handle **,
+			     efi_char16_t *, u64, u64);
+	efi_status_t (*close)(struct _efi_file_handle *);
 	void *delete;
-	void *read;
+	efi_status_t (*read)(struct _efi_file_handle *, unsigned long *,
+			     void *);
 	void *write;
 	void *get_position;
 	void *set_position;
-	void *get_info;
+	efi_status_t (*get_info)(struct _efi_file_handle *, efi_guid_t *,
+			unsigned long *, void *);
 	void *set_info;
 	void *flush;
 } efi_file_handle_t;
 
+typedef struct _efi_file_io_interface {
+	u64 revision;
+	int (*open_volume)(struct _efi_file_io_interface *,
+			   efi_file_handle_t **);
+} efi_file_io_interface_t;
+
 #define EFI_FILE_MODE_READ	0x0000000000000001
 #define EFI_FILE_MODE_WRITE	0x0000000000000002
 #define EFI_FILE_MODE_CREATE	0x8000000000000000
@@ -792,6 +803,13 @@ struct efivar_entry {
 	struct kobject kobj;
 };
 
+
+struct efi_simple_text_output_protocol {
+	void *reset;
+	efi_status_t (*output_string)(void *, void *);
+	void *test_string;
+};
+
 extern struct list_head efivar_sysfs_list;
 
 static inline void
-- 
cgit v1.2.3


From 3150398626466c6cc626732f60bc901d58f40677 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 12 Sep 2013 15:10:31 +0200
Subject: sched: Remove {set,clear}_need_resched

Preemption semantics are going to change which mandate a change.

All DRM usage sites are already broken and will not be affected (much)
by this change. DRM people are aware and will remove the last few
stragglers.

For now, leave an empty stub that generates a warning, once all users
are gone we can remove this.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: airlied@linux.ie
Cc: daniel.vetter@ffwll.ch
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/n/tip-qfc1el2zvhxiyut4ai99ij4n@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/thread_info.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e7e04736802f..a629e4b23217 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -104,8 +104,19 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 #define test_thread_flag(flag) \
 	test_ti_thread_flag(current_thread_info(), flag)
 
-#define set_need_resched()	set_thread_flag(TIF_NEED_RESCHED)
-#define clear_need_resched()	clear_thread_flag(TIF_NEED_RESCHED)
+static inline __deprecated void set_need_resched(void)
+{
+	/*
+	 * Use of this function in deprecated.
+	 *
+	 * As of this writing there are only a few users in the DRM tree left
+	 * all of which are wrong and can be removed without causing too much
+	 * grief.
+	 *
+	 * The DRM people are aware and are working on removing the last few
+	 * instances.
+	 */
+}
 
 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
 /*
-- 
cgit v1.2.3


From ea8117478918a4734586d35ff530721b682425be Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 11 Sep 2013 12:43:13 +0200
Subject: sched, idle: Fix the idle polling state logic

Mike reported that commit 7d1a9417 ("x86: Use generic idle loop")
regressed several workloads and caused excessive reschedule
interrupts.

The patch in question failed to notice that the x86 code had an
inverted sense of the polling state versus the new generic code (x86:
default polling, generic: default !polling).

Fix the two prominent x86 mwait based idle drivers and introduce a few
new generic polling helpers (fixing the wrong smp_mb__after_clear_bit
usage).

Also switch the idle routines to using tif_need_resched() which is an
immediate TIF_NEED_RESCHED test as opposed to need_resched which will
end up being slightly different.

Reported-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: lenb@kernel.org
Cc: tglx@linutronix.de
Link: http://lkml.kernel.org/n/tip-nc03imb0etuefmzybzj7sprf@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/process.c     |  6 ++--
 drivers/acpi/processor_idle.c | 46 ++++++-------------------
 drivers/idle/intel_idle.c     |  2 +-
 include/linux/sched.h         | 78 +++++++++++++++++++++++++++++++++++++++----
 include/linux/thread_info.h   |  2 ++
 kernel/cpu/idle.c             |  9 +++--
 6 files changed, 91 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c83516be1052..3fb8d95ab8b5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -391,9 +391,9 @@ static void amd_e400_idle(void)
 		 * The switch back from broadcast mode needs to be
 		 * called with interrupts disabled.
 		 */
-		 local_irq_disable();
-		 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
-		 local_irq_enable();
+		local_irq_disable();
+		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+		local_irq_enable();
 	} else
 		default_idle();
 }
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index f98dd00b51a9..c7414a545a4f 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -119,17 +119,10 @@ static struct dmi_system_id processor_power_dmi_table[] = {
  */
 static void acpi_safe_halt(void)
 {
-	current_thread_info()->status &= ~TS_POLLING;
-	/*
-	 * TS_POLLING-cleared state must be visible before we
-	 * test NEED_RESCHED:
-	 */
-	smp_mb();
-	if (!need_resched()) {
+	if (!tif_need_resched()) {
 		safe_halt();
 		local_irq_disable();
 	}
-	current_thread_info()->status |= TS_POLLING;
 }
 
 #ifdef ARCH_APICTIMER_STOPS_ON_C3
@@ -737,6 +730,11 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 	if (unlikely(!pr))
 		return -EINVAL;
 
+	if (cx->entry_method == ACPI_CSTATE_FFH) {
+		if (current_set_polling_and_test())
+			return -EINVAL;
+	}
+
 	lapic_timer_state_broadcast(pr, cx, 1);
 	acpi_idle_do_entry(cx);
 
@@ -790,18 +788,9 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 	if (unlikely(!pr))
 		return -EINVAL;
 
-	if (cx->entry_method != ACPI_CSTATE_FFH) {
-		current_thread_info()->status &= ~TS_POLLING;
-		/*
-		 * TS_POLLING-cleared state must be visible before we test
-		 * NEED_RESCHED:
-		 */
-		smp_mb();
-
-		if (unlikely(need_resched())) {
-			current_thread_info()->status |= TS_POLLING;
+	if (cx->entry_method == ACPI_CSTATE_FFH) {
+		if (current_set_polling_and_test())
 			return -EINVAL;
-		}
 	}
 
 	/*
@@ -819,9 +808,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 
 	sched_clock_idle_wakeup_event(0);
 
-	if (cx->entry_method != ACPI_CSTATE_FFH)
-		current_thread_info()->status |= TS_POLLING;
-
 	lapic_timer_state_broadcast(pr, cx, 0);
 	return index;
 }
@@ -858,18 +844,9 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 		}
 	}
 
-	if (cx->entry_method != ACPI_CSTATE_FFH) {
-		current_thread_info()->status &= ~TS_POLLING;
-		/*
-		 * TS_POLLING-cleared state must be visible before we test
-		 * NEED_RESCHED:
-		 */
-		smp_mb();
-
-		if (unlikely(need_resched())) {
-			current_thread_info()->status |= TS_POLLING;
+	if (cx->entry_method == ACPI_CSTATE_FFH) {
+		if (current_set_polling_and_test())
 			return -EINVAL;
-		}
 	}
 
 	acpi_unlazy_tlb(smp_processor_id());
@@ -915,9 +892,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 
 	sched_clock_idle_wakeup_event(0);
 
-	if (cx->entry_method != ACPI_CSTATE_FFH)
-		current_thread_info()->status |= TS_POLLING;
-
 	lapic_timer_state_broadcast(pr, cx, 0);
 	return index;
 }
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index fa6964d8681a..f116d664b473 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -359,7 +359,7 @@ static int intel_idle(struct cpuidle_device *dev,
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 
-	if (!need_resched()) {
+	if (!current_set_polling_and_test()) {
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b5344de1658b..e783ec52295a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2479,34 +2479,98 @@ static inline int tsk_is_polling(struct task_struct *p)
 {
 	return task_thread_info(p)->status & TS_POLLING;
 }
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
 {
 	current_thread_info()->status |= TS_POLLING;
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	current_thread_info()->status &= ~TS_POLLING;
-	smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
 }
 #elif defined(TIF_POLLING_NRFLAG)
 static inline int tsk_is_polling(struct task_struct *p)
 {
 	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
 }
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 *
+	 * XXX: assumes set/clear bit are identical barrier wise.
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	clear_thread_flag(TIF_POLLING_NRFLAG);
 }
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
 #else
 static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
 #endif
 
 /*
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index a629e4b23217..fddbe2023a5d 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -118,6 +118,8 @@ static inline __deprecated void set_need_resched(void)
 	 */
 }
 
+#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+
 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
 /*
  * An arch can define its own version of set_restore_sigmask() to get the
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index e695c0a0bcb5..c261409500e4 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -44,7 +44,7 @@ static inline int cpu_idle_poll(void)
 	rcu_idle_enter();
 	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
-	while (!need_resched())
+	while (!tif_need_resched())
 		cpu_relax();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	rcu_idle_exit();
@@ -92,8 +92,7 @@ static void cpu_idle_loop(void)
 			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
 				cpu_idle_poll();
 			} else {
-				current_clr_polling();
-				if (!need_resched()) {
+				if (!current_clr_polling_and_test()) {
 					stop_critical_timings();
 					rcu_idle_enter();
 					arch_cpu_idle();
@@ -103,7 +102,7 @@ static void cpu_idle_loop(void)
 				} else {
 					local_irq_enable();
 				}
-				current_set_polling();
+				__current_set_polling();
 			}
 			arch_cpu_idle_exit();
 		}
@@ -129,7 +128,7 @@ void cpu_startup_entry(enum cpuhp_state state)
 	 */
 	boot_init_stack_canary();
 #endif
-	current_set_polling();
+	__current_set_polling();
 	arch_cpu_idle_prepare();
 	cpu_idle_loop();
 }
-- 
cgit v1.2.3


From 4a2b4b222743bb07fedf985b884550f2ca067ea9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Aug 2013 14:55:24 +0200
Subject: sched: Introduce preempt_count accessor functions

Replace the single preempt_count() 'function' that's an lvalue with
two proper functions:

 preempt_count() - returns the preempt_count value as rvalue
 preempt_count_set() - Allows setting the preempt-count value

Also provide preempt_count_ptr() as a convenience wrapper to implement
all modifying operations.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-orxrbycjozopqfhb4dxdkdvb@git.kernel.org
[ Fixed build failure. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 25 +++++++++++++++++++------
 init/main.c             |  2 +-
 kernel/sched/core.c     |  4 ++--
 kernel/softirq.c        |  4 ++--
 kernel/timer.c          |  8 ++++----
 lib/locking-selftest.c  |  2 +-
 lib/smp_processor_id.c  |  3 +--
 7 files changed, 30 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index f5d4723cdb3d..eaac52a8fe6a 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,19 +10,32 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
+static __always_inline int preempt_count(void)
+{
+	return current_thread_info()->preempt_count;
+}
+
+static __always_inline int *preempt_count_ptr(void)
+{
+	return &current_thread_info()->preempt_count;
+}
+
+static __always_inline void preempt_count_set(int pc)
+{
+	*preempt_count_ptr() = pc;
+}
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
   extern void sub_preempt_count(int val);
 #else
-# define add_preempt_count(val)	do { preempt_count() += (val); } while (0)
-# define sub_preempt_count(val)	do { preempt_count() -= (val); } while (0)
+# define add_preempt_count(val)	do { *preempt_count_ptr() += (val); } while (0)
+# define sub_preempt_count(val)	do { *preempt_count_ptr() -= (val); } while (0)
 #endif
 
 #define inc_preempt_count() add_preempt_count(1)
 #define dec_preempt_count() sub_preempt_count(1)
 
-#define preempt_count()	(current_thread_info()->preempt_count)
-
 #ifdef CONFIG_PREEMPT
 
 asmlinkage void preempt_schedule(void);
@@ -81,9 +94,9 @@ do { \
 
 /* For debugging and tracer internals only! */
 #define add_preempt_count_notrace(val)			\
-	do { preempt_count() += (val); } while (0)
+	do { *preempt_count_ptr() += (val); } while (0)
 #define sub_preempt_count_notrace(val)			\
-	do { preempt_count() -= (val); } while (0)
+	do { *preempt_count_ptr() -= (val); } while (0)
 #define inc_preempt_count_notrace() add_preempt_count_notrace(1)
 #define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
 
diff --git a/init/main.c b/init/main.c
index af310afbef28..7cc4b7889a88 100644
--- a/init/main.c
+++ b/init/main.c
@@ -692,7 +692,7 @@ int __init_or_module do_one_initcall(initcall_t fn)
 
 	if (preempt_count() != count) {
 		sprintf(msgbuf, "preemption imbalance ");
-		preempt_count() = count;
+		preempt_count_set(count);
 	}
 	if (irqs_disabled()) {
 		strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 242da0c03aba..fe89afac4d09 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2219,7 +2219,7 @@ void __kprobes add_preempt_count(int val)
 	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
 		return;
 #endif
-	preempt_count() += val;
+	add_preempt_count_notrace(val);
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Spinlock count overflowing soon?
@@ -2250,7 +2250,7 @@ void __kprobes sub_preempt_count(int val)
 
 	if (preempt_count() == val)
 		trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
-	preempt_count() -= val;
+	sub_preempt_count_notrace(val);
 }
 EXPORT_SYMBOL(sub_preempt_count);
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 53cc09ceb0b8..a90de70cf1f3 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -106,7 +106,7 @@ static void __local_bh_disable(unsigned long ip, unsigned int cnt)
 	 * We must manually increment preempt_count here and manually
 	 * call the trace_preempt_off later.
 	 */
-	preempt_count() += cnt;
+	add_preempt_count_notrace(cnt);
 	/*
 	 * Were softirqs turned off above:
 	 */
@@ -256,7 +256,7 @@ restart:
 				       " exited with %08x?\n", vec_nr,
 				       softirq_to_name[vec_nr], h->action,
 				       prev_count, preempt_count());
-				preempt_count() = prev_count;
+				preempt_count_set(prev_count);
 			}
 
 			rcu_bh_qs(cpu);
diff --git a/kernel/timer.c b/kernel/timer.c
index 4296d13db3d1..6582b82fa966 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1092,7 +1092,7 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
 			  unsigned long data)
 {
-	int preempt_count = preempt_count();
+	int count = preempt_count();
 
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -1119,16 +1119,16 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
 
 	lock_map_release(&lockdep_map);
 
-	if (preempt_count != preempt_count()) {
+	if (count != preempt_count()) {
 		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
-			  fn, preempt_count, preempt_count());
+			  fn, count, preempt_count());
 		/*
 		 * Restore the preempt count. That gives us a decent
 		 * chance to survive and extract information. If the
 		 * callback kept a lock held, bad luck, but not worse
 		 * than the BUG() we had.
 		 */
-		preempt_count() = preempt_count;
+		preempt_count_set(count);
 	}
 }
 
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 6dc09d8f4c24..872a15a2a637 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1002,7 +1002,7 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 	 * Some tests (e.g. double-unlock) might corrupt the preemption
 	 * count, so restore it:
 	 */
-	preempt_count() = saved_preempt_count;
+	preempt_count_set(saved_preempt_count);
 #ifdef CONFIG_TRACE_IRQFLAGS
 	if (softirq_count())
 		current->softirqs_enabled = 0;
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 4c0d0e51d49e..04abe53f12a1 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -9,10 +9,9 @@
 
 notrace unsigned int debug_smp_processor_id(void)
 {
-	unsigned long preempt_count = preempt_count();
 	int this_cpu = raw_smp_processor_id();
 
-	if (likely(preempt_count))
+	if (likely(preempt_count()))
 		goto out;
 
 	if (irqs_disabled())
-- 
cgit v1.2.3


From f27dde8deef33c9e58027df11ceab2198601d6a6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Aug 2013 14:55:31 +0200
Subject: sched: Add NEED_RESCHED to the preempt_count

In order to combine the preemption and need_resched test we need to
fold the need_resched information into the preempt_count value.

Since the NEED_RESCHED flag is set across CPUs this needs to be an
atomic operation, however we very much want to avoid making
preempt_count atomic, therefore we keep the existing TIF_NEED_RESCHED
infrastructure in place but at 3 sites test it and fold its value into
preempt_count; namely:

 - resched_task() when setting TIF_NEED_RESCHED on the current task
 - scheduler_ipi() when resched_task() sets TIF_NEED_RESCHED on a
                   remote task it follows it up with a reschedule IPI
                   and we can modify the cpu local preempt_count from
                   there.
 - cpu_idle_loop() for when resched_task() found tsk_is_polling().

We use an inverted bitmask to indicate need_resched so that a 0 means
both need_resched and !atomic.

Also remove the barrier() in preempt_enable() between
preempt_enable_no_resched() and preempt_check_resched() to avoid
having to reload the preemption value and allow the compiler to use
the flags of the previuos decrement. I couldn't come up with any sane
reason for this barrier() to be there as preempt_enable_no_resched()
already has a barrier() before doing the decrement.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-7a7m5qqbn5pmwnd4wko9u6da@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/sched.h   |  7 +++++--
 kernel/cpu/idle.c       |  7 +++++++
 kernel/sched/core.c     | 20 +++++++++++++++-----
 4 files changed, 69 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index eaac52a8fe6a..92e341853e4b 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,9 +10,19 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
+/*
+ * We use the MSB mostly because its available; see <linux/preempt_mask.h> for
+ * the other bits -- can't include that header due to inclusion hell.
+ */
+#define PREEMPT_NEED_RESCHED	0x80000000
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
 static __always_inline int preempt_count(void)
 {
-	return current_thread_info()->preempt_count;
+	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
 }
 
 static __always_inline int *preempt_count_ptr(void)
@@ -20,11 +30,40 @@ static __always_inline int *preempt_count_ptr(void)
 	return &current_thread_info()->preempt_count;
 }
 
+/*
+ * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
+ * alternative is loosing a reschedule. Better schedule too often -- also this
+ * should be a very rare operation.
+ */
 static __always_inline void preempt_count_set(int pc)
 {
 	*preempt_count_ptr() = pc;
 }
 
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+}
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
   extern void sub_preempt_count(int val);
@@ -42,7 +81,7 @@ asmlinkage void preempt_schedule(void);
 
 #define preempt_check_resched() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule(); \
 } while (0)
 
@@ -52,7 +91,7 @@ void preempt_schedule_context(void);
 
 #define preempt_check_resched_context() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule_context(); \
 } while (0)
 #else
@@ -88,7 +127,6 @@ do { \
 #define preempt_enable() \
 do { \
 	preempt_enable_no_resched(); \
-	barrier(); \
 	preempt_check_resched(); \
 } while (0)
 
@@ -116,7 +154,6 @@ do { \
 #define preempt_enable_notrace() \
 do { \
 	preempt_enable_no_resched_notrace(); \
-	barrier(); \
 	preempt_check_resched_context(); \
 } while (0)
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e783ec52295a..9fa151fb968e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,6 +22,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
+#include <linux/preempt.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
@@ -434,7 +435,9 @@ struct task_cputime {
  * We include PREEMPT_ACTIVE to avoid cond_resched() from working
  * before the scheduler is active -- see should_resched().
  */
-#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE + PREEMPT_NEED_RESCHED)
+#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)
+#define PREEMPT_DISABLED	(1 + PREEMPT_NEED_RESCHED)
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
@@ -2408,7 +2411,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 
 static inline int need_resched(void)
 {
-	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
+	return unlikely(test_preempt_need_resched());
 }
 
 /*
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index c261409500e4..988573a9a387 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -105,6 +105,13 @@ static void cpu_idle_loop(void)
 				__current_set_polling();
 			}
 			arch_cpu_idle_exit();
+			/*
+			 * We need to test and propagate the TIF_NEED_RESCHED
+			 * bit here because we might not have send the
+			 * reschedule IPI to idle tasks.
+			 */
+			if (tif_need_resched())
+				set_preempt_need_resched();
 		}
 		tick_nohz_idle_exit();
 		schedule_preempt_disabled();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe89afac4d09..ee61f5affd20 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -525,8 +525,10 @@ void resched_task(struct task_struct *p)
 	set_tsk_need_resched(p);
 
 	cpu = task_cpu(p);
-	if (cpu == smp_processor_id())
+	if (cpu == smp_processor_id()) {
+		set_preempt_need_resched();
 		return;
+	}
 
 	/* NEED_RESCHED must be visible before we test polling */
 	smp_mb();
@@ -1391,6 +1393,14 @@ static void sched_ttwu_pending(void)
 
 void scheduler_ipi(void)
 {
+	/*
+	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
+	 * TIF_NEED_RESCHED remotely (for the first time) will also send
+	 * this IPI.
+	 */
+	if (tif_need_resched())
+		set_preempt_need_resched();
+
 	if (llist_empty(&this_rq()->wake_list)
 			&& !tick_nohz_full_cpu(smp_processor_id())
 			&& !got_nohz_idle_kick())
@@ -1714,7 +1724,7 @@ void sched_fork(struct task_struct *p)
 #endif
 #ifdef CONFIG_PREEMPT_COUNT
 	/* Want to start with kernel preemption disabled. */
-	task_thread_info(p)->preempt_count = 1;
+	task_thread_info(p)->preempt_count = PREEMPT_DISABLED;
 #endif
 #ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
@@ -2425,6 +2435,7 @@ need_resched:
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
 	clear_tsk_need_resched(prev);
+	clear_preempt_need_resched();
 	rq->skip_clock_update = 0;
 
 	if (likely(prev != next)) {
@@ -2536,11 +2547,10 @@ EXPORT_SYMBOL(preempt_schedule);
  */
 asmlinkage void __sched preempt_schedule_irq(void)
 {
-	struct thread_info *ti = current_thread_info();
 	enum ctx_state prev_state;
 
 	/* Catch callers which need to be fixed */
-	BUG_ON(ti->preempt_count || !irqs_disabled());
+	BUG_ON(preempt_count() || !irqs_disabled());
 
 	prev_state = exception_enter();
 
@@ -4207,7 +4217,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-	task_thread_info(idle)->preempt_count = 0;
+	task_thread_info(idle)->preempt_count = PREEMPT_ENABLED;
 
 	/*
 	 * The idle tasks have their own, simple scheduling class:
-- 
cgit v1.2.3


From a787870924dbd6f321661e06d4ec1c7a408c9ccf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Aug 2013 14:55:40 +0200
Subject: sched, arch: Create asm/preempt.h

In order to prepare to per-arch implementations of preempt_count move
the required bits into an asm-generic header and use this for all
archs.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-h5j0c1r3e3fk015m30h8f1zx@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/Kbuild      |  1 +
 arch/arc/include/asm/Kbuild        |  1 +
 arch/arm/include/asm/Kbuild        |  1 +
 arch/arm64/include/asm/Kbuild      |  1 +
 arch/avr32/include/asm/Kbuild      |  1 +
 arch/blackfin/include/asm/Kbuild   |  1 +
 arch/c6x/include/asm/Kbuild        |  1 +
 arch/cris/include/asm/Kbuild       |  1 +
 arch/frv/include/asm/Kbuild        |  1 +
 arch/h8300/include/asm/Kbuild      |  1 +
 arch/hexagon/include/asm/Kbuild    |  1 +
 arch/ia64/include/asm/Kbuild       |  1 +
 arch/m32r/include/asm/Kbuild       |  1 +
 arch/m68k/include/asm/Kbuild       |  1 +
 arch/metag/include/asm/Kbuild      |  1 +
 arch/microblaze/include/asm/Kbuild |  1 +
 arch/mips/include/asm/Kbuild       |  1 +
 arch/mn10300/include/asm/Kbuild    |  1 +
 arch/openrisc/include/asm/Kbuild   |  1 +
 arch/parisc/include/asm/Kbuild     |  1 +
 arch/powerpc/include/asm/Kbuild    |  1 +
 arch/s390/include/asm/Kbuild       |  1 +
 arch/score/include/asm/Kbuild      |  1 +
 arch/sh/include/asm/Kbuild         |  1 +
 arch/sparc/include/asm/Kbuild      |  1 +
 arch/tile/include/asm/Kbuild       |  1 +
 arch/um/include/asm/Kbuild         |  1 +
 arch/unicore32/include/asm/Kbuild  |  1 +
 arch/x86/include/asm/Kbuild        |  1 +
 arch/xtensa/include/asm/Kbuild     |  1 +
 include/asm-generic/preempt.h      | 54 ++++++++++++++++++++++++++++++++++++++
 include/linux/preempt.h            | 49 +---------------------------------
 32 files changed, 85 insertions(+), 48 deletions(-)
 create mode 100644 include/asm-generic/preempt.h

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index a6e85f448c1c..f01fb505ad52 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index d8dd660898b9..5943f7f9d325 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -46,3 +46,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index d3db39860b9c..4e6838d4ddf6 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -33,3 +33,4 @@ generic-y += timex.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += unaligned.h
+generic-y += preempt.h
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 79a642d199f2..519f89f5b6a3 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -50,3 +50,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index d22af851f3f6..b946080ee8bb 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y	+= clkdev.h
 generic-y	+= exec.h
 generic-y	+= trace_clock.h
 generic-y	+= param.h
+generic-y += preempt.h
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 127826f8a375..f2b43474b0e2 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -44,3 +44,4 @@ generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index e49f918531ad..fc0b3c356027 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -56,3 +56,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index c8325455520e..b06caf649a95 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -11,3 +11,4 @@ generic-y += module.h
 generic-y += trace_clock.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index c5d767028306..74742dc6a3da 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -2,3 +2,4 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 8ada3cf0c98d..7e0e7213a481 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -6,3 +6,4 @@ generic-y += mmu.h
 generic-y += module.h
 generic-y += trace_clock.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 1da17caac23c..67c3450309b7 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -53,3 +53,4 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index a3456f34f672..f93ee087e8fe 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -3,4 +3,5 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += kvm_para.h
 generic-y += trace_clock.h
+generic-y += preempt.h
 generic-y += vtime.h
\ No newline at end of file
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index bebdc36ebb0a..2b58c5f0bc38 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += module.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 09d77a862da3..a5d27f272a59 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -31,3 +31,4 @@ generic-y += trace_clock.h
 generic-y += types.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index 6ae0ccb632cb..84d0c1d6b9b3 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -52,3 +52,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index d3c51a6a601d..ce0bbf8f5640 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
 generic-y += syscalls.h
+generic-y += preempt.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 454ddf9bb76f..1acbb8b77a71 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -11,5 +11,6 @@ generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += trace_clock.h
+generic-y += preempt.h
 generic-y += ucontext.h
 generic-y += xor.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index c5d767028306..74742dc6a3da 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -2,3 +2,4 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 195653e851da..78405625e799 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -67,3 +67,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index ff4c9faed546..a603b9ebe54c 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -4,3 +4,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \
 	  div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \
 	  poll.h xor.h clkdev.h exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 704e6f10ae80..d8f9d2f18a23 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -2,4 +2,5 @@
 generic-y += clkdev.h
 generic-y += rwsem.h
 generic-y += trace_clock.h
+generic-y += preempt.h
 generic-y += vtime.h
\ No newline at end of file
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index f313f9cbcf44..7a5288f3479a 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -2,3 +2,4 @@
 
 generic-y += clkdev.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index e1c7bb999b06..f3414ade77a3 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -4,3 +4,4 @@ header-y +=
 generic-y += clkdev.h
 generic-y += trace_clock.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 280bea9e5e2b..231efbb68108 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -34,3 +34,4 @@ generic-y += termios.h
 generic-y += trace_clock.h
 generic-y += ucontext.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 7e4a97fbded4..bf390667657a 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -16,3 +16,4 @@ generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += word-at-a-time.h
+generic-y += preempt.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 664d6ad23f80..22f3bd147fa7 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -38,3 +38,4 @@ generic-y += termios.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index b30f34a79882..fdde187e6087 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
 generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h
 generic-y += switch_to.h clkdev.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 89d8b6c4e39a..00045cbe5c63 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -60,3 +60,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 7f669853317a..eca20286a91c 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,3 +5,4 @@ genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
 
 generic-y += clkdev.h
+generic-y += preempt.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 1b982641ec35..228d6aee3a16 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -28,3 +28,4 @@ generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
new file mode 100644
index 000000000000..a1fc6590a743
--- /dev/null
+++ b/include/asm-generic/preempt.h
@@ -0,0 +1,54 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <linux/thread_info.h>
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
+{
+	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline int *preempt_count_ptr(void)
+{
+	return &current_thread_info()->preempt_count;
+}
+
+/*
+ * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
+ * alternative is loosing a reschedule. Better schedule too often -- also this
+ * should be a very rare operation.
+ */
+static __always_inline void preempt_count_set(int pc)
+{
+	*preempt_count_ptr() = pc;
+}
+
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+}
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 92e341853e4b..df8e245e8729 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -6,7 +6,6 @@
  * preempt_count (used for kernel preemption, interrupt count, etc.)
  */
 
-#include <linux/thread_info.h>
 #include <linux/linkage.h>
 #include <linux/list.h>
 
@@ -16,53 +15,7 @@
  */
 #define PREEMPT_NEED_RESCHED	0x80000000
 
-/*
- * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
- * that think a non-zero value indicates we cannot preempt.
- */
-static __always_inline int preempt_count(void)
-{
-	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline int *preempt_count_ptr(void)
-{
-	return &current_thread_info()->preempt_count;
-}
-
-/*
- * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
- * alternative is loosing a reschedule. Better schedule too often -- also this
- * should be a very rare operation.
- */
-static __always_inline void preempt_count_set(int pc)
-{
-	*preempt_count_ptr() = pc;
-}
-
-/*
- * We fold the NEED_RESCHED bit into the preempt count such that
- * preempt_enable() can decrement and test for needing to reschedule with a
- * single instruction.
- *
- * We invert the actual bit, so that when the decrement hits 0 we know we both
- * need to resched (the bit is cleared) and can resched (no preempt count).
- */
-
-static __always_inline void set_preempt_need_resched(void)
-{
-	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline void clear_preempt_need_resched(void)
-{
-	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline bool test_preempt_need_resched(void)
-{
-	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
-}
+#include <asm/preempt.h>
 
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
-- 
cgit v1.2.3


From bdb43806589096ac4272fe1307e789846ac08d7c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 10 Sep 2013 12:15:23 +0200
Subject: sched: Extract the basic add/sub preempt_count modifiers

Rewrite the preempt_count macros in order to extract the 3 basic
preempt_count value modifiers:

  __preempt_count_add()
  __preempt_count_sub()

and the new:

  __preempt_count_dec_and_test()

And since we're at it anyway, replace the unconventional
$op_preempt_count names with the more conventional preempt_count_$op.

Since these basic operators are equivalent to the previous _notrace()
variants, do away with the _notrace() versions.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-ewbpdbupy9xpsjhg960zwbv8@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/mips/mm/init.c           |   5 +-
 arch/x86/kernel/traps.c       |   4 +-
 include/asm-generic/preempt.h |  35 ++++++++++++++
 include/linux/hardirq.h       |   8 ++--
 include/linux/preempt.h       | 106 +++++++++++++++++++-----------------------
 include/linux/sched.h         |   5 --
 include/linux/uaccess.h       |   8 +---
 kernel/context_tracking.c     |   2 +-
 kernel/sched/core.c           |  29 +++++-------
 kernel/softirq.c              |  14 +++---
 10 files changed, 113 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index e205ef598e97..12156176c7ca 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -124,7 +124,7 @@ void *kmap_coherent(struct page *page, unsigned long addr)
 
 	BUG_ON(Page_dcache_dirty(page));
 
-	inc_preempt_count();
+	pagefault_disable();
 	idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
 #ifdef CONFIG_MIPS_MT_SMTC
 	idx += FIX_N_COLOURS * smp_processor_id() +
@@ -193,8 +193,7 @@ void kunmap_coherent(void)
 	write_c0_entryhi(old_ctx);
 	EXIT_CRITICAL(flags);
 #endif
-	dec_preempt_count();
-	preempt_check_resched();
+	pagefault_enable();
 }
 
 void copy_user_highpage(struct page *to, struct page *from,
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8c8093b146ca..729aa779ff75 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -88,7 +88,7 @@ static inline void conditional_sti(struct pt_regs *regs)
 
 static inline void preempt_conditional_sti(struct pt_regs *regs)
 {
-	inc_preempt_count();
+	preempt_count_inc();
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 }
@@ -103,7 +103,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_disable();
-	dec_preempt_count();
+	preempt_count_dec();
 }
 
 static int __kprobes
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index 8100b1ec1715..82d958fc3823 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -65,4 +65,39 @@ static __always_inline bool test_preempt_need_resched(void)
 	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
 }
 
+/*
+ * The various preempt_count add/sub methods
+ */
+
+static __always_inline void __preempt_count_add(int val)
+{
+	*preempt_count_ptr() += val;
+}
+
+static __always_inline void __preempt_count_sub(int val)
+{
+	*preempt_count_ptr() -= val;
+}
+
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+	return !--*preempt_count_ptr();
+}
+
+/*
+ * Returns true when we need to resched -- even if we can not.
+ */
+static __always_inline bool need_resched(void)
+{
+	return unlikely(test_preempt_need_resched());
+}
+
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
+static __always_inline bool should_resched(void)
+{
+	return unlikely(!*preempt_count_ptr());
+}
+
 #endif /* __ASM_PREEMPT_H */
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 1e041063b226..d9cf963ac832 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -33,7 +33,7 @@ extern void rcu_nmi_exit(void);
 #define __irq_enter()					\
 	do {						\
 		account_irq_enter_time(current);	\
-		add_preempt_count(HARDIRQ_OFFSET);	\
+		preempt_count_add(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
 
@@ -49,7 +49,7 @@ extern void irq_enter(void);
 	do {						\
 		trace_hardirq_exit();			\
 		account_irq_exit_time(current);		\
-		sub_preempt_count(HARDIRQ_OFFSET);	\
+		preempt_count_sub(HARDIRQ_OFFSET);	\
 	} while (0)
 
 /*
@@ -62,7 +62,7 @@ extern void irq_exit(void);
 		lockdep_off();					\
 		ftrace_nmi_enter();				\
 		BUG_ON(in_nmi());				\
-		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
+		preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		rcu_nmi_enter();				\
 		trace_hardirq_enter();				\
 	} while (0)
@@ -72,7 +72,7 @@ extern void irq_exit(void);
 		trace_hardirq_exit();				\
 		rcu_nmi_exit();					\
 		BUG_ON(!in_nmi());				\
-		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
+		preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		ftrace_nmi_exit();				\
 		lockdep_on();					\
 	} while (0)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index df8e245e8729..2343d8715299 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -18,97 +18,86 @@
 #include <asm/preempt.h>
 
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
-  extern void add_preempt_count(int val);
-  extern void sub_preempt_count(int val);
+extern void preempt_count_add(int val);
+extern void preempt_count_sub(int val);
+#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); })
 #else
-# define add_preempt_count(val)	do { *preempt_count_ptr() += (val); } while (0)
-# define sub_preempt_count(val)	do { *preempt_count_ptr() -= (val); } while (0)
+#define preempt_count_add(val)	__preempt_count_add(val)
+#define preempt_count_sub(val)	__preempt_count_sub(val)
+#define preempt_count_dec_and_test() __preempt_count_dec_and_test()
 #endif
 
-#define inc_preempt_count() add_preempt_count(1)
-#define dec_preempt_count() sub_preempt_count(1)
-
-#ifdef CONFIG_PREEMPT
-
-asmlinkage void preempt_schedule(void);
-
-#define preempt_check_resched() \
-do { \
-	if (unlikely(!*preempt_count_ptr())) \
-		preempt_schedule(); \
-} while (0)
-
-#ifdef CONFIG_CONTEXT_TRACKING
-
-void preempt_schedule_context(void);
-
-#define preempt_check_resched_context() \
-do { \
-	if (unlikely(!*preempt_count_ptr())) \
-		preempt_schedule_context(); \
-} while (0)
-#else
-
-#define preempt_check_resched_context() preempt_check_resched()
-
-#endif /* CONFIG_CONTEXT_TRACKING */
-
-#else /* !CONFIG_PREEMPT */
-
-#define preempt_check_resched()		do { } while (0)
-#define preempt_check_resched_context()	do { } while (0)
-
-#endif /* CONFIG_PREEMPT */
+#define __preempt_count_inc() __preempt_count_add(1)
+#define __preempt_count_dec() __preempt_count_sub(1)
 
+#define preempt_count_inc() preempt_count_add(1)
+#define preempt_count_dec() preempt_count_sub(1)
 
 #ifdef CONFIG_PREEMPT_COUNT
 
 #define preempt_disable() \
 do { \
-	inc_preempt_count(); \
+	preempt_count_inc(); \
 	barrier(); \
 } while (0)
 
 #define sched_preempt_enable_no_resched() \
 do { \
 	barrier(); \
-	dec_preempt_count(); \
+	preempt_count_dec(); \
 } while (0)
 
-#define preempt_enable_no_resched()	sched_preempt_enable_no_resched()
+#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
 
+#ifdef CONFIG_PREEMPT
+asmlinkage void preempt_schedule(void);
 #define preempt_enable() \
 do { \
-	preempt_enable_no_resched(); \
-	preempt_check_resched(); \
+	barrier(); \
+	if (unlikely(preempt_count_dec_and_test())) \
+		preempt_schedule(); \
 } while (0)
 
-/* For debugging and tracer internals only! */
-#define add_preempt_count_notrace(val)			\
-	do { *preempt_count_ptr() += (val); } while (0)
-#define sub_preempt_count_notrace(val)			\
-	do { *preempt_count_ptr() -= (val); } while (0)
-#define inc_preempt_count_notrace() add_preempt_count_notrace(1)
-#define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
+#define preempt_check_resched() \
+do { \
+	if (should_resched()) \
+		preempt_schedule(); \
+} while (0)
+
+#else
+#define preempt_enable() preempt_enable_no_resched()
+#define preempt_check_resched() do { } while (0)
+#endif
 
 #define preempt_disable_notrace() \
 do { \
-	inc_preempt_count_notrace(); \
+	__preempt_count_inc(); \
 	barrier(); \
 } while (0)
 
 #define preempt_enable_no_resched_notrace() \
 do { \
 	barrier(); \
-	dec_preempt_count_notrace(); \
+	__preempt_count_dec(); \
 } while (0)
 
-/* preempt_check_resched is OK to trace */
+#ifdef CONFIG_PREEMPT
+
+#ifdef CONFIG_CONTEXT_TRACKING
+asmlinkage void preempt_schedule_context(void);
+#else
+#define preempt_schedule_context() preempt_schedule()
+#endif
+
 #define preempt_enable_notrace() \
 do { \
-	preempt_enable_no_resched_notrace(); \
-	preempt_check_resched_context(); \
+	barrier(); \
+	if (unlikely(__preempt_count_dec_and_test())) \
+		preempt_schedule_context(); \
 } while (0)
+#else
+#define preempt_enable_notrace() preempt_enable_no_resched_notrace()
+#endif
 
 #else /* !CONFIG_PREEMPT_COUNT */
 
@@ -118,10 +107,11 @@ do { \
  * that can cause faults and scheduling migrate into our preempt-protected
  * region.
  */
-#define preempt_disable()		barrier()
+#define preempt_disable()			barrier()
 #define sched_preempt_enable_no_resched()	barrier()
-#define preempt_enable_no_resched()	barrier()
-#define preempt_enable()		barrier()
+#define preempt_enable_no_resched()		barrier()
+#define preempt_enable()			barrier()
+#define preempt_check_resched()			do { } while (0)
 
 #define preempt_disable_notrace()		barrier()
 #define preempt_enable_no_resched_notrace()	barrier()
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9fa151fb968e..06ac17c7e639 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2409,11 +2409,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
 }
 
-static inline int need_resched(void)
-{
-	return unlikely(test_preempt_need_resched());
-}
-
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 5ca0951e1855..9d8cf056e661 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -15,7 +15,7 @@
  */
 static inline void pagefault_disable(void)
 {
-	inc_preempt_count();
+	preempt_count_inc();
 	/*
 	 * make sure to have issued the store before a pagefault
 	 * can hit.
@@ -30,11 +30,7 @@ static inline void pagefault_enable(void)
 	 * the pagefault handler again.
 	 */
 	barrier();
-	dec_preempt_count();
-	/*
-	 * make sure we do..
-	 */
-	barrier();
+	preempt_count_dec();
 	preempt_check_resched();
 }
 
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 247091bf0587..013161f1c807 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -111,7 +111,7 @@ void context_tracking_user_enter(void)
  * instead of preempt_schedule() to exit user context if needed before
  * calling the scheduler.
  */
-void __sched notrace preempt_schedule_context(void)
+asmlinkage void __sched notrace preempt_schedule_context(void)
 {
 	enum ctx_state prev_ctx;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0ba4e4192390..9c84a9ab1892 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2219,7 +2219,7 @@ notrace unsigned long get_parent_ip(unsigned long addr)
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
 				defined(CONFIG_PREEMPT_TRACER))
 
-void __kprobes add_preempt_count(int val)
+void __kprobes preempt_count_add(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
@@ -2228,7 +2228,7 @@ void __kprobes add_preempt_count(int val)
 	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
 		return;
 #endif
-	add_preempt_count_notrace(val);
+	__preempt_count_add(val);
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Spinlock count overflowing soon?
@@ -2239,9 +2239,9 @@ void __kprobes add_preempt_count(int val)
 	if (preempt_count() == val)
 		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 }
-EXPORT_SYMBOL(add_preempt_count);
+EXPORT_SYMBOL(preempt_count_add);
 
-void __kprobes sub_preempt_count(int val)
+void __kprobes preempt_count_sub(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
@@ -2259,9 +2259,9 @@ void __kprobes sub_preempt_count(int val)
 
 	if (preempt_count() == val)
 		trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
-	sub_preempt_count_notrace(val);
+	__preempt_count_sub(val);
 }
-EXPORT_SYMBOL(sub_preempt_count);
+EXPORT_SYMBOL(preempt_count_sub);
 
 #endif
 
@@ -2525,9 +2525,9 @@ asmlinkage void __sched notrace preempt_schedule(void)
 		return;
 
 	do {
-		add_preempt_count_notrace(PREEMPT_ACTIVE);
+		__preempt_count_add(PREEMPT_ACTIVE);
 		__schedule();
-		sub_preempt_count_notrace(PREEMPT_ACTIVE);
+		__preempt_count_sub(PREEMPT_ACTIVE);
 
 		/*
 		 * Check again in case we missed a preemption opportunity
@@ -2554,11 +2554,11 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	prev_state = exception_enter();
 
 	do {
-		add_preempt_count(PREEMPT_ACTIVE);
+		__preempt_count_add(PREEMPT_ACTIVE);
 		local_irq_enable();
 		__schedule();
 		local_irq_disable();
-		sub_preempt_count(PREEMPT_ACTIVE);
+		__preempt_count_sub(PREEMPT_ACTIVE);
 
 		/*
 		 * Check again in case we missed a preemption opportunity
@@ -3798,16 +3798,11 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
-static inline int should_resched(void)
-{
-	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
-}
-
 static void __cond_resched(void)
 {
-	add_preempt_count(PREEMPT_ACTIVE);
+	__preempt_count_add(PREEMPT_ACTIVE);
 	__schedule();
-	sub_preempt_count(PREEMPT_ACTIVE);
+	__preempt_count_sub(PREEMPT_ACTIVE);
 }
 
 int __sched _cond_resched(void)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index a90de70cf1f3..3e88612fc87e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -100,13 +100,13 @@ static void __local_bh_disable(unsigned long ip, unsigned int cnt)
 
 	raw_local_irq_save(flags);
 	/*
-	 * The preempt tracer hooks into add_preempt_count and will break
+	 * The preempt tracer hooks into preempt_count_add and will break
 	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
 	 * is set and before current->softirq_enabled is cleared.
 	 * We must manually increment preempt_count here and manually
 	 * call the trace_preempt_off later.
 	 */
-	add_preempt_count_notrace(cnt);
+	__preempt_count_add(cnt);
 	/*
 	 * Were softirqs turned off above:
 	 */
@@ -120,7 +120,7 @@ static void __local_bh_disable(unsigned long ip, unsigned int cnt)
 #else /* !CONFIG_TRACE_IRQFLAGS */
 static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
 {
-	add_preempt_count(cnt);
+	preempt_count_add(cnt);
 	barrier();
 }
 #endif /* CONFIG_TRACE_IRQFLAGS */
@@ -139,7 +139,7 @@ static void __local_bh_enable(unsigned int cnt)
 
 	if (softirq_count() == cnt)
 		trace_softirqs_on(_RET_IP_);
-	sub_preempt_count(cnt);
+	preempt_count_sub(cnt);
 }
 
 /*
@@ -169,12 +169,12 @@ static inline void _local_bh_enable_ip(unsigned long ip)
 	 * Keep preemption disabled until we are done with
 	 * softirq processing:
  	 */
-	sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
+	preempt_count_sub(SOFTIRQ_DISABLE_OFFSET - 1);
 
 	if (unlikely(!in_interrupt() && local_softirq_pending()))
 		do_softirq();
 
-	dec_preempt_count();
+	preempt_count_dec();
 #ifdef CONFIG_TRACE_IRQFLAGS
 	local_irq_enable();
 #endif
@@ -360,7 +360,7 @@ void irq_exit(void)
 
 	account_irq_exit_time(current);
 	trace_hardirq_exit();
-	sub_preempt_count(HARDIRQ_OFFSET);
+	preempt_count_sub(HARDIRQ_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
 
-- 
cgit v1.2.3


From a233f1120c37724938f7201fe2353b2577adaaf9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 23 Sep 2013 19:04:26 +0200
Subject: sched: Prepare for per-cpu preempt_count

When using per-cpu preempt_count variables we need to save/restore the
preempt_count on context switch (into per task storage; for instance
the old thread_info::preempt_count variable) because of
PREEMPT_ACTIVE.

However, this means that on fork() the preempt_count value of the last
context switch gets copied and if we had a PREEMPT_ACTIVE switch right
before cloning a child task the child task will now too have
PREEMPT_ACTIVE set and start its life with an extra PREEMPT_ACTIVE
count.

Therefore we need to make init_task_preempt_count() unconditional;
this resets whatever preempt_count we inherited from our parent
process.

Doing so for !per-cpu implementations is harmless.

For !PREEMPT_COUNT kernels we need to be careful not to start life
with an increased preempt_count.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-4k0b7oy1rcdyzochwiixuwi9@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 12 +++++++++---
 kernel/sched/core.c   |  2 --
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 06ac17c7e639..b09798b672f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -428,6 +428,14 @@ struct task_cputime {
 		.sum_exec_runtime = 0,				\
 	}
 
+#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)
+
+#ifdef CONFIG_PREEMPT_COUNT
+#define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED)
+#else
+#define PREEMPT_DISABLED	PREEMPT_ENABLED
+#endif
+
 /*
  * Disable preemption until the scheduler is running.
  * Reset by start_kernel()->sched_init()->init_idle().
@@ -435,9 +443,7 @@ struct task_cputime {
  * We include PREEMPT_ACTIVE to avoid cond_resched() from working
  * before the scheduler is active -- see should_resched().
  */
-#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE + PREEMPT_NEED_RESCHED)
-#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)
-#define PREEMPT_DISABLED	(1 + PREEMPT_NEED_RESCHED)
+#define INIT_PREEMPT_COUNT	(PREEMPT_DISABLED + PREEMPT_ACTIVE)
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9c84a9ab1892..f575d5bd7e7a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1722,9 +1722,7 @@ void sched_fork(struct task_struct *p)
 #if defined(CONFIG_SMP)
 	p->on_cpu = 0;
 #endif
-#ifdef CONFIG_PREEMPT_COUNT
 	init_task_preempt_count(p);
-#endif
 #ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
 #endif
-- 
cgit v1.2.3


From 1a338ac32ca630f67df25b4a16436cccc314e997 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Aug 2013 14:51:00 +0200
Subject: sched, x86: Optimize the preempt_schedule() call

Remove the bloat of the C calling convention out of the
preempt_enable() sites by creating an ASM wrapper which allows us to
do an asm("call ___preempt_schedule") instead.

calling.h bits by Andi Kleen

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-tk7xdi1cvvxewixzke8t8le1@git.kernel.org
[ Fixed build error. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/calling.h   | 50 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/preempt.h   | 10 ++++++++
 arch/x86/kernel/Makefile         |  2 ++
 arch/x86/kernel/i386_ksyms_32.c  |  7 ++++++
 arch/x86/kernel/preempt.S        | 25 ++++++++++++++++++++
 arch/x86/kernel/x8664_ksyms_64.c |  7 ++++++
 include/asm-generic/preempt.h    | 10 ++++++++
 include/linux/preempt.h          | 13 ++++-------
 8 files changed, 116 insertions(+), 8 deletions(-)
 create mode 100644 arch/x86/kernel/preempt.S

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 0fa675033912..cb4c73bfeb48 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -48,6 +48,8 @@ For 32-bit we have the following conventions - kernel is built with
 
 #include <asm/dwarf2.h>
 
+#ifdef CONFIG_X86_64
+
 /*
  * 64-bit system call stack frame layout defines and helpers,
  * for assembly code:
@@ -192,3 +194,51 @@ For 32-bit we have the following conventions - kernel is built with
 	.macro icebp
 	.byte 0xf1
 	.endm
+
+#else /* CONFIG_X86_64 */
+
+/*
+ * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
+ * are different from the entry_32.S versions in not changing the segment
+ * registers. So only suitable for in kernel use, not when transitioning
+ * from or to user space. The resulting stack frame is not a standard
+ * pt_regs frame. The main use case is calling C code from assembler
+ * when all the registers need to be preserved.
+ */
+
+	.macro SAVE_ALL
+	pushl_cfi %eax
+	CFI_REL_OFFSET eax, 0
+	pushl_cfi %ebp
+	CFI_REL_OFFSET ebp, 0
+	pushl_cfi %edi
+	CFI_REL_OFFSET edi, 0
+	pushl_cfi %esi
+	CFI_REL_OFFSET esi, 0
+	pushl_cfi %edx
+	CFI_REL_OFFSET edx, 0
+	pushl_cfi %ecx
+	CFI_REL_OFFSET ecx, 0
+	pushl_cfi %ebx
+	CFI_REL_OFFSET ebx, 0
+	.endm
+
+	.macro RESTORE_ALL
+	popl_cfi %ebx
+	CFI_RESTORE ebx
+	popl_cfi %ecx
+	CFI_RESTORE ecx
+	popl_cfi %edx
+	CFI_RESTORE edx
+	popl_cfi %esi
+	CFI_RESTORE esi
+	popl_cfi %edi
+	CFI_RESTORE edi
+	popl_cfi %ebp
+	CFI_RESTORE ebp
+	popl_cfi %eax
+	CFI_RESTORE eax
+	.endm
+
+#endif /* CONFIG_X86_64 */
+
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 1309942b95e5..1de41690ff99 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -95,4 +95,14 @@ static __always_inline bool should_resched(void)
 	return unlikely(!__this_cpu_read_4(__preempt_count));
 }
 
+#ifdef CONFIG_PREEMPT
+  extern asmlinkage void ___preempt_schedule(void);
+# define __preempt_schedule() asm ("call ___preempt_schedule")
+  extern asmlinkage void preempt_schedule(void);
+# ifdef CONFIG_CONTEXT_TRACKING
+    extern asmlinkage void ___preempt_schedule_context(void);
+#   define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
+# endif
+#endif
+
 #endif /* __ASM_PREEMPT_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a5408b965c9d..9b0a34e2cd79 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,6 +36,8 @@ obj-y			+= tsc.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
 
+obj-$(CONFIG_PREEMPT)	+= preempt.o
+
 obj-y				+= process.o
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 0fa69127209a..05fd74f537d6 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -37,3 +37,10 @@ EXPORT_SYMBOL(strstr);
 
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(___preempt_schedule);
+#ifdef CONFIG_CONTEXT_TRACKING
+EXPORT_SYMBOL(___preempt_schedule_context);
+#endif
+#endif
diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S
new file mode 100644
index 000000000000..ca7f0d58a87d
--- /dev/null
+++ b/arch/x86/kernel/preempt.S
@@ -0,0 +1,25 @@
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/asm.h>
+#include <asm/calling.h>
+
+ENTRY(___preempt_schedule)
+	CFI_STARTPROC
+	SAVE_ALL
+	call preempt_schedule
+	RESTORE_ALL
+	ret
+	CFI_ENDPROC
+
+#ifdef CONFIG_CONTEXT_TRACKING
+
+ENTRY(___preempt_schedule_context)
+	CFI_STARTPROC
+	SAVE_ALL
+	call preempt_schedule_context
+	RESTORE_ALL
+	ret
+	CFI_ENDPROC
+
+#endif
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index b014d9414d08..040681928e9d 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -66,3 +66,10 @@ EXPORT_SYMBOL(empty_zero_page);
 #ifndef CONFIG_PARAVIRT
 EXPORT_SYMBOL(native_load_gs_index);
 #endif
+
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(___preempt_schedule);
+#ifdef CONFIG_CONTEXT_TRACKING
+EXPORT_SYMBOL(___preempt_schedule_context);
+#endif
+#endif
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index 82d958fc3823..5dc14ed3791c 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -100,4 +100,14 @@ static __always_inline bool should_resched(void)
 	return unlikely(!*preempt_count_ptr());
 }
 
+#ifdef CONFIG_PREEMPT
+extern asmlinkage void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+
+#ifdef CONFIG_CONTEXT_TRACKING
+extern asmlinkage void preempt_schedule_context(void);
+#define __preempt_schedule_context() preempt_schedule_context()
+#endif
+#endif /* CONFIG_PREEMPT */
+
 #endif /* __ASM_PREEMPT_H */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 2343d8715299..a3d9dc8c2c00 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -50,18 +50,17 @@ do { \
 #define preempt_enable_no_resched() sched_preempt_enable_no_resched()
 
 #ifdef CONFIG_PREEMPT
-asmlinkage void preempt_schedule(void);
 #define preempt_enable() \
 do { \
 	barrier(); \
 	if (unlikely(preempt_count_dec_and_test())) \
-		preempt_schedule(); \
+		__preempt_schedule(); \
 } while (0)
 
 #define preempt_check_resched() \
 do { \
 	if (should_resched()) \
-		preempt_schedule(); \
+		__preempt_schedule(); \
 } while (0)
 
 #else
@@ -83,17 +82,15 @@ do { \
 
 #ifdef CONFIG_PREEMPT
 
-#ifdef CONFIG_CONTEXT_TRACKING
-asmlinkage void preempt_schedule_context(void);
-#else
-#define preempt_schedule_context() preempt_schedule()
+#ifndef CONFIG_CONTEXT_TRACKING
+#define __preempt_schedule_context() __preempt_schedule()
 #endif
 
 #define preempt_enable_notrace() \
 do { \
 	barrier(); \
 	if (unlikely(__preempt_count_dec_and_test())) \
-		preempt_schedule_context(); \
+		__preempt_schedule_context(); \
 } while (0)
 #else
 #define preempt_enable_notrace() preempt_enable_no_resched_notrace()
-- 
cgit v1.2.3


From fd792f8fbcfa95674b6c417429f576ad1d808086 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Mon, 23 Sep 2013 19:14:32 +0100
Subject: mfd: mc13xxx: Move SPI erratum workaround into SPI I/O function

Move the workaround for double sending AUDIO_CODEC and AUDIO_DAC writes
into the SPI core, aiding refactoring to eliminate the ASoC custom I/O
functions and avoiding the extra writes for I2C.

Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/mc13xxx-spi.c   | 5 +++++
 include/linux/mfd/mc13xxx.h | 7 +++++++
 sound/soc/codecs/mc13783.c  | 4 ----
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c
index 77189daadf1e..5f14ef6693c2 100644
--- a/drivers/mfd/mc13xxx-spi.c
+++ b/drivers/mfd/mc13xxx-spi.c
@@ -94,10 +94,15 @@ static int mc13xxx_spi_write(void *context, const void *data, size_t count)
 {
 	struct device *dev = context;
 	struct spi_device *spi = to_spi_device(dev);
+	const char *reg = data;
 
 	if (count != 4)
 		return -ENOTSUPP;
 
+	/* include errata fix for spi audio problems */
+	if (*reg == MC13783_AUDIO_CODEC || *reg == MC13783_AUDIO_DAC)
+		spi_write(spi, data, count);
+
 	return spi_write(spi, data, count);
 }
 
diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index 41ed59276c00..67c17b5a6f44 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -41,6 +41,13 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx,
 		unsigned int mode, unsigned int channel,
 		u8 ato, bool atox, unsigned int *sample);
 
+#define MC13783_AUDIO_RX0	36
+#define MC13783_AUDIO_RX1	37
+#define MC13783_AUDIO_TX	38
+#define MC13783_SSI_NETWORK	39
+#define MC13783_AUDIO_CODEC	40
+#define MC13783_AUDIO_DAC	41
+
 #define MC13XXX_IRQ_ADCDONE	0
 #define MC13XXX_IRQ_ADCBISDONE	1
 #define MC13XXX_IRQ_TS		2
diff --git a/sound/soc/codecs/mc13783.c b/sound/soc/codecs/mc13783.c
index ea141e1d6f28..4d3c8fd8c5db 100644
--- a/sound/soc/codecs/mc13783.c
+++ b/sound/soc/codecs/mc13783.c
@@ -125,10 +125,6 @@ static int mc13783_write(struct snd_soc_codec *codec,
 
 	ret = mc13xxx_reg_write(priv->mc13xxx, reg, value);
 
-	/* include errata fix for spi audio problems */
-	if (reg == MC13783_AUDIO_CODEC || reg == MC13783_AUDIO_DAC)
-		ret = mc13xxx_reg_write(priv->mc13xxx, reg, value);
-
 	mc13xxx_unlock(priv->mc13xxx);
 
 	return ret;
-- 
cgit v1.2.3


From cc6783f788d8fe8b23ec6fc2762f5e8c9a418eee Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 6 Sep 2013 17:39:49 -0700
Subject: rcu: Is it safe to enter an RCU read-side critical section?

There is currently no way for kernel code to determine whether it
is safe to enter an RCU read-side critical section, in other words,
whether or not RCU is paying attention to the currently running CPU.
Given the large and increasing quantity of code shared by the idle loop
and non-idle code, the this shortcoming is becoming increasingly painful.

This commit therefore adds __rcu_is_watching(), which returns true if
it is safe to enter an RCU read-side critical section on the currently
running CPU.  This function is quite fast, using only a __this_cpu_read().
However, the caller must disable preemption.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h |  8 ++++----
 include/linux/rcutiny.h  |  9 +++++++++
 include/linux/rcutree.h  |  2 ++
 kernel/rcutiny.c         |  4 ++--
 kernel/rcutree.c         | 13 +++++++++++++
 5 files changed, 30 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f1f1bc39346b..a53a21a2808c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -261,6 +261,10 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 		rcu_irq_exit(); \
 	} while (0)
 
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
+extern int rcu_is_cpu_idle(void);
+#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
+
 /*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
@@ -297,10 +301,6 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
-extern int rcu_is_cpu_idle(void);
-#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
-
 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
 bool rcu_lockdep_current_cpu_online(void);
 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e31005ee339e..bee665964878 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -132,4 +132,13 @@ static inline void rcu_scheduler_starting(void)
 }
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+#ifdef CONFIG_RCU_TRACE
+
+static inline bool __rcu_is_watching(void)
+{
+	return !rcu_is_cpu_idle();
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 226169d1bd2b..293613dfd2a5 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -90,4 +90,6 @@ extern void exit_rcu(void);
 extern void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 
+extern bool __rcu_is_watching(void);
+
 #endif /* __LINUX_RCUTREE_H */
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 9ed6075dc562..b4bc61874d77 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -174,7 +174,7 @@ void rcu_irq_enter(void)
 }
 EXPORT_SYMBOL_GPL(rcu_irq_enter);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
 
 /*
  * Test whether RCU thinks that the current CPU is idle.
@@ -185,7 +185,7 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
 
 /*
  * Test whether the current CPU was interrupted from idle.  Nested
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 32618b3fe4e6..910d868808dc 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -671,6 +671,19 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
+/**
+ * __rcu_is_watching - are RCU read-side critical sections safe?
+ *
+ * Return true if RCU is watching the running CPU, which means that
+ * this CPU can safely enter RCU read-side critical sections.  Unlike
+ * rcu_is_cpu_idle(), the caller of __rcu_is_watching() must have at
+ * least disabled preemption.
+ */
+bool __rcu_is_watching(void)
+{
+	return !!(atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1);
+}
+
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
-- 
cgit v1.2.3


From 5c173eb8bcb9c1aa888bd6d14a4cb746f3dd2420 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 13 Sep 2013 17:20:11 -0700
Subject: rcu: Consistent rcu_is_watching() naming

The old rcu_is_cpu_idle() function is just __rcu_is_watching() with
preemption disabled.  This commit therefore renames rcu_is_cpu_idle()
to rcu_is_watching.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 18 +++++++++---------
 include/linux/rcutiny.h  | 16 ++++++++++++----
 include/linux/rcutree.h  |  2 +-
 kernel/lockdep.c         |  4 ++--
 kernel/rcupdate.c        |  2 +-
 kernel/rcutiny.c         |  6 +++---
 kernel/rcutree.c         | 36 ++++++++++++++++++------------------
 7 files changed, 46 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a53a21a2808c..39cbb889e20d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -262,7 +262,7 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 	} while (0)
 
 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
-extern int rcu_is_cpu_idle(void);
+extern bool __rcu_is_watching(void);
 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
 
 /*
@@ -351,7 +351,7 @@ static inline int rcu_read_lock_held(void)
 {
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
-	if (rcu_is_cpu_idle())
+	if (!rcu_is_watching())
 		return 0;
 	if (!rcu_lockdep_current_cpu_online())
 		return 0;
@@ -402,7 +402,7 @@ static inline int rcu_read_lock_sched_held(void)
 
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
-	if (rcu_is_cpu_idle())
+	if (!rcu_is_watching())
 		return 0;
 	if (!rcu_lockdep_current_cpu_online())
 		return 0;
@@ -771,7 +771,7 @@ static inline void rcu_read_lock(void)
 	__rcu_read_lock();
 	__acquire(RCU);
 	rcu_lock_acquire(&rcu_lock_map);
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_lock() used illegally while idle");
 }
 
@@ -792,7 +792,7 @@ static inline void rcu_read_lock(void)
  */
 static inline void rcu_read_unlock(void)
 {
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_unlock() used illegally while idle");
 	rcu_lock_release(&rcu_lock_map);
 	__release(RCU);
@@ -821,7 +821,7 @@ static inline void rcu_read_lock_bh(void)
 	local_bh_disable();
 	__acquire(RCU_BH);
 	rcu_lock_acquire(&rcu_bh_lock_map);
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_lock_bh() used illegally while idle");
 }
 
@@ -832,7 +832,7 @@ static inline void rcu_read_lock_bh(void)
  */
 static inline void rcu_read_unlock_bh(void)
 {
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_unlock_bh() used illegally while idle");
 	rcu_lock_release(&rcu_bh_lock_map);
 	__release(RCU_BH);
@@ -857,7 +857,7 @@ static inline void rcu_read_lock_sched(void)
 	preempt_disable();
 	__acquire(RCU_SCHED);
 	rcu_lock_acquire(&rcu_sched_lock_map);
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_lock_sched() used illegally while idle");
 }
 
@@ -875,7 +875,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
  */
 static inline void rcu_read_unlock_sched(void)
 {
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_unlock_sched() used illegally while idle");
 	rcu_lock_release(&rcu_sched_lock_map);
 	__release(RCU_SCHED);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index bee665964878..09ebcbe9fd78 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -132,13 +132,21 @@ static inline void rcu_scheduler_starting(void)
 }
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-#ifdef CONFIG_RCU_TRACE
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
 
-static inline bool __rcu_is_watching(void)
+static inline bool rcu_is_watching(void)
 {
-	return !rcu_is_cpu_idle();
+	return __rcu_is_watching();
 }
 
-#endif /* #ifdef CONFIG_RCU_TRACE */
+#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
+
+static inline bool rcu_is_watching(void)
+{
+	return true;
+}
+
+
+#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
 
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 293613dfd2a5..4b9c81548742 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -90,6 +90,6 @@ extern void exit_rcu(void);
 extern void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 
-extern bool __rcu_is_watching(void);
+extern bool rcu_is_watching(void);
 
 #endif /* __LINUX_RCUTREE_H */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e16c45b9ee77..4e8e14c34e42 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -4224,7 +4224,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 	printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
 	       !rcu_lockdep_current_cpu_online()
 			? "RCU used illegally from offline CPU!\n"
-			: rcu_is_cpu_idle()
+			: !rcu_is_watching()
 				? "RCU used illegally from idle CPU!\n"
 				: "",
 	       rcu_scheduler_active, debug_locks);
@@ -4247,7 +4247,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 	 * So complain bitterly if someone does call rcu_read_lock(),
 	 * rcu_read_lock_bh() and so on from extended quiescent states.
 	 */
-	if (rcu_is_cpu_idle())
+	if (!rcu_is_watching())
 		printk("RCU used illegally from extended quiescent state!\n");
 
 	lockdep_print_held_locks(curr);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index b02a339836b4..3b3c0464d1eb 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -148,7 +148,7 @@ int rcu_read_lock_bh_held(void)
 {
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
-	if (rcu_is_cpu_idle())
+	if (!rcu_is_watching())
 		return 0;
 	if (!rcu_lockdep_current_cpu_online())
 		return 0;
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index b4bc61874d77..0fa061dfa55d 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -179,11 +179,11 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter);
 /*
  * Test whether RCU thinks that the current CPU is idle.
  */
-int rcu_is_cpu_idle(void)
+bool __rcu_is_watching(void)
 {
-	return !rcu_dynticks_nesting;
+	return rcu_dynticks_nesting;
 }
-EXPORT_SYMBOL(rcu_is_cpu_idle);
+EXPORT_SYMBOL(__rcu_is_watching);
 
 #endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 1b123e179d71..981d0c15a389 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -655,34 +655,34 @@ void rcu_nmi_exit(void)
 }
 
 /**
- * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
+ * __rcu_is_watching - are RCU read-side critical sections safe?
+ *
+ * Return true if RCU is watching the running CPU, which means that
+ * this CPU can safely enter RCU read-side critical sections.  Unlike
+ * rcu_is_watching(), the caller of __rcu_is_watching() must have at
+ * least disabled preemption.
+ */
+bool __rcu_is_watching(void)
+{
+	return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1;
+}
+
+/**
+ * rcu_is_watching - see if RCU thinks that the current CPU is idle
  *
  * If the current CPU is in its idle loop and is neither in an interrupt
  * or NMI handler, return true.
  */
-int rcu_is_cpu_idle(void)
+bool rcu_is_watching(void)
 {
 	int ret;
 
 	preempt_disable();
-	ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
+	ret = __rcu_is_watching();
 	preempt_enable();
 	return ret;
 }
-EXPORT_SYMBOL_GPL(rcu_is_cpu_idle);
-
-/**
- * __rcu_is_watching - are RCU read-side critical sections safe?
- *
- * Return true if RCU is watching the running CPU, which means that
- * this CPU can safely enter RCU read-side critical sections.  Unlike
- * rcu_is_cpu_idle(), the caller of __rcu_is_watching() must have at
- * least disabled preemption.
- */
-bool __rcu_is_watching(void)
-{
-	return !!(atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1);
-}
+EXPORT_SYMBOL_GPL(rcu_is_watching);
 
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
@@ -2268,7 +2268,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 	 * If called from an extended quiescent state, invoke the RCU
 	 * core in order to force a re-evaluation of RCU's idleness.
 	 */
-	if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
+	if (!rcu_is_watching() && cpu_online(smp_processor_id()))
 		invoke_rcu_core();
 
 	/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
-- 
cgit v1.2.3


From 008643b86c5f33c115c84ccdda1725cac3ad50ad Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Aug 2013 16:07:37 +0100
Subject: KEYS: Add a 'trusted' flag and a 'trusted only' flag

Add KEY_FLAG_TRUSTED to indicate that a key either comes from a trusted source
or had a cryptographic signature chain that led back to a trusted key the
kernel already possessed.

Add KEY_FLAGS_TRUSTED_ONLY to indicate that a keyring will only accept links to
keys marked with KEY_FLAGS_TRUSTED.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
---
 include/linux/key-type.h | 1 +
 include/linux/key.h      | 3 +++
 kernel/system_keyring.c  | 4 +++-
 security/keys/key.c      | 8 ++++++++
 security/keys/keyring.c  | 4 ++++
 5 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index f58737bcb050..a74c3a84dfdd 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -45,6 +45,7 @@ struct key_preparsed_payload {
 	const void	*data;		/* Raw data */
 	size_t		datalen;	/* Raw datalen */
 	size_t		quotalen;	/* Quota length for proposed payload */
+	bool		trusted;	/* True if key is trusted */
 };
 
 typedef int (*request_key_actor_t)(struct key_construction *key,
diff --git a/include/linux/key.h b/include/linux/key.h
index 010dbb618aca..80d677483e31 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -168,6 +168,8 @@ struct key {
 #define KEY_FLAG_NEGATIVE	5	/* set if key is negative */
 #define KEY_FLAG_ROOT_CAN_CLEAR	6	/* set if key can be cleared by root without permission */
 #define KEY_FLAG_INVALIDATED	7	/* set if key has been invalidated */
+#define KEY_FLAG_TRUSTED	8	/* set if key is trusted */
+#define KEY_FLAG_TRUSTED_ONLY	9	/* set if keyring only accepts links to trusted keys */
 
 	/* the key type and key description string
 	 * - the desc is used to match a key against search criteria
@@ -218,6 +220,7 @@ extern struct key *key_alloc(struct key_type *type,
 #define KEY_ALLOC_IN_QUOTA	0x0000	/* add to quota, reject if would overrun */
 #define KEY_ALLOC_QUOTA_OVERRUN	0x0001	/* add to quota, permit even if overrun */
 #define KEY_ALLOC_NOT_IN_QUOTA	0x0002	/* not in quota */
+#define KEY_ALLOC_TRUSTED	0x0004	/* Key should be flagged as trusted */
 
 extern void key_revoke(struct key *key);
 extern void key_invalidate(struct key *key);
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c
index 51c35141a13a..5296721eca5b 100644
--- a/kernel/system_keyring.c
+++ b/kernel/system_keyring.c
@@ -40,6 +40,7 @@ static __init int system_trusted_keyring_init(void)
 	if (IS_ERR(system_trusted_keyring))
 		panic("Can't allocate system trusted keyring\n");
 
+	set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags);
 	return 0;
 }
 
@@ -82,7 +83,8 @@ static __init int load_system_certificate_list(void)
 					   plen,
 					   (KEY_POS_ALL & ~KEY_POS_SETATTR) |
 					   KEY_USR_VIEW,
-					   KEY_ALLOC_NOT_IN_QUOTA);
+					   KEY_ALLOC_NOT_IN_QUOTA |
+					   KEY_ALLOC_TRUSTED);
 		if (IS_ERR(key)) {
 			pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
 			       PTR_ERR(key));
diff --git a/security/keys/key.c b/security/keys/key.c
index a819b5c7d4ec..d331ea9ef380 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -300,6 +300,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 
 	if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
 		key->flags |= 1 << KEY_FLAG_IN_QUOTA;
+	if (flags & KEY_ALLOC_TRUSTED)
+		key->flags |= 1 << KEY_FLAG_TRUSTED;
 
 	memset(&key->type_data, 0, sizeof(key->type_data));
 
@@ -813,6 +815,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	prep.data = payload;
 	prep.datalen = plen;
 	prep.quotalen = index_key.type->def_datalen;
+	prep.trusted = flags & KEY_ALLOC_TRUSTED;
 	if (index_key.type->preparse) {
 		ret = index_key.type->preparse(&prep);
 		if (ret < 0) {
@@ -827,6 +830,11 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	}
 	index_key.desc_len = strlen(index_key.description);
 
+	key_ref = ERR_PTR(-EPERM);
+	if (!prep.trusted && test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags))
+		goto error_free_prep;
+	flags |= prep.trusted ? KEY_ALLOC_TRUSTED : 0;
+
 	ret = __key_link_begin(keyring, &index_key, &edit);
 	if (ret < 0) {
 		key_ref = ERR_PTR(ret);
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index f7cdea22214f..9b6f6e09b50c 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1183,6 +1183,10 @@ int key_link(struct key *keyring, struct key *key)
 	key_check(keyring);
 	key_check(key);
 
+	if (test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags) &&
+	    !test_bit(KEY_FLAG_TRUSTED, &key->flags))
+		return -EPERM;
+
 	ret = __key_link_begin(keyring, &key->index_key, &edit);
 	if (ret == 0) {
 		kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage));
-- 
cgit v1.2.3


From a895c28a1f5d761679cb229b19fb08ca2067211e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 3 Sep 2013 13:31:05 -0700
Subject: PCI: Fix comment typo, remove unnecessary !! in pci_is_pcie()

Use normal kernel-doc "Returns:" instead of "retrun"
Assignment to bool is always 1 or 0 so the !! isn't necessary.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index da172f956ad6..ee0f3b60af22 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1749,11 +1749,11 @@ static inline int pci_pcie_cap(struct pci_dev *dev)
  * pci_is_pcie - check if the PCI device is PCI Express capable
  * @dev: PCI device
  *
- * Retrun true if the PCI device is PCI Express capable, false otherwise.
+ * Returns: true if the PCI device is PCI Express capable, false otherwise.
  */
 static inline bool pci_is_pcie(struct pci_dev *dev)
 {
-	return !!pci_pcie_cap(dev);
+	return pci_pcie_cap(dev);
 }
 
 /**
-- 
cgit v1.2.3


From 115e3bc5e23e7ec3c85a2014bfa96c0ddd036083 Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Tue, 3 Sep 2013 10:02:09 +0800
Subject: PCI: Remove unused "is_pcie" from pci_dev structure

No one uses "is_pcie" now; remove this obsolete member.

Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/probe.c | 1 -
 include/linux/pci.h | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7ef0f868b3e0..2372babe2fc5 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -984,7 +984,6 @@ void set_pcie_port_type(struct pci_dev *pdev)
 	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
 	if (!pos)
 		return;
-	pdev->is_pcie = 1;
 	pdev->pcie_cap = pos;
 	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
 	pdev->pcie_flags_reg = reg16;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee0f3b60af22..197e2d21f2cb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -330,8 +330,6 @@ struct pci_dev {
 	unsigned int	msix_enabled:1;
 	unsigned int	ari_enabled:1;	/* ARI forwarding */
 	unsigned int	is_managed:1;
-	unsigned int	is_pcie:1;	/* Obsolete. Will be removed.
-					   Use pci_is_pcie() instead */
 	unsigned int    needs_freset:1; /* Dev requires fundamental reset */
 	unsigned int	state_saved:1;
 	unsigned int	is_physfn:1;
-- 
cgit v1.2.3


From 77a0dfcd8a4d9a93a21c8be56eaeb65cd69b351d Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 24 Sep 2013 15:14:57 -0600
Subject: PCI: Document reason for using pci_is_root_bus()

Some code assumes "bus->self == NULL" means the bus is a root bus.  This
adds a comment explaining why this is incorrect ("virtual" buses added for
SR-IOV have "bus->self == NULL" but are not root buses).

No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 197e2d21f2cb..d3a888ae4b2e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -470,6 +470,10 @@ struct pci_bus {
 /*
  * Returns true if the pci bus is root (behind host-pci bridge),
  * false otherwise
+ *
+ * Some code assumes that "bus->self == NULL" means that bus is a root bus.
+ * This is incorrect because "virtual" buses added for SR-IOV (via
+ * virtfn_add_bus()) have "bus->self == NULL" but are not root buses.
  */
 static inline bool pci_is_root_bus(struct pci_bus *pbus)
 {
-- 
cgit v1.2.3


From 7868943db1668fba898cf71bed1506c19d6958aa Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 16 Sep 2013 23:47:28 +0800
Subject: usb: core: implement AMD remote wakeup quirk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The following patch is required to resolve remote wake issues with
certain devices.

Issue description:
If the remote wake is issued from the device in a specific timing
condition while the system is entering sleep state then it may cause
system to auto wake on subsequent sleep cycle.

Root cause:
Host controller rebroadcasts the Resume signal > 100 µseconds after
receiving the original resume event from the device. For proper
function, some devices may require the rebroadcast of resume event
within the USB spec of 100µS.

Workaroud:
1. Filter the AMD platforms with Yangtze chipset, then judge of all the usb
devices are mouse or not. And get out the port id which attached a mouse
with Pixart controller.
2. Then reset the port which attached issue device during system resume
from S3.

[Q] Why the special devices are only mice? Would high speed devices
such as 3G modem or USB Bluetooth adapter trigger this issue?
- Current this sensitivity is only confined to devices that use Pixart
  controllers. This controller is designed for use with LS mouse
devices only. We have not observed any other devices failing. There
may be a small risk for other devices also but this patch (reset
device in resume phase) will cover the cases if required.

[Q] Shouldn’t the resume signal be sent within 100 us for every
device?
- The Host controller may not send the resume signal within 100us,
  this our host controller specification change. This is why we
require the patch to prevent side effects on certain known devices.

[Q] Why would clicking mouse INTENSELY to wake the system up trigger
this issue?
- This behavior is specific to the devices that use Pixart controller.
  It is timing dependent on when the resume event is triggered during
the sleep state.

[Q] Is it a host controller issue or mouse?
- It is the host controller behavior during resume that triggers the
  device incorrect behavior on the next resume.

This patch sets USB_QUIRK_RESET_RESUME flag for these Pixart-based mice
when they attached to platforms with AMD Yangtze chipset.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd-pci.c    |  3 +++
 drivers/usb/core/quirks.c     | 37 +++++++++++++++++++++++++++++++++++++
 drivers/usb/host/pci-quirks.c | 12 ++++++++++++
 include/linux/usb/hcd.h       |  3 +++
 4 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index b9d3c43e3859..dfe9d0f22978 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -215,6 +215,9 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		goto disable_pci;
 	}
 
+	hcd->amd_resume_bug = (usb_hcd_amd_remote_wakeup_quirk(dev) &&
+			driver->flags & (HCD_USB11 | HCD_USB3)) ? 1 : 0;
+
 	if (driver->flags & HCD_MEMORY) {
 		/* EHCI, OHCI */
 		hcd->rsrc_start = pci_resource_start(dev, 0);
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 5b44cd47da5b..74d18c8b7a88 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -13,6 +13,7 @@
 
 #include <linux/usb.h>
 #include <linux/usb/quirks.h>
+#include <linux/usb/hcd.h>
 #include "usb.h"
 
 /* Lists of quirky USB devices, split in device quirks and interface quirks.
@@ -155,6 +156,21 @@ static const struct usb_device_id usb_interface_quirk_list[] = {
 	{ }  /* terminating entry must be last */
 };
 
+static const struct usb_device_id usb_amd_resume_quirk_list[] = {
+	/* Lenovo Mouse with Pixart controller */
+	{ USB_DEVICE(0x17ef, 0x602e), .driver_info = USB_QUIRK_RESET_RESUME },
+
+	/* Pixart Mouse */
+	{ USB_DEVICE(0x093a, 0x2500), .driver_info = USB_QUIRK_RESET_RESUME },
+	{ USB_DEVICE(0x093a, 0x2510), .driver_info = USB_QUIRK_RESET_RESUME },
+	{ USB_DEVICE(0x093a, 0x2521), .driver_info = USB_QUIRK_RESET_RESUME },
+
+	/* Logitech Optical Mouse M90/M100 */
+	{ USB_DEVICE(0x046d, 0xc05a), .driver_info = USB_QUIRK_RESET_RESUME },
+
+	{ }  /* terminating entry must be last */
+};
+
 static bool usb_match_any_interface(struct usb_device *udev,
 				    const struct usb_device_id *id)
 {
@@ -181,6 +197,18 @@ static bool usb_match_any_interface(struct usb_device *udev,
 	return false;
 }
 
+int usb_amd_resume_quirk(struct usb_device *udev)
+{
+	struct usb_hcd *hcd;
+
+	hcd = bus_to_hcd(udev->bus);
+	/* The device should be attached directly to root hub */
+	if (udev->level == 1 && hcd->amd_resume_bug == 1)
+		return 1;
+
+	return 0;
+}
+
 static u32 __usb_detect_quirks(struct usb_device *udev,
 			       const struct usb_device_id *id)
 {
@@ -206,6 +234,15 @@ static u32 __usb_detect_quirks(struct usb_device *udev,
 void usb_detect_quirks(struct usb_device *udev)
 {
 	udev->quirks = __usb_detect_quirks(udev, usb_quirk_list);
+
+	/*
+	 * Pixart-based mice would trigger remote wakeup issue on AMD
+	 * Yangtze chipset, so set them as RESET_RESUME flag.
+	 */
+	if (usb_amd_resume_quirk(udev))
+		udev->quirks |= __usb_detect_quirks(udev,
+				usb_amd_resume_quirk_list);
+
 	if (udev->quirks)
 		dev_dbg(&udev->dev, "USB quirks for this device: %x\n",
 			udev->quirks);
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index d1e68d887f6e..17eb1916e65e 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -250,6 +250,18 @@ commit:
 }
 EXPORT_SYMBOL_GPL(usb_amd_find_chipset_info);
 
+int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *pdev)
+{
+	/* Make sure amd chipset type has already been initialized */
+	usb_amd_find_chipset_info();
+	if (amd_chipset.sb_type.gen != AMD_CHIPSET_YANGTZE)
+		return 0;
+
+	dev_dbg(&pdev->dev, "QUIRK: Enable AMD remote wakeup fix\n");
+	return 1;
+}
+EXPORT_SYMBOL_GPL(usb_hcd_amd_remote_wakeup_quirk);
+
 /*
  * The hardware normally enables the A-link power management feature, which
  * lets the system lower the power consumption in idle states.
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 8c865134c881..fc64b6825f5e 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -141,6 +141,7 @@ struct usb_hcd {
 	unsigned		wireless:1;	/* Wireless USB HCD */
 	unsigned		authorized_default:1;
 	unsigned		has_tt:1;	/* Integrated TT in root hub */
+	unsigned		amd_resume_bug:1; /* AMD remote wakeup quirk */
 
 	unsigned int		irq;		/* irq allocated */
 	void __iomem		*regs;		/* device memory/io */
@@ -435,6 +436,8 @@ extern int usb_hcd_pci_probe(struct pci_dev *dev,
 extern void usb_hcd_pci_remove(struct pci_dev *dev);
 extern void usb_hcd_pci_shutdown(struct pci_dev *dev);
 
+extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev);
+
 #ifdef CONFIG_PM
 extern const struct dev_pm_ops usb_hcd_pci_pm_ops;
 #endif
-- 
cgit v1.2.3


From 469d6d0631386e6865a30c9ded87a5cc0fdf8e2e Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 18 Sep 2013 20:47:06 -0400
Subject: tty: Remove unused drop() method from tty_port interface

Although originally conceived as a hook for port drivers to know
when a port reference is dropped, no driver uses this method.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_port.c | 6 +-----
 include/linux/tty.h    | 1 -
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index 7efbca474689..c94d2349dd06 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -484,8 +484,6 @@ int tty_port_close_start(struct tty_port *port,
 
 	if (port->count) {
 		spin_unlock_irqrestore(&port->lock, flags);
-		if (port->ops->drop)
-			port->ops->drop(port);
 		return 0;
 	}
 	set_bit(ASYNCB_CLOSING, &port->flags);
@@ -504,9 +502,7 @@ int tty_port_close_start(struct tty_port *port,
 	/* Flush the ldisc buffering */
 	tty_ldisc_flush(tty);
 
-	/* Don't call port->drop for the last reference. Callers will want
-	   to drop the last active reference in ->shutdown() or the tty
-	   shutdown path */
+	/* Report to caller this is the last port reference */
 	return 1;
 }
 EXPORT_SYMBOL(tty_port_close_start);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 64f864651d86..2f47989d8288 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -180,7 +180,6 @@ struct tty_port_operations {
 	   IFF the port was initialized. Do not use to free resources. Called
 	   under the port mutex to serialize against activate/shutdowns */
 	void (*shutdown)(struct tty_port *port);
-	void (*drop)(struct tty_port *port);
 	/* Called under the port mutex from tty_port_open, serialized using
 	   the port mutex */
         /* FIXME: long term getting the tty argument *out* of this would be
-- 
cgit v1.2.3


From b83e867026caedd6d67fccfbe6e9a621edd79b21 Mon Sep 17 00:00:00 2001
From: Grant Grundler <grundler@chromium.org>
Date: Thu, 19 Sep 2013 18:21:36 -0700
Subject: mmc: core: remove dead function mmc_try_claim_host

cscope says there are no callers for mmc_try_claim_host in the kernel.
No reason to keep it.

Signed-off-by: Grant Grundler <grundler@chromium.org>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 25 -------------------------
 include/linux/mmc/core.h |  1 -
 2 files changed, 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index bf18b6bfce48..006ead2fb701 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -917,31 +917,6 @@ int __mmc_claim_host(struct mmc_host *host, atomic_t *abort)
 
 EXPORT_SYMBOL(__mmc_claim_host);
 
-/**
- *	mmc_try_claim_host - try exclusively to claim a host
- *	@host: mmc host to claim
- *
- *	Returns %1 if the host is claimed, %0 otherwise.
- */
-int mmc_try_claim_host(struct mmc_host *host)
-{
-	int claimed_host = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&host->lock, flags);
-	if (!host->claimed || host->claimer == current) {
-		host->claimed = 1;
-		host->claimer = current;
-		host->claim_cnt += 1;
-		claimed_host = 1;
-	}
-	spin_unlock_irqrestore(&host->lock, flags);
-	if (host->ops->enable && claimed_host && host->claim_cnt == 1)
-		host->ops->enable(host);
-	return claimed_host;
-}
-EXPORT_SYMBOL(mmc_try_claim_host);
-
 /**
  *	mmc_release_host - release a host
  *	@host: mmc host to release
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index da51bec578c3..a00fc49c8434 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -188,7 +188,6 @@ extern unsigned int mmc_align_data_size(struct mmc_card *, unsigned int);
 
 extern int __mmc_claim_host(struct mmc_host *host, atomic_t *abort);
 extern void mmc_release_host(struct mmc_host *host);
-extern int mmc_try_claim_host(struct mmc_host *host);
 
 extern void mmc_get_card(struct mmc_card *card);
 extern void mmc_put_card(struct mmc_card *card);
-- 
cgit v1.2.3


From 524268990e54dab9bd71a984b70c13ce0eb1c525 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Sat, 31 Aug 2013 00:13:42 +0900
Subject: mmc: dw_mmc: adjust the fifoth with block size

This change helps to choose msize, rx_watermark and tx_watermark
depending on block size for IDMAC mode.  For SDIO block size can be
variable, so if these values are set incorrectly, card clock may stop.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Tested-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 74 ++++++++++++++++++++++++++++++++++++++++++++--
 drivers/mmc/host/dw_mmc.h  |  4 +++
 include/linux/mmc/dw_mmc.h |  1 +
 3 files changed, 76 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 50da6e330a15..acefdeb2465b 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -529,6 +529,47 @@ static void dw_mci_post_req(struct mmc_host *mmc,
 	data->host_cookie = 0;
 }
 
+static void dw_mci_adjust_fifoth(struct dw_mci *host, struct mmc_data *data)
+{
+#ifdef CONFIG_MMC_DW_IDMAC
+	unsigned int blksz = data->blksz;
+	const u32 mszs[] = {1, 4, 8, 16, 32, 64, 128, 256};
+	u32 fifo_width = 1 << host->data_shift;
+	u32 blksz_depth = blksz / fifo_width, fifoth_val;
+	u32 msize = 0, rx_wmark = 1, tx_wmark, tx_wmark_invers;
+	int idx = (sizeof(mszs) / sizeof(mszs[0])) - 1;
+
+	tx_wmark = (host->fifo_depth) / 2;
+	tx_wmark_invers = host->fifo_depth - tx_wmark;
+
+	/*
+	 * MSIZE is '1',
+	 * if blksz is not a multiple of the FIFO width
+	 */
+	if (blksz % fifo_width) {
+		msize = 0;
+		rx_wmark = 1;
+		goto done;
+	}
+
+	do {
+		if (!((blksz_depth % mszs[idx]) ||
+		     (tx_wmark_invers % mszs[idx]))) {
+			msize = idx;
+			rx_wmark = mszs[idx] - 1;
+			break;
+		}
+	} while (--idx > 0);
+	/*
+	 * If idx is '0', it won't be tried
+	 * Thus, initial values are uesed
+	 */
+done:
+	fifoth_val = SDMMC_SET_FIFOTH(msize, rx_wmark, tx_wmark);
+	mci_writel(host, FIFOTH, fifoth_val);
+#endif
+}
+
 static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
 {
 	int sg_len;
@@ -553,6 +594,14 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
 		 (unsigned long)host->sg_cpu, (unsigned long)host->sg_dma,
 		 sg_len);
 
+	/*
+	 * Decide the MSIZE and RX/TX Watermark.
+	 * If current block size is same with previous size,
+	 * no need to update fifoth.
+	 */
+	if (host->prev_blksz != data->blksz)
+		dw_mci_adjust_fifoth(host, data);
+
 	/* Enable the DMA interface */
 	temp = mci_readl(host, CTRL);
 	temp |= SDMMC_CTRL_DMA_ENABLE;
@@ -603,6 +652,21 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
 		temp = mci_readl(host, CTRL);
 		temp &= ~SDMMC_CTRL_DMA_ENABLE;
 		mci_writel(host, CTRL, temp);
+
+		/*
+		 * Use the initial fifoth_val for PIO mode.
+		 * If next issued data may be transfered by DMA mode,
+		 * prev_blksz should be invalidated.
+		 */
+		mci_writel(host, FIFOTH, host->fifoth_val);
+		host->prev_blksz = 0;
+	} else {
+		/*
+		 * Keep the current block size.
+		 * It will be used to decide whether to update
+		 * fifoth register next time.
+		 */
+		host->prev_blksz = data->blksz;
 	}
 }
 
@@ -2368,8 +2432,8 @@ int dw_mci_probe(struct dw_mci *host)
 		fifo_size = host->pdata->fifo_depth;
 	}
 	host->fifo_depth = fifo_size;
-	host->fifoth_val = ((0x2 << 28) | ((fifo_size/2 - 1) << 16) |
-			((fifo_size/2) << 0));
+	host->fifoth_val =
+		SDMMC_SET_FIFOTH(0x2, fifo_size / 2 - 1, fifo_size / 2);
 	mci_writel(host, FIFOTH, host->fifoth_val);
 
 	/* disable clock to CIU */
@@ -2552,8 +2616,12 @@ int dw_mci_resume(struct dw_mci *host)
 	if (host->use_dma && host->dma_ops->init)
 		host->dma_ops->init(host);
 
-	/* Restore the old value at FIFOTH register */
+	/*
+	 * Restore the initial value at FIFOTH register
+	 * And Invalidate the prev_blksz with zero
+	 */
 	mci_writel(host, FIFOTH, host->fifoth_val);
+	host->prev_blksz = 0;
 
 	/* Put in max timeout */
 	mci_writel(host, TMOUT, 0xFFFFFFFF);
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index b281fdc6835f..a0f2f8d35f5d 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -128,6 +128,10 @@
 #define SDMMC_CMD_INDX(n)		((n) & 0x1F)
 /* Status register defines */
 #define SDMMC_GET_FCNT(x)		(((x)>>17) & 0x1FFF)
+/* FIFOTH register defines */
+#define SDMMC_SET_FIFOTH(m, r, t)	(((m) & 0x7) << 28 | \
+					 ((r) & 0xFFF) << 16 | \
+					 ((t) & 0xFFF))
 /* Internal DMAC interrupt defines */
 #define SDMMC_IDMAC_INT_AI		BIT(9)
 #define SDMMC_IDMAC_INT_NI		BIT(8)
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 198f0fa44e9f..4ec9dcc9460e 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -129,6 +129,7 @@ struct dw_mci {
 	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
 	struct mmc_data		*data;
+	unsigned int		prev_blksz;
 	struct workqueue_struct	*card_workqueue;
 
 	/* DMA interface members*/
-- 
cgit v1.2.3


From f1d2736c815629f3b43d8eebf73a81b581438e65 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Sat, 31 Aug 2013 00:13:55 +0900
Subject: mmc: dw_mmc: control card read threshold

Card Read Threshold should be ensured that the card clock does not stop
in the middle of a block of data being transferred from the card to the
Host. Specially, clock stop is allowed in fast transfer such as HS200
or SDR104 mode. And so, it should be enabled.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Tested-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 38 ++++++++++++++++++++++++++++++++++++--
 drivers/mmc/host/dw_mmc.h  |  3 +++
 include/linux/mmc/dw_mmc.h |  1 +
 3 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index acefdeb2465b..8eb37983a72a 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -570,6 +570,37 @@ done:
 #endif
 }
 
+static void dw_mci_ctrl_rd_thld(struct dw_mci *host, struct mmc_data *data)
+{
+	unsigned int blksz = data->blksz;
+	u32 blksz_depth, fifo_depth;
+	u16 thld_size;
+
+	WARN_ON(!(data->flags & MMC_DATA_READ));
+
+	if (host->timing != MMC_TIMING_MMC_HS200 &&
+	    host->timing != MMC_TIMING_UHS_SDR104)
+		goto disable;
+
+	blksz_depth = blksz / (1 << host->data_shift);
+	fifo_depth = host->fifo_depth;
+
+	if (blksz_depth > fifo_depth)
+		goto disable;
+
+	/*
+	 * If (blksz_depth) >= (fifo_depth >> 1), should be 'thld_size <= blksz'
+	 * If (blksz_depth) <  (fifo_depth >> 1), should be thld_size = blksz
+	 * Currently just choose blksz.
+	 */
+	thld_size = blksz;
+	mci_writel(host, CDTHRCTL, SDMMC_SET_RD_THLD(thld_size, 1));
+	return;
+
+disable:
+	mci_writel(host, CDTHRCTL, SDMMC_SET_RD_THLD(0, 0));
+}
+
 static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
 {
 	int sg_len;
@@ -627,10 +658,12 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
 	host->sg = NULL;
 	host->data = data;
 
-	if (data->flags & MMC_DATA_READ)
+	if (data->flags & MMC_DATA_READ) {
 		host->dir_status = DW_MCI_RECV_STATUS;
-	else
+		dw_mci_ctrl_rd_thld(host, data);
+	} else {
 		host->dir_status = DW_MCI_SEND_STATUS;
+	}
 
 	if (dw_mci_submit_data_dma(host, data)) {
 		int flags = SG_MITER_ATOMIC;
@@ -877,6 +910,7 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		regs &= ~((0x1 << slot->id) << 16);
 
 	mci_writel(slot->host, UHS_REG, regs);
+	slot->host->timing = ios->timing;
 
 	/*
 	 * Use mirror of ios->clock to prevent race with mmc
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index a0f2f8d35f5d..6bf24ab917e6 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -53,6 +53,7 @@
 #define SDMMC_IDINTEN		0x090
 #define SDMMC_DSCADDR		0x094
 #define SDMMC_BUFADDR		0x098
+#define SDMMC_CDTHRCTL		0x100
 #define SDMMC_DATA(x)		(x)
 
 /*
@@ -146,6 +147,8 @@
 #define SDMMC_IDMAC_SWRESET		BIT(0)
 /* Version ID register define */
 #define SDMMC_GET_VERID(x)		((x) & 0xFFFF)
+/* Card read threshold */
+#define SDMMC_SET_RD_THLD(v, x)		(((v) & 0x1FFF) << 16 | (x))
 
 /* Register access macros */
 #define mci_readl(dev, reg)			\
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 4ec9dcc9460e..a829f7ee28c8 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -130,6 +130,7 @@ struct dw_mci {
 	struct mmc_command	*cmd;
 	struct mmc_data		*data;
 	unsigned int		prev_blksz;
+	unsigned char		timing;
 	struct workqueue_struct	*card_workqueue;
 
 	/* DMA interface members*/
-- 
cgit v1.2.3


From 90c2143a8f6d0cd1dbae1ea32fcd1befb81e4b0d Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Sat, 31 Aug 2013 00:14:05 +0900
Subject: mmc: dw_mmc: guarantee stop-abort cmd in data errors

In error cases, DTO interrupt may or may not be generated depending
on remained data. Stop/Abort command ensures DTO generation for that
situation. Currently if 'stop' field of data is empty, there is no
stop/abort command. So, it could hang waiting DTO. This change
reinforces these cases.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Tested-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 84 +++++++++++++++++++++++++++++++++++-----------
 include/linux/mmc/dw_mmc.h |  2 ++
 2 files changed, 66 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 649127e71894..b4328ad59cf0 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -29,6 +29,7 @@
 #include <linux/irq.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
+#include <linux/mmc/sdio.h>
 #include <linux/mmc/dw_mmc.h>
 #include <linux/bitops.h>
 #include <linux/regulator/consumer.h>
@@ -246,10 +247,15 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
 
 	cmdr = cmd->opcode;
 
-	if (cmdr == MMC_STOP_TRANSMISSION)
+	if (cmd->opcode == MMC_STOP_TRANSMISSION ||
+	    cmd->opcode == MMC_GO_IDLE_STATE ||
+	    cmd->opcode == MMC_GO_INACTIVE_STATE ||
+	    (cmd->opcode == SD_IO_RW_DIRECT &&
+	     ((cmd->arg >> 9) & 0x1FFFF) == SDIO_CCCR_ABORT))
 		cmdr |= SDMMC_CMD_STOP;
 	else
-		cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
+		if (cmd->opcode != MMC_SEND_STATUS && cmd->data)
+			cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
 
 	if (cmd->flags & MMC_RSP_PRESENT) {
 		/* We expect a response, so set this bit */
@@ -276,6 +282,40 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
 	return cmdr;
 }
 
+static u32 dw_mci_prep_stop_abort(struct dw_mci *host, struct mmc_command *cmd)
+{
+	struct mmc_command *stop;
+	u32 cmdr;
+
+	if (!cmd->data)
+		return 0;
+
+	stop = &host->stop_abort;
+	cmdr = cmd->opcode;
+	memset(stop, 0, sizeof(struct mmc_command));
+
+	if (cmdr == MMC_READ_SINGLE_BLOCK ||
+	    cmdr == MMC_READ_MULTIPLE_BLOCK ||
+	    cmdr == MMC_WRITE_BLOCK ||
+	    cmdr == MMC_WRITE_MULTIPLE_BLOCK) {
+		stop->opcode = MMC_STOP_TRANSMISSION;
+		stop->arg = 0;
+		stop->flags = MMC_RSP_R1B | MMC_CMD_AC;
+	} else if (cmdr == SD_IO_RW_EXTENDED) {
+		stop->opcode = SD_IO_RW_DIRECT;
+		stop->arg |= (1 << 31) | (0 << 28) | (SDIO_CCCR_ABORT << 9) |
+			     ((cmd->arg >> 28) & 0x7);
+		stop->flags = MMC_RSP_SPI_R5 | MMC_RSP_R5 | MMC_CMD_AC;
+	} else {
+		return 0;
+	}
+
+	cmdr = stop->opcode | SDMMC_CMD_STOP |
+		SDMMC_CMD_RESP_CRC | SDMMC_CMD_RESP_EXP;
+
+	return cmdr;
+}
+
 static void dw_mci_start_command(struct dw_mci *host,
 				 struct mmc_command *cmd, u32 cmd_flags)
 {
@@ -290,9 +330,10 @@ static void dw_mci_start_command(struct dw_mci *host,
 	mci_writel(host, CMD, cmd_flags | SDMMC_CMD_START);
 }
 
-static void send_stop_cmd(struct dw_mci *host, struct mmc_data *data)
+static inline void send_stop_abort(struct dw_mci *host, struct mmc_data *data)
 {
-	dw_mci_start_command(host, data->stop, host->stop_cmdr);
+	struct mmc_command *stop = data->stop ? data->stop : &host->stop_abort;
+	dw_mci_start_command(host, stop, host->stop_cmdr);
 }
 
 /* DMA interface functions */
@@ -828,6 +869,8 @@ static void __dw_mci_start_request(struct dw_mci *host,
 
 	if (mrq->stop)
 		host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
+	else
+		host->stop_cmdr = dw_mci_prep_stop_abort(host, cmd);
 }
 
 static void dw_mci_start_request(struct dw_mci *host,
@@ -1190,13 +1233,9 @@ static void dw_mci_tasklet_func(unsigned long priv)
 
 			if (cmd->data && cmd->error) {
 				dw_mci_stop_dma(host);
-				if (data->stop) {
-					send_stop_cmd(host, data);
-					state = STATE_SENDING_STOP;
-					break;
-				} else {
-					host->data = NULL;
-				}
+				send_stop_abort(host, data);
+				state = STATE_SENDING_STOP;
+				break;
 			}
 
 			if (!host->mrq->data || cmd->error) {
@@ -1211,8 +1250,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
 			if (test_and_clear_bit(EVENT_DATA_ERROR,
 					       &host->pending_events)) {
 				dw_mci_stop_dma(host);
-				if (data->stop)
-					send_stop_cmd(host, data);
+				send_stop_abort(host, data);
 				state = STATE_DATA_ERROR;
 				break;
 			}
@@ -1272,7 +1310,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
 				data->error = 0;
 			}
 
-			if (!data->stop) {
+			if (!data->stop && !data->error) {
 				dw_mci_request_end(host, host->mrq);
 				goto unlock;
 			}
@@ -1284,8 +1322,10 @@ static void dw_mci_tasklet_func(unsigned long priv)
 			}
 
 			prev_state = state = STATE_SENDING_STOP;
-			if (!data->error)
-				send_stop_cmd(host, data);
+			if (data->stop && !data->error) {
+				/* stop command for open-ended transfer*/
+				send_stop_abort(host, data);
+			}
 			/* fall through */
 
 		case STATE_SENDING_STOP:
@@ -1304,7 +1344,12 @@ static void dw_mci_tasklet_func(unsigned long priv)
 
 			host->cmd = NULL;
 			host->data = NULL;
-			dw_mci_command_complete(host, host->mrq->stop);
+
+			if (host->mrq->stop)
+				dw_mci_command_complete(host, host->mrq->stop);
+			else
+				host->cmd_status = 0;
+
 			dw_mci_request_end(host, host->mrq);
 			goto unlock;
 
@@ -1888,11 +1933,10 @@ static void dw_mci_work_routine_card(struct work_struct *work)
 					case STATE_DATA_ERROR:
 						if (mrq->data->error == -EINPROGRESS)
 							mrq->data->error = -ENOMEDIUM;
-						if (!mrq->stop)
-							break;
 						/* fall through */
 					case STATE_SENDING_STOP:
-						mrq->stop->error = -ENOMEDIUM;
+						if (mrq->stop)
+							mrq->stop->error = -ENOMEDIUM;
 						break;
 					}
 
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index a829f7ee28c8..6ce7d2cd3c7a 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -15,6 +15,7 @@
 #define LINUX_MMC_DW_MMC_H
 
 #include <linux/scatterlist.h>
+#include <linux/mmc/core.h>
 
 #define MAX_MCI_SLOTS	2
 
@@ -129,6 +130,7 @@ struct dw_mci {
 	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
 	struct mmc_data		*data;
+	struct mmc_command	stop_abort;
 	unsigned int		prev_blksz;
 	unsigned char		timing;
 	struct workqueue_struct	*card_workqueue;
-- 
cgit v1.2.3


From 71ef1ea418ee45a4f939d53a02410627b394c336 Mon Sep 17 00:00:00 2001
From: Jackey Shen <jackey.shen@amd.com>
Date: Fri, 17 May 2013 17:17:43 +0800
Subject: mmc: core: clean up duplicate macros

Clean up the duplicate macros:
mmc_sd_card_uhs -> mmc_card_uhs
mmc_sd_card_set_uhs -> mmc_card_set_uhs

Signed-off-by: Jackey Shen <jackey.shen@amd.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/bus.c            | 2 +-
 drivers/mmc/host/rtsx_pci_sdmmc.c | 2 +-
 include/linux/mmc/card.h          | 2 --
 3 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 704bf66f5873..cdca8a70da38 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -340,7 +340,7 @@ int mmc_add_card(struct mmc_card *card)
 		break;
 	}
 
-	if (mmc_sd_card_uhs(card) &&
+	if (mmc_card_uhs(card) &&
 		(card->sd_bus_speed < ARRAY_SIZE(uhs_speeds)))
 		uhs_bus_speed_mode = uhs_speeds[card->sd_bus_speed];
 
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 375a880e0c5f..e2ab12473f97 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -364,7 +364,7 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
 	struct mmc_host *mmc = host->mmc;
 	struct mmc_card *card = mmc->card;
 	struct mmc_data *data = mrq->data;
-	int uhs = mmc_sd_card_uhs(card);
+	int uhs = mmc_card_uhs(card);
 	int read = (data->flags & MMC_DATA_READ) ? 1 : 0;
 	u8 cfg2, trans_mode;
 	int err;
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 842de3e21e70..f42cdbd8ac21 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -420,7 +420,6 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
 #define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
 #define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
-#define mmc_sd_card_uhs(c)	((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 #define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
 #define mmc_card_doing_bkops(c)	((c)->state & MMC_STATE_DOING_BKOPS)
@@ -432,7 +431,6 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
 #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
 #define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
-#define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
 #define mmc_card_set_doing_bkops(c)	((c)->state |= MMC_STATE_DOING_BKOPS)
-- 
cgit v1.2.3


From 19872d20c890073c5207d9e02bb8f14d451a11eb Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Mon, 9 Sep 2013 18:33:54 +0200
Subject: HID: uhid: allocate static minor

udev has this nice feature of creating "dead" /dev/<node> device-nodes if
it finds a devnode:<node> modalias. Once the node is accessed, the kernel
automatically loads the module that provides the node. However, this
requires udev to know the major:minor code to use for the node. This
feature was introduced by:

  commit 578454ff7eab61d13a26b568f99a89a2c9edc881
  Author: Kay Sievers <kay.sievers@vrfy.org>
  Date:   Thu May 20 18:07:20 2010 +0200

      driver core: add devname module aliases to allow module on-demand auto-loading

However, uhid uses dynamic minor numbers so this doesn't actually work. We
need to load uhid to know which minor it's going to use.

Hence, allocate a static minor (just like uinput does) and we're good
to go.

Reported-by: Tom Gundersen <teg@jklm.no>
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/uhid.c         | 3 ++-
 include/linux/miscdevice.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index 5bf2fb785844..93b00d76374c 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -615,7 +615,7 @@ static const struct file_operations uhid_fops = {
 
 static struct miscdevice uhid_misc = {
 	.fops		= &uhid_fops,
-	.minor		= MISC_DYNAMIC_MINOR,
+	.minor		= UHID_MINOR,
 	.name		= UHID_NAME,
 };
 
@@ -634,4 +634,5 @@ module_exit(uhid_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>");
 MODULE_DESCRIPTION("User-space I/O driver support for HID subsystem");
+MODULE_ALIAS_MISCDEV(UHID_MINOR);
 MODULE_ALIAS("devname:" UHID_NAME);
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 09c2300ddb37..cb358355ef43 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -45,6 +45,7 @@
 #define MAPPER_CTRL_MINOR	236
 #define LOOP_CTRL_MINOR		237
 #define VHOST_NET_MINOR		238
+#define UHID_MINOR		239
 #define MISC_DYNAMIC_MINOR	255
 
 struct device;
-- 
cgit v1.2.3


From b0f4fe1edf6abbc81500d661f730cebd653a838c Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 6 Jun 2013 11:57:27 +0100
Subject: mfd: dbx500-prcmu: Correctly reorder PRCMU clock identifiers

... as stipulated by the Hardware Specification document.

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/mfd/dbx500-prcmu.h | 135 ++++++++++++++++++++-------------------
 1 file changed, 71 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h
index ca0790fba2f5..87667d48602b 100644
--- a/include/linux/mfd/dbx500-prcmu.h
+++ b/include/linux/mfd/dbx500-prcmu.h
@@ -97,70 +97,77 @@ enum prcmu_wakeup_index {
 /*
  * Clock identifiers.
  */
-enum prcmu_clock {
-	PRCMU_SGACLK,
-	PRCMU_UARTCLK,
-	PRCMU_MSP02CLK,
-	PRCMU_MSP1CLK,
-	PRCMU_I2CCLK,
-	PRCMU_SDMMCCLK,
-	PRCMU_SPARE1CLK,
-	PRCMU_SLIMCLK,
-	PRCMU_PER1CLK,
-	PRCMU_PER2CLK,
-	PRCMU_PER3CLK,
-	PRCMU_PER5CLK,
-	PRCMU_PER6CLK,
-	PRCMU_PER7CLK,
-	PRCMU_LCDCLK,
-	PRCMU_BMLCLK,
-	PRCMU_HSITXCLK,
-	PRCMU_HSIRXCLK,
-	PRCMU_HDMICLK,
-	PRCMU_APEATCLK,
-	PRCMU_APETRACECLK,
-	PRCMU_MCDECLK,
-	PRCMU_IPI2CCLK,
-	PRCMU_DSIALTCLK,
-	PRCMU_DMACLK,
-	PRCMU_B2R2CLK,
-	PRCMU_TVCLK,
-	PRCMU_SSPCLK,
-	PRCMU_RNGCLK,
-	PRCMU_UICCCLK,
-	PRCMU_PWMCLK,
-	PRCMU_IRDACLK,
-	PRCMU_IRRCCLK,
-	PRCMU_SIACLK,
-	PRCMU_SVACLK,
-	PRCMU_ACLK,
-	PRCMU_HVACLK, /* Ux540 only */
-	PRCMU_G1CLK, /* Ux540 only */
-	PRCMU_SDMMCHCLK,
-	PRCMU_CAMCLK,
-	PRCMU_BML8580CLK,
-	PRCMU_NUM_REG_CLOCKS,
-	PRCMU_SYSCLK = PRCMU_NUM_REG_CLOCKS,
-	PRCMU_CDCLK,
-	PRCMU_TIMCLK,
-	PRCMU_PLLSOC0,
-	PRCMU_PLLSOC1,
-	PRCMU_ARMSS,
-	PRCMU_PLLDDR,
-	PRCMU_PLLDSI,
-	PRCMU_DSI0CLK,
-	PRCMU_DSI1CLK,
-	PRCMU_DSI0ESCCLK,
-	PRCMU_DSI1ESCCLK,
-	PRCMU_DSI2ESCCLK,
-	/* LCD DSI PLL - Ux540 only */
-	PRCMU_PLLDSI_LCD,
-	PRCMU_DSI0CLK_LCD,
-	PRCMU_DSI1CLK_LCD,
-	PRCMU_DSI0ESCCLK_LCD,
-	PRCMU_DSI1ESCCLK_LCD,
-	PRCMU_DSI2ESCCLK_LCD,
-};
+#define ARMCLK			0
+#define PRCMU_ACLK		1
+#define PRCMU_SVAMMCSPCLK 	2
+#define PRCMU_SDMMCHCLK 	2  /* DBx540 only. */
+#define PRCMU_SIACLK 		3
+#define PRCMU_SIAMMDSPCLK 	3  /* DBx540 only. */
+#define PRCMU_SGACLK 		4
+#define PRCMU_UARTCLK 		5
+#define PRCMU_MSP02CLK 		6
+#define PRCMU_MSP1CLK 		7
+#define PRCMU_I2CCLK 		8
+#define PRCMU_SDMMCCLK 		9
+#define PRCMU_SLIMCLK 		10
+#define PRCMU_CAMCLK 		10 /* DBx540 only. */
+#define PRCMU_PER1CLK 		11
+#define PRCMU_PER2CLK 		12
+#define PRCMU_PER3CLK 		13
+#define PRCMU_PER5CLK 		14
+#define PRCMU_PER6CLK 		15
+#define PRCMU_PER7CLK 		16
+#define PRCMU_LCDCLK 		17
+#define PRCMU_BMLCLK 		18
+#define PRCMU_HSITXCLK 		19
+#define PRCMU_HSIRXCLK 		20
+#define PRCMU_HDMICLK		21
+#define PRCMU_APEATCLK 		22
+#define PRCMU_APETRACECLK 	23
+#define PRCMU_MCDECLK  	 	24
+#define PRCMU_IPI2CCLK  	25
+#define PRCMU_DSIALTCLK  	26
+#define PRCMU_DMACLK  	 	27
+#define PRCMU_B2R2CLK  	 	28
+#define PRCMU_TVCLK  	 	29
+#define SPARE_UNIPROCLK  	30
+#define PRCMU_SSPCLK  	 	31
+#define PRCMU_RNGCLK  	 	32
+#define PRCMU_UICCCLK  	 	33
+#define PRCMU_G1CLK             34 /* DBx540 only. */
+#define PRCMU_HVACLK            35 /* DBx540 only. */
+#define PRCMU_SPARE1CLK	 	36
+#define PRCMU_SPARE2CLK	 	37
+
+#define PRCMU_NUM_REG_CLOCKS  	38
+
+#define PRCMU_RTCCLK  	 	PRCMU_NUM_REG_CLOCKS
+#define PRCMU_SYSCLK  	 	39
+#define PRCMU_CDCLK  	 	40
+#define PRCMU_TIMCLK  	 	41
+#define PRCMU_PLLSOC0  	 	42
+#define PRCMU_PLLSOC1  	 	43
+#define PRCMU_ARMSS  	 	44
+#define PRCMU_PLLDDR  	 	45
+#define PRCMU_BML8580CLK        46
+
+/* DSI Clocks */
+#define PRCMU_PLLDSI  	 	47
+#define PRCMU_DSI0CLK 	  	48
+#define PRCMU_DSI1CLK  	 	49
+#define PRCMU_DSI0ESCCLK  	50
+#define PRCMU_DSI1ESCCLK  	51
+#define PRCMU_DSI2ESCCLK  	52
+
+/* LCD DSI PLL - Ux540 only */
+#define PRCMU_PLLDSI_LCD        53
+#define PRCMU_DSI0CLK_LCD       54
+#define PRCMU_DSI1CLK_LCD       55
+#define PRCMU_DSI0ESCCLK_LCD    56
+#define PRCMU_DSI1ESCCLK_LCD    57
+#define PRCMU_DSI2ESCCLK_LCD    58
+
+#define PRCMU_NUM_CLKS  	59
 
 /**
  * enum prcmu_wdog_id - PRCMU watchdog IDs
-- 
cgit v1.2.3


From 67f13daadccebf95c04f73db7b78cead844540bd Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 6 Jun 2013 11:50:47 +0100
Subject: mfd: dbx500-prcmu: Move PRCMU numerical clock identifiers into DT
 include file

These are required to request DBx500 PRCMU clocks from Device Tree. The
numbers used are taken directly from the Hardware Specification document.
We're moving them from the DBx500 PRCMU include file into the DT include
directory and referencing them from the former via a #include.

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/dt-bindings/mfd/dbx500-prcmu.h | 84 ++++++++++++++++++++++++++++++++++
 include/linux/mfd/dbx500-prcmu.h       | 77 +------------------------------
 2 files changed, 86 insertions(+), 75 deletions(-)
 create mode 100644 include/dt-bindings/mfd/dbx500-prcmu.h

(limited to 'include/linux')

diff --git a/include/dt-bindings/mfd/dbx500-prcmu.h b/include/dt-bindings/mfd/dbx500-prcmu.h
new file mode 100644
index 000000000000..b7ee8c909908
--- /dev/null
+++ b/include/dt-bindings/mfd/dbx500-prcmu.h
@@ -0,0 +1,84 @@
+/*
+ * This header provides constants for the PRCMU bindings.
+ *
+ */
+
+#ifndef _DT_BINDINGS_MFD_PRCMU_H
+#define _DT_BINDINGS_MFD_PRCMU_H
+
+/*
+ * Clock identifiers.
+ */
+#define ARMCLK			0
+#define PRCMU_ACLK		1
+#define PRCMU_SVAMMCSPCLK 	2
+#define PRCMU_SDMMCHCLK 	2  /* DBx540 only. */
+#define PRCMU_SIACLK 		3
+#define PRCMU_SIAMMDSPCLK 	3  /* DBx540 only. */
+#define PRCMU_SGACLK 		4
+#define PRCMU_UARTCLK 		5
+#define PRCMU_MSP02CLK 		6
+#define PRCMU_MSP1CLK 		7
+#define PRCMU_I2CCLK 		8
+#define PRCMU_SDMMCCLK 		9
+#define PRCMU_SLIMCLK 		10
+#define PRCMU_CAMCLK 		10 /* DBx540 only. */
+#define PRCMU_PER1CLK 		11
+#define PRCMU_PER2CLK 		12
+#define PRCMU_PER3CLK 		13
+#define PRCMU_PER5CLK 		14
+#define PRCMU_PER6CLK 		15
+#define PRCMU_PER7CLK 		16
+#define PRCMU_LCDCLK 		17
+#define PRCMU_BMLCLK 		18
+#define PRCMU_HSITXCLK 		19
+#define PRCMU_HSIRXCLK 		20
+#define PRCMU_HDMICLK		21
+#define PRCMU_APEATCLK 		22
+#define PRCMU_APETRACECLK 	23
+#define PRCMU_MCDECLK  	 	24
+#define PRCMU_IPI2CCLK  	25
+#define PRCMU_DSIALTCLK  	26
+#define PRCMU_DMACLK  	 	27
+#define PRCMU_B2R2CLK  	 	28
+#define PRCMU_TVCLK  	 	29
+#define SPARE_UNIPROCLK  	30
+#define PRCMU_SSPCLK  	 	31
+#define PRCMU_RNGCLK  	 	32
+#define PRCMU_UICCCLK  	 	33
+#define PRCMU_G1CLK             34 /* DBx540 only. */
+#define PRCMU_HVACLK            35 /* DBx540 only. */
+#define PRCMU_SPARE1CLK	 	36
+#define PRCMU_SPARE2CLK	 	37
+
+#define PRCMU_NUM_REG_CLOCKS  	38
+
+#define PRCMU_RTCCLK  	 	PRCMU_NUM_REG_CLOCKS
+#define PRCMU_SYSCLK  	 	39
+#define PRCMU_CDCLK  	 	40
+#define PRCMU_TIMCLK  	 	41
+#define PRCMU_PLLSOC0  	 	42
+#define PRCMU_PLLSOC1  	 	43
+#define PRCMU_ARMSS  	 	44
+#define PRCMU_PLLDDR  	 	45
+#define PRCMU_BML8580CLK        46
+
+/* DSI Clocks */
+#define PRCMU_PLLDSI  	 	47
+#define PRCMU_DSI0CLK 	  	48
+#define PRCMU_DSI1CLK  	 	49
+#define PRCMU_DSI0ESCCLK  	50
+#define PRCMU_DSI1ESCCLK  	51
+#define PRCMU_DSI2ESCCLK  	52
+
+/* LCD DSI PLL - Ux540 only */
+#define PRCMU_PLLDSI_LCD        53
+#define PRCMU_DSI0CLK_LCD       54
+#define PRCMU_DSI1CLK_LCD       55
+#define PRCMU_DSI0ESCCLK_LCD    56
+#define PRCMU_DSI1ESCCLK_LCD    57
+#define PRCMU_DSI2ESCCLK_LCD    58
+
+#define PRCMU_NUM_CLKS  	59
+
+#endif
diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h
index 87667d48602b..060e11256fbc 100644
--- a/include/linux/mfd/dbx500-prcmu.h
+++ b/include/linux/mfd/dbx500-prcmu.h
@@ -12,6 +12,8 @@
 #include <linux/notifier.h>
 #include <linux/err.h>
 
+#include <dt-bindings/mfd/dbx500-prcmu.h> /* For clock identifiers */
+
 /* Offset for the firmware version within the TCPM */
 #define DB8500_PRCMU_FW_VERSION_OFFSET 0xA4
 #define DBX540_PRCMU_FW_VERSION_OFFSET 0xA8
@@ -94,81 +96,6 @@ enum prcmu_wakeup_index {
 #define PRCMU_CLKSRC_ARMCLKFIX		0x46
 #define PRCMU_CLKSRC_HDMICLK		0x47
 
-/*
- * Clock identifiers.
- */
-#define ARMCLK			0
-#define PRCMU_ACLK		1
-#define PRCMU_SVAMMCSPCLK 	2
-#define PRCMU_SDMMCHCLK 	2  /* DBx540 only. */
-#define PRCMU_SIACLK 		3
-#define PRCMU_SIAMMDSPCLK 	3  /* DBx540 only. */
-#define PRCMU_SGACLK 		4
-#define PRCMU_UARTCLK 		5
-#define PRCMU_MSP02CLK 		6
-#define PRCMU_MSP1CLK 		7
-#define PRCMU_I2CCLK 		8
-#define PRCMU_SDMMCCLK 		9
-#define PRCMU_SLIMCLK 		10
-#define PRCMU_CAMCLK 		10 /* DBx540 only. */
-#define PRCMU_PER1CLK 		11
-#define PRCMU_PER2CLK 		12
-#define PRCMU_PER3CLK 		13
-#define PRCMU_PER5CLK 		14
-#define PRCMU_PER6CLK 		15
-#define PRCMU_PER7CLK 		16
-#define PRCMU_LCDCLK 		17
-#define PRCMU_BMLCLK 		18
-#define PRCMU_HSITXCLK 		19
-#define PRCMU_HSIRXCLK 		20
-#define PRCMU_HDMICLK		21
-#define PRCMU_APEATCLK 		22
-#define PRCMU_APETRACECLK 	23
-#define PRCMU_MCDECLK  	 	24
-#define PRCMU_IPI2CCLK  	25
-#define PRCMU_DSIALTCLK  	26
-#define PRCMU_DMACLK  	 	27
-#define PRCMU_B2R2CLK  	 	28
-#define PRCMU_TVCLK  	 	29
-#define SPARE_UNIPROCLK  	30
-#define PRCMU_SSPCLK  	 	31
-#define PRCMU_RNGCLK  	 	32
-#define PRCMU_UICCCLK  	 	33
-#define PRCMU_G1CLK             34 /* DBx540 only. */
-#define PRCMU_HVACLK            35 /* DBx540 only. */
-#define PRCMU_SPARE1CLK	 	36
-#define PRCMU_SPARE2CLK	 	37
-
-#define PRCMU_NUM_REG_CLOCKS  	38
-
-#define PRCMU_RTCCLK  	 	PRCMU_NUM_REG_CLOCKS
-#define PRCMU_SYSCLK  	 	39
-#define PRCMU_CDCLK  	 	40
-#define PRCMU_TIMCLK  	 	41
-#define PRCMU_PLLSOC0  	 	42
-#define PRCMU_PLLSOC1  	 	43
-#define PRCMU_ARMSS  	 	44
-#define PRCMU_PLLDDR  	 	45
-#define PRCMU_BML8580CLK        46
-
-/* DSI Clocks */
-#define PRCMU_PLLDSI  	 	47
-#define PRCMU_DSI0CLK 	  	48
-#define PRCMU_DSI1CLK  	 	49
-#define PRCMU_DSI0ESCCLK  	50
-#define PRCMU_DSI1ESCCLK  	51
-#define PRCMU_DSI2ESCCLK  	52
-
-/* LCD DSI PLL - Ux540 only */
-#define PRCMU_PLLDSI_LCD        53
-#define PRCMU_DSI0CLK_LCD       54
-#define PRCMU_DSI1CLK_LCD       55
-#define PRCMU_DSI0ESCCLK_LCD    56
-#define PRCMU_DSI1ESCCLK_LCD    57
-#define PRCMU_DSI2ESCCLK_LCD    58
-
-#define PRCMU_NUM_CLKS  	59
-
 /**
  * enum prcmu_wdog_id - PRCMU watchdog IDs
  * @PRCMU_WDOG_ALL: use all timers
-- 
cgit v1.2.3


From 82b0f4b7c576d22c764239662cedc63c21f02d8d Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Tue, 17 Sep 2013 10:11:53 +0100
Subject: clk: ux500: Copy u8500_clk_init() ready for DT enablement

Here we're using the old clock initialisation function as a template.
It's necessary to remove all of the clk_register_clkdev() calls as
they don't make sense when booting with Device Tree.

Cc: Mike Turquette <mturquette@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/clk/ux500/Makefile              |   1 +
 drivers/clk/ux500/u8500_of_clk.c        | 381 ++++++++++++++++++++++++++++++++
 include/linux/platform_data/clk-ux500.h |   3 +
 3 files changed, 385 insertions(+)
 create mode 100644 drivers/clk/ux500/u8500_of_clk.c

(limited to 'include/linux')

diff --git a/drivers/clk/ux500/Makefile b/drivers/clk/ux500/Makefile
index c6a806ed0e8c..521483f0ba33 100644
--- a/drivers/clk/ux500/Makefile
+++ b/drivers/clk/ux500/Makefile
@@ -8,6 +8,7 @@ obj-y += clk-prcmu.o
 obj-y += clk-sysctrl.o
 
 # Clock definitions
+obj-y += u8500_of_clk.o
 obj-y += u8500_clk.o
 obj-y += u9540_clk.o
 obj-y += u8540_clk.o
diff --git a/drivers/clk/ux500/u8500_of_clk.c b/drivers/clk/ux500/u8500_of_clk.c
new file mode 100644
index 000000000000..ceebce6a624f
--- /dev/null
+++ b/drivers/clk/ux500/u8500_of_clk.c
@@ -0,0 +1,381 @@
+/*
+ * Clock definitions for u8500 platform.
+ *
+ * Copyright (C) 2012 ST-Ericsson SA
+ * Author: Ulf Hansson <ulf.hansson@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/mfd/dbx500-prcmu.h>
+#include <linux/platform_data/clk-ux500.h>
+#include "clk.h"
+
+void u8500_of_clk_init(u32 clkrst1_base, u32 clkrst2_base, u32 clkrst3_base,
+		       u32 clkrst5_base, u32 clkrst6_base)
+{
+	struct prcmu_fw_version *fw_version;
+	const char *sgaclk_parent = NULL;
+	struct clk *clk;
+
+	/* Clock sources */
+	clk = clk_reg_prcmu_gate("soc0_pll", NULL, PRCMU_PLLSOC0,
+				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+
+	clk = clk_reg_prcmu_gate("soc1_pll", NULL, PRCMU_PLLSOC1,
+				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+
+	clk = clk_reg_prcmu_gate("ddr_pll", NULL, PRCMU_PLLDDR,
+				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+
+	/* FIXME: Add sys, ulp and int clocks here. */
+
+	clk = clk_register_fixed_rate(NULL, "rtc32k", "NULL",
+				CLK_IS_ROOT|CLK_IGNORE_UNUSED,
+				32768);
+
+	/* PRCMU clocks */
+	fw_version = prcmu_get_fw_version();
+	if (fw_version != NULL) {
+		switch (fw_version->project) {
+		case PRCMU_FW_PROJECT_U8500_C2:
+		case PRCMU_FW_PROJECT_U8520:
+		case PRCMU_FW_PROJECT_U8420:
+			sgaclk_parent = "soc0_pll";
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (sgaclk_parent)
+		clk = clk_reg_prcmu_gate("sgclk", sgaclk_parent,
+					PRCMU_SGACLK, 0);
+	else
+		clk = clk_reg_prcmu_gate("sgclk", NULL,
+					PRCMU_SGACLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("msp02clk", NULL, PRCMU_MSP02CLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_scalable("lcdclk", NULL, PRCMU_LCDCLK, 0,
+				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_scalable("hsitxclk", NULL, PRCMU_HSITXCLK, 0,
+				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_scalable("hsirxclk", NULL, PRCMU_HSIRXCLK, 0,
+				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_scalable("hdmiclk", NULL, PRCMU_HDMICLK, 0,
+				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("apetraceclk", NULL, PRCMU_APETRACECLK,
+				CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK,
+				CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK,
+				CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_scalable("tvclk", NULL, PRCMU_TVCLK, 0,
+				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, CLK_IS_ROOT);
+
+	clk = clk_reg_prcmu_opp_volt_scalable("sdmmcclk", NULL, PRCMU_SDMMCCLK,
+					100000000,
+					CLK_IS_ROOT|CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_scalable("dsi_pll", "hdmiclk",
+				PRCMU_PLLDSI, 0, CLK_SET_RATE_GATE);
+
+
+	clk = clk_reg_prcmu_scalable("dsi0clk", "dsi_pll",
+				PRCMU_DSI0CLK, 0, CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_scalable("dsi1clk", "dsi_pll",
+				PRCMU_DSI1CLK, 0, CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_scalable("dsi0escclk", "tvclk",
+				PRCMU_DSI0ESCCLK, 0, CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_scalable("dsi1escclk", "tvclk",
+				PRCMU_DSI1ESCCLK, 0, CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_scalable("dsi2escclk", "tvclk",
+				PRCMU_DSI2ESCCLK, 0, CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcmu_scalable_rate("armss", NULL,
+				PRCMU_ARMSS, 0, CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+
+	clk = clk_register_fixed_factor(NULL, "smp_twd", "armss",
+				CLK_IGNORE_UNUSED, 1, 2);
+
+	/*
+	 * FIXME: Add special handled PRCMU clocks here:
+	 * 1. clkout0yuv, use PRCMU as parent + need regulator + pinctrl.
+	 * 2. ab9540_clkout1yuv, see clkout0yuv
+	 */
+
+	/* PRCC P-clocks */
+	clk = clk_reg_prcc_pclk("p1_pclk0", "per1clk", clkrst1_base,
+				BIT(0), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk1", "per1clk", clkrst1_base,
+				BIT(1), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk2", "per1clk", clkrst1_base,
+				BIT(2), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk3", "per1clk", clkrst1_base,
+				BIT(3), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk4", "per1clk", clkrst1_base,
+				BIT(4), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk5", "per1clk", clkrst1_base,
+				BIT(5), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk6", "per1clk", clkrst1_base,
+				BIT(6), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk7", "per1clk", clkrst1_base,
+				BIT(7), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk8", "per1clk", clkrst1_base,
+				BIT(8), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk9", "per1clk", clkrst1_base,
+				BIT(9), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk10", "per1clk", clkrst1_base,
+				BIT(10), 0);
+
+	clk = clk_reg_prcc_pclk("p1_pclk11", "per1clk", clkrst1_base,
+				BIT(11), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk0", "per2clk", clkrst2_base,
+				BIT(0), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk1", "per2clk", clkrst2_base,
+				BIT(1), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk2", "per2clk", clkrst2_base,
+				BIT(2), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk3", "per2clk", clkrst2_base,
+				BIT(3), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk4", "per2clk", clkrst2_base,
+				BIT(4), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk5", "per2clk", clkrst2_base,
+				BIT(5), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk6", "per2clk", clkrst2_base,
+				BIT(6), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk7", "per2clk", clkrst2_base,
+				BIT(7), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk8", "per2clk", clkrst2_base,
+				BIT(8), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk9", "per2clk", clkrst2_base,
+				BIT(9), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk10", "per2clk", clkrst2_base,
+				BIT(10), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk11", "per2clk", clkrst2_base,
+				BIT(11), 0);
+
+	clk = clk_reg_prcc_pclk("p2_pclk12", "per2clk", clkrst2_base,
+				BIT(12), 0);
+
+	clk = clk_reg_prcc_pclk("p3_pclk0", "per3clk", clkrst3_base,
+				BIT(0), 0);
+
+	clk = clk_reg_prcc_pclk("p3_pclk1", "per3clk", clkrst3_base,
+				BIT(1), 0);
+
+	clk = clk_reg_prcc_pclk("p3_pclk2", "per3clk", clkrst3_base,
+				BIT(2), 0);
+
+	clk = clk_reg_prcc_pclk("p3_pclk3", "per3clk", clkrst3_base,
+				BIT(3), 0);
+
+	clk = clk_reg_prcc_pclk("p3_pclk4", "per3clk", clkrst3_base,
+				BIT(4), 0);
+
+	clk = clk_reg_prcc_pclk("p3_pclk5", "per3clk", clkrst3_base,
+				BIT(5), 0);
+
+	clk = clk_reg_prcc_pclk("p3_pclk6", "per3clk", clkrst3_base,
+				BIT(6), 0);
+
+	clk = clk_reg_prcc_pclk("p3_pclk7", "per3clk", clkrst3_base,
+				BIT(7), 0);
+
+	clk = clk_reg_prcc_pclk("p3_pclk8", "per3clk", clkrst3_base,
+				BIT(8), 0);
+
+	clk = clk_reg_prcc_pclk("p5_pclk0", "per5clk", clkrst5_base,
+				BIT(0), 0);
+
+	clk = clk_reg_prcc_pclk("p5_pclk1", "per5clk", clkrst5_base,
+				BIT(1), 0);
+
+	clk = clk_reg_prcc_pclk("p6_pclk0", "per6clk", clkrst6_base,
+				BIT(0), 0);
+
+	clk = clk_reg_prcc_pclk("p6_pclk1", "per6clk", clkrst6_base,
+				BIT(1), 0);
+
+	clk = clk_reg_prcc_pclk("p6_pclk2", "per6clk", clkrst6_base,
+				BIT(2), 0);
+
+	clk = clk_reg_prcc_pclk("p6_pclk3", "per6clk", clkrst6_base,
+				BIT(3), 0);
+
+	clk = clk_reg_prcc_pclk("p6_pclk4", "per6clk", clkrst6_base,
+				BIT(4), 0);
+
+	clk = clk_reg_prcc_pclk("p6_pclk5", "per6clk", clkrst6_base,
+				BIT(5), 0);
+
+	clk = clk_reg_prcc_pclk("p6_pclk6", "per6clk", clkrst6_base,
+				BIT(6), 0);
+
+	clk = clk_reg_prcc_pclk("p6_pclk7", "per6clk", clkrst6_base,
+				BIT(7), 0);
+
+	/* PRCC K-clocks
+	 *
+	 * FIXME: Some drivers requires PERPIH[n| to be automatically enabled
+	 * by enabling just the K-clock, even if it is not a valid parent to
+	 * the K-clock. Until drivers get fixed we might need some kind of
+	 * "parent muxed join".
+	 */
+
+	/* Periph1 */
+	clk = clk_reg_prcc_kclk("p1_uart0_kclk", "uartclk",
+			clkrst1_base, BIT(0), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p1_uart1_kclk", "uartclk",
+			clkrst1_base, BIT(1), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p1_i2c1_kclk", "i2cclk",
+			clkrst1_base, BIT(2), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p1_msp0_kclk", "msp02clk",
+			clkrst1_base, BIT(3), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p1_msp1_kclk", "msp1clk",
+			clkrst1_base, BIT(4), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p1_sdi0_kclk", "sdmmcclk",
+			clkrst1_base, BIT(5), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p1_i2c2_kclk", "i2cclk",
+			clkrst1_base, BIT(6), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p1_slimbus0_kclk", "slimclk",
+			clkrst1_base, BIT(8), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p1_i2c4_kclk", "i2cclk",
+			clkrst1_base, BIT(9), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p1_msp3_kclk", "msp1clk",
+			clkrst1_base, BIT(10), CLK_SET_RATE_GATE);
+
+	/* Periph2 */
+	clk = clk_reg_prcc_kclk("p2_i2c3_kclk", "i2cclk",
+			clkrst2_base, BIT(0), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p2_sdi4_kclk", "sdmmcclk",
+			clkrst2_base, BIT(2), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p2_msp2_kclk", "msp02clk",
+			clkrst2_base, BIT(3), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p2_sdi1_kclk", "sdmmcclk",
+			clkrst2_base, BIT(4), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p2_sdi3_kclk", "sdmmcclk",
+			clkrst2_base, BIT(5), CLK_SET_RATE_GATE);
+
+	/* Note that rate is received from parent. */
+	clk = clk_reg_prcc_kclk("p2_ssirx_kclk", "hsirxclk",
+			clkrst2_base, BIT(6),
+			CLK_SET_RATE_GATE|CLK_SET_RATE_PARENT);
+	clk = clk_reg_prcc_kclk("p2_ssitx_kclk", "hsitxclk",
+			clkrst2_base, BIT(7),
+			CLK_SET_RATE_GATE|CLK_SET_RATE_PARENT);
+
+	/* Periph3 */
+	clk = clk_reg_prcc_kclk("p3_ssp0_kclk", "sspclk",
+			clkrst3_base, BIT(1), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p3_ssp1_kclk", "sspclk",
+			clkrst3_base, BIT(2), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p3_i2c0_kclk", "i2cclk",
+			clkrst3_base, BIT(3), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p3_sdi2_kclk", "sdmmcclk",
+			clkrst3_base, BIT(4), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p3_ske_kclk", "rtc32k",
+			clkrst3_base, BIT(5), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p3_uart2_kclk", "uartclk",
+			clkrst3_base, BIT(6), CLK_SET_RATE_GATE);
+
+	clk = clk_reg_prcc_kclk("p3_sdi5_kclk", "sdmmcclk",
+			clkrst3_base, BIT(7), CLK_SET_RATE_GATE);
+
+	/* Periph6 */
+	clk = clk_reg_prcc_kclk("p3_rng_kclk", "rngclk",
+			clkrst6_base, BIT(0), CLK_SET_RATE_GATE);
+}
diff --git a/include/linux/platform_data/clk-ux500.h b/include/linux/platform_data/clk-ux500.h
index 9d98f3aaa16c..97baf831e071 100644
--- a/include/linux/platform_data/clk-ux500.h
+++ b/include/linux/platform_data/clk-ux500.h
@@ -10,6 +10,9 @@
 #ifndef __CLK_UX500_H
 #define __CLK_UX500_H
 
+void u8500_of_clk_init(u32 clkrst1_base, u32 clkrst2_base, u32 clkrst3_base,
+		       u32 clkrst5_base, u32 clkrst6_base);
+
 void u8500_clk_init(u32 clkrst1_base, u32 clkrst2_base, u32 clkrst3_base,
 		    u32 clkrst5_base, u32 clkrst6_base);
 void u9540_clk_init(u32 clkrst1_base, u32 clkrst2_base, u32 clkrst3_base,
-- 
cgit v1.2.3


From ad93220de7da1f5bd8f9e4be1f1eeefba5eed0ad Mon Sep 17 00:00:00 2001
From: Dong Aisheng <b29396@freescale.com>
Date: Fri, 13 Sep 2013 19:11:35 +0800
Subject: mmc: sdhci-esdhc-imx: change pinctrl state according to uhs mode

Without proper pinctrl state, the card may not be able to work
on high speed stablely. e.g. SDR104.

This patch add pinctrl state switch code according to different
uhs mode include 100mhz sate, 200mhz sate and normal state
(50Mhz and below).

Signed-off-by: Dong Aisheng <b29396@freescale.com>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c          | 96 ++++++++++++++++++++++++++++-
 include/linux/platform_data/mmc-esdhc-imx.h |  4 ++
 2 files changed, 99 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index f906c206901d..ed00d6ddd5fd 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -53,6 +53,10 @@
 
 #define ESDHC_TUNING_BLOCK_PATTERN_LEN	64
 
+/* pinctrl state */
+#define ESDHC_PINCTRL_STATE_100MHZ	"state_100mhz"
+#define ESDHC_PINCTRL_STATE_200MHZ	"state_200mhz"
+
 /*
  * Our interpretation of the SDHCI_HOST_CONTROL register
  */
@@ -94,6 +98,9 @@ struct pltfm_imx_data {
 	u32 scratchpad;
 	enum imx_esdhc_type devtype;
 	struct pinctrl *pinctrl;
+	struct pinctrl_state *pins_default;
+	struct pinctrl_state *pins_100mhz;
+	struct pinctrl_state *pins_200mhz;
 	struct esdhc_platform_data boarddata;
 	struct clk *clk_ipg;
 	struct clk *clk_ahb;
@@ -693,6 +700,62 @@ static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode)
 	return ret;
 }
 
+static int esdhc_change_pinstate(struct sdhci_host *host,
+						unsigned int uhs)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pinctrl_state *pinctrl;
+
+	dev_dbg(mmc_dev(host->mmc), "change pinctrl state for uhs %d\n", uhs);
+
+	if (IS_ERR(imx_data->pinctrl) ||
+		IS_ERR(imx_data->pins_default) ||
+		IS_ERR(imx_data->pins_100mhz) ||
+		IS_ERR(imx_data->pins_200mhz))
+		return -EINVAL;
+
+	switch (uhs) {
+	case MMC_TIMING_UHS_SDR50:
+		pinctrl = imx_data->pins_100mhz;
+		break;
+	case MMC_TIMING_UHS_SDR104:
+		pinctrl = imx_data->pins_200mhz;
+		break;
+	default:
+		/* back to default state for other legacy timing */
+		pinctrl = imx_data->pins_default;
+	}
+
+	return pinctrl_select_state(imx_data->pinctrl, pinctrl);
+}
+
+static int esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+
+	switch (uhs) {
+	case MMC_TIMING_UHS_SDR12:
+		imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR12;
+		break;
+	case MMC_TIMING_UHS_SDR25:
+		imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR25;
+		break;
+	case MMC_TIMING_UHS_SDR50:
+		imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR50;
+		break;
+	case MMC_TIMING_UHS_SDR104:
+		imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR104;
+		break;
+	case MMC_TIMING_UHS_DDR50:
+		imx_data->uhs_mode = SDHCI_CTRL_UHS_DDR50;
+		break;
+	}
+
+	return esdhc_change_pinstate(host, uhs);
+}
+
 static const struct sdhci_ops sdhci_esdhc_ops = {
 	.read_l = esdhc_readl_le,
 	.read_w = esdhc_readw_le,
@@ -704,6 +767,7 @@ static const struct sdhci_ops sdhci_esdhc_ops = {
 	.get_min_clock = esdhc_pltfm_get_min_clock,
 	.get_ro = esdhc_pltfm_get_ro,
 	.platform_bus_width = esdhc_pltfm_bus_width,
+	.set_uhs_signaling = esdhc_set_uhs_signaling,
 	.platform_execute_tuning = esdhc_executing_tuning,
 };
 
@@ -746,6 +810,11 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
 
 	of_property_read_u32(np, "max-frequency", &boarddata->f_max);
 
+	if (of_find_property(np, "no-1-8-v", NULL))
+		boarddata->support_vsel = false;
+	else
+		boarddata->support_vsel = true;
+
 	return 0;
 }
 #else
@@ -808,12 +877,20 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
 	clk_prepare_enable(imx_data->clk_ipg);
 	clk_prepare_enable(imx_data->clk_ahb);
 
-	imx_data->pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
+	imx_data->pinctrl = devm_pinctrl_get(&pdev->dev);
 	if (IS_ERR(imx_data->pinctrl)) {
 		err = PTR_ERR(imx_data->pinctrl);
 		goto disable_clk;
 	}
 
+	imx_data->pins_default = pinctrl_lookup_state(imx_data->pinctrl,
+						PINCTRL_STATE_DEFAULT);
+	if (IS_ERR(imx_data->pins_default)) {
+		err = PTR_ERR(imx_data->pins_default);
+		dev_err(mmc_dev(host->mmc), "could not get default state\n");
+		goto disable_clk;
+	}
+
 	host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
 
 	if (is_imx25_esdhc(imx_data) || is_imx35_esdhc(imx_data))
@@ -890,6 +967,23 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
 		break;
 	}
 
+	/* sdr50 and sdr104 needs work on 1.8v signal voltage */
+	if ((boarddata->support_vsel) && is_imx6q_usdhc(imx_data)) {
+		imx_data->pins_100mhz = pinctrl_lookup_state(imx_data->pinctrl,
+						ESDHC_PINCTRL_STATE_100MHZ);
+		imx_data->pins_200mhz = pinctrl_lookup_state(imx_data->pinctrl,
+						ESDHC_PINCTRL_STATE_200MHZ);
+		if (IS_ERR(imx_data->pins_100mhz) ||
+				IS_ERR(imx_data->pins_200mhz)) {
+			dev_warn(mmc_dev(host->mmc),
+				"could not get ultra high speed state, work on normal mode\n");
+			/* fall back to not support uhs by specify no 1.8v quirk */
+			host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
+		}
+	} else {
+		host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
+	}
+
 	err = sdhci_add_host(host);
 	if (err)
 		goto disable_clk;
diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h
index d44912d81578..a0f5a8f9b3bc 100644
--- a/include/linux/platform_data/mmc-esdhc-imx.h
+++ b/include/linux/platform_data/mmc-esdhc-imx.h
@@ -10,6 +10,8 @@
 #ifndef __ASM_ARCH_IMX_ESDHC_H
 #define __ASM_ARCH_IMX_ESDHC_H
 
+#include <linux/types.h>
+
 enum wp_types {
 	ESDHC_WP_NONE,		/* no WP, neither controller nor gpio */
 	ESDHC_WP_CONTROLLER,	/* mmc controller internal WP */
@@ -32,6 +34,7 @@ enum cd_types {
  * @cd_gpio:	gpio for card_detect interrupt
  * @wp_type:	type of write_protect method (see wp_types enum above)
  * @cd_type:	type of card_detect method (see cd_types enum above)
+ * @support_vsel:  indicate it supports 1.8v switching
  */
 
 struct esdhc_platform_data {
@@ -41,5 +44,6 @@ struct esdhc_platform_data {
 	enum cd_types cd_type;
 	int max_bus_width;
 	unsigned int f_max;
+	bool support_vsel;
 };
 #endif /* __ASM_ARCH_IMX_ESDHC_H */
-- 
cgit v1.2.3


From 03f3a9107f5e848a42f25e3ab133db9f3b6546c7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:32:49 -0700
Subject: hv: use dev_groups for device attributes

This patch is the first in a series that moves the hv bus code to use the
dev_groups field instead of dev_attrs, as dev_attrs is going away in future
kernel releases.

It moves the id sysfs file to the dev_groups structure, and creates the needed
show/store functions, instead of relying on one "universal" function for this.
By doing this, it removes the need for this to be in a temporary structure.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c   |  1 -
 drivers/hv/vmbus_drv.c | 23 ++++++++++++++++++-----
 include/linux/hyperv.h |  1 -
 3 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 6de6c98ce6eb..b58a1785d038 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -69,7 +69,6 @@ void vmbus_get_debug_info(struct vmbus_channel *channel,
 	u8 monitor_group = (u8)channel->offermsg.monitorid / 32;
 	u8 monitor_offset = (u8)channel->offermsg.monitorid % 32;
 
-	debuginfo->relid = channel->offermsg.child_relid;
 	debuginfo->state = channel->state;
 	memcpy(&debuginfo->interfacetype,
 	       &channel->offermsg.offer.if_type, sizeof(uuid_le));
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index f9fe46f52cfa..5c21b22860af 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -47,7 +47,6 @@ static struct completion probe_event;
 static int irq;
 
 struct hv_device_info {
-	u32 chn_id;
 	u32 chn_state;
 	uuid_le chn_type;
 	uuid_le chn_instance;
@@ -83,7 +82,6 @@ static void get_channel_info(struct hv_device *device,
 
 	vmbus_get_debug_info(device->channel, &debug_info);
 
-	info->chn_id = debug_info.relid;
 	info->chn_state = debug_info.state;
 	memcpy(&info->chn_type, &debug_info.interfacetype,
 	       sizeof(uuid_le));
@@ -156,8 +154,6 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 		ret = sprintf(buf, "vmbus:%s\n", alias_name);
 	} else if (!strcmp(dev_attr->attr.name, "state")) {
 		ret = sprintf(buf, "%d\n", device_info->chn_state);
-	} else if (!strcmp(dev_attr->attr.name, "id")) {
-		ret = sprintf(buf, "%d\n", device_info->chn_id);
 	} else if (!strcmp(dev_attr->attr.name, "out_intr_mask")) {
 		ret = sprintf(buf, "%d\n", device_info->outbound.int_mask);
 	} else if (!strcmp(dev_attr->attr.name, "out_read_index")) {
@@ -204,9 +200,25 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 	return ret;
 }
 
+static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
+		       char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
+}
+static DEVICE_ATTR_RO(id);
+
+static struct attribute *vmbus_attrs[] = {
+	&dev_attr_id.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vmbus);
+
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct device_attribute vmbus_device_attrs[] = {
-	__ATTR(id, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(state, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(class_id, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL),
@@ -384,6 +396,7 @@ static struct bus_type  hv_bus = {
 	.probe =		vmbus_probe,
 	.uevent =		vmbus_uevent,
 	.dev_attrs =	vmbus_device_attrs,
+	.dev_groups =		vmbus_groups,
 };
 
 static const char *driver_name = "hyperv";
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index a3b8b2e2d244..45e9b65e6ab6 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -900,7 +900,6 @@ enum vmbus_channel_state {
 };
 
 struct vmbus_channel_debug_info {
-	u32 relid;
 	enum vmbus_channel_state state;
 	uuid_le interfacetype;
 	uuid_le interface_instance;
-- 
cgit v1.2.3


From a8fb5f3d582d3b4997d9449ceb82a0223d169486 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:32:50 -0700
Subject: hv: move "state" bus attribute to dev_groups

This moves the "state" bus attribute to the dev_groups structure,
removing the need for it to be in a temporary structure.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c   |  1 -
 drivers/hv/vmbus_drv.c | 17 ++++++++++++-----
 include/linux/hyperv.h |  1 -
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index b58a1785d038..64f19f226c4e 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -69,7 +69,6 @@ void vmbus_get_debug_info(struct vmbus_channel *channel,
 	u8 monitor_group = (u8)channel->offermsg.monitorid / 32;
 	u8 monitor_offset = (u8)channel->offermsg.monitorid % 32;
 
-	debuginfo->state = channel->state;
 	memcpy(&debuginfo->interfacetype,
 	       &channel->offermsg.offer.if_type, sizeof(uuid_le));
 	memcpy(&debuginfo->interface_instance,
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 5c21b22860af..1e5bc9c49c63 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -47,7 +47,6 @@ static struct completion probe_event;
 static int irq;
 
 struct hv_device_info {
-	u32 chn_state;
 	uuid_le chn_type;
 	uuid_le chn_instance;
 
@@ -82,7 +81,6 @@ static void get_channel_info(struct hv_device *device,
 
 	vmbus_get_debug_info(device->channel, &debug_info);
 
-	info->chn_state = debug_info.state;
 	memcpy(&info->chn_type, &debug_info.interfacetype,
 	       sizeof(uuid_le));
 	memcpy(&info->chn_instance, &debug_info.interface_instance,
@@ -152,8 +150,6 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 	} else if (!strcmp(dev_attr->attr.name, "modalias")) {
 		print_alias_name(hv_dev, alias_name);
 		ret = sprintf(buf, "vmbus:%s\n", alias_name);
-	} else if (!strcmp(dev_attr->attr.name, "state")) {
-		ret = sprintf(buf, "%d\n", device_info->chn_state);
 	} else if (!strcmp(dev_attr->attr.name, "out_intr_mask")) {
 		ret = sprintf(buf, "%d\n", device_info->outbound.int_mask);
 	} else if (!strcmp(dev_attr->attr.name, "out_read_index")) {
@@ -211,15 +207,26 @@ static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
 }
 static DEVICE_ATTR_RO(id);
 
+static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
+			  char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "%d\n", hv_dev->channel->state);
+}
+static DEVICE_ATTR_RO(state);
+
 static struct attribute *vmbus_attrs[] = {
 	&dev_attr_id.attr,
+	&dev_attr_state.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(vmbus);
 
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct device_attribute vmbus_device_attrs[] = {
-	__ATTR(state, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(class_id, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(monitor_id, S_IRUGO, vmbus_show_device_attr, NULL),
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 45e9b65e6ab6..b350a8cfcff6 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -900,7 +900,6 @@ enum vmbus_channel_state {
 };
 
 struct vmbus_channel_debug_info {
-	enum vmbus_channel_state state;
 	uuid_le interfacetype;
 	uuid_le interface_instance;
 	u32 monitorid;
-- 
cgit v1.2.3


From 5ffd00e241ea30fdd907d56419170a1e1338664c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:32:51 -0700
Subject: hv: move "monitor_id" bus attribute to dev_groups

This moves the "state" bus attribute to the dev_groups structure,
removing the need for it to be in a temporary structure.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c   |  2 --
 drivers/hv/vmbus_drv.c | 18 ++++++++++++------
 include/linux/hyperv.h |  1 -
 3 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 64f19f226c4e..f26a5d252547 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -77,8 +77,6 @@ void vmbus_get_debug_info(struct vmbus_channel *channel,
 
 	monitorpage = (struct hv_monitor_page *)vmbus_connection.monitor_pages;
 
-	debuginfo->monitorid = channel->offermsg.monitorid;
-
 	debuginfo->servermonitor_pending =
 			monitorpage->trigger_group[monitor_group].pending;
 	debuginfo->servermonitor_latency =
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 1e5bc9c49c63..2114ecb83c93 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -50,7 +50,6 @@ struct hv_device_info {
 	uuid_le chn_type;
 	uuid_le chn_instance;
 
-	u32 monitor_id;
 	u32 server_monitor_pending;
 	u32 server_monitor_latency;
 	u32 server_monitor_conn_id;
@@ -86,8 +85,6 @@ static void get_channel_info(struct hv_device *device,
 	memcpy(&info->chn_instance, &debug_info.interface_instance,
 	       sizeof(uuid_le));
 
-	info->monitor_id = debug_info.monitorid;
-
 	info->server_monitor_pending = debug_info.servermonitor_pending;
 	info->server_monitor_latency = debug_info.servermonitor_latency;
 	info->server_monitor_conn_id = debug_info.servermonitor_connectionid;
@@ -174,8 +171,6 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 	} else if (!strcmp(dev_attr->attr.name, "in_write_bytes_avail")) {
 		ret = sprintf(buf, "%d\n",
 			       device_info->inbound.bytes_avail_towrite);
-	} else if (!strcmp(dev_attr->attr.name, "monitor_id")) {
-		ret = sprintf(buf, "%d\n", device_info->monitor_id);
 	} else if (!strcmp(dev_attr->attr.name, "server_monitor_pending")) {
 		ret = sprintf(buf, "%d\n", device_info->server_monitor_pending);
 	} else if (!strcmp(dev_attr->attr.name, "server_monitor_latency")) {
@@ -218,9 +213,21 @@ static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
 }
 static DEVICE_ATTR_RO(state);
 
+static ssize_t monitor_id_show(struct device *dev,
+			       struct device_attribute *dev_attr, char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
+}
+static DEVICE_ATTR_RO(monitor_id);
+
 static struct attribute *vmbus_attrs[] = {
 	&dev_attr_id.attr,
 	&dev_attr_state.attr,
+	&dev_attr_monitor_id.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(vmbus);
@@ -229,7 +236,6 @@ ATTRIBUTE_GROUPS(vmbus);
 static struct device_attribute vmbus_device_attrs[] = {
 	__ATTR(class_id, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL),
-	__ATTR(monitor_id, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(modalias, S_IRUGO, vmbus_show_device_attr, NULL),
 
 	__ATTR(server_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL),
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b350a8cfcff6..888a8e533d61 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -902,7 +902,6 @@ enum vmbus_channel_state {
 struct vmbus_channel_debug_info {
 	uuid_le interfacetype;
 	uuid_le interface_instance;
-	u32 monitorid;
 	u32 servermonitor_pending;
 	u32 servermonitor_latency;
 	u32 servermonitor_connectionid;
-- 
cgit v1.2.3


From 68234c049cc1637a8898ebb3743c8587560929b7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:32:53 -0700
Subject: hv: move "class_id" bus attribute to dev_groups

This moves the "class_id" bus attribute to the dev_groups structure,
removing the need for it to be in a temporary structure.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c   |  2 --
 drivers/hv/vmbus_drv.c | 22 +++++++++++++++-------
 include/linux/hyperv.h |  1 -
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index f26a5d252547..df9499c1ef32 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -69,8 +69,6 @@ void vmbus_get_debug_info(struct vmbus_channel *channel,
 	u8 monitor_group = (u8)channel->offermsg.monitorid / 32;
 	u8 monitor_offset = (u8)channel->offermsg.monitorid % 32;
 
-	memcpy(&debuginfo->interfacetype,
-	       &channel->offermsg.offer.if_type, sizeof(uuid_le));
 	memcpy(&debuginfo->interface_instance,
 	       &channel->offermsg.offer.if_instance,
 	       sizeof(uuid_le));
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e3f4370036ab..48258ae3467d 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -47,7 +47,6 @@ static struct completion probe_event;
 static int irq;
 
 struct hv_device_info {
-	uuid_le chn_type;
 	uuid_le chn_instance;
 
 	u32 server_monitor_pending;
@@ -80,8 +79,6 @@ static void get_channel_info(struct hv_device *device,
 
 	vmbus_get_debug_info(device->channel, &debug_info);
 
-	memcpy(&info->chn_type, &debug_info.interfacetype,
-	       sizeof(uuid_le));
 	memcpy(&info->chn_instance, &debug_info.interface_instance,
 	       sizeof(uuid_le));
 
@@ -139,9 +136,7 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 
 	get_channel_info(hv_dev, device_info);
 
-	if (!strcmp(dev_attr->attr.name, "class_id")) {
-		ret = sprintf(buf, "{%pUl}\n", device_info->chn_type.b);
-	} else if (!strcmp(dev_attr->attr.name, "device_id")) {
+	if (!strcmp(dev_attr->attr.name, "device_id")) {
 		ret = sprintf(buf, "{%pUl}\n", device_info->chn_instance.b);
 	} else if (!strcmp(dev_attr->attr.name, "out_intr_mask")) {
 		ret = sprintf(buf, "%d\n", device_info->outbound.int_mask);
@@ -220,6 +215,18 @@ static ssize_t monitor_id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(monitor_id);
 
+static ssize_t class_id_show(struct device *dev,
+			       struct device_attribute *dev_attr, char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "{%pUl}\n",
+		       hv_dev->channel->offermsg.offer.if_type.b);
+}
+static DEVICE_ATTR_RO(class_id);
+
 static ssize_t modalias_show(struct device *dev,
 			     struct device_attribute *dev_attr, char *buf)
 {
@@ -231,10 +238,12 @@ static ssize_t modalias_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(modalias);
 
+
 static struct attribute *vmbus_attrs[] = {
 	&dev_attr_id.attr,
 	&dev_attr_state.attr,
 	&dev_attr_monitor_id.attr,
+	&dev_attr_class_id.attr,
 	&dev_attr_modalias.attr,
 	NULL,
 };
@@ -242,7 +251,6 @@ ATTRIBUTE_GROUPS(vmbus);
 
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct device_attribute vmbus_device_attrs[] = {
-	__ATTR(class_id, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL),
 
 	__ATTR(server_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL),
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 888a8e533d61..8ccf6f686272 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -900,7 +900,6 @@ enum vmbus_channel_state {
 };
 
 struct vmbus_channel_debug_info {
-	uuid_le interfacetype;
 	uuid_le interface_instance;
 	u32 servermonitor_pending;
 	u32 servermonitor_latency;
-- 
cgit v1.2.3


From 7c55e1d0e64cf5acd8cf4a25927e2c7c322063aa Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:32:54 -0700
Subject: hv: move "device_id" bus attribute to dev_groups

This moves the "device_id" bus attribute to the dev_groups structure,
removing the need for it to be in a temporary structure.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c   |  4 ----
 drivers/hv/vmbus_drv.c | 24 ++++++++++++++----------
 include/linux/hyperv.h |  1 -
 3 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index df9499c1ef32..dde30b48c56c 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -69,10 +69,6 @@ void vmbus_get_debug_info(struct vmbus_channel *channel,
 	u8 monitor_group = (u8)channel->offermsg.monitorid / 32;
 	u8 monitor_offset = (u8)channel->offermsg.monitorid % 32;
 
-	memcpy(&debuginfo->interface_instance,
-	       &channel->offermsg.offer.if_instance,
-	       sizeof(uuid_le));
-
 	monitorpage = (struct hv_monitor_page *)vmbus_connection.monitor_pages;
 
 	debuginfo->servermonitor_pending =
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 48258ae3467d..944dc4b0b54d 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -47,8 +47,6 @@ static struct completion probe_event;
 static int irq;
 
 struct hv_device_info {
-	uuid_le chn_instance;
-
 	u32 server_monitor_pending;
 	u32 server_monitor_latency;
 	u32 server_monitor_conn_id;
@@ -79,9 +77,6 @@ static void get_channel_info(struct hv_device *device,
 
 	vmbus_get_debug_info(device->channel, &debug_info);
 
-	memcpy(&info->chn_instance, &debug_info.interface_instance,
-	       sizeof(uuid_le));
-
 	info->server_monitor_pending = debug_info.servermonitor_pending;
 	info->server_monitor_latency = debug_info.servermonitor_latency;
 	info->server_monitor_conn_id = debug_info.servermonitor_connectionid;
@@ -136,9 +131,7 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 
 	get_channel_info(hv_dev, device_info);
 
-	if (!strcmp(dev_attr->attr.name, "device_id")) {
-		ret = sprintf(buf, "{%pUl}\n", device_info->chn_instance.b);
-	} else if (!strcmp(dev_attr->attr.name, "out_intr_mask")) {
+	if (!strcmp(dev_attr->attr.name, "out_intr_mask")) {
 		ret = sprintf(buf, "%d\n", device_info->outbound.int_mask);
 	} else if (!strcmp(dev_attr->attr.name, "out_read_index")) {
 		ret = sprintf(buf, "%d\n", device_info->outbound.read_idx);
@@ -227,6 +220,18 @@ static ssize_t class_id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(class_id);
 
+static ssize_t device_id_show(struct device *dev,
+			      struct device_attribute *dev_attr, char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "{%pUl}\n",
+		       hv_dev->channel->offermsg.offer.if_instance.b);
+}
+static DEVICE_ATTR_RO(device_id);
+
 static ssize_t modalias_show(struct device *dev,
 			     struct device_attribute *dev_attr, char *buf)
 {
@@ -244,6 +249,7 @@ static struct attribute *vmbus_attrs[] = {
 	&dev_attr_state.attr,
 	&dev_attr_monitor_id.attr,
 	&dev_attr_class_id.attr,
+	&dev_attr_device_id.attr,
 	&dev_attr_modalias.attr,
 	NULL,
 };
@@ -251,8 +257,6 @@ ATTRIBUTE_GROUPS(vmbus);
 
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct device_attribute vmbus_device_attrs[] = {
-	__ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL),
-
 	__ATTR(server_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(server_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(server_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL),
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 8ccf6f686272..687c01b85037 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -900,7 +900,6 @@ enum vmbus_channel_state {
 };
 
 struct vmbus_channel_debug_info {
-	uuid_le interface_instance;
 	u32 servermonitor_pending;
 	u32 servermonitor_latency;
 	u32 servermonitor_connectionid;
-- 
cgit v1.2.3


From 76c52bbe5e5ffc6812dcd49729c09f5a207b4a9a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:32:56 -0700
Subject: hv: move "client/server_monitor_pending" bus attributes to dev_groups

This moves the "client_monitor_pending" and "server_monitor_pending" bus
attributes to the dev_groups structure, removing the need for it to be
in a temporary structure.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c   |  4 ----
 drivers/hv/vmbus_drv.c | 56 +++++++++++++++++++++++++++++++++++++++++---------
 include/linux/hyperv.h |  2 --
 3 files changed, 46 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 04bf06560085..d600360ee788 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -70,8 +70,6 @@ void vmbus_get_debug_info(struct vmbus_channel *channel,
 	u8 monitor_offset = (u8)channel->offermsg.monitorid % 32;
 
 	monitorpage = vmbus_connection.monitor_pages[0];
-	debuginfo->servermonitor_pending =
-			monitorpage->trigger_group[monitor_group].pending;
 	debuginfo->servermonitor_latency =
 			monitorpage->latency[monitor_group][monitor_offset];
 	debuginfo->servermonitor_connectionid =
@@ -79,8 +77,6 @@ void vmbus_get_debug_info(struct vmbus_channel *channel,
 					[monitor_offset].connectionid.u.id;
 
 	monitorpage = vmbus_connection.monitor_pages[1];
-	debuginfo->clientmonitor_pending =
-			monitorpage->trigger_group[monitor_group].pending;
 	debuginfo->clientmonitor_latency =
 			monitorpage->latency[monitor_group][monitor_offset];
 	debuginfo->clientmonitor_connectionid =
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 944dc4b0b54d..5a44957b7064 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -47,10 +47,8 @@ static struct completion probe_event;
 static int irq;
 
 struct hv_device_info {
-	u32 server_monitor_pending;
 	u32 server_monitor_latency;
 	u32 server_monitor_conn_id;
-	u32 client_monitor_pending;
 	u32 client_monitor_latency;
 	u32 client_monitor_conn_id;
 
@@ -77,11 +75,9 @@ static void get_channel_info(struct hv_device *device,
 
 	vmbus_get_debug_info(device->channel, &debug_info);
 
-	info->server_monitor_pending = debug_info.servermonitor_pending;
 	info->server_monitor_latency = debug_info.servermonitor_latency;
 	info->server_monitor_conn_id = debug_info.servermonitor_connectionid;
 
-	info->client_monitor_pending = debug_info.clientmonitor_pending;
 	info->client_monitor_latency = debug_info.clientmonitor_latency;
 	info->client_monitor_conn_id = debug_info.clientmonitor_connectionid;
 
@@ -155,15 +151,11 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 	} else if (!strcmp(dev_attr->attr.name, "in_write_bytes_avail")) {
 		ret = sprintf(buf, "%d\n",
 			       device_info->inbound.bytes_avail_towrite);
-	} else if (!strcmp(dev_attr->attr.name, "server_monitor_pending")) {
-		ret = sprintf(buf, "%d\n", device_info->server_monitor_pending);
 	} else if (!strcmp(dev_attr->attr.name, "server_monitor_latency")) {
 		ret = sprintf(buf, "%d\n", device_info->server_monitor_latency);
 	} else if (!strcmp(dev_attr->attr.name, "server_monitor_conn_id")) {
 		ret = sprintf(buf, "%d\n",
 			       device_info->server_monitor_conn_id);
-	} else if (!strcmp(dev_attr->attr.name, "client_monitor_pending")) {
-		ret = sprintf(buf, "%d\n", device_info->client_monitor_pending);
 	} else if (!strcmp(dev_attr->attr.name, "client_monitor_latency")) {
 		ret = sprintf(buf, "%d\n", device_info->client_monitor_latency);
 	} else if (!strcmp(dev_attr->attr.name, "client_monitor_conn_id")) {
@@ -175,6 +167,23 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 	return ret;
 }
 
+static u8 channel_monitor_group(struct vmbus_channel *channel)
+{
+	return (u8)channel->offermsg.monitorid / 32;
+}
+
+static u8 channel_monitor_offset(struct vmbus_channel *channel)
+{
+	return (u8)channel->offermsg.monitorid % 32;
+}
+
+static u32 channel_pending(struct vmbus_channel *channel,
+			   struct hv_monitor_page *monitor_page)
+{
+	u8 monitor_group = channel_monitor_group(channel);
+	return monitor_page->trigger_group[monitor_group].pending;
+}
+
 static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
 		       char *buf)
 {
@@ -243,6 +252,33 @@ static ssize_t modalias_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(modalias);
 
+static ssize_t server_monitor_pending_show(struct device *dev,
+					   struct device_attribute *dev_attr,
+					   char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "%d\n",
+		       channel_pending(hv_dev->channel,
+				       vmbus_connection.monitor_pages[1]));
+}
+static DEVICE_ATTR_RO(server_monitor_pending);
+
+static ssize_t client_monitor_pending_show(struct device *dev,
+					   struct device_attribute *dev_attr,
+					   char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "%d\n",
+		       channel_pending(hv_dev->channel,
+				       vmbus_connection.monitor_pages[1]));
+}
+static DEVICE_ATTR_RO(client_monitor_pending);
 
 static struct attribute *vmbus_attrs[] = {
 	&dev_attr_id.attr,
@@ -251,17 +287,17 @@ static struct attribute *vmbus_attrs[] = {
 	&dev_attr_class_id.attr,
 	&dev_attr_device_id.attr,
 	&dev_attr_modalias.attr,
+	&dev_attr_server_monitor_pending.attr,
+	&dev_attr_client_monitor_pending.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(vmbus);
 
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct device_attribute vmbus_device_attrs[] = {
-	__ATTR(server_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(server_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(server_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL),
 
-	__ATTR(client_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(client_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(client_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL),
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 687c01b85037..f0a7d27ffb21 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -900,10 +900,8 @@ enum vmbus_channel_state {
 };
 
 struct vmbus_channel_debug_info {
-	u32 servermonitor_pending;
 	u32 servermonitor_latency;
 	u32 servermonitor_connectionid;
-	u32 clientmonitor_pending;
 	u32 clientmonitor_latency;
 	u32 clientmonitor_connectionid;
 
-- 
cgit v1.2.3


From 1cee272b0249c5007391da6cf42903b8f30dbc5a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:32:57 -0700
Subject: hv: move "client/server_monitor_latency" bus attributes to dev_groups

This moves the "client_monitor_latency" and "server_monitor_latency" bus
attributes to the dev_groups structure, removing the need for it to be
in a temporary structure.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c   |  4 ----
 drivers/hv/vmbus_drv.c | 48 ++++++++++++++++++++++++++++++++++++++----------
 include/linux/hyperv.h |  2 --
 3 files changed, 38 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index d600360ee788..ff61464f57ca 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -70,15 +70,11 @@ void vmbus_get_debug_info(struct vmbus_channel *channel,
 	u8 monitor_offset = (u8)channel->offermsg.monitorid % 32;
 
 	monitorpage = vmbus_connection.monitor_pages[0];
-	debuginfo->servermonitor_latency =
-			monitorpage->latency[monitor_group][monitor_offset];
 	debuginfo->servermonitor_connectionid =
 			monitorpage->parameter[monitor_group]
 					[monitor_offset].connectionid.u.id;
 
 	monitorpage = vmbus_connection.monitor_pages[1];
-	debuginfo->clientmonitor_latency =
-			monitorpage->latency[monitor_group][monitor_offset];
 	debuginfo->clientmonitor_connectionid =
 			monitorpage->parameter[monitor_group]
 					[monitor_offset].connectionid.u.id;
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 5a44957b7064..461b989835b6 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -47,9 +47,7 @@ static struct completion probe_event;
 static int irq;
 
 struct hv_device_info {
-	u32 server_monitor_latency;
 	u32 server_monitor_conn_id;
-	u32 client_monitor_latency;
 	u32 client_monitor_conn_id;
 
 	struct hv_dev_port_info inbound;
@@ -75,10 +73,8 @@ static void get_channel_info(struct hv_device *device,
 
 	vmbus_get_debug_info(device->channel, &debug_info);
 
-	info->server_monitor_latency = debug_info.servermonitor_latency;
 	info->server_monitor_conn_id = debug_info.servermonitor_connectionid;
 
-	info->client_monitor_latency = debug_info.clientmonitor_latency;
 	info->client_monitor_conn_id = debug_info.clientmonitor_connectionid;
 
 	info->inbound.int_mask = debug_info.inbound.current_interrupt_mask;
@@ -151,13 +147,9 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 	} else if (!strcmp(dev_attr->attr.name, "in_write_bytes_avail")) {
 		ret = sprintf(buf, "%d\n",
 			       device_info->inbound.bytes_avail_towrite);
-	} else if (!strcmp(dev_attr->attr.name, "server_monitor_latency")) {
-		ret = sprintf(buf, "%d\n", device_info->server_monitor_latency);
 	} else if (!strcmp(dev_attr->attr.name, "server_monitor_conn_id")) {
 		ret = sprintf(buf, "%d\n",
 			       device_info->server_monitor_conn_id);
-	} else if (!strcmp(dev_attr->attr.name, "client_monitor_latency")) {
-		ret = sprintf(buf, "%d\n", device_info->client_monitor_latency);
 	} else if (!strcmp(dev_attr->attr.name, "client_monitor_conn_id")) {
 		ret = sprintf(buf, "%d\n",
 			       device_info->client_monitor_conn_id);
@@ -184,6 +176,14 @@ static u32 channel_pending(struct vmbus_channel *channel,
 	return monitor_page->trigger_group[monitor_group].pending;
 }
 
+static u32 channel_latency(struct vmbus_channel *channel,
+			   struct hv_monitor_page *monitor_page)
+{
+	u8 monitor_group = channel_monitor_group(channel);
+	u8 monitor_offset = channel_monitor_offset(channel);
+	return monitor_page->latency[monitor_group][monitor_offset];
+}
+
 static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
 		       char *buf)
 {
@@ -280,6 +280,34 @@ static ssize_t client_monitor_pending_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(client_monitor_pending);
 
+static ssize_t server_monitor_latency_show(struct device *dev,
+					   struct device_attribute *dev_attr,
+					   char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "%d\n",
+		       channel_latency(hv_dev->channel,
+				       vmbus_connection.monitor_pages[0]));
+}
+static DEVICE_ATTR_RO(server_monitor_latency);
+
+static ssize_t client_monitor_latency_show(struct device *dev,
+					   struct device_attribute *dev_attr,
+					   char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "%d\n",
+		       channel_latency(hv_dev->channel,
+				       vmbus_connection.monitor_pages[1]));
+}
+static DEVICE_ATTR_RO(client_monitor_latency);
+
 static struct attribute *vmbus_attrs[] = {
 	&dev_attr_id.attr,
 	&dev_attr_state.attr,
@@ -289,16 +317,16 @@ static struct attribute *vmbus_attrs[] = {
 	&dev_attr_modalias.attr,
 	&dev_attr_server_monitor_pending.attr,
 	&dev_attr_client_monitor_pending.attr,
+	&dev_attr_server_monitor_latency.attr,
+	&dev_attr_client_monitor_latency.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(vmbus);
 
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct device_attribute vmbus_device_attrs[] = {
-	__ATTR(server_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(server_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL),
 
-	__ATTR(client_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(client_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL),
 
 	__ATTR(out_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL),
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index f0a7d27ffb21..ec1e5033bb99 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -900,9 +900,7 @@ enum vmbus_channel_state {
 };
 
 struct vmbus_channel_debug_info {
-	u32 servermonitor_latency;
 	u32 servermonitor_connectionid;
-	u32 clientmonitor_latency;
 	u32 clientmonitor_connectionid;
 
 	struct hv_ring_buffer_debug_info inbound;
-- 
cgit v1.2.3


From 4947c7453b184bc33a0056cf95af61c3cad11ef6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:32:58 -0700
Subject: hv: move "client/server_monitor_conn_id" bus attributes to dev_groups

This moves the "client_monitor_conn_id" and "server_monitor_conn_id" bus
attributes to the dev_groups structure, removing the need for it to be
in a temporary structure.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c   | 14 -------------
 drivers/hv/vmbus_drv.c | 55 ++++++++++++++++++++++++++++++++++----------------
 include/linux/hyperv.h |  3 ---
 3 files changed, 38 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index ff61464f57ca..75c26da3e011 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -65,20 +65,6 @@ static void vmbus_setevent(struct vmbus_channel *channel)
 void vmbus_get_debug_info(struct vmbus_channel *channel,
 			      struct vmbus_channel_debug_info *debuginfo)
 {
-	struct hv_monitor_page *monitorpage;
-	u8 monitor_group = (u8)channel->offermsg.monitorid / 32;
-	u8 monitor_offset = (u8)channel->offermsg.monitorid % 32;
-
-	monitorpage = vmbus_connection.monitor_pages[0];
-	debuginfo->servermonitor_connectionid =
-			monitorpage->parameter[monitor_group]
-					[monitor_offset].connectionid.u.id;
-
-	monitorpage = vmbus_connection.monitor_pages[1];
-	debuginfo->clientmonitor_connectionid =
-			monitorpage->parameter[monitor_group]
-					[monitor_offset].connectionid.u.id;
-
 	hv_ringbuffer_get_debuginfo(&channel->inbound, &debuginfo->inbound);
 	hv_ringbuffer_get_debuginfo(&channel->outbound, &debuginfo->outbound);
 }
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 461b989835b6..73cb456e256b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -47,9 +47,6 @@ static struct completion probe_event;
 static int irq;
 
 struct hv_device_info {
-	u32 server_monitor_conn_id;
-	u32 client_monitor_conn_id;
-
 	struct hv_dev_port_info inbound;
 	struct hv_dev_port_info outbound;
 };
@@ -73,10 +70,6 @@ static void get_channel_info(struct hv_device *device,
 
 	vmbus_get_debug_info(device->channel, &debug_info);
 
-	info->server_monitor_conn_id = debug_info.servermonitor_connectionid;
-
-	info->client_monitor_conn_id = debug_info.clientmonitor_connectionid;
-
 	info->inbound.int_mask = debug_info.inbound.current_interrupt_mask;
 	info->inbound.read_idx = debug_info.inbound.current_read_index;
 	info->inbound.write_idx = debug_info.inbound.current_write_index;
@@ -147,12 +140,6 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 	} else if (!strcmp(dev_attr->attr.name, "in_write_bytes_avail")) {
 		ret = sprintf(buf, "%d\n",
 			       device_info->inbound.bytes_avail_towrite);
-	} else if (!strcmp(dev_attr->attr.name, "server_monitor_conn_id")) {
-		ret = sprintf(buf, "%d\n",
-			       device_info->server_monitor_conn_id);
-	} else if (!strcmp(dev_attr->attr.name, "client_monitor_conn_id")) {
-		ret = sprintf(buf, "%d\n",
-			       device_info->client_monitor_conn_id);
 	}
 
 	kfree(device_info);
@@ -184,6 +171,14 @@ static u32 channel_latency(struct vmbus_channel *channel,
 	return monitor_page->latency[monitor_group][monitor_offset];
 }
 
+static u32 channel_conn_id(struct vmbus_channel *channel,
+			   struct hv_monitor_page *monitor_page)
+{
+	u8 monitor_group = channel_monitor_group(channel);
+	u8 monitor_offset = channel_monitor_offset(channel);
+	return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
+}
+
 static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
 		       char *buf)
 {
@@ -308,6 +303,34 @@ static ssize_t client_monitor_latency_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(client_monitor_latency);
 
+static ssize_t server_monitor_conn_id_show(struct device *dev,
+					   struct device_attribute *dev_attr,
+					   char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "%d\n",
+		       channel_conn_id(hv_dev->channel,
+				       vmbus_connection.monitor_pages[0]));
+}
+static DEVICE_ATTR_RO(server_monitor_conn_id);
+
+static ssize_t client_monitor_conn_id_show(struct device *dev,
+					   struct device_attribute *dev_attr,
+					   char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+
+	if (!hv_dev->channel)
+		return -ENODEV;
+	return sprintf(buf, "%d\n",
+		       channel_conn_id(hv_dev->channel,
+				       vmbus_connection.monitor_pages[1]));
+}
+static DEVICE_ATTR_RO(client_monitor_conn_id);
+
 static struct attribute *vmbus_attrs[] = {
 	&dev_attr_id.attr,
 	&dev_attr_state.attr,
@@ -319,16 +342,14 @@ static struct attribute *vmbus_attrs[] = {
 	&dev_attr_client_monitor_pending.attr,
 	&dev_attr_server_monitor_latency.attr,
 	&dev_attr_client_monitor_latency.attr,
+	&dev_attr_server_monitor_conn_id.attr,
+	&dev_attr_client_monitor_conn_id.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(vmbus);
 
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct device_attribute vmbus_device_attrs[] = {
-	__ATTR(server_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL),
-
-	__ATTR(client_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL),
-
 	__ATTR(out_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(out_read_index, S_IRUGO, vmbus_show_device_attr, NULL),
 	__ATTR(out_write_index, S_IRUGO, vmbus_show_device_attr, NULL),
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index ec1e5033bb99..332e80ce9b8a 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -900,9 +900,6 @@ enum vmbus_channel_state {
 };
 
 struct vmbus_channel_debug_info {
-	u32 servermonitor_connectionid;
-	u32 clientmonitor_connectionid;
-
 	struct hv_ring_buffer_debug_info inbound;
 	struct hv_ring_buffer_debug_info outbound;
 };
-- 
cgit v1.2.3


From 2c9be3eacc39948af2341595322c014833699ac5 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:32:59 -0700
Subject: hv: delete vmbus_get_debug_info()

It's only used once, only contains 2 function calls, so just make those
calls directly, deleting the function, and the now unneeded structure
entirely.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c   | 10 ----------
 drivers/hv/vmbus_drv.c | 35 ++++++++++++++++-------------------
 include/linux/hyperv.h |  8 --------
 3 files changed, 16 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 75c26da3e011..94d54591b226 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -59,16 +59,6 @@ static void vmbus_setevent(struct vmbus_channel *channel)
 	}
 }
 
-/*
- * vmbus_get_debug_info -Retrieve various channel debug info
- */
-void vmbus_get_debug_info(struct vmbus_channel *channel,
-			      struct vmbus_channel_debug_info *debuginfo)
-{
-	hv_ringbuffer_get_debuginfo(&channel->inbound, &debuginfo->inbound);
-	hv_ringbuffer_get_debuginfo(&channel->outbound, &debuginfo->outbound);
-}
-
 /*
  * vmbus_open - Open the specified channel.
  */
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 73cb456e256b..62d9311b0541 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -63,29 +63,26 @@ static int vmbus_exists(void)
 static void get_channel_info(struct hv_device *device,
 			     struct hv_device_info *info)
 {
-	struct vmbus_channel_debug_info debug_info;
+	struct hv_ring_buffer_debug_info inbound;
+	struct hv_ring_buffer_debug_info outbound;
 
 	if (!device->channel)
 		return;
 
-	vmbus_get_debug_info(device->channel, &debug_info);
-
-	info->inbound.int_mask = debug_info.inbound.current_interrupt_mask;
-	info->inbound.read_idx = debug_info.inbound.current_read_index;
-	info->inbound.write_idx = debug_info.inbound.current_write_index;
-	info->inbound.bytes_avail_toread =
-		debug_info.inbound.bytes_avail_toread;
-	info->inbound.bytes_avail_towrite =
-		debug_info.inbound.bytes_avail_towrite;
-
-	info->outbound.int_mask =
-		debug_info.outbound.current_interrupt_mask;
-	info->outbound.read_idx = debug_info.outbound.current_read_index;
-	info->outbound.write_idx = debug_info.outbound.current_write_index;
-	info->outbound.bytes_avail_toread =
-		debug_info.outbound.bytes_avail_toread;
-	info->outbound.bytes_avail_towrite =
-		debug_info.outbound.bytes_avail_towrite;
+	hv_ringbuffer_get_debuginfo(&device->channel->inbound, &inbound);
+	hv_ringbuffer_get_debuginfo(&device->channel->outbound, &outbound);
+
+	info->inbound.int_mask = inbound.current_interrupt_mask;
+	info->inbound.read_idx = inbound.current_read_index;
+	info->inbound.write_idx = inbound.current_write_index;
+	info->inbound.bytes_avail_toread = inbound.bytes_avail_toread;
+	info->inbound.bytes_avail_towrite = inbound.bytes_avail_towrite;
+
+	info->outbound.int_mask = outbound.current_interrupt_mask;
+	info->outbound.read_idx = outbound.current_read_index;
+	info->outbound.write_idx = outbound.current_write_index;
+	info->outbound.bytes_avail_toread = outbound.bytes_avail_toread;
+	info->outbound.bytes_avail_towrite = outbound.bytes_avail_towrite;
 }
 
 #define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 332e80ce9b8a..c0e8faf40b35 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -899,11 +899,6 @@ enum vmbus_channel_state {
 	CHANNEL_OPENED_STATE,
 };
 
-struct vmbus_channel_debug_info {
-	struct hv_ring_buffer_debug_info inbound;
-	struct hv_ring_buffer_debug_info outbound;
-};
-
 /*
  * Represents each channel msg on the vmbus connection This is a
  * variable-size data structure depending on the msg type itself
@@ -1169,9 +1164,6 @@ extern int vmbus_recvpacket_raw(struct vmbus_channel *channel,
 				     u64 *requestid);
 
 
-extern void vmbus_get_debug_info(struct vmbus_channel *channel,
-				     struct vmbus_channel_debug_info *debug);
-
 extern void vmbus_ontimer(unsigned long data);
 
 struct hv_dev_port_info {
-- 
cgit v1.2.3


From 1fdde16d1f93376ad2a2df769f756572c2e84cbd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 13 Sep 2013 11:33:00 -0700
Subject: hv: delete struct hv_dev_port_info

It's no longer needed, and the struct hv_ring_buffer_debug_info
structure shouldn't be "global" so move it to the local .h file instead.

Tested-by: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hyperv_vmbus.h |  7 +++++++
 drivers/hv/vmbus_drv.c    | 35 ++++++++++-------------------------
 include/linux/hyperv.h    | 17 -----------------
 3 files changed, 17 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index d58c22ffb29a..e05517616a06 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -514,6 +514,13 @@ struct hv_context {
 
 extern struct hv_context hv_context;
 
+struct hv_ring_buffer_debug_info {
+	u32 current_interrupt_mask;
+	u32 current_read_index;
+	u32 current_write_index;
+	u32 bytes_avail_toread;
+	u32 bytes_avail_towrite;
+};
 
 /* Hv Interface */
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 62d9311b0541..cf3220e8a3a6 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -47,8 +47,8 @@ static struct completion probe_event;
 static int irq;
 
 struct hv_device_info {
-	struct hv_dev_port_info inbound;
-	struct hv_dev_port_info outbound;
+	struct hv_ring_buffer_debug_info inbound;
+	struct hv_ring_buffer_debug_info outbound;
 };
 
 static int vmbus_exists(void)
@@ -63,26 +63,11 @@ static int vmbus_exists(void)
 static void get_channel_info(struct hv_device *device,
 			     struct hv_device_info *info)
 {
-	struct hv_ring_buffer_debug_info inbound;
-	struct hv_ring_buffer_debug_info outbound;
-
 	if (!device->channel)
 		return;
 
-	hv_ringbuffer_get_debuginfo(&device->channel->inbound, &inbound);
-	hv_ringbuffer_get_debuginfo(&device->channel->outbound, &outbound);
-
-	info->inbound.int_mask = inbound.current_interrupt_mask;
-	info->inbound.read_idx = inbound.current_read_index;
-	info->inbound.write_idx = inbound.current_write_index;
-	info->inbound.bytes_avail_toread = inbound.bytes_avail_toread;
-	info->inbound.bytes_avail_towrite = inbound.bytes_avail_towrite;
-
-	info->outbound.int_mask = outbound.current_interrupt_mask;
-	info->outbound.read_idx = outbound.current_read_index;
-	info->outbound.write_idx = outbound.current_write_index;
-	info->outbound.bytes_avail_toread = outbound.bytes_avail_toread;
-	info->outbound.bytes_avail_towrite = outbound.bytes_avail_towrite;
+	hv_ringbuffer_get_debuginfo(&device->channel->inbound, &info->inbound);
+	hv_ringbuffer_get_debuginfo(&device->channel->outbound, &info->outbound);
 }
 
 #define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
@@ -114,11 +99,11 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 	get_channel_info(hv_dev, device_info);
 
 	if (!strcmp(dev_attr->attr.name, "out_intr_mask")) {
-		ret = sprintf(buf, "%d\n", device_info->outbound.int_mask);
+		ret = sprintf(buf, "%d\n", device_info->outbound.current_interrupt_mask);
 	} else if (!strcmp(dev_attr->attr.name, "out_read_index")) {
-		ret = sprintf(buf, "%d\n", device_info->outbound.read_idx);
+		ret = sprintf(buf, "%d\n", device_info->outbound.current_read_index);
 	} else if (!strcmp(dev_attr->attr.name, "out_write_index")) {
-		ret = sprintf(buf, "%d\n", device_info->outbound.write_idx);
+		ret = sprintf(buf, "%d\n", device_info->outbound.current_write_index);
 	} else if (!strcmp(dev_attr->attr.name, "out_read_bytes_avail")) {
 		ret = sprintf(buf, "%d\n",
 			       device_info->outbound.bytes_avail_toread);
@@ -126,11 +111,11 @@ static ssize_t vmbus_show_device_attr(struct device *dev,
 		ret = sprintf(buf, "%d\n",
 			       device_info->outbound.bytes_avail_towrite);
 	} else if (!strcmp(dev_attr->attr.name, "in_intr_mask")) {
-		ret = sprintf(buf, "%d\n", device_info->inbound.int_mask);
+		ret = sprintf(buf, "%d\n", device_info->inbound.current_interrupt_mask);
 	} else if (!strcmp(dev_attr->attr.name, "in_read_index")) {
-		ret = sprintf(buf, "%d\n", device_info->inbound.read_idx);
+		ret = sprintf(buf, "%d\n", device_info->inbound.current_read_index);
 	} else if (!strcmp(dev_attr->attr.name, "in_write_index")) {
-		ret = sprintf(buf, "%d\n", device_info->inbound.write_idx);
+		ret = sprintf(buf, "%d\n", device_info->inbound.current_write_index);
 	} else if (!strcmp(dev_attr->attr.name, "in_read_bytes_avail")) {
 		ret = sprintf(buf, "%d\n",
 			       device_info->inbound.bytes_avail_toread);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index c0e8faf40b35..c68ecfe29441 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -429,15 +429,6 @@ struct hv_ring_buffer_info {
 	u32 ring_data_startoffset;
 };
 
-struct hv_ring_buffer_debug_info {
-	u32 current_interrupt_mask;
-	u32 current_read_index;
-	u32 current_write_index;
-	u32 bytes_avail_toread;
-	u32 bytes_avail_towrite;
-};
-
-
 /*
  *
  * hv_get_ringbuffer_availbytes()
@@ -1166,14 +1157,6 @@ extern int vmbus_recvpacket_raw(struct vmbus_channel *channel,
 
 extern void vmbus_ontimer(unsigned long data);
 
-struct hv_dev_port_info {
-	u32 int_mask;
-	u32 read_idx;
-	u32 write_idx;
-	u32 bytes_avail_toread;
-	u32 bytes_avail_towrite;
-};
-
 /* Base driver object */
 struct hv_driver {
 	const char *name;
-- 
cgit v1.2.3


From 2f268f129c2d1a05d297fe3ee34d393f862d2b22 Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 25 Sep 2013 09:20:07 +0200
Subject: net: add adj_list to save only neighbours

Currently, we distinguish neighbours (first-level linked devices) from
non-neighbours by the neighbour bool in the netdev_adjacent. This could be
quite time-consuming in case we would like to traverse *only* through
neighbours - cause we'd have to traverse through all devices and check for
this flag, and in a (quite common) scenario where we have lots of vlans on
top of bridge, which is on top of a bond - the bonding would have to go
through all those vlans to get its upper neighbour linked devices.

This situation is really unpleasant, cause there are already a lot of cases
when a device with slaves needs to go through them in hot path.

To fix this, introduce a new upper/lower device lists structure -
adj_list, which contains only the neighbours. It works always in
pair with the all_adj_list structure (renamed from upper/lower_dev_list),
i.e. both of them contain the same links, only that all_adj_list contains
also non-neighbour device links. It's really a small change visible,
currently, only for __netdev_adjacent_dev_insert/remove(), and doesn't
change the main linked logic at all.

Also, add some comments a fix a name collision in
netdev_for_each_upper_dev_rcu() and rework the naming by the following
rules:

netdev_(all_)(upper|lower)_*

If "all_" is present, then we work with the whole list of upper/lower
devices, otherwise - only with direct neighbours. Uninline functions - to
get better stack traces.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Cong Wang <amwang@redhat.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c  |   2 +-
 drivers/net/bonding/bond_main.c |  10 +-
 include/linux/netdevice.h       |  28 ++++--
 net/core/dev.c                  | 203 ++++++++++++++++++++--------------------
 4 files changed, 129 insertions(+), 114 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index f428ef574372..8524e33e6754 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1019,7 +1019,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 
 	/* loop through vlans and send one packet for each */
 	rcu_read_lock();
-	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
 		if (upper->priv_flags & IFF_802_1Q_VLAN)
 			alb_send_lp_vid(slave, mac_addr,
 					vlan_dev_vlan_id(upper));
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 55bbb8b8200c..91c4ab8913b1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2267,7 +2267,7 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
 		return true;
 
 	rcu_read_lock();
-	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
 		if (ip == bond_confirm_addr(upper, 0, ip)) {
 			ret = true;
 			break;
@@ -2342,10 +2342,12 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		 *
 		 * TODO: QinQ?
 		 */
-		netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) {
+		netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper,
+						  vlan_iter) {
 			if (!is_vlan_dev(vlan_upper))
 				continue;
-			netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) {
+			netdev_for_each_all_upper_dev_rcu(vlan_upper, upper,
+							  iter) {
 				if (upper == rt->dst.dev) {
 					vlan_id = vlan_dev_vlan_id(vlan_upper);
 					rcu_read_unlock();
@@ -2358,7 +2360,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		 * our upper vlans, then just search for any dev that
 		 * matches, and in case it's a vlan - save the id
 		 */
-		netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+		netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
 			if (upper == rt->dst.dev) {
 				/* if it's a vlan - get its VID */
 				if (is_vlan_dev(upper))
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3de49aca4519..514045c704a8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1143,8 +1143,18 @@ struct net_device {
 	struct list_head	dev_list;
 	struct list_head	napi_list;
 	struct list_head	unreg_list;
-	struct list_head	upper_dev_list; /* List of upper devices */
-	struct list_head	lower_dev_list;
+
+	/* directly linked devices, like slaves for bonding */
+	struct {
+		struct list_head upper;
+		struct list_head lower;
+	} adj_list;
+
+	/* all linked devices, *including* neighbours */
+	struct {
+		struct list_head upper;
+		struct list_head lower;
+	} all_adj_list;
 
 
 	/* currently active device features */
@@ -2813,15 +2823,15 @@ extern int		bpf_jit_enable;
 extern bool netdev_has_upper_dev(struct net_device *dev,
 				 struct net_device *upper_dev);
 extern bool netdev_has_any_upper_dev(struct net_device *dev);
-extern struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
-							struct list_head **iter);
+extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+							    struct list_head **iter);
 
 /* iterate through upper list, must be called under RCU read lock */
-#define netdev_for_each_upper_dev_rcu(dev, upper, iter) \
-	for (iter = &(dev)->upper_dev_list, \
-	     upper = netdev_upper_get_next_dev_rcu(dev, &(iter)); \
-	     upper; \
-	     upper = netdev_upper_get_next_dev_rcu(dev, &(iter)))
+#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
+	for (iter = &(dev)->all_adj_list.upper, \
+	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \
+	     updev; \
+	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
 
 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9be79377a0f3..9a395e03da74 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4373,9 +4373,6 @@ struct netdev_adjacent {
 	/* upper master flag, there can only be one master device per list */
 	bool master;
 
-	/* indicates that this dev is our first-level lower/upper device */
-	bool neighbour;
-
 	/* counter for the number of times this device was added to us */
 	u16 ref_nr;
 
@@ -4385,29 +4382,17 @@ struct netdev_adjacent {
 
 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
 						 struct net_device *adj_dev,
-						 struct list_head *dev_list)
+						 struct list_head *adj_list)
 {
 	struct netdev_adjacent *adj;
 
-	list_for_each_entry(adj, dev_list, list) {
+	list_for_each_entry(adj, adj_list, list) {
 		if (adj->dev == adj_dev)
 			return adj;
 	}
 	return NULL;
 }
 
-static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
-							  struct net_device *udev)
-{
-	return __netdev_find_adj(dev, udev, &dev->upper_dev_list);
-}
-
-static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
-							  struct net_device *ldev)
-{
-	return __netdev_find_adj(dev, ldev, &dev->lower_dev_list);
-}
-
 /**
  * netdev_has_upper_dev - Check if device is linked to an upper device
  * @dev: device
@@ -4422,7 +4407,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
 {
 	ASSERT_RTNL();
 
-	return __netdev_find_upper(dev, upper_dev);
+	return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev);
 
@@ -4437,7 +4422,7 @@ bool netdev_has_any_upper_dev(struct net_device *dev)
 {
 	ASSERT_RTNL();
 
-	return !list_empty(&dev->upper_dev_list);
+	return !list_empty(&dev->all_adj_list.upper);
 }
 EXPORT_SYMBOL(netdev_has_any_upper_dev);
 
@@ -4454,10 +4439,10 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	if (list_empty(&dev->upper_dev_list))
+	if (list_empty(&dev->adj_list.upper))
 		return NULL;
 
-	upper = list_first_entry(&dev->upper_dev_list,
+	upper = list_first_entry(&dev->adj_list.upper,
 				 struct netdev_adjacent, list);
 	if (likely(upper->master))
 		return upper->dev;
@@ -4465,15 +4450,15 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
-/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+/* netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
  * @dev: device
  * @iter: list_head ** of the current position
  *
  * Gets the next device from the dev's upper list, starting from iter
  * position. The caller must hold RCU read lock.
  */
-struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
-						 struct list_head **iter)
+struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+						     struct list_head **iter)
 {
 	struct netdev_adjacent *upper;
 
@@ -4481,14 +4466,14 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 
 	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
 
-	if (&upper->list == &dev->upper_dev_list)
+	if (&upper->list == &dev->all_adj_list.upper)
 		return NULL;
 
 	*iter = &upper->list;
 
 	return upper->dev;
 }
-EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
 
 /**
  * netdev_master_upper_dev_get_rcu - Get master upper device
@@ -4501,7 +4486,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
 {
 	struct netdev_adjacent *upper;
 
-	upper = list_first_or_null_rcu(&dev->upper_dev_list,
+	upper = list_first_or_null_rcu(&dev->adj_list.upper,
 				       struct netdev_adjacent, list);
 	if (upper && likely(upper->master))
 		return upper->dev;
@@ -4512,14 +4497,13 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					struct net_device *adj_dev,
 					struct list_head *dev_list,
-					bool neighbour, bool master)
+					bool master)
 {
 	struct netdev_adjacent *adj;
 
 	adj = __netdev_find_adj(dev, adj_dev, dev_list);
 
 	if (adj) {
-		BUG_ON(neighbour);
 		adj->ref_nr++;
 		return 0;
 	}
@@ -4530,13 +4514,11 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 
 	adj->dev = adj_dev;
 	adj->master = master;
-	adj->neighbour = neighbour;
 	adj->ref_nr = 1;
-
 	dev_hold(adj_dev);
-	pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
-		 adj_dev->name, dev_list == &dev->upper_dev_list ?
-		 "upper" : "lower", dev->name, adj_dev->name);
+
+	pr_debug("dev_hold for %s, because of link added from %s to %s\n",
+		 adj_dev->name, dev->name, adj_dev->name);
 
 	/* Ensure that master link is always the first item in list. */
 	if (master)
@@ -4547,22 +4529,6 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	return 0;
 }
 
-static inline int __netdev_upper_dev_insert(struct net_device *dev,
-					    struct net_device *udev,
-					    bool master, bool neighbour)
-{
-	return __netdev_adjacent_dev_insert(dev, udev, &dev->upper_dev_list,
-					    neighbour, master);
-}
-
-static inline int __netdev_lower_dev_insert(struct net_device *dev,
-					    struct net_device *ldev,
-					    bool neighbour)
-{
-	return __netdev_adjacent_dev_insert(dev, ldev, &dev->lower_dev_list,
-					    neighbour, false);
-}
-
 void __netdev_adjacent_dev_remove(struct net_device *dev,
 				  struct net_device *adj_dev,
 				  struct list_head *dev_list)
@@ -4571,73 +4537,102 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 
 	adj = __netdev_find_adj(dev, adj_dev, dev_list);
 
-	if (!adj)
+	if (!adj) {
+		pr_err("tried to remove device %s from %s\n",
+		       dev->name, adj_dev->name);
 		BUG();
+	}
 
 	if (adj->ref_nr > 1) {
+		pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
+			 adj->ref_nr-1);
 		adj->ref_nr--;
 		return;
 	}
 
 	list_del_rcu(&adj->list);
-	pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
-		 adj_dev->name, dev_list == &dev->upper_dev_list ?
-		 "upper" : "lower", dev->name, adj_dev->name);
+	pr_debug("dev_put for %s, because link removed from %s to %s\n",
+		 adj_dev->name, dev->name, adj_dev->name);
 	dev_put(adj_dev);
 	kfree_rcu(adj, rcu);
 }
 
-static inline void __netdev_upper_dev_remove(struct net_device *dev,
-					     struct net_device *udev)
-{
-	return __netdev_adjacent_dev_remove(dev, udev, &dev->upper_dev_list);
-}
-
-static inline void __netdev_lower_dev_remove(struct net_device *dev,
-					     struct net_device *ldev)
-{
-	return __netdev_adjacent_dev_remove(dev, ldev, &dev->lower_dev_list);
-}
-
-int __netdev_adjacent_dev_insert_link(struct net_device *dev,
-				      struct net_device *upper_dev,
-				      bool master, bool neighbour)
+int __netdev_adjacent_dev_link_lists(struct net_device *dev,
+				     struct net_device *upper_dev,
+				     struct list_head *up_list,
+				     struct list_head *down_list,
+				     bool master)
 {
 	int ret;
 
-	ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
+	ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, master);
 	if (ret)
 		return ret;
 
-	ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
+	ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, false);
 	if (ret) {
-		__netdev_upper_dev_remove(dev, upper_dev);
+		__netdev_adjacent_dev_remove(dev, upper_dev, up_list);
 		return ret;
 	}
 
 	return 0;
 }
 
-static inline int __netdev_adjacent_dev_link(struct net_device *dev,
-					     struct net_device *udev)
+int __netdev_adjacent_dev_link(struct net_device *dev,
+			       struct net_device *upper_dev)
 {
-	return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
+	return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+						&dev->all_adj_list.upper,
+						&upper_dev->all_adj_list.lower,
+						false);
 }
 
-static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
-						       struct net_device *udev,
-						       bool master)
+void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
+					struct net_device *upper_dev,
+					struct list_head *up_list,
+					struct list_head *down_list)
 {
-	return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
+	__netdev_adjacent_dev_remove(dev, upper_dev, up_list);
+	__netdev_adjacent_dev_remove(upper_dev, dev, down_list);
 }
 
 void __netdev_adjacent_dev_unlink(struct net_device *dev,
 				  struct net_device *upper_dev)
 {
-	__netdev_upper_dev_remove(dev, upper_dev);
-	__netdev_lower_dev_remove(upper_dev, dev);
+	__netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+					   &dev->all_adj_list.upper,
+					   &upper_dev->all_adj_list.lower);
+}
+
+int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+					 struct net_device *upper_dev,
+					 bool master)
+{
+	int ret = __netdev_adjacent_dev_link(dev, upper_dev);
+
+	if (ret)
+		return ret;
+
+	ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
+					       &dev->adj_list.upper,
+					       &upper_dev->adj_list.lower,
+					       master);
+	if (ret) {
+		__netdev_adjacent_dev_unlink(dev, upper_dev);
+		return ret;
+	}
+
+	return 0;
 }
 
+void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
+					    struct net_device *upper_dev)
+{
+	__netdev_adjacent_dev_unlink(dev, upper_dev);
+	__netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+					   &dev->adj_list.upper,
+					   &upper_dev->adj_list.lower);
+}
 
 static int __netdev_upper_dev_link(struct net_device *dev,
 				   struct net_device *upper_dev, bool master)
@@ -4651,10 +4646,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 		return -EBUSY;
 
 	/* To prevent loops, check if dev is not upper device to upper_dev. */
-	if (__netdev_find_upper(upper_dev, dev))
+	if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
 		return -EBUSY;
 
-	if (__netdev_find_upper(dev, upper_dev))
+	if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
 		return -EEXIST;
 
 	if (master && netdev_master_upper_dev_get(dev))
@@ -4665,12 +4660,14 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 		return ret;
 
 	/* Now that we linked these devs, make all the upper_dev's
-	 * upper_dev_list visible to every dev's lower_dev_list and vice
+	 * all_adj_list.upper visible to every dev's all_adj_list.lower an
 	 * versa, and don't forget the devices itself. All of these
 	 * links are non-neighbours.
 	 */
-	list_for_each_entry(i, &dev->lower_dev_list, list) {
-		list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
+			pr_debug("Interlinking %s with %s, non-neighbour\n",
+				 i->dev->name, j->dev->name);
 			ret = __netdev_adjacent_dev_link(i->dev, j->dev);
 			if (ret)
 				goto rollback_mesh;
@@ -4678,14 +4675,18 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	}
 
 	/* add dev to every upper_dev's upper device */
-	list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
+		pr_debug("linking %s's upper device %s with %s\n",
+			 upper_dev->name, i->dev->name, dev->name);
 		ret = __netdev_adjacent_dev_link(dev, i->dev);
 		if (ret)
 			goto rollback_upper_mesh;
 	}
 
 	/* add upper_dev to every dev's lower device */
-	list_for_each_entry(i, &dev->lower_dev_list, list) {
+	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+		pr_debug("linking %s's lower device %s with %s\n", dev->name,
+			 i->dev->name, upper_dev->name);
 		ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
 		if (ret)
 			goto rollback_lower_mesh;
@@ -4696,7 +4697,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 
 rollback_lower_mesh:
 	to_i = i;
-	list_for_each_entry(i, &dev->lower_dev_list, list) {
+	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
 		if (i == to_i)
 			break;
 		__netdev_adjacent_dev_unlink(i->dev, upper_dev);
@@ -4706,7 +4707,7 @@ rollback_lower_mesh:
 
 rollback_upper_mesh:
 	to_i = i;
-	list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
 		if (i == to_i)
 			break;
 		__netdev_adjacent_dev_unlink(dev, i->dev);
@@ -4717,8 +4718,8 @@ rollback_upper_mesh:
 rollback_mesh:
 	to_i = i;
 	to_j = j;
-	list_for_each_entry(i, &dev->lower_dev_list, list) {
-		list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
 			if (i == to_i && j == to_j)
 				break;
 			__netdev_adjacent_dev_unlink(i->dev, j->dev);
@@ -4727,7 +4728,7 @@ rollback_mesh:
 			break;
 	}
 
-	__netdev_adjacent_dev_unlink(dev, upper_dev);
+	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
 
 	return ret;
 }
@@ -4781,23 +4782,23 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 	struct netdev_adjacent *i, *j;
 	ASSERT_RTNL();
 
-	__netdev_adjacent_dev_unlink(dev, upper_dev);
+	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
 
 	/* Here is the tricky part. We must remove all dev's lower
 	 * devices from all upper_dev's upper devices and vice
 	 * versa, to maintain the graph relationship.
 	 */
-	list_for_each_entry(i, &dev->lower_dev_list, list)
-		list_for_each_entry(j, &upper_dev->upper_dev_list, list)
+	list_for_each_entry(i, &dev->all_adj_list.lower, list)
+		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
 			__netdev_adjacent_dev_unlink(i->dev, j->dev);
 
 	/* remove also the devices itself from lower/upper device
 	 * list
 	 */
-	list_for_each_entry(i, &dev->lower_dev_list, list)
+	list_for_each_entry(i, &dev->all_adj_list.lower, list)
 		__netdev_adjacent_dev_unlink(i->dev, upper_dev);
 
-	list_for_each_entry(i, &upper_dev->upper_dev_list, list)
+	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
 		__netdev_adjacent_dev_unlink(dev, i->dev);
 
 	call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
@@ -6059,8 +6060,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
-	INIT_LIST_HEAD(&dev->upper_dev_list);
-	INIT_LIST_HEAD(&dev->lower_dev_list);
+	INIT_LIST_HEAD(&dev->adj_list.upper);
+	INIT_LIST_HEAD(&dev->adj_list.lower);
+	INIT_LIST_HEAD(&dev->all_adj_list.upper);
+	INIT_LIST_HEAD(&dev->all_adj_list.lower);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 
-- 
cgit v1.2.3


From 402dae9614557296e84543008a8e582c28fb1db3 Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 25 Sep 2013 09:20:09 +0200
Subject: net: add netdev_adjacent->private and allow to use it

Currently, even though we can access any linked device, we can't attach
anything to it, which is vital to properly manage them.

To fix this, add a new void *private to netdev_adjacent and functions
setting/getting it (per link), so that we can save, per example, bonding's
slave structures there, per slave device.

netdev_master_upper_dev_link_private(dev, upper_dev, private) links dev to
upper dev and populates the neighbour link only with private.

netdev_lower_dev_get_private{,_rcu}() returns the private, if found.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++++
 net/core/dev.c            | 68 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 64 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 514045c704a8..75d5beac463b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2839,8 +2839,15 @@ extern int netdev_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev);
 extern int netdev_master_upper_dev_link(struct net_device *dev,
 					struct net_device *upper_dev);
+extern int netdev_master_upper_dev_link_private(struct net_device *dev,
+						struct net_device *upper_dev,
+						void *private);
 extern void netdev_upper_dev_unlink(struct net_device *dev,
 				    struct net_device *upper_dev);
+extern void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+					      struct net_device *lower_dev);
+extern void *netdev_lower_dev_get_private(struct net_device *dev,
+					  struct net_device *lower_dev);
 extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features, bool tx_path);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9290f09cdf26..c69ab74fb201 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4376,6 +4376,9 @@ struct netdev_adjacent {
 	/* counter for the number of times this device was added to us */
 	u16 ref_nr;
 
+	/* private field for the users */
+	void *private;
+
 	struct list_head list;
 	struct rcu_head rcu;
 };
@@ -4510,7 +4513,7 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					struct net_device *adj_dev,
 					struct list_head *dev_list,
-					bool master)
+					void *private, bool master)
 {
 	struct netdev_adjacent *adj;
 
@@ -4528,6 +4531,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	adj->dev = adj_dev;
 	adj->master = master;
 	adj->ref_nr = 1;
+	adj->private = private;
 	dev_hold(adj_dev);
 
 	pr_debug("dev_hold for %s, because of link added from %s to %s\n",
@@ -4574,15 +4578,17 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev,
 				     struct net_device *upper_dev,
 				     struct list_head *up_list,
 				     struct list_head *down_list,
-				     bool master)
+				     void *private, bool master)
 {
 	int ret;
 
-	ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, master);
+	ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
+					   master);
 	if (ret)
 		return ret;
 
-	ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, false);
+	ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
+					   false);
 	if (ret) {
 		__netdev_adjacent_dev_remove(dev, upper_dev, up_list);
 		return ret;
@@ -4597,7 +4603,7 @@ int __netdev_adjacent_dev_link(struct net_device *dev,
 	return __netdev_adjacent_dev_link_lists(dev, upper_dev,
 						&dev->all_adj_list.upper,
 						&upper_dev->all_adj_list.lower,
-						false);
+						NULL, false);
 }
 
 void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
@@ -4619,7 +4625,7 @@ void __netdev_adjacent_dev_unlink(struct net_device *dev,
 
 int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 					 struct net_device *upper_dev,
-					 bool master)
+					 void *private, bool master)
 {
 	int ret = __netdev_adjacent_dev_link(dev, upper_dev);
 
@@ -4629,7 +4635,7 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 	ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
 					       &dev->adj_list.upper,
 					       &upper_dev->adj_list.lower,
-					       master);
+					       private, master);
 	if (ret) {
 		__netdev_adjacent_dev_unlink(dev, upper_dev);
 		return ret;
@@ -4648,7 +4654,8 @@ void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
 }
 
 static int __netdev_upper_dev_link(struct net_device *dev,
-				   struct net_device *upper_dev, bool master)
+				   struct net_device *upper_dev, bool master,
+				   void *private)
 {
 	struct netdev_adjacent *i, *j, *to_i, *to_j;
 	int ret = 0;
@@ -4668,7 +4675,8 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (master && netdev_master_upper_dev_get(dev))
 		return -EBUSY;
 
-	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
+						   master);
 	if (ret)
 		return ret;
 
@@ -4759,7 +4767,7 @@ rollback_mesh:
 int netdev_upper_dev_link(struct net_device *dev,
 			  struct net_device *upper_dev)
 {
-	return __netdev_upper_dev_link(dev, upper_dev, false);
+	return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
 }
 EXPORT_SYMBOL(netdev_upper_dev_link);
 
@@ -4777,10 +4785,18 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
 int netdev_master_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev)
 {
-	return __netdev_upper_dev_link(dev, upper_dev, true);
+	return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_link);
 
+int netdev_master_upper_dev_link_private(struct net_device *dev,
+					 struct net_device *upper_dev,
+					 void *private)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, true, private);
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
+
 /**
  * netdev_upper_dev_unlink - Removes a link to upper device
  * @dev: device
@@ -4818,6 +4834,36 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
+void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+				       struct net_device *lower_dev)
+{
+	struct netdev_adjacent *lower;
+
+	if (!lower_dev)
+		return NULL;
+	lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
+	if (!lower)
+		return NULL;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
+
+void *netdev_lower_dev_get_private(struct net_device *dev,
+				   struct net_device *lower_dev)
+{
+	struct netdev_adjacent *lower;
+
+	if (!lower_dev)
+		return NULL;
+	lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
+	if (!lower)
+		return NULL;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private);
+
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-- 
cgit v1.2.3


From 31088a113c2a948856ed2047d8c21c217b13e85d Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 25 Sep 2013 09:20:12 +0200
Subject: net: add for_each iterators through neighbour lower link's private

Add a possibility to iterate through netdev_adjacent's private, currently
only for lower neighbours.

Add both RCU and RTNL/other locking variants of iterators, and make the
non-rcu variant to be safe from removal.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 17 ++++++++++++++
 net/core/dev.c            | 60 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 75d5beac463b..168974e40cf5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2833,6 +2833,23 @@ extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *d
 	     updev; \
 	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
 
+extern void *netdev_lower_get_next_private(struct net_device *dev,
+					   struct list_head **iter);
+extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+					       struct list_head **iter);
+
+#define netdev_for_each_lower_private(dev, priv, iter) \
+	for (iter = (dev)->adj_list.lower.next, \
+	     priv = netdev_lower_get_next_private(dev, &(iter)); \
+	     priv; \
+	     priv = netdev_lower_get_next_private(dev, &(iter)))
+
+#define netdev_for_each_lower_private_rcu(dev, priv, iter) \
+	for (iter = &(dev)->adj_list.lower, \
+	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)); \
+	     priv; \
+	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
+
 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
 extern int netdev_upper_dev_link(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index c69ab74fb201..0aa844aae40b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4466,7 +4466,8 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
-/* netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
+/**
+ * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
  * @dev: device
  * @iter: list_head ** of the current position
  *
@@ -4491,6 +4492,63 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
 
+/**
+ * netdev_lower_get_next_private - Get the next ->private from the
+ *				   lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent->private from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold either hold the
+ * RTNL lock or its own locking that guarantees that the neighbour lower
+ * list will remain unchainged.
+ */
+void *netdev_lower_get_next_private(struct net_device *dev,
+				    struct list_head **iter)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_entry(*iter, struct netdev_adjacent, list);
+
+	if (&lower->list == &dev->adj_list.lower)
+		return NULL;
+
+	if (iter)
+		*iter = lower->list.next;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_get_next_private);
+
+/**
+ * netdev_lower_get_next_private_rcu - Get the next ->private from the
+ *				       lower neighbour list, RCU
+ *				       variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent->private from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+					struct list_head **iter)
+{
+	struct netdev_adjacent *lower;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+	if (&lower->list == &dev->adj_list.lower)
+		return NULL;
+
+	if (iter)
+		*iter = &lower->list;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
+
 /**
  * netdev_master_upper_dev_get_rcu - Get master upper device
  * @dev: device
-- 
cgit v1.2.3


From b6ccba4c681fdaf0070e580bf951badf7edc860b Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 25 Sep 2013 09:20:23 +0200
Subject: net: add a possibility to get private from netdev_adjacent->list

It will be useful to get first/last element.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 10 ++++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 168974e40cf5..b4cfb63f264e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2850,6 +2850,7 @@ extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 	     priv; \
 	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
 
+extern void *netdev_adjacent_get_private(struct list_head *adj_list);
 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
 extern int netdev_upper_dev_link(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 0aa844aae40b..acc11810805f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4466,6 +4466,16 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
+void *netdev_adjacent_get_private(struct list_head *adj_list)
+{
+	struct netdev_adjacent *adj;
+
+	adj = list_entry(adj_list, struct netdev_adjacent, list);
+
+	return adj->private;
+}
+EXPORT_SYMBOL(netdev_adjacent_get_private);
+
 /**
  * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
  * @dev: device
-- 
cgit v1.2.3


From a0f4ecf3494c9869d20f606e7e2b2f50f0e67a7f Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 26 Sep 2013 14:48:15 -0700
Subject: netfilter: Remove extern from function prototypes

There are a mix of function prototypes with and without extern
in the kernel sources.  Standardize on not using extern for
function prototypes.

Function prototypes don't need to be written with extern.
extern is assumed by the compiler.  Its use is as unnecessary as
using auto to declare automatic/local variables in a block.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/netfilter.h                        |  10 +-
 include/linux/netfilter/nf_conntrack_common.h    |   2 +-
 include/linux/netfilter/nf_conntrack_h323.h      |  14 +--
 include/linux/netfilter/nf_conntrack_proto_gre.h |   4 +-
 include/linux/netfilter/nf_conntrack_sip.h       |  57 +++++-----
 include/linux/netfilter/nfnetlink.h              |  28 ++---
 include/linux/netfilter/nfnetlink_acct.h         |   6 +-
 include/linux/netfilter/x_tables.h               | 128 +++++++++++------------
 include/linux/netfilter_bridge.h                 |   4 +-
 include/linux/netfilter_ipv4.h                   |   6 +-
 include/linux/netfilter_ipv6.h                   |  10 +-
 11 files changed, 133 insertions(+), 136 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 708fe72ab913..61223c52414f 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -35,7 +35,7 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
 	result->all[3] = a1->all[3] & mask->all[3];
 }
 
-extern int netfilter_init(void);
+int netfilter_init(void);
 
 /* Largest hook number + 1 */
 #define NF_MAX_HOOKS 8
@@ -208,7 +208,7 @@ int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval,
 /* Call this before modifying an existing packet: ensures it is
    modifiable and linear to the point you care about (writable_len).
    Returns true or false. */
-extern int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
+int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
 
 struct flowi;
 struct nf_queue_entry;
@@ -269,8 +269,8 @@ nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
 	return csum;
 }
 
-extern int nf_register_afinfo(const struct nf_afinfo *afinfo);
-extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
+int nf_register_afinfo(const struct nf_afinfo *afinfo);
+void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
 
 #include <net/flow.h>
 extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
@@ -315,7 +315,7 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
-extern void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
 extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
 
 struct nf_conn;
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 127d0b90604f..275505792664 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -23,6 +23,6 @@ struct ip_conntrack_stat {
 };
 
 /* call to create an explicit dependency on nf_conntrack. */
-extern void need_conntrack(void);
+void need_conntrack(void);
 
 #endif /* _NF_CONNTRACK_COMMON_H */
diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h
index f381020eee92..858d9b214053 100644
--- a/include/linux/netfilter/nf_conntrack_h323.h
+++ b/include/linux/netfilter/nf_conntrack_h323.h
@@ -29,13 +29,13 @@ struct nf_ct_h323_master {
 
 struct nf_conn;
 
-extern int get_h225_addr(struct nf_conn *ct, unsigned char *data,
-			 TransportAddress *taddr,
-			 union nf_inet_addr *addr, __be16 *port);
-extern void nf_conntrack_h245_expect(struct nf_conn *new,
-				     struct nf_conntrack_expect *this);
-extern void nf_conntrack_q931_expect(struct nf_conn *new,
-				     struct nf_conntrack_expect *this);
+int get_h225_addr(struct nf_conn *ct, unsigned char *data,
+		  TransportAddress *taddr, union nf_inet_addr *addr,
+		  __be16 *port);
+void nf_conntrack_h245_expect(struct nf_conn *new,
+			      struct nf_conntrack_expect *this);
+void nf_conntrack_q931_expect(struct nf_conn *new,
+			      struct nf_conntrack_expect *this);
 extern int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff,
 				  unsigned char **data, int dataoff,
 				  H245_TransportAddress *taddr,
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 6a0664c0c451..ec2ffaf418c8 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -87,8 +87,8 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 /* delete keymap entries */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
 
-extern void nf_ct_gre_keymap_flush(struct net *net);
-extern void nf_nat_need_gre(void);
+void nf_ct_gre_keymap_flush(struct net *net);
+void nf_nat_need_gre(void);
 
 #endif /* __KERNEL__ */
 #endif /* _CONNTRACK_PROTO_GRE_H */
diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index ba7f571a2b1c..5cac0207b95d 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -157,35 +157,34 @@ extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
 					     unsigned int medialen,
 					     union nf_inet_addr *rtp_addr);
 
-extern int ct_sip_parse_request(const struct nf_conn *ct,
-				const char *dptr, unsigned int datalen,
-				unsigned int *matchoff, unsigned int *matchlen,
-				union nf_inet_addr *addr, __be16 *port);
-extern int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
-			     unsigned int dataoff, unsigned int datalen,
-			     enum sip_header_types type,
-			     unsigned int *matchoff, unsigned int *matchlen);
-extern int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
-				   unsigned int *dataoff, unsigned int datalen,
-				   enum sip_header_types type, int *in_header,
-				   unsigned int *matchoff, unsigned int *matchlen,
-				   union nf_inet_addr *addr, __be16 *port);
-extern int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
-				      unsigned int dataoff, unsigned int datalen,
-				      const char *name,
-				      unsigned int *matchoff, unsigned int *matchlen,
-				      union nf_inet_addr *addr, bool delim);
-extern int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
-					unsigned int off, unsigned int datalen,
-					const char *name,
-					unsigned int *matchoff, unsigned int *matchen,
-					unsigned int *val);
-
-extern int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
-				 unsigned int dataoff, unsigned int datalen,
-				 enum sdp_header_types type,
-				 enum sdp_header_types term,
-				 unsigned int *matchoff, unsigned int *matchlen);
+int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr,
+			 unsigned int datalen, unsigned int *matchoff,
+			 unsigned int *matchlen, union nf_inet_addr *addr,
+			 __be16 *port);
+int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
+		      unsigned int dataoff, unsigned int datalen,
+		      enum sip_header_types type, unsigned int *matchoff,
+		      unsigned int *matchlen);
+int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
+			    unsigned int *dataoff, unsigned int datalen,
+			    enum sip_header_types type, int *in_header,
+			    unsigned int *matchoff, unsigned int *matchlen,
+			    union nf_inet_addr *addr, __be16 *port);
+int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
+			       unsigned int dataoff, unsigned int datalen,
+			       const char *name, unsigned int *matchoff,
+			       unsigned int *matchlen, union nf_inet_addr *addr,
+			       bool delim);
+int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
+				 unsigned int off, unsigned int datalen,
+				 const char *name, unsigned int *matchoff,
+				 unsigned int *matchen, unsigned int *val);
+
+int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
+			  unsigned int dataoff, unsigned int datalen,
+			  enum sdp_header_types type,
+			  enum sdp_header_types term,
+			  unsigned int *matchoff, unsigned int *matchlen);
 
 #endif /* __KERNEL__ */
 #endif /* __NF_CONNTRACK_SIP_H__ */
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index cadb7402d7a7..4f68cd7141d2 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -25,20 +25,20 @@ struct nfnetlink_subsystem {
 	const struct nfnl_callback *cb;	/* callback for individual types */
 };
 
-extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
-extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
-
-extern int nfnetlink_has_listeners(struct net *net, unsigned int group);
-extern struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
-					   u32 dst_portid, gfp_t gfp_mask);
-extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
-			  unsigned int group, int echo, gfp_t flags);
-extern int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
-extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net,
-			     u32 portid, int flags);
-
-extern void nfnl_lock(__u8 subsys_id);
-extern void nfnl_unlock(__u8 subsys_id);
+int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
+int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
+
+int nfnetlink_has_listeners(struct net *net, unsigned int group);
+struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
+				    u32 dst_portid, gfp_t gfp_mask);
+int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
+		   unsigned int group, int echo, gfp_t flags);
+int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
+int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
+		      int flags);
+
+void nfnl_lock(__u8 subsys_id);
+void nfnl_unlock(__u8 subsys_id);
 
 #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
 	MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index bb4bbc9b7a18..b2e85e59f760 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -6,8 +6,8 @@
 
 struct nf_acct;
 
-extern struct nf_acct *nfnl_acct_find_get(const char *filter_name);
-extern void nfnl_acct_put(struct nf_acct *acct);
-extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
+struct nf_acct *nfnl_acct_find_get(const char *filter_name);
+void nfnl_acct_put(struct nf_acct *acct);
+void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
 
 #endif /* _NFNL_ACCT_H */
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index dd49566315c6..a3e215bb0241 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -229,50 +229,48 @@ struct xt_table_info {
 
 #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
 			  + nr_cpu_ids * sizeof(char *))
-extern int xt_register_target(struct xt_target *target);
-extern void xt_unregister_target(struct xt_target *target);
-extern int xt_register_targets(struct xt_target *target, unsigned int n);
-extern void xt_unregister_targets(struct xt_target *target, unsigned int n);
-
-extern int xt_register_match(struct xt_match *target);
-extern void xt_unregister_match(struct xt_match *target);
-extern int xt_register_matches(struct xt_match *match, unsigned int n);
-extern void xt_unregister_matches(struct xt_match *match, unsigned int n);
-
-extern int xt_check_match(struct xt_mtchk_param *,
-			  unsigned int size, u_int8_t proto, bool inv_proto);
-extern int xt_check_target(struct xt_tgchk_param *,
-			   unsigned int size, u_int8_t proto, bool inv_proto);
-
-extern struct xt_table *xt_register_table(struct net *net,
-					  const struct xt_table *table,
-					  struct xt_table_info *bootstrap,
-					  struct xt_table_info *newinfo);
-extern void *xt_unregister_table(struct xt_table *table);
-
-extern struct xt_table_info *xt_replace_table(struct xt_table *table,
-					      unsigned int num_counters,
-					      struct xt_table_info *newinfo,
-					      int *error);
-
-extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
-extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
-extern struct xt_match *xt_request_find_match(u8 af, const char *name,
-					      u8 revision);
-extern struct xt_target *xt_request_find_target(u8 af, const char *name,
-						u8 revision);
-extern int xt_find_revision(u8 af, const char *name, u8 revision,
-			    int target, int *err);
-
-extern struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
-					   const char *name);
-extern void xt_table_unlock(struct xt_table *t);
-
-extern int xt_proto_init(struct net *net, u_int8_t af);
-extern void xt_proto_fini(struct net *net, u_int8_t af);
-
-extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
-extern void xt_free_table_info(struct xt_table_info *info);
+int xt_register_target(struct xt_target *target);
+void xt_unregister_target(struct xt_target *target);
+int xt_register_targets(struct xt_target *target, unsigned int n);
+void xt_unregister_targets(struct xt_target *target, unsigned int n);
+
+int xt_register_match(struct xt_match *target);
+void xt_unregister_match(struct xt_match *target);
+int xt_register_matches(struct xt_match *match, unsigned int n);
+void xt_unregister_matches(struct xt_match *match, unsigned int n);
+
+int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
+		   bool inv_proto);
+int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
+		    bool inv_proto);
+
+struct xt_table *xt_register_table(struct net *net,
+				   const struct xt_table *table,
+				   struct xt_table_info *bootstrap,
+				   struct xt_table_info *newinfo);
+void *xt_unregister_table(struct xt_table *table);
+
+struct xt_table_info *xt_replace_table(struct xt_table *table,
+				       unsigned int num_counters,
+				       struct xt_table_info *newinfo,
+				       int *error);
+
+struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
+struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
+struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision);
+struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision);
+int xt_find_revision(u8 af, const char *name, u8 revision, int target,
+		     int *err);
+
+struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
+				    const char *name);
+void xt_table_unlock(struct xt_table *t);
+
+int xt_proto_init(struct net *net, u_int8_t af);
+void xt_proto_fini(struct net *net, u_int8_t af);
+
+struct xt_table_info *xt_alloc_table_info(unsigned int size);
+void xt_free_table_info(struct xt_table_info *info);
 
 /**
  * xt_recseq - recursive seqcount for netfilter use
@@ -353,8 +351,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 	return ret;
 }
 
-extern struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
-extern void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
+struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
+void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
@@ -414,25 +412,25 @@ struct _compat_xt_align {
 
 #define COMPAT_XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _compat_xt_align))
 
-extern void xt_compat_lock(u_int8_t af);
-extern void xt_compat_unlock(u_int8_t af);
-
-extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
-extern void xt_compat_flush_offsets(u_int8_t af);
-extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
-extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
-
-extern int xt_compat_match_offset(const struct xt_match *match);
-extern int xt_compat_match_from_user(struct xt_entry_match *m,
-				     void **dstptr, unsigned int *size);
-extern int xt_compat_match_to_user(const struct xt_entry_match *m,
-				   void __user **dstptr, unsigned int *size);
-
-extern int xt_compat_target_offset(const struct xt_target *target);
-extern void xt_compat_target_from_user(struct xt_entry_target *t,
-				       void **dstptr, unsigned int *size);
-extern int xt_compat_target_to_user(const struct xt_entry_target *t,
-				    void __user **dstptr, unsigned int *size);
+void xt_compat_lock(u_int8_t af);
+void xt_compat_unlock(u_int8_t af);
+
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
+void xt_compat_flush_offsets(u_int8_t af);
+void xt_compat_init_offsets(u_int8_t af, unsigned int number);
+int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
+
+int xt_compat_match_offset(const struct xt_match *match);
+int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+			      unsigned int *size);
+int xt_compat_match_to_user(const struct xt_entry_match *m,
+			    void __user **dstptr, unsigned int *size);
+
+int xt_compat_target_offset(const struct xt_target *target);
+void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+				unsigned int *size);
+int xt_compat_target_to_user(const struct xt_entry_target *t,
+			     void __user **dstptr, unsigned int *size);
 
 #endif /* CONFIG_COMPAT */
 #endif /* _X_TABLES_H */
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index dfb4d9e52bcb..8ab1c278b66d 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -25,7 +25,7 @@ enum nf_br_hook_priorities {
 #define BRNF_PPPoE			0x20
 
 /* Only used in br_forward.c */
-extern int nf_bridge_copy_header(struct sk_buff *skb);
+int nf_bridge_copy_header(struct sk_buff *skb);
 static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
 {
 	if (skb->nf_bridge &&
@@ -53,7 +53,7 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 	return 0;
 }
 
-extern int br_handle_frame_finish(struct sk_buff *skb);
+int br_handle_frame_finish(struct sk_buff *skb);
 /* Only used in br_device.c */
 static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
 {
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index dfaf116b3e81..6e4591bb54d4 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -6,7 +6,7 @@
 
 #include <uapi/linux/netfilter_ipv4.h>
 
-extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type);
-extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
-				   unsigned int dataoff, u_int8_t protocol);
+int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type);
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+		       unsigned int dataoff, u_int8_t protocol);
 #endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 2d4df6ce043e..64dad1cc1a4b 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -11,12 +11,12 @@
 
 
 #ifdef CONFIG_NETFILTER
-extern int ip6_route_me_harder(struct sk_buff *skb);
-extern __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
-				    unsigned int dataoff, u_int8_t protocol);
+int ip6_route_me_harder(struct sk_buff *skb);
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+			unsigned int dataoff, u_int8_t protocol);
 
-extern int ipv6_netfilter_init(void);
-extern void ipv6_netfilter_fini(void);
+int ipv6_netfilter_init(void);
+void ipv6_netfilter_fini(void);
 
 /*
  * Hook functions for ipv6 to allow xt_* modules to be built-in even
-- 
cgit v1.2.3


From 58292cbe6669d74498a5f08db13e57cb3bcfb81d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 11 Sep 2013 22:29:04 -0400
Subject: sysfs: make attr namespace interface less convoluted

sysfs ns (namespace) implementation became more convoluted than
necessary while trying to hide ns information from visible interface.
The relatively recent attr ns support is a good example.

* attr ns tag is determined by sysfs_ops->namespace() callback while
  dir tag is determined by kobj_type->namespace().  The placement is
  arbitrary.

* Instead of performing operations with explicit ns tag, the namespace
  callback is routed through sysfs_attr_ns(), sysfs_ops->namespace(),
  class_attr_namespace(), class_attr->namespace().  It's not simpler
  in any sense.  The only thing this convolution does is traversing
  the whole stack backwards.

The namespace callbacks are unncessary because the operations involved
are inherently synchronous.  The information can be provided in in
straight-forward top-down direction and reversing that direction is
unnecessary and against basic design principles.

This backward interface is unnecessarily convoluted and hinders
properly separating out sysfs from driver model / kobject for proper
layering.  This patch updates attr ns support such that

* sysfs_ops->namespace() and class_attr->namespace() are dropped.

* sysfs_{create|remove}_file_ns(), which take explicit @ns param, are
  added and sysfs_{create|remove}_file() are now simple wrappers
  around the ns aware functions.

* ns handling is dropped from sysfs_chmod_file().  Nobody uses it at
  this point.  sysfs_chmod_file_ns() can be added later if necessary.

* Explicit @ns is propagated through class_{create|remove}_file_ns()
  and netdev_class_{create|remove}_file_ns().

* driver/net/bonding which is currently the only user of attr
  namespace is updated to use netdev_class_{create|remove}_file_ns()
  with @bh->net as the ns tag instead of using the namespace callback.

This patch should be an equivalent conversion without any functional
difference.  It makes the code easier to follow, reduces lines of code
a bit and helps proper separation and layering.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Kay Sievers <kay@vrfy.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/class.c             | 29 ++++--------
 drivers/net/bonding/bond_sysfs.c | 14 ++----
 fs/sysfs/file.c                  | 95 ++++++++++------------------------------
 fs/sysfs/group.c                 |  7 +--
 fs/sysfs/sysfs.h                 |  5 ++-
 include/linux/device.h           | 24 +++++++---
 include/linux/netdevice.h        | 16 ++++++-
 include/linux/sysfs.h            | 31 +++++++++----
 net/core/net-sysfs.c             | 14 +++---
 9 files changed, 106 insertions(+), 129 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 8b7818b80056..f96f70419a78 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -47,18 +47,6 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static const void *class_attr_namespace(struct kobject *kobj,
-					const struct attribute *attr)
-{
-	struct class_attribute *class_attr = to_class_attr(attr);
-	struct subsys_private *cp = to_subsys_private(kobj);
-	const void *ns = NULL;
-
-	if (class_attr->namespace)
-		ns = class_attr->namespace(cp->class, class_attr);
-	return ns;
-}
-
 static void class_release(struct kobject *kobj)
 {
 	struct subsys_private *cp = to_subsys_private(kobj);
@@ -86,7 +74,6 @@ static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject
 static const struct sysfs_ops class_sysfs_ops = {
 	.show	   = class_attr_show,
 	.store	   = class_attr_store,
-	.namespace = class_attr_namespace,
 };
 
 static struct kobj_type class_ktype = {
@@ -99,21 +86,23 @@ static struct kobj_type class_ktype = {
 static struct kset *class_kset;
 
 
-int class_create_file(struct class *cls, const struct class_attribute *attr)
+int class_create_file_ns(struct class *cls, const struct class_attribute *attr,
+			 const void *ns)
 {
 	int error;
 	if (cls)
-		error = sysfs_create_file(&cls->p->subsys.kobj,
-					  &attr->attr);
+		error = sysfs_create_file_ns(&cls->p->subsys.kobj,
+					     &attr->attr, ns);
 	else
 		error = -EINVAL;
 	return error;
 }
 
-void class_remove_file(struct class *cls, const struct class_attribute *attr)
+void class_remove_file_ns(struct class *cls, const struct class_attribute *attr,
+			  const void *ns)
 {
 	if (cls)
-		sysfs_remove_file(&cls->p->subsys.kobj, &attr->attr);
+		sysfs_remove_file_ns(&cls->p->subsys.kobj, &attr->attr, ns);
 }
 
 static struct class *class_get(struct class *cls)
@@ -600,8 +589,8 @@ int __init classes_init(void)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(class_create_file);
-EXPORT_SYMBOL_GPL(class_remove_file);
+EXPORT_SYMBOL_GPL(class_create_file_ns);
+EXPORT_SYMBOL_GPL(class_remove_file_ns);
 EXPORT_SYMBOL_GPL(class_unregister);
 EXPORT_SYMBOL_GPL(class_destroy);
 
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index c29b836749b6..ec9b6460a38d 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -149,14 +149,6 @@ err_no_cmd:
 	return -EPERM;
 }
 
-static const void *bonding_namespace(struct class *cls,
-				     const struct class_attribute *attr)
-{
-	const struct bond_net *bn =
-		container_of(attr, struct bond_net, class_attr_bonding_masters);
-	return bn->net;
-}
-
 /* class attribute for bond_masters file.  This ends up in /sys/class/net */
 static const struct class_attribute class_attr_bonding_masters = {
 	.attr = {
@@ -165,7 +157,6 @@ static const struct class_attribute class_attr_bonding_masters = {
 	},
 	.show = bonding_show_bonds,
 	.store = bonding_store_bonds,
-	.namespace = bonding_namespace,
 };
 
 int bond_create_slave_symlinks(struct net_device *master,
@@ -1787,7 +1778,8 @@ int bond_create_sysfs(struct bond_net *bn)
 	bn->class_attr_bonding_masters = class_attr_bonding_masters;
 	sysfs_attr_init(&bn->class_attr_bonding_masters.attr);
 
-	ret = netdev_class_create_file(&bn->class_attr_bonding_masters);
+	ret = netdev_class_create_file_ns(&bn->class_attr_bonding_masters,
+					  bn->net);
 	/*
 	 * Permit multiple loads of the module by ignoring failures to
 	 * create the bonding_masters sysfs file.  Bonding devices
@@ -1817,7 +1809,7 @@ int bond_create_sysfs(struct bond_net *bn)
  */
 void bond_destroy_sysfs(struct bond_net *bn)
 {
-	netdev_class_remove_file(&bn->class_attr_bonding_masters);
+	netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, bn->net);
 }
 
 /*
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 15ef5eb13663..e784340f1599 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -485,58 +485,15 @@ const struct file_operations sysfs_file_operations = {
 	.poll		= sysfs_poll,
 };
 
-static int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr,
-			 const void **pns)
-{
-	struct sysfs_dirent *dir_sd = kobj->sd;
-	const struct sysfs_ops *ops;
-	const void *ns = NULL;
-	int err;
-
-	if (!dir_sd) {
-		WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n",
-			kobject_name(kobj));
-		return -ENOENT;
-	}
-
-	err = 0;
-	if (!sysfs_ns_type(dir_sd))
-		goto out;
-
-	err = -EINVAL;
-	if (!kobj->ktype)
-		goto out;
-	ops = kobj->ktype->sysfs_ops;
-	if (!ops)
-		goto out;
-	if (!ops->namespace)
-		goto out;
-
-	err = 0;
-	ns = ops->namespace(kobj, attr);
-out:
-	if (err) {
-		WARN(1, KERN_ERR
-		     "missing sysfs namespace attribute operation for kobject: %s\n",
-		     kobject_name(kobj));
-	}
-	*pns = ns;
-	return err;
-}
-
-int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
-			const struct attribute *attr, int type, umode_t amode)
+int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
+			   const struct attribute *attr, int type,
+			   umode_t amode, const void *ns)
 {
 	umode_t mode = (amode & S_IALLUGO) | S_IFREG;
 	struct sysfs_addrm_cxt acxt;
 	struct sysfs_dirent *sd;
-	const void *ns;
 	int rc;
 
-	rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns);
-	if (rc)
-		return rc;
-
 	sd = sysfs_new_dirent(attr->name, mode, type);
 	if (!sd)
 		return -ENOMEM;
@@ -559,23 +516,25 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
 int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
 		   int type)
 {
-	return sysfs_add_file_mode(dir_sd, attr, type, attr->mode);
+	return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL);
 }
 
-
 /**
- *	sysfs_create_file - create an attribute file for an object.
- *	@kobj:	object we're creating for.
- *	@attr:	attribute descriptor.
+ * sysfs_create_file_ns - create an attribute file for an object with custom ns
+ * @kobj: object we're creating for
+ * @attr: attribute descriptor
+ * @ns: namespace the new file should belong to
  */
-int sysfs_create_file(struct kobject *kobj, const struct attribute *attr)
+int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
+			 const void *ns)
 {
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
+	return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR,
+				      attr->mode, ns);
 
 }
-EXPORT_SYMBOL_GPL(sysfs_create_file);
+EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
 
 int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr)
 {
@@ -630,17 +589,12 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
 {
 	struct sysfs_dirent *sd;
 	struct iattr newattrs;
-	const void *ns;
 	int rc;
 
-	rc = sysfs_attr_ns(kobj, attr, &ns);
-	if (rc)
-		return rc;
-
 	mutex_lock(&sysfs_mutex);
 
 	rc = -ENOENT;
-	sd = sysfs_find_dirent(kobj->sd, ns, attr->name);
+	sd = sysfs_find_dirent(kobj->sd, NULL, attr->name);
 	if (!sd)
 		goto out;
 
@@ -655,22 +609,21 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
 /**
- *	sysfs_remove_file - remove an object attribute.
- *	@kobj:	object we're acting for.
- *	@attr:	attribute descriptor.
+ * sysfs_remove_file_ns - remove an object attribute with a custom ns tag
+ * @kobj: object we're acting for
+ * @attr: attribute descriptor
+ * @ns: namespace tag of the file to remove
  *
- *	Hash the attribute name and kill the victim.
+ * Hash the attribute name and namespace tag and kill the victim.
  */
-void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr)
+void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
+			  const void *ns)
 {
-	const void *ns;
-
-	if (sysfs_attr_ns(kobj, attr, &ns))
-		return;
+	struct sysfs_dirent *dir_sd = kobj->sd;
 
-	sysfs_hash_and_remove(kobj->sd, ns, attr->name);
+	sysfs_hash_and_remove(dir_sd, ns, attr->name);
 }
-EXPORT_SYMBOL_GPL(sysfs_remove_file);
+EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
 
 void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr)
 {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 5f92cd2f61c1..25c78f23dae8 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -56,9 +56,10 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
 				if (!mode)
 					continue;
 			}
-			error = sysfs_add_file_mode(dir_sd, *attr,
-						    SYSFS_KOBJ_ATTR,
-						    (*attr)->mode | mode);
+			error = sysfs_add_file_mode_ns(dir_sd, *attr,
+						       SYSFS_KOBJ_ATTR,
+						       (*attr)->mode | mode,
+						       NULL);
 			if (unlikely(error))
 				break;
 		}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index b6deca3e301d..a96da2559db2 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -230,8 +230,9 @@ extern const struct file_operations sysfs_file_operations;
 int sysfs_add_file(struct sysfs_dirent *dir_sd,
 		   const struct attribute *attr, int type);
 
-int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
-			const struct attribute *attr, int type, umode_t amode);
+int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
+			   const struct attribute *attr, int type,
+			   umode_t amode, const void *ns);
 /*
  * bin.c
  */
diff --git a/include/linux/device.h b/include/linux/device.h
index 2a9d6ed59579..ce690ea34547 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -427,8 +427,6 @@ struct class_attribute {
 			char *buf);
 	ssize_t (*store)(struct class *class, struct class_attribute *attr,
 			const char *buf, size_t count);
-	const void *(*namespace)(struct class *class,
-				 const struct class_attribute *attr);
 };
 
 #define CLASS_ATTR(_name, _mode, _show, _store) \
@@ -438,10 +436,24 @@ struct class_attribute {
 #define CLASS_ATTR_RO(_name) \
 	struct class_attribute class_attr_##_name = __ATTR_RO(_name)
 
-extern int __must_check class_create_file(struct class *class,
-					  const struct class_attribute *attr);
-extern void class_remove_file(struct class *class,
-			      const struct class_attribute *attr);
+extern int __must_check class_create_file_ns(struct class *class,
+					     const struct class_attribute *attr,
+					     const void *ns);
+extern void class_remove_file_ns(struct class *class,
+				 const struct class_attribute *attr,
+				 const void *ns);
+
+static inline int __must_check class_create_file(struct class *class,
+					const struct class_attribute *attr)
+{
+	return class_create_file_ns(class, attr, NULL);
+}
+
+static inline void class_remove_file(struct class *class,
+				     const struct class_attribute *attr)
+{
+	return class_remove_file_ns(class, attr, NULL);
+}
 
 /* Simple class attribute that is just a static string */
 struct class_attribute_string {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3de49aca4519..42421ed49a47 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2873,8 +2873,20 @@ extern int __init dev_proc_init(void);
 #define dev_proc_init() 0
 #endif
 
-extern int netdev_class_create_file(struct class_attribute *class_attr);
-extern void netdev_class_remove_file(struct class_attribute *class_attr);
+extern int netdev_class_create_file_ns(struct class_attribute *class_attr,
+				       const void *ns);
+extern void netdev_class_remove_file_ns(struct class_attribute *class_attr,
+					const void *ns);
+
+static inline int netdev_class_create_file(struct class_attribute *class_attr)
+{
+	return netdev_class_create_file_ns(class_attr, NULL);
+}
+
+static inline void netdev_class_remove_file(struct class_attribute *class_attr)
+{
+	netdev_class_remove_file_ns(class_attr, NULL);
+}
 
 extern struct kobj_ns_type_operations net_ns_type_operations;
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 11baec7c9b26..82f7fac78e77 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -173,7 +173,6 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size)
 struct sysfs_ops {
 	ssize_t	(*show)(struct kobject *, struct attribute *, char *);
 	ssize_t	(*store)(struct kobject *, struct attribute *, const char *, size_t);
-	const void *(*namespace)(struct kobject *, const struct attribute *);
 };
 
 struct sysfs_dirent;
@@ -189,13 +188,15 @@ int __must_check sysfs_rename_dir(struct kobject *kobj, const char *new_name);
 int __must_check sysfs_move_dir(struct kobject *kobj,
 				struct kobject *new_parent_kobj);
 
-int __must_check sysfs_create_file(struct kobject *kobj,
-				   const struct attribute *attr);
+int __must_check sysfs_create_file_ns(struct kobject *kobj,
+				      const struct attribute *attr,
+				      const void *ns);
 int __must_check sysfs_create_files(struct kobject *kobj,
 				   const struct attribute **attr);
 int __must_check sysfs_chmod_file(struct kobject *kobj,
 				  const struct attribute *attr, umode_t mode);
-void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr);
+void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
+			  const void *ns);
 void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr);
 
 int __must_check sysfs_create_bin_file(struct kobject *kobj,
@@ -277,8 +278,9 @@ static inline int sysfs_move_dir(struct kobject *kobj,
 	return 0;
 }
 
-static inline int sysfs_create_file(struct kobject *kobj,
-				    const struct attribute *attr)
+static inline int sysfs_create_file_ns(struct kobject *kobj,
+				       const struct attribute *attr,
+				       const void *ns)
 {
 	return 0;
 }
@@ -295,8 +297,9 @@ static inline int sysfs_chmod_file(struct kobject *kobj,
 	return 0;
 }
 
-static inline void sysfs_remove_file(struct kobject *kobj,
-				     const struct attribute *attr)
+static inline void sysfs_remove_file_ns(struct kobject *kobj,
+					const struct attribute *attr,
+					const void *ns)
 {
 }
 
@@ -435,4 +438,16 @@ static inline int __must_check sysfs_init(void)
 
 #endif /* CONFIG_SYSFS */
 
+static inline int __must_check sysfs_create_file(struct kobject *kobj,
+						 const struct attribute *attr)
+{
+	return sysfs_create_file_ns(kobj, attr, NULL);
+}
+
+static inline void sysfs_remove_file(struct kobject *kobj,
+				     const struct attribute *attr)
+{
+	return sysfs_remove_file_ns(kobj, attr, NULL);
+}
+
 #endif /* _SYSFS_H_ */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d954b56b4e47..325dee863e46 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1344,17 +1344,19 @@ int netdev_register_kobject(struct net_device *net)
 	return error;
 }
 
-int netdev_class_create_file(struct class_attribute *class_attr)
+int netdev_class_create_file_ns(struct class_attribute *class_attr,
+				const void *ns)
 {
-	return class_create_file(&net_class, class_attr);
+	return class_create_file_ns(&net_class, class_attr, ns);
 }
-EXPORT_SYMBOL(netdev_class_create_file);
+EXPORT_SYMBOL(netdev_class_create_file_ns);
 
-void netdev_class_remove_file(struct class_attribute *class_attr)
+void netdev_class_remove_file_ns(struct class_attribute *class_attr,
+				 const void *ns)
 {
-	class_remove_file(&net_class, class_attr);
+	class_remove_file_ns(&net_class, class_attr, ns);
 }
-EXPORT_SYMBOL(netdev_class_remove_file);
+EXPORT_SYMBOL(netdev_class_remove_file_ns);
 
 int netdev_kobject_init(void)
 {
-- 
cgit v1.2.3


From 7965bd4d71ef7cf1db00afb9e406ddfc13443c13 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 26 Sep 2013 14:48:15 -0700
Subject: net.h/skbuff.h: Remove extern from function prototypes

There are a mix of function prototypes with and without extern
in the kernel sources.  Standardize on not using extern for
function prototypes.

Function prototypes don't need to be written with extern.
extern is assumed by the compiler.  Its use is as unnecessary as
using auto to declare automatic/local variables in a block.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/net.h    |  82 ++++++++--------
 include/linux/skbuff.h | 250 ++++++++++++++++++++++---------------------------
 2 files changed, 148 insertions(+), 184 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 4f27575ce1d6..ca9ec8540905 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -195,27 +195,23 @@ enum {
 	SOCK_WAKE_URG,
 };
 
-extern int	     sock_wake_async(struct socket *sk, int how, int band);
-extern int	     sock_register(const struct net_proto_family *fam);
-extern void	     sock_unregister(int family);
-extern int	     __sock_create(struct net *net, int family, int type, int proto,
-				 struct socket **res, int kern);
-extern int	     sock_create(int family, int type, int proto,
-				 struct socket **res);
-extern int	     sock_create_kern(int family, int type, int proto,
-				      struct socket **res);
-extern int	     sock_create_lite(int family, int type, int proto,
-				      struct socket **res); 
-extern void	     sock_release(struct socket *sock);
-extern int   	     sock_sendmsg(struct socket *sock, struct msghdr *msg,
-				  size_t len);
-extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
-				  size_t size, int flags);
-extern struct file  *sock_alloc_file(struct socket *sock, int flags, const char *dname);
-extern struct socket *sockfd_lookup(int fd, int *err);
-extern struct socket *sock_from_file(struct file *file, int *err);
+int sock_wake_async(struct socket *sk, int how, int band);
+int sock_register(const struct net_proto_family *fam);
+void sock_unregister(int family);
+int __sock_create(struct net *net, int family, int type, int proto,
+		  struct socket **res, int kern);
+int sock_create(int family, int type, int proto, struct socket **res);
+int sock_create_kern(int family, int type, int proto, struct socket **res);
+int sock_create_lite(int family, int type, int proto, struct socket **res);
+void sock_release(struct socket *sock);
+int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
+int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		 int flags);
+struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname);
+struct socket *sockfd_lookup(int fd, int *err);
+struct socket *sock_from_file(struct file *file, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
-extern int	     net_ratelimit(void);
+int net_ratelimit(void);
 
 #define net_ratelimited_function(function, ...)			\
 do {								\
@@ -243,32 +239,28 @@ do {								\
 #define net_random()		prandom_u32()
 #define net_srandom(seed)	prandom_seed((__force u32)(seed))
 
-extern int   	     kernel_sendmsg(struct socket *sock, struct msghdr *msg,
-				    struct kvec *vec, size_t num, size_t len);
-extern int   	     kernel_recvmsg(struct socket *sock, struct msghdr *msg,
-				    struct kvec *vec, size_t num,
-				    size_t len, int flags);
+int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
+		   size_t num, size_t len);
+int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
+		   size_t num, size_t len, int flags);
 
-extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
-		       int addrlen);
-extern int kernel_listen(struct socket *sock, int backlog);
-extern int kernel_accept(struct socket *sock, struct socket **newsock,
-			 int flags);
-extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
-			  int addrlen, int flags);
-extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
-			      int *addrlen);
-extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
-			      int *addrlen);
-extern int kernel_getsockopt(struct socket *sock, int level, int optname,
-			     char *optval, int *optlen);
-extern int kernel_setsockopt(struct socket *sock, int level, int optname,
-			     char *optval, unsigned int optlen);
-extern int kernel_sendpage(struct socket *sock, struct page *page, int offset,
-			   size_t size, int flags);
-extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
-extern int kernel_sock_shutdown(struct socket *sock,
-				enum sock_shutdown_cmd how);
+int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen);
+int kernel_listen(struct socket *sock, int backlog);
+int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
+int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+		   int flags);
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+		       int *addrlen);
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+		       int *addrlen);
+int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
+		      int *optlen);
+int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
+		      unsigned int optlen);
+int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+		    size_t size, int flags);
+int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
+int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
 #define MODULE_ALIAS_NETPROTO(proto) \
 	MODULE_ALIAS("net-pf-" __stringify(proto))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2ddb48d9312c..6d56840e561e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -585,8 +585,8 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 	skb->_skb_refdst = (unsigned long)dst;
 }
 
-extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
-				bool force);
+void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
+			 bool force);
 
 /**
  * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
@@ -634,20 +634,20 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 	return (struct rtable *)skb_dst(skb);
 }
 
-extern void kfree_skb(struct sk_buff *skb);
-extern void kfree_skb_list(struct sk_buff *segs);
-extern void skb_tx_error(struct sk_buff *skb);
-extern void consume_skb(struct sk_buff *skb);
-extern void	       __kfree_skb(struct sk_buff *skb);
+void kfree_skb(struct sk_buff *skb);
+void kfree_skb_list(struct sk_buff *segs);
+void skb_tx_error(struct sk_buff *skb);
+void consume_skb(struct sk_buff *skb);
+void  __kfree_skb(struct sk_buff *skb);
 extern struct kmem_cache *skbuff_head_cache;
 
-extern void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
-extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
-			     bool *fragstolen, int *delta_truesize);
+void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
+bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
+		      bool *fragstolen, int *delta_truesize);
 
-extern struct sk_buff *__alloc_skb(unsigned int size,
-				   gfp_t priority, int flags, int node);
-extern struct sk_buff *build_skb(void *data, unsigned int frag_size);
+struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
+			    int node);
+struct sk_buff *build_skb(void *data, unsigned int frag_size);
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
 {
@@ -660,41 +660,33 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 	return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
 }
 
-extern struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
+struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
 static inline struct sk_buff *alloc_skb_head(gfp_t priority)
 {
 	return __alloc_skb_head(priority, -1);
 }
 
-extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
-extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
-extern struct sk_buff *skb_clone(struct sk_buff *skb,
-				 gfp_t priority);
-extern struct sk_buff *skb_copy(const struct sk_buff *skb,
-				gfp_t priority);
-extern struct sk_buff *__pskb_copy(struct sk_buff *skb,
-				 int headroom, gfp_t gfp_mask);
-
-extern int	       pskb_expand_head(struct sk_buff *skb,
-					int nhead, int ntail,
-					gfp_t gfp_mask);
-extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
-					    unsigned int headroom);
-extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
-				       int newheadroom, int newtailroom,
-				       gfp_t priority);
-extern int	       skb_to_sgvec(struct sk_buff *skb,
-				    struct scatterlist *sg, int offset,
-				    int len);
-extern int	       skb_cow_data(struct sk_buff *skb, int tailbits,
-				    struct sk_buff **trailer);
-extern int	       skb_pad(struct sk_buff *skb, int pad);
+struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
+struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
+struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
+struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask);
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
+				     unsigned int headroom);
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
+				int newtailroom, gfp_t priority);
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
+		 int len);
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
+int skb_pad(struct sk_buff *skb, int pad);
 #define dev_kfree_skb(a)	consume_skb(a)
 
-extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			int getfrag(void *from, char *to, int offset,
-			int len,int odd, struct sk_buff *skb),
-			void *from, int length);
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+			    int getfrag(void *from, char *to, int offset,
+					int len, int odd, struct sk_buff *skb),
+			    void *from, int length);
 
 struct skb_seq_state {
 	__u32		lower_offset;
@@ -706,18 +698,17 @@ struct skb_seq_state {
 	__u8		*frag_data;
 };
 
-extern void	      skb_prepare_seq_read(struct sk_buff *skb,
-					   unsigned int from, unsigned int to,
-					   struct skb_seq_state *st);
-extern unsigned int   skb_seq_read(unsigned int consumed, const u8 **data,
-				   struct skb_seq_state *st);
-extern void	      skb_abort_seq_read(struct skb_seq_state *st);
+void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
+			  unsigned int to, struct skb_seq_state *st);
+unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
+			  struct skb_seq_state *st);
+void skb_abort_seq_read(struct skb_seq_state *st);
 
-extern unsigned int   skb_find_text(struct sk_buff *skb, unsigned int from,
-				    unsigned int to, struct ts_config *config,
-				    struct ts_state *state);
+unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
+			   unsigned int to, struct ts_config *config,
+			   struct ts_state *state);
 
-extern void __skb_get_rxhash(struct sk_buff *skb);
+void __skb_get_rxhash(struct sk_buff *skb);
 static inline __u32 skb_get_rxhash(struct sk_buff *skb)
 {
 	if (!skb->l4_rxhash)
@@ -1095,7 +1086,8 @@ static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  *	The "__skb_xxxx()" functions are the non-atomic ones that
  *	can only be called with interrupts disabled.
  */
-extern void        skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list);
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk,
+		struct sk_buff_head *list);
 static inline void __skb_insert(struct sk_buff *newsk,
 				struct sk_buff *prev, struct sk_buff *next,
 				struct sk_buff_head *list)
@@ -1201,8 +1193,8 @@ static inline void __skb_queue_after(struct sk_buff_head *list,
 	__skb_insert(newsk, prev, prev->next, list);
 }
 
-extern void skb_append(struct sk_buff *old, struct sk_buff *newsk,
-		       struct sk_buff_head *list);
+void skb_append(struct sk_buff *old, struct sk_buff *newsk,
+		struct sk_buff_head *list);
 
 static inline void __skb_queue_before(struct sk_buff_head *list,
 				      struct sk_buff *next,
@@ -1221,7 +1213,7 @@ static inline void __skb_queue_before(struct sk_buff_head *list,
  *
  *	A buffer cannot be placed on two lists at the same time.
  */
-extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_head(struct sk_buff_head *list,
 				    struct sk_buff *newsk)
 {
@@ -1238,7 +1230,7 @@ static inline void __skb_queue_head(struct sk_buff_head *list,
  *
  *	A buffer cannot be placed on two lists at the same time.
  */
-extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_tail(struct sk_buff_head *list,
 				   struct sk_buff *newsk)
 {
@@ -1249,7 +1241,7 @@ static inline void __skb_queue_tail(struct sk_buff_head *list,
  * remove sk_buff from list. _Must_ be called atomically, and with
  * the list known..
  */
-extern void	   skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
 static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
 {
 	struct sk_buff *next, *prev;
@@ -1270,7 +1262,7 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  *	so must be used with appropriate locks held only. The head item is
  *	returned or %NULL if the list is empty.
  */
-extern struct sk_buff *skb_dequeue(struct sk_buff_head *list);
+struct sk_buff *skb_dequeue(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
 {
 	struct sk_buff *skb = skb_peek(list);
@@ -1287,7 +1279,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  *	so must be used with appropriate locks held only. The tail item is
  *	returned or %NULL if the list is empty.
  */
-extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
 {
 	struct sk_buff *skb = skb_peek_tail(list);
@@ -1373,8 +1365,8 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
 	skb_shinfo(skb)->nr_frags = i + 1;
 }
 
-extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
-			    int off, int size, unsigned int truesize);
+void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
+		     int size, unsigned int truesize);
 
 #define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
 #define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_has_frag_list(skb))
@@ -1418,7 +1410,7 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
 /*
  *	Add data to an sk_buff
  */
-extern unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
+unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 {
 	unsigned char *tmp = skb_tail_pointer(skb);
@@ -1428,7 +1420,7 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 	return tmp;
 }
 
-extern unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
+unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
 {
 	skb->data -= len;
@@ -1436,7 +1428,7 @@ static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
 	return skb->data;
 }
 
-extern unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
+unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
 {
 	skb->len -= len;
@@ -1449,7 +1441,7 @@ static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int l
 	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
 }
 
-extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
 
 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
 {
@@ -1753,7 +1745,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 #define NET_SKB_PAD	max(32, L1_CACHE_BYTES)
 #endif
 
-extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
+int ___pskb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 {
@@ -1765,7 +1757,7 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 	skb_set_tail_pointer(skb, len);
 }
 
-extern void skb_trim(struct sk_buff *skb, unsigned int len);
+void skb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
 {
@@ -1838,7 +1830,7 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
  *	the list and one reference dropped. This function does not take the
  *	list lock and the caller must hold the relevant locks to use it.
  */
-extern void skb_queue_purge(struct sk_buff_head *list);
+void skb_queue_purge(struct sk_buff_head *list);
 static inline void __skb_queue_purge(struct sk_buff_head *list)
 {
 	struct sk_buff *skb;
@@ -1850,11 +1842,10 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 #define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
 #define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
 
-extern void *netdev_alloc_frag(unsigned int fragsz);
+void *netdev_alloc_frag(unsigned int fragsz);
 
-extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-					  unsigned int length,
-					  gfp_t gfp_mask);
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
+				   gfp_t gfp_mask);
 
 /**
  *	netdev_alloc_skb - allocate an skbuff for rx on a specific device
@@ -2342,60 +2333,42 @@ static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag)
 #define skb_walk_frags(skb, iter)	\
 	for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
 
-extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
-					   int *peeked, int *off, int *err);
-extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
-					 int noblock, int *err);
-extern unsigned int    datagram_poll(struct file *file, struct socket *sock,
-				     struct poll_table_struct *wait);
-extern int	       skb_copy_datagram_iovec(const struct sk_buff *from,
-					       int offset, struct iovec *to,
-					       int size);
-extern int	       skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
-							int hlen,
-							struct iovec *iov);
-extern int	       skb_copy_datagram_from_iovec(struct sk_buff *skb,
-						    int offset,
-						    const struct iovec *from,
-						    int from_offset,
-						    int len);
-extern int	       zerocopy_sg_from_iovec(struct sk_buff *skb,
-					      const struct iovec *frm,
-					      int offset,
-					      size_t count);
-extern int	       skb_copy_datagram_const_iovec(const struct sk_buff *from,
-						     int offset,
-						     const struct iovec *to,
-						     int to_offset,
-						     int size);
-extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
-extern void	       skb_free_datagram_locked(struct sock *sk,
-						struct sk_buff *skb);
-extern int	       skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
-					 unsigned int flags);
-extern __wsum	       skb_checksum(const struct sk_buff *skb, int offset,
-				    int len, __wsum csum);
-extern int	       skb_copy_bits(const struct sk_buff *skb, int offset,
-				     void *to, int len);
-extern int	       skb_store_bits(struct sk_buff *skb, int offset,
-				      const void *from, int len);
-extern __wsum	       skb_copy_and_csum_bits(const struct sk_buff *skb,
-					      int offset, u8 *to, int len,
-					      __wsum csum);
-extern int             skb_splice_bits(struct sk_buff *skb,
-						unsigned int offset,
-						struct pipe_inode_info *pipe,
-						unsigned int len,
-						unsigned int flags);
-extern void	       skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
-extern void	       skb_split(struct sk_buff *skb,
-				 struct sk_buff *skb1, const u32 len);
-extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
-				 int shiftlen);
-extern void	       skb_scrub_packet(struct sk_buff *skb, bool xnet);
-
-extern struct sk_buff *skb_segment(struct sk_buff *skb,
-				   netdev_features_t features);
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+				    int *peeked, int *off, int *err);
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
+				  int *err);
+unsigned int datagram_poll(struct file *file, struct socket *sock,
+			   struct poll_table_struct *wait);
+int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
+			    struct iovec *to, int size);
+int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
+				     struct iovec *iov);
+int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
+				 const struct iovec *from, int from_offset,
+				 int len);
+int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm,
+			   int offset, size_t count);
+int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset,
+				  const struct iovec *to, int to_offset,
+				  int size);
+void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
+void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
+int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
+__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
+		    __wsum csum);
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
+int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
+__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
+			      int len, __wsum csum);
+int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+		    struct pipe_inode_info *pipe, unsigned int len,
+		    unsigned int flags);
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
+int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
+void skb_scrub_packet(struct sk_buff *skb, bool xnet);
+
+struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
@@ -2440,7 +2413,7 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
 	memcpy(skb->data + offset, from, len);
 }
 
-extern void skb_init(void);
+void skb_init(void);
 
 static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
 {
@@ -2483,12 +2456,12 @@ static inline ktime_t net_invalid_timestamp(void)
 	return ktime_set(0, 0);
 }
 
-extern void skb_timestamping_init(void);
+void skb_timestamping_init(void);
 
 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
 
-extern void skb_clone_tx_timestamp(struct sk_buff *skb);
-extern bool skb_defer_rx_timestamp(struct sk_buff *skb);
+void skb_clone_tx_timestamp(struct sk_buff *skb);
+bool skb_defer_rx_timestamp(struct sk_buff *skb);
 
 #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */
 
@@ -2529,8 +2502,8 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
  * generates a software time stamp (otherwise), then queues the clone
  * to the error queue of the socket.  Errors are silently ignored.
  */
-extern void skb_tstamp_tx(struct sk_buff *orig_skb,
-			struct skb_shared_hwtstamps *hwtstamps);
+void skb_tstamp_tx(struct sk_buff *orig_skb,
+		   struct skb_shared_hwtstamps *hwtstamps);
 
 static inline void sw_tx_timestamp(struct sk_buff *skb)
 {
@@ -2562,8 +2535,8 @@ static inline void skb_tx_timestamp(struct sk_buff *skb)
  */
 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked);
 
-extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
-extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
+__sum16 __skb_checksum_complete(struct sk_buff *skb);
 
 static inline int skb_csum_unnecessary(const struct sk_buff *skb)
 {
@@ -2593,7 +2566,7 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
 }
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-extern void nf_conntrack_destroy(struct nf_conntrack *nfct);
+void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
 {
 	if (nfct && atomic_dec_and_test(&nfct->use))
@@ -2732,9 +2705,8 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 	return skb->queue_mapping != 0;
 }
 
-extern u16 __skb_tx_hash(const struct net_device *dev,
-			 const struct sk_buff *skb,
-			 unsigned int num_tx_queues);
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues);
 
 #ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
@@ -2788,7 +2760,7 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
-extern void __skb_warn_lro_forwarding(const struct sk_buff *skb);
+void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From f629d208d27a22f495b7734eede585b5d207e912 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 26 Sep 2013 14:48:15 -0700
Subject: [networking]device.h: Remove extern from function prototypes

There are a mix of function prototypes with and without extern
in the kernel sources.  Standardize on not using extern for
function prototypes.

Function prototypes don't need to be written with extern.
extern is assumed by the compiler.  Its use is as unnecessary as
using auto to declare automatic/local variables in a block.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/etherdevice.h |  35 ++--
 include/linux/fcdevice.h    |   2 +-
 include/linux/fddidevice.h  |   7 +-
 include/linux/hippidevice.h |  10 +-
 include/linux/inetdevice.h  |  28 +--
 include/linux/netdevice.h   | 432 +++++++++++++++++++++-----------------------
 6 files changed, 248 insertions(+), 266 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index d8b512496e50..fc4a9aa7dd82 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -28,27 +28,24 @@
 #include <asm/unaligned.h>
 
 #ifdef __KERNEL__
-extern __be16		eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
 
-extern int eth_header(struct sk_buff *skb, struct net_device *dev,
-		      unsigned short type,
-		      const void *daddr, const void *saddr, unsigned len);
-extern int eth_rebuild_header(struct sk_buff *skb);
-extern int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
-extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
-extern void eth_header_cache_update(struct hh_cache *hh,
-				    const struct net_device *dev,
-				    const unsigned char *haddr);
-extern int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
-extern void eth_commit_mac_addr_change(struct net_device *dev, void *p);
-extern int eth_mac_addr(struct net_device *dev, void *p);
-extern int eth_change_mtu(struct net_device *dev, int new_mtu);
-extern int eth_validate_addr(struct net_device *dev);
-
-
-
-extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
+int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+	       const void *daddr, const void *saddr, unsigned len);
+int eth_rebuild_header(struct sk_buff *skb);
+int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
+int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
+		     __be16 type);
+void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev,
+			     const unsigned char *haddr);
+int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
+void eth_commit_mac_addr_change(struct net_device *dev, void *p);
+int eth_mac_addr(struct net_device *dev, void *p);
+int eth_change_mtu(struct net_device *dev, int new_mtu);
+int eth_validate_addr(struct net_device *dev);
+
+struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 					    unsigned int rxqs);
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
diff --git a/include/linux/fcdevice.h b/include/linux/fcdevice.h
index e460ef831984..5009fa16b5d8 100644
--- a/include/linux/fcdevice.h
+++ b/include/linux/fcdevice.h
@@ -27,7 +27,7 @@
 #include <linux/if_fc.h>
 
 #ifdef __KERNEL__
-extern struct net_device *alloc_fcdev(int sizeof_priv);
+struct net_device *alloc_fcdev(int sizeof_priv);
 #endif
 
 #endif	/* _LINUX_FCDEVICE_H */
diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h
index 155bafd9e886..9a79f0106da1 100644
--- a/include/linux/fddidevice.h
+++ b/include/linux/fddidevice.h
@@ -25,10 +25,9 @@
 #include <linux/if_fddi.h>
 
 #ifdef __KERNEL__
-extern __be16	fddi_type_trans(struct sk_buff *skb,
-				struct net_device *dev);
-extern int fddi_change_mtu(struct net_device *dev, int new_mtu);
-extern struct net_device *alloc_fddidev(int sizeof_priv);
+__be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev);
+int fddi_change_mtu(struct net_device *dev, int new_mtu);
+struct net_device *alloc_fddidev(int sizeof_priv);
 #endif
 
 #endif	/* _LINUX_FDDIDEVICE_H */
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h
index f148e4908410..8ec23fb0b412 100644
--- a/include/linux/hippidevice.h
+++ b/include/linux/hippidevice.h
@@ -31,11 +31,11 @@ struct hippi_cb {
 	__u32	ifield;
 };
 
-extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
-extern int hippi_change_mtu(struct net_device *dev, int new_mtu);
-extern int hippi_mac_addr(struct net_device *dev, void *p);
-extern int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p);
-extern struct net_device *alloc_hippi_dev(int sizeof_priv);
+__be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
+int hippi_change_mtu(struct net_device *dev, int new_mtu);
+int hippi_mac_addr(struct net_device *dev, void *p);
+int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p);
+struct net_device *alloc_hippi_dev(int sizeof_priv);
 #endif
 
 #endif	/* _LINUX_HIPPIDEVICE_H */
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 79640e015a86..0d678aefe69d 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -147,25 +147,27 @@ struct in_ifaddr {
 	unsigned long		ifa_tstamp; /* updated timestamp */
 };
 
-extern int register_inetaddr_notifier(struct notifier_block *nb);
-extern int unregister_inetaddr_notifier(struct notifier_block *nb);
+int register_inetaddr_notifier(struct notifier_block *nb);
+int unregister_inetaddr_notifier(struct notifier_block *nb);
 
-extern void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
-					struct ipv4_devconf *devconf);
+void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
+				 struct ipv4_devconf *devconf);
 
-extern struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref);
+struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref);
 static inline struct net_device *ip_dev_find(struct net *net, __be32 addr)
 {
 	return __ip_dev_find(net, addr, true);
 }
 
-extern int		inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
-extern int		devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
-extern void		devinet_init(void);
-extern struct in_device	*inetdev_by_index(struct net *, int);
-extern __be32		inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
-extern __be32		inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local, int scope);
-extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask);
+int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
+int devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
+void devinet_init(void);
+struct in_device *inetdev_by_index(struct net *, int);
+__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
+__be32 inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local,
+			 int scope);
+struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
+				    __be32 mask);
 
 static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa)
 {
@@ -218,7 +220,7 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev)
 	return rtnl_dereference(dev->ip_ptr);
 }
 
-extern void in_dev_finish_destroy(struct in_device *idev);
+void in_dev_finish_destroy(struct in_device *idev);
 
 static inline void in_dev_put(struct in_device *idev)
 {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4cfb63f264e..5f01af3927ca 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -60,8 +60,8 @@ struct wireless_dev;
 #define SET_ETHTOOL_OPS(netdev,ops) \
 	( (netdev)->ethtool_ops = (ops) )
 
-extern void netdev_set_default_ethtool_ops(struct net_device *dev,
-					   const struct ethtool_ops *ops);
+void netdev_set_default_ethtool_ops(struct net_device *dev,
+				    const struct ethtool_ops *ops);
 
 /* hardware address assignment types */
 #define NET_ADDR_PERM		0	/* address is permanent (default) */
@@ -298,7 +298,7 @@ struct netdev_boot_setup {
 };
 #define NETDEV_BOOT_SETUP_MAX 8
 
-extern int __init netdev_boot_setup(char *str);
+int __init netdev_boot_setup(char *str);
 
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
@@ -394,7 +394,7 @@ enum rx_handler_result {
 typedef enum rx_handler_result rx_handler_result_t;
 typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
 
-extern void __napi_schedule(struct napi_struct *n);
+void __napi_schedule(struct napi_struct *n);
 
 static inline bool napi_disable_pending(struct napi_struct *n)
 {
@@ -445,8 +445,8 @@ static inline bool napi_reschedule(struct napi_struct *napi)
  *
  * Mark NAPI processing as complete.
  */
-extern void __napi_complete(struct napi_struct *n);
-extern void napi_complete(struct napi_struct *n);
+void __napi_complete(struct napi_struct *n);
+void napi_complete(struct napi_struct *n);
 
 /**
  *	napi_by_id - lookup a NAPI by napi_id
@@ -455,7 +455,7 @@ extern void napi_complete(struct napi_struct *n);
  * lookup @napi_id in napi_hash table
  * must be called under rcu_read_lock()
  */
-extern struct napi_struct *napi_by_id(unsigned int napi_id);
+struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /**
  *	napi_hash_add - add a NAPI to global hashtable
@@ -463,7 +463,7 @@ extern struct napi_struct *napi_by_id(unsigned int napi_id);
  *
  * generate a new napi_id and store a @napi under it in napi_hash
  */
-extern void napi_hash_add(struct napi_struct *napi);
+void napi_hash_add(struct napi_struct *napi);
 
 /**
  *	napi_hash_del - remove a NAPI from global table
@@ -472,7 +472,7 @@ extern void napi_hash_add(struct napi_struct *napi);
  * Warning: caller must observe rcu grace period
  * before freeing memory containing @napi
  */
-extern void napi_hash_del(struct napi_struct *napi);
+void napi_hash_del(struct napi_struct *napi);
 
 /**
  *	napi_disable - prevent NAPI from scheduling
@@ -664,8 +664,8 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
 extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 
 #ifdef CONFIG_RFS_ACCEL
-extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
-				u32 flow_id, u16 filter_id);
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
+			 u16 filter_id);
 #endif
 
 /* This structure contains an instance of an RX queue. */
@@ -1497,9 +1497,9 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 		f(dev, &dev->_tx[i], arg);
 }
 
-extern struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-					   struct sk_buff *skb);
-extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb);
+u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
 
 /*
  * Net namespace inlines
@@ -1683,8 +1683,8 @@ struct packet_offload {
 #define NETDEV_CHANGEUPPER	0x0015
 #define NETDEV_RESEND_IGMP	0x0016
 
-extern int register_netdevice_notifier(struct notifier_block *nb);
-extern int unregister_netdevice_notifier(struct notifier_block *nb);
+int register_netdevice_notifier(struct notifier_block *nb);
+int unregister_netdevice_notifier(struct notifier_block *nb);
 
 struct netdev_notifier_info {
 	struct net_device *dev;
@@ -1707,9 +1707,9 @@ netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
 	return info->dev;
 }
 
-extern int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
-					 struct netdev_notifier_info *info);
-extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
+int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
+				  struct netdev_notifier_info *info);
+int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
 
 
 extern rwlock_t				dev_base_lock;		/* Device list lock */
@@ -1764,54 +1764,52 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
 	return lh == &net->dev_base_head ? NULL : net_device_entry(lh);
 }
 
-extern int 			netdev_boot_setup_check(struct net_device *dev);
-extern unsigned long		netdev_boot_base(const char *prefix, int unit);
-extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
-					      const char *hwaddr);
-extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
-extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
-extern void		dev_add_pack(struct packet_type *pt);
-extern void		dev_remove_pack(struct packet_type *pt);
-extern void		__dev_remove_pack(struct packet_type *pt);
-extern void		dev_add_offload(struct packet_offload *po);
-extern void		dev_remove_offload(struct packet_offload *po);
-extern void		__dev_remove_offload(struct packet_offload *po);
-
-extern struct net_device	*dev_get_by_flags_rcu(struct net *net, unsigned short flags,
-						      unsigned short mask);
-extern struct net_device	*dev_get_by_name(struct net *net, const char *name);
-extern struct net_device	*dev_get_by_name_rcu(struct net *net, const char *name);
-extern struct net_device	*__dev_get_by_name(struct net *net, const char *name);
-extern int		dev_alloc_name(struct net_device *dev, const char *name);
-extern int		dev_open(struct net_device *dev);
-extern int		dev_close(struct net_device *dev);
-extern void		dev_disable_lro(struct net_device *dev);
-extern int		dev_loopback_xmit(struct sk_buff *newskb);
-extern int		dev_queue_xmit(struct sk_buff *skb);
-extern int		register_netdevice(struct net_device *dev);
-extern void		unregister_netdevice_queue(struct net_device *dev,
-						   struct list_head *head);
-extern void		unregister_netdevice_many(struct list_head *head);
+int netdev_boot_setup_check(struct net_device *dev);
+unsigned long netdev_boot_base(const char *prefix, int unit);
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+				       const char *hwaddr);
+struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
+struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
+void dev_add_pack(struct packet_type *pt);
+void dev_remove_pack(struct packet_type *pt);
+void __dev_remove_pack(struct packet_type *pt);
+void dev_add_offload(struct packet_offload *po);
+void dev_remove_offload(struct packet_offload *po);
+void __dev_remove_offload(struct packet_offload *po);
+
+struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags,
+					unsigned short mask);
+struct net_device *dev_get_by_name(struct net *net, const char *name);
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
+struct net_device *__dev_get_by_name(struct net *net, const char *name);
+int dev_alloc_name(struct net_device *dev, const char *name);
+int dev_open(struct net_device *dev);
+int dev_close(struct net_device *dev);
+void dev_disable_lro(struct net_device *dev);
+int dev_loopback_xmit(struct sk_buff *newskb);
+int dev_queue_xmit(struct sk_buff *skb);
+int register_netdevice(struct net_device *dev);
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
+void unregister_netdevice_many(struct list_head *head);
 static inline void unregister_netdevice(struct net_device *dev)
 {
 	unregister_netdevice_queue(dev, NULL);
 }
 
-extern int 		netdev_refcnt_read(const struct net_device *dev);
-extern void		free_netdev(struct net_device *dev);
-extern void		synchronize_net(void);
-extern int		init_dummy_netdev(struct net_device *dev);
+int netdev_refcnt_read(const struct net_device *dev);
+void free_netdev(struct net_device *dev);
+void synchronize_net(void);
+int init_dummy_netdev(struct net_device *dev);
 
-extern struct net_device	*dev_get_by_index(struct net *net, int ifindex);
-extern struct net_device	*__dev_get_by_index(struct net *net, int ifindex);
-extern struct net_device	*dev_get_by_index_rcu(struct net *net, int ifindex);
-extern int		netdev_get_name(struct net *net, char *name, int ifindex);
-extern int		dev_restart(struct net_device *dev);
+struct net_device *dev_get_by_index(struct net *net, int ifindex);
+struct net_device *__dev_get_by_index(struct net *net, int ifindex);
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+int netdev_get_name(struct net *net, char *name, int ifindex);
+int dev_restart(struct net_device *dev);
 #ifdef CONFIG_NETPOLL_TRAP
-extern int		netpoll_trap(void);
+int netpoll_trap(void);
 #endif
-extern int	       skb_gro_receive(struct sk_buff **head,
-				       struct sk_buff *skb);
+int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
 
 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
 {
@@ -1883,7 +1881,7 @@ static inline int dev_parse_header(const struct sk_buff *skb,
 }
 
 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
-extern int		register_gifconf(unsigned int family, gifconf_func_t * gifconf);
+int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
 static inline int unregister_gifconf(unsigned int family)
 {
 	return register_gifconf(family, NULL);
@@ -1954,7 +1952,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
 
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
-extern void __netif_schedule(struct Qdisc *q);
+void __netif_schedule(struct Qdisc *q);
 
 static inline void netif_schedule_queue(struct netdev_queue *txq)
 {
@@ -2274,8 +2272,8 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 }
 
 #ifdef CONFIG_XPS
-extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
-			       u16 index);
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
+			u16 index);
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
 				      struct cpumask *mask,
@@ -2306,12 +2304,10 @@ static inline bool netif_is_multiqueue(const struct net_device *dev)
 	return dev->num_tx_queues > 1;
 }
 
-extern int netif_set_real_num_tx_queues(struct net_device *dev,
-					unsigned int txq);
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
 
 #ifdef CONFIG_RPS
-extern int netif_set_real_num_rx_queues(struct net_device *dev,
-					unsigned int rxq);
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
 #else
 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
 						unsigned int rxq)
@@ -2338,28 +2334,27 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
 }
 
 #define DEFAULT_MAX_NUM_RSS_QUEUES	(8)
-extern int netif_get_num_default_rss_queues(void);
+int netif_get_num_default_rss_queues(void);
 
 /* Use this variant when it is known for sure that it
  * is executing from hardware interrupt context or with hardware interrupts
  * disabled.
  */
-extern void dev_kfree_skb_irq(struct sk_buff *skb);
+void dev_kfree_skb_irq(struct sk_buff *skb);
 
 /* Use this variant in places where it could be invoked
  * from either hardware interrupt or other context, with hardware interrupts
  * either disabled or enabled.
  */
-extern void dev_kfree_skb_any(struct sk_buff *skb);
+void dev_kfree_skb_any(struct sk_buff *skb);
 
-extern int		netif_rx(struct sk_buff *skb);
-extern int		netif_rx_ni(struct sk_buff *skb);
-extern int		netif_receive_skb(struct sk_buff *skb);
-extern gro_result_t	napi_gro_receive(struct napi_struct *napi,
-					 struct sk_buff *skb);
-extern void		napi_gro_flush(struct napi_struct *napi, bool flush_old);
-extern struct sk_buff *	napi_get_frags(struct napi_struct *napi);
-extern gro_result_t	napi_gro_frags(struct napi_struct *napi);
+int netif_rx(struct sk_buff *skb);
+int netif_rx_ni(struct sk_buff *skb);
+int netif_receive_skb(struct sk_buff *skb);
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
+void napi_gro_flush(struct napi_struct *napi, bool flush_old);
+struct sk_buff *napi_get_frags(struct napi_struct *napi);
+gro_result_t napi_gro_frags(struct napi_struct *napi);
 
 static inline void napi_free_frags(struct napi_struct *napi)
 {
@@ -2367,40 +2362,35 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
-extern int netdev_rx_handler_register(struct net_device *dev,
-				      rx_handler_func_t *rx_handler,
-				      void *rx_handler_data);
-extern void netdev_rx_handler_unregister(struct net_device *dev);
-
-extern bool		dev_valid_name(const char *name);
-extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
-extern int		dev_ethtool(struct net *net, struct ifreq *);
-extern unsigned int	dev_get_flags(const struct net_device *);
-extern int		__dev_change_flags(struct net_device *, unsigned int flags);
-extern int		dev_change_flags(struct net_device *, unsigned int);
-extern void		__dev_notify_flags(struct net_device *, unsigned int old_flags);
-extern int		dev_change_name(struct net_device *, const char *);
-extern int		dev_set_alias(struct net_device *, const char *, size_t);
-extern int		dev_change_net_namespace(struct net_device *,
-						 struct net *, const char *);
-extern int		dev_set_mtu(struct net_device *, int);
-extern void		dev_set_group(struct net_device *, int);
-extern int		dev_set_mac_address(struct net_device *,
-					    struct sockaddr *);
-extern int		dev_change_carrier(struct net_device *,
-					   bool new_carrier);
-extern int		dev_get_phys_port_id(struct net_device *dev,
-					     struct netdev_phys_port_id *ppid);
-extern int		dev_hard_start_xmit(struct sk_buff *skb,
-					    struct net_device *dev,
-					    struct netdev_queue *txq);
-extern int		dev_forward_skb(struct net_device *dev,
-					struct sk_buff *skb);
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_handler_func_t *rx_handler,
+			       void *rx_handler_data);
+void netdev_rx_handler_unregister(struct net_device *dev);
+
+bool dev_valid_name(const char *name);
+int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
+int dev_ethtool(struct net *net, struct ifreq *);
+unsigned int dev_get_flags(const struct net_device *);
+int __dev_change_flags(struct net_device *, unsigned int flags);
+int dev_change_flags(struct net_device *, unsigned int);
+void __dev_notify_flags(struct net_device *, unsigned int old_flags);
+int dev_change_name(struct net_device *, const char *);
+int dev_set_alias(struct net_device *, const char *, size_t);
+int dev_change_net_namespace(struct net_device *, struct net *, const char *);
+int dev_set_mtu(struct net_device *, int);
+void dev_set_group(struct net_device *, int);
+int dev_set_mac_address(struct net_device *, struct sockaddr *);
+int dev_change_carrier(struct net_device *, bool new_carrier);
+int dev_get_phys_port_id(struct net_device *dev,
+			 struct netdev_phys_port_id *ppid);
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+			struct netdev_queue *txq);
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 
 extern int		netdev_budget;
 
 /* Called by rtnetlink.c:rtnl_unlock() */
-extern void netdev_run_todo(void);
+void netdev_run_todo(void);
 
 /**
  *	dev_put - release reference to device
@@ -2433,9 +2423,9 @@ static inline void dev_hold(struct net_device *dev)
  * kind of lower layer not just hardware media.
  */
 
-extern void linkwatch_init_dev(struct net_device *dev);
-extern void linkwatch_fire_event(struct net_device *dev);
-extern void linkwatch_forget_dev(struct net_device *dev);
+void linkwatch_init_dev(struct net_device *dev);
+void linkwatch_fire_event(struct net_device *dev);
+void linkwatch_forget_dev(struct net_device *dev);
 
 /**
  *	netif_carrier_ok - test if carrier present
@@ -2448,13 +2438,13 @@ static inline bool netif_carrier_ok(const struct net_device *dev)
 	return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
 }
 
-extern unsigned long dev_trans_start(struct net_device *dev);
+unsigned long dev_trans_start(struct net_device *dev);
 
-extern void __netdev_watchdog_up(struct net_device *dev);
+void __netdev_watchdog_up(struct net_device *dev);
 
-extern void netif_carrier_on(struct net_device *dev);
+void netif_carrier_on(struct net_device *dev);
 
-extern void netif_carrier_off(struct net_device *dev);
+void netif_carrier_off(struct net_device *dev);
 
 /**
  *	netif_dormant_on - mark device as dormant.
@@ -2522,9 +2512,9 @@ static inline bool netif_device_present(struct net_device *dev)
 	return test_bit(__LINK_STATE_PRESENT, &dev->state);
 }
 
-extern void netif_device_detach(struct net_device *dev);
+void netif_device_detach(struct net_device *dev);
 
-extern void netif_device_attach(struct net_device *dev);
+void netif_device_attach(struct net_device *dev);
 
 /*
  * Network interface message level settings
@@ -2733,98 +2723,93 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
 
 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
 
-extern void		ether_setup(struct net_device *dev);
+void ether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
-				       void (*setup)(struct net_device *),
-				       unsigned int txqs, unsigned int rxqs);
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+				    void (*setup)(struct net_device *),
+				    unsigned int txqs, unsigned int rxqs);
 #define alloc_netdev(sizeof_priv, name, setup) \
 	alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1)
 
 #define alloc_netdev_mq(sizeof_priv, name, setup, count) \
 	alloc_netdev_mqs(sizeof_priv, name, setup, count, count)
 
-extern int		register_netdev(struct net_device *dev);
-extern void		unregister_netdev(struct net_device *dev);
+int register_netdev(struct net_device *dev);
+void unregister_netdev(struct net_device *dev);
 
 /* General hardware address lists handling functions */
-extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
-				  struct netdev_hw_addr_list *from_list,
-				  int addr_len, unsigned char addr_type);
-extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
-				   struct netdev_hw_addr_list *from_list,
-				   int addr_len, unsigned char addr_type);
-extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
-			  struct netdev_hw_addr_list *from_list,
-			  int addr_len);
-extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
-			     struct netdev_hw_addr_list *from_list,
-			     int addr_len);
-extern void __hw_addr_flush(struct netdev_hw_addr_list *list);
-extern void __hw_addr_init(struct netdev_hw_addr_list *list);
+int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+			   struct netdev_hw_addr_list *from_list,
+			   int addr_len, unsigned char addr_type);
+void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+			    struct netdev_hw_addr_list *from_list,
+			    int addr_len, unsigned char addr_type);
+int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+		   struct netdev_hw_addr_list *from_list, int addr_len);
+void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+		      struct netdev_hw_addr_list *from_list, int addr_len);
+void __hw_addr_flush(struct netdev_hw_addr_list *list);
+void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
-extern int dev_addr_add(struct net_device *dev, const unsigned char *addr,
-			unsigned char addr_type);
-extern int dev_addr_del(struct net_device *dev, const unsigned char *addr,
-			unsigned char addr_type);
-extern int dev_addr_add_multiple(struct net_device *to_dev,
-				 struct net_device *from_dev,
-				 unsigned char addr_type);
-extern int dev_addr_del_multiple(struct net_device *to_dev,
-				 struct net_device *from_dev,
-				 unsigned char addr_type);
-extern void dev_addr_flush(struct net_device *dev);
-extern int dev_addr_init(struct net_device *dev);
+int dev_addr_add(struct net_device *dev, const unsigned char *addr,
+		 unsigned char addr_type);
+int dev_addr_del(struct net_device *dev, const unsigned char *addr,
+		 unsigned char addr_type);
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev, unsigned char addr_type);
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev, unsigned char addr_type);
+void dev_addr_flush(struct net_device *dev);
+int dev_addr_init(struct net_device *dev);
 
 /* Functions used for unicast addresses handling */
-extern int dev_uc_add(struct net_device *dev, const unsigned char *addr);
-extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr);
-extern int dev_uc_del(struct net_device *dev, const unsigned char *addr);
-extern int dev_uc_sync(struct net_device *to, struct net_device *from);
-extern int dev_uc_sync_multiple(struct net_device *to, struct net_device *from);
-extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
-extern void dev_uc_flush(struct net_device *dev);
-extern void dev_uc_init(struct net_device *dev);
+int dev_uc_add(struct net_device *dev, const unsigned char *addr);
+int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr);
+int dev_uc_del(struct net_device *dev, const unsigned char *addr);
+int dev_uc_sync(struct net_device *to, struct net_device *from);
+int dev_uc_sync_multiple(struct net_device *to, struct net_device *from);
+void dev_uc_unsync(struct net_device *to, struct net_device *from);
+void dev_uc_flush(struct net_device *dev);
+void dev_uc_init(struct net_device *dev);
 
 /* Functions used for multicast addresses handling */
-extern int dev_mc_add(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_del(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_sync(struct net_device *to, struct net_device *from);
-extern int dev_mc_sync_multiple(struct net_device *to, struct net_device *from);
-extern void dev_mc_unsync(struct net_device *to, struct net_device *from);
-extern void dev_mc_flush(struct net_device *dev);
-extern void dev_mc_init(struct net_device *dev);
+int dev_mc_add(struct net_device *dev, const unsigned char *addr);
+int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
+int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr);
+int dev_mc_del(struct net_device *dev, const unsigned char *addr);
+int dev_mc_del_global(struct net_device *dev, const unsigned char *addr);
+int dev_mc_sync(struct net_device *to, struct net_device *from);
+int dev_mc_sync_multiple(struct net_device *to, struct net_device *from);
+void dev_mc_unsync(struct net_device *to, struct net_device *from);
+void dev_mc_flush(struct net_device *dev);
+void dev_mc_init(struct net_device *dev);
 
 /* Functions used for secondary unicast and multicast support */
-extern void		dev_set_rx_mode(struct net_device *dev);
-extern void		__dev_set_rx_mode(struct net_device *dev);
-extern int		dev_set_promiscuity(struct net_device *dev, int inc);
-extern int		dev_set_allmulti(struct net_device *dev, int inc);
-extern void		netdev_state_change(struct net_device *dev);
-extern void		netdev_notify_peers(struct net_device *dev);
-extern void		netdev_features_change(struct net_device *dev);
+void dev_set_rx_mode(struct net_device *dev);
+void __dev_set_rx_mode(struct net_device *dev);
+int dev_set_promiscuity(struct net_device *dev, int inc);
+int dev_set_allmulti(struct net_device *dev, int inc);
+void netdev_state_change(struct net_device *dev);
+void netdev_notify_peers(struct net_device *dev);
+void netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
-extern void		dev_load(struct net *net, const char *name);
-extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
-					       struct rtnl_link_stats64 *storage);
-extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
-				    const struct net_device_stats *netdev_stats);
+void dev_load(struct net *net, const char *name);
+struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					struct rtnl_link_stats64 *storage);
+void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+			     const struct net_device_stats *netdev_stats);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
 extern int		bpf_jit_enable;
 
-extern bool netdev_has_upper_dev(struct net_device *dev,
-				 struct net_device *upper_dev);
-extern bool netdev_has_any_upper_dev(struct net_device *dev);
-extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
-							    struct list_head **iter);
+bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
+bool netdev_has_any_upper_dev(struct net_device *dev);
+struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+						     struct list_head **iter);
 
 /* iterate through upper list, must be called under RCU read lock */
 #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
@@ -2833,10 +2818,10 @@ extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *d
 	     updev; \
 	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
 
-extern void *netdev_lower_get_next_private(struct net_device *dev,
-					   struct list_head **iter);
-extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
-					       struct list_head **iter);
+void *netdev_lower_get_next_private(struct net_device *dev,
+				    struct list_head **iter);
+void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+					struct list_head **iter);
 
 #define netdev_for_each_lower_private(dev, priv, iter) \
 	for (iter = (dev)->adj_list.lower.next, \
@@ -2850,27 +2835,26 @@ extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 	     priv; \
 	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
 
-extern void *netdev_adjacent_get_private(struct list_head *adj_list);
-extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
-extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
-extern int netdev_upper_dev_link(struct net_device *dev,
+void *netdev_adjacent_get_private(struct list_head *adj_list);
+struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
+struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
+int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev);
+int netdev_master_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev);
-extern int netdev_master_upper_dev_link(struct net_device *dev,
-					struct net_device *upper_dev);
-extern int netdev_master_upper_dev_link_private(struct net_device *dev,
-						struct net_device *upper_dev,
-						void *private);
-extern void netdev_upper_dev_unlink(struct net_device *dev,
-				    struct net_device *upper_dev);
-extern void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
-					      struct net_device *lower_dev);
-extern void *netdev_lower_dev_get_private(struct net_device *dev,
-					  struct net_device *lower_dev);
-extern int skb_checksum_help(struct sk_buff *skb);
-extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features, bool tx_path);
-extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
-					  netdev_features_t features);
+int netdev_master_upper_dev_link_private(struct net_device *dev,
+					 struct net_device *upper_dev,
+					 void *private);
+void netdev_upper_dev_unlink(struct net_device *dev,
+			     struct net_device *upper_dev);
+void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+				       struct net_device *lower_dev);
+void *netdev_lower_dev_get_private(struct net_device *dev,
+				   struct net_device *lower_dev);
+int skb_checksum_help(struct sk_buff *skb);
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path);
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features);
 
 static inline
 struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
@@ -2892,30 +2876,30 @@ static inline bool can_checksum_protocol(netdev_features_t features,
 }
 
 #ifdef CONFIG_BUG
-extern void netdev_rx_csum_fault(struct net_device *dev);
+void netdev_rx_csum_fault(struct net_device *dev);
 #else
 static inline void netdev_rx_csum_fault(struct net_device *dev)
 {
 }
 #endif
 /* rx skb timestamps */
-extern void		net_enable_timestamp(void);
-extern void		net_disable_timestamp(void);
+void net_enable_timestamp(void);
+void net_disable_timestamp(void);
 
 #ifdef CONFIG_PROC_FS
-extern int __init dev_proc_init(void);
+int __init dev_proc_init(void);
 #else
 #define dev_proc_init() 0
 #endif
 
-extern int netdev_class_create_file(struct class_attribute *class_attr);
-extern void netdev_class_remove_file(struct class_attribute *class_attr);
+int netdev_class_create_file(struct class_attribute *class_attr);
+void netdev_class_remove_file(struct class_attribute *class_attr);
 
 extern struct kobj_ns_type_operations net_ns_type_operations;
 
-extern const char *netdev_drivername(const struct net_device *dev);
+const char *netdev_drivername(const struct net_device *dev);
 
-extern void linkwatch_run_queue(void);
+void linkwatch_run_queue(void);
 
 static inline netdev_features_t netdev_get_wanted_features(
 	struct net_device *dev)
@@ -3007,22 +2991,22 @@ static inline const char *netdev_name(const struct net_device *dev)
 	return dev->name;
 }
 
-extern __printf(3, 4)
+__printf(3, 4)
 int netdev_printk(const char *level, const struct net_device *dev,
 		  const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_emerg(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_alert(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_crit(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_err(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_warn(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_notice(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_info(const struct net_device *dev, const char *format, ...);
 
 #define MODULE_ALIAS_NETDEV(device) \
-- 
cgit v1.2.3


From e34ff4906199d2ebd248ae897ae34f52bea151c9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 11 Sep 2013 22:29:05 -0400
Subject: sysfs: remove ktype->namespace() invocations in directory code

For some unrecognizable reason, namespace information is communicated
to sysfs through ktype->namespace() callback when there's *nothing*
which needs the use of a callback.  The whole sequence of operations
is completely synchronous and sysfs operations simply end up calling
back into the layer which just invoked it in order to find out the
namespace information, which is completely backwards, obfuscates
what's going on and unnecessarily tangles two separate layers.

This patch doesn't remove ktype->namespace() but shifts its handling
to kobject layer.  We probably want to get rid of the callback in the
long term.

This patch adds an explicit param to sysfs_{create|rename|move}_dir()
and renames them to sysfs_{create|rename|move}_dir_ns(), respectively.
ktype->namespace() invocations are moved to the calling sites of the
above functions.  A new helper kboject_namespace() is introduced which
directly tests kobj_ns_type_operations->type which should give the
same result as testing sysfs_fs_type(parent_sd) and returns @kobj's
namespace tag as necessary.  kobject_namespace() is extern as it will
be used from another file in the following patches.

This patch should be an equivalent conversion without any functional
difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Kay Sievers <kay@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/dir.c          | 23 ++++++++---------------
 include/linux/kobject.h |  1 +
 include/linux/sysfs.h   | 20 ++++++++++++--------
 lib/kobject.c           | 28 ++++++++++++++++++++++++----
 4 files changed, 45 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 834c64cb7f88..878ac3afe1b8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -730,14 +730,14 @@ static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
 }
 
 /**
- *	sysfs_create_dir - create a directory for an object.
- *	@kobj:		object we're creating directory for.
+ * sysfs_create_dir_ns - create a directory for an object with a namespace tag
+ * @kobj: object we're creating directory for
+ * @ns: the namespace tag to use
  */
-int sysfs_create_dir(struct kobject *kobj)
+int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 {
 	enum kobj_ns_type type;
 	struct sysfs_dirent *parent_sd, *sd;
-	const void *ns = NULL;
 	int error = 0;
 
 	BUG_ON(!kobj);
@@ -750,8 +750,6 @@ int sysfs_create_dir(struct kobject *kobj)
 	if (!parent_sd)
 		return -ENOENT;
 
-	if (sysfs_ns_type(parent_sd))
-		ns = kobj->ktype->namespace(kobj);
 	type = sysfs_read_ns_type(kobj);
 
 	error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd);
@@ -909,26 +907,21 @@ int sysfs_rename(struct sysfs_dirent *sd,
 	return error;
 }
 
-int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
+int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
+			const void *new_ns)
 {
 	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
-	const void *new_ns = NULL;
-
-	if (sysfs_ns_type(parent_sd))
-		new_ns = kobj->ktype->namespace(kobj);
 
 	return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name);
 }
 
-int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
+int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
+		      const void *new_ns)
 {
 	struct sysfs_dirent *sd = kobj->sd;
 	struct sysfs_dirent *new_parent_sd;
-	const void *new_ns = NULL;
 
 	BUG_ON(!sd->s_parent);
-	if (sysfs_ns_type(sd->s_parent))
-		new_ns = kobj->ktype->namespace(kobj);
 	new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
 		new_parent_kobj->sd : &sysfs_root;
 
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index de6dcbcc6ef7..e7ba650086ce 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -107,6 +107,7 @@ extern int __must_check kobject_move(struct kobject *, struct kobject *);
 extern struct kobject *kobject_get(struct kobject *kobj);
 extern void kobject_put(struct kobject *kobj);
 
+extern const void *kobject_namespace(struct kobject *kobj);
 extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 
 struct kobj_type {
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 82f7fac78e77..7f56bad3be82 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -182,11 +182,13 @@ struct sysfs_dirent;
 int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
 			    void *data, struct module *owner);
 
-int __must_check sysfs_create_dir(struct kobject *kobj);
+int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns);
 void sysfs_remove_dir(struct kobject *kobj);
-int __must_check sysfs_rename_dir(struct kobject *kobj, const char *new_name);
-int __must_check sysfs_move_dir(struct kobject *kobj,
-				struct kobject *new_parent_kobj);
+int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
+				     const void *new_ns);
+int __must_check sysfs_move_dir_ns(struct kobject *kobj,
+				   struct kobject *new_parent_kobj,
+				   const void *new_ns);
 
 int __must_check sysfs_create_file_ns(struct kobject *kobj,
 				      const struct attribute *attr,
@@ -258,7 +260,7 @@ static inline int sysfs_schedule_callback(struct kobject *kobj,
 	return -ENOSYS;
 }
 
-static inline int sysfs_create_dir(struct kobject *kobj)
+static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 {
 	return 0;
 }
@@ -267,13 +269,15 @@ static inline void sysfs_remove_dir(struct kobject *kobj)
 {
 }
 
-static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
+static inline int sysfs_rename_dir_ns(struct kobject *kobj,
+				      const char *new_name, const void *new_ns)
 {
 	return 0;
 }
 
-static inline int sysfs_move_dir(struct kobject *kobj,
-				 struct kobject *new_parent_kobj)
+static inline int sysfs_move_dir_ns(struct kobject *kobj,
+				    struct kobject *new_parent_kobj,
+				    const void *new_ns)
 {
 	return 0;
 }
diff --git a/lib/kobject.c b/lib/kobject.c
index 962175134702..85fb3a161b21 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -18,6 +18,24 @@
 #include <linux/stat.h>
 #include <linux/slab.h>
 
+/**
+ * kobject_namespace - return @kobj's namespace tag
+ * @kobj: kobject in question
+ *
+ * Returns namespace tag of @kobj if its parent has namespace ops enabled
+ * and thus @kobj should have a namespace tag associated with it.  Returns
+ * %NULL otherwise.
+ */
+const void *kobject_namespace(struct kobject *kobj)
+{
+	const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj);
+
+	if (!ns_ops || ns_ops->type == KOBJ_NS_TYPE_NONE)
+		return NULL;
+
+	return kobj->ktype->namespace(kobj);
+}
+
 /*
  * populate_dir - populate directory with attributes.
  * @kobj: object we're working on.
@@ -46,8 +64,9 @@ static int populate_dir(struct kobject *kobj)
 
 static int create_dir(struct kobject *kobj)
 {
-	int error = 0;
-	error = sysfs_create_dir(kobj);
+	int error;
+
+	error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj));
 	if (!error) {
 		error = populate_dir(kobj);
 		if (error)
@@ -428,7 +447,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
 		goto out;
 	}
 
-	error = sysfs_rename_dir(kobj, new_name);
+	error = sysfs_rename_dir_ns(kobj, new_name, kobject_namespace(kobj));
 	if (error)
 		goto out;
 
@@ -472,6 +491,7 @@ int kobject_move(struct kobject *kobj, struct kobject *new_parent)
 		if (kobj->kset)
 			new_parent = kobject_get(&kobj->kset->kobj);
 	}
+
 	/* old object path */
 	devpath = kobject_get_path(kobj, GFP_KERNEL);
 	if (!devpath) {
@@ -486,7 +506,7 @@ int kobject_move(struct kobject *kobj, struct kobject *new_parent)
 	sprintf(devpath_string, "DEVPATH_OLD=%s", devpath);
 	envp[0] = devpath_string;
 	envp[1] = NULL;
-	error = sysfs_move_dir(kobj, new_parent);
+	error = sysfs_move_dir_ns(kobj, new_parent, kobject_namespace(kobj));
 	if (error)
 		goto out;
 	old_parent = kobj->parent;
-- 
cgit v1.2.3


From 4b30ee58ee64c64f59fd876e4afa6ed82caef3a4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 11 Sep 2013 22:29:06 -0400
Subject: sysfs: remove ktype->namespace() invocations in symlink code

There's no reason for sysfs to be calling ktype->namespace().  It is
backwards, obfuscates what's going on and unnecessarily tangles two
separate layers.

There are two places where symlink code calls ktype->namespace().

* sysfs_do_create_link_sd() calls it to find out the namespace tag of
  the target directory.  Unless symlinking races with cross-namespace
  renaming, this equals @target_sd->s_ns.

* sysfs_rename_link() uses it to find out the new namespace to rename
  to and the new namespace can be different from the existing one.
  The function is renamed to sysfs_rename_link_ns() with an explicit
  @ns argument and the ktype->namespace() invocation is shifted to the
  device layer.

While this patch replaces ktype->namespace() invocation with the
recorded result in @target_sd, this shouldn't result in any behvior
difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Kay Sievers <kay@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c   |  8 +++++---
 fs/sysfs/symlink.c    | 16 +++++++---------
 include/linux/sysfs.h | 16 ++++++++++++----
 3 files changed, 24 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index c7cfadcf6752..3335000be2dc 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1881,6 +1881,7 @@ EXPORT_SYMBOL_GPL(device_destroy);
  */
 int device_rename(struct device *dev, const char *new_name)
 {
+	struct kobject *kobj = &dev->kobj;
 	char *old_device_name = NULL;
 	int error;
 
@@ -1898,13 +1899,14 @@ int device_rename(struct device *dev, const char *new_name)
 	}
 
 	if (dev->class) {
-		error = sysfs_rename_link(&dev->class->p->subsys.kobj,
-			&dev->kobj, old_device_name, new_name);
+		error = sysfs_rename_link_ns(&dev->class->p->subsys.kobj,
+					     kobj, old_device_name,
+					     new_name, kobject_namespace(kobj));
 		if (error)
 			goto out;
 	}
 
-	error = kobject_rename(&dev->kobj, new_name);
+	error = kobject_rename(kobj, new_name);
 	if (error)
 		goto out;
 
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 2dd4507d9edd..12d58ada3e6d 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -52,7 +52,7 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
 
 	ns_type = sysfs_ns_type(parent_sd);
 	if (ns_type)
-		sd->s_ns = target->ktype->namespace(target);
+		sd->s_ns = target_sd->s_ns;
 	sd->s_symlink.target_sd = target_sd;
 	target_sd = NULL;	/* reference is now owned by the symlink */
 
@@ -181,19 +181,20 @@ void sysfs_remove_link(struct kobject *kobj, const char *name)
 EXPORT_SYMBOL_GPL(sysfs_remove_link);
 
 /**
- *	sysfs_rename_link - rename symlink in object's directory.
+ *	sysfs_rename_link_ns - rename symlink in object's directory.
  *	@kobj:	object we're acting for.
  *	@targ:	object we're pointing to.
  *	@old:	previous name of the symlink.
  *	@new:	new name of the symlink.
+ *	@new_ns: new namespace of the symlink.
  *
  *	A helper function for the common rename symlink idiom.
  */
-int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
-			const char *old, const char *new)
+int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
+			 const char *old, const char *new, const void *new_ns)
 {
 	struct sysfs_dirent *parent_sd, *sd = NULL;
-	const void *old_ns = NULL, *new_ns = NULL;
+	const void *old_ns = NULL;
 	int result;
 
 	if (!kobj)
@@ -215,16 +216,13 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
 	if (sd->s_symlink.target_sd->s_dir.kobj != targ)
 		goto out;
 
-	if (sysfs_ns_type(parent_sd))
-		new_ns = targ->ktype->namespace(targ);
-
 	result = sysfs_rename(sd, parent_sd, new_ns, new);
 
 out:
 	sysfs_put(sd);
 	return result;
 }
-EXPORT_SYMBOL_GPL(sysfs_rename_link);
+EXPORT_SYMBOL_GPL(sysfs_rename_link_ns);
 
 static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
 				 struct sysfs_dirent *target_sd, char *path)
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 7f56bad3be82..c792f73ac7fa 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -213,8 +213,9 @@ int __must_check sysfs_create_link_nowarn(struct kobject *kobj,
 					  const char *name);
 void sysfs_remove_link(struct kobject *kobj, const char *name);
 
-int sysfs_rename_link(struct kobject *kobj, struct kobject *target,
-			const char *old_name, const char *new_name);
+int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *target,
+			 const char *old_name, const char *new_name,
+			 const void *new_ns);
 
 void sysfs_delete_link(struct kobject *dir, struct kobject *targ,
 			const char *name);
@@ -340,8 +341,9 @@ static inline void sysfs_remove_link(struct kobject *kobj, const char *name)
 {
 }
 
-static inline int sysfs_rename_link(struct kobject *k, struct kobject *t,
-				    const char *old_name, const char *new_name)
+static inline int sysfs_rename_link_ns(struct kobject *k, struct kobject *t,
+				       const char *old_name,
+				       const char *new_name, const void *ns)
 {
 	return 0;
 }
@@ -454,4 +456,10 @@ static inline void sysfs_remove_file(struct kobject *kobj,
 	return sysfs_remove_file_ns(kobj, attr, NULL);
 }
 
+static inline int sysfs_rename_link(struct kobject *kobj, struct kobject *target,
+				    const char *old_name, const char *new_name)
+{
+	return sysfs_rename_link_ns(kobj, target, old_name, new_name, NULL);
+}
+
 #endif /* _SYSFS_H_ */
-- 
cgit v1.2.3


From 388975cccaaf11abd47525f664c76891c440481a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 11 Sep 2013 23:19:13 -0400
Subject: sysfs: clean up sysfs_get_dirent()

The pre-existing sysfs interfaces which take explicit namespace
argument are weird in that they place the optional @ns in front of
@name which is contrary to the established convention.  For example,
we end up forcing vast majority of sysfs_get_dirent() users to do
sysfs_get_dirent(parent, NULL, name), which is silly and error-prone
especially as @ns and @name may be interchanged without causing
compilation warning.

This renames sysfs_get_dirent() to sysfs_get_dirent_ns() and swap the
positions of @name and @ns, and sysfs_get_dirent() is now a wrapper
around sysfs_get_dirent_ns().  This makes confusions a lot less
likely.

There are other interfaces which take @ns before @name.  They'll be
updated by following patches.

This patch doesn't introduce any functional changes.

v2: EXPORT_SYMBOL_GPL() wasn't updated leading to undefined symbol
    error on module builds.  Reported by build test robot.  Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Kay Sievers <kay@vrfy.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpio/gpiolib.c |  2 +-
 drivers/md/bitmap.c    |  4 ++--
 drivers/md/md.c        |  2 +-
 drivers/md/md.h        |  2 +-
 fs/sysfs/dir.c         | 11 ++++++-----
 fs/sysfs/file.c        |  4 ++--
 fs/sysfs/group.c       | 10 +++++-----
 fs/sysfs/symlink.c     |  2 +-
 fs/sysfs/sysfs.h       |  3 ---
 include/linux/sysfs.h  | 19 ++++++++++++-------
 10 files changed, 31 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 86ef3461ec06..a094356020a6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -408,7 +408,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
 			IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING;
 
 	if (!value_sd) {
-		value_sd = sysfs_get_dirent(dev->kobj.sd, NULL, "value");
+		value_sd = sysfs_get_dirent(dev->kobj.sd, "value");
 		if (!value_sd) {
 			ret = -ENODEV;
 			goto err_out;
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index a7fd82133b12..12dc29ba7399 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1654,9 +1654,9 @@ int bitmap_create(struct mddev *mddev)
 	bitmap->mddev = mddev;
 
 	if (mddev->kobj.sd)
-		bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap");
+		bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
 	if (bm) {
-		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear");
+		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
 		sysfs_put(bm);
 	} else
 		bitmap->sysfs_can_clear = NULL;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index adf4d7e1d5e1..8a0d7625681c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3555,7 +3555,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 			printk(KERN_WARNING
 			       "md: cannot register extra attributes for %s\n",
 			       mdname(mddev));
-		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
+		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
 	}		
 	if (mddev->pers->sync_request != NULL &&
 	    pers->sync_request == NULL) {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 608050c43f17..b0051f2fbc0c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -501,7 +501,7 @@ extern struct attribute_group md_bitmap_group;
 static inline struct sysfs_dirent *sysfs_get_dirent_safe(struct sysfs_dirent *sd, char *name)
 {
 	if (sd)
-		return sysfs_get_dirent(sd, NULL, name);
+		return sysfs_get_dirent(sd, name);
 	return sd;
 }
 static inline void sysfs_notify_dirent_safe(struct sysfs_dirent *sd)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 1dfb4aaf9446..fee19d16e4a2 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -630,9 +630,10 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
 }
 
 /**
- *	sysfs_get_dirent - find and get sysfs_dirent with the given name
+ *	sysfs_get_dirent_ns - find and get sysfs_dirent with the given name
  *	@parent_sd: sysfs_dirent to search under
  *	@name: name to look for
+ *	@ns: the namespace tag to use
  *
  *	Look for sysfs_dirent with name @name under @parent_sd and get
  *	it if found.
@@ -643,9 +644,9 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
  *	RETURNS:
  *	Pointer to sysfs_dirent if found, NULL if not.
  */
-struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
-				      const void *ns,
-				      const unsigned char *name)
+struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
+					 const unsigned char *name,
+					 const void *ns)
 {
 	struct sysfs_dirent *sd;
 
@@ -656,7 +657,7 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
 
 	return sd;
 }
-EXPORT_SYMBOL_GPL(sysfs_get_dirent);
+EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns);
 
 static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
 	const void *ns, const char *name, struct sysfs_dirent **p_sd)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e784340f1599..0f3214a70985 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -563,7 +563,7 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 	int error;
 
 	if (group)
-		dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
+		dir_sd = sysfs_get_dirent(kobj->sd, group);
 	else
 		dir_sd = sysfs_get(kobj->sd);
 
@@ -645,7 +645,7 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 	struct sysfs_dirent *dir_sd;
 
 	if (group)
-		dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
+		dir_sd = sysfs_get_dirent(kobj->sd, group);
 	else
 		dir_sd = sysfs_get(kobj->sd);
 	if (dir_sd) {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 25c78f23dae8..21102158ca33 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -207,7 +207,7 @@ void sysfs_remove_group(struct kobject *kobj,
 	struct sysfs_dirent *sd;
 
 	if (grp->name) {
-		sd = sysfs_get_dirent(dir_sd, NULL, grp->name);
+		sd = sysfs_get_dirent(dir_sd, grp->name);
 		if (!sd) {
 			WARN(!sd, KERN_WARNING
 			     "sysfs group %p not found for kobject '%s'\n",
@@ -262,7 +262,7 @@ int sysfs_merge_group(struct kobject *kobj,
 	struct attribute *const *attr;
 	int i;
 
-	dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
+	dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
 	if (!dir_sd)
 		return -ENOENT;
 
@@ -289,7 +289,7 @@ void sysfs_unmerge_group(struct kobject *kobj,
 	struct sysfs_dirent *dir_sd;
 	struct attribute *const *attr;
 
-	dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
+	dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
 	if (dir_sd) {
 		for (attr = grp->attrs; *attr; ++attr)
 			sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
@@ -311,7 +311,7 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
 	struct sysfs_dirent *dir_sd;
 	int error = 0;
 
-	dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+	dir_sd = sysfs_get_dirent(kobj->sd, group_name);
 	if (!dir_sd)
 		return -ENOENT;
 
@@ -333,7 +333,7 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
 {
 	struct sysfs_dirent *dir_sd;
 
-	dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+	dir_sd = sysfs_get_dirent(kobj->sd, group_name);
 	if (dir_sd) {
 		sysfs_hash_and_remove(dir_sd, NULL, link_name);
 		sysfs_put(dir_sd);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 7d981ce2e87f..c96b31a16485 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -191,7 +191,7 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
 		old_ns = targ->sd->s_ns;
 
 	result = -ENOENT;
-	sd = sysfs_get_dirent(parent_sd, old_ns, old);
+	sd = sysfs_get_dirent_ns(parent_sd, old, old_ns);
 	if (!sd)
 		goto out;
 
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 7664d1b3d594..6faacafda777 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -164,9 +164,6 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
 				       const void *ns,
 				       const unsigned char *name);
-struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
-				      const void *ns,
-				      const unsigned char *name);
 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
 
 void release_sysfs_dirent(struct sysfs_dirent *sd);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index c792f73ac7fa..6695040a0317 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -245,9 +245,9 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
 
 void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
 void sysfs_notify_dirent(struct sysfs_dirent *sd);
-struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
-				      const void *ns,
-				      const unsigned char *name);
+struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
+					 const unsigned char *name,
+					 const void *ns);
 struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
 void sysfs_put(struct sysfs_dirent *sd);
 
@@ -422,10 +422,9 @@ static inline void sysfs_notify(struct kobject *kobj, const char *dir,
 static inline void sysfs_notify_dirent(struct sysfs_dirent *sd)
 {
 }
-static inline
-struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
-				      const void *ns,
-				      const unsigned char *name)
+static inline struct sysfs_dirent *
+sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, const unsigned char *name,
+		    const void *ns)
 {
 	return NULL;
 }
@@ -462,4 +461,10 @@ static inline int sysfs_rename_link(struct kobject *kobj, struct kobject *target
 	return sysfs_rename_link_ns(kobj, target, old_name, new_name, NULL);
 }
 
+static inline struct sysfs_dirent *
+sysfs_get_dirent(struct sysfs_dirent *parent_sd, const unsigned char *name)
+{
+	return sysfs_get_dirent_ns(parent_sd, name, NULL);
+}
+
 #endif /* _SYSFS_H_ */
-- 
cgit v1.2.3


From eee031649707db3c9920d9498f8d03819b74fc23 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 11 Sep 2013 13:00:30 -0400
Subject: kobject: introduce kobj_completion

A common way to handle kobject lifetimes in embedded in objects with
different lifetime rules is to pair the kobject with a struct completion.

This introduces a kobj_completion structure that can be used in place
of the pairing, along with several convenience functions for
initialization, release, and put-and-wait.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobj_completion.h | 18 +++++++++++++++
 lib/kobject.c                   | 50 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 include/linux/kobj_completion.h

(limited to 'include/linux')

diff --git a/include/linux/kobj_completion.h b/include/linux/kobj_completion.h
new file mode 100644
index 000000000000..a428f6436063
--- /dev/null
+++ b/include/linux/kobj_completion.h
@@ -0,0 +1,18 @@
+#ifndef _KOBJ_COMPLETION_H_
+#define _KOBJ_COMPLETION_H_
+
+#include <linux/kobject.h>
+#include <linux/completion.h>
+
+struct kobj_completion {
+	struct kobject kc_kobj;
+	struct completion kc_unregister;
+};
+
+#define kobj_to_kobj_completion(kobj) \
+	container_of(kobj, struct kobj_completion, kc_kobj)
+
+void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype);
+void kobj_completion_release(struct kobject *kobj);
+void kobj_completion_del_and_wait(struct kobj_completion *kc);
+#endif /* _KOBJ_COMPLETION_H_ */
diff --git a/lib/kobject.c b/lib/kobject.c
index e769ee3c2fb9..a5a9b13b0648 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/kobject.h>
+#include <linux/kobj_completion.h>
 #include <linux/string.h>
 #include <linux/export.h>
 #include <linux/stat.h>
@@ -749,6 +750,55 @@ const struct sysfs_ops kobj_sysfs_ops = {
 	.store	= kobj_attr_store,
 };
 
+/**
+ * kobj_completion_init - initialize a kobj_completion object.
+ * @kc: kobj_completion
+ * @ktype: type of kobject to initialize
+ *
+ * kobj_completion structures can be embedded within structures with different
+ * lifetime rules.  During the release of the enclosing object, we can
+ * wait on the release of the kobject so that we don't free it while it's
+ * still busy.
+ */
+void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype)
+{
+	init_completion(&kc->kc_unregister);
+	kobject_init(&kc->kc_kobj, ktype);
+}
+EXPORT_SYMBOL_GPL(kobj_completion_init);
+
+/**
+ * kobj_completion_release - release a kobj_completion object
+ * @kobj: kobject embedded in kobj_completion
+ *
+ * Used with kobject_release to notify waiters that the kobject has been
+ * released.
+ */
+void kobj_completion_release(struct kobject *kobj)
+{
+	struct kobj_completion *kc = kobj_to_kobj_completion(kobj);
+	complete(&kc->kc_unregister);
+}
+EXPORT_SYMBOL_GPL(kobj_completion_release);
+
+/**
+ * kobj_completion_del_and_wait - release the kobject and wait for it
+ * @kc: kobj_completion object to release
+ *
+ * Delete the kobject from sysfs and drop the reference count.  Then wait
+ * until any other outstanding references are also dropped.  This routine
+ * is only necessary once other references may have been taken on the
+ * kobject.  Typically this happens when the kobject has been published
+ * to sysfs via kobject_add.
+ */
+void kobj_completion_del_and_wait(struct kobj_completion *kc)
+{
+	kobject_del(&kc->kc_kobj);
+	kobject_put(&kc->kc_kobj);
+	wait_for_completion(&kc->kc_unregister);
+}
+EXPORT_SYMBOL_GPL(kobj_completion_del_and_wait);
+
 /**
  * kset_register - initialize and add a kset.
  * @k: kset.
-- 
cgit v1.2.3


From 3f9120b0424f3e03c75518cb751f9e2bfa73c32a Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Mon, 23 Sep 2013 16:27:26 +0200
Subject: driver core: prevent deferred probe with platform_driver_probe

Prevent drivers relying on platform_driver_probe from requesting
deferred probing in order to avoid further futile probe attempts (either
the driver has been unregistered or its probe function has been set to
platform_drv_probe_fail when probing is retried).

Note that several platform drivers currently return subsystem errors
from probe and that these can include -EPROBE_DEFER (e.g. if a gpio
request fails).

Add a warning to platform_drv_probe that can be used to catch drivers
that inadvertently request probe deferral while using
platform_driver_probe.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/platform.c         | 17 +++++++++++++----
 include/linux/platform_device.h |  1 +
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 4f8bef3eb5a8..47051cd25113 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -488,6 +488,11 @@ static int platform_drv_probe(struct device *_dev)
 	if (ret && ACPI_HANDLE(_dev))
 		acpi_dev_pm_detach(_dev, true);
 
+	if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {
+		dev_warn(_dev, "probe deferral not supported\n");
+		ret = -ENXIO;
+	}
+
 	return ret;
 }
 
@@ -553,8 +558,7 @@ EXPORT_SYMBOL_GPL(platform_driver_unregister);
 /**
  * platform_driver_probe - register driver for non-hotpluggable device
  * @drv: platform driver structure
- * @probe: the driver probe routine, probably from an __init section,
- *         must not return -EPROBE_DEFER.
+ * @probe: the driver probe routine, probably from an __init section
  *
  * Use this instead of platform_driver_register() when you know the device
  * is not hotpluggable and has already been registered, and you want to
@@ -565,8 +569,7 @@ EXPORT_SYMBOL_GPL(platform_driver_unregister);
  * into system-on-chip processors, where the controller devices have been
  * configured as part of board setup.
  *
- * This is incompatible with deferred probing so probe() must not
- * return -EPROBE_DEFER.
+ * Note that this is incompatible with deferred probing.
  *
  * Returns zero if the driver registered and bound to a device, else returns
  * a negative error code and with the driver not registered.
@@ -576,6 +579,12 @@ int __init_or_module platform_driver_probe(struct platform_driver *drv,
 {
 	int retval, code;
 
+	/*
+	 * Prevent driver from requesting probe deferral to avoid further
+	 * futile probe attempts.
+	 */
+	drv->prevent_deferred_probe = true;
+
 	/* make sure driver won't have bind/unbind attributes */
 	drv->driver.suppress_bind_attrs = true;
 
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index ce8e4ffd78c7..16f6654082dd 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -178,6 +178,7 @@ struct platform_driver {
 	int (*resume)(struct platform_device *);
 	struct device_driver driver;
 	const struct platform_device_id *id_table;
+	bool prevent_deferred_probe;
 };
 
 #define to_platform_driver(drv)	(container_of((drv), struct platform_driver, \
-- 
cgit v1.2.3


From a75e1c73a46eed0332d036e371f714e76d167c07 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Sat, 31 Aug 2013 13:16:49 +0900
Subject: extcon: Fix indentation coding style to improve readability

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-adc-jack.c       | 20 +++++-----
 drivers/extcon/extcon-class.c          | 26 ++++++-------
 include/linux/extcon.h                 | 67 ++++++++++++++++++----------------
 include/linux/extcon/extcon-adc-jack.h | 42 ++++++++++-----------
 include/linux/extcon/extcon-gpio.h     | 16 ++++----
 5 files changed, 87 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c
index 5985807e52c9..5d16428afd53 100644
--- a/drivers/extcon/extcon-adc-jack.c
+++ b/drivers/extcon/extcon-adc-jack.c
@@ -27,16 +27,16 @@
 
 /**
  * struct adc_jack_data - internal data for adc_jack device driver
- * @edev        - extcon device.
- * @cable_names - list of supported cables.
- * @num_cables  - size of cable_names.
- * @adc_conditions       - list of adc value conditions.
- * @num_conditions       - size of adc_conditions.
- * @irq         - irq number of attach/detach event (0 if not exist).
- * @handling_delay      - interrupt handler will schedule extcon event
- *                      handling at handling_delay jiffies.
- * @handler     - extcon event handler called by interrupt handler.
- * @chan       - iio channel being queried.
+ * @edev:		extcon device.
+ * @cable_names:	list of supported cables.
+ * @num_cables:		size of cable_names.
+ * @adc_conditions:	list of adc value conditions.
+ * @num_conditions:	size of adc_conditions.
+ * @irq:		irq number of attach/detach event (0 if not exist).
+ * @handling_delay:	interrupt handler will schedule extcon event
+ *			handling at handling_delay jiffies.
+ * @handler:		extcon event handler called by interrupt handler.
+ * @chan:		iio channel being queried.
  */
 struct adc_jack_data {
 	struct extcon_dev edev;
diff --git a/drivers/extcon/extcon-class.c b/drivers/extcon/extcon-class.c
index 148382faded9..a29720822847 100644
--- a/drivers/extcon/extcon-class.c
+++ b/drivers/extcon/extcon-class.c
@@ -74,7 +74,7 @@ static DEFINE_MUTEX(extcon_dev_list_lock);
 
 /**
  * check_mutually_exclusive - Check if new_state violates mutually_exclusive
- *			    condition.
+ *			      condition.
  * @edev:	the extcon device
  * @new_state:	new cable attach status for @edev
  *
@@ -189,7 +189,7 @@ static ssize_t cable_state_show(struct device *dev,
 
 /**
  * extcon_update_state() - Update the cable attach states of the extcon device
- *			only for the masked bits.
+ *			   only for the masked bits.
  * @edev:	the extcon device
  * @mask:	the bit mask to designate updated bits.
  * @state:	new cable attach status for @edev
@@ -227,7 +227,6 @@ int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state)
 		edev->state |= state & mask;
 
 		raw_notifier_call_chain(&edev->nh, old_state, edev);
-
 		/* This could be in interrupt handler */
 		prop_buf = (char *)get_zeroed_page(GFP_ATOMIC);
 		if (prop_buf) {
@@ -339,8 +338,9 @@ EXPORT_SYMBOL_GPL(extcon_get_cable_state);
 
 /**
  * extcon_set_cable_state_() - Set the status of a specific cable.
- * @edev:	the extcon device that has the cable.
- * @index:	cable index that can be retrieved by extcon_find_cable_index().
+ * @edev:		the extcon device that has the cable.
+ * @index:		cable index that can be retrieved by
+ *			extcon_find_cable_index().
  * @cable_state:	the new cable status. The default semantics is
  *			true: attached / false: detached.
  */
@@ -359,8 +359,8 @@ EXPORT_SYMBOL_GPL(extcon_set_cable_state_);
 
 /**
  * extcon_set_cable_state() - Set the status of a specific cable.
- * @edev:	the extcon device that has the cable.
- * @cable_name:	cable name.
+ * @edev:		the extcon device that has the cable.
+ * @cable_name:		cable name.
  * @cable_state:	the new cable status. The default semantics is
  *			true: attached / false: detached.
  *
@@ -419,14 +419,14 @@ static int _call_per_cable(struct notifier_block *nb, unsigned long val,
 
 /**
  * extcon_register_interest() - Register a notifier for a state change of a
- *			      specific cable, not an entier set of cables of a
- *			      extcon device.
- * @obj:	an empty extcon_specific_cable_nb object to be returned.
+ *				specific cable, not an entier set of cables of a
+ *				extcon device.
+ * @obj:		an empty extcon_specific_cable_nb object to be returned.
  * @extcon_name:	the name of extcon device.
  *			if NULL, extcon_register_interest will register
  *			every cable with the target cable_name given.
  * @cable_name:		the target cable name.
- * @nb:		the notifier block to get notified.
+ * @nb:			the notifier block to get notified.
  *
  * Provide an empty extcon_specific_cable_nb. extcon_register_interest() sets
  * the struct for you.
@@ -487,7 +487,7 @@ EXPORT_SYMBOL_GPL(extcon_register_interest);
 
 /**
  * extcon_unregister_interest() - Unregister the notifier registered by
- *				extcon_register_interest().
+ *				  extcon_register_interest().
  * @obj:	the extcon_specific_cable_nb object returned by
  *		extcon_register_interest().
  */
@@ -502,7 +502,7 @@ EXPORT_SYMBOL_GPL(extcon_unregister_interest);
 
 /**
  * extcon_register_notifier() - Register a notifiee to get notified by
- *			      any attach status changes from the extcon.
+ *				any attach status changes from the extcon.
  * @edev:	the extcon device.
  * @nb:		a notifier block to be registered.
  *
diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index fcb51c88319f..c2b652dde72c 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -51,10 +51,10 @@
 enum extcon_cable_name {
 	EXTCON_USB = 0,
 	EXTCON_USB_HOST,
-	EXTCON_TA, /* Travel Adaptor */
+	EXTCON_TA,			/* Travel Adaptor */
 	EXTCON_FAST_CHARGER,
 	EXTCON_SLOW_CHARGER,
-	EXTCON_CHARGE_DOWNSTREAM, /* Charging an external device */
+	EXTCON_CHARGE_DOWNSTREAM,	/* Charging an external device */
 	EXTCON_HDMI,
 	EXTCON_MHL,
 	EXTCON_DVI,
@@ -76,8 +76,8 @@ struct extcon_cable;
 
 /**
  * struct extcon_dev - An extcon device represents one external connector.
- * @name:	The name of this extcon device. Parent device name is used
- *		if NULL.
+ * @name:		The name of this extcon device. Parent device name is
+ *			used if NULL.
  * @supported_cable:	Array of supported cable names ending with NULL.
  *			If supported_cable is NULL, cable name related APIs
  *			are disabled.
@@ -89,21 +89,21 @@ struct extcon_cable;
  *			be attached simulataneously. {0x7, 0} is equivalent to
  *			{0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there
  *			can be no simultaneous connections.
- * @print_name:	An optional callback to override the method to print the
- *		name of the extcon device.
+ * @print_name:		An optional callback to override the method to print the
+ *			name of the extcon device.
  * @print_state:	An optional callback to override the method to print the
- *		status of the extcon device.
- * @dev:	Device of this extcon. Do not provide at register-time.
- * @state:	Attach/detach state of this extcon. Do not provide at
- *		register-time
- * @nh:	Notifier for the state change events from this extcon
- * @entry:	To support list of extcon devices so that users can search
- *		for extcon devices based on the extcon name.
+ *			status of the extcon device.
+ * @dev:		Device of this extcon. Do not provide at register-time.
+ * @state:		Attach/detach state of this extcon. Do not provide at
+ *			register-time.
+ * @nh:			Notifier for the state change events from this extcon
+ * @entry:		To support list of extcon devices so that users can search
+ *			for extcon devices based on the extcon name.
  * @lock:
  * @max_supported:	Internal value to store the number of cables.
  * @extcon_dev_type:	Device_type struct to provide attribute_groups
  *			customized for each extcon device.
- * @cables:	Sysfs subdirectories. Each represents one cable.
+ * @cables:		Sysfs subdirectories. Each represents one cable.
  *
  * In most cases, users only need to provide "User initializing data" of
  * this struct when registering an extcon. In some exceptional cases,
@@ -111,26 +111,27 @@ struct extcon_cable;
  * are overwritten by register function.
  */
 struct extcon_dev {
-	/* --- Optional user initializing data --- */
-	const char	*name;
+	/* Optional user initializing data */
+	const char *name;
 	const char **supported_cable;
-	const u32	*mutually_exclusive;
+	const u32 *mutually_exclusive;
 
-	/* --- Optional callbacks to override class functions --- */
+	/* Optional callbacks to override class functions */
 	ssize_t	(*print_name)(struct extcon_dev *edev, char *buf);
 	ssize_t	(*print_state)(struct extcon_dev *edev, char *buf);
 
-	/* --- Internal data. Please do not set. --- */
-	struct device	*dev;
-	u32		state;
+	/* Internal data. Please do not set. */
+	struct device *dev;
 	struct raw_notifier_head nh;
 	struct list_head entry;
-	spinlock_t lock; /* could be called by irq handler */
 	int max_supported;
+	spinlock_t lock;	/* could be called by irq handler */
+	u32 state;
 
 	/* /sys/class/extcon/.../cable.n/... */
 	struct device_type extcon_dev_type;
 	struct extcon_cable *cables;
+
 	/* /sys/class/extcon/.../mutually_exclusive/... */
 	struct attribute_group attr_g_muex;
 	struct attribute **attrs_muex;
@@ -138,13 +139,13 @@ struct extcon_dev {
 };
 
 /**
- * struct extcon_cable	- An internal data for each cable of extcon device.
- * @edev:	The extcon device
+ * struct extcon_cable - An internal data for each cable of extcon device.
+ * @edev:		The extcon device
  * @cable_index:	Index of this cable in the edev
- * @attr_g:	Attribute group for the cable
- * @attr_name:	"name" sysfs entry
- * @attr_state:	"state" sysfs entry
- * @attrs:	Array pointing to attr_name and attr_state for attr_g
+ * @attr_g:		Attribute group for the cable
+ * @attr_name:		"name" sysfs entry
+ * @attr_state:		"state" sysfs entry
+ * @attrs:		Array pointing to attr_name and attr_state for attr_g
  */
 struct extcon_cable {
 	struct extcon_dev *edev;
@@ -159,11 +160,13 @@ struct extcon_cable {
 
 /**
  * struct extcon_specific_cable_nb - An internal data for
- *				extcon_register_interest().
- * @internal_nb:	a notifier block bridging extcon notifier and cable notifier.
- * @user_nb:	user provided notifier block for events from a specific cable.
+ *				     extcon_register_interest().
+ * @internal_nb:	A notifier block bridging extcon notifier
+ *			and cable notifier.
+ * @user_nb:		user provided notifier block for events from
+ *			a specific cable.
  * @cable_index:	the target cable.
- * @edev:	the target extcon device.
+ * @edev:		the target extcon device.
  * @previous_value:	the saved previous event value.
  */
 struct extcon_specific_cable_nb {
diff --git a/include/linux/extcon/extcon-adc-jack.h b/include/linux/extcon/extcon-adc-jack.h
index 20e9eef25d4c..9ca958c4e94c 100644
--- a/include/linux/extcon/extcon-adc-jack.h
+++ b/include/linux/extcon/extcon-adc-jack.h
@@ -20,10 +20,10 @@
 
 /**
  * struct adc_jack_cond - condition to use an extcon state
- * @state      - the corresponding extcon state (if 0, this struct denotes
- *             the last adc_jack_cond element among the array)
- * @min_adc    - min adc value for this condition
- * @max_adc    - max adc value for this condition
+ * @state:		the corresponding extcon state (if 0, this struct
+ *			denotes the last adc_jack_cond element among the array)
+ * @min_adc:		min adc value for this condition
+ * @max_adc:		max adc value for this condition
  *
  * For example, if { .state = 0x3, .min_adc = 100, .max_adc = 200}, it means
  * that if ADC value is between (inclusive) 100 and 200, than the cable 0 and
@@ -33,34 +33,34 @@
  * because when no adc_jack_cond is met, state = 0 is automatically chosen.
  */
 struct adc_jack_cond {
-	u32 state; /* extcon state value. 0 if invalid */
+	u32 state;	/* extcon state value. 0 if invalid */
 	u32 min_adc;
 	u32 max_adc;
 };
 
 /**
  * struct adc_jack_pdata - platform data for adc jack device.
- * @name       - name of the extcon device. If null, "adc-jack" is used.
- * @consumer_channel - Unique name to identify the channel on the consumer
- *                   side. This typically describes the channels used within
- *                   the consumer. E.g. 'battery_voltage'
- * @cable_names        - array of cable names ending with null.
- * @adc_contitions     - array of struct adc_jack_cond conditions ending
- *                     with .state = 0 entry. This describes how to decode
- *                     adc values into extcon state.
- * @irq_flags  - irq flags used for the @irq
- * @handling_delay_ms  - in some devices, we need to read ADC value some
- *                     milli-seconds after the interrupt occurs. You may
- *                     describe such delays with @handling_delay_ms, which
- *                     is rounded-off by jiffies.
+ * @name:		name of the extcon device. If null, "adc-jack" is used.
+ * @consumer_channel:	Unique name to identify the channel on the consumer
+ *			side. This typically describes the channels used within
+ *			the consumer. E.g. 'battery_voltage'
+ * @cable_names:	array of cable names ending with null.
+ * @adc_contitions:	array of struct adc_jack_cond conditions ending
+ *			with .state = 0 entry. This describes how to decode
+ *			adc values into extcon state.
+ * @irq_flags:		irq flags used for the @irq
+ * @handling_delay_ms:	in some devices, we need to read ADC value some
+ *			milli-seconds after the interrupt occurs. You may
+ *			describe such delays with @handling_delay_ms, which
+ *			is rounded-off by jiffies.
  */
 struct adc_jack_pdata {
 	const char *name;
 	const char *consumer_channel;
-	/*
-	 * The last entry should be NULL
-	 */
+
+	/* The last entry should be NULL */
 	const char **cable_names;
+
 	/* The last entry's state should be 0 */
 	struct adc_jack_cond *adc_conditions;
 
diff --git a/include/linux/extcon/extcon-gpio.h b/include/linux/extcon/extcon-gpio.h
index 2d8307f7d67d..4ce1aa7d29bd 100644
--- a/include/linux/extcon/extcon-gpio.h
+++ b/include/linux/extcon/extcon-gpio.h
@@ -25,14 +25,14 @@
 
 /**
  * struct gpio_extcon_platform_data - A simple GPIO-controlled extcon device.
- * @name	The name of this GPIO extcon device.
- * @gpio	Corresponding GPIO.
- * @debounce	Debounce time for GPIO IRQ in ms.
- * @irq_flags	IRQ Flags (e.g., IRQF_TRIGGER_LOW).
- * @state_on	print_state is overriden with state_on if attached. If Null,
- *		default method of extcon class is used.
- * @state_off	print_state is overriden with state_on if detached. If Null,
- *		default method of extcon class is used.
+ * @name:		The name of this GPIO extcon device.
+ * @gpio:		Corresponding GPIO.
+ * @debounce:		Debounce time for GPIO IRQ in ms.
+ * @irq_flags:		IRQ Flags (e.g., IRQF_TRIGGER_LOW).
+ * @state_on:		print_state is overriden with state_on if attached.
+ *			If NULL, default method of extcon class is used.
+ * @state_off:		print_state is overriden with state_on if detached.
+ *			If NUll, default method of extcon class is used.
  *
  * Note that in order for state_on or state_off to be valid, both state_on
  * and state_off should be not NULL. If at least one of them is NULL,
-- 
cgit v1.2.3


From 5bfbdc9caa7e16b2a77a62a9f9a63b5693e23716 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 12 Sep 2013 08:49:54 +0900
Subject: extcon: gpio: Add support for active-low presence to detect pins

This patch add 'gpio_active_low' field to 'struct gpio_extcon_data'
to check whether gpio active state is 1(high) or 0(low).

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-gpio.c       | 4 ++++
 include/linux/extcon/extcon-gpio.h | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c
index cd7dbb60f60f..b02c670ef01d 100644
--- a/drivers/extcon/extcon-gpio.c
+++ b/drivers/extcon/extcon-gpio.c
@@ -34,6 +34,7 @@
 struct gpio_extcon_data {
 	struct extcon_dev edev;
 	unsigned gpio;
+	bool gpio_active_low;
 	const char *state_on;
 	const char *state_off;
 	int irq;
@@ -49,6 +50,8 @@ static void gpio_extcon_work(struct work_struct *work)
 			     work);
 
 	state = gpio_get_value(data->gpio);
+	if (data->gpio_active_low)
+		state = !state;
 	extcon_set_state(&data->edev, state);
 }
 
@@ -96,6 +99,7 @@ static int gpio_extcon_probe(struct platform_device *pdev)
 
 	extcon_data->edev.name = pdata->name;
 	extcon_data->gpio = pdata->gpio;
+	extcon_data->gpio_active_low = pdata->gpio_active_low;
 	extcon_data->state_on = pdata->state_on;
 	extcon_data->state_off = pdata->state_off;
 	if (pdata->state_on && pdata->state_off)
diff --git a/include/linux/extcon/extcon-gpio.h b/include/linux/extcon/extcon-gpio.h
index 4ce1aa7d29bd..4195810f87fe 100644
--- a/include/linux/extcon/extcon-gpio.h
+++ b/include/linux/extcon/extcon-gpio.h
@@ -27,6 +27,9 @@
  * struct gpio_extcon_platform_data - A simple GPIO-controlled extcon device.
  * @name:		The name of this GPIO extcon device.
  * @gpio:		Corresponding GPIO.
+ * @gpio_active_low:	Boolean describing whether gpio active state is 1 or 0
+ *			If true, low state of gpio means active.
+ *			If false, high state of gpio means active.
  * @debounce:		Debounce time for GPIO IRQ in ms.
  * @irq_flags:		IRQ Flags (e.g., IRQF_TRIGGER_LOW).
  * @state_on:		print_state is overriden with state_on if attached.
@@ -41,6 +44,7 @@
 struct gpio_extcon_platform_data {
 	const char *name;
 	unsigned gpio;
+	bool gpio_active_low;
 	unsigned long debounce;
 	unsigned long irq_flags;
 
-- 
cgit v1.2.3


From dae616512476024aa61d2a598461ab6eff8c0709 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Fri, 27 Sep 2013 09:19:40 +0900
Subject: extcon: Change field type of 'dev' in extcon_dev structure

The extcon device must always need 'struct device' so this patch change
field type of 'dev' instead of allocating memory for 'struct device' on
extcon_dev_register() function.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-adc-jack.c |  2 +-
 drivers/extcon/extcon-class.c    | 52 ++++++++++++++++++----------------------
 include/linux/extcon.h           |  4 ++--
 3 files changed, 26 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c
index 5694c468b958..dcbade667d0c 100644
--- a/drivers/extcon/extcon-adc-jack.c
+++ b/drivers/extcon/extcon-adc-jack.c
@@ -64,7 +64,7 @@ static void adc_jack_handler(struct work_struct *work)
 
 	ret = iio_read_channel_raw(data->chan, &adc_val);
 	if (ret < 0) {
-		dev_err(data->edev.dev, "read channel() error: %d\n", ret);
+		dev_err(&data->edev.dev, "read channel() error: %d\n", ret);
 		return;
 	}
 
diff --git a/drivers/extcon/extcon-class.c b/drivers/extcon/extcon-class.c
index 2f2d374ccd1d..2801c14d5232 100644
--- a/drivers/extcon/extcon-class.c
+++ b/drivers/extcon/extcon-class.c
@@ -162,7 +162,7 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr,
 			return ret;
 	}
 
-	return sprintf(buf, "%s\n", dev_name(edev->dev));
+	return sprintf(buf, "%s\n", dev_name(&edev->dev));
 }
 static DEVICE_ATTR_RO(name);
 
@@ -230,7 +230,7 @@ int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state)
 		/* This could be in interrupt handler */
 		prop_buf = (char *)get_zeroed_page(GFP_ATOMIC);
 		if (prop_buf) {
-			length = name_show(edev->dev, NULL, prop_buf);
+			length = name_show(&edev->dev, NULL, prop_buf);
 			if (length > 0) {
 				if (prop_buf[length - 1] == '\n')
 					prop_buf[length - 1] = 0;
@@ -238,7 +238,7 @@ int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state)
 					"NAME=%s", prop_buf);
 				envp[env_offset++] = name_buf;
 			}
-			length = state_show(edev->dev, NULL, prop_buf);
+			length = state_show(&edev->dev, NULL, prop_buf);
 			if (length > 0) {
 				if (prop_buf[length - 1] == '\n')
 					prop_buf[length - 1] = 0;
@@ -250,14 +250,14 @@ int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state)
 			/* Unlock early before uevent */
 			spin_unlock_irqrestore(&edev->lock, flags);
 
-			kobject_uevent_env(&edev->dev->kobj, KOBJ_CHANGE, envp);
+			kobject_uevent_env(&edev->dev.kobj, KOBJ_CHANGE, envp);
 			free_page((unsigned long)prop_buf);
 		} else {
 			/* Unlock early before uevent */
 			spin_unlock_irqrestore(&edev->lock, flags);
 
-			dev_err(edev->dev, "out of memory in extcon_set_state\n");
-			kobject_uevent(&edev->dev->kobj, KOBJ_CHANGE);
+			dev_err(&edev->dev, "out of memory in extcon_set_state\n");
+			kobject_uevent(&edev->dev.kobj, KOBJ_CHANGE);
 		}
 	} else {
 		/* No changes */
@@ -556,7 +556,6 @@ static int create_extcon_class(void)
 
 static void extcon_dev_release(struct device *dev)
 {
-	kfree(dev);
 }
 
 static const char *muex_name = "mutually_exclusive";
@@ -594,19 +593,16 @@ int extcon_dev_register(struct extcon_dev *edev, struct device *dev)
 	}
 
 	if (index > SUPPORTED_CABLE_MAX) {
-		dev_err(edev->dev, "extcon: maximum number of supported cables exceeded.\n");
+		dev_err(&edev->dev, "extcon: maximum number of supported cables exceeded.\n");
 		return -EINVAL;
 	}
 
-	edev->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
-	if (!edev->dev)
-		return -ENOMEM;
-	edev->dev->parent = dev;
-	edev->dev->class = extcon_class;
-	edev->dev->release = extcon_dev_release;
+	edev->dev.parent = dev;
+	edev->dev.class = extcon_class;
+	edev->dev.release = extcon_dev_release;
 
 	edev->name = edev->name ? edev->name : dev_name(dev);
-	dev_set_name(edev->dev, "%s", edev->name);
+	dev_set_name(&edev->dev, "%s", edev->name);
 
 	if (edev->max_supported) {
 		char buf[10];
@@ -714,7 +710,7 @@ int extcon_dev_register(struct extcon_dev *edev, struct device *dev)
 			goto err_alloc_groups;
 		}
 
-		edev->extcon_dev_type.name = dev_name(edev->dev);
+		edev->extcon_dev_type.name = dev_name(&edev->dev);
 		edev->extcon_dev_type.release = dummy_sysfs_dev_release;
 
 		for (index = 0; index < edev->max_supported; index++)
@@ -724,25 +720,24 @@ int extcon_dev_register(struct extcon_dev *edev, struct device *dev)
 			edev->extcon_dev_type.groups[index] =
 				&edev->attr_g_muex;
 
-		edev->dev->type = &edev->extcon_dev_type;
+		edev->dev.type = &edev->extcon_dev_type;
 	}
 
-	ret = device_register(edev->dev);
+	ret = device_register(&edev->dev);
 	if (ret) {
-		put_device(edev->dev);
+		put_device(&edev->dev);
 		goto err_dev;
 	}
 #if defined(CONFIG_ANDROID)
 	if (switch_class)
-		ret = class_compat_create_link(switch_class, edev->dev,
-					       NULL);
+		ret = class_compat_create_link(switch_class, &edev->dev, NULL);
 #endif /* CONFIG_ANDROID */
 
 	spin_lock_init(&edev->lock);
 
 	RAW_INIT_NOTIFIER_HEAD(&edev->nh);
 
-	dev_set_drvdata(edev->dev, edev);
+	dev_set_drvdata(&edev->dev, edev);
 	edev->state = 0;
 
 	mutex_lock(&extcon_dev_list_lock);
@@ -768,7 +763,6 @@ err_alloc_cables:
 	if (edev->max_supported)
 		kfree(edev->cables);
 err_sysfs_alloc:
-	kfree(edev->dev);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(extcon_dev_register);
@@ -788,9 +782,9 @@ void extcon_dev_unregister(struct extcon_dev *edev)
 	list_del(&edev->entry);
 	mutex_unlock(&extcon_dev_list_lock);
 
-	if (IS_ERR_OR_NULL(get_device(edev->dev))) {
-		dev_err(edev->dev, "Failed to unregister extcon_dev (%s)\n",
-				dev_name(edev->dev));
+	if (IS_ERR_OR_NULL(get_device(&edev->dev))) {
+		dev_err(&edev->dev, "Failed to unregister extcon_dev (%s)\n",
+				dev_name(&edev->dev));
 		return;
 	}
 
@@ -812,10 +806,10 @@ void extcon_dev_unregister(struct extcon_dev *edev)
 
 #if defined(CONFIG_ANDROID)
 	if (switch_class)
-		class_compat_remove_link(switch_class, edev->dev, NULL);
+		class_compat_remove_link(switch_class, &edev->dev, NULL);
 #endif
-	device_unregister(edev->dev);
-	put_device(edev->dev);
+	device_unregister(&edev->dev);
+	put_device(&edev->dev);
 }
 EXPORT_SYMBOL_GPL(extcon_dev_unregister);
 
diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index c2b652dde72c..0269bafb0ee8 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -93,7 +93,7 @@ struct extcon_cable;
  *			name of the extcon device.
  * @print_state:	An optional callback to override the method to print the
  *			status of the extcon device.
- * @dev:		Device of this extcon. Do not provide at register-time.
+ * @dev:		Device of this extcon.
  * @state:		Attach/detach state of this extcon. Do not provide at
  *			register-time.
  * @nh:			Notifier for the state change events from this extcon
@@ -121,7 +121,7 @@ struct extcon_dev {
 	ssize_t	(*print_state)(struct extcon_dev *edev, char *buf);
 
 	/* Internal data. Please do not set. */
-	struct device *dev;
+	struct device dev;
 	struct raw_notifier_head nh;
 	struct list_head entry;
 	int max_supported;
-- 
cgit v1.2.3


From 42d7d7539a7bcf1d493b989465283c464f4a0525 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Fri, 27 Sep 2013 09:20:26 +0900
Subject: extcon: Simplify extcon_dev_register() prototype by removing
 unnecessary parameter

This patch remove extcon_dev_register()'s second parameter which means
the pointer of parent device to simplify prototype of this function.
So, if extcon device has the parent device, it should set the pointer of
parent device to edev.dev.parent in extcon device driver instead of in
extcon_dev_register().

Cc: Graeme Gregory <gg@slimlogic.co.uk>
Cc: Kishon Vijay Abraham I <kishon@ti.com>
Cc: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 Documentation/extcon/porting-android-switch-class |  6 ++++--
 drivers/extcon/extcon-adc-jack.c                  |  3 ++-
 drivers/extcon/extcon-arizona.c                   |  3 ++-
 drivers/extcon/extcon-class.c                     | 11 +++++++----
 drivers/extcon/extcon-gpio.c                      |  3 ++-
 drivers/extcon/extcon-max77693.c                  |  3 ++-
 drivers/extcon/extcon-max8997.c                   |  3 ++-
 drivers/extcon/extcon-palmas.c                    |  3 ++-
 include/linux/extcon.h                            |  5 ++---
 9 files changed, 25 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/extcon/porting-android-switch-class b/Documentation/extcon/porting-android-switch-class
index eb0fa5f4fe88..5377f6317961 100644
--- a/Documentation/extcon/porting-android-switch-class
+++ b/Documentation/extcon/porting-android-switch-class
@@ -25,8 +25,10 @@ MyungJoo Ham <myungjoo.ham@samsung.com>
     @print_state: no change but type change (switch_dev->extcon_dev)
 
 - switch_dev_register(sdev, dev)
-	=> extcon_dev_register(edev, dev)
-	: no change but type change (sdev->edev)
+	=> extcon_dev_register(edev)
+	: type change (sdev->edev)
+	: remove second param('dev'). if edev has parent device, should store
+	  'dev' to 'edev.dev.parent' before registering extcon device
 - switch_dev_unregister(sdev)
 	=> extcon_dev_unregister(edev)
 	: no change but type change (sdev->edev)
diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c
index dcbade667d0c..e23f1c2e5053 100644
--- a/drivers/extcon/extcon-adc-jack.c
+++ b/drivers/extcon/extcon-adc-jack.c
@@ -110,6 +110,7 @@ static int adc_jack_probe(struct platform_device *pdev)
 		goto out;
 	}
 
+	data->edev.dev.parent = &pdev->dev;
 	data->edev.supported_cable = pdata->cable_names;
 
 	/* Check the length of array and set num_cables */
@@ -148,7 +149,7 @@ static int adc_jack_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, data);
 
-	err = extcon_dev_register(&data->edev, &pdev->dev);
+	err = extcon_dev_register(&data->edev);
 	if (err)
 		goto out;
 
diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index e54ce08f669c..ec9a14e05fdd 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -1130,9 +1130,10 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 	}
 
 	info->edev.name = "Headset Jack";
+	info->edev.dev.parent = arizona->dev;
 	info->edev.supported_cable = arizona_cable;
 
-	ret = extcon_dev_register(&info->edev, arizona->dev);
+	ret = extcon_dev_register(&info->edev);
 	if (ret < 0) {
 		dev_err(arizona->dev, "extcon_dev_register() failed: %d\n",
 			ret);
diff --git a/drivers/extcon/extcon-class.c b/drivers/extcon/extcon-class.c
index 2801c14d5232..84902d3fee11 100644
--- a/drivers/extcon/extcon-class.c
+++ b/drivers/extcon/extcon-class.c
@@ -566,14 +566,13 @@ static void dummy_sysfs_dev_release(struct device *dev)
 /**
  * extcon_dev_register() - Register a new extcon device
  * @edev	: the new extcon device (should be allocated before calling)
- * @dev		: the parent device for this extcon device.
  *
  * Among the members of edev struct, please set the "user initializing data"
  * in any case and set the "optional callbacks" if required. However, please
  * do not set the values of "internal data", which are initialized by
  * this function.
  */
-int extcon_dev_register(struct extcon_dev *edev, struct device *dev)
+int extcon_dev_register(struct extcon_dev *edev)
 {
 	int ret, index = 0;
 
@@ -597,11 +596,15 @@ int extcon_dev_register(struct extcon_dev *edev, struct device *dev)
 		return -EINVAL;
 	}
 
-	edev->dev.parent = dev;
 	edev->dev.class = extcon_class;
 	edev->dev.release = extcon_dev_release;
 
-	edev->name = edev->name ? edev->name : dev_name(dev);
+	edev->name = edev->name ? edev->name : dev_name(edev->dev.parent);
+	if (IS_ERR_OR_NULL(edev->name)) {
+		dev_err(&edev->dev,
+			"extcon device name is null\n");
+		return -EINVAL;
+	}
 	dev_set_name(&edev->dev, "%s", edev->name);
 
 	if (edev->max_supported) {
diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c
index b02c670ef01d..7e0dff58e494 100644
--- a/drivers/extcon/extcon-gpio.c
+++ b/drivers/extcon/extcon-gpio.c
@@ -98,6 +98,7 @@ static int gpio_extcon_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	extcon_data->edev.name = pdata->name;
+	extcon_data->edev.dev.parent = &pdev->dev;
 	extcon_data->gpio = pdata->gpio;
 	extcon_data->gpio_active_low = pdata->gpio_active_low;
 	extcon_data->state_on = pdata->state_on;
@@ -112,7 +113,7 @@ static int gpio_extcon_probe(struct platform_device *pdev)
 				msecs_to_jiffies(pdata->debounce);
 	}
 
-	ret = extcon_dev_register(&extcon_data->edev, &pdev->dev);
+	ret = extcon_dev_register(&extcon_data->edev);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c
index 4849ea1e92f6..ab9bc24e1a52 100644
--- a/drivers/extcon/extcon-max77693.c
+++ b/drivers/extcon/extcon-max77693.c
@@ -1171,8 +1171,9 @@ static int max77693_muic_probe(struct platform_device *pdev)
 		goto err_irq;
 	}
 	info->edev->name = DEV_NAME;
+	info->edev->dev.parent = &pdev->dev;
 	info->edev->supported_cable = max77693_extcon_cable;
-	ret = extcon_dev_register(info->edev, NULL);
+	ret = extcon_dev_register(info->edev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register extcon device\n");
 		goto err_irq;
diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index e55ec38b4148..0b1cbb5fdf9e 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -705,8 +705,9 @@ static int max8997_muic_probe(struct platform_device *pdev)
 		goto err_irq;
 	}
 	info->edev->name = DEV_NAME;
+	info->edev->dev.parent = &pdev->dev;
 	info->edev->supported_cable = max8997_extcon_cable;
-	ret = extcon_dev_register(info->edev, NULL);
+	ret = extcon_dev_register(info->edev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register extcon device\n");
 		goto err_irq;
diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c
index 0c2fb68fb791..6c91976dd823 100644
--- a/drivers/extcon/extcon-palmas.c
+++ b/drivers/extcon/extcon-palmas.c
@@ -178,9 +178,10 @@ static int palmas_usb_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, palmas_usb);
 
 	palmas_usb->edev.supported_cable = palmas_extcon_cable;
+	palmas_usb->edev.dev.parent = palmas_usb->dev;
 	palmas_usb->edev.mutually_exclusive = mutually_exclusive;
 
-	status = extcon_dev_register(&palmas_usb->edev, palmas_usb->dev);
+	status = extcon_dev_register(&palmas_usb->edev);
 	if (status) {
 		dev_err(&pdev->dev, "failed to register extcon device\n");
 		return status;
diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 0269bafb0ee8..21c59af1150b 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -183,7 +183,7 @@ struct extcon_specific_cable_nb {
  * Following APIs are for notifiers or configurations.
  * Notifiers are the external port and connection devices.
  */
-extern int extcon_dev_register(struct extcon_dev *edev, struct device *dev);
+extern int extcon_dev_register(struct extcon_dev *edev);
 extern void extcon_dev_unregister(struct extcon_dev *edev);
 extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name);
 
@@ -241,8 +241,7 @@ extern int extcon_register_notifier(struct extcon_dev *edev,
 extern int extcon_unregister_notifier(struct extcon_dev *edev,
 				      struct notifier_block *nb);
 #else /* CONFIG_EXTCON */
-static inline int extcon_dev_register(struct extcon_dev *edev,
-				      struct device *dev)
+static inline int extcon_dev_register(struct extcon_dev *edev)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 8fb883f3e30065529e4f35d4b4f355193dcdb7a2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 21 Sep 2013 00:09:31 +0100
Subject: FS-Cache: Add use/unuse/wake cookie wrappers

Add wrapper functions for dealing with cookie->n_active:

 (*) __fscache_use_cookie() to increment it.

 (*) __fscache_unuse_cookie() to decrement and test against zero.

 (*) __fscache_wake_unused_cookie() to wake up anyone waiting for it to reach
     zero.

The second and third are split so that the third can be done after cookie->lock
has been released in case the waiter wakes up whilst we're still holding it and
tries to get it.

We will need to wake-on-zero once the cookie disablement patch is applied
because it will then be possible to see n_active become zero without the cookie
being relinquished.

Also move the cookie usement out of fscache_attr_changed_op() and into
fscache_attr_changed() and the operation struct so that cookie disablement
will be able to track it.

Whilst we're at it, only increment n_active if we're about to do
fscache_submit_op() so that we don't have to deal with undoing it if anything
earlier fails.  Possibly this should be moved into fscache_submit_op() which
could look at FSCACHE_OP_UNUSE_COOKIE.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/fscache/cookie.c           |  7 +++++--
 fs/fscache/page.c             | 42 +++++++++++++++++++++++++++++++-----------
 include/linux/fscache-cache.h | 19 +++++++++++++++++--
 3 files changed, 53 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index b2a86e324aac..d851aa555d28 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -568,6 +568,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
 {
 	struct fscache_operation *op;
 	struct fscache_object *object;
+	bool wake_cookie = false;
 	int ret;
 
 	_enter("%p,", cookie);
@@ -600,7 +601,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
 
 	op->debug_id = atomic_inc_return(&fscache_op_debug_id);
 
-	atomic_inc(&cookie->n_active);
+	__fscache_use_cookie(cookie);
 	if (fscache_submit_op(object, op) < 0)
 		goto submit_failed;
 
@@ -622,9 +623,11 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
 	return ret;
 
 submit_failed:
-	atomic_dec(&cookie->n_active);
+	wake_cookie = __fscache_unuse_cookie(cookie);
 inconsistent:
 	spin_unlock(&cookie->lock);
+	if (wake_cookie)
+		__fscache_wake_unused_cookie(cookie);
 	kfree(op);
 	_leave(" = -ESTALE");
 	return -ESTALE;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 73899c1c3449..0fe42a6d0e9c 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -163,12 +163,10 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
 
 	fscache_stat(&fscache_n_attr_changed_calls);
 
-	if (fscache_object_is_active(object) &&
-	    fscache_use_cookie(object)) {
+	if (fscache_object_is_active(object)) {
 		fscache_stat(&fscache_n_cop_attr_changed);
 		ret = object->cache->ops->attr_changed(object);
 		fscache_stat_d(&fscache_n_cop_attr_changed);
-		fscache_unuse_cookie(object);
 		if (ret < 0)
 			fscache_abort_object(object);
 	}
@@ -184,6 +182,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
 {
 	struct fscache_operation *op;
 	struct fscache_object *object;
+	bool wake_cookie;
 
 	_enter("%p", cookie);
 
@@ -199,7 +198,9 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
 	}
 
 	fscache_operation_init(op, fscache_attr_changed_op, NULL);
-	op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
+	op->flags = FSCACHE_OP_ASYNC |
+		(1 << FSCACHE_OP_EXCLUSIVE) |
+		(1 << FSCACHE_OP_UNUSE_COOKIE);
 
 	spin_lock(&cookie->lock);
 
@@ -208,6 +209,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
 
+	__fscache_use_cookie(cookie);
 	if (fscache_submit_exclusive_op(object, op) < 0)
 		goto nobufs;
 	spin_unlock(&cookie->lock);
@@ -217,8 +219,11 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
 	return 0;
 
 nobufs:
+	wake_cookie = __fscache_unuse_cookie(cookie);
 	spin_unlock(&cookie->lock);
 	kfree(op);
+	if (wake_cookie)
+		__fscache_wake_unused_cookie(cookie);
 	fscache_stat(&fscache_n_attr_changed_nobufs);
 	_leave(" = %d", -ENOBUFS);
 	return -ENOBUFS;
@@ -263,7 +268,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
 	}
 
 	fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op);
-	atomic_inc(&cookie->n_active);
 	op->op.flags	= FSCACHE_OP_MYTHREAD |
 		(1UL << FSCACHE_OP_WAITING) |
 		(1UL << FSCACHE_OP_UNUSE_COOKIE);
@@ -384,6 +388,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 {
 	struct fscache_retrieval *op;
 	struct fscache_object *object;
+	bool wake_cookie = false;
 	int ret;
 
 	_enter("%p,%p,,,", cookie, page);
@@ -421,6 +426,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 
 	ASSERT(test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags));
 
+	__fscache_use_cookie(cookie);
 	atomic_inc(&object->n_reads);
 	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
 
@@ -475,9 +481,11 @@ error:
 
 nobufs_unlock_dec:
 	atomic_dec(&object->n_reads);
+	wake_cookie = __fscache_unuse_cookie(cookie);
 nobufs_unlock:
 	spin_unlock(&cookie->lock);
-	atomic_dec(&cookie->n_active);
+	if (wake_cookie)
+		__fscache_wake_unused_cookie(cookie);
 	kfree(op);
 nobufs:
 	fscache_stat(&fscache_n_retrievals_nobufs);
@@ -514,6 +522,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 {
 	struct fscache_retrieval *op;
 	struct fscache_object *object;
+	bool wake_cookie = false;
 	int ret;
 
 	_enter("%p,,%d,,,", cookie, *nr_pages);
@@ -547,6 +556,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
 
+	__fscache_use_cookie(cookie);
 	atomic_inc(&object->n_reads);
 	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
 
@@ -601,10 +611,12 @@ error:
 
 nobufs_unlock_dec:
 	atomic_dec(&object->n_reads);
+	wake_cookie = __fscache_unuse_cookie(cookie);
 nobufs_unlock:
 	spin_unlock(&cookie->lock);
-	atomic_dec(&cookie->n_active);
 	kfree(op);
+	if (wake_cookie)
+		__fscache_wake_unused_cookie(cookie);
 nobufs:
 	fscache_stat(&fscache_n_retrievals_nobufs);
 	_leave(" = -ENOBUFS");
@@ -626,6 +638,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 {
 	struct fscache_retrieval *op;
 	struct fscache_object *object;
+	bool wake_cookie = false;
 	int ret;
 
 	_enter("%p,%p,,,", cookie, page);
@@ -658,8 +671,9 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
 
+	__fscache_use_cookie(cookie);
 	if (fscache_submit_op(object, &op->op) < 0)
-		goto nobufs_unlock;
+		goto nobufs_unlock_dec;
 	spin_unlock(&cookie->lock);
 
 	fscache_stat(&fscache_n_alloc_ops);
@@ -689,10 +703,13 @@ error:
 	_leave(" = %d", ret);
 	return ret;
 
+nobufs_unlock_dec:
+	wake_cookie = __fscache_unuse_cookie(cookie);
 nobufs_unlock:
 	spin_unlock(&cookie->lock);
-	atomic_dec(&cookie->n_active);
 	kfree(op);
+	if (wake_cookie)
+		__fscache_wake_unused_cookie(cookie);
 nobufs:
 	fscache_stat(&fscache_n_allocs_nobufs);
 	_leave(" = -ENOBUFS");
@@ -889,6 +906,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 {
 	struct fscache_storage *op;
 	struct fscache_object *object;
+	bool wake_cookie = false;
 	int ret;
 
 	_enter("%p,%x,", cookie, (u32) page->flags);
@@ -957,7 +975,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	op->op.debug_id	= atomic_inc_return(&fscache_op_debug_id);
 	op->store_limit = object->store_limit;
 
-	atomic_inc(&cookie->n_active);
+	__fscache_use_cookie(cookie);
 	if (fscache_submit_op(object, &op->op) < 0)
 		goto submit_failed;
 
@@ -984,10 +1002,10 @@ already_pending:
 	return 0;
 
 submit_failed:
-	atomic_dec(&cookie->n_active);
 	spin_lock(&cookie->stores_lock);
 	radix_tree_delete(&cookie->stores, page->index);
 	spin_unlock(&cookie->stores_lock);
+	wake_cookie = __fscache_unuse_cookie(cookie);
 	page_cache_release(page);
 	ret = -ENOBUFS;
 	goto nobufs;
@@ -999,6 +1017,8 @@ nobufs:
 	spin_unlock(&cookie->lock);
 	radix_tree_preload_end();
 	kfree(op);
+	if (wake_cookie)
+		__fscache_wake_unused_cookie(cookie);
 	fscache_stat(&fscache_n_stores_nobufs);
 	_leave(" = -ENOBUFS");
 	return -ENOBUFS;
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 7823e9ef995e..96a2b66f5968 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -511,6 +511,11 @@ static inline void fscache_end_io(struct fscache_retrieval *op,
 	op->end_io_func(page, op->context, error);
 }
 
+static inline void __fscache_use_cookie(struct fscache_cookie *cookie)
+{
+	atomic_inc(&cookie->n_active);
+}
+
 /**
  * fscache_use_cookie - Request usage of cookie attached to an object
  * @object: Object description
@@ -524,6 +529,16 @@ static inline bool fscache_use_cookie(struct fscache_object *object)
 	return atomic_inc_not_zero(&cookie->n_active) != 0;
 }
 
+static inline bool __fscache_unuse_cookie(struct fscache_cookie *cookie)
+{
+	return atomic_dec_and_test(&cookie->n_active);
+}
+
+static inline void __fscache_wake_unused_cookie(struct fscache_cookie *cookie)
+{
+	wake_up_atomic_t(&cookie->n_active);
+}
+
 /**
  * fscache_unuse_cookie - Cease usage of cookie attached to an object
  * @object: Object description
@@ -534,8 +549,8 @@ static inline bool fscache_use_cookie(struct fscache_object *object)
 static inline void fscache_unuse_cookie(struct fscache_object *object)
 {
 	struct fscache_cookie *cookie = object->cookie;
-	if (atomic_dec_and_test(&cookie->n_active))
-		wake_up_atomic_t(&cookie->n_active);
+	if (__fscache_unuse_cookie(cookie))
+		__fscache_wake_unused_cookie(cookie);
 }
 
 /*
-- 
cgit v1.2.3


From 94d30ae90a00cafe686c1057be57f4885f963abf Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 21 Sep 2013 00:09:31 +0100
Subject: FS-Cache: Provide the ability to enable/disable cookies

Provide the ability to enable and disable fscache cookies.  A disabled cookie
will reject or ignore further requests to:

	Acquire a child cookie
	Invalidate and update backing objects
	Check the consistency of a backing object
	Allocate storage for backing page
	Read backing pages
	Write to backing pages

but still allows:

	Checks/waits on the completion of already in-progress objects
	Uncaching of pages
	Relinquishment of cookies

Two new operations are provided:

 (1) Disable a cookie:

	void fscache_disable_cookie(struct fscache_cookie *cookie,
				    bool invalidate);

     If the cookie is not already disabled, this locks the cookie against other
     dis/enablement ops, marks the cookie as being disabled, discards or
     invalidates any backing objects and waits for cessation of activity on any
     associated object.

     This is a wrapper around a chunk split out of fscache_relinquish_cookie(),
     but it reinitialises the cookie such that it can be reenabled.

     All possible failures are handled internally.  The caller should consider
     calling fscache_uncache_all_inode_pages() afterwards to make sure all page
     markings are cleared up.

 (2) Enable a cookie:

	void fscache_enable_cookie(struct fscache_cookie *cookie,
				   bool (*can_enable)(void *data),
				   void *data)

     If the cookie is not already enabled, this locks the cookie against other
     dis/enablement ops, invokes can_enable() and, if the cookie is not an
     index cookie, will begin the procedure of acquiring backing objects.

     The optional can_enable() function is passed the data argument and returns
     a ruling as to whether or not enablement should actually be permitted to
     begin.

     All possible failures are handled internally.  The cookie will only be
     marked as enabled if provisional backing objects are allocated.

A later patch will introduce these to NFS.  Cookie enablement during nfs_open()
is then contingent on i_writecount <= 0.  can_enable() checks for a race
between open(O_RDONLY) and open(O_WRONLY/O_RDWR).  This simplifies NFS's cookie
handling and allows us to get rid of open(O_RDONLY) accidentally introducing
caching to an inode that's open for writing already.

One operation has its API modified:

 (3) Acquire a cookie.

	struct fscache_cookie *fscache_acquire_cookie(
		struct fscache_cookie *parent,
		const struct fscache_cookie_def *def,
		void *netfs_data,
		bool enable);

     This now has an additional argument that indicates whether the requested
     cookie should be enabled by default.  It doesn't need the can_enable()
     function because the caller must prevent multiple calls for the same netfs
     object and it doesn't need to take the enablement lock because no one else
     can get at the cookie before this returns.

Signed-off-by: David Howells <dhowells@redhat.com
---
 Documentation/filesystems/caching/netfs-api.txt |  73 ++++++++--
 fs/9p/cache.c                                   |   6 +-
 fs/afs/cell.c                                   |   2 +-
 fs/afs/inode.c                                  |   2 +-
 fs/afs/vlocation.c                              |   3 +-
 fs/afs/volume.c                                 |   2 +-
 fs/cachefiles/interface.c                       |   2 +-
 fs/ceph/cache.c                                 |   4 +-
 fs/cifs/fscache.c                               |   8 +-
 fs/fscache/cookie.c                             | 186 +++++++++++++++++-------
 fs/fscache/fsdef.c                              |   1 +
 fs/fscache/netfs.c                              |   1 +
 fs/fscache/object.c                             |   7 +-
 fs/fscache/page.c                               |  17 ++-
 fs/nfs/fscache.c                                |   8 +-
 include/linux/fscache-cache.h                   |  31 +---
 include/linux/fscache.h                         | 113 ++++++++++++--
 17 files changed, 334 insertions(+), 132 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 11a0a40ce445..aed6b94160b1 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -29,15 +29,16 @@ This document contains the following sections:
 	 (6) Index registration
 	 (7) Data file registration
 	 (8) Miscellaneous object registration
-	 (9) Setting the data file size
+ 	 (9) Setting the data file size
 	(10) Page alloc/read/write
 	(11) Page uncaching
 	(12) Index and data file consistency
-	(13) Miscellaneous cookie operations
-	(14) Cookie unregistration
-	(15) Index invalidation
-	(16) Data file invalidation
-	(17) FS-Cache specific page flags.
+	(13) Cookie enablement
+	(14) Miscellaneous cookie operations
+	(15) Cookie unregistration
+	(16) Index invalidation
+	(17) Data file invalidation
+	(18) FS-Cache specific page flags.
 
 
 =============================
@@ -334,7 +335,8 @@ the path to the file:
 	struct fscache_cookie *
 	fscache_acquire_cookie(struct fscache_cookie *parent,
 			       const struct fscache_object_def *def,
-			       void *netfs_data);
+			       void *netfs_data,
+			       bool enable);
 
 This function creates an index entry in the index represented by parent,
 filling in the index entry by calling the operations pointed to by def.
@@ -350,6 +352,10 @@ object needs to be created somewhere down the hierarchy.  Furthermore, an index
 may be created in several different caches independently at different times.
 This is all handled transparently, and the netfs doesn't see any of it.
 
+A cookie will be created in the disabled state if enabled is false.  A cookie
+must be enabled to do anything with it.  A disabled cookie can be enabled by
+calling fscache_enable_cookie() (see below).
+
 For example, with AFS, a cell would be added to the primary index.  This index
 entry would have a dependent inode containing a volume location index for the
 volume mappings within this cell:
@@ -357,7 +363,7 @@ volume mappings within this cell:
 	cell->cache =
 		fscache_acquire_cookie(afs_cache_netfs.primary_index,
 				       &afs_cell_cache_index_def,
-				       cell);
+				       cell, true);
 
 Then when a volume location was accessed, it would be entered into the cell's
 index and an inode would be allocated that acts as a volume type and hash chain
@@ -366,7 +372,7 @@ combination:
 	vlocation->cache =
 		fscache_acquire_cookie(cell->cache,
 				       &afs_vlocation_cache_index_def,
-				       vlocation);
+				       vlocation, true);
 
 And then a particular flavour of volume (R/O for example) could be added to
 that index, creating another index for vnodes (AFS inode equivalents):
@@ -374,7 +380,7 @@ that index, creating another index for vnodes (AFS inode equivalents):
 	volume->cache =
 		fscache_acquire_cookie(vlocation->cache,
 				       &afs_volume_cache_index_def,
-				       volume);
+				       volume, true);
 
 
 ======================
@@ -388,7 +394,7 @@ the object definition should be something other than index type.
 	vnode->cache =
 		fscache_acquire_cookie(volume->cache,
 				       &afs_vnode_cache_object_def,
-				       vnode);
+				       vnode, true);
 
 
 =================================
@@ -404,7 +410,7 @@ it would be some other type of object such as a data file.
 	xattr->cache =
 		fscache_acquire_cookie(vnode->cache,
 				       &afs_xattr_cache_object_def,
-				       xattr);
+				       xattr, true);
 
 Miscellaneous objects might be used to store extended attributes or directory
 entries for example.
@@ -733,6 +739,47 @@ Note that partial updates may happen automatically at other times, such as when
 data blocks are added to a data file object.
 
 
+=================
+COOKIE ENABLEMENT
+=================
+
+Cookies exist in one of two states: enabled and disabled.  If a cookie is
+disabled, it ignores all attempts to acquire child cookies; check, update or
+invalidate its state; allocate, read or write backing pages - though it is
+still possible to uncache pages and relinquish the cookie.
+
+The initial enablement state is set by fscache_acquire_cookie(), but the cookie
+can be enabled or disabled later.  To disable a cookie, call:
+    
+	void fscache_disable_cookie(struct fscache_cookie *cookie,
+    				    bool invalidate);
+    
+If the cookie is not already disabled, this locks the cookie against other
+enable and disable ops, marks the cookie as being disabled, discards or
+invalidates any backing objects and waits for cessation of activity on any
+associated object before unlocking the cookie.
+
+All possible failures are handled internally.  The caller should consider
+calling fscache_uncache_all_inode_pages() afterwards to make sure all page
+markings are cleared up.
+    
+Cookies can be enabled or reenabled with:
+    
+    	void fscache_enable_cookie(struct fscache_cookie *cookie,
+    				   bool (*can_enable)(void *data),
+    				   void *data)
+    
+If the cookie is not already enabled, this locks the cookie against other
+enable and disable ops, invokes can_enable() and, if the cookie is not an index
+cookie, will begin the procedure of acquiring backing objects.
+
+The optional can_enable() function is passed the data argument and returns a
+ruling as to whether or not enablement should actually be permitted to begin.
+
+All possible failures are handled internally.  The cookie will only be marked
+as enabled if provisional backing objects are allocated.
+
+
 ===============================
 MISCELLANEOUS COOKIE OPERATIONS
 ===============================
@@ -778,7 +825,7 @@ COOKIE UNREGISTRATION
 To get rid of a cookie, this function should be called.
 
 	void fscache_relinquish_cookie(struct fscache_cookie *cookie,
-				       int retire);
+				       bool retire);
 
 If retire is non-zero, then the object will be marked for recycling, and all
 copies of it will be removed from all active caches in which it is present.
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index a9ea73d6dcf3..2b7a032c37bc 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -90,7 +90,7 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
 
 	v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
 						&v9fs_cache_session_index_def,
-						v9ses);
+						v9ses, true);
 	p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n",
 		 v9ses, v9ses->fscache);
 }
@@ -204,7 +204,7 @@ void v9fs_cache_inode_get_cookie(struct inode *inode)
 	v9ses = v9fs_inode2v9ses(inode);
 	v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
 						  &v9fs_cache_inode_index_def,
-						  v9inode);
+						  v9inode, true);
 
 	p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
 		 inode, v9inode->fscache);
@@ -271,7 +271,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
 	v9ses = v9fs_inode2v9ses(inode);
 	v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
 						  &v9fs_cache_inode_index_def,
-						  v9inode);
+						  v9inode, true);
 	p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n",
 		 inode, old, v9inode->fscache);
 
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 3c090b7555ea..ca0a3cf93791 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -179,7 +179,7 @@ struct afs_cell *afs_cell_create(const char *name, unsigned namesz,
 	/* put it up for caching (this never returns an error) */
 	cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
 					     &afs_cell_cache_index_def,
-					     cell);
+					     cell, true);
 #endif
 
 	/* add to the cell lists */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 789bc253b5f6..ce25d755b7aa 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -259,7 +259,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 #ifdef CONFIG_AFS_FSCACHE
 	vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
 					      &afs_vnode_cache_index_def,
-					      vnode);
+					      vnode, true);
 #endif
 
 	ret = afs_inode_map_status(vnode, key);
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 57bcb1596530..b6df2e83809f 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -308,7 +308,8 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
 	/* see if we have an in-cache copy (will set vl->valid if there is) */
 #ifdef CONFIG_AFS_FSCACHE
 	vl->cache = fscache_acquire_cookie(vl->cell->cache,
-					   &afs_vlocation_cache_index_def, vl);
+					   &afs_vlocation_cache_index_def, vl,
+					   true);
 #endif
 
 	if (vl->valid) {
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 401eeb21869f..2b607257820c 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -131,7 +131,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
 #ifdef CONFIG_AFS_FSCACHE
 	volume->cache = fscache_acquire_cookie(vlocation->cache,
 					       &afs_volume_cache_index_def,
-					       volume);
+					       volume, true);
 #endif
 	afs_get_vlocation(vlocation);
 	volume->vlocation = vlocation;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 43eb5592cdea..00baf1419989 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -270,7 +270,7 @@ static void cachefiles_drop_object(struct fscache_object *_object)
 #endif
 
 	/* delete retired objects */
-	if (test_bit(FSCACHE_COOKIE_RETIRED, &object->fscache.cookie->flags) &&
+	if (test_bit(FSCACHE_OBJECT_RETIRED, &object->fscache.flags) &&
 	    _object != cache->cache.fsdef
 	    ) {
 		_debug("- retire object OBJ%x", object->fscache.debug_id);
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 6bfe65e0b038..7db2e6ca4b8f 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -68,7 +68,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
 {
 	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
 					      &ceph_fscache_fsid_object_def,
-					      fsc);
+					      fsc, true);
 
 	if (fsc->fscache == NULL) {
 		pr_err("Unable to resgister fsid: %p fscache cookie", fsc);
@@ -204,7 +204,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
 
 	ci->fscache = fscache_acquire_cookie(fsc->fscache,
 					     &ceph_fscache_inode_object_def,
-					     ci);
+					     ci, true);
 done:
 	mutex_unlock(&inode->i_mutex);
 
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 2f4bc5a58054..fe2492d2a8fc 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -27,7 +27,7 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
 {
 	server->fscache =
 		fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
-				&cifs_fscache_server_index_def, server);
+				&cifs_fscache_server_index_def, server, true);
 	cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
 		 __func__, server, server->fscache);
 }
@@ -46,7 +46,7 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
 
 	tcon->fscache =
 		fscache_acquire_cookie(server->fscache,
-				&cifs_fscache_super_index_def, tcon);
+				&cifs_fscache_super_index_def, tcon, true);
 	cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
 		 __func__, server->fscache, tcon->fscache);
 }
@@ -69,7 +69,7 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
 		cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
-				&cifs_fscache_inode_object_def, cifsi);
+				&cifs_fscache_inode_object_def, cifsi, true);
 		cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
 			 __func__, tcon->fscache, cifsi->fscache);
 	}
@@ -119,7 +119,7 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode)
 		cifsi->fscache = fscache_acquire_cookie(
 					cifs_sb_master_tcon(cifs_sb)->fscache,
 					&cifs_fscache_inode_object_def,
-					cifsi);
+					cifsi, true);
 		cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n",
 			 __func__, cifsi->fscache, old);
 	}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index d851aa555d28..29d7feb62cf7 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -58,15 +58,16 @@ void fscache_cookie_init_once(void *_cookie)
 struct fscache_cookie *__fscache_acquire_cookie(
 	struct fscache_cookie *parent,
 	const struct fscache_cookie_def *def,
-	void *netfs_data)
+	void *netfs_data,
+	bool enable)
 {
 	struct fscache_cookie *cookie;
 
 	BUG_ON(!def);
 
-	_enter("{%s},{%s},%p",
+	_enter("{%s},{%s},%p,%u",
 	       parent ? (char *) parent->def->name : "<no-parent>",
-	       def->name, netfs_data);
+	       def->name, netfs_data, enable);
 
 	fscache_stat(&fscache_n_acquires);
 
@@ -106,7 +107,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
 	cookie->def		= def;
 	cookie->parent		= parent;
 	cookie->netfs_data	= netfs_data;
-	cookie->flags		= 0;
+	cookie->flags		= (1 << FSCACHE_COOKIE_NO_DATA_YET);
 
 	/* radix tree insertion won't use the preallocation pool unless it's
 	 * told it may not wait */
@@ -124,16 +125,22 @@ struct fscache_cookie *__fscache_acquire_cookie(
 		break;
 	}
 
-	/* if the object is an index then we need do nothing more here - we
-	 * create indices on disk when we need them as an index may exist in
-	 * multiple caches */
-	if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
-		if (fscache_acquire_non_index_cookie(cookie) < 0) {
-			atomic_dec(&parent->n_children);
-			__fscache_cookie_put(cookie);
-			fscache_stat(&fscache_n_acquires_nobufs);
-			_leave(" = NULL");
-			return NULL;
+	if (enable) {
+		/* if the object is an index then we need do nothing more here
+		 * - we create indices on disk when we need them as an index
+		 * may exist in multiple caches */
+		if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
+			if (fscache_acquire_non_index_cookie(cookie) == 0) {
+				set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
+			} else {
+				atomic_dec(&parent->n_children);
+				__fscache_cookie_put(cookie);
+				fscache_stat(&fscache_n_acquires_nobufs);
+				_leave(" = NULL");
+				return NULL;
+			}
+		} else {
+			set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
 		}
 	}
 
@@ -143,6 +150,39 @@ struct fscache_cookie *__fscache_acquire_cookie(
 }
 EXPORT_SYMBOL(__fscache_acquire_cookie);
 
+/*
+ * Enable a cookie to permit it to accept new operations.
+ */
+void __fscache_enable_cookie(struct fscache_cookie *cookie,
+			     bool (*can_enable)(void *data),
+			     void *data)
+{
+	_enter("%p", cookie);
+
+	wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
+			 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+
+	if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
+		goto out_unlock;
+
+	if (can_enable && !can_enable(data)) {
+		/* The netfs decided it didn't want to enable after all */
+	} else if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
+		/* Wait for outstanding disablement to complete */
+		__fscache_wait_on_invalidate(cookie);
+
+		if (fscache_acquire_non_index_cookie(cookie) == 0)
+			set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
+	} else {
+		set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
+	}
+
+out_unlock:
+	clear_bit_unlock(FSCACHE_COOKIE_ENABLEMENT_LOCK, &cookie->flags);
+	wake_up_bit(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK);
+}
+EXPORT_SYMBOL(__fscache_enable_cookie);
+
 /*
  * acquire a non-index cookie
  * - this must make sure the index chain is instantiated and instantiate the
@@ -157,7 +197,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
 
 	_enter("");
 
-	cookie->flags = 1 << FSCACHE_COOKIE_UNAVAILABLE;
+	set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
 
 	/* now we need to see whether the backing objects for this cookie yet
 	 * exist, if not there'll be nothing to search */
@@ -180,9 +220,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
 
 	_debug("cache %s", cache->tag->name);
 
-	cookie->flags =
-		(1 << FSCACHE_COOKIE_LOOKING_UP) |
-		(1 << FSCACHE_COOKIE_NO_DATA_YET);
+	set_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
 
 	/* ask the cache to allocate objects for this cookie and its parent
 	 * chain */
@@ -398,7 +436,8 @@ void __fscache_invalidate(struct fscache_cookie *cookie)
 	if (!hlist_empty(&cookie->backing_objects)) {
 		spin_lock(&cookie->lock);
 
-		if (!hlist_empty(&cookie->backing_objects) &&
+		if (fscache_cookie_enabled(cookie) &&
+		    !hlist_empty(&cookie->backing_objects) &&
 		    !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING,
 				      &cookie->flags)) {
 			object = hlist_entry(cookie->backing_objects.first,
@@ -452,10 +491,14 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
 
 	spin_lock(&cookie->lock);
 
-	/* update the index entry on disk in each cache backing this cookie */
-	hlist_for_each_entry(object,
-			     &cookie->backing_objects, cookie_link) {
-		fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
+	if (fscache_cookie_enabled(cookie)) {
+		/* update the index entry on disk in each cache backing this
+		 * cookie.
+		 */
+		hlist_for_each_entry(object,
+				     &cookie->backing_objects, cookie_link) {
+			fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
+		}
 	}
 
 	spin_unlock(&cookie->lock);
@@ -464,28 +507,14 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
 EXPORT_SYMBOL(__fscache_update_cookie);
 
 /*
- * release a cookie back to the cache
- * - the object will be marked as recyclable on disk if retire is true
- * - all dependents of this cookie must have already been unregistered
- *   (indices/files/pages)
+ * Disable a cookie to stop it from accepting new requests from the netfs.
  */
-void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
+void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
 {
 	struct fscache_object *object;
+	bool awaken = false;
 
-	fscache_stat(&fscache_n_relinquishes);
-	if (retire)
-		fscache_stat(&fscache_n_relinquishes_retire);
-
-	if (!cookie) {
-		fscache_stat(&fscache_n_relinquishes_null);
-		_leave(" [no cookie]");
-		return;
-	}
-
-	_enter("%p{%s,%p,%d},%d",
-	       cookie, cookie->def->name, cookie->netfs_data,
-	       atomic_read(&cookie->n_active), retire);
+	_enter("%p,%u", cookie, invalidate);
 
 	ASSERTCMP(atomic_read(&cookie->n_active), >, 0);
 
@@ -495,24 +524,82 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
 		BUG();
 	}
 
-	/* No further netfs-accessing operations on this cookie permitted */
-	set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags);
-	if (retire)
-		set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags);
+	wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
+			 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+	if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
+		goto out_unlock_enable;
+
+	/* If the cookie is being invalidated, wait for that to complete first
+	 * so that we can reuse the flag.
+	 */
+	__fscache_wait_on_invalidate(cookie);
+
+	/* Dispose of the backing objects */
+	set_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags);
 
 	spin_lock(&cookie->lock);
-	hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) {
-		fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL);
+	if (!hlist_empty(&cookie->backing_objects)) {
+		hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) {
+			if (invalidate)
+				set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
+			fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL);
+		}
+	} else {
+		if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags))
+			awaken = true;
 	}
 	spin_unlock(&cookie->lock);
+	if (awaken)
+		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING);
 
 	/* Wait for cessation of activity requiring access to the netfs (when
-	 * n_active reaches 0).
+	 * n_active reaches 0).  This makes sure outstanding reads and writes
+	 * have completed.
 	 */
 	if (!atomic_dec_and_test(&cookie->n_active))
 		wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t,
 				 TASK_UNINTERRUPTIBLE);
 
+	/* Reset the cookie state if it wasn't relinquished */
+	if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) {
+		atomic_inc(&cookie->n_active);
+		set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
+	}
+
+out_unlock_enable:
+	clear_bit_unlock(FSCACHE_COOKIE_ENABLEMENT_LOCK, &cookie->flags);
+	wake_up_bit(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK);
+	_leave("");
+}
+EXPORT_SYMBOL(__fscache_disable_cookie);
+
+/*
+ * release a cookie back to the cache
+ * - the object will be marked as recyclable on disk if retire is true
+ * - all dependents of this cookie must have already been unregistered
+ *   (indices/files/pages)
+ */
+void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
+{
+	fscache_stat(&fscache_n_relinquishes);
+	if (retire)
+		fscache_stat(&fscache_n_relinquishes_retire);
+
+	if (!cookie) {
+		fscache_stat(&fscache_n_relinquishes_null);
+		_leave(" [no cookie]");
+		return;
+	}
+
+	_enter("%p{%s,%p,%d},%d",
+	       cookie, cookie->def->name, cookie->netfs_data,
+	       atomic_read(&cookie->n_active), retire);
+
+	/* No further netfs-accessing operations on this cookie permitted */
+	set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags);
+
+	__fscache_disable_cookie(cookie, retire);
+
 	/* Clear pointers back to the netfs */
 	cookie->netfs_data	= NULL;
 	cookie->def		= NULL;
@@ -592,7 +679,8 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
 
 	spin_lock(&cookie->lock);
 
-	if (hlist_empty(&cookie->backing_objects))
+	if (!fscache_cookie_enabled(cookie) ||
+	    hlist_empty(&cookie->backing_objects))
 		goto inconsistent;
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c
index 10a2ade0bdf8..5a117df2a9ef 100644
--- a/fs/fscache/fsdef.c
+++ b/fs/fscache/fsdef.c
@@ -59,6 +59,7 @@ struct fscache_cookie fscache_fsdef_index = {
 	.lock		= __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock),
 	.backing_objects = HLIST_HEAD_INIT,
 	.def		= &fscache_fsdef_index_def,
+	.flags		= 1 << FSCACHE_COOKIE_ENABLED,
 };
 EXPORT_SYMBOL(fscache_fsdef_index);
 
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index b1bb6117473a..989f39401547 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -45,6 +45,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
 	netfs->primary_index->def		= &fscache_fsdef_netfs_def;
 	netfs->primary_index->parent		= &fscache_fsdef_index;
 	netfs->primary_index->netfs_data	= netfs;
+	netfs->primary_index->flags		= 1 << FSCACHE_COOKIE_ENABLED;
 
 	atomic_inc(&netfs->primary_index->parent->usage);
 	atomic_inc(&netfs->primary_index->parent->n_children);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 86d75a60b20c..dcb821617774 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -495,6 +495,7 @@ void fscache_object_lookup_negative(struct fscache_object *object)
 		 * returning ENODATA.
 		 */
 		set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
+		clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
 
 		_debug("wake up lookup %p", &cookie->flags);
 		clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
@@ -527,6 +528,7 @@ void fscache_obtained_object(struct fscache_object *object)
 
 		/* We do (presumably) have data */
 		clear_bit_unlock(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
+		clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
 
 		/* Allow write requests to begin stacking up and read requests
 		 * to begin shovelling data.
@@ -679,7 +681,8 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
 	 */
 	spin_lock(&cookie->lock);
 	hlist_del_init(&object->cookie_link);
-	if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags))
+	if (hlist_empty(&cookie->backing_objects) &&
+	    test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags))
 		awaken = true;
 	spin_unlock(&cookie->lock);
 
@@ -927,7 +930,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj
 	 */
 	if (!fscache_use_cookie(object)) {
 		ASSERT(object->cookie->stores.rnode == NULL);
-		set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags);
+		set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
 		_leave(" [no cookie]");
 		return transit_to(KILL_OBJECT);
 	}
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 0fe42a6d0e9c..7f5c658af755 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -204,7 +204,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
 
 	spin_lock(&cookie->lock);
 
-	if (hlist_empty(&cookie->backing_objects))
+	if (!fscache_cookie_enabled(cookie) ||
+	    hlist_empty(&cookie->backing_objects))
 		goto nobufs;
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
@@ -410,7 +411,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 		return -ERESTARTSYS;
 
 	op = fscache_alloc_retrieval(cookie, page->mapping,
-				     end_io_func,context);
+				     end_io_func, context);
 	if (!op) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
@@ -419,7 +420,8 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 
 	spin_lock(&cookie->lock);
 
-	if (hlist_empty(&cookie->backing_objects))
+	if (!fscache_cookie_enabled(cookie) ||
+	    hlist_empty(&cookie->backing_objects))
 		goto nobufs_unlock;
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
@@ -551,7 +553,8 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 
 	spin_lock(&cookie->lock);
 
-	if (hlist_empty(&cookie->backing_objects))
+	if (!fscache_cookie_enabled(cookie) ||
+	    hlist_empty(&cookie->backing_objects))
 		goto nobufs_unlock;
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
@@ -666,7 +669,8 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 
 	spin_lock(&cookie->lock);
 
-	if (hlist_empty(&cookie->backing_objects))
+	if (!fscache_cookie_enabled(cookie) ||
+	    hlist_empty(&cookie->backing_objects))
 		goto nobufs_unlock;
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
@@ -938,7 +942,8 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	ret = -ENOBUFS;
 	spin_lock(&cookie->lock);
 
-	if (hlist_empty(&cookie->backing_objects))
+	if (!fscache_cookie_enabled(cookie) ||
+	    hlist_empty(&cookie->backing_objects))
 		goto nobufs;
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 24d1d1c5fcaf..cd6e7efd4305 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -39,7 +39,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp)
 	/* create a cache index for looking up filehandles */
 	clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
 					      &nfs_fscache_server_index_def,
-					      clp);
+					      clp, true);
 	dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
 		 clp, clp->fscache);
 }
@@ -139,7 +139,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int
 	/* create a cache index for looking up filehandles */
 	nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
 					       &nfs_fscache_super_index_def,
-					       nfss);
+					       nfss, true);
 	dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
 		 nfss, nfss->fscache);
 	return;
@@ -200,7 +200,7 @@ static void nfs_fscache_enable_inode_cookie(struct inode *inode)
 		nfsi->fscache = fscache_acquire_cookie(
 			NFS_SB(sb)->fscache,
 			&nfs_fscache_inode_object_def,
-			nfsi);
+			nfsi, true);
 
 		dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n",
 			 sb, nfsi, nfsi->fscache);
@@ -327,7 +327,7 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
 		nfsi->fscache = fscache_acquire_cookie(
 			nfss->nfs_client->fscache,
 			&nfs_fscache_inode_object_def,
-			nfsi);
+			nfsi, true);
 
 		dfprintk(FSCACHE,
 			 "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n",
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 96a2b66f5968..771484993ca7 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -308,36 +308,6 @@ struct fscache_cache_ops {
 	void (*dissociate_pages)(struct fscache_cache *cache);
 };
 
-/*
- * data file or index object cookie
- * - a file will only appear in one cache
- * - a request to cache a file may or may not be honoured, subject to
- *   constraints such as disk space
- * - indices are created on disk just-in-time
- */
-struct fscache_cookie {
-	atomic_t			usage;		/* number of users of this cookie */
-	atomic_t			n_children;	/* number of children of this cookie */
-	atomic_t			n_active;	/* number of active users of netfs ptrs */
-	spinlock_t			lock;
-	spinlock_t			stores_lock;	/* lock on page store tree */
-	struct hlist_head		backing_objects; /* object(s) backing this file/index */
-	const struct fscache_cookie_def	*def;		/* definition */
-	struct fscache_cookie		*parent;	/* parent of this entry */
-	void				*netfs_data;	/* back pointer to netfs */
-	struct radix_tree_root		stores;		/* pages to be stored on this cookie */
-#define FSCACHE_COOKIE_PENDING_TAG	0		/* pages tag: pending write to cache */
-#define FSCACHE_COOKIE_STORING_TAG	1		/* pages tag: writing to cache */
-
-	unsigned long			flags;
-#define FSCACHE_COOKIE_LOOKING_UP	0	/* T if non-index cookie being looked up still */
-#define FSCACHE_COOKIE_NO_DATA_YET	1	/* T if new object with no cached data yet */
-#define FSCACHE_COOKIE_UNAVAILABLE	2	/* T if cookie is unavailable (error, etc) */
-#define FSCACHE_COOKIE_INVALIDATING	3	/* T if cookie is being invalidated */
-#define FSCACHE_COOKIE_RELINQUISHED	4	/* T if cookie has been relinquished */
-#define FSCACHE_COOKIE_RETIRED		5	/* T if cookie was retired */
-};
-
 extern struct fscache_cookie fscache_fsdef_index;
 
 /*
@@ -400,6 +370,7 @@ struct fscache_object {
 #define FSCACHE_OBJECT_IS_LIVE		3	/* T if object is not withdrawn or relinquished */
 #define FSCACHE_OBJECT_IS_LOOKED_UP	4	/* T if object has been looked up */
 #define FSCACHE_OBJECT_IS_AVAILABLE	5	/* T if object has become active */
+#define FSCACHE_OBJECT_RETIRED		6	/* T if object was retired on relinquishment */
 
 	struct list_head	cache_link;	/* link in cache->object_list */
 	struct hlist_node	cookie_link;	/* link in cookie->backing_objects */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 19b46458e4e8..115bb81912cc 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -166,6 +166,42 @@ struct fscache_netfs {
 	struct list_head		link;		/* internal link */
 };
 
+/*
+ * data file or index object cookie
+ * - a file will only appear in one cache
+ * - a request to cache a file may or may not be honoured, subject to
+ *   constraints such as disk space
+ * - indices are created on disk just-in-time
+ */
+struct fscache_cookie {
+	atomic_t			usage;		/* number of users of this cookie */
+	atomic_t			n_children;	/* number of children of this cookie */
+	atomic_t			n_active;	/* number of active users of netfs ptrs */
+	spinlock_t			lock;
+	spinlock_t			stores_lock;	/* lock on page store tree */
+	struct hlist_head		backing_objects; /* object(s) backing this file/index */
+	const struct fscache_cookie_def	*def;		/* definition */
+	struct fscache_cookie		*parent;	/* parent of this entry */
+	void				*netfs_data;	/* back pointer to netfs */
+	struct radix_tree_root		stores;		/* pages to be stored on this cookie */
+#define FSCACHE_COOKIE_PENDING_TAG	0		/* pages tag: pending write to cache */
+#define FSCACHE_COOKIE_STORING_TAG	1		/* pages tag: writing to cache */
+
+	unsigned long			flags;
+#define FSCACHE_COOKIE_LOOKING_UP	0	/* T if non-index cookie being looked up still */
+#define FSCACHE_COOKIE_NO_DATA_YET	1	/* T if new object with no cached data yet */
+#define FSCACHE_COOKIE_UNAVAILABLE	2	/* T if cookie is unavailable (error, etc) */
+#define FSCACHE_COOKIE_INVALIDATING	3	/* T if cookie is being invalidated */
+#define FSCACHE_COOKIE_RELINQUISHED	4	/* T if cookie has been relinquished */
+#define FSCACHE_COOKIE_ENABLED		5	/* T if cookie is enabled */
+#define FSCACHE_COOKIE_ENABLEMENT_LOCK	6	/* T if cookie is being en/disabled */
+};
+
+static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie)
+{
+	return test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
+}
+
 /*
  * slow-path functions for when there is actually caching available, and the
  * netfs does actually have a valid token
@@ -181,8 +217,8 @@ extern void __fscache_release_cache_tag(struct fscache_cache_tag *);
 extern struct fscache_cookie *__fscache_acquire_cookie(
 	struct fscache_cookie *,
 	const struct fscache_cookie_def *,
-	void *);
-extern void __fscache_relinquish_cookie(struct fscache_cookie *, int);
+	void *, bool);
+extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool);
 extern int __fscache_check_consistency(struct fscache_cookie *);
 extern void __fscache_update_cookie(struct fscache_cookie *);
 extern int __fscache_attr_changed(struct fscache_cookie *);
@@ -211,6 +247,9 @@ extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *,
 					      struct inode *);
 extern void __fscache_readpages_cancel(struct fscache_cookie *cookie,
 				       struct list_head *pages);
+extern void __fscache_disable_cookie(struct fscache_cookie *, bool);
+extern void __fscache_enable_cookie(struct fscache_cookie *,
+				    bool (*)(void *), void *);
 
 /**
  * fscache_register_netfs - Register a filesystem as desiring caching services
@@ -289,6 +328,7 @@ void fscache_release_cache_tag(struct fscache_cache_tag *tag)
  * @def: A description of the cache object, including callback operations
  * @netfs_data: An arbitrary piece of data to be kept in the cookie to
  * represent the cache object to the netfs
+ * @enable: Whether or not to enable a data cookie immediately
  *
  * This function is used to inform FS-Cache about part of an index hierarchy
  * that can be used to locate files.  This is done by requesting a cookie for
@@ -301,10 +341,12 @@ static inline
 struct fscache_cookie *fscache_acquire_cookie(
 	struct fscache_cookie *parent,
 	const struct fscache_cookie_def *def,
-	void *netfs_data)
+	void *netfs_data,
+	bool enable)
 {
-	if (fscache_cookie_valid(parent))
-		return __fscache_acquire_cookie(parent, def, netfs_data);
+	if (fscache_cookie_valid(parent) && fscache_cookie_enabled(parent))
+		return __fscache_acquire_cookie(parent, def, netfs_data,
+						enable);
 	else
 		return NULL;
 }
@@ -322,7 +364,7 @@ struct fscache_cookie *fscache_acquire_cookie(
  * description.
  */
 static inline
-void fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
+void fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
 {
 	if (fscache_cookie_valid(cookie))
 		__fscache_relinquish_cookie(cookie, retire);
@@ -341,7 +383,7 @@ void fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
 static inline
 int fscache_check_consistency(struct fscache_cookie *cookie)
 {
-	if (fscache_cookie_valid(cookie))
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
 		return __fscache_check_consistency(cookie);
 	else
 		return 0;
@@ -360,7 +402,7 @@ int fscache_check_consistency(struct fscache_cookie *cookie)
 static inline
 void fscache_update_cookie(struct fscache_cookie *cookie)
 {
-	if (fscache_cookie_valid(cookie))
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
 		__fscache_update_cookie(cookie);
 }
 
@@ -407,7 +449,7 @@ void fscache_unpin_cookie(struct fscache_cookie *cookie)
 static inline
 int fscache_attr_changed(struct fscache_cookie *cookie)
 {
-	if (fscache_cookie_valid(cookie))
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
 		return __fscache_attr_changed(cookie);
 	else
 		return -ENOBUFS;
@@ -429,7 +471,7 @@ int fscache_attr_changed(struct fscache_cookie *cookie)
 static inline
 void fscache_invalidate(struct fscache_cookie *cookie)
 {
-	if (fscache_cookie_valid(cookie))
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
 		__fscache_invalidate(cookie);
 }
 
@@ -503,7 +545,7 @@ int fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 			       void *context,
 			       gfp_t gfp)
 {
-	if (fscache_cookie_valid(cookie))
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
 		return __fscache_read_or_alloc_page(cookie, page, end_io_func,
 						    context, gfp);
 	else
@@ -554,7 +596,7 @@ int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 				void *context,
 				gfp_t gfp)
 {
-	if (fscache_cookie_valid(cookie))
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
 		return __fscache_read_or_alloc_pages(cookie, mapping, pages,
 						     nr_pages, end_io_func,
 						     context, gfp);
@@ -585,7 +627,7 @@ int fscache_alloc_page(struct fscache_cookie *cookie,
 		       struct page *page,
 		       gfp_t gfp)
 {
-	if (fscache_cookie_valid(cookie))
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
 		return __fscache_alloc_page(cookie, page, gfp);
 	else
 		return -ENOBUFS;
@@ -634,7 +676,7 @@ int fscache_write_page(struct fscache_cookie *cookie,
 		       struct page *page,
 		       gfp_t gfp)
 {
-	if (fscache_cookie_valid(cookie))
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
 		return __fscache_write_page(cookie, page, gfp);
 	else
 		return -ENOBUFS;
@@ -744,4 +786,47 @@ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
 		__fscache_uncache_all_inode_pages(cookie, inode);
 }
 
+/**
+ * fscache_disable_cookie - Disable a cookie
+ * @cookie: The cookie representing the cache object
+ * @invalidate: Invalidate the backing object
+ *
+ * Disable a cookie from accepting further alloc, read, write, invalidate,
+ * update or acquire operations.  Outstanding operations can still be waited
+ * upon and pages can still be uncached and the cookie relinquished.
+ *
+ * This will not return until all outstanding operations have completed.
+ *
+ * If @invalidate is set, then the backing object will be invalidated and
+ * detached, otherwise it will just be detached.
+ */
+static inline
+void fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
+{
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
+		__fscache_disable_cookie(cookie, invalidate);
+}
+
+/**
+ * fscache_enable_cookie - Reenable a cookie
+ * @cookie: The cookie representing the cache object
+ * @can_enable: A function to permit enablement once lock is held
+ * @data: Data for can_enable()
+ *
+ * Reenable a previously disabled cookie, allowing it to accept further alloc,
+ * read, write, invalidate, update or acquire operations.  An attempt will be
+ * made to immediately reattach the cookie to a backing object.
+ *
+ * The can_enable() function is called (if not NULL) once the enablement lock
+ * is held to rule on whether enablement is still permitted to go ahead.
+ */
+static inline
+void fscache_enable_cookie(struct fscache_cookie *cookie,
+			   bool (*can_enable)(void *data),
+			   void *data)
+{
+	if (fscache_cookie_valid(cookie) && !fscache_cookie_enabled(cookie))
+		__fscache_enable_cookie(cookie, can_enable, data);
+}
+
 #endif /* _LINUX_FSCACHE_H */
-- 
cgit v1.2.3


From f1fe29b4a02d0805aa7d0ff6b73410a9f9316d69 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 27 Sep 2013 11:20:03 +0100
Subject: NFS: Use i_writecount to control whether to get an fscache cookie in
 nfs_open()

Use i_writecount to control whether to get an fscache cookie in nfs_open() as
NFS does not do write caching yet.  I *think* this is the cause of a problem
encountered by Mark Moseley whereby __fscache_uncache_page() gets a NULL
pointer dereference because cookie->def is NULL:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
IP: [<ffffffff812a1903>] __fscache_uncache_page+0x23/0x160
PGD 0
Thread overran stack, or stack corrupted
Oops: 0000 [#1] SMP
Modules linked in: ...
CPU: 7 PID: 18993 Comm: php Not tainted 3.11.1 #1
Hardware name: Dell Inc. PowerEdge R420/072XWF, BIOS 1.3.5 08/21/2012
task: ffff8804203460c0 ti: ffff880420346640
RIP: 0010:[<ffffffff812a1903>] __fscache_uncache_page+0x23/0x160
RSP: 0018:ffff8801053af878 EFLAGS: 00210286
RAX: 0000000000000000 RBX: ffff8800be2f8780 RCX: ffff88022ffae5e8
RDX: 0000000000004c66 RSI: ffffea00055ff440 RDI: ffff8800be2f8780
RBP: ffff8801053af898 R08: 0000000000000001 R09: 0000000000000003
R10: 0000000000000000 R11: 0000000000000000 R12: ffffea00055ff440
R13: 0000000000001000 R14: ffff8800c50be538 R15: 0000000000000000
FS: 0000000000000000(0000) GS:ffff88042fc60000(0063) knlGS:00000000e439c700
CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
CR2: 0000000000000010 CR3: 0000000001d8f000 CR4: 00000000000607f0
Stack:
...
Call Trace:
[<ffffffff81365a72>] __nfs_fscache_invalidate_page+0x42/0x70
[<ffffffff813553d5>] nfs_invalidate_page+0x75/0x90
[<ffffffff811b8f5e>] truncate_inode_page+0x8e/0x90
[<ffffffff811b90ad>] truncate_inode_pages_range.part.12+0x14d/0x620
[<ffffffff81d6387d>] ? __mutex_lock_slowpath+0x1fd/0x2e0
[<ffffffff811b95d3>] truncate_inode_pages_range+0x53/0x70
[<ffffffff811b969d>] truncate_inode_pages+0x2d/0x40
[<ffffffff811b96ff>] truncate_pagecache+0x4f/0x70
[<ffffffff81356840>] nfs_setattr_update_inode+0xa0/0x120
[<ffffffff81368de4>] nfs3_proc_setattr+0xc4/0xe0
[<ffffffff81357f78>] nfs_setattr+0xc8/0x150
[<ffffffff8122d95b>] notify_change+0x1cb/0x390
[<ffffffff8120a55b>] do_truncate+0x7b/0xc0
[<ffffffff8121f96c>] do_last+0xa4c/0xfd0
[<ffffffff8121ffbc>] path_openat+0xcc/0x670
[<ffffffff81220a0e>] do_filp_open+0x4e/0xb0
[<ffffffff8120ba1f>] do_sys_open+0x13f/0x2b0
[<ffffffff8126aaf6>] compat_SyS_open+0x36/0x50
[<ffffffff81d7204c>] sysenter_dispatch+0x7/0x24

The code at the instruction pointer was disassembled:

> (gdb) disas __fscache_uncache_page
> Dump of assembler code for function __fscache_uncache_page:
> ...
> 0xffffffff812a18ff <+31>: mov 0x48(%rbx),%rax
> 0xffffffff812a1903 <+35>: cmpb $0x0,0x10(%rax)
> 0xffffffff812a1907 <+39>: je 0xffffffff812a19cd <__fscache_uncache_page+237>

These instructions make up:

	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);

That cmpb is the faulting instruction (%rax is 0).  So cookie->def is NULL -
which presumably means that the cookie has already been at least partway
through __fscache_relinquish_cookie().

What I think may be happening is something like a three-way race on the same
file:

	PROCESS 1	PROCESS 2	PROCESS 3
	===============	===============	===============
	open(O_TRUNC|O_WRONLY)
			open(O_RDONLY)
					open(O_WRONLY)
	-->nfs_open()
	-->nfs_fscache_set_inode_cookie()
	nfs_fscache_inode_lock()
	nfs_fscache_disable_inode_cookie()
	__fscache_relinquish_cookie()
	nfs_inode->fscache = NULL
	<--nfs_fscache_set_inode_cookie()

			-->nfs_open()
			-->nfs_fscache_set_inode_cookie()
			nfs_fscache_inode_lock()
			nfs_fscache_enable_inode_cookie()
			__fscache_acquire_cookie()
			nfs_inode->fscache = cookie
			<--nfs_fscache_set_inode_cookie()
	<--nfs_open()
	-->nfs_setattr()
	...
	...
	-->nfs_invalidate_page()
	-->__nfs_fscache_invalidate_page()
	cookie = nfsi->fscache
					-->nfs_open()
					-->nfs_fscache_set_inode_cookie()
					nfs_fscache_inode_lock()
					nfs_fscache_disable_inode_cookie()
					-->__fscache_relinquish_cookie()
	-->__fscache_uncache_page(cookie)
	<crash>
					<--__fscache_relinquish_cookie()
					nfs_inode->fscache = NULL
					<--nfs_fscache_set_inode_cookie()

What is needed is something to prevent process #2 from reacquiring the cookie
- and I think checking i_writecount should do the trick.

It's also possible to have a two-way race on this if the file is opened
O_TRUNC|O_RDONLY instead.

Reported-by: Mark Moseley <moseleymark@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/nfs/dir.c           |   2 +-
 fs/nfs/fscache.c       | 198 ++++++++++++++-----------------------------------
 fs/nfs/fscache.h       |  18 ++---
 fs/nfs/inode.c         |   6 +-
 fs/nfs/nfs4file.c      |   2 +-
 include/linux/fs.h     |   5 ++
 include/linux/nfs_fs.h |   8 +-
 7 files changed, 79 insertions(+), 160 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 854a8f05a610..4c5edcc8b6e9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1381,7 +1381,7 @@ static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, i
 
 static int do_open(struct inode *inode, struct file *filp)
 {
-	nfs_fscache_set_inode_cookie(inode, filp);
+	nfs_fscache_open_file(inode, filp);
 	return 0;
 }
 
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index cd6e7efd4305..3ef01f0ba0bc 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -178,163 +178,79 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
 /*
  * Initialise the per-inode cache cookie pointer for an NFS inode.
  */
-void nfs_fscache_init_inode_cookie(struct inode *inode)
+void nfs_fscache_init_inode(struct inode *inode)
 {
-	NFS_I(inode)->fscache = NULL;
-	if (S_ISREG(inode->i_mode))
-		set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
-}
-
-/*
- * Get the per-inode cache cookie for an NFS inode.
- */
-static void nfs_fscache_enable_inode_cookie(struct inode *inode)
-{
-	struct super_block *sb = inode->i_sb;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
-	if (nfsi->fscache || !NFS_FSCACHE(inode))
+	nfsi->fscache = NULL;
+	if (!S_ISREG(inode->i_mode))
 		return;
-
-	if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) {
-		nfsi->fscache = fscache_acquire_cookie(
-			NFS_SB(sb)->fscache,
-			&nfs_fscache_inode_object_def,
-			nfsi, true);
-
-		dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n",
-			 sb, nfsi, nfsi->fscache);
-	}
+	nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache,
+					       &nfs_fscache_inode_object_def,
+					       nfsi, false);
 }
 
 /*
  * Release a per-inode cookie.
  */
-void nfs_fscache_release_inode_cookie(struct inode *inode)
+void nfs_fscache_clear_inode(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
+	struct fscache_cookie *cookie = nfs_i_fscache(inode);
 
-	dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n",
-		 nfsi, nfsi->fscache);
+	dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie);
 
-	fscache_relinquish_cookie(nfsi->fscache, 0);
+	fscache_relinquish_cookie(cookie, false);
 	nfsi->fscache = NULL;
 }
 
-/*
- * Retire a per-inode cookie, destroying the data attached to it.
- */
-void nfs_fscache_zap_inode_cookie(struct inode *inode)
+static bool nfs_fscache_can_enable(void *data)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
+	struct inode *inode = data;
 
-	dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n",
-		 nfsi, nfsi->fscache);
-
-	fscache_relinquish_cookie(nfsi->fscache, 1);
-	nfsi->fscache = NULL;
+	return !inode_is_open_for_write(inode);
 }
 
 /*
- * Turn off the cache with regard to a per-inode cookie if opened for writing,
- * invalidating all the pages in the page cache relating to the associated
- * inode to clear the per-page caching.
- */
-static void nfs_fscache_disable_inode_cookie(struct inode *inode)
-{
-	clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
-
-	if (NFS_I(inode)->fscache) {
-		dfprintk(FSCACHE,
-			 "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode));
-
-		/* Need to uncache any pages attached to this inode that
-		 * fscache knows about before turning off the cache.
-		 */
-		fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode);
-		nfs_fscache_zap_inode_cookie(inode);
-	}
-}
-
-/*
- * wait_on_bit() sleep function for uninterruptible waiting
- */
-static int nfs_fscache_wait_bit(void *flags)
-{
-	schedule();
-	return 0;
-}
-
-/*
- * Lock against someone else trying to also acquire or relinquish a cookie
- */
-static inline void nfs_fscache_inode_lock(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags))
-		wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK,
-			    nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE);
-}
-
-/*
- * Unlock cookie management lock
- */
-static inline void nfs_fscache_inode_unlock(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	smp_mb__before_clear_bit();
-	clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK);
-}
-
-/*
- * Decide if we should enable or disable local caching for this inode.
- * - For now, with NFS, only regular files that are open read-only will be able
- *   to use the cache.
- * - May be invoked multiple times in parallel by parallel nfs_open() functions.
- */
-void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
-{
-	if (NFS_FSCACHE(inode)) {
-		nfs_fscache_inode_lock(inode);
-		if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
-			nfs_fscache_disable_inode_cookie(inode);
-		else
-			nfs_fscache_enable_inode_cookie(inode);
-		nfs_fscache_inode_unlock(inode);
-	}
-}
-EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie);
-
-/*
- * Replace a per-inode cookie due to revalidation detecting a file having
- * changed on the server.
+ * Enable or disable caching for a file that is being opened as appropriate.
+ * The cookie is allocated when the inode is initialised, but is not enabled at
+ * that time.  Enablement is deferred to file-open time to avoid stat() and
+ * access() thrashing the cache.
+ *
+ * For now, with NFS, only regular files that are open read-only will be able
+ * to use the cache.
+ *
+ * We enable the cache for an inode if we open it read-only and it isn't
+ * currently open for writing.  We disable the cache if the inode is open
+ * write-only.
+ *
+ * The caller uses the file struct to pin i_writecount on the inode before
+ * calling us when a file is opened for writing, so we can make use of that.
+ *
+ * Note that this may be invoked multiple times in parallel by parallel
+ * nfs_open() functions.
  */
-void nfs_fscache_reset_inode_cookie(struct inode *inode)
+void nfs_fscache_open_file(struct inode *inode, struct file *filp)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_server *nfss = NFS_SERVER(inode);
-	NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache);
+	struct fscache_cookie *cookie = nfs_i_fscache(inode);
 
-	nfs_fscache_inode_lock(inode);
-	if (nfsi->fscache) {
-		/* retire the current fscache cache and get a new one */
-		fscache_relinquish_cookie(nfsi->fscache, 1);
-
-		nfsi->fscache = fscache_acquire_cookie(
-			nfss->nfs_client->fscache,
-			&nfs_fscache_inode_object_def,
-			nfsi, true);
+	if (!fscache_cookie_valid(cookie))
+		return;
 
-		dfprintk(FSCACHE,
-			 "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n",
-			 nfss, nfsi, old, nfsi->fscache);
+	if (inode_is_open_for_write(inode)) {
+		dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi);
+		clear_bit(NFS_INO_FSCACHE, &nfsi->flags);
+		fscache_disable_cookie(cookie, true);
+		fscache_uncache_all_inode_pages(cookie, inode);
+	} else {
+		dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi);
+		fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode);
+		if (fscache_cookie_enabled(cookie))
+			set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
 	}
-	nfs_fscache_inode_unlock(inode);
 }
+EXPORT_SYMBOL_GPL(nfs_fscache_open_file);
 
 /*
  * Release the caching state associated with a page, if the page isn't busy
@@ -344,12 +260,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
 int nfs_fscache_release_page(struct page *page, gfp_t gfp)
 {
 	if (PageFsCache(page)) {
-		struct nfs_inode *nfsi = NFS_I(page->mapping->host);
-		struct fscache_cookie *cookie = nfsi->fscache;
+		struct fscache_cookie *cookie = nfs_i_fscache(page->mapping->host);
 
 		BUG_ON(!cookie);
 		dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
-			 cookie, page, nfsi);
+			 cookie, page, NFS_I(page->mapping->host));
 
 		if (!fscache_maybe_release_page(cookie, page, gfp))
 			return 0;
@@ -367,13 +282,12 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp)
  */
 void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct fscache_cookie *cookie = nfsi->fscache;
+	struct fscache_cookie *cookie = nfs_i_fscache(inode);
 
 	BUG_ON(!cookie);
 
 	dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
-		 cookie, page, nfsi);
+		 cookie, page, NFS_I(inode));
 
 	fscache_wait_on_page_write(cookie, page);
 
@@ -417,9 +331,9 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
 
 	dfprintk(FSCACHE,
 		 "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
-		 NFS_I(inode)->fscache, page, page->index, page->flags, inode);
+		 nfs_i_fscache(inode), page, page->index, page->flags, inode);
 
-	ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache,
+	ret = fscache_read_or_alloc_page(nfs_i_fscache(inode),
 					 page,
 					 nfs_readpage_from_fscache_complete,
 					 ctx,
@@ -459,9 +373,9 @@ int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
 	int ret;
 
 	dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
-		 NFS_I(inode)->fscache, npages, inode);
+		 nfs_i_fscache(inode), npages, inode);
 
-	ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache,
+	ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode),
 					  mapping, pages, nr_pages,
 					  nfs_readpage_from_fscache_complete,
 					  ctx,
@@ -506,15 +420,15 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
 
 	dfprintk(FSCACHE,
 		 "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
-		 NFS_I(inode)->fscache, page, page->index, page->flags, sync);
+		 nfs_i_fscache(inode), page, page->index, page->flags, sync);
 
-	ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL);
+	ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL);
 	dfprintk(FSCACHE,
 		 "NFS:     readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
 		 page, page->index, page->flags, ret);
 
 	if (ret != 0) {
-		fscache_uncache_page(NFS_I(inode)->fscache, page);
+		fscache_uncache_page(nfs_i_fscache(inode), page);
 		nfs_add_fscache_stats(inode,
 				      NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
 		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 4ecb76652eba..d7fe3e799f2f 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -76,11 +76,9 @@ extern void nfs_fscache_release_client_cookie(struct nfs_client *);
 extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
 extern void nfs_fscache_release_super_cookie(struct super_block *);
 
-extern void nfs_fscache_init_inode_cookie(struct inode *);
-extern void nfs_fscache_release_inode_cookie(struct inode *);
-extern void nfs_fscache_zap_inode_cookie(struct inode *);
-extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *);
-extern void nfs_fscache_reset_inode_cookie(struct inode *);
+extern void nfs_fscache_init_inode(struct inode *);
+extern void nfs_fscache_clear_inode(struct inode *);
+extern void nfs_fscache_open_file(struct inode *, struct file *);
 
 extern void __nfs_fscache_invalidate_page(struct page *, struct inode *);
 extern int nfs_fscache_release_page(struct page *, gfp_t);
@@ -187,12 +185,10 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
 
 static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
 
-static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
-static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {}
-static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {}
-static inline void nfs_fscache_set_inode_cookie(struct inode *inode,
-						struct file *filp) {}
-static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {}
+static inline void nfs_fscache_init_inode(struct inode *inode) {}
+static inline void nfs_fscache_clear_inode(struct inode *inode) {}
+static inline void nfs_fscache_open_file(struct inode *inode,
+					 struct file *filp) {}
 
 static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp)
 {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index eda8879171c4..bb90bff0cb7a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -122,7 +122,7 @@ void nfs_clear_inode(struct inode *inode)
 	WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files));
 	nfs_zap_acl_cache(inode);
 	nfs_access_zap_cache(inode);
-	nfs_fscache_release_inode_cookie(inode);
+	nfs_fscache_clear_inode(inode);
 }
 EXPORT_SYMBOL_GPL(nfs_clear_inode);
 
@@ -459,7 +459,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 		nfsi->attrtimeo_timestamp = now;
 		nfsi->access_cache = RB_ROOT;
 
-		nfs_fscache_init_inode_cookie(inode);
+		nfs_fscache_init_inode(inode);
 
 		unlock_new_inode(inode);
 	} else
@@ -854,7 +854,7 @@ int nfs_open(struct inode *inode, struct file *filp)
 		return PTR_ERR(ctx);
 	nfs_file_set_open_context(filp, ctx);
 	put_nfs_open_context(ctx);
-	nfs_fscache_set_inode_cookie(inode, filp);
+	nfs_fscache_open_file(inode, filp);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e5b804dd944c..5b8a618a0f7a 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -74,7 +74,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
 
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	nfs_file_set_open_context(filp, ctx);
-	nfs_fscache_set_inode_cookie(inode, filp);
+	nfs_fscache_open_file(inode, filp);
 	err = 0;
 
 out_put_ctx:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f40547ba191..955dff5da56a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2292,6 +2292,11 @@ static inline void allow_write_access(struct file *file)
 	if (file)
 		atomic_inc(&file_inode(file)->i_writecount);
 }
+static inline bool inode_is_open_for_write(const struct inode *inode)
+{
+	return atomic_read(&inode->i_writecount) > 0;
+}
+
 #ifdef CONFIG_IMA
 static inline void i_readcount_dec(struct inode *inode)
 {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 3ea4cde8701c..14a48207a304 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -269,9 +269,13 @@ static inline int NFS_STALE(const struct inode *inode)
 	return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
 }
 
-static inline int NFS_FSCACHE(const struct inode *inode)
+static inline struct fscache_cookie *nfs_i_fscache(struct inode *inode)
 {
-	return test_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
+#ifdef CONFIG_NFS_FSCACHE
+	return NFS_I(inode)->fscache;
+#else
+	return NULL;
+#endif
 }
 
 static inline __u64 NFS_FILEID(const struct inode *inode)
-- 
cgit v1.2.3


From 8d6a35fb13406f87d926fffeee0d70360ce3077d Mon Sep 17 00:00:00 2001
From: Sean Cross <xobs@kosagi.com>
Date: Thu, 26 Sep 2013 11:24:46 +0800
Subject: ARM: imx6q: Add PCIe bits to GPR syscon definition

PCIe requires additional bits be defined for GPR8 and GPR12.

Signed-off-by: Sean Cross <xobs@kosagi.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index b6bdcd66c07d..e00e9f362fd5 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -241,6 +241,12 @@
 
 #define IMX6Q_GPR5_L2_CLK_STOP			BIT(8)
 
+#define IMX6Q_GPR8_TX_SWING_LOW			(0x7f << 25)
+#define IMX6Q_GPR8_TX_SWING_FULL		(0x7f << 18)
+#define IMX6Q_GPR8_TX_DEEMPH_GEN2_6DB		(0x3f << 12)
+#define IMX6Q_GPR8_TX_DEEMPH_GEN2_3P5DB		(0x3f << 6)
+#define IMX6Q_GPR8_TX_DEEMPH_GEN1		(0x3f << 0)
+
 #define IMX6Q_GPR9_TZASC2_BYP			BIT(1)
 #define IMX6Q_GPR9_TZASC1_BYP			BIT(0)
 
@@ -273,7 +279,9 @@
 #define IMX6Q_GPR12_ARMP_AHB_CLK_EN		BIT(26)
 #define IMX6Q_GPR12_ARMP_ATB_CLK_EN		BIT(25)
 #define IMX6Q_GPR12_ARMP_APB_CLK_EN		BIT(24)
+#define IMX6Q_GPR12_DEVICE_TYPE			(0xf << 12)
 #define IMX6Q_GPR12_PCIE_CTL_2			BIT(10)
+#define IMX6Q_GPR12_LOS_LEVEL			(0x1f << 4)
 
 #define IMX6Q_GPR13_SDMA_STOP_REQ		BIT(30)
 #define IMX6Q_GPR13_CAN2_STOP_REQ		BIT(29)
-- 
cgit v1.2.3


From ff764963479a1b18721ab96e531404c50fefe8b1 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Fri, 27 Sep 2013 11:53:25 +0530
Subject: drivers: phy: add generic PHY framework

The PHY framework provides a set of APIs for the PHY drivers to
create/destroy a PHY and APIs for the PHY users to obtain a reference to the
PHY with or without using phandle. For dt-boot, the PHY drivers should
also register *PHY provider* with the framework.

PHY drivers should create the PHY by passing id and ops like init, exit,
power_on and power_off. This framework is also pm runtime enabled.

The documentation for the generic PHY framework is added in
Documentation/phy.txt and the documentation for dt binding can be found at
Documentation/devicetree/bindings/phy/phy-bindings.txt

Cc: Tomasz Figa <t.figa@samsung.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Acked-by: Felipe Balbi <balbi@ti.com>
Tested-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/phy/phy-bindings.txt       |  66 ++
 Documentation/phy.txt                              | 166 +++++
 MAINTAINERS                                        |   8 +
 drivers/Kconfig                                    |   2 +
 drivers/Makefile                                   |   2 +
 drivers/phy/Kconfig                                |  18 +
 drivers/phy/Makefile                               |   5 +
 drivers/phy/phy-core.c                             | 698 +++++++++++++++++++++
 include/linux/phy/phy.h                            | 270 ++++++++
 9 files changed, 1235 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/phy/phy-bindings.txt
 create mode 100644 Documentation/phy.txt
 create mode 100644 drivers/phy/Kconfig
 create mode 100644 drivers/phy/Makefile
 create mode 100644 drivers/phy/phy-core.c
 create mode 100644 include/linux/phy/phy.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/phy/phy-bindings.txt b/Documentation/devicetree/bindings/phy/phy-bindings.txt
new file mode 100644
index 000000000000..8ae844fc0c60
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-bindings.txt
@@ -0,0 +1,66 @@
+This document explains only the device tree data binding. For general
+information about PHY subsystem refer to Documentation/phy.txt
+
+PHY device node
+===============
+
+Required Properties:
+#phy-cells:	Number of cells in a PHY specifier;  The meaning of all those
+		cells is defined by the binding for the phy node. The PHY
+		provider can use the values in cells to find the appropriate
+		PHY.
+
+For example:
+
+phys: phy {
+    compatible = "xxx";
+    reg = <...>;
+    .
+    .
+    #phy-cells = <1>;
+    .
+    .
+};
+
+That node describes an IP block (PHY provider) that implements 2 different PHYs.
+In order to differentiate between these 2 PHYs, an additonal specifier should be
+given while trying to get a reference to it.
+
+PHY user node
+=============
+
+Required Properties:
+phys : the phandle for the PHY device (used by the PHY subsystem)
+phy-names : the names of the PHY corresponding to the PHYs present in the
+	    *phys* phandle
+
+Example 1:
+usb1: usb_otg_ss@xxx {
+    compatible = "xxx";
+    reg = <xxx>;
+    .
+    .
+    phys = <&usb2_phy>, <&usb3_phy>;
+    phy-names = "usb2phy", "usb3phy";
+    .
+    .
+};
+
+This node represents a controller that uses two PHYs, one for usb2 and one for
+usb3.
+
+Example 2:
+usb2: usb_otg_ss@xxx {
+    compatible = "xxx";
+    reg = <xxx>;
+    .
+    .
+    phys = <&phys 1>;
+    phy-names = "usbphy";
+    .
+    .
+};
+
+This node represents a controller that uses one of the PHYs of the PHY provider
+device defined previously. Note that the phy handle has an additional specifier
+"1" to differentiate between the two PHYs.
diff --git a/Documentation/phy.txt b/Documentation/phy.txt
new file mode 100644
index 000000000000..0103e4b15b0e
--- /dev/null
+++ b/Documentation/phy.txt
@@ -0,0 +1,166 @@
+			    PHY SUBSYSTEM
+		  Kishon Vijay Abraham I <kishon@ti.com>
+
+This document explains the Generic PHY Framework along with the APIs provided,
+and how-to-use.
+
+1. Introduction
+
+*PHY* is the abbreviation for physical layer. It is used to connect a device
+to the physical medium e.g., the USB controller has a PHY to provide functions
+such as serialization, de-serialization, encoding, decoding and is responsible
+for obtaining the required data transmission rate. Note that some USB
+controllers have PHY functionality embedded into it and others use an external
+PHY. Other peripherals that use PHY include Wireless LAN, Ethernet,
+SATA etc.
+
+The intention of creating this framework is to bring the PHY drivers spread
+all over the Linux kernel to drivers/phy to increase code re-use and for
+better code maintainability.
+
+This framework will be of use only to devices that use external PHY (PHY
+functionality is not embedded within the controller).
+
+2. Registering/Unregistering the PHY provider
+
+PHY provider refers to an entity that implements one or more PHY instances.
+For the simple case where the PHY provider implements only a single instance of
+the PHY, the framework provides its own implementation of of_xlate in
+of_phy_simple_xlate. If the PHY provider implements multiple instances, it
+should provide its own implementation of of_xlate. of_xlate is used only for
+dt boot case.
+
+#define of_phy_provider_register(dev, xlate)    \
+        __of_phy_provider_register((dev), THIS_MODULE, (xlate))
+
+#define devm_of_phy_provider_register(dev, xlate)       \
+        __devm_of_phy_provider_register((dev), THIS_MODULE, (xlate))
+
+of_phy_provider_register and devm_of_phy_provider_register macros can be used to
+register the phy_provider and it takes device and of_xlate as
+arguments. For the dt boot case, all PHY providers should use one of the above
+2 macros to register the PHY provider.
+
+void devm_of_phy_provider_unregister(struct device *dev,
+	struct phy_provider *phy_provider);
+void of_phy_provider_unregister(struct phy_provider *phy_provider);
+
+devm_of_phy_provider_unregister and of_phy_provider_unregister can be used to
+unregister the PHY.
+
+3. Creating the PHY
+
+The PHY driver should create the PHY in order for other peripheral controllers
+to make use of it. The PHY framework provides 2 APIs to create the PHY.
+
+struct phy *phy_create(struct device *dev, const struct phy_ops *ops,
+        struct phy_init_data *init_data);
+struct phy *devm_phy_create(struct device *dev, const struct phy_ops *ops,
+	struct phy_init_data *init_data);
+
+The PHY drivers can use one of the above 2 APIs to create the PHY by passing
+the device pointer, phy ops and init_data.
+phy_ops is a set of function pointers for performing PHY operations such as
+init, exit, power_on and power_off. *init_data* is mandatory to get a reference
+to the PHY in the case of non-dt boot. See section *Board File Initialization*
+on how init_data should be used.
+
+Inorder to dereference the private data (in phy_ops), the phy provider driver
+can use phy_set_drvdata() after creating the PHY and use phy_get_drvdata() in
+phy_ops to get back the private data.
+
+4. Getting a reference to the PHY
+
+Before the controller can make use of the PHY, it has to get a reference to
+it. This framework provides the following APIs to get a reference to the PHY.
+
+struct phy *phy_get(struct device *dev, const char *string);
+struct phy *devm_phy_get(struct device *dev, const char *string);
+
+phy_get and devm_phy_get can be used to get the PHY. In the case of dt boot,
+the string arguments should contain the phy name as given in the dt data and
+in the case of non-dt boot, it should contain the label of the PHY.
+The only difference between the two APIs is that devm_phy_get associates the
+device with the PHY using devres on successful PHY get. On driver detach,
+release function is invoked on the the devres data and devres data is freed.
+
+5. Releasing a reference to the PHY
+
+When the controller no longer needs the PHY, it has to release the reference
+to the PHY it has obtained using the APIs mentioned in the above section. The
+PHY framework provides 2 APIs to release a reference to the PHY.
+
+void phy_put(struct phy *phy);
+void devm_phy_put(struct device *dev, struct phy *phy);
+
+Both these APIs are used to release a reference to the PHY and devm_phy_put
+destroys the devres associated with this PHY.
+
+6. Destroying the PHY
+
+When the driver that created the PHY is unloaded, it should destroy the PHY it
+created using one of the following 2 APIs.
+
+void phy_destroy(struct phy *phy);
+void devm_phy_destroy(struct device *dev, struct phy *phy);
+
+Both these APIs destroy the PHY and devm_phy_destroy destroys the devres
+associated with this PHY.
+
+7. PM Runtime
+
+This subsystem is pm runtime enabled. So while creating the PHY,
+pm_runtime_enable of the phy device created by this subsystem is called and
+while destroying the PHY, pm_runtime_disable is called. Note that the phy
+device created by this subsystem will be a child of the device that calls
+phy_create (PHY provider device).
+
+So pm_runtime_get_sync of the phy_device created by this subsystem will invoke
+pm_runtime_get_sync of PHY provider device because of parent-child relationship.
+It should also be noted that phy_power_on and phy_power_off performs
+phy_pm_runtime_get_sync and phy_pm_runtime_put respectively.
+There are exported APIs like phy_pm_runtime_get, phy_pm_runtime_get_sync,
+phy_pm_runtime_put, phy_pm_runtime_put_sync, phy_pm_runtime_allow and
+phy_pm_runtime_forbid for performing PM operations.
+
+8. Board File Initialization
+
+Certain board file initialization is necessary in order to get a reference
+to the PHY in the case of non-dt boot.
+Say we have a single device that implements 3 PHYs that of USB, SATA and PCIe,
+then in the board file the following initialization should be done.
+
+struct phy_consumer consumers[] = {
+	PHY_CONSUMER("dwc3.0", "usb"),
+	PHY_CONSUMER("pcie.0", "pcie"),
+	PHY_CONSUMER("sata.0", "sata"),
+};
+PHY_CONSUMER takes 2 parameters, first is the device name of the controller
+(PHY consumer) and second is the port name.
+
+struct phy_init_data init_data = {
+	.consumers = consumers,
+	.num_consumers = ARRAY_SIZE(consumers),
+};
+
+static const struct platform_device pipe3_phy_dev = {
+	.name = "pipe3-phy",
+	.id = -1,
+	.dev = {
+		.platform_data = {
+			.init_data = &init_data,
+		},
+	},
+};
+
+then, while doing phy_create, the PHY driver should pass this init_data
+	phy_create(dev, ops, pdata->init_data);
+
+and the controller driver (phy consumer) should pass the port name along with
+the device to get a reference to the PHY
+	phy_get(dev, "pcie");
+
+9. DeviceTree Binding
+
+The documentation for PHY dt binding can be found @
+Documentation/devicetree/bindings/phy/phy-bindings.txt
diff --git a/MAINTAINERS b/MAINTAINERS
index e61c2e83fc2b..e0e6ae2b65e8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3654,6 +3654,14 @@ S:	Maintained
 F:	include/asm-generic/
 F:	include/uapi/asm-generic/
 
+GENERIC PHY FRAMEWORK
+M:	Kishon Vijay Abraham I <kishon@ti.com>
+L:	linux-kernel@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kishon/linux-phy.git
+S:	Supported
+F:	drivers/phy/
+F:	include/linux/phy/
+
 GENERIC UIO DRIVER FOR PCI DEVICES
 M:	"Michael S. Tsirkin" <mst@redhat.com>
 L:	kvm@vger.kernel.org
diff --git a/drivers/Kconfig b/drivers/Kconfig
index aa43b911ccef..8f451449abd3 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -166,4 +166,6 @@ source "drivers/reset/Kconfig"
 
 source "drivers/fmc/Kconfig"
 
+source "drivers/phy/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index ab93de8297f1..687da899cadb 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -8,6 +8,8 @@
 obj-y				+= irqchip/
 obj-y				+= bus/
 
+obj-$(CONFIG_GENERIC_PHY)	+= phy/
+
 # GPIO must come after pinctrl as gpios may need to mux pins etc
 obj-y				+= pinctrl/
 obj-y				+= gpio/
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
new file mode 100644
index 000000000000..349bef2e6e4e
--- /dev/null
+++ b/drivers/phy/Kconfig
@@ -0,0 +1,18 @@
+#
+# PHY
+#
+
+menu "PHY Subsystem"
+
+config GENERIC_PHY
+	tristate "PHY Core"
+	help
+	  Generic PHY support.
+
+	  This framework is designed to provide a generic interface for PHY
+	  devices present in the kernel. This layer will have the generic
+	  API by which phy drivers can create PHY using the phy framework and
+	  phy users can obtain reference to the PHY. All the users of this
+	  framework should select this config.
+
+endmenu
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
new file mode 100644
index 000000000000..9e9560fbb14d
--- /dev/null
+++ b/drivers/phy/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the phy drivers.
+#
+
+obj-$(CONFIG_GENERIC_PHY)	+= phy-core.o
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
new file mode 100644
index 000000000000..03cf8fb81554
--- /dev/null
+++ b/drivers/phy/phy-core.c
@@ -0,0 +1,698 @@
+/*
+ * phy-core.c  --  Generic Phy framework.
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/phy/phy.h>
+#include <linux/idr.h>
+#include <linux/pm_runtime.h>
+
+static struct class *phy_class;
+static DEFINE_MUTEX(phy_provider_mutex);
+static LIST_HEAD(phy_provider_list);
+static DEFINE_IDA(phy_ida);
+
+static void devm_phy_release(struct device *dev, void *res)
+{
+	struct phy *phy = *(struct phy **)res;
+
+	phy_put(phy);
+}
+
+static void devm_phy_provider_release(struct device *dev, void *res)
+{
+	struct phy_provider *phy_provider = *(struct phy_provider **)res;
+
+	of_phy_provider_unregister(phy_provider);
+}
+
+static void devm_phy_consume(struct device *dev, void *res)
+{
+	struct phy *phy = *(struct phy **)res;
+
+	phy_destroy(phy);
+}
+
+static int devm_phy_match(struct device *dev, void *res, void *match_data)
+{
+	return res == match_data;
+}
+
+static struct phy *phy_lookup(struct device *device, const char *port)
+{
+	unsigned int count;
+	struct phy *phy;
+	struct device *dev;
+	struct phy_consumer *consumers;
+	struct class_dev_iter iter;
+
+	class_dev_iter_init(&iter, phy_class, NULL, NULL);
+	while ((dev = class_dev_iter_next(&iter))) {
+		phy = to_phy(dev);
+		count = phy->init_data->num_consumers;
+		consumers = phy->init_data->consumers;
+		while (count--) {
+			if (!strcmp(consumers->dev_name, dev_name(device)) &&
+					!strcmp(consumers->port, port)) {
+				class_dev_iter_exit(&iter);
+				return phy;
+			}
+			consumers++;
+		}
+	}
+
+	class_dev_iter_exit(&iter);
+	return ERR_PTR(-ENODEV);
+}
+
+static struct phy_provider *of_phy_provider_lookup(struct device_node *node)
+{
+	struct phy_provider *phy_provider;
+
+	list_for_each_entry(phy_provider, &phy_provider_list, list) {
+		if (phy_provider->dev->of_node == node)
+			return phy_provider;
+	}
+
+	return ERR_PTR(-EPROBE_DEFER);
+}
+
+int phy_pm_runtime_get(struct phy *phy)
+{
+	if (!pm_runtime_enabled(&phy->dev))
+		return -ENOTSUPP;
+
+	return pm_runtime_get(&phy->dev);
+}
+EXPORT_SYMBOL_GPL(phy_pm_runtime_get);
+
+int phy_pm_runtime_get_sync(struct phy *phy)
+{
+	if (!pm_runtime_enabled(&phy->dev))
+		return -ENOTSUPP;
+
+	return pm_runtime_get_sync(&phy->dev);
+}
+EXPORT_SYMBOL_GPL(phy_pm_runtime_get_sync);
+
+int phy_pm_runtime_put(struct phy *phy)
+{
+	if (!pm_runtime_enabled(&phy->dev))
+		return -ENOTSUPP;
+
+	return pm_runtime_put(&phy->dev);
+}
+EXPORT_SYMBOL_GPL(phy_pm_runtime_put);
+
+int phy_pm_runtime_put_sync(struct phy *phy)
+{
+	if (!pm_runtime_enabled(&phy->dev))
+		return -ENOTSUPP;
+
+	return pm_runtime_put_sync(&phy->dev);
+}
+EXPORT_SYMBOL_GPL(phy_pm_runtime_put_sync);
+
+void phy_pm_runtime_allow(struct phy *phy)
+{
+	if (!pm_runtime_enabled(&phy->dev))
+		return;
+
+	pm_runtime_allow(&phy->dev);
+}
+EXPORT_SYMBOL_GPL(phy_pm_runtime_allow);
+
+void phy_pm_runtime_forbid(struct phy *phy)
+{
+	if (!pm_runtime_enabled(&phy->dev))
+		return;
+
+	pm_runtime_forbid(&phy->dev);
+}
+EXPORT_SYMBOL_GPL(phy_pm_runtime_forbid);
+
+int phy_init(struct phy *phy)
+{
+	int ret;
+
+	ret = phy_pm_runtime_get_sync(phy);
+	if (ret < 0 && ret != -ENOTSUPP)
+		return ret;
+
+	mutex_lock(&phy->mutex);
+	if (phy->init_count++ == 0 && phy->ops->init) {
+		ret = phy->ops->init(phy);
+		if (ret < 0) {
+			dev_err(&phy->dev, "phy init failed --> %d\n", ret);
+			goto out;
+		}
+	}
+
+out:
+	mutex_unlock(&phy->mutex);
+	phy_pm_runtime_put(phy);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_init);
+
+int phy_exit(struct phy *phy)
+{
+	int ret;
+
+	ret = phy_pm_runtime_get_sync(phy);
+	if (ret < 0 && ret != -ENOTSUPP)
+		return ret;
+
+	mutex_lock(&phy->mutex);
+	if (--phy->init_count == 0 && phy->ops->exit) {
+		ret = phy->ops->exit(phy);
+		if (ret < 0) {
+			dev_err(&phy->dev, "phy exit failed --> %d\n", ret);
+			goto out;
+		}
+	}
+
+out:
+	mutex_unlock(&phy->mutex);
+	phy_pm_runtime_put(phy);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_exit);
+
+int phy_power_on(struct phy *phy)
+{
+	int ret = -ENOTSUPP;
+
+	ret = phy_pm_runtime_get_sync(phy);
+	if (ret < 0 && ret != -ENOTSUPP)
+		return ret;
+
+	mutex_lock(&phy->mutex);
+	if (phy->power_count++ == 0 && phy->ops->power_on) {
+		ret = phy->ops->power_on(phy);
+		if (ret < 0) {
+			dev_err(&phy->dev, "phy poweron failed --> %d\n", ret);
+			goto out;
+		}
+	}
+
+out:
+	mutex_unlock(&phy->mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_power_on);
+
+int phy_power_off(struct phy *phy)
+{
+	int ret = -ENOTSUPP;
+
+	mutex_lock(&phy->mutex);
+	if (--phy->power_count == 0 && phy->ops->power_off) {
+		ret =  phy->ops->power_off(phy);
+		if (ret < 0) {
+			dev_err(&phy->dev, "phy poweroff failed --> %d\n", ret);
+			goto out;
+		}
+	}
+
+out:
+	mutex_unlock(&phy->mutex);
+	phy_pm_runtime_put(phy);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_power_off);
+
+/**
+ * of_phy_get() - lookup and obtain a reference to a phy by phandle
+ * @dev: device that requests this phy
+ * @index: the index of the phy
+ *
+ * Returns the phy associated with the given phandle value,
+ * after getting a refcount to it or -ENODEV if there is no such phy or
+ * -EPROBE_DEFER if there is a phandle to the phy, but the device is
+ * not yet loaded. This function uses of_xlate call back function provided
+ * while registering the phy_provider to find the phy instance.
+ */
+static struct phy *of_phy_get(struct device *dev, int index)
+{
+	int ret;
+	struct phy_provider *phy_provider;
+	struct phy *phy = NULL;
+	struct of_phandle_args args;
+
+	ret = of_parse_phandle_with_args(dev->of_node, "phys", "#phy-cells",
+		index, &args);
+	if (ret) {
+		dev_dbg(dev, "failed to get phy in %s node\n",
+			dev->of_node->full_name);
+		return ERR_PTR(-ENODEV);
+	}
+
+	mutex_lock(&phy_provider_mutex);
+	phy_provider = of_phy_provider_lookup(args.np);
+	if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) {
+		phy = ERR_PTR(-EPROBE_DEFER);
+		goto err0;
+	}
+
+	phy = phy_provider->of_xlate(phy_provider->dev, &args);
+	module_put(phy_provider->owner);
+
+err0:
+	mutex_unlock(&phy_provider_mutex);
+	of_node_put(args.np);
+
+	return phy;
+}
+
+/**
+ * phy_put() - release the PHY
+ * @phy: the phy returned by phy_get()
+ *
+ * Releases a refcount the caller received from phy_get().
+ */
+void phy_put(struct phy *phy)
+{
+	if (IS_ERR(phy))
+		return;
+
+	module_put(phy->ops->owner);
+	put_device(&phy->dev);
+}
+EXPORT_SYMBOL_GPL(phy_put);
+
+/**
+ * devm_phy_put() - release the PHY
+ * @dev: device that wants to release this phy
+ * @phy: the phy returned by devm_phy_get()
+ *
+ * destroys the devres associated with this phy and invokes phy_put
+ * to release the phy.
+ */
+void devm_phy_put(struct device *dev, struct phy *phy)
+{
+	int r;
+
+	r = devres_destroy(dev, devm_phy_release, devm_phy_match, phy);
+	dev_WARN_ONCE(dev, r, "couldn't find PHY resource\n");
+}
+EXPORT_SYMBOL_GPL(devm_phy_put);
+
+/**
+ * of_phy_simple_xlate() - returns the phy instance from phy provider
+ * @dev: the PHY provider device
+ * @args: of_phandle_args (not used here)
+ *
+ * Intended to be used by phy provider for the common case where #phy-cells is
+ * 0. For other cases where #phy-cells is greater than '0', the phy provider
+ * should provide a custom of_xlate function that reads the *args* and returns
+ * the appropriate phy.
+ */
+struct phy *of_phy_simple_xlate(struct device *dev, struct of_phandle_args
+	*args)
+{
+	struct phy *phy;
+	struct class_dev_iter iter;
+	struct device_node *node = dev->of_node;
+
+	class_dev_iter_init(&iter, phy_class, NULL, NULL);
+	while ((dev = class_dev_iter_next(&iter))) {
+		phy = to_phy(dev);
+		if (node != phy->dev.of_node)
+			continue;
+
+		class_dev_iter_exit(&iter);
+		return phy;
+	}
+
+	class_dev_iter_exit(&iter);
+	return ERR_PTR(-ENODEV);
+}
+EXPORT_SYMBOL_GPL(of_phy_simple_xlate);
+
+/**
+ * phy_get() - lookup and obtain a reference to a phy.
+ * @dev: device that requests this phy
+ * @string: the phy name as given in the dt data or the name of the controller
+ * port for non-dt case
+ *
+ * Returns the phy driver, after getting a refcount to it; or
+ * -ENODEV if there is no such phy.  The caller is responsible for
+ * calling phy_put() to release that count.
+ */
+struct phy *phy_get(struct device *dev, const char *string)
+{
+	int index = 0;
+	struct phy *phy = NULL;
+
+	if (string == NULL) {
+		dev_WARN(dev, "missing string\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (dev->of_node) {
+		index = of_property_match_string(dev->of_node, "phy-names",
+			string);
+		phy = of_phy_get(dev, index);
+		if (IS_ERR(phy)) {
+			dev_err(dev, "unable to find phy\n");
+			return phy;
+		}
+	} else {
+		phy = phy_lookup(dev, string);
+		if (IS_ERR(phy)) {
+			dev_err(dev, "unable to find phy\n");
+			return phy;
+		}
+	}
+
+	if (!try_module_get(phy->ops->owner))
+		return ERR_PTR(-EPROBE_DEFER);
+
+	get_device(&phy->dev);
+
+	return phy;
+}
+EXPORT_SYMBOL_GPL(phy_get);
+
+/**
+ * devm_phy_get() - lookup and obtain a reference to a phy.
+ * @dev: device that requests this phy
+ * @string: the phy name as given in the dt data or phy device name
+ * for non-dt case
+ *
+ * Gets the phy using phy_get(), and associates a device with it using
+ * devres. On driver detach, release function is invoked on the devres data,
+ * then, devres data is freed.
+ */
+struct phy *devm_phy_get(struct device *dev, const char *string)
+{
+	struct phy **ptr, *phy;
+
+	ptr = devres_alloc(devm_phy_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	phy = phy_get(dev, string);
+	if (!IS_ERR(phy)) {
+		*ptr = phy;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return phy;
+}
+EXPORT_SYMBOL_GPL(devm_phy_get);
+
+/**
+ * phy_create() - create a new phy
+ * @dev: device that is creating the new phy
+ * @ops: function pointers for performing phy operations
+ * @init_data: contains the list of PHY consumers or NULL
+ *
+ * Called to create a phy using phy framework.
+ */
+struct phy *phy_create(struct device *dev, const struct phy_ops *ops,
+	struct phy_init_data *init_data)
+{
+	int ret;
+	int id;
+	struct phy *phy;
+
+	if (!dev) {
+		dev_WARN(dev, "no device provided for PHY\n");
+		ret = -EINVAL;
+		goto err0;
+	}
+
+	phy = kzalloc(sizeof(*phy), GFP_KERNEL);
+	if (!phy) {
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	id = ida_simple_get(&phy_ida, 0, 0, GFP_KERNEL);
+	if (id < 0) {
+		dev_err(dev, "unable to get id\n");
+		ret = id;
+		goto err0;
+	}
+
+	device_initialize(&phy->dev);
+	mutex_init(&phy->mutex);
+
+	phy->dev.class = phy_class;
+	phy->dev.parent = dev;
+	phy->dev.of_node = dev->of_node;
+	phy->id = id;
+	phy->ops = ops;
+	phy->init_data = init_data;
+
+	ret = dev_set_name(&phy->dev, "phy-%s.%d", dev_name(dev), id);
+	if (ret)
+		goto err1;
+
+	ret = device_add(&phy->dev);
+	if (ret)
+		goto err1;
+
+	if (pm_runtime_enabled(dev)) {
+		pm_runtime_enable(&phy->dev);
+		pm_runtime_no_callbacks(&phy->dev);
+	}
+
+	return phy;
+
+err1:
+	ida_remove(&phy_ida, phy->id);
+	put_device(&phy->dev);
+	kfree(phy);
+
+err0:
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(phy_create);
+
+/**
+ * devm_phy_create() - create a new phy
+ * @dev: device that is creating the new phy
+ * @ops: function pointers for performing phy operations
+ * @init_data: contains the list of PHY consumers or NULL
+ *
+ * Creates a new PHY device adding it to the PHY class.
+ * While at that, it also associates the device with the phy using devres.
+ * On driver detach, release function is invoked on the devres data,
+ * then, devres data is freed.
+ */
+struct phy *devm_phy_create(struct device *dev, const struct phy_ops *ops,
+	struct phy_init_data *init_data)
+{
+	struct phy **ptr, *phy;
+
+	ptr = devres_alloc(devm_phy_consume, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	phy = phy_create(dev, ops, init_data);
+	if (!IS_ERR(phy)) {
+		*ptr = phy;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return phy;
+}
+EXPORT_SYMBOL_GPL(devm_phy_create);
+
+/**
+ * phy_destroy() - destroy the phy
+ * @phy: the phy to be destroyed
+ *
+ * Called to destroy the phy.
+ */
+void phy_destroy(struct phy *phy)
+{
+	pm_runtime_disable(&phy->dev);
+	device_unregister(&phy->dev);
+}
+EXPORT_SYMBOL_GPL(phy_destroy);
+
+/**
+ * devm_phy_destroy() - destroy the PHY
+ * @dev: device that wants to release this phy
+ * @phy: the phy returned by devm_phy_get()
+ *
+ * destroys the devres associated with this phy and invokes phy_destroy
+ * to destroy the phy.
+ */
+void devm_phy_destroy(struct device *dev, struct phy *phy)
+{
+	int r;
+
+	r = devres_destroy(dev, devm_phy_consume, devm_phy_match, phy);
+	dev_WARN_ONCE(dev, r, "couldn't find PHY resource\n");
+}
+EXPORT_SYMBOL_GPL(devm_phy_destroy);
+
+/**
+ * __of_phy_provider_register() - create/register phy provider with the framework
+ * @dev: struct device of the phy provider
+ * @owner: the module owner containing of_xlate
+ * @of_xlate: function pointer to obtain phy instance from phy provider
+ *
+ * Creates struct phy_provider from dev and of_xlate function pointer.
+ * This is used in the case of dt boot for finding the phy instance from
+ * phy provider.
+ */
+struct phy_provider *__of_phy_provider_register(struct device *dev,
+	struct module *owner, struct phy * (*of_xlate)(struct device *dev,
+	struct of_phandle_args *args))
+{
+	struct phy_provider *phy_provider;
+
+	phy_provider = kzalloc(sizeof(*phy_provider), GFP_KERNEL);
+	if (!phy_provider)
+		return ERR_PTR(-ENOMEM);
+
+	phy_provider->dev = dev;
+	phy_provider->owner = owner;
+	phy_provider->of_xlate = of_xlate;
+
+	mutex_lock(&phy_provider_mutex);
+	list_add_tail(&phy_provider->list, &phy_provider_list);
+	mutex_unlock(&phy_provider_mutex);
+
+	return phy_provider;
+}
+EXPORT_SYMBOL_GPL(__of_phy_provider_register);
+
+/**
+ * __devm_of_phy_provider_register() - create/register phy provider with the
+ * framework
+ * @dev: struct device of the phy provider
+ * @owner: the module owner containing of_xlate
+ * @of_xlate: function pointer to obtain phy instance from phy provider
+ *
+ * Creates struct phy_provider from dev and of_xlate function pointer.
+ * This is used in the case of dt boot for finding the phy instance from
+ * phy provider. While at that, it also associates the device with the
+ * phy provider using devres. On driver detach, release function is invoked
+ * on the devres data, then, devres data is freed.
+ */
+struct phy_provider *__devm_of_phy_provider_register(struct device *dev,
+	struct module *owner, struct phy * (*of_xlate)(struct device *dev,
+	struct of_phandle_args *args))
+{
+	struct phy_provider **ptr, *phy_provider;
+
+	ptr = devres_alloc(devm_phy_provider_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	phy_provider = __of_phy_provider_register(dev, owner, of_xlate);
+	if (!IS_ERR(phy_provider)) {
+		*ptr = phy_provider;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return phy_provider;
+}
+EXPORT_SYMBOL_GPL(__devm_of_phy_provider_register);
+
+/**
+ * of_phy_provider_unregister() - unregister phy provider from the framework
+ * @phy_provider: phy provider returned by of_phy_provider_register()
+ *
+ * Removes the phy_provider created using of_phy_provider_register().
+ */
+void of_phy_provider_unregister(struct phy_provider *phy_provider)
+{
+	if (IS_ERR(phy_provider))
+		return;
+
+	mutex_lock(&phy_provider_mutex);
+	list_del(&phy_provider->list);
+	kfree(phy_provider);
+	mutex_unlock(&phy_provider_mutex);
+}
+EXPORT_SYMBOL_GPL(of_phy_provider_unregister);
+
+/**
+ * devm_of_phy_provider_unregister() - remove phy provider from the framework
+ * @dev: struct device of the phy provider
+ *
+ * destroys the devres associated with this phy provider and invokes
+ * of_phy_provider_unregister to unregister the phy provider.
+ */
+void devm_of_phy_provider_unregister(struct device *dev,
+	struct phy_provider *phy_provider) {
+	int r;
+
+	r = devres_destroy(dev, devm_phy_provider_release, devm_phy_match,
+		phy_provider);
+	dev_WARN_ONCE(dev, r, "couldn't find PHY provider device resource\n");
+}
+EXPORT_SYMBOL_GPL(devm_of_phy_provider_unregister);
+
+/**
+ * phy_release() - release the phy
+ * @dev: the dev member within phy
+ *
+ * When the last reference to the device is removed, it is called
+ * from the embedded kobject as release method.
+ */
+static void phy_release(struct device *dev)
+{
+	struct phy *phy;
+
+	phy = to_phy(dev);
+	dev_vdbg(dev, "releasing '%s'\n", dev_name(dev));
+	ida_remove(&phy_ida, phy->id);
+	kfree(phy);
+}
+
+static int __init phy_core_init(void)
+{
+	phy_class = class_create(THIS_MODULE, "phy");
+	if (IS_ERR(phy_class)) {
+		pr_err("failed to create phy class --> %ld\n",
+			PTR_ERR(phy_class));
+		return PTR_ERR(phy_class);
+	}
+
+	phy_class->dev_release = phy_release;
+
+	return 0;
+}
+module_init(phy_core_init);
+
+static void __exit phy_core_exit(void)
+{
+	class_destroy(phy_class);
+}
+module_exit(phy_core_exit);
+
+MODULE_DESCRIPTION("Generic PHY Framework");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
new file mode 100644
index 000000000000..6d722695e027
--- /dev/null
+++ b/include/linux/phy/phy.h
@@ -0,0 +1,270 @@
+/*
+ * phy.h -- generic phy header file
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __DRIVERS_PHY_H
+#define __DRIVERS_PHY_H
+
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/device.h>
+#include <linux/pm_runtime.h>
+
+struct phy;
+
+/**
+ * struct phy_ops - set of function pointers for performing phy operations
+ * @init: operation to be performed for initializing phy
+ * @exit: operation to be performed while exiting
+ * @power_on: powering on the phy
+ * @power_off: powering off the phy
+ * @owner: the module owner containing the ops
+ */
+struct phy_ops {
+	int	(*init)(struct phy *phy);
+	int	(*exit)(struct phy *phy);
+	int	(*power_on)(struct phy *phy);
+	int	(*power_off)(struct phy *phy);
+	struct module *owner;
+};
+
+/**
+ * struct phy - represents the phy device
+ * @dev: phy device
+ * @id: id of the phy device
+ * @ops: function pointers for performing phy operations
+ * @init_data: list of PHY consumers (non-dt only)
+ * @mutex: mutex to protect phy_ops
+ * @init_count: used to protect when the PHY is used by multiple consumers
+ * @power_count: used to protect when the PHY is used by multiple consumers
+ */
+struct phy {
+	struct device		dev;
+	int			id;
+	const struct phy_ops	*ops;
+	struct phy_init_data	*init_data;
+	struct mutex		mutex;
+	int			init_count;
+	int			power_count;
+};
+
+/**
+ * struct phy_provider - represents the phy provider
+ * @dev: phy provider device
+ * @owner: the module owner having of_xlate
+ * @of_xlate: function pointer to obtain phy instance from phy pointer
+ * @list: to maintain a linked list of PHY providers
+ */
+struct phy_provider {
+	struct device		*dev;
+	struct module		*owner;
+	struct list_head	list;
+	struct phy * (*of_xlate)(struct device *dev,
+		struct of_phandle_args *args);
+};
+
+/**
+ * struct phy_consumer - represents the phy consumer
+ * @dev_name: the device name of the controller that will use this PHY device
+ * @port: name given to the consumer port
+ */
+struct phy_consumer {
+	const char *dev_name;
+	const char *port;
+};
+
+/**
+ * struct phy_init_data - contains the list of PHY consumers
+ * @num_consumers: number of consumers for this PHY device
+ * @consumers: list of PHY consumers
+ */
+struct phy_init_data {
+	unsigned int num_consumers;
+	struct phy_consumer *consumers;
+};
+
+#define PHY_CONSUMER(_dev_name, _port)				\
+{								\
+	.dev_name	= _dev_name,				\
+	.port		= _port,				\
+}
+
+#define	to_phy(dev)	(container_of((dev), struct phy, dev))
+
+#define	of_phy_provider_register(dev, xlate)	\
+	__of_phy_provider_register((dev), THIS_MODULE, (xlate))
+
+#define	devm_of_phy_provider_register(dev, xlate)	\
+	__devm_of_phy_provider_register((dev), THIS_MODULE, (xlate))
+
+static inline void phy_set_drvdata(struct phy *phy, void *data)
+{
+	dev_set_drvdata(&phy->dev, data);
+}
+
+static inline void *phy_get_drvdata(struct phy *phy)
+{
+	return dev_get_drvdata(&phy->dev);
+}
+
+#if IS_ENABLED(CONFIG_GENERIC_PHY)
+int phy_pm_runtime_get(struct phy *phy);
+int phy_pm_runtime_get_sync(struct phy *phy);
+int phy_pm_runtime_put(struct phy *phy);
+int phy_pm_runtime_put_sync(struct phy *phy);
+void phy_pm_runtime_allow(struct phy *phy);
+void phy_pm_runtime_forbid(struct phy *phy);
+int phy_init(struct phy *phy);
+int phy_exit(struct phy *phy);
+int phy_power_on(struct phy *phy);
+int phy_power_off(struct phy *phy);
+struct phy *phy_get(struct device *dev, const char *string);
+struct phy *devm_phy_get(struct device *dev, const char *string);
+void phy_put(struct phy *phy);
+void devm_phy_put(struct device *dev, struct phy *phy);
+struct phy *of_phy_simple_xlate(struct device *dev,
+	struct of_phandle_args *args);
+struct phy *phy_create(struct device *dev, const struct phy_ops *ops,
+	struct phy_init_data *init_data);
+struct phy *devm_phy_create(struct device *dev,
+	const struct phy_ops *ops, struct phy_init_data *init_data);
+void phy_destroy(struct phy *phy);
+void devm_phy_destroy(struct device *dev, struct phy *phy);
+struct phy_provider *__of_phy_provider_register(struct device *dev,
+	struct module *owner, struct phy * (*of_xlate)(struct device *dev,
+	struct of_phandle_args *args));
+struct phy_provider *__devm_of_phy_provider_register(struct device *dev,
+	struct module *owner, struct phy * (*of_xlate)(struct device *dev,
+	struct of_phandle_args *args));
+void of_phy_provider_unregister(struct phy_provider *phy_provider);
+void devm_of_phy_provider_unregister(struct device *dev,
+	struct phy_provider *phy_provider);
+#else
+static inline int phy_pm_runtime_get(struct phy *phy)
+{
+	return -ENOSYS;
+}
+
+static inline int phy_pm_runtime_get_sync(struct phy *phy)
+{
+	return -ENOSYS;
+}
+
+static inline int phy_pm_runtime_put(struct phy *phy)
+{
+	return -ENOSYS;
+}
+
+static inline int phy_pm_runtime_put_sync(struct phy *phy)
+{
+	return -ENOSYS;
+}
+
+static inline void phy_pm_runtime_allow(struct phy *phy)
+{
+	return;
+}
+
+static inline void phy_pm_runtime_forbid(struct phy *phy)
+{
+	return;
+}
+
+static inline int phy_init(struct phy *phy)
+{
+	return -ENOSYS;
+}
+
+static inline int phy_exit(struct phy *phy)
+{
+	return -ENOSYS;
+}
+
+static inline int phy_power_on(struct phy *phy)
+{
+	return -ENOSYS;
+}
+
+static inline int phy_power_off(struct phy *phy)
+{
+	return -ENOSYS;
+}
+
+static inline struct phy *phy_get(struct device *dev, const char *string)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline struct phy *devm_phy_get(struct device *dev, const char *string)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline void phy_put(struct phy *phy)
+{
+}
+
+static inline void devm_phy_put(struct device *dev, struct phy *phy)
+{
+}
+
+static inline struct phy *of_phy_simple_xlate(struct device *dev,
+	struct of_phandle_args *args)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline struct phy *phy_create(struct device *dev,
+	const struct phy_ops *ops, struct phy_init_data *init_data)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline struct phy *devm_phy_create(struct device *dev,
+	const struct phy_ops *ops, struct phy_init_data *init_data)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline void phy_destroy(struct phy *phy)
+{
+}
+
+static inline void devm_phy_destroy(struct device *dev, struct phy *phy)
+{
+}
+
+static inline struct phy_provider *__of_phy_provider_register(
+	struct device *dev, struct module *owner, struct phy * (*of_xlate)(
+	struct device *dev, struct of_phandle_args *args))
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline struct phy_provider *__devm_of_phy_provider_register(struct device
+	*dev, struct module *owner, struct phy * (*of_xlate)(struct device *dev,
+	struct of_phandle_args *args))
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline void of_phy_provider_unregister(struct phy_provider *phy_provider)
+{
+}
+
+static inline void devm_of_phy_provider_unregister(struct device *dev,
+	struct phy_provider *phy_provider)
+{
+}
+#endif
+
+#endif /* __DRIVERS_PHY_H */
-- 
cgit v1.2.3


From 6747caa76cab1150c60a772cf64f8cd47fa19d39 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Fri, 27 Sep 2013 11:53:27 +0530
Subject: usb: phy: twl4030: use the new generic PHY framework

Used the generic PHY framework API to create the PHY. For powering on
and powering off the PHY, power_on and power_off ops are used. Once the
MUSB OMAP glue is adapted to the new framework, the suspend and resume
ops of usb phy library will be removed. Also twl4030-usb driver is moved
to drivers/phy/.

However using the old usb phy library cannot be completely removed
because otg is intertwined with phy and moving to the new
framework completely will break otg. Once we have a separate otg state machine,
we can get rid of the usb phy library.

Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Acked-by: Felipe Balbi <balbi@ti.com>
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/phy/Kconfig               |  11 +
 drivers/phy/Makefile              |   1 +
 drivers/phy/phy-twl4030-usb.c     | 844 ++++++++++++++++++++++++++++++++++++++
 drivers/usb/phy/Kconfig           |  10 -
 drivers/usb/phy/Makefile          |   1 -
 drivers/usb/phy/phy-twl4030-usb.c | 794 -----------------------------------
 include/linux/i2c/twl.h           |   2 +
 7 files changed, 858 insertions(+), 805 deletions(-)
 create mode 100644 drivers/phy/phy-twl4030-usb.c
 delete mode 100644 drivers/usb/phy/phy-twl4030-usb.c

(limited to 'include/linux')

diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 38c3477ead4c..ac239aca77ec 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -27,4 +27,15 @@ config OMAP_USB2
 	  The USB OTG controller communicates with the comparator using this
 	  driver.
 
+config TWL4030_USB
+	tristate "TWL4030 USB Transceiver Driver"
+	depends on TWL4030_CORE && REGULATOR_TWL4030 && USB_MUSB_OMAP2PLUS
+	select GENERIC_PHY
+	select USB_PHY
+	help
+	  Enable this to support the USB OTG transceiver on TWL4030
+	  family chips (including the TWL5030 and TPS659x0 devices).
+	  This transceiver supports high and full speed devices plus,
+	  in host mode, low speed.
+
 endmenu
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index ed5b088abaee..0dd8a9834548 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_GENERIC_PHY)	+= phy-core.o
 obj-$(CONFIG_OMAP_USB2)		+= phy-omap-usb2.o
+obj-$(CONFIG_TWL4030_USB)	+= phy-twl4030-usb.o
diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c
new file mode 100644
index 000000000000..d02913f9a6b1
--- /dev/null
+++ b/drivers/phy/phy-twl4030-usb.c
@@ -0,0 +1,844 @@
+/*
+ * twl4030_usb - TWL4030 USB transceiver, talking to OMAP OTG controller
+ *
+ * Copyright (C) 2004-2007 Texas Instruments
+ * Copyright (C) 2008 Nokia Corporation
+ * Contact: Felipe Balbi <felipe.balbi@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Current status:
+ *	- HS USB ULPI mode works.
+ *	- 3-pin mode support may be added in future.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/usb/otg.h>
+#include <linux/phy/phy.h>
+#include <linux/usb/musb-omap.h>
+#include <linux/usb/ulpi.h>
+#include <linux/i2c/twl.h>
+#include <linux/regulator/consumer.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+/* Register defines */
+
+#define MCPC_CTRL			0x30
+#define MCPC_CTRL_RTSOL			(1 << 7)
+#define MCPC_CTRL_EXTSWR		(1 << 6)
+#define MCPC_CTRL_EXTSWC		(1 << 5)
+#define MCPC_CTRL_VOICESW		(1 << 4)
+#define MCPC_CTRL_OUT64K		(1 << 3)
+#define MCPC_CTRL_RTSCTSSW		(1 << 2)
+#define MCPC_CTRL_HS_UART		(1 << 0)
+
+#define MCPC_IO_CTRL			0x33
+#define MCPC_IO_CTRL_MICBIASEN		(1 << 5)
+#define MCPC_IO_CTRL_CTS_NPU		(1 << 4)
+#define MCPC_IO_CTRL_RXD_PU		(1 << 3)
+#define MCPC_IO_CTRL_TXDTYP		(1 << 2)
+#define MCPC_IO_CTRL_CTSTYP		(1 << 1)
+#define MCPC_IO_CTRL_RTSTYP		(1 << 0)
+
+#define MCPC_CTRL2			0x36
+#define MCPC_CTRL2_MCPC_CK_EN		(1 << 0)
+
+#define OTHER_FUNC_CTRL			0x80
+#define OTHER_FUNC_CTRL_BDIS_ACON_EN	(1 << 4)
+#define OTHER_FUNC_CTRL_FIVEWIRE_MODE	(1 << 2)
+
+#define OTHER_IFC_CTRL			0x83
+#define OTHER_IFC_CTRL_OE_INT_EN	(1 << 6)
+#define OTHER_IFC_CTRL_CEA2011_MODE	(1 << 5)
+#define OTHER_IFC_CTRL_FSLSSERIALMODE_4PIN	(1 << 4)
+#define OTHER_IFC_CTRL_HIZ_ULPI_60MHZ_OUT	(1 << 3)
+#define OTHER_IFC_CTRL_HIZ_ULPI		(1 << 2)
+#define OTHER_IFC_CTRL_ALT_INT_REROUTE	(1 << 0)
+
+#define OTHER_INT_EN_RISE		0x86
+#define OTHER_INT_EN_FALL		0x89
+#define OTHER_INT_STS			0x8C
+#define OTHER_INT_LATCH			0x8D
+#define OTHER_INT_VB_SESS_VLD		(1 << 7)
+#define OTHER_INT_DM_HI			(1 << 6) /* not valid for "latch" reg */
+#define OTHER_INT_DP_HI			(1 << 5) /* not valid for "latch" reg */
+#define OTHER_INT_BDIS_ACON		(1 << 3) /* not valid for "fall" regs */
+#define OTHER_INT_MANU			(1 << 1)
+#define OTHER_INT_ABNORMAL_STRESS	(1 << 0)
+
+#define ID_STATUS			0x96
+#define ID_RES_FLOAT			(1 << 4)
+#define ID_RES_440K			(1 << 3)
+#define ID_RES_200K			(1 << 2)
+#define ID_RES_102K			(1 << 1)
+#define ID_RES_GND			(1 << 0)
+
+#define POWER_CTRL			0xAC
+#define POWER_CTRL_OTG_ENAB		(1 << 5)
+
+#define OTHER_IFC_CTRL2			0xAF
+#define OTHER_IFC_CTRL2_ULPI_STP_LOW	(1 << 4)
+#define OTHER_IFC_CTRL2_ULPI_TXEN_POL	(1 << 3)
+#define OTHER_IFC_CTRL2_ULPI_4PIN_2430	(1 << 2)
+#define OTHER_IFC_CTRL2_USB_INT_OUTSEL_MASK	(3 << 0) /* bits 0 and 1 */
+#define OTHER_IFC_CTRL2_USB_INT_OUTSEL_INT1N	(0 << 0)
+#define OTHER_IFC_CTRL2_USB_INT_OUTSEL_INT2N	(1 << 0)
+
+#define REG_CTRL_EN			0xB2
+#define REG_CTRL_ERROR			0xB5
+#define ULPI_I2C_CONFLICT_INTEN		(1 << 0)
+
+#define OTHER_FUNC_CTRL2		0xB8
+#define OTHER_FUNC_CTRL2_VBAT_TIMER_EN	(1 << 0)
+
+/* following registers do not have separate _clr and _set registers */
+#define VBUS_DEBOUNCE			0xC0
+#define ID_DEBOUNCE			0xC1
+#define VBAT_TIMER			0xD3
+#define PHY_PWR_CTRL			0xFD
+#define PHY_PWR_PHYPWD			(1 << 0)
+#define PHY_CLK_CTRL			0xFE
+#define PHY_CLK_CTRL_CLOCKGATING_EN	(1 << 2)
+#define PHY_CLK_CTRL_CLK32K_EN		(1 << 1)
+#define REQ_PHY_DPLL_CLK		(1 << 0)
+#define PHY_CLK_CTRL_STS		0xFF
+#define PHY_DPLL_CLK			(1 << 0)
+
+/* In module TWL_MODULE_PM_MASTER */
+#define STS_HW_CONDITIONS		0x0F
+
+/* In module TWL_MODULE_PM_RECEIVER */
+#define VUSB_DEDICATED1			0x7D
+#define VUSB_DEDICATED2			0x7E
+#define VUSB1V5_DEV_GRP			0x71
+#define VUSB1V5_TYPE			0x72
+#define VUSB1V5_REMAP			0x73
+#define VUSB1V8_DEV_GRP			0x74
+#define VUSB1V8_TYPE			0x75
+#define VUSB1V8_REMAP			0x76
+#define VUSB3V1_DEV_GRP			0x77
+#define VUSB3V1_TYPE			0x78
+#define VUSB3V1_REMAP			0x79
+
+/* In module TWL4030_MODULE_INTBR */
+#define PMBR1				0x0D
+#define GPIO_USB_4PIN_ULPI_2430C	(3 << 0)
+
+struct twl4030_usb {
+	struct usb_phy		phy;
+	struct device		*dev;
+
+	/* TWL4030 internal USB regulator supplies */
+	struct regulator	*usb1v5;
+	struct regulator	*usb1v8;
+	struct regulator	*usb3v1;
+
+	/* for vbus reporting with irqs disabled */
+	spinlock_t		lock;
+
+	/* pin configuration */
+	enum twl4030_usb_mode	usb_mode;
+
+	int			irq;
+	enum omap_musb_vbus_id_status linkstat;
+	bool			vbus_supplied;
+	u8			asleep;
+	bool			irq_enabled;
+
+	struct delayed_work	id_workaround_work;
+};
+
+/* internal define on top of container_of */
+#define phy_to_twl(x)		container_of((x), struct twl4030_usb, phy)
+
+/*-------------------------------------------------------------------------*/
+
+static int twl4030_i2c_write_u8_verify(struct twl4030_usb *twl,
+		u8 module, u8 data, u8 address)
+{
+	u8 check;
+
+	if ((twl_i2c_write_u8(module, data, address) >= 0) &&
+	    (twl_i2c_read_u8(module, &check, address) >= 0) &&
+						(check == data))
+		return 0;
+	dev_dbg(twl->dev, "Write%d[%d,0x%x] wrote %02x but read %02x\n",
+			1, module, address, check, data);
+
+	/* Failed once: Try again */
+	if ((twl_i2c_write_u8(module, data, address) >= 0) &&
+	    (twl_i2c_read_u8(module, &check, address) >= 0) &&
+						(check == data))
+		return 0;
+	dev_dbg(twl->dev, "Write%d[%d,0x%x] wrote %02x but read %02x\n",
+			2, module, address, check, data);
+
+	/* Failed again: Return error */
+	return -EBUSY;
+}
+
+#define twl4030_usb_write_verify(twl, address, data)	\
+	twl4030_i2c_write_u8_verify(twl, TWL_MODULE_USB, (data), (address))
+
+static inline int twl4030_usb_write(struct twl4030_usb *twl,
+		u8 address, u8 data)
+{
+	int ret = 0;
+
+	ret = twl_i2c_write_u8(TWL_MODULE_USB, data, address);
+	if (ret < 0)
+		dev_dbg(twl->dev,
+			"TWL4030:USB:Write[0x%x] Error %d\n", address, ret);
+	return ret;
+}
+
+static inline int twl4030_readb(struct twl4030_usb *twl, u8 module, u8 address)
+{
+	u8 data;
+	int ret = 0;
+
+	ret = twl_i2c_read_u8(module, &data, address);
+	if (ret >= 0)
+		ret = data;
+	else
+		dev_dbg(twl->dev,
+			"TWL4030:readb[0x%x,0x%x] Error %d\n",
+					module, address, ret);
+
+	return ret;
+}
+
+static inline int twl4030_usb_read(struct twl4030_usb *twl, u8 address)
+{
+	return twl4030_readb(twl, TWL_MODULE_USB, address);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static inline int
+twl4030_usb_set_bits(struct twl4030_usb *twl, u8 reg, u8 bits)
+{
+	return twl4030_usb_write(twl, ULPI_SET(reg), bits);
+}
+
+static inline int
+twl4030_usb_clear_bits(struct twl4030_usb *twl, u8 reg, u8 bits)
+{
+	return twl4030_usb_write(twl, ULPI_CLR(reg), bits);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static bool twl4030_is_driving_vbus(struct twl4030_usb *twl)
+{
+	int ret;
+
+	ret = twl4030_usb_read(twl, PHY_CLK_CTRL_STS);
+	if (ret < 0 || !(ret & PHY_DPLL_CLK))
+		/*
+		 * if clocks are off, registers are not updated,
+		 * but we can assume we don't drive VBUS in this case
+		 */
+		return false;
+
+	ret = twl4030_usb_read(twl, ULPI_OTG_CTRL);
+	if (ret < 0)
+		return false;
+
+	return (ret & (ULPI_OTG_DRVVBUS | ULPI_OTG_CHRGVBUS)) ? true : false;
+}
+
+static enum omap_musb_vbus_id_status
+	twl4030_usb_linkstat(struct twl4030_usb *twl)
+{
+	int	status;
+	enum omap_musb_vbus_id_status linkstat = OMAP_MUSB_UNKNOWN;
+
+	twl->vbus_supplied = false;
+
+	/*
+	 * For ID/VBUS sensing, see manual section 15.4.8 ...
+	 * except when using only battery backup power, two
+	 * comparators produce VBUS_PRES and ID_PRES signals,
+	 * which don't match docs elsewhere.  But ... BIT(7)
+	 * and BIT(2) of STS_HW_CONDITIONS, respectively, do
+	 * seem to match up.  If either is true the USB_PRES
+	 * signal is active, the OTG module is activated, and
+	 * its interrupt may be raised (may wake the system).
+	 */
+	status = twl4030_readb(twl, TWL_MODULE_PM_MASTER, STS_HW_CONDITIONS);
+	if (status < 0)
+		dev_err(twl->dev, "USB link status err %d\n", status);
+	else if (status & (BIT(7) | BIT(2))) {
+		if (status & BIT(7)) {
+			if (twl4030_is_driving_vbus(twl))
+				status &= ~BIT(7);
+			else
+				twl->vbus_supplied = true;
+		}
+
+		if (status & BIT(2))
+			linkstat = OMAP_MUSB_ID_GROUND;
+		else if (status & BIT(7))
+			linkstat = OMAP_MUSB_VBUS_VALID;
+		else
+			linkstat = OMAP_MUSB_VBUS_OFF;
+	} else {
+		if (twl->linkstat != OMAP_MUSB_UNKNOWN)
+			linkstat = OMAP_MUSB_VBUS_OFF;
+	}
+
+	dev_dbg(twl->dev, "HW_CONDITIONS 0x%02x/%d; link %d\n",
+			status, status, linkstat);
+
+	/* REVISIT this assumes host and peripheral controllers
+	 * are registered, and that both are active...
+	 */
+
+	return linkstat;
+}
+
+static void twl4030_usb_set_mode(struct twl4030_usb *twl, int mode)
+{
+	twl->usb_mode = mode;
+
+	switch (mode) {
+	case T2_USB_MODE_ULPI:
+		twl4030_usb_clear_bits(twl, ULPI_IFC_CTRL,
+					ULPI_IFC_CTRL_CARKITMODE);
+		twl4030_usb_set_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB);
+		twl4030_usb_clear_bits(twl, ULPI_FUNC_CTRL,
+					ULPI_FUNC_CTRL_XCVRSEL_MASK |
+					ULPI_FUNC_CTRL_OPMODE_MASK);
+		break;
+	case -1:
+		/* FIXME: power on defaults */
+		break;
+	default:
+		dev_err(twl->dev, "unsupported T2 transceiver mode %d\n",
+				mode);
+		break;
+	};
+}
+
+static void twl4030_i2c_access(struct twl4030_usb *twl, int on)
+{
+	unsigned long timeout;
+	int val = twl4030_usb_read(twl, PHY_CLK_CTRL);
+
+	if (val >= 0) {
+		if (on) {
+			/* enable DPLL to access PHY registers over I2C */
+			val |= REQ_PHY_DPLL_CLK;
+			WARN_ON(twl4030_usb_write_verify(twl, PHY_CLK_CTRL,
+						(u8)val) < 0);
+
+			timeout = jiffies + HZ;
+			while (!(twl4030_usb_read(twl, PHY_CLK_CTRL_STS) &
+							PHY_DPLL_CLK)
+				&& time_before(jiffies, timeout))
+					udelay(10);
+			if (!(twl4030_usb_read(twl, PHY_CLK_CTRL_STS) &
+							PHY_DPLL_CLK))
+				dev_err(twl->dev, "Timeout setting T2 HSUSB "
+						"PHY DPLL clock\n");
+		} else {
+			/* let ULPI control the DPLL clock */
+			val &= ~REQ_PHY_DPLL_CLK;
+			WARN_ON(twl4030_usb_write_verify(twl, PHY_CLK_CTRL,
+						(u8)val) < 0);
+		}
+	}
+}
+
+static void __twl4030_phy_power(struct twl4030_usb *twl, int on)
+{
+	u8 pwr = twl4030_usb_read(twl, PHY_PWR_CTRL);
+
+	if (on)
+		pwr &= ~PHY_PWR_PHYPWD;
+	else
+		pwr |= PHY_PWR_PHYPWD;
+
+	WARN_ON(twl4030_usb_write_verify(twl, PHY_PWR_CTRL, pwr) < 0);
+}
+
+static void twl4030_phy_power(struct twl4030_usb *twl, int on)
+{
+	int ret;
+
+	if (on) {
+		ret = regulator_enable(twl->usb3v1);
+		if (ret)
+			dev_err(twl->dev, "Failed to enable usb3v1\n");
+
+		ret = regulator_enable(twl->usb1v8);
+		if (ret)
+			dev_err(twl->dev, "Failed to enable usb1v8\n");
+
+		/*
+		 * Disabling usb3v1 regulator (= writing 0 to VUSB3V1_DEV_GRP
+		 * in twl4030) resets the VUSB_DEDICATED2 register. This reset
+		 * enables VUSB3V1_SLEEP bit that remaps usb3v1 ACTIVE state to
+		 * SLEEP. We work around this by clearing the bit after usv3v1
+		 * is re-activated. This ensures that VUSB3V1 is really active.
+		 */
+		twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB_DEDICATED2);
+
+		ret = regulator_enable(twl->usb1v5);
+		if (ret)
+			dev_err(twl->dev, "Failed to enable usb1v5\n");
+
+		__twl4030_phy_power(twl, 1);
+		twl4030_usb_write(twl, PHY_CLK_CTRL,
+				  twl4030_usb_read(twl, PHY_CLK_CTRL) |
+					(PHY_CLK_CTRL_CLOCKGATING_EN |
+						PHY_CLK_CTRL_CLK32K_EN));
+	} else {
+		__twl4030_phy_power(twl, 0);
+		regulator_disable(twl->usb1v5);
+		regulator_disable(twl->usb1v8);
+		regulator_disable(twl->usb3v1);
+	}
+}
+
+static void twl4030_phy_suspend(struct twl4030_usb *twl, int controller_off)
+{
+	if (twl->asleep)
+		return;
+
+	twl4030_phy_power(twl, 0);
+	twl->asleep = 1;
+	dev_dbg(twl->dev, "%s\n", __func__);
+}
+
+static int twl4030_phy_power_off(struct phy *phy)
+{
+	struct twl4030_usb *twl = phy_get_drvdata(phy);
+
+	twl4030_phy_suspend(twl, 0);
+	return 0;
+}
+
+static void __twl4030_phy_resume(struct twl4030_usb *twl)
+{
+	twl4030_phy_power(twl, 1);
+	twl4030_i2c_access(twl, 1);
+	twl4030_usb_set_mode(twl, twl->usb_mode);
+	if (twl->usb_mode == T2_USB_MODE_ULPI)
+		twl4030_i2c_access(twl, 0);
+}
+
+static void twl4030_phy_resume(struct twl4030_usb *twl)
+{
+	if (!twl->asleep)
+		return;
+	__twl4030_phy_resume(twl);
+	twl->asleep = 0;
+	dev_dbg(twl->dev, "%s\n", __func__);
+
+	/*
+	 * XXX When VBUS gets driven after musb goes to A mode,
+	 * ID_PRES related interrupts no longer arrive, why?
+	 * Register itself is updated fine though, so we must poll.
+	 */
+	if (twl->linkstat == OMAP_MUSB_ID_GROUND) {
+		cancel_delayed_work(&twl->id_workaround_work);
+		schedule_delayed_work(&twl->id_workaround_work, HZ);
+	}
+}
+
+static int twl4030_phy_power_on(struct phy *phy)
+{
+	struct twl4030_usb *twl = phy_get_drvdata(phy);
+
+	twl4030_phy_resume(twl);
+	return 0;
+}
+
+static int twl4030_usb_ldo_init(struct twl4030_usb *twl)
+{
+	/* Enable writing to power configuration registers */
+	twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG1,
+			 TWL4030_PM_MASTER_PROTECT_KEY);
+
+	twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG2,
+			 TWL4030_PM_MASTER_PROTECT_KEY);
+
+	/* Keep VUSB3V1 LDO in sleep state until VBUS/ID change detected*/
+	/*twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB_DEDICATED2);*/
+
+	/* input to VUSB3V1 LDO is from VBAT, not VBUS */
+	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0x14, VUSB_DEDICATED1);
+
+	/* Initialize 3.1V regulator */
+	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB3V1_DEV_GRP);
+
+	twl->usb3v1 = devm_regulator_get(twl->dev, "usb3v1");
+	if (IS_ERR(twl->usb3v1))
+		return -ENODEV;
+
+	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB3V1_TYPE);
+
+	/* Initialize 1.5V regulator */
+	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB1V5_DEV_GRP);
+
+	twl->usb1v5 = devm_regulator_get(twl->dev, "usb1v5");
+	if (IS_ERR(twl->usb1v5))
+		return -ENODEV;
+
+	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB1V5_TYPE);
+
+	/* Initialize 1.8V regulator */
+	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB1V8_DEV_GRP);
+
+	twl->usb1v8 = devm_regulator_get(twl->dev, "usb1v8");
+	if (IS_ERR(twl->usb1v8))
+		return -ENODEV;
+
+	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB1V8_TYPE);
+
+	/* disable access to power configuration registers */
+	twl_i2c_write_u8(TWL_MODULE_PM_MASTER, 0,
+			 TWL4030_PM_MASTER_PROTECT_KEY);
+
+	return 0;
+}
+
+static ssize_t twl4030_usb_vbus_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct twl4030_usb *twl = dev_get_drvdata(dev);
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	spin_lock_irqsave(&twl->lock, flags);
+	ret = sprintf(buf, "%s\n",
+			twl->vbus_supplied ? "on" : "off");
+	spin_unlock_irqrestore(&twl->lock, flags);
+
+	return ret;
+}
+static DEVICE_ATTR(vbus, 0444, twl4030_usb_vbus_show, NULL);
+
+static irqreturn_t twl4030_usb_irq(int irq, void *_twl)
+{
+	struct twl4030_usb *twl = _twl;
+	enum omap_musb_vbus_id_status status;
+	bool status_changed = false;
+
+	status = twl4030_usb_linkstat(twl);
+
+	spin_lock_irq(&twl->lock);
+	if (status >= 0 && status != twl->linkstat) {
+		twl->linkstat = status;
+		status_changed = true;
+	}
+	spin_unlock_irq(&twl->lock);
+
+	if (status_changed) {
+		/* FIXME add a set_power() method so that B-devices can
+		 * configure the charger appropriately.  It's not always
+		 * correct to consume VBUS power, and how much current to
+		 * consume is a function of the USB configuration chosen
+		 * by the host.
+		 *
+		 * REVISIT usb_gadget_vbus_connect(...) as needed, ditto
+		 * its disconnect() sibling, when changing to/from the
+		 * USB_LINK_VBUS state.  musb_hdrc won't care until it
+		 * starts to handle softconnect right.
+		 */
+		omap_musb_mailbox(status);
+	}
+	sysfs_notify(&twl->dev->kobj, NULL, "vbus");
+
+	return IRQ_HANDLED;
+}
+
+static void twl4030_id_workaround_work(struct work_struct *work)
+{
+	struct twl4030_usb *twl = container_of(work, struct twl4030_usb,
+		id_workaround_work.work);
+	enum omap_musb_vbus_id_status status;
+	bool status_changed = false;
+
+	status = twl4030_usb_linkstat(twl);
+
+	spin_lock_irq(&twl->lock);
+	if (status >= 0 && status != twl->linkstat) {
+		twl->linkstat = status;
+		status_changed = true;
+	}
+	spin_unlock_irq(&twl->lock);
+
+	if (status_changed) {
+		dev_dbg(twl->dev, "handle missing status change to %d\n",
+				status);
+		omap_musb_mailbox(status);
+	}
+
+	/* don't schedule during sleep - irq works right then */
+	if (status == OMAP_MUSB_ID_GROUND && !twl->asleep) {
+		cancel_delayed_work(&twl->id_workaround_work);
+		schedule_delayed_work(&twl->id_workaround_work, HZ);
+	}
+}
+
+static int twl4030_usb_phy_init(struct usb_phy *phy)
+{
+	struct twl4030_usb *twl = phy_to_twl(phy);
+	enum omap_musb_vbus_id_status status;
+
+	/*
+	 * Start in sleep state, we'll get called through set_suspend()
+	 * callback when musb is runtime resumed and it's time to start.
+	 */
+	__twl4030_phy_power(twl, 0);
+	twl->asleep = 1;
+
+	status = twl4030_usb_linkstat(twl);
+	twl->linkstat = status;
+
+	if (status == OMAP_MUSB_ID_GROUND || status == OMAP_MUSB_VBUS_VALID) {
+		omap_musb_mailbox(twl->linkstat);
+		twl4030_phy_resume(twl);
+	}
+
+	sysfs_notify(&twl->dev->kobj, NULL, "vbus");
+	return 0;
+}
+
+static int twl4030_phy_init(struct phy *phy)
+{
+	struct twl4030_usb *twl = phy_get_drvdata(phy);
+
+	return twl4030_usb_phy_init(&twl->phy);
+}
+
+static int twl4030_set_suspend(struct usb_phy *x, int suspend)
+{
+	struct twl4030_usb *twl = phy_to_twl(x);
+
+	if (suspend)
+		twl4030_phy_suspend(twl, 1);
+	else
+		twl4030_phy_resume(twl);
+
+	return 0;
+}
+
+static int twl4030_set_peripheral(struct usb_otg *otg,
+					struct usb_gadget *gadget)
+{
+	if (!otg)
+		return -ENODEV;
+
+	otg->gadget = gadget;
+	if (!gadget)
+		otg->phy->state = OTG_STATE_UNDEFINED;
+
+	return 0;
+}
+
+static int twl4030_set_host(struct usb_otg *otg, struct usb_bus *host)
+{
+	if (!otg)
+		return -ENODEV;
+
+	otg->host = host;
+	if (!host)
+		otg->phy->state = OTG_STATE_UNDEFINED;
+
+	return 0;
+}
+
+static const struct phy_ops ops = {
+	.init		= twl4030_phy_init,
+	.power_on	= twl4030_phy_power_on,
+	.power_off	= twl4030_phy_power_off,
+	.owner		= THIS_MODULE,
+};
+
+static int twl4030_usb_probe(struct platform_device *pdev)
+{
+	struct twl4030_usb_data *pdata = dev_get_platdata(&pdev->dev);
+	struct twl4030_usb	*twl;
+	struct phy		*phy;
+	int			status, err;
+	struct usb_otg		*otg;
+	struct device_node	*np = pdev->dev.of_node;
+	struct phy_provider	*phy_provider;
+	struct phy_init_data	*init_data = NULL;
+
+	twl = devm_kzalloc(&pdev->dev, sizeof *twl, GFP_KERNEL);
+	if (!twl)
+		return -ENOMEM;
+
+	if (np)
+		of_property_read_u32(np, "usb_mode",
+				(enum twl4030_usb_mode *)&twl->usb_mode);
+	else if (pdata) {
+		twl->usb_mode = pdata->usb_mode;
+		init_data = pdata->init_data;
+	} else {
+		dev_err(&pdev->dev, "twl4030 initialized without pdata\n");
+		return -EINVAL;
+	}
+
+	otg = devm_kzalloc(&pdev->dev, sizeof *otg, GFP_KERNEL);
+	if (!otg)
+		return -ENOMEM;
+
+	twl->dev		= &pdev->dev;
+	twl->irq		= platform_get_irq(pdev, 0);
+	twl->vbus_supplied	= false;
+	twl->asleep		= 1;
+	twl->linkstat		= OMAP_MUSB_UNKNOWN;
+
+	twl->phy.dev		= twl->dev;
+	twl->phy.label		= "twl4030";
+	twl->phy.otg		= otg;
+	twl->phy.type		= USB_PHY_TYPE_USB2;
+	twl->phy.set_suspend	= twl4030_set_suspend;
+	twl->phy.init		= twl4030_usb_phy_init;
+
+	otg->phy		= &twl->phy;
+	otg->set_host		= twl4030_set_host;
+	otg->set_peripheral	= twl4030_set_peripheral;
+
+	phy_provider = devm_of_phy_provider_register(twl->dev,
+		of_phy_simple_xlate);
+	if (IS_ERR(phy_provider))
+		return PTR_ERR(phy_provider);
+
+	phy = devm_phy_create(twl->dev, &ops, init_data);
+	if (IS_ERR(phy)) {
+		dev_dbg(&pdev->dev, "Failed to create PHY\n");
+		return PTR_ERR(phy);
+	}
+
+	phy_set_drvdata(phy, twl);
+
+	/* init spinlock for workqueue */
+	spin_lock_init(&twl->lock);
+
+	INIT_DELAYED_WORK(&twl->id_workaround_work, twl4030_id_workaround_work);
+
+	err = twl4030_usb_ldo_init(twl);
+	if (err) {
+		dev_err(&pdev->dev, "ldo init failed\n");
+		return err;
+	}
+	usb_add_phy_dev(&twl->phy);
+
+	platform_set_drvdata(pdev, twl);
+	if (device_create_file(&pdev->dev, &dev_attr_vbus))
+		dev_warn(&pdev->dev, "could not create sysfs file\n");
+
+	/* Our job is to use irqs and status from the power module
+	 * to keep the transceiver disabled when nothing's connected.
+	 *
+	 * FIXME we actually shouldn't start enabling it until the
+	 * USB controller drivers have said they're ready, by calling
+	 * set_host() and/or set_peripheral() ... OTG_capable boards
+	 * need both handles, otherwise just one suffices.
+	 */
+	twl->irq_enabled = true;
+	status = devm_request_threaded_irq(twl->dev, twl->irq, NULL,
+			twl4030_usb_irq, IRQF_TRIGGER_FALLING |
+			IRQF_TRIGGER_RISING | IRQF_ONESHOT, "twl4030_usb", twl);
+	if (status < 0) {
+		dev_dbg(&pdev->dev, "can't get IRQ %d, err %d\n",
+			twl->irq, status);
+		return status;
+	}
+
+	dev_info(&pdev->dev, "Initialized TWL4030 USB module\n");
+	return 0;
+}
+
+static int twl4030_usb_remove(struct platform_device *pdev)
+{
+	struct twl4030_usb *twl = platform_get_drvdata(pdev);
+	int val;
+
+	cancel_delayed_work(&twl->id_workaround_work);
+	device_remove_file(twl->dev, &dev_attr_vbus);
+
+	/* set transceiver mode to power on defaults */
+	twl4030_usb_set_mode(twl, -1);
+
+	/* autogate 60MHz ULPI clock,
+	 * clear dpll clock request for i2c access,
+	 * disable 32KHz
+	 */
+	val = twl4030_usb_read(twl, PHY_CLK_CTRL);
+	if (val >= 0) {
+		val |= PHY_CLK_CTRL_CLOCKGATING_EN;
+		val &= ~(PHY_CLK_CTRL_CLK32K_EN | REQ_PHY_DPLL_CLK);
+		twl4030_usb_write(twl, PHY_CLK_CTRL, (u8)val);
+	}
+
+	/* disable complete OTG block */
+	twl4030_usb_clear_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB);
+
+	if (!twl->asleep)
+		twl4030_phy_power(twl, 0);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id twl4030_usb_id_table[] = {
+	{ .compatible = "ti,twl4030-usb" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, twl4030_usb_id_table);
+#endif
+
+static struct platform_driver twl4030_usb_driver = {
+	.probe		= twl4030_usb_probe,
+	.remove		= twl4030_usb_remove,
+	.driver		= {
+		.name	= "twl4030_usb",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(twl4030_usb_id_table),
+	},
+};
+
+static int __init twl4030_usb_init(void)
+{
+	return platform_driver_register(&twl4030_usb_driver);
+}
+subsys_initcall(twl4030_usb_init);
+
+static void __exit twl4030_usb_exit(void)
+{
+	platform_driver_unregister(&twl4030_usb_driver);
+}
+module_exit(twl4030_usb_exit);
+
+MODULE_ALIAS("platform:twl4030_usb");
+MODULE_AUTHOR("Texas Instruments, Inc, Nokia Corporation");
+MODULE_DESCRIPTION("TWL4030 USB transceiver driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 2ce041109993..64b8bef1919e 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -113,16 +113,6 @@ config SAMSUNG_USB3PHY
 	  Enable this to support Samsung USB 3.0 (Super Speed) phy controller
 	  for samsung SoCs.
 
-config TWL4030_USB
-	tristate "TWL4030 USB Transceiver Driver"
-	depends on TWL4030_CORE && REGULATOR_TWL4030 && USB_MUSB_OMAP2PLUS
-	select USB_PHY
-	help
-	  Enable this to support the USB OTG transceiver on TWL4030
-	  family chips (including the TWL5030 and TPS659x0 devices).
-	  This transceiver supports high and full speed devices plus,
-	  in host mode, low speed.
-
 config TWL6030_USB
 	tristate "TWL6030 USB Transceiver Driver"
 	depends on TWL4030_CORE && OMAP_USB2 && USB_MUSB_OMAP2PLUS
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index fa4db0e4bdb9..9c3736109c2c 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_OMAP_USB3)			+= phy-omap-usb3.o
 obj-$(CONFIG_SAMSUNG_USBPHY)		+= phy-samsung-usb.o
 obj-$(CONFIG_SAMSUNG_USB2PHY)		+= phy-samsung-usb2.o
 obj-$(CONFIG_SAMSUNG_USB3PHY)		+= phy-samsung-usb3.o
-obj-$(CONFIG_TWL4030_USB)		+= phy-twl4030-usb.o
 obj-$(CONFIG_TWL6030_USB)		+= phy-twl6030-usb.o
 obj-$(CONFIG_USB_EHCI_TEGRA)		+= phy-tegra-usb.o
 obj-$(CONFIG_USB_GPIO_VBUS)		+= phy-gpio-vbus-usb.o
diff --git a/drivers/usb/phy/phy-twl4030-usb.c b/drivers/usb/phy/phy-twl4030-usb.c
deleted file mode 100644
index 90730c8762b8..000000000000
--- a/drivers/usb/phy/phy-twl4030-usb.c
+++ /dev/null
@@ -1,794 +0,0 @@
-/*
- * twl4030_usb - TWL4030 USB transceiver, talking to OMAP OTG controller
- *
- * Copyright (C) 2004-2007 Texas Instruments
- * Copyright (C) 2008 Nokia Corporation
- * Contact: Felipe Balbi <felipe.balbi@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Current status:
- *	- HS USB ULPI mode works.
- *	- 3-pin mode support may be added in future.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/workqueue.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/usb/otg.h>
-#include <linux/usb/musb-omap.h>
-#include <linux/usb/ulpi.h>
-#include <linux/i2c/twl.h>
-#include <linux/regulator/consumer.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-
-/* Register defines */
-
-#define MCPC_CTRL			0x30
-#define MCPC_CTRL_RTSOL			(1 << 7)
-#define MCPC_CTRL_EXTSWR		(1 << 6)
-#define MCPC_CTRL_EXTSWC		(1 << 5)
-#define MCPC_CTRL_VOICESW		(1 << 4)
-#define MCPC_CTRL_OUT64K		(1 << 3)
-#define MCPC_CTRL_RTSCTSSW		(1 << 2)
-#define MCPC_CTRL_HS_UART		(1 << 0)
-
-#define MCPC_IO_CTRL			0x33
-#define MCPC_IO_CTRL_MICBIASEN		(1 << 5)
-#define MCPC_IO_CTRL_CTS_NPU		(1 << 4)
-#define MCPC_IO_CTRL_RXD_PU		(1 << 3)
-#define MCPC_IO_CTRL_TXDTYP		(1 << 2)
-#define MCPC_IO_CTRL_CTSTYP		(1 << 1)
-#define MCPC_IO_CTRL_RTSTYP		(1 << 0)
-
-#define MCPC_CTRL2			0x36
-#define MCPC_CTRL2_MCPC_CK_EN		(1 << 0)
-
-#define OTHER_FUNC_CTRL			0x80
-#define OTHER_FUNC_CTRL_BDIS_ACON_EN	(1 << 4)
-#define OTHER_FUNC_CTRL_FIVEWIRE_MODE	(1 << 2)
-
-#define OTHER_IFC_CTRL			0x83
-#define OTHER_IFC_CTRL_OE_INT_EN	(1 << 6)
-#define OTHER_IFC_CTRL_CEA2011_MODE	(1 << 5)
-#define OTHER_IFC_CTRL_FSLSSERIALMODE_4PIN	(1 << 4)
-#define OTHER_IFC_CTRL_HIZ_ULPI_60MHZ_OUT	(1 << 3)
-#define OTHER_IFC_CTRL_HIZ_ULPI		(1 << 2)
-#define OTHER_IFC_CTRL_ALT_INT_REROUTE	(1 << 0)
-
-#define OTHER_INT_EN_RISE		0x86
-#define OTHER_INT_EN_FALL		0x89
-#define OTHER_INT_STS			0x8C
-#define OTHER_INT_LATCH			0x8D
-#define OTHER_INT_VB_SESS_VLD		(1 << 7)
-#define OTHER_INT_DM_HI			(1 << 6) /* not valid for "latch" reg */
-#define OTHER_INT_DP_HI			(1 << 5) /* not valid for "latch" reg */
-#define OTHER_INT_BDIS_ACON		(1 << 3) /* not valid for "fall" regs */
-#define OTHER_INT_MANU			(1 << 1)
-#define OTHER_INT_ABNORMAL_STRESS	(1 << 0)
-
-#define ID_STATUS			0x96
-#define ID_RES_FLOAT			(1 << 4)
-#define ID_RES_440K			(1 << 3)
-#define ID_RES_200K			(1 << 2)
-#define ID_RES_102K			(1 << 1)
-#define ID_RES_GND			(1 << 0)
-
-#define POWER_CTRL			0xAC
-#define POWER_CTRL_OTG_ENAB		(1 << 5)
-
-#define OTHER_IFC_CTRL2			0xAF
-#define OTHER_IFC_CTRL2_ULPI_STP_LOW	(1 << 4)
-#define OTHER_IFC_CTRL2_ULPI_TXEN_POL	(1 << 3)
-#define OTHER_IFC_CTRL2_ULPI_4PIN_2430	(1 << 2)
-#define OTHER_IFC_CTRL2_USB_INT_OUTSEL_MASK	(3 << 0) /* bits 0 and 1 */
-#define OTHER_IFC_CTRL2_USB_INT_OUTSEL_INT1N	(0 << 0)
-#define OTHER_IFC_CTRL2_USB_INT_OUTSEL_INT2N	(1 << 0)
-
-#define REG_CTRL_EN			0xB2
-#define REG_CTRL_ERROR			0xB5
-#define ULPI_I2C_CONFLICT_INTEN		(1 << 0)
-
-#define OTHER_FUNC_CTRL2		0xB8
-#define OTHER_FUNC_CTRL2_VBAT_TIMER_EN	(1 << 0)
-
-/* following registers do not have separate _clr and _set registers */
-#define VBUS_DEBOUNCE			0xC0
-#define ID_DEBOUNCE			0xC1
-#define VBAT_TIMER			0xD3
-#define PHY_PWR_CTRL			0xFD
-#define PHY_PWR_PHYPWD			(1 << 0)
-#define PHY_CLK_CTRL			0xFE
-#define PHY_CLK_CTRL_CLOCKGATING_EN	(1 << 2)
-#define PHY_CLK_CTRL_CLK32K_EN		(1 << 1)
-#define REQ_PHY_DPLL_CLK		(1 << 0)
-#define PHY_CLK_CTRL_STS		0xFF
-#define PHY_DPLL_CLK			(1 << 0)
-
-/* In module TWL_MODULE_PM_MASTER */
-#define STS_HW_CONDITIONS		0x0F
-
-/* In module TWL_MODULE_PM_RECEIVER */
-#define VUSB_DEDICATED1			0x7D
-#define VUSB_DEDICATED2			0x7E
-#define VUSB1V5_DEV_GRP			0x71
-#define VUSB1V5_TYPE			0x72
-#define VUSB1V5_REMAP			0x73
-#define VUSB1V8_DEV_GRP			0x74
-#define VUSB1V8_TYPE			0x75
-#define VUSB1V8_REMAP			0x76
-#define VUSB3V1_DEV_GRP			0x77
-#define VUSB3V1_TYPE			0x78
-#define VUSB3V1_REMAP			0x79
-
-/* In module TWL4030_MODULE_INTBR */
-#define PMBR1				0x0D
-#define GPIO_USB_4PIN_ULPI_2430C	(3 << 0)
-
-struct twl4030_usb {
-	struct usb_phy		phy;
-	struct device		*dev;
-
-	/* TWL4030 internal USB regulator supplies */
-	struct regulator	*usb1v5;
-	struct regulator	*usb1v8;
-	struct regulator	*usb3v1;
-
-	/* for vbus reporting with irqs disabled */
-	spinlock_t		lock;
-
-	/* pin configuration */
-	enum twl4030_usb_mode	usb_mode;
-
-	int			irq;
-	enum omap_musb_vbus_id_status linkstat;
-	bool			vbus_supplied;
-	u8			asleep;
-	bool			irq_enabled;
-
-	struct delayed_work	id_workaround_work;
-};
-
-/* internal define on top of container_of */
-#define phy_to_twl(x)		container_of((x), struct twl4030_usb, phy)
-
-/*-------------------------------------------------------------------------*/
-
-static int twl4030_i2c_write_u8_verify(struct twl4030_usb *twl,
-		u8 module, u8 data, u8 address)
-{
-	u8 check;
-
-	if ((twl_i2c_write_u8(module, data, address) >= 0) &&
-	    (twl_i2c_read_u8(module, &check, address) >= 0) &&
-						(check == data))
-		return 0;
-	dev_dbg(twl->dev, "Write%d[%d,0x%x] wrote %02x but read %02x\n",
-			1, module, address, check, data);
-
-	/* Failed once: Try again */
-	if ((twl_i2c_write_u8(module, data, address) >= 0) &&
-	    (twl_i2c_read_u8(module, &check, address) >= 0) &&
-						(check == data))
-		return 0;
-	dev_dbg(twl->dev, "Write%d[%d,0x%x] wrote %02x but read %02x\n",
-			2, module, address, check, data);
-
-	/* Failed again: Return error */
-	return -EBUSY;
-}
-
-#define twl4030_usb_write_verify(twl, address, data)	\
-	twl4030_i2c_write_u8_verify(twl, TWL_MODULE_USB, (data), (address))
-
-static inline int twl4030_usb_write(struct twl4030_usb *twl,
-		u8 address, u8 data)
-{
-	int ret = 0;
-
-	ret = twl_i2c_write_u8(TWL_MODULE_USB, data, address);
-	if (ret < 0)
-		dev_dbg(twl->dev,
-			"TWL4030:USB:Write[0x%x] Error %d\n", address, ret);
-	return ret;
-}
-
-static inline int twl4030_readb(struct twl4030_usb *twl, u8 module, u8 address)
-{
-	u8 data;
-	int ret = 0;
-
-	ret = twl_i2c_read_u8(module, &data, address);
-	if (ret >= 0)
-		ret = data;
-	else
-		dev_dbg(twl->dev,
-			"TWL4030:readb[0x%x,0x%x] Error %d\n",
-					module, address, ret);
-
-	return ret;
-}
-
-static inline int twl4030_usb_read(struct twl4030_usb *twl, u8 address)
-{
-	return twl4030_readb(twl, TWL_MODULE_USB, address);
-}
-
-/*-------------------------------------------------------------------------*/
-
-static inline int
-twl4030_usb_set_bits(struct twl4030_usb *twl, u8 reg, u8 bits)
-{
-	return twl4030_usb_write(twl, ULPI_SET(reg), bits);
-}
-
-static inline int
-twl4030_usb_clear_bits(struct twl4030_usb *twl, u8 reg, u8 bits)
-{
-	return twl4030_usb_write(twl, ULPI_CLR(reg), bits);
-}
-
-/*-------------------------------------------------------------------------*/
-
-static bool twl4030_is_driving_vbus(struct twl4030_usb *twl)
-{
-	int ret;
-
-	ret = twl4030_usb_read(twl, PHY_CLK_CTRL_STS);
-	if (ret < 0 || !(ret & PHY_DPLL_CLK))
-		/*
-		 * if clocks are off, registers are not updated,
-		 * but we can assume we don't drive VBUS in this case
-		 */
-		return false;
-
-	ret = twl4030_usb_read(twl, ULPI_OTG_CTRL);
-	if (ret < 0)
-		return false;
-
-	return (ret & (ULPI_OTG_DRVVBUS | ULPI_OTG_CHRGVBUS)) ? true : false;
-}
-
-static enum omap_musb_vbus_id_status
-	twl4030_usb_linkstat(struct twl4030_usb *twl)
-{
-	int	status;
-	enum omap_musb_vbus_id_status linkstat = OMAP_MUSB_UNKNOWN;
-
-	twl->vbus_supplied = false;
-
-	/*
-	 * For ID/VBUS sensing, see manual section 15.4.8 ...
-	 * except when using only battery backup power, two
-	 * comparators produce VBUS_PRES and ID_PRES signals,
-	 * which don't match docs elsewhere.  But ... BIT(7)
-	 * and BIT(2) of STS_HW_CONDITIONS, respectively, do
-	 * seem to match up.  If either is true the USB_PRES
-	 * signal is active, the OTG module is activated, and
-	 * its interrupt may be raised (may wake the system).
-	 */
-	status = twl4030_readb(twl, TWL_MODULE_PM_MASTER, STS_HW_CONDITIONS);
-	if (status < 0)
-		dev_err(twl->dev, "USB link status err %d\n", status);
-	else if (status & (BIT(7) | BIT(2))) {
-		if (status & BIT(7)) {
-			if (twl4030_is_driving_vbus(twl))
-				status &= ~BIT(7);
-			else
-				twl->vbus_supplied = true;
-		}
-
-		if (status & BIT(2))
-			linkstat = OMAP_MUSB_ID_GROUND;
-		else if (status & BIT(7))
-			linkstat = OMAP_MUSB_VBUS_VALID;
-		else
-			linkstat = OMAP_MUSB_VBUS_OFF;
-	} else {
-		if (twl->linkstat != OMAP_MUSB_UNKNOWN)
-			linkstat = OMAP_MUSB_VBUS_OFF;
-	}
-
-	dev_dbg(twl->dev, "HW_CONDITIONS 0x%02x/%d; link %d\n",
-			status, status, linkstat);
-
-	/* REVISIT this assumes host and peripheral controllers
-	 * are registered, and that both are active...
-	 */
-
-	return linkstat;
-}
-
-static void twl4030_usb_set_mode(struct twl4030_usb *twl, int mode)
-{
-	twl->usb_mode = mode;
-
-	switch (mode) {
-	case T2_USB_MODE_ULPI:
-		twl4030_usb_clear_bits(twl, ULPI_IFC_CTRL,
-					ULPI_IFC_CTRL_CARKITMODE);
-		twl4030_usb_set_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB);
-		twl4030_usb_clear_bits(twl, ULPI_FUNC_CTRL,
-					ULPI_FUNC_CTRL_XCVRSEL_MASK |
-					ULPI_FUNC_CTRL_OPMODE_MASK);
-		break;
-	case -1:
-		/* FIXME: power on defaults */
-		break;
-	default:
-		dev_err(twl->dev, "unsupported T2 transceiver mode %d\n",
-				mode);
-		break;
-	};
-}
-
-static void twl4030_i2c_access(struct twl4030_usb *twl, int on)
-{
-	unsigned long timeout;
-	int val = twl4030_usb_read(twl, PHY_CLK_CTRL);
-
-	if (val >= 0) {
-		if (on) {
-			/* enable DPLL to access PHY registers over I2C */
-			val |= REQ_PHY_DPLL_CLK;
-			WARN_ON(twl4030_usb_write_verify(twl, PHY_CLK_CTRL,
-						(u8)val) < 0);
-
-			timeout = jiffies + HZ;
-			while (!(twl4030_usb_read(twl, PHY_CLK_CTRL_STS) &
-							PHY_DPLL_CLK)
-				&& time_before(jiffies, timeout))
-					udelay(10);
-			if (!(twl4030_usb_read(twl, PHY_CLK_CTRL_STS) &
-							PHY_DPLL_CLK))
-				dev_err(twl->dev, "Timeout setting T2 HSUSB "
-						"PHY DPLL clock\n");
-		} else {
-			/* let ULPI control the DPLL clock */
-			val &= ~REQ_PHY_DPLL_CLK;
-			WARN_ON(twl4030_usb_write_verify(twl, PHY_CLK_CTRL,
-						(u8)val) < 0);
-		}
-	}
-}
-
-static void __twl4030_phy_power(struct twl4030_usb *twl, int on)
-{
-	u8 pwr = twl4030_usb_read(twl, PHY_PWR_CTRL);
-
-	if (on)
-		pwr &= ~PHY_PWR_PHYPWD;
-	else
-		pwr |= PHY_PWR_PHYPWD;
-
-	WARN_ON(twl4030_usb_write_verify(twl, PHY_PWR_CTRL, pwr) < 0);
-}
-
-static void twl4030_phy_power(struct twl4030_usb *twl, int on)
-{
-	int ret;
-
-	if (on) {
-		ret = regulator_enable(twl->usb3v1);
-		if (ret)
-			dev_err(twl->dev, "Failed to enable usb3v1\n");
-
-		ret = regulator_enable(twl->usb1v8);
-		if (ret)
-			dev_err(twl->dev, "Failed to enable usb1v8\n");
-
-		/*
-		 * Disabling usb3v1 regulator (= writing 0 to VUSB3V1_DEV_GRP
-		 * in twl4030) resets the VUSB_DEDICATED2 register. This reset
-		 * enables VUSB3V1_SLEEP bit that remaps usb3v1 ACTIVE state to
-		 * SLEEP. We work around this by clearing the bit after usv3v1
-		 * is re-activated. This ensures that VUSB3V1 is really active.
-		 */
-		twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB_DEDICATED2);
-
-		ret = regulator_enable(twl->usb1v5);
-		if (ret)
-			dev_err(twl->dev, "Failed to enable usb1v5\n");
-
-		__twl4030_phy_power(twl, 1);
-		twl4030_usb_write(twl, PHY_CLK_CTRL,
-				  twl4030_usb_read(twl, PHY_CLK_CTRL) |
-					(PHY_CLK_CTRL_CLOCKGATING_EN |
-						PHY_CLK_CTRL_CLK32K_EN));
-	} else {
-		__twl4030_phy_power(twl, 0);
-		regulator_disable(twl->usb1v5);
-		regulator_disable(twl->usb1v8);
-		regulator_disable(twl->usb3v1);
-	}
-}
-
-static void twl4030_phy_suspend(struct twl4030_usb *twl, int controller_off)
-{
-	if (twl->asleep)
-		return;
-
-	twl4030_phy_power(twl, 0);
-	twl->asleep = 1;
-	dev_dbg(twl->dev, "%s\n", __func__);
-}
-
-static void __twl4030_phy_resume(struct twl4030_usb *twl)
-{
-	twl4030_phy_power(twl, 1);
-	twl4030_i2c_access(twl, 1);
-	twl4030_usb_set_mode(twl, twl->usb_mode);
-	if (twl->usb_mode == T2_USB_MODE_ULPI)
-		twl4030_i2c_access(twl, 0);
-}
-
-static void twl4030_phy_resume(struct twl4030_usb *twl)
-{
-	if (!twl->asleep)
-		return;
-	__twl4030_phy_resume(twl);
-	twl->asleep = 0;
-	dev_dbg(twl->dev, "%s\n", __func__);
-
-	/*
-	 * XXX When VBUS gets driven after musb goes to A mode,
-	 * ID_PRES related interrupts no longer arrive, why?
-	 * Register itself is updated fine though, so we must poll.
-	 */
-	if (twl->linkstat == OMAP_MUSB_ID_GROUND) {
-		cancel_delayed_work(&twl->id_workaround_work);
-		schedule_delayed_work(&twl->id_workaround_work, HZ);
-	}
-}
-
-static int twl4030_usb_ldo_init(struct twl4030_usb *twl)
-{
-	/* Enable writing to power configuration registers */
-	twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG1,
-			 TWL4030_PM_MASTER_PROTECT_KEY);
-
-	twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG2,
-			 TWL4030_PM_MASTER_PROTECT_KEY);
-
-	/* Keep VUSB3V1 LDO in sleep state until VBUS/ID change detected*/
-	/*twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB_DEDICATED2);*/
-
-	/* input to VUSB3V1 LDO is from VBAT, not VBUS */
-	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0x14, VUSB_DEDICATED1);
-
-	/* Initialize 3.1V regulator */
-	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB3V1_DEV_GRP);
-
-	twl->usb3v1 = devm_regulator_get(twl->dev, "usb3v1");
-	if (IS_ERR(twl->usb3v1))
-		return -ENODEV;
-
-	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB3V1_TYPE);
-
-	/* Initialize 1.5V regulator */
-	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB1V5_DEV_GRP);
-
-	twl->usb1v5 = devm_regulator_get(twl->dev, "usb1v5");
-	if (IS_ERR(twl->usb1v5))
-		return -ENODEV;
-
-	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB1V5_TYPE);
-
-	/* Initialize 1.8V regulator */
-	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB1V8_DEV_GRP);
-
-	twl->usb1v8 = devm_regulator_get(twl->dev, "usb1v8");
-	if (IS_ERR(twl->usb1v8))
-		return -ENODEV;
-
-	twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, 0, VUSB1V8_TYPE);
-
-	/* disable access to power configuration registers */
-	twl_i2c_write_u8(TWL_MODULE_PM_MASTER, 0,
-			 TWL4030_PM_MASTER_PROTECT_KEY);
-
-	return 0;
-}
-
-static ssize_t twl4030_usb_vbus_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct twl4030_usb *twl = dev_get_drvdata(dev);
-	unsigned long flags;
-	int ret = -EINVAL;
-
-	spin_lock_irqsave(&twl->lock, flags);
-	ret = sprintf(buf, "%s\n",
-			twl->vbus_supplied ? "on" : "off");
-	spin_unlock_irqrestore(&twl->lock, flags);
-
-	return ret;
-}
-static DEVICE_ATTR(vbus, 0444, twl4030_usb_vbus_show, NULL);
-
-static irqreturn_t twl4030_usb_irq(int irq, void *_twl)
-{
-	struct twl4030_usb *twl = _twl;
-	enum omap_musb_vbus_id_status status;
-	bool status_changed = false;
-
-	status = twl4030_usb_linkstat(twl);
-
-	spin_lock_irq(&twl->lock);
-	if (status >= 0 && status != twl->linkstat) {
-		twl->linkstat = status;
-		status_changed = true;
-	}
-	spin_unlock_irq(&twl->lock);
-
-	if (status_changed) {
-		/* FIXME add a set_power() method so that B-devices can
-		 * configure the charger appropriately.  It's not always
-		 * correct to consume VBUS power, and how much current to
-		 * consume is a function of the USB configuration chosen
-		 * by the host.
-		 *
-		 * REVISIT usb_gadget_vbus_connect(...) as needed, ditto
-		 * its disconnect() sibling, when changing to/from the
-		 * USB_LINK_VBUS state.  musb_hdrc won't care until it
-		 * starts to handle softconnect right.
-		 */
-		omap_musb_mailbox(status);
-	}
-	sysfs_notify(&twl->dev->kobj, NULL, "vbus");
-
-	return IRQ_HANDLED;
-}
-
-static void twl4030_id_workaround_work(struct work_struct *work)
-{
-	struct twl4030_usb *twl = container_of(work, struct twl4030_usb,
-		id_workaround_work.work);
-	enum omap_musb_vbus_id_status status;
-	bool status_changed = false;
-
-	status = twl4030_usb_linkstat(twl);
-
-	spin_lock_irq(&twl->lock);
-	if (status >= 0 && status != twl->linkstat) {
-		twl->linkstat = status;
-		status_changed = true;
-	}
-	spin_unlock_irq(&twl->lock);
-
-	if (status_changed) {
-		dev_dbg(twl->dev, "handle missing status change to %d\n",
-				status);
-		omap_musb_mailbox(status);
-	}
-
-	/* don't schedule during sleep - irq works right then */
-	if (status == OMAP_MUSB_ID_GROUND && !twl->asleep) {
-		cancel_delayed_work(&twl->id_workaround_work);
-		schedule_delayed_work(&twl->id_workaround_work, HZ);
-	}
-}
-
-static int twl4030_usb_phy_init(struct usb_phy *phy)
-{
-	struct twl4030_usb *twl = phy_to_twl(phy);
-	enum omap_musb_vbus_id_status status;
-
-	/*
-	 * Start in sleep state, we'll get called through set_suspend()
-	 * callback when musb is runtime resumed and it's time to start.
-	 */
-	__twl4030_phy_power(twl, 0);
-	twl->asleep = 1;
-
-	status = twl4030_usb_linkstat(twl);
-	twl->linkstat = status;
-
-	if (status == OMAP_MUSB_ID_GROUND || status == OMAP_MUSB_VBUS_VALID)
-		omap_musb_mailbox(twl->linkstat);
-
-	sysfs_notify(&twl->dev->kobj, NULL, "vbus");
-	return 0;
-}
-
-static int twl4030_set_suspend(struct usb_phy *x, int suspend)
-{
-	struct twl4030_usb *twl = phy_to_twl(x);
-
-	if (suspend)
-		twl4030_phy_suspend(twl, 1);
-	else
-		twl4030_phy_resume(twl);
-
-	return 0;
-}
-
-static int twl4030_set_peripheral(struct usb_otg *otg,
-					struct usb_gadget *gadget)
-{
-	if (!otg)
-		return -ENODEV;
-
-	otg->gadget = gadget;
-	if (!gadget)
-		otg->phy->state = OTG_STATE_UNDEFINED;
-
-	return 0;
-}
-
-static int twl4030_set_host(struct usb_otg *otg, struct usb_bus *host)
-{
-	if (!otg)
-		return -ENODEV;
-
-	otg->host = host;
-	if (!host)
-		otg->phy->state = OTG_STATE_UNDEFINED;
-
-	return 0;
-}
-
-static int twl4030_usb_probe(struct platform_device *pdev)
-{
-	struct twl4030_usb_data *pdata = dev_get_platdata(&pdev->dev);
-	struct twl4030_usb	*twl;
-	int			status, err;
-	struct usb_otg		*otg;
-	struct device_node	*np = pdev->dev.of_node;
-
-	twl = devm_kzalloc(&pdev->dev, sizeof *twl, GFP_KERNEL);
-	if (!twl)
-		return -ENOMEM;
-
-	if (np)
-		of_property_read_u32(np, "usb_mode",
-				(enum twl4030_usb_mode *)&twl->usb_mode);
-	else if (pdata)
-		twl->usb_mode = pdata->usb_mode;
-	else {
-		dev_err(&pdev->dev, "twl4030 initialized without pdata\n");
-		return -EINVAL;
-	}
-
-	otg = devm_kzalloc(&pdev->dev, sizeof *otg, GFP_KERNEL);
-	if (!otg)
-		return -ENOMEM;
-
-	twl->dev		= &pdev->dev;
-	twl->irq		= platform_get_irq(pdev, 0);
-	twl->vbus_supplied	= false;
-	twl->asleep		= 1;
-	twl->linkstat		= OMAP_MUSB_UNKNOWN;
-
-	twl->phy.dev		= twl->dev;
-	twl->phy.label		= "twl4030";
-	twl->phy.otg		= otg;
-	twl->phy.type		= USB_PHY_TYPE_USB2;
-	twl->phy.set_suspend	= twl4030_set_suspend;
-	twl->phy.init		= twl4030_usb_phy_init;
-
-	otg->phy		= &twl->phy;
-	otg->set_host		= twl4030_set_host;
-	otg->set_peripheral	= twl4030_set_peripheral;
-
-	/* init spinlock for workqueue */
-	spin_lock_init(&twl->lock);
-
-	INIT_DELAYED_WORK(&twl->id_workaround_work, twl4030_id_workaround_work);
-
-	err = twl4030_usb_ldo_init(twl);
-	if (err) {
-		dev_err(&pdev->dev, "ldo init failed\n");
-		return err;
-	}
-	usb_add_phy_dev(&twl->phy);
-
-	platform_set_drvdata(pdev, twl);
-	if (device_create_file(&pdev->dev, &dev_attr_vbus))
-		dev_warn(&pdev->dev, "could not create sysfs file\n");
-
-	/* Our job is to use irqs and status from the power module
-	 * to keep the transceiver disabled when nothing's connected.
-	 *
-	 * FIXME we actually shouldn't start enabling it until the
-	 * USB controller drivers have said they're ready, by calling
-	 * set_host() and/or set_peripheral() ... OTG_capable boards
-	 * need both handles, otherwise just one suffices.
-	 */
-	twl->irq_enabled = true;
-	status = devm_request_threaded_irq(twl->dev, twl->irq, NULL,
-			twl4030_usb_irq, IRQF_TRIGGER_FALLING |
-			IRQF_TRIGGER_RISING | IRQF_ONESHOT, "twl4030_usb", twl);
-	if (status < 0) {
-		dev_dbg(&pdev->dev, "can't get IRQ %d, err %d\n",
-			twl->irq, status);
-		return status;
-	}
-
-	dev_info(&pdev->dev, "Initialized TWL4030 USB module\n");
-	return 0;
-}
-
-static int twl4030_usb_remove(struct platform_device *pdev)
-{
-	struct twl4030_usb *twl = platform_get_drvdata(pdev);
-	int val;
-
-	cancel_delayed_work(&twl->id_workaround_work);
-	device_remove_file(twl->dev, &dev_attr_vbus);
-
-	/* set transceiver mode to power on defaults */
-	twl4030_usb_set_mode(twl, -1);
-
-	/* autogate 60MHz ULPI clock,
-	 * clear dpll clock request for i2c access,
-	 * disable 32KHz
-	 */
-	val = twl4030_usb_read(twl, PHY_CLK_CTRL);
-	if (val >= 0) {
-		val |= PHY_CLK_CTRL_CLOCKGATING_EN;
-		val &= ~(PHY_CLK_CTRL_CLK32K_EN | REQ_PHY_DPLL_CLK);
-		twl4030_usb_write(twl, PHY_CLK_CTRL, (u8)val);
-	}
-
-	/* disable complete OTG block */
-	twl4030_usb_clear_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB);
-
-	if (!twl->asleep)
-		twl4030_phy_power(twl, 0);
-
-	return 0;
-}
-
-#ifdef CONFIG_OF
-static const struct of_device_id twl4030_usb_id_table[] = {
-	{ .compatible = "ti,twl4030-usb" },
-	{}
-};
-MODULE_DEVICE_TABLE(of, twl4030_usb_id_table);
-#endif
-
-static struct platform_driver twl4030_usb_driver = {
-	.probe		= twl4030_usb_probe,
-	.remove		= twl4030_usb_remove,
-	.driver		= {
-		.name	= "twl4030_usb",
-		.owner	= THIS_MODULE,
-		.of_match_table = of_match_ptr(twl4030_usb_id_table),
-	},
-};
-
-static int __init twl4030_usb_init(void)
-{
-	return platform_driver_register(&twl4030_usb_driver);
-}
-subsys_initcall(twl4030_usb_init);
-
-static void __exit twl4030_usb_exit(void)
-{
-	platform_driver_unregister(&twl4030_usb_driver);
-}
-module_exit(twl4030_usb_exit);
-
-MODULE_ALIAS("platform:twl4030_usb");
-MODULE_AUTHOR("Texas Instruments, Inc, Nokia Corporation");
-MODULE_DESCRIPTION("TWL4030 USB transceiver driver");
-MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 81cbbdb96aae..673a3ce67f31 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -26,6 +26,7 @@
 #define __TWL_H_
 
 #include <linux/types.h>
+#include <linux/phy/phy.h>
 #include <linux/input/matrix_keypad.h>
 
 /*
@@ -615,6 +616,7 @@ enum twl4030_usb_mode {
 struct twl4030_usb_data {
 	enum twl4030_usb_mode	usb_mode;
 	unsigned long		features;
+	struct phy_init_data	*init_data;
 
 	int		(*phy_init)(struct device *dev);
 	int		(*phy_exit)(struct device *dev);
-- 
cgit v1.2.3


From 75f93fed50c2abadbab6ef546b265f51ca975b27 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 27 Sep 2013 17:30:03 +0200
Subject: sched: Revert need_resched() to look at TIF_NEED_RESCHED

Yuanhan reported a serious throughput regression in his pigz
benchmark. Using the ftrace patch I found that several idle
paths need more TLC before we can switch the generic
need_resched() over to preempt_need_resched.

The preemption paths benefit most from preempt_need_resched and
do indeed use it; all other need_resched() users don't really
care that much so reverting need_resched() back to
tif_need_resched() is the simple and safe solution.

Reported-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: lkp@linux.intel.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20130927153003.GF15690@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/preempt.h | 8 --------
 include/asm-generic/preempt.h  | 8 --------
 include/linux/sched.h          | 5 +++++
 3 files changed, 5 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 1de41690ff99..8729723636fd 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -79,14 +79,6 @@ static __always_inline bool __preempt_count_dec_and_test(void)
 	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
 }
 
-/*
- * Returns true when we need to resched -- even if we can not.
- */
-static __always_inline bool need_resched(void)
-{
-	return unlikely(test_preempt_need_resched());
-}
-
 /*
  * Returns true when we need to resched and can (barring IRQ state).
  */
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index 5dc14ed3791c..ddf2b420ac8f 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -84,14 +84,6 @@ static __always_inline bool __preempt_count_dec_and_test(void)
 	return !--*preempt_count_ptr();
 }
 
-/*
- * Returns true when we need to resched -- even if we can not.
- */
-static __always_inline bool need_resched(void)
-{
-	return unlikely(test_preempt_need_resched());
-}
-
 /*
  * Returns true when we need to resched and can (barring IRQ state).
  */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b09798b672f3..2ac5285db434 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2577,6 +2577,11 @@ static inline bool __must_check current_clr_polling_and_test(void)
 }
 #endif
 
+static __always_inline bool need_resched(void)
+{
+	return unlikely(tif_need_resched());
+}
+
 /*
  * Thread group CPU time accounting.
  */
-- 
cgit v1.2.3


From b4e46138f946442608647be58e78e3ad4d15534e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 27 Sep 2013 21:46:51 -0700
Subject: driver-core: remove struct bus_type.bus_attrs

Now that all in-kernel users of bus_type.bus_attrs have been converted
to use bus_groups instead, the bus_attrs field, and logic surrounding
it, can be removed.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/bus.c     | 42 ------------------------------------------
 include/linux/device.h |  2 --
 2 files changed, 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 4c289ab91357..2cdf29464063 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -846,42 +846,6 @@ struct bus_type *find_bus(char *name)
 }
 #endif  /*  0  */
 
-
-/**
- * bus_add_attrs - Add default attributes for this bus.
- * @bus: Bus that has just been registered.
- */
-
-static int bus_add_attrs(struct bus_type *bus)
-{
-	int error = 0;
-	int i;
-
-	if (bus->bus_attrs) {
-		for (i = 0; bus->bus_attrs[i].attr.name; i++) {
-			error = bus_create_file(bus, &bus->bus_attrs[i]);
-			if (error)
-				goto err;
-		}
-	}
-done:
-	return error;
-err:
-	while (--i >= 0)
-		bus_remove_file(bus, &bus->bus_attrs[i]);
-	goto done;
-}
-
-static void bus_remove_attrs(struct bus_type *bus)
-{
-	int i;
-
-	if (bus->bus_attrs) {
-		for (i = 0; bus->bus_attrs[i].attr.name; i++)
-			bus_remove_file(bus, &bus->bus_attrs[i]);
-	}
-}
-
 static int bus_add_groups(struct bus_type *bus,
 			  const struct attribute_group **groups)
 {
@@ -983,9 +947,6 @@ int bus_register(struct bus_type *bus)
 	if (retval)
 		goto bus_probe_files_fail;
 
-	retval = bus_add_attrs(bus);
-	if (retval)
-		goto bus_attrs_fail;
 	retval = bus_add_groups(bus, bus->bus_groups);
 	if (retval)
 		goto bus_groups_fail;
@@ -994,8 +955,6 @@ int bus_register(struct bus_type *bus)
 	return 0;
 
 bus_groups_fail:
-	bus_remove_attrs(bus);
-bus_attrs_fail:
 	remove_probe_files(bus);
 bus_probe_files_fail:
 	kset_unregister(bus->p->drivers_kset);
@@ -1024,7 +983,6 @@ void bus_unregister(struct bus_type *bus)
 	pr_debug("bus: '%s': unregistering\n", bus->name);
 	if (bus->dev_root)
 		device_unregister(bus->dev_root);
-	bus_remove_attrs(bus);
 	bus_remove_groups(bus, bus->bus_groups);
 	remove_probe_files(bus);
 	kset_unregister(bus->p->drivers_kset);
diff --git a/include/linux/device.h b/include/linux/device.h
index ce690ea34547..89a852e0896d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -63,7 +63,6 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * @name:	The name of the bus.
  * @dev_name:	Used for subsystems to enumerate devices like ("foo%u", dev->id).
  * @dev_root:	Default device to use as the parent.
- * @bus_attrs:	Default attributes of the bus.
  * @dev_attrs:	Default attributes of the devices on the bus.
  * @drv_attrs:	Default attributes of the device drivers on the bus.
  * @bus_groups:	Default attributes of the bus.
@@ -106,7 +105,6 @@ struct bus_type {
 	const char		*name;
 	const char		*dev_name;
 	struct device		*dev_root;
-	struct bus_attribute	*bus_attrs;	/* use bus_groups instead */
 	struct device_attribute	*dev_attrs;	/* use dev_groups instead */
 	struct driver_attribute	*drv_attrs;	/* use drv_groups instead */
 	const struct attribute_group **bus_groups;
-- 
cgit v1.2.3


From e18945b159a1cdbc031f1d3b0b7e515a33bdcbf7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 27 Sep 2013 21:47:21 -0700
Subject: driver-core: remove struct bus_type.drv_attrs

Now that all in-kernel users of bus_type.drv_attrs have been converted
to use drv_groups instead, the drv_attrs field, and logic surrounding
it, can be removed.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/bus.c     | 40 ++--------------------------------------
 include/linux/device.h |  2 --
 2 files changed, 2 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 2cdf29464063..73f6c2925281 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -591,37 +591,6 @@ void bus_remove_device(struct device *dev)
 	bus_put(dev->bus);
 }
 
-static int driver_add_attrs(struct bus_type *bus, struct device_driver *drv)
-{
-	int error = 0;
-	int i;
-
-	if (bus->drv_attrs) {
-		for (i = 0; bus->drv_attrs[i].attr.name; i++) {
-			error = driver_create_file(drv, &bus->drv_attrs[i]);
-			if (error)
-				goto err;
-		}
-	}
-done:
-	return error;
-err:
-	while (--i >= 0)
-		driver_remove_file(drv, &bus->drv_attrs[i]);
-	goto done;
-}
-
-static void driver_remove_attrs(struct bus_type *bus,
-				struct device_driver *drv)
-{
-	int i;
-
-	if (bus->drv_attrs) {
-		for (i = 0; bus->drv_attrs[i].attr.name; i++)
-			driver_remove_file(drv, &bus->drv_attrs[i]);
-	}
-}
-
 static int __must_check add_bind_files(struct device_driver *drv)
 {
 	int ret;
@@ -720,16 +689,12 @@ int bus_add_driver(struct device_driver *drv)
 		printk(KERN_ERR "%s: uevent attr (%s) failed\n",
 			__func__, drv->name);
 	}
-	error = driver_add_attrs(bus, drv);
+	error = driver_add_groups(drv, bus->drv_groups);
 	if (error) {
 		/* How the hell do we get out of this pickle? Give up */
-		printk(KERN_ERR "%s: driver_add_attrs(%s) failed\n",
-			__func__, drv->name);
-	}
-	error = driver_add_groups(drv, bus->drv_groups);
-	if (error)
 		printk(KERN_ERR "%s: driver_create_groups(%s) failed\n",
 			__func__, drv->name);
+	}
 
 	if (!drv->suppress_bind_attrs) {
 		error = add_bind_files(drv);
@@ -766,7 +731,6 @@ void bus_remove_driver(struct device_driver *drv)
 
 	if (!drv->suppress_bind_attrs)
 		remove_bind_files(drv);
-	driver_remove_attrs(drv->bus, drv);
 	driver_remove_groups(drv, drv->bus->drv_groups);
 	driver_remove_file(drv, &driver_attr_uevent);
 	klist_remove(&drv->p->knode_bus);
diff --git a/include/linux/device.h b/include/linux/device.h
index 89a852e0896d..e7f5b8585380 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -64,7 +64,6 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * @dev_name:	Used for subsystems to enumerate devices like ("foo%u", dev->id).
  * @dev_root:	Default device to use as the parent.
  * @dev_attrs:	Default attributes of the devices on the bus.
- * @drv_attrs:	Default attributes of the device drivers on the bus.
  * @bus_groups:	Default attributes of the bus.
  * @dev_groups:	Default attributes of the devices on the bus.
  * @drv_groups: Default attributes of the device drivers on the bus.
@@ -106,7 +105,6 @@ struct bus_type {
 	const char		*dev_name;
 	struct device		*dev_root;
 	struct device_attribute	*dev_attrs;	/* use dev_groups instead */
-	struct driver_attribute	*drv_attrs;	/* use drv_groups instead */
 	const struct attribute_group **bus_groups;
 	const struct attribute_group **dev_groups;
 	const struct attribute_group **drv_groups;
-- 
cgit v1.2.3


From 74227e65f9742f559f6e243ba2c9a983e1f1221d Mon Sep 17 00:00:00 2001
From: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Date: Tue, 17 Sep 2013 00:32:34 +0200
Subject: clk: nomadik: declare OF clock provider

Common clock framework allows to register clock providers to get called
on of_clk_init() by using CLK_OF_DECLARE. This converts nomadik clock
provider to make use of it and get rid of the mach specific clk init
call. As clocks require system reset controller base address to be
initialized each clock driver checks src_base and calls new
nomadik_src_init if required.

Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Mike Turquette <mturquette@linaro.org>
---
 arch/arm/mach-nomadik/cpu-8815.c          |  5 ++--
 drivers/clk/clk-nomadik.c                 | 45 +++++++++++--------------------
 include/linux/platform_data/clk-nomadik.h |  2 --
 3 files changed, 17 insertions(+), 35 deletions(-)
 delete mode 100644 include/linux/platform_data/clk-nomadik.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-nomadik/cpu-8815.c b/arch/arm/mach-nomadik/cpu-8815.c
index 0fcb14966d5a..2be38a00a3d9 100644
--- a/arch/arm/mach-nomadik/cpu-8815.c
+++ b/arch/arm/mach-nomadik/cpu-8815.c
@@ -25,7 +25,7 @@
 #include <linux/slab.h>
 #include <linux/irq.h>
 #include <linux/dma-mapping.h>
-#include <linux/platform_data/clk-nomadik.h>
+#include <linux/clk-provider.h>
 #include <linux/clocksource.h>
 #include <linux/of_irq.h>
 #include <linux/of_gpio.h>
@@ -115,8 +115,7 @@ static void cpu8815_restart(enum reboot_mode mode, const char *cmd)
 
 static void __init cpu8815_timer_init_of(void)
 {
-	/* We need this to be up now */
-	nomadik_clk_init();
+	of_clk_init(NULL);
 	clocksource_of_init();
 }
 
diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c
index 06b29c264dbe..ce2d6b32e3f5 100644
--- a/drivers/clk/clk-nomadik.c
+++ b/drivers/clk/clk-nomadik.c
@@ -512,6 +512,9 @@ static void __init of_nomadik_pll_setup(struct device_node *np)
 	const char *parent_name;
 	u32 pll_id;
 
+	if (!src_base)
+		nomadik_src_init();
+
 	if (of_property_read_u32(np, "pll-id", &pll_id)) {
 		pr_err("%s: PLL \"%s\" missing pll-id property\n",
 			__func__, clk_name);
@@ -522,6 +525,8 @@ static void __init of_nomadik_pll_setup(struct device_node *np)
 	if (!IS_ERR(clk))
 		of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(nomadik_pll_clk,
+	"st,nomadik-pll-clock", of_nomadik_pll_setup);
 
 static void __init of_nomadik_hclk_setup(struct device_node *np)
 {
@@ -529,6 +534,9 @@ static void __init of_nomadik_hclk_setup(struct device_node *np)
 	const char *clk_name = np->name;
 	const char *parent_name;
 
+	if (!src_base)
+		nomadik_src_init();
+
 	parent_name = of_clk_get_parent_name(np, 0);
 	/*
 	 * The HCLK divides PLL1 with 1 (passthru), 2, 3 or 4.
@@ -541,6 +549,8 @@ static void __init of_nomadik_hclk_setup(struct device_node *np)
 	if (!IS_ERR(clk))
 		of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(nomadik_hclk_clk,
+	"st,nomadik-hclk-clock", of_nomadik_hclk_setup);
 
 static void __init of_nomadik_src_clk_setup(struct device_node *np)
 {
@@ -549,6 +559,9 @@ static void __init of_nomadik_src_clk_setup(struct device_node *np)
 	const char *parent_name;
 	u32 clk_id;
 
+	if (!src_base)
+		nomadik_src_init();
+
 	if (of_property_read_u32(np, "clock-id", &clk_id)) {
 		pr_err("%s: SRC clock \"%s\" missing clock-id property\n",
 			__func__, clk_name);
@@ -559,33 +572,5 @@ static void __init of_nomadik_src_clk_setup(struct device_node *np)
 	if (!IS_ERR(clk))
 		of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
-
-static const struct of_device_id nomadik_src_clk_match[] __initconst = {
-	{
-		.compatible = "fixed-clock",
-		.data = of_fixed_clk_setup,
-	},
-	{
-		.compatible = "fixed-factor-clock",
-		.data = of_fixed_factor_clk_setup,
-	},
-	{
-		.compatible = "st,nomadik-pll-clock",
-		.data = of_nomadik_pll_setup,
-	},
-	{
-		.compatible = "st,nomadik-hclk-clock",
-		.data = of_nomadik_hclk_setup,
-	},
-	{
-		.compatible = "st,nomadik-src-clock",
-		.data = of_nomadik_src_clk_setup,
-	},
-	{ /* sentinel */ }
-};
-
-void __init nomadik_clk_init(void)
-{
-	nomadik_src_init();
-	of_clk_init(nomadik_src_clk_match);
-}
+CLK_OF_DECLARE(nomadik_src_clk,
+	"st,nomadik-src-clock", of_nomadik_src_clk_setup);
diff --git a/include/linux/platform_data/clk-nomadik.h b/include/linux/platform_data/clk-nomadik.h
deleted file mode 100644
index 5713c87b2477..000000000000
--- a/include/linux/platform_data/clk-nomadik.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* Minimal platform data header */
-void nomadik_clk_init(void);
-- 
cgit v1.2.3


From be0804513a506de96925f9ed1aa8dc1facd4c180 Mon Sep 17 00:00:00 2001
From: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Date: Fri, 6 Sep 2013 14:59:57 +0200
Subject: clk: sunxi: declare OF clock provider

Common clock framework allows to register clock providers to get called
on of_clk_init() by using CLK_OF_DECLARE. This converts sunxi clock
providers to make use of it and get rid of the mach specific clk init
call. As sunxi has a bunch of independent clk provider nodes, we hook
current clock init to board compatible to make it called once.

Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Mike Turquette <mturquette@linaro.org>
---
 arch/arm/mach-sunxi/sunxi.c   |  4 +---
 drivers/clk/sunxi/clk-sunxi.c | 11 ++++++-----
 include/linux/clk/sunxi.h     | 22 ----------------------
 3 files changed, 7 insertions(+), 30 deletions(-)
 delete mode 100644 include/linux/clk/sunxi.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index e79fb3469341..e5a69756427b 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -20,8 +20,6 @@
 #include <linux/io.h>
 #include <linux/reboot.h>
 
-#include <linux/clk/sunxi.h>
-
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/system_misc.h>
@@ -118,7 +116,7 @@ static void sunxi_setup_restart(void)
 
 static void __init sunxi_timer_init(void)
 {
-	sunxi_init_clocks();
+	of_clk_init(NULL);
 	clocksource_of_init();
 }
 
diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index 34ee69f4d50c..9bbd03514540 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c
@@ -16,7 +16,6 @@
 
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
-#include <linux/clk/sunxi.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 
@@ -617,11 +616,8 @@ static void __init of_sunxi_table_clock_setup(const struct of_device_id *clk_mat
 	}
 }
 
-void __init sunxi_init_clocks(void)
+static void __init sunxi_init_clocks(struct device_node *np)
 {
-	/* Register all the simple and basic clocks on DT */
-	of_clk_init(NULL);
-
 	/* Register factor clocks */
 	of_sunxi_table_clock_setup(clk_factors_match, sunxi_factors_clk_setup);
 
@@ -634,3 +630,8 @@ void __init sunxi_init_clocks(void)
 	/* Register gate clocks */
 	of_sunxi_table_clock_setup(clk_gates_match, sunxi_gates_clk_setup);
 }
+CLK_OF_DECLARE(sun4i_a10_clk_init, "allwinner,sun4i-a10", sunxi_init_clocks);
+CLK_OF_DECLARE(sun5i_a10s_clk_init, "allwinner,sun5i-a10s", sunxi_init_clocks);
+CLK_OF_DECLARE(sun5i_a13_clk_init, "allwinner,sun5i-a13", sunxi_init_clocks);
+CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sunxi_init_clocks);
+CLK_OF_DECLARE(sun7i_a20_clk_init, "allwinner,sun7i-a20", sunxi_init_clocks);
diff --git a/include/linux/clk/sunxi.h b/include/linux/clk/sunxi.h
deleted file mode 100644
index e074fdd5a236..000000000000
--- a/include/linux/clk/sunxi.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 2012 Maxime Ripard
- *
- * Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __LINUX_CLK_SUNXI_H_
-#define __LINUX_CLK_SUNXI_H_
-
-void __init sunxi_init_clocks(void);
-
-#endif
-- 
cgit v1.2.3


From dd03ee9ae5bc080297175c921b1a693d0de1e8b0 Mon Sep 17 00:00:00 2001
From: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Date: Wed, 4 Sep 2013 13:16:01 +0200
Subject: ARM: mxs: remove custom .init_time hook

This patch converts clk-imx2[38] clocksource_of_init compatible init
associated with fsl,imx2[38]-clkctrl. With arch/arm calling
of_clk_init(NULL) from time_init(), we can now also remove custom
.init_time hooks.

Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Acked-by: Mike Turquette <mturquette@linaro.org>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
---
 arch/arm/mach-mxs/mach-mxs.c | 13 -------------
 drivers/clk/mxs/clk-imx23.c  | 15 ++++++++-------
 drivers/clk/mxs/clk-imx28.c  | 16 ++++++++--------
 include/linux/clk/mxs.h      |  2 --
 4 files changed, 16 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c
index 98f6e2adb53e..cc511a4890a3 100644
--- a/arch/arm/mach-mxs/mach-mxs.c
+++ b/arch/arm/mach-mxs/mach-mxs.c
@@ -13,8 +13,6 @@
 #include <linux/clk.h>
 #include <linux/clk/mxs.h>
 #include <linux/clkdev.h>
-#include <linux/clocksource.h>
-#include <linux/clk-provider.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
@@ -490,16 +488,6 @@ static void mxs_restart(enum reboot_mode mode, const char *cmd)
 	soft_restart(0);
 }
 
-static void __init mxs_timer_init(void)
-{
-	if (of_machine_is_compatible("fsl,imx23"))
-		mx23_clocks_init();
-	else
-		mx28_clocks_init();
-	of_clk_init(NULL);
-	clocksource_of_init();
-}
-
 static const char *mxs_dt_compat[] __initdata = {
 	"fsl,imx28",
 	"fsl,imx23",
@@ -508,7 +496,6 @@ static const char *mxs_dt_compat[] __initdata = {
 
 DT_MACHINE_START(MXS, "Freescale MXS (Device Tree)")
 	.handle_irq	= icoll_handle_irq,
-	.init_time	= mxs_timer_init,
 	.init_machine	= mxs_machine_init,
 	.init_late      = mxs_pm_init,
 	.dt_compat	= mxs_dt_compat,
diff --git a/drivers/clk/mxs/clk-imx23.c b/drivers/clk/mxs/clk-imx23.c
index c396fe361589..9fc9359f5133 100644
--- a/drivers/clk/mxs/clk-imx23.c
+++ b/drivers/clk/mxs/clk-imx23.c
@@ -12,6 +12,7 @@
 #include <linux/clk.h>
 #include <linux/clk/mxs.h>
 #include <linux/clkdev.h>
+#include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/io.h>
@@ -100,16 +101,16 @@ static enum imx23_clk clks_init_on[] __initdata = {
 	cpu, hbus, xbus, emi, uart,
 };
 
-int __init mx23_clocks_init(void)
+static void __init mx23_clocks_init(struct device_node *np)
 {
-	struct device_node *np;
+	struct device_node *dcnp;
 	u32 i;
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,imx23-digctl");
-	digctrl = of_iomap(np, 0);
+	dcnp = of_find_compatible_node(NULL, NULL, "fsl,imx23-digctl");
+	digctrl = of_iomap(dcnp, 0);
 	WARN_ON(!digctrl);
+	of_node_put(dcnp);
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,imx23-clkctrl");
 	clkctrl = of_iomap(np, 0);
 	WARN_ON(!clkctrl);
 
@@ -162,7 +163,7 @@ int __init mx23_clocks_init(void)
 		if (IS_ERR(clks[i])) {
 			pr_err("i.MX23 clk %d: register failed with %ld\n",
 				i, PTR_ERR(clks[i]));
-			return PTR_ERR(clks[i]);
+			return;
 		}
 
 	clk_data.clks = clks;
@@ -172,5 +173,5 @@ int __init mx23_clocks_init(void)
 	for (i = 0; i < ARRAY_SIZE(clks_init_on); i++)
 		clk_prepare_enable(clks[clks_init_on[i]]);
 
-	return 0;
 }
+CLK_OF_DECLARE(imx23_clkctrl, "fsl,imx23-clkctrl", mx23_clocks_init);
diff --git a/drivers/clk/mxs/clk-imx28.c b/drivers/clk/mxs/clk-imx28.c
index 4faf0afc44cd..a6c35010e4e5 100644
--- a/drivers/clk/mxs/clk-imx28.c
+++ b/drivers/clk/mxs/clk-imx28.c
@@ -12,6 +12,7 @@
 #include <linux/clk.h>
 #include <linux/clk/mxs.h>
 #include <linux/clkdev.h>
+#include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/io.h>
@@ -154,16 +155,16 @@ static enum imx28_clk clks_init_on[] __initdata = {
 	cpu, hbus, xbus, emi, uart,
 };
 
-int __init mx28_clocks_init(void)
+static void __init mx28_clocks_init(struct device_node *np)
 {
-	struct device_node *np;
+	struct device_node *dcnp;
 	u32 i;
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,imx28-digctl");
-	digctrl = of_iomap(np, 0);
+	dcnp = of_find_compatible_node(NULL, NULL, "fsl,imx28-digctl");
+	digctrl = of_iomap(dcnp, 0);
 	WARN_ON(!digctrl);
+	of_node_put(dcnp);
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,imx28-clkctrl");
 	clkctrl = of_iomap(np, 0);
 	WARN_ON(!clkctrl);
 
@@ -239,7 +240,7 @@ int __init mx28_clocks_init(void)
 		if (IS_ERR(clks[i])) {
 			pr_err("i.MX28 clk %d: register failed with %ld\n",
 				i, PTR_ERR(clks[i]));
-			return PTR_ERR(clks[i]);
+			return;
 		}
 
 	clk_data.clks = clks;
@@ -250,6 +251,5 @@ int __init mx28_clocks_init(void)
 
 	for (i = 0; i < ARRAY_SIZE(clks_init_on); i++)
 		clk_prepare_enable(clks[clks_init_on[i]]);
-
-	return 0;
 }
+CLK_OF_DECLARE(imx28_clkctrl, "fsl,imx28-clkctrl", mx28_clocks_init);
diff --git a/include/linux/clk/mxs.h b/include/linux/clk/mxs.h
index 90c30dc3efc7..5138a90e018c 100644
--- a/include/linux/clk/mxs.h
+++ b/include/linux/clk/mxs.h
@@ -9,8 +9,6 @@
 #ifndef __LINUX_CLK_MXS_H
 #define __LINUX_CLK_MXS_H
 
-int mx23_clocks_init(void);
-int mx28_clocks_init(void);
 int mxs_saif_clkmux_select(unsigned int clkmux);
 
 #endif
-- 
cgit v1.2.3


From 25f73ed5c67d17ecf8cefd560f55211cce726086 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Fri, 27 Sep 2013 15:06:28 -0400
Subject: misc: (at24) move header to linux/platform_data/

This patch moves the at24.h header from include/linux/i2c to
include/linux/platform_data and updates existing support accordingly.

It also fixes the following checkpatch warning:

    WARNING: please, no space before tabs
    #436: FILE: include/linux/platform_data/at24.h:31:
    + * ^Iu8 *mac_addr = ethernet_pdata->mac_addr;$

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 MAINTAINERS                                |  2 +-
 arch/arm/mach-at91/board-sam9260ek.c       |  2 +-
 arch/arm/mach-at91/board-sam9263ek.c       |  2 +-
 arch/arm/mach-davinci/board-da830-evm.c    |  2 +-
 arch/arm/mach-davinci/board-da850-evm.c    |  2 +-
 arch/arm/mach-davinci/board-dm365-evm.c    |  2 +-
 arch/arm/mach-davinci/board-dm644x-evm.c   |  2 +-
 arch/arm/mach-davinci/board-dm646x-evm.c   |  2 +-
 arch/arm/mach-davinci/board-mityomapl138.c |  2 +-
 arch/arm/mach-davinci/board-sffsdr.c       |  2 +-
 arch/arm/mach-imx/mach-pca100.c            |  2 +-
 arch/arm/mach-imx/mach-pcm037.c            |  2 +-
 arch/arm/mach-imx/mach-pcm038.c            |  2 +-
 arch/arm/mach-imx/mach-pcm043.c            |  2 +-
 arch/arm/mach-imx/mach-vpr200.c            |  2 +-
 arch/arm/mach-kirkwood/lacie_v2-common.c   |  2 +-
 arch/arm/mach-omap1/board-osk.c            |  2 +-
 arch/arm/mach-omap2/board-cm-t35.c         |  2 +-
 arch/arm/mach-omap2/board-h4.c             |  2 +-
 arch/arm/mach-omap2/board-omap3stalker.c   |  2 +-
 arch/arm/mach-pxa/stargate2.c              |  2 +-
 arch/arm/mach-s3c24xx/mach-mini2440.c      |  2 +-
 drivers/misc/eeprom/at24.c                 |  2 +-
 include/linux/i2c/at24.h                   | 55 ------------------------------
 include/linux/platform_data/at24.h         | 55 ++++++++++++++++++++++++++++++
 25 files changed, 78 insertions(+), 78 deletions(-)
 delete mode 100644 include/linux/i2c/at24.h
 create mode 100644 include/linux/platform_data/at24.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 284969fa2896..b9d19a7f3144 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1396,7 +1396,7 @@ M:	Wolfram Sang <wsa@the-dreams.de>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/misc/eeprom/at24.c
-F:	include/linux/i2c/at24.h
+F:	include/linux/platform_data/at24.h
 
 ATA OVER ETHERNET (AOE) DRIVER
 M:	"Ed L. Cashin" <ecashin@coraid.com>
diff --git a/arch/arm/mach-at91/board-sam9260ek.c b/arch/arm/mach-at91/board-sam9260ek.c
index 0b153c87521d..f4f8735315da 100644
--- a/arch/arm/mach-at91/board-sam9260ek.c
+++ b/arch/arm/mach-at91/board-sam9260ek.c
@@ -28,7 +28,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/at73c213.h>
 #include <linux/clk.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
 
diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c
index 3284df05df14..947e134ac4c3 100644
--- a/arch/arm/mach-at91/board-sam9263ek.c
+++ b/arch/arm/mach-at91/board-sam9263ek.c
@@ -27,7 +27,7 @@
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/fb.h>
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index c4bdc0a1c36e..66b5b3cb5376 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -17,7 +17,7 @@
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
 #include <linux/i2c/pcf857x.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/spi/spi.h>
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index dd1fb24521aa..f25a569b0009 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -18,7 +18,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/platform_data/pca953x.h>
 #include <linux/input.h>
 #include <linux/input/tps6507x-ts.h>
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index 92b7f770615a..30ee81fac925 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -18,7 +18,7 @@
 #include <linux/i2c.h>
 #include <linux/io.h>
 #include <linux/clk.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/leds.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 40bb9b5b87e8..f21fde9dce00 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -15,7 +15,7 @@
 #include <linux/gpio.h>
 #include <linux/i2c.h>
 #include <linux/i2c/pcf857x.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 2bc3651d56cc..db2df32da6a8 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -22,7 +22,7 @@
 #include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/i2c/pcf857x.h>
 
 #include <media/tvp514x.h>
diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index cd0f58730c2b..7aa105b1fd0f 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -15,7 +15,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/regulator/machine.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/etherdevice.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c
index d84360148100..41c7c9615791 100644
--- a/arch/arm/mach-davinci/board-sffsdr.c
+++ b/arch/arm/mach-davinci/board-sffsdr.c
@@ -26,7 +26,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-imx/mach-pca100.c b/arch/arm/mach-imx/mach-pca100.c
index 19bb6441a7d4..c5f95674e9b7 100644
--- a/arch/arm/mach-imx/mach-pca100.c
+++ b/arch/arm/mach-imx/mach-pca100.c
@@ -20,7 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/dma-mapping.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/eeprom.h>
diff --git a/arch/arm/mach-imx/mach-pcm037.c b/arch/arm/mach-imx/mach-pcm037.c
index bc0261e99d39..20cc53f4cee1 100644
--- a/arch/arm/mach-imx/mach-pcm037.c
+++ b/arch/arm/mach-imx/mach-pcm037.c
@@ -23,7 +23,7 @@
 #include <linux/smsc911x.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/delay.h>
 #include <linux/spi/spi.h>
 #include <linux/irq.h>
diff --git a/arch/arm/mach-imx/mach-pcm038.c b/arch/arm/mach-imx/mach-pcm038.c
index e805ac273e9c..592ddbe031ac 100644
--- a/arch/arm/mach-imx/mach-pcm038.c
+++ b/arch/arm/mach-imx/mach-pcm038.c
@@ -18,7 +18,7 @@
  */
 
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/io.h>
 #include <linux/mtd/plat-ram.h>
 #include <linux/mtd/physmap.h>
diff --git a/arch/arm/mach-imx/mach-pcm043.c b/arch/arm/mach-imx/mach-pcm043.c
index b726cb1c5fdd..ac504b67326b 100644
--- a/arch/arm/mach-imx/mach-pcm043.c
+++ b/arch/arm/mach-imx/mach-pcm043.c
@@ -24,7 +24,7 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/usb/otg.h>
 #include <linux/usb/ulpi.h>
 
diff --git a/arch/arm/mach-imx/mach-vpr200.c b/arch/arm/mach-imx/mach-vpr200.c
index 0910761e8280..8825d1217d18 100644
--- a/arch/arm/mach-imx/mach-vpr200.c
+++ b/arch/arm/mach-imx/mach-vpr200.c
@@ -29,7 +29,7 @@
 #include <asm/mach/time.h>
 
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/mfd/mc13xxx.h>
 
 #include "common.h"
diff --git a/arch/arm/mach-kirkwood/lacie_v2-common.c b/arch/arm/mach-kirkwood/lacie_v2-common.c
index 489495976fcd..8e3e4331c380 100644
--- a/arch/arm/mach-kirkwood/lacie_v2-common.c
+++ b/arch/arm/mach-kirkwood/lacie_v2-common.c
@@ -12,7 +12,7 @@
 #include <linux/spi/flash.h>
 #include <linux/spi/spi.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/gpio.h>
 #include <asm/mach/time.h>
 #include <mach/kirkwood.h>
diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c
index a7ce69286688..d68909b095f1 100644
--- a/arch/arm/mach-omap1/board-osk.c
+++ b/arch/arm/mach-omap1/board-osk.c
@@ -300,7 +300,7 @@ static struct omap_lcd_config osk_lcd_config __initdata = {
 #ifdef	CONFIG_OMAP_OSK_MISTRAL
 
 #include <linux/input.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
 
diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c
index 33d159e2386e..8dd0ec858cf1 100644
--- a/arch/arm/mach-omap2/board-cm-t35.c
+++ b/arch/arm/mach-omap2/board-cm-t35.c
@@ -25,7 +25,7 @@
 #include <linux/gpio.h>
 #include <linux/platform_data/gpio-omap.h>
 
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/i2c/twl.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c
index 87e41a8b8d46..f7808349a734 100644
--- a/arch/arm/mach-omap2/board-h4.c
+++ b/arch/arm/mach-omap2/board-h4.c
@@ -20,7 +20,7 @@
 #include <linux/delay.h>
 #include <linux/workqueue.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/input.h>
 #include <linux/err.h>
 #include <linux/clk.h>
diff --git a/arch/arm/mach-omap2/board-omap3stalker.c b/arch/arm/mach-omap2/board-omap3stalker.c
index ba8342fef799..119efaf5808a 100644
--- a/arch/arm/mach-omap2/board-omap3stalker.c
+++ b/arch/arm/mach-omap2/board-omap3stalker.c
@@ -32,7 +32,7 @@
 #include <linux/spi/spi.h>
 #include <linux/interrupt.h>
 #include <linux/smsc911x.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/usb/phy.h>
 
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 62aea3e835f3..01de542432a6 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -27,7 +27,7 @@
 
 #include <linux/i2c/pxa-i2c.h>
 #include <linux/i2c/pcf857x.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/smc91x.h>
 #include <linux/gpio.h>
 #include <linux/leds.h>
diff --git a/arch/arm/mach-s3c24xx/mach-mini2440.c b/arch/arm/mach-s3c24xx/mach-mini2440.c
index a83db46320bc..4a18d49a63e0 100644
--- a/arch/arm/mach-s3c24xx/mach-mini2440.c
+++ b/arch/arm/mach-s3c24xx/mach-mini2440.c
@@ -24,7 +24,7 @@
 #include <linux/io.h>
 #include <linux/serial_core.h>
 #include <linux/dm9000.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 #include <linux/platform_device.h>
 #include <linux/gpio_keys.h>
 #include <linux/i2c.h>
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 5d4fd69d04ca..4ef01ab67853 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -22,7 +22,7 @@
 #include <linux/jiffies.h>
 #include <linux/of.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 
 /*
  * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
diff --git a/include/linux/i2c/at24.h b/include/linux/i2c/at24.h
deleted file mode 100644
index 285025a9cdc9..000000000000
--- a/include/linux/i2c/at24.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * at24.h - platform_data for the at24 (generic eeprom) driver
- * (C) Copyright 2008 by Pengutronix
- * (C) Copyright 2012 by Wolfram Sang
- * same license as the driver
- */
-
-#ifndef _LINUX_AT24_H
-#define _LINUX_AT24_H
-
-#include <linux/types.h>
-#include <linux/memory.h>
-
-/**
- * struct at24_platform_data - data to set up at24 (generic eeprom) driver
- * @byte_len: size of eeprom in byte
- * @page_size: number of byte which can be written in one go
- * @flags: tunable options, check AT24_FLAG_* defines
- * @setup: an optional callback invoked after eeprom is probed; enables kernel
-	code to access eeprom via memory_accessor, see example
- * @context: optional parameter passed to setup()
- *
- * If you set up a custom eeprom type, please double-check the parameters.
- * Especially page_size needs extra care, as you risk data loss if your value
- * is bigger than what the chip actually supports!
- *
- * An example in pseudo code for a setup() callback:
- *
- * void get_mac_addr(struct memory_accessor *mem_acc, void *context)
- * {
- * 	u8 *mac_addr = ethernet_pdata->mac_addr;
- *	off_t offset = context;
- *
- *	// Read MAC addr from EEPROM
- *	if (mem_acc->read(mem_acc, mac_addr, offset, ETH_ALEN) == ETH_ALEN)
- *		pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
- * }
- *
- * This function pointer and context can now be set up in at24_platform_data.
- */
-
-struct at24_platform_data {
-	u32		byte_len;		/* size (sum of all addr) */
-	u16		page_size;		/* for writes */
-	u8		flags;
-#define AT24_FLAG_ADDR16	0x80	/* address pointer is 16 bit */
-#define AT24_FLAG_READONLY	0x40	/* sysfs-entry will be read-only */
-#define AT24_FLAG_IRUGO		0x20	/* sysfs-entry will be world-readable */
-#define AT24_FLAG_TAKE8ADDR	0x10	/* take always 8 addresses (24c00) */
-
-	void		(*setup)(struct memory_accessor *, void *context);
-	void		*context;
-};
-
-#endif /* _LINUX_AT24_H */
diff --git a/include/linux/platform_data/at24.h b/include/linux/platform_data/at24.h
new file mode 100644
index 000000000000..c42aa89d34ee
--- /dev/null
+++ b/include/linux/platform_data/at24.h
@@ -0,0 +1,55 @@
+/*
+ * at24.h - platform_data for the at24 (generic eeprom) driver
+ * (C) Copyright 2008 by Pengutronix
+ * (C) Copyright 2012 by Wolfram Sang
+ * same license as the driver
+ */
+
+#ifndef _LINUX_AT24_H
+#define _LINUX_AT24_H
+
+#include <linux/types.h>
+#include <linux/memory.h>
+
+/**
+ * struct at24_platform_data - data to set up at24 (generic eeprom) driver
+ * @byte_len: size of eeprom in byte
+ * @page_size: number of byte which can be written in one go
+ * @flags: tunable options, check AT24_FLAG_* defines
+ * @setup: an optional callback invoked after eeprom is probed; enables kernel
+	code to access eeprom via memory_accessor, see example
+ * @context: optional parameter passed to setup()
+ *
+ * If you set up a custom eeprom type, please double-check the parameters.
+ * Especially page_size needs extra care, as you risk data loss if your value
+ * is bigger than what the chip actually supports!
+ *
+ * An example in pseudo code for a setup() callback:
+ *
+ * void get_mac_addr(struct memory_accessor *mem_acc, void *context)
+ * {
+ *	u8 *mac_addr = ethernet_pdata->mac_addr;
+ *	off_t offset = context;
+ *
+ *	// Read MAC addr from EEPROM
+ *	if (mem_acc->read(mem_acc, mac_addr, offset, ETH_ALEN) == ETH_ALEN)
+ *		pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
+ * }
+ *
+ * This function pointer and context can now be set up in at24_platform_data.
+ */
+
+struct at24_platform_data {
+	u32		byte_len;		/* size (sum of all addr) */
+	u16		page_size;		/* for writes */
+	u8		flags;
+#define AT24_FLAG_ADDR16	0x80	/* address pointer is 16 bit */
+#define AT24_FLAG_READONLY	0x40	/* sysfs-entry will be read-only */
+#define AT24_FLAG_IRUGO		0x20	/* sysfs-entry will be world-readable */
+#define AT24_FLAG_TAKE8ADDR	0x10	/* take always 8 addresses (24c00) */
+
+	void		(*setup)(struct memory_accessor *, void *context);
+	void		*context;
+};
+
+#endif /* _LINUX_AT24_H */
-- 
cgit v1.2.3


From 2f303b74a62fb74983c0a66e2df353be963c527c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 25 Sep 2013 13:53:07 +0200
Subject: KVM: Convert kvm_lock back to non-raw spinlock

In commit e935b8372cf8 ("KVM: Convert kvm_lock to raw_spinlock"),
the kvm_lock was made a raw lock.  However, the kvm mmu_shrink()
function tries to grab the (non-raw) mmu_lock within the scope of
the raw locked kvm_lock being held.  This leads to the following:

BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
in_atomic(): 1, irqs_disabled(): 0, pid: 55, name: kswapd0
Preemption disabled at:[<ffffffffa0376eac>] mmu_shrink+0x5c/0x1b0 [kvm]

Pid: 55, comm: kswapd0 Not tainted 3.4.34_preempt-rt
Call Trace:
 [<ffffffff8106f2ad>] __might_sleep+0xfd/0x160
 [<ffffffff817d8d64>] rt_spin_lock+0x24/0x50
 [<ffffffffa0376f3c>] mmu_shrink+0xec/0x1b0 [kvm]
 [<ffffffff8111455d>] shrink_slab+0x17d/0x3a0
 [<ffffffff81151f00>] ? mem_cgroup_iter+0x130/0x260
 [<ffffffff8111824a>] balance_pgdat+0x54a/0x730
 [<ffffffff8111fe47>] ? set_pgdat_percpu_threshold+0xa7/0xd0
 [<ffffffff811185bf>] kswapd+0x18f/0x490
 [<ffffffff81070961>] ? get_parent_ip+0x11/0x50
 [<ffffffff81061970>] ? __init_waitqueue_head+0x50/0x50
 [<ffffffff81118430>] ? balance_pgdat+0x730/0x730
 [<ffffffff81060d2b>] kthread+0xdb/0xe0
 [<ffffffff8106e122>] ? finish_task_switch+0x52/0x100
 [<ffffffff817e1e94>] kernel_thread_helper+0x4/0x10
 [<ffffffff81060c50>] ? __init_kthread_worker+0x

After the previous patch, kvm_lock need not be a raw spinlock anymore,
so change it back.

Reported-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: kvm@vger.kernel.org
Cc: gleb@redhat.com
Cc: jan.kiszka@siemens.com
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/locking.txt |  2 +-
 arch/x86/kvm/mmu.c                    |  5 ++---
 arch/x86/kvm/x86.c                    |  8 ++++----
 include/linux/kvm_host.h              |  2 +-
 virt/kvm/kvm_main.c                   | 18 +++++++++---------
 5 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index ba9e1c2150c2..f8869410d40c 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -132,7 +132,7 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
 ------------
 
 Name:		kvm_lock
-Type:		raw_spinlock
+Type:		spinlock_t
 Arch:		any
 Protects:	- vm_list
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dce0df8150df..cf95cfe050a6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4428,7 +4428,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 	int nr_to_scan = sc->nr_to_scan;
 	unsigned long freed = 0;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		int idx;
@@ -4478,9 +4478,8 @@ unlock:
 		break;
 	}
 
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	return freed;
-
 }
 
 static unsigned long
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5ca72a5cdb6..187f824b1454 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5263,7 +5263,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
 	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (vcpu->cpu != freq->cpu)
@@ -5273,7 +5273,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 				send_ipi = 1;
 		}
 	}
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 
 	if (freq->old < freq->new && send_ipi) {
 		/*
@@ -5426,12 +5426,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
 	atomic_set(&kvm_guest_has_master_clock, 0);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 }
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 749bdb12cd15..7c961e1e9270 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -142,7 +142,7 @@ struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
-extern raw_spinlock_t kvm_lock;
+extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
 struct kvm_io_range {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eb94343c2ed2..d469114aff09 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,7 +70,7 @@ MODULE_LICENSE("GPL");
  * 		kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
 static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
@@ -491,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 
 	return kvm;
 
@@ -582,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kvm_io_bus_destroy(kvm->buses[i]);
@@ -3054,10 +3054,10 @@ static int vm_stat_get(void *_offset, u64 *val)
 	struct kvm *kvm;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		*val += *(u32 *)((void *)kvm + offset);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	return 0;
 }
 
@@ -3071,12 +3071,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	int i;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			*val += *(u32 *)((void *)vcpu + offset);
 
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6dedcca610c6d6189b4a54d32118d1654adb73d2 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hp.com>
Date: Wed, 25 Sep 2013 15:08:27 -0600
Subject: hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock()

cpu_hotplug_driver_lock() serializes CPU online/offline operations
when ARCH_CPU_PROBE_RELEASE is set.  This lock interface is no longer
necessary with the following reason:

 - lock_device_hotplug() now protects CPU online/offline operations,
   including the probe & release interfaces enabled by
   ARCH_CPU_PROBE_RELEASE.  The use of cpu_hotplug_driver_lock() is
   redundant.
 - cpu_hotplug_driver_lock() is only valid when ARCH_CPU_PROBE_RELEASE
   is defined, which is misleading and is only enabled on powerpc.

This patch removes the cpu_hotplug_driver_lock() interface.  As
a result, ARCH_CPU_PROBE_RELEASE only enables / disables the cpu
probe & release interface as intended.  There is no functional change
in this patch.

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Reviewed-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/powerpc/kernel/smp.c              | 12 ----------
 arch/powerpc/platforms/pseries/dlpar.c | 43 +++++++++++++---------------------
 arch/x86/kernel/topology.c             |  2 --
 drivers/base/cpu.c                     | 15 +++---------
 include/linux/cpu.h                    | 13 ----------
 5 files changed, 19 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8e59abc237d7..930cd8af3503 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -844,18 +844,6 @@ void __cpu_die(unsigned int cpu)
 		smp_ops->cpu_die(cpu);
 }
 
-static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex);
-
-void cpu_hotplug_driver_lock()
-{
-	mutex_lock(&powerpc_cpu_hotplug_driver_mutex);
-}
-
-void cpu_hotplug_driver_unlock()
-{
-	mutex_unlock(&powerpc_cpu_hotplug_driver_mutex);
-}
-
 void cpu_die(void)
 {
 	if (ppc_md.cpu_die)
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 7cfdaae1721a..a8fe5aa3d34f 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -404,46 +404,38 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	unsigned long drc_index;
 	int rc;
 
-	cpu_hotplug_driver_lock();
 	rc = strict_strtoul(buf, 0, &drc_index);
-	if (rc) {
-		rc = -EINVAL;
-		goto out;
-	}
+	if (rc)
+		return -EINVAL;
 
 	parent = of_find_node_by_path("/cpus");
-	if (!parent) {
-		rc = -ENODEV;
-		goto out;
-	}
+	if (!parent)
+		return -ENODEV;
 
 	dn = dlpar_configure_connector(drc_index, parent);
-	if (!dn) {
-		rc = -EINVAL;
-		goto out;
-	}
+	if (!dn)
+		return -EINVAL;
 
 	of_node_put(parent);
 
 	rc = dlpar_acquire_drc(drc_index);
 	if (rc) {
 		dlpar_free_cc_nodes(dn);
-		rc = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	rc = dlpar_attach_node(dn);
 	if (rc) {
 		dlpar_release_drc(drc_index);
 		dlpar_free_cc_nodes(dn);
-		goto out;
+		return rc;
 	}
 
 	rc = dlpar_online_cpu(dn);
-out:
-	cpu_hotplug_driver_unlock();
+	if (rc)
+		return rc;
 
-	return rc ? rc : count;
+	return count;
 }
 
 static int dlpar_offline_cpu(struct device_node *dn)
@@ -516,30 +508,27 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 		return -EINVAL;
 	}
 
-	cpu_hotplug_driver_lock();
 	rc = dlpar_offline_cpu(dn);
 	if (rc) {
 		of_node_put(dn);
-		rc = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	rc = dlpar_release_drc(*drc_index);
 	if (rc) {
 		of_node_put(dn);
-		goto out;
+		return rc;
 	}
 
 	rc = dlpar_detach_node(dn);
 	if (rc) {
 		dlpar_acquire_drc(*drc_index);
-		goto out;
+		return rc;
 	}
 
 	of_node_put(dn);
-out:
-	cpu_hotplug_driver_unlock();
-	return rc ? rc : count;
+
+	return count;
 }
 
 static int __init pseries_dlpar_init(void)
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index a3f35ebb3b52..649b010da00b 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -66,7 +66,6 @@ int __ref _debug_hotplug_cpu(int cpu, int action)
 		return -EINVAL;
 
 	lock_device_hotplug();
-	cpu_hotplug_driver_lock();
 
 	switch (action) {
 	case 0:
@@ -91,7 +90,6 @@ int __ref _debug_hotplug_cpu(int cpu, int action)
 		ret = -EINVAL;
 	}
 
-	cpu_hotplug_driver_unlock();
 	unlock_device_hotplug();
 
 	return ret;
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 51f5d7fe2f0b..f48370dfc908 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -44,13 +44,11 @@ static int __ref cpu_subsys_online(struct device *dev)
 	struct cpu *cpu = container_of(dev, struct cpu, dev);
 	int cpuid = dev->id;
 	int from_nid, to_nid;
-	int ret = -ENODEV;
-
-	cpu_hotplug_driver_lock();
+	int ret;
 
 	from_nid = cpu_to_node(cpuid);
 	if (from_nid == NUMA_NO_NODE)
-		goto out;
+		return -ENODEV;
 
 	ret = cpu_up(cpuid);
 	/*
@@ -61,19 +59,12 @@ static int __ref cpu_subsys_online(struct device *dev)
 	if (from_nid != to_nid)
 		change_cpu_under_node(cpu, from_nid, to_nid);
 
- out:
-	cpu_hotplug_driver_unlock();
 	return ret;
 }
 
 static int cpu_subsys_offline(struct device *dev)
 {
-	int ret;
-
-	cpu_hotplug_driver_lock();
-	ret = cpu_down(dev->id);
-	cpu_hotplug_driver_unlock();
-	return ret;
+	return cpu_down(dev->id);
 }
 
 void unregister_cpu(struct cpu *cpu)
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 801ff9e73679..3434ef7de017 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -185,19 +185,6 @@ extern void cpu_hotplug_enable(void);
 void clear_tasks_mm_cpumask(int cpu);
 int cpu_down(unsigned int cpu);
 
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-extern void cpu_hotplug_driver_lock(void);
-extern void cpu_hotplug_driver_unlock(void);
-#else
-static inline void cpu_hotplug_driver_lock(void)
-{
-}
-
-static inline void cpu_hotplug_driver_unlock(void)
-{
-}
-#endif
-
 #else		/* CONFIG_HOTPLUG_CPU */
 
 static inline void cpu_hotplug_begin(void) {}
-- 
cgit v1.2.3


From 27047a603645d0885bcd72d7a0b6cce6e3c94ca7 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 16 Sep 2013 18:56:03 +0530
Subject: cpufreq: Add new helper cpufreq_table_validate_and_show()

Almost every cpufreq driver is required to validate its frequency table with:
cpufreq_frequency_table_cpuinfo() and then expose it to cpufreq core with:
cpufreq_frequency_table_get_attr().

This patch creates another helper routine cpufreq_table_validate_and_show() that
will do both these steps in a single call and will return 0 for success, error
otherwise.

This also fixes potential bugs in cpufreq drivers where people have called
cpufreq_frequency_table_get_attr() before calling
cpufreq_frequency_table_cpuinfo(), as the later may fail.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/freq_table.c | 12 ++++++++++++
 include/linux/cpufreq.h      |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index f111454a7aea..11f6fa92a58d 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -219,6 +219,18 @@ void cpufreq_frequency_table_put_attr(unsigned int cpu)
 }
 EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr);
 
+int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
+				      struct cpufreq_frequency_table *table)
+{
+	int ret = cpufreq_frequency_table_cpuinfo(policy, table);
+
+	if (!ret)
+		cpufreq_frequency_table_get_attr(table, policy->cpu);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show);
+
 void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy)
 {
 	pr_debug("Updating show_table for new_cpu %u from last_cpu %u\n",
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index fcabc42d66ab..6b199ed7aa58 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -410,5 +410,7 @@ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 				      unsigned int cpu);
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
+int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
+				      struct cpufreq_frequency_table *table);
 
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3


From a528c219df2e865e178c538c7178961dfed5a13c Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 25 Sep 2013 12:02:44 +0200
Subject: dev: update __dev_notify_flags() to send rtnl msg

This patch only prepares the next one, there is no functional change.
Now, __dev_notify_flags() can also be used to notify flags changes via
rtnetlink.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 +++-
 net/core/dev.c            | 11 ++++++-----
 net/core/rtnetlink.c      |  3 +--
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4cfb63f264e..f44f99a69977 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2378,7 +2378,9 @@ extern int		dev_ethtool(struct net *net, struct ifreq *);
 extern unsigned int	dev_get_flags(const struct net_device *);
 extern int		__dev_change_flags(struct net_device *, unsigned int flags);
 extern int		dev_change_flags(struct net_device *, unsigned int);
-extern void		__dev_notify_flags(struct net_device *, unsigned int old_flags);
+void			__dev_notify_flags(struct net_device *,
+					   unsigned int old_flags,
+					   unsigned int gchanges);
 extern int		dev_change_name(struct net_device *, const char *);
 extern int		dev_set_alias(struct net_device *, const char *, size_t);
 extern int		dev_change_net_namespace(struct net_device *,
diff --git a/net/core/dev.c b/net/core/dev.c
index 25ab6fe80da2..594a6b0ab3da 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5235,10 +5235,14 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
 	return ret;
 }
 
-void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
+			unsigned int gchanges)
 {
 	unsigned int changes = dev->flags ^ old_flags;
 
+	if (gchanges)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges);
+
 	if (changes & IFF_UP) {
 		if (dev->flags & IFF_UP)
 			call_netdevice_notifiers(NETDEV_UP, dev);
@@ -5274,10 +5278,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
 		return ret;
 
 	changes = old_flags ^ dev->flags;
-	if (changes)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
-
-	__dev_notify_flags(dev, old_flags);
+	__dev_notify_flags(dev, old_flags, changes);
 	return ret;
 }
 EXPORT_SYMBOL(dev_change_flags);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2a0e21de3060..4aedf03da052 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1647,9 +1647,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
 	}
 
 	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
 
-	__dev_notify_flags(dev, old_flags);
+	__dev_notify_flags(dev, old_flags, ~0U);
 	return 0;
 }
 EXPORT_SYMBOL(rtnl_configure_link);
-- 
cgit v1.2.3


From 35b8dcf8c3a0be1feb1c8b29b22e1685ba0c2e14 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 30 Apr 2013 23:02:43 +0200
Subject: netfilter: ipset: Rename simple macro names to avoid namespace
 issues.

Reported-by: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      |   3 +
 net/netfilter/ipset/ip_set_bitmap_gen.h     |  47 +++++----
 net/netfilter/ipset/ip_set_bitmap_ip.c      |  10 +-
 net/netfilter/ipset/ip_set_bitmap_ipmac.c   |  10 +-
 net/netfilter/ipset/ip_set_bitmap_port.c    |  10 +-
 net/netfilter/ipset/ip_set_hash_gen.h       | 147 +++++++++++++++-------------
 net/netfilter/ipset/ip_set_hash_ip.c        |  10 +-
 net/netfilter/ipset/ip_set_hash_ipport.c    |  12 +--
 net/netfilter/ipset/ip_set_hash_ipportip.c  |  12 +--
 net/netfilter/ipset/ip_set_hash_ipportnet.c |  16 +--
 net/netfilter/ipset/ip_set_hash_net.c       |  14 +--
 net/netfilter/ipset/ip_set_hash_netiface.c  |  14 +--
 net/netfilter/ipset/ip_set_hash_netport.c   |  16 +--
 net/netfilter/ipset/ip_set_list_set.c       |  10 +-
 14 files changed, 169 insertions(+), 162 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 9ac9fbde7b61..f900f33a5f3d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -398,4 +398,7 @@ bitmap_bytes(u32 a, u32 b)
 	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
 	  .timeout = (map)->timeout }
 
+#define IPSET_CONCAT(a, b)		a##b
+#define IPSET_TOKEN(a, b)		IPSET_CONCAT(a, b)
+
 #endif /*_IP_SET_H */
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index f6af97cf8d3e..d39905e7e881 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -8,31 +8,28 @@
 #ifndef __IP_SET_BITMAP_IP_GEN_H
 #define __IP_SET_BITMAP_IP_GEN_H
 
-#define CONCAT(a, b)		a##b
-#define TOKEN(a,b)		CONCAT(a, b)
-
-#define mtype_do_test		TOKEN(MTYPE, _do_test)
-#define mtype_gc_test		TOKEN(MTYPE, _gc_test)
-#define mtype_is_filled		TOKEN(MTYPE, _is_filled)
-#define mtype_do_add		TOKEN(MTYPE, _do_add)
-#define mtype_do_del		TOKEN(MTYPE, _do_del)
-#define mtype_do_list		TOKEN(MTYPE, _do_list)
-#define mtype_do_head		TOKEN(MTYPE, _do_head)
-#define mtype_adt_elem		TOKEN(MTYPE, _adt_elem)
-#define mtype_add_timeout	TOKEN(MTYPE, _add_timeout)
-#define mtype_gc_init		TOKEN(MTYPE, _gc_init)
-#define mtype_kadt		TOKEN(MTYPE, _kadt)
-#define mtype_uadt		TOKEN(MTYPE, _uadt)
-#define mtype_destroy		TOKEN(MTYPE, _destroy)
-#define mtype_flush		TOKEN(MTYPE, _flush)
-#define mtype_head		TOKEN(MTYPE, _head)
-#define mtype_same_set		TOKEN(MTYPE, _same_set)
-#define mtype_elem		TOKEN(MTYPE, _elem)
-#define mtype_test		TOKEN(MTYPE, _test)
-#define mtype_add		TOKEN(MTYPE, _add)
-#define mtype_del		TOKEN(MTYPE, _del)
-#define mtype_list		TOKEN(MTYPE, _list)
-#define mtype_gc		TOKEN(MTYPE, _gc)
+#define mtype_do_test		IPSET_TOKEN(MTYPE, _do_test)
+#define mtype_gc_test		IPSET_TOKEN(MTYPE, _gc_test)
+#define mtype_is_filled		IPSET_TOKEN(MTYPE, _is_filled)
+#define mtype_do_add		IPSET_TOKEN(MTYPE, _do_add)
+#define mtype_do_del		IPSET_TOKEN(MTYPE, _do_del)
+#define mtype_do_list		IPSET_TOKEN(MTYPE, _do_list)
+#define mtype_do_head		IPSET_TOKEN(MTYPE, _do_head)
+#define mtype_adt_elem		IPSET_TOKEN(MTYPE, _adt_elem)
+#define mtype_add_timeout	IPSET_TOKEN(MTYPE, _add_timeout)
+#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)
+#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
+#define mtype_head		IPSET_TOKEN(MTYPE, _head)
+#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_test		IPSET_TOKEN(MTYPE, _test)
+#define mtype_add		IPSET_TOKEN(MTYPE, _add)
+#define mtype_del		IPSET_TOKEN(MTYPE, _del)
+#define mtype_list		IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
 #define mtype			MTYPE
 
 #define ext_timeout(e, m)	\
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index f1a8128bef01..c2f89b1c1d92 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -25,12 +25,12 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counter support added */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1	/* Counter support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip");
 
 #define MTYPE		bitmap_ip
@@ -401,8 +401,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_IPV4,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= bitmap_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 3b30e0bef890..1d6551cc590e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -25,12 +25,12 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counter support added */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1	/* Counter support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip,mac");
 
 #define MTYPE		bitmap_ipmac
@@ -460,8 +460,8 @@ static struct ip_set_type bitmap_ipmac_type = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_MAC,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_IPV4,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= bitmap_ipmac_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 8207d1fda528..b22048965d2a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -20,12 +20,12 @@
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 #include <linux/netfilter/ipset/ip_set_getport.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counter support added */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1	/* Counter support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:port");
 
 #define MTYPE		bitmap_port
@@ -333,8 +333,8 @@ static struct ip_set_type bitmap_port_type = {
 	.features	= IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= bitmap_port_create,
 	.create_policy	= {
 		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 09a21dd5f120..68b9ccebabaa 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -17,9 +17,6 @@
 
 #define rcu_dereference_bh_nfnl(p)	rcu_dereference_bh_check(p, 1)
 
-#define CONCAT(a, b)		a##b
-#define TOKEN(a, b)		CONCAT(a, b)
-
 /* Hashing which uses arrays to resolve clashing. The hash table is resized
  * (doubled) when searching becomes too long.
  * Internally jhash is used with the assumption that the size of the
@@ -222,41 +219,41 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 
 #undef HKEY
 
-#define mtype_data_equal	TOKEN(MTYPE, _data_equal)
+#define mtype_data_equal	IPSET_TOKEN(MTYPE, _data_equal)
 #ifdef IP_SET_HASH_WITH_NETS
-#define mtype_do_data_match	TOKEN(MTYPE, _do_data_match)
+#define mtype_do_data_match	IPSET_TOKEN(MTYPE, _do_data_match)
 #else
 #define mtype_do_data_match(d)	1
 #endif
-#define mtype_data_set_flags	TOKEN(MTYPE, _data_set_flags)
-#define mtype_data_reset_flags	TOKEN(MTYPE, _data_reset_flags)
-#define mtype_data_netmask	TOKEN(MTYPE, _data_netmask)
-#define mtype_data_list		TOKEN(MTYPE, _data_list)
-#define mtype_data_next		TOKEN(MTYPE, _data_next)
-#define mtype_elem		TOKEN(MTYPE, _elem)
-#define mtype_add_cidr		TOKEN(MTYPE, _add_cidr)
-#define mtype_del_cidr		TOKEN(MTYPE, _del_cidr)
-#define mtype_ahash_memsize	TOKEN(MTYPE, _ahash_memsize)
-#define mtype_flush		TOKEN(MTYPE, _flush)
-#define mtype_destroy		TOKEN(MTYPE, _destroy)
-#define mtype_gc_init		TOKEN(MTYPE, _gc_init)
-#define mtype_same_set		TOKEN(MTYPE, _same_set)
-#define mtype_kadt		TOKEN(MTYPE, _kadt)
-#define mtype_uadt		TOKEN(MTYPE, _uadt)
+#define mtype_data_set_flags	IPSET_TOKEN(MTYPE, _data_set_flags)
+#define mtype_data_reset_flags	IPSET_TOKEN(MTYPE, _data_reset_flags)
+#define mtype_data_netmask	IPSET_TOKEN(MTYPE, _data_netmask)
+#define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
+#define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
+#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
+#define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
+#define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
+#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
+#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)
 #define mtype			MTYPE
 
-#define mtype_elem		TOKEN(MTYPE, _elem)
-#define mtype_add		TOKEN(MTYPE, _add)
-#define mtype_del		TOKEN(MTYPE, _del)
-#define mtype_test_cidrs	TOKEN(MTYPE, _test_cidrs)
-#define mtype_test		TOKEN(MTYPE, _test)
-#define mtype_expire		TOKEN(MTYPE, _expire)
-#define mtype_resize		TOKEN(MTYPE, _resize)
-#define mtype_head		TOKEN(MTYPE, _head)
-#define mtype_list		TOKEN(MTYPE, _list)
-#define mtype_gc		TOKEN(MTYPE, _gc)
-#define mtype_variant		TOKEN(MTYPE, _variant)
-#define mtype_data_match	TOKEN(MTYPE, _data_match)
+#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_add		IPSET_TOKEN(MTYPE, _add)
+#define mtype_del		IPSET_TOKEN(MTYPE, _del)
+#define mtype_test_cidrs	IPSET_TOKEN(MTYPE, _test_cidrs)
+#define mtype_test		IPSET_TOKEN(MTYPE, _test)
+#define mtype_expire		IPSET_TOKEN(MTYPE, _expire)
+#define mtype_resize		IPSET_TOKEN(MTYPE, _resize)
+#define mtype_head		IPSET_TOKEN(MTYPE, _head)
+#define mtype_list		IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
+#define mtype_variant		IPSET_TOKEN(MTYPE, _variant)
+#define mtype_data_match	IPSET_TOKEN(MTYPE, _data_match)
 
 #ifndef HKEY_DATALEN
 #define HKEY_DATALEN		sizeof(struct mtype_elem)
@@ -941,13 +938,13 @@ nla_put_failure:
 }
 
 static int
-TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
-	      const struct xt_action_param *par,
-	      enum ipset_adt adt, struct ip_set_adt_opt *opt);
+IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
+	    const struct xt_action_param *par,
+	    enum ipset_adt adt, struct ip_set_adt_opt *opt);
 
 static int
-TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
-	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
+IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
+	    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
 
 static const struct ip_set_type_variant mtype_variant = {
 	.kadt	= mtype_kadt,
@@ -967,7 +964,7 @@ static const struct ip_set_type_variant mtype_variant = {
 
 #ifdef IP_SET_EMIT_CREATE
 static int
-TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
+IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
 	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
 	u32 cadt_flags = 0;
@@ -1045,9 +1042,9 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 
 	set->data = h;
 	if (set->family ==  NFPROTO_IPV4)
-		set->variant = &TOKEN(HTYPE, 4_variant);
+		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
 	else
-		set->variant = &TOKEN(HTYPE, 6_variant);
+		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
 
 	if (tb[IPSET_ATTR_CADT_FLAGS])
 		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
@@ -1058,64 +1055,74 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 			if (set->family == NFPROTO_IPV4) {
-				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 4ct_elem));
+				h->dsize = sizeof(struct
+					IPSET_TOKEN(HTYPE, 4ct_elem));
 				h->offset[IPSET_OFFSET_TIMEOUT] =
-					offsetof(struct TOKEN(HTYPE, 4ct_elem),
-						 timeout);
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 4ct_elem),
+						timeout);
 				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 4ct_elem),
-						 counter);
-				TOKEN(HTYPE, 4_gc_init)(set,
-					TOKEN(HTYPE, 4_gc));
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 4ct_elem),
+						counter);
+				IPSET_TOKEN(HTYPE, 4_gc_init)(set,
+					IPSET_TOKEN(HTYPE, 4_gc));
 			} else {
-				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 6ct_elem));
+				h->dsize = sizeof(struct
+					IPSET_TOKEN(HTYPE, 6ct_elem));
 				h->offset[IPSET_OFFSET_TIMEOUT] =
-					offsetof(struct TOKEN(HTYPE, 6ct_elem),
-						 timeout);
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 6ct_elem),
+						timeout);
 				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 6ct_elem),
-						 counter);
-				TOKEN(HTYPE, 6_gc_init)(set,
-					TOKEN(HTYPE, 6_gc));
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 6ct_elem),
+						counter);
+				IPSET_TOKEN(HTYPE, 6_gc_init)(set,
+					IPSET_TOKEN(HTYPE, 6_gc));
 			}
 		} else {
 			if (set->family == NFPROTO_IPV4) {
 				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 4c_elem));
+					sizeof(struct
+						IPSET_TOKEN(HTYPE, 4c_elem));
 				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 4c_elem),
-						 counter);
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 4c_elem),
+						counter);
 			} else {
 				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 6c_elem));
+					sizeof(struct
+						IPSET_TOKEN(HTYPE, 6c_elem));
 				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 6c_elem),
-						 counter);
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 6c_elem),
+						counter);
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
 		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		if (set->family == NFPROTO_IPV4) {
-			h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem));
+			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
 			h->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct TOKEN(HTYPE, 4t_elem),
+				offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem),
 					 timeout);
-			TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc));
+			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
+				IPSET_TOKEN(HTYPE, 4_gc));
 		} else {
-			h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem));
+			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
 			h->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct TOKEN(HTYPE, 6t_elem),
+				offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem),
 					 timeout);
-			TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc));
+			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
+				IPSET_TOKEN(HTYPE, 6_gc));
 		}
 	} else {
 		if (set->family == NFPROTO_IPV4)
-			h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem));
+			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem));
 		else
-			h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem));
+			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem));
 	}
 
 	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index c74e6e14cd93..de44fca687c3 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -23,12 +23,12 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counters support */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1	/* Counters support */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip");
 
 /* Type specific function prefix */
@@ -304,8 +304,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 7a2d2bd98d04..b514ff4e83ae 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -24,13 +24,13 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-#define REVISION_MAX	2 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+#define IPSET_TYPE_REV_MAX	2 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port");
 
 /* Type specific function prefix */
@@ -396,8 +396,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 34e8a1acce42..d05070d74604 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -24,13 +24,13 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-#define REVISION_MAX	2 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+#define IPSET_TYPE_REV_MAX	2 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,ip");
 
 /* Type specific function prefix */
@@ -414,8 +414,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipportip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 9a80d8bc9f18..7d1dede4ab6d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -24,15 +24,15 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-/*			2    Range as input support for IPv4 added */
-/*			3    nomatch flag support added */
-#define REVISION_MAX	4 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+/*				2    Range as input support for IPv4 added */
+/*				3    nomatch flag support added */
+#define IPSET_TYPE_REV_MAX	4 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,net");
 
 /* Type specific function prefix */
@@ -574,8 +574,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
 			  IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipportnet_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 223e9f546d0f..9cb9ef43d941 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -22,14 +22,14 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    Range as input support for IPv4 added */
-/*			2    nomatch flag support added */
-#define REVISION_MAX	3 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    Range as input support for IPv4 added */
+/*				2    nomatch flag support added */
+#define IPSET_TYPE_REV_MAX	3 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net");
 
 /* Type specific function prefix */
@@ -406,8 +406,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_net_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 7d798d5d5cd3..2310fc29e89e 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -23,14 +23,14 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    nomatch flag support added */
-/*			2    /0 support added */
-#define REVISION_MAX	3 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    nomatch flag support added */
+/*				2    /0 support added */
+#define IPSET_TYPE_REV_MAX	3 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net,iface");
 
 /* Interface name rbtree */
@@ -645,8 +645,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 			  IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_netiface_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 09d6690bee6f..1601d489fdbc 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -23,15 +23,15 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-/*			2    Range as input support for IPv4 added */
-/*			3    nomatch flag support added */
-#define REVISION_MAX	4 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+/*				2    Range as input support for IPv4 added */
+/*				3    nomatch flag support added */
+#define IPSET_TYPE_REV_MAX	4 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net,port");
 
 /* Type specific function prefix */
@@ -518,8 +518,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_netport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 68299ee15847..a9e301f6e093 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -15,12 +15,12 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_list.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_list:set");
 
 /* Member elements  */
@@ -703,8 +703,8 @@ static struct ip_set_type list_set_type __read_mostly = {
 	.features	= IPSET_TYPE_NAME | IPSET_DUMP_LAST,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= list_set_create,
 	.create_policy	= {
 		[IPSET_ATTR_SIZE]	= { .type = NLA_U32 },
-- 
cgit v1.2.3


From b8cd97865c903e032db85e5a4f2783928c56f2bd Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 2 May 2013 10:52:27 +0200
Subject: netfilter: ipset: Use fix sized type for timeout in the extension
 part

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index f900f33a5f3d..69aa60487f05 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -67,7 +67,7 @@ enum ip_set_offset {
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
 
 struct ip_set_ext {
-	unsigned long timeout;
+	u32 timeout;
 	u64 packets;
 	u64 bytes;
 };
-- 
cgit v1.2.3


From a04d8b6bd9113f3e7f0c216dcaa3c1ad498f2a96 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 30 Sep 2013 09:05:54 +0200
Subject: netfilter: ipset: Prepare ipset to support multiple networks for hash
 types

In order to support hash:net,net, hash:net,port,net etc. types,
arrays are introduced for the book-keeping of existing cidr sizes
and network numbers in a set.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      |  2 +
 net/netfilter/ipset/ip_set_hash_gen.h       | 78 +++++++++++++++--------------
 net/netfilter/ipset/ip_set_hash_ipportnet.c |  4 +-
 net/netfilter/ipset/ip_set_hash_net.c       |  4 +-
 net/netfilter/ipset/ip_set_hash_netiface.c  |  4 +-
 net/netfilter/ipset/ip_set_hash_netport.c   |  4 +-
 6 files changed, 50 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 69aa60487f05..56012a3431b2 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -398,6 +398,8 @@ bitmap_bytes(u32 a, u32 b)
 	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
 	  .timeout = (map)->timeout }
 
+#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))
+
 #define IPSET_CONCAT(a, b)		a##b
 #define IPSET_TOKEN(a, b)		IPSET_CONCAT(a, b)
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 68b9ccebabaa..a8332404d9b4 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -77,10 +77,14 @@ struct htable {
 
 #define hbucket(h, i)		(&((h)->bucket[i]))
 
+#ifndef IPSET_NET_COUNT
+#define IPSET_NET_COUNT		1
+#endif
+
 /* Book-keeping of the prefixes added to the set */
 struct net_prefixes {
-	u8 cidr;		/* the different cidr values in the set */
-	u32 nets;		/* number of elements per cidr */
+	u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */
+	u8 cidr[IPSET_NET_COUNT];  /* the different cidr values in the set */
 };
 
 /* Compute the hash table size */
@@ -165,13 +169,13 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #define SET_HOST_MASK(family)	(family == AF_INET ? 32 : 128)
 
 #ifdef IP_SET_HASH_WITH_MULTI
-#define NETS_LENGTH(family)	(SET_HOST_MASK(family) + 1)
+#define NLEN(family)		(SET_HOST_MASK(family) + 1)
 #else
-#define NETS_LENGTH(family)	SET_HOST_MASK(family)
+#define NLEN(family)		SET_HOST_MASK(family)
 #endif
 
 #else
-#define NETS_LENGTH(family)	0
+#define NLEN(family)		0
 #endif /* IP_SET_HASH_WITH_NETS */
 
 #define ext_timeout(e, h)	\
@@ -296,49 +300,49 @@ struct htype {
 /* Network cidr size book keeping when the hash stores different
  * sized networks */
 static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
+mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 {
 	int i, j;
 
 	/* Add in increasing prefix order, so larger cidr first */
-	for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) {
+	for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) {
 		if (j != -1)
 			continue;
-		else if (h->nets[i].cidr < cidr)
+		else if (h->nets[i].cidr[n] < cidr)
 			j = i;
-		else if (h->nets[i].cidr == cidr) {
-			h->nets[i].nets++;
+		else if (h->nets[i].cidr[n] == cidr) {
+			h->nets[i].nets[n]++;
 			return;
 		}
 	}
 	if (j != -1) {
 		for (; i > j; i--) {
-			h->nets[i].cidr = h->nets[i - 1].cidr;
-			h->nets[i].nets = h->nets[i - 1].nets;
+			h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
+			h->nets[i].nets[n] = h->nets[i - 1].nets[n];
 		}
 	}
-	h->nets[i].cidr = cidr;
-	h->nets[i].nets = 1;
+	h->nets[i].cidr[n] = cidr;
+	h->nets[i].nets[n] = 1;
 }
 
 static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
+mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 {
 	u8 i, j, net_end = nets_length - 1;
 
 	for (i = 0; i < nets_length; i++) {
-	        if (h->nets[i].cidr != cidr)
+	        if (h->nets[i].cidr[n] != cidr)
 	                continue;
-                if (h->nets[i].nets > 1 || i == net_end ||
-                    h->nets[i + 1].nets == 0) {
-                        h->nets[i].nets--;
+                if (h->nets[i].nets[n] > 1 || i == net_end ||
+                    h->nets[i + 1].nets[n] == 0) {
+                        h->nets[i].nets[n]--;
                         return;
                 }
-                for (j = i; j < net_end && h->nets[j].nets; j++) {
-		        h->nets[j].cidr = h->nets[j + 1].cidr;
-		        h->nets[j].nets = h->nets[j + 1].nets;
+                for (j = i; j < net_end && h->nets[j].nets[n]; j++) {
+		        h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
+		        h->nets[j].nets[n] = h->nets[j + 1].nets[n];
                 }
-                h->nets[j].nets = 0;
+                h->nets[j].nets[n] = 0;
                 return;
 	}
 }
@@ -382,8 +386,7 @@ mtype_flush(struct ip_set *set)
 		}
 	}
 #ifdef IP_SET_HASH_WITH_NETS
-	memset(h->nets, 0, sizeof(struct net_prefixes)
-			   * NETS_LENGTH(set->family));
+	memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
 #endif
 	h->elements = 0;
 }
@@ -459,7 +462,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
 				pr_debug("expired %u/%u\n", i, j);
 #ifdef IP_SET_HASH_WITH_NETS
 				mtype_del_cidr(h, CIDR(data->cidr),
-					       nets_length);
+					       nets_length, 0);
 #endif
 				if (j != n->pos - 1)
 					/* Not last one */
@@ -494,7 +497,7 @@ mtype_gc(unsigned long ul_set)
 
 	pr_debug("called\n");
 	write_lock_bh(&set->lock);
-	mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+	mtype_expire(h, NLEN(set->family), h->dsize);
 	write_unlock_bh(&set->lock);
 
 	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
@@ -523,8 +526,7 @@ mtype_resize(struct ip_set *set, bool retried)
 	if (SET_WITH_TIMEOUT(set) && !retried) {
 		i = h->elements;
 		write_lock_bh(&set->lock);
-		mtype_expire(set->data, NETS_LENGTH(set->family),
-			     h->dsize);
+		mtype_expire(set->data, NLEN(set->family), h->dsize);
 		write_unlock_bh(&set->lock);
 		if (h->elements < i)
 			return 0;
@@ -607,7 +609,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
 		/* FIXME: when set is full, we slow down here */
-		mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+		mtype_expire(h, NLEN(set->family), h->dsize);
 
 	if (h->elements >= h->maxelem) {
 		if (net_ratelimit())
@@ -645,8 +647,8 @@ reuse_slot:
 		/* Fill out reused slot */
 		data = ahash_data(n, j, h->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
-		mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family));
-		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+		mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0);
+		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
 	} else {
 		/* Use/create a new slot */
@@ -659,7 +661,7 @@ reuse_slot:
 		}
 		data = ahash_data(n, n->pos++, h->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
-		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
 		h->elements++;
 	}
@@ -711,7 +713,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		n->pos--;
 		h->elements--;
 #ifdef IP_SET_HASH_WITH_NETS
-		mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+		mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
 		if (n->pos + AHASH_INIT_SIZE < n->size) {
 			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
@@ -760,11 +762,11 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 	struct mtype_elem *data;
 	int i, j = 0;
 	u32 key, multi = 0;
-	u8 nets_length = NETS_LENGTH(set->family);
+	u8 nets_length = NLEN(set->family);
 
 	pr_debug("test by nets\n");
-	for (; j < nets_length && h->nets[j].nets && !multi; j++) {
-		mtype_data_netmask(d, h->nets[j].cidr);
+	for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) {
+		mtype_data_netmask(d, h->nets[j].cidr[0]);
 		key = HKEY(d, h->initval, t->htable_bits);
 		n = hbucket(t, key);
 		for (i = 0; i < n->pos; i++) {
@@ -839,7 +841,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	size_t memsize;
 
 	t = rcu_dereference_bh_nfnl(h->table);
-	memsize = mtype_ahash_memsize(h, t, NETS_LENGTH(set->family));
+	memsize = mtype_ahash_memsize(h, t, NLEN(set->family));
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index f111558c4597..6ce5a8e2c44e 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -170,7 +170,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
@@ -454,7 +454,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 0a64dad156d9..ec1c7dc10489 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -143,7 +143,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
@@ -338,7 +338,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 846371b08630..814b4e31a7f8 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -265,7 +265,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
@@ -534,7 +534,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index d98a685cd916..3bd923d3f179 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -162,7 +162,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
@@ -407,7 +407,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1,
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
-- 
cgit v1.2.3


From f925f7056920213889c5e61445f9529f1a86ae41 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 6 Sep 2013 22:31:40 +0200
Subject: netfilter: ipset: Rename extension offset ids to extension ids

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h    | 16 ++++++++--------
 net/netfilter/ipset/ip_set_bitmap_gen.h   |  4 ++--
 net/netfilter/ipset/ip_set_bitmap_ip.c    | 10 +++++-----
 net/netfilter/ipset/ip_set_bitmap_ipmac.c | 10 +++++-----
 net/netfilter/ipset/ip_set_bitmap_port.c  | 10 +++++-----
 net/netfilter/ipset/ip_set_hash_gen.h     | 22 +++++++++++-----------
 net/netfilter/ipset/ip_set_list_set.c     | 14 +++++++-------
 7 files changed, 43 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 56012a3431b2..b4db7912bf0d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -56,20 +56,20 @@ enum ip_set_extension {
 	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
 };
 
-/* Extension offsets */
-enum ip_set_offset {
-	IPSET_OFFSET_TIMEOUT = 0,
-	IPSET_OFFSET_COUNTER,
-	IPSET_OFFSET_MAX,
-};
-
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
 
+/* Extension id, in size order */
+enum ip_set_ext_id {
+	IPSET_EXT_ID_COUNTER = 0,
+	IPSET_EXT_ID_TIMEOUT,
+	IPSET_EXT_ID_MAX,
+};
+
 struct ip_set_ext {
-	u32 timeout;
 	u64 packets;
 	u64 bytes;
+	u32 timeout;
 };
 
 struct ip_set;
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d39905e7e881..889a929b908f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,9 +33,9 @@
 #define mtype			MTYPE
 
 #define ext_timeout(e, m)	\
-	(unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
+	(unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, m)	\
-	(struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER])
+	(struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER])
 #define get_ext(map, id)	((map)->extensions + (map)->dsize * (id))
 
 static void
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index ce99d2652d47..2ee210ef49a2 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -45,7 +45,7 @@ struct bitmap_ip {
 	u32 hosts;		/* number of hosts in a subnet */
 	size_t memsize;		/* members size */
 	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u8 netmask;		/* subnet netmask */
 	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
@@ -342,9 +342,9 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
 			map->dsize = sizeof(struct bitmap_ipct_elem);
-			map->offset[IPSET_OFFSET_TIMEOUT] =
+			map->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_ipct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipct_elem, counter);
 
 			if (!init_map_ip(set, map, first_ip, last_ip,
@@ -360,7 +360,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			bitmap_ip_gc_init(set, bitmap_ip_gc);
 		} else {
 			map->dsize = sizeof(struct bitmap_ipc_elem);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipc_elem, counter);
 
 			if (!init_map_ip(set, map, first_ip, last_ip,
@@ -371,7 +371,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
 		map->dsize = sizeof(struct bitmap_ipt_elem);
-		map->offset[IPSET_OFFSET_TIMEOUT] =
+		map->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_ipt_elem, timeout);
 
 		if (!init_map_ip(set, map, first_ip, last_ip,
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 6d5bad93026b..e711875ea452 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -52,7 +52,7 @@ struct bitmap_ipmac {
 	struct timer_list gc;	/* garbage collector */
 	size_t memsize;		/* members size */
 	size_t dsize;		/* size of element */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 };
 
 /* ADT structure for generic function args */
@@ -405,9 +405,9 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
 			map->dsize = sizeof(struct bitmap_ipmacct_elem);
-			map->offset[IPSET_OFFSET_TIMEOUT] =
+			map->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_ipmacct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipmacct_elem, counter);
 
 			if (!init_map_ipmac(set, map, first_ip, last_ip,
@@ -421,7 +421,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 			bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
 		} else {
 			map->dsize = sizeof(struct bitmap_ipmacc_elem);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipmacc_elem, counter);
 
 			if (!init_map_ipmac(set, map, first_ip, last_ip,
@@ -432,7 +432,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
 		map->dsize = sizeof(struct bitmap_ipmact_elem);
-		map->offset[IPSET_OFFSET_TIMEOUT] =
+		map->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_ipmact_elem, timeout);
 
 		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index b22048965d2a..bebc137a5737 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -39,7 +39,7 @@ struct bitmap_port {
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
 };
@@ -282,9 +282,9 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
 			map->dsize = sizeof(struct bitmap_portct_elem);
-			map->offset[IPSET_OFFSET_TIMEOUT] =
+			map->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_portct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_portct_elem, counter);
 			if (!init_map_port(set, map, first_port, last_port)) {
 				kfree(map);
@@ -297,7 +297,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			bitmap_port_gc_init(set, bitmap_port_gc);
 		} else {
 			map->dsize = sizeof(struct bitmap_portc_elem);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_portc_elem, counter);
 			if (!init_map_port(set, map, first_port, last_port)) {
 				kfree(map);
@@ -306,7 +306,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
 		map->dsize = sizeof(struct bitmap_portt_elem);
-		map->offset[IPSET_OFFSET_TIMEOUT] =
+		map->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_portt_elem, timeout);
 		if (!init_map_port(set, map, first_port, last_port)) {
 			kfree(map);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index a8332404d9b4..e4db9250f337 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -179,9 +179,9 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #endif /* IP_SET_HASH_WITH_NETS */
 
 #define ext_timeout(e, h)	\
-(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT])
+(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, h)	\
-(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER])
+(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER])
 
 #endif /* _IP_SET_HASH_GEN_H */
 
@@ -278,7 +278,7 @@ struct htype {
 	u32 initval;		/* random jhash init value */
 	u32 timeout;		/* timeout value, if enabled */
 	size_t dsize;		/* data struct size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	struct timer_list gc;	/* garbage collection when timeout enabled */
 	struct mtype_elem next; /* temporary storage for uadd */
 #ifdef IP_SET_HASH_WITH_MULTI
@@ -1059,11 +1059,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			if (set->family == NFPROTO_IPV4) {
 				h->dsize = sizeof(struct
 					IPSET_TOKEN(HTYPE, 4ct_elem));
-				h->offset[IPSET_OFFSET_TIMEOUT] =
+				h->offset[IPSET_EXT_ID_TIMEOUT] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4ct_elem),
 						timeout);
-				h->offset[IPSET_OFFSET_COUNTER] =
+				h->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4ct_elem),
 						counter);
@@ -1072,11 +1072,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			} else {
 				h->dsize = sizeof(struct
 					IPSET_TOKEN(HTYPE, 6ct_elem));
-				h->offset[IPSET_OFFSET_TIMEOUT] =
+				h->offset[IPSET_EXT_ID_TIMEOUT] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6ct_elem),
 						timeout);
-				h->offset[IPSET_OFFSET_COUNTER] =
+				h->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6ct_elem),
 						counter);
@@ -1088,7 +1088,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 				h->dsize =
 					sizeof(struct
 						IPSET_TOKEN(HTYPE, 4c_elem));
-				h->offset[IPSET_OFFSET_COUNTER] =
+				h->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4c_elem),
 						counter);
@@ -1096,7 +1096,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 				h->dsize =
 					sizeof(struct
 						IPSET_TOKEN(HTYPE, 6c_elem));
-				h->offset[IPSET_OFFSET_COUNTER] =
+				h->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6c_elem),
 						counter);
@@ -1107,14 +1107,14 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		if (set->family == NFPROTO_IPV4) {
 			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
-			h->offset[IPSET_OFFSET_TIMEOUT] =
+			h->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem),
 					 timeout);
 			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
 				IPSET_TOKEN(HTYPE, 4_gc));
 		} else {
 			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
-			h->offset[IPSET_OFFSET_TIMEOUT] =
+			h->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem),
 					 timeout);
 			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index a9e301f6e093..0ed19b5e675e 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -59,7 +59,7 @@ struct set_adt_elem {
 /* Type structure */
 struct list_set {
 	size_t dsize;		/* element size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u32 size;		/* size of set list array */
 	u32 timeout;		/* timeout value */
 	struct timer_list gc;	/* garbage collection */
@@ -73,9 +73,9 @@ list_set_elem(const struct list_set *map, u32 id)
 }
 
 #define ext_timeout(e, m)	\
-(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
+(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, m)	\
-(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER])
+(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER])
 
 static int
 list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
@@ -667,9 +667,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			if (!map)
 				return -ENOMEM;
 			set->extensions |= IPSET_EXT_TIMEOUT;
-			map->offset[IPSET_OFFSET_TIMEOUT] =
+			map->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct setct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct setct_elem, counter);
 			list_set_gc_init(set, list_set_gc);
 		} else {
@@ -677,7 +677,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 					    sizeof(struct setc_elem), 0);
 			if (!map)
 				return -ENOMEM;
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct setc_elem, counter);
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
@@ -686,7 +686,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		if (!map)
 			return -ENOMEM;
 		set->extensions |= IPSET_EXT_TIMEOUT;
-		map->offset[IPSET_OFFSET_TIMEOUT] =
+		map->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct sett_elem, timeout);
 		list_set_gc_init(set, list_set_gc);
 	} else {
-- 
cgit v1.2.3


From ca134ce86451f3f5ac45ffbf1494a1f42110bf93 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Sep 2013 00:10:07 +0200
Subject: netfilter: ipset: Move extension data to set structure

Default timeout and extension offsets are moved to struct set, because
all set types supports all extensions and it makes possible to generalize
extension support.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h         |  29 ++++--
 include/linux/netfilter/ipset/ip_set_timeout.h |   4 +-
 net/netfilter/ipset/ip_set_bitmap_gen.h        |  59 ++++++------
 net/netfilter/ipset/ip_set_bitmap_ip.c         |  45 +++++----
 net/netfilter/ipset/ip_set_bitmap_ipmac.c      |  64 ++++++-------
 net/netfilter/ipset/ip_set_bitmap_port.c       |  44 ++++-----
 net/netfilter/ipset/ip_set_hash_gen.h          | 127 ++++++++++++-------------
 net/netfilter/ipset/ip_set_hash_ip.c           |   8 +-
 net/netfilter/ipset/ip_set_hash_ipport.c       |  10 +-
 net/netfilter/ipset/ip_set_hash_ipportip.c     |  10 +-
 net/netfilter/ipset/ip_set_hash_ipportnet.c    |  14 +--
 net/netfilter/ipset/ip_set_hash_net.c          |   9 +-
 net/netfilter/ipset/ip_set_hash_netiface.c     |   8 +-
 net/netfilter/ipset/ip_set_hash_netport.c      |   8 +-
 net/netfilter/ipset/ip_set_list_set.c          | 116 ++++++++++------------
 15 files changed, 266 insertions(+), 289 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index b4db7912bf0d..992a2f58dbd3 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -72,6 +72,16 @@ struct ip_set_ext {
 	u32 timeout;
 };
 
+struct ip_set_counter {
+	atomic64_t bytes;
+	atomic64_t packets;
+};
+
+#define ext_timeout(e, s)	\
+(unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])
+#define ext_counter(e, s)	\
+(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
+
 struct ip_set;
 
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
@@ -179,15 +189,16 @@ struct ip_set {
 	u8 revision;
 	/* Extensions */
 	u8 extensions;
+	/* Default timeout value, if enabled */
+	u32 timeout;
+	/* Element data size */
+	size_t dsize;
+	/* Offsets to extensions in elements */
+	size_t offset[IPSET_EXT_ID_MAX];
 	/* The type specific data */
 	void *data;
 };
 
-struct ip_set_counter {
-	atomic64_t bytes;
-	atomic64_t packets;
-};
-
 static inline void
 ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
 {
@@ -390,13 +401,13 @@ bitmap_bytes(u32 a, u32 b)
 
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 
-#define IP_SET_INIT_KEXT(skb, opt, map)			\
+#define IP_SET_INIT_KEXT(skb, opt, set)			\
 	{ .bytes = (skb)->len, .packets = 1,		\
-	  .timeout = ip_set_adt_opt_timeout(opt, map) }
+	  .timeout = ip_set_adt_opt_timeout(opt, set) }
 
-#define IP_SET_INIT_UEXT(map)				\
+#define IP_SET_INIT_UEXT(set)				\
 	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
-	  .timeout = (map)->timeout }
+	  .timeout = (set)->timeout }
 
 #define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))
 
diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 3aac04167ca7..83c2f9e0886c 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -23,8 +23,8 @@
 /* Set is defined with timeout support: timeout value may be 0 */
 #define IPSET_NO_TIMEOUT	UINT_MAX
 
-#define ip_set_adt_opt_timeout(opt, map)	\
-((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (map)->timeout)
+#define ip_set_adt_opt_timeout(opt, set)	\
+((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (set)->timeout)
 
 static inline unsigned int
 ip_set_timeout_uget(struct nlattr *tb)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 889a929b908f..f32ddbca3923 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -32,11 +32,7 @@
 #define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
 #define mtype			MTYPE
 
-#define ext_timeout(e, m)	\
-	(unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT])
-#define ext_counter(e, m)	\
-	(struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER])
-#define get_ext(map, id)	((map)->extensions + (map)->dsize * (id))
+#define get_ext(set, map, id)	((map)->extensions + (set)->dsize * (id))
 
 static void
 mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -46,7 +42,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	init_timer(&map->gc);
 	map->gc.data = (unsigned long) set;
 	map->gc.function = gc;
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
@@ -59,7 +55,7 @@ mtype_destroy(struct ip_set *set)
 		del_timer_sync(&map->gc);
 
 	ip_set_free(map->members);
-	if (map->dsize)
+	if (set->dsize)
 		ip_set_free(map->extensions);
 	kfree(map);
 
@@ -88,9 +84,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
 			  htonl(sizeof(*map) +
 				map->memsize +
-				map->dsize * map->elements)) ||
+				set->dsize * map->elements)) ||
 	    (SET_WITH_TIMEOUT(set) &&
-	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
+	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) ||
 	    (SET_WITH_COUNTER(set) &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
 			   htonl(IPSET_FLAG_WITH_COUNTERS))))
@@ -108,16 +104,16 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	void *x = get_ext(map, e->id);
-	int ret = mtype_do_test(e, map);
+	void *x = get_ext(set, map, e->id);
+	int ret = mtype_do_test(e, map, set->dsize);
 
 	if (ret <= 0)
 		return ret;
 	if (SET_WITH_TIMEOUT(set) &&
-	    ip_set_timeout_expired(ext_timeout(x, map)))
+	    ip_set_timeout_expired(ext_timeout(x, set)))
 		return 0;
 	if (SET_WITH_COUNTER(set))
-		ip_set_update_counter(ext_counter(x, map), ext, mext, flags);
+		ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
 	return 1;
 }
 
@@ -127,12 +123,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	void *x = get_ext(map, e->id);
-	int ret = mtype_do_add(e, map, flags);
+	void *x = get_ext(set, map, e->id);
+	int ret = mtype_do_add(e, map, flags, set->dsize);
 
 	if (ret == IPSET_ADD_FAILED) {
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(x, map)))
+		    ip_set_timeout_expired(ext_timeout(x, set)))
 			ret = 0;
 		else if (!(flags & IPSET_FLAG_EXIST))
 			return -IPSET_ERR_EXIST;
@@ -140,13 +136,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	if (SET_WITH_TIMEOUT(set))
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
-		mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret);
+		mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret);
 #else
-		ip_set_timeout_set(ext_timeout(x, map), ext->timeout);
+		ip_set_timeout_set(ext_timeout(x, set), ext->timeout);
 #endif
 
 	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(x, map), ext);
+		ip_set_init_counter(ext_counter(x, set), ext);
 	return 0;
 }
 
@@ -156,11 +152,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	const void *x = get_ext(map, e->id);
+	const void *x = get_ext(set, map, e->id);
 
 	if (mtype_do_del(e, map) ||
 	    (SET_WITH_TIMEOUT(set) &&
-	     ip_set_timeout_expired(ext_timeout(x, map))))
+	     ip_set_timeout_expired(ext_timeout(x, set))))
 		return -IPSET_ERR_EXIST;
 
 	return 0;
@@ -180,13 +176,13 @@ mtype_list(const struct ip_set *set,
 		return -EMSGSIZE;
 	for (; cb->args[2] < map->elements; cb->args[2]++) {
 		id = cb->args[2];
-		x = get_ext(map, id);
+		x = get_ext(set, map, id);
 		if (!test_bit(id, map->members) ||
 		    (SET_WITH_TIMEOUT(set) &&
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
 		     mtype_is_filled((const struct mtype_elem *) x) &&
 #endif
-		     ip_set_timeout_expired(ext_timeout(x, map))))
+		     ip_set_timeout_expired(ext_timeout(x, set))))
 			continue;
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested) {
@@ -196,23 +192,24 @@ mtype_list(const struct ip_set *set,
 			} else
 				goto nla_put_failure;
 		}
-		if (mtype_do_list(skb, map, id))
+		if (mtype_do_list(skb, map, id, set->dsize))
 			goto nla_put_failure;
 		if (SET_WITH_TIMEOUT(set)) {
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
 			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
 					  htonl(ip_set_timeout_stored(map, id,
-							ext_timeout(x, map)))))
+							ext_timeout(x, set),
+							set->dsize))))
 				goto nla_put_failure;
 #else
 			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
 					  htonl(ip_set_timeout_get(
-							ext_timeout(x, map)))))
+							ext_timeout(x, set)))))
 				goto nla_put_failure;
 #endif
 		}
 		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(x, map)))
+		    ip_set_put_counter(skb, ext_counter(x, set)))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
@@ -245,14 +242,14 @@ mtype_gc(unsigned long ul_set)
 	 * but adding/deleting new entries is locked out */
 	read_lock_bh(&set->lock);
 	for (id = 0; id < map->elements; id++)
-		if (mtype_gc_test(id, map)) {
-			x = get_ext(map, id);
-			if (ip_set_timeout_expired(ext_timeout(x, map)))
+		if (mtype_gc_test(id, map, set->dsize)) {
+			x = get_ext(set, map, id);
+			if (ip_set_timeout_expired(ext_timeout(x, set)))
 				clear_bit(id, map->members);
 		}
 	read_unlock_bh(&set->lock);
 
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 2ee210ef49a2..363022edb8fb 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -44,10 +44,7 @@ struct bitmap_ip {
 	u32 elements;		/* number of max elements in the set */
 	u32 hosts;		/* number of hosts in a subnet */
 	size_t memsize;		/* members size */
-	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u8 netmask;		/* subnet netmask */
-	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
 };
 
@@ -65,20 +62,21 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
 /* Common functions */
 
 static inline int
-bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
+bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
+		  struct bitmap_ip *map, size_t dsize)
 {
 	return !!test_bit(e->id, map->members);
 }
 
 static inline int
-bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map)
+bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
 {
 	return !!test_bit(id, map->members);
 }
 
 static inline int
 bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
-		 u32 flags)
+		 u32 flags, size_t dsize)
 {
 	return !!test_and_set_bit(e->id, map->members);
 }
@@ -90,7 +88,8 @@ bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
 }
 
 static inline int
-bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id)
+bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
+		  size_t dsize)
 {
 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			htonl(map->first_ip + id * map->hosts));
@@ -113,7 +112,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct bitmap_ip *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_ip_adt_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
 	ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
@@ -133,7 +132,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	u32 ip = 0, ip_to = 0;
 	struct bitmap_ip_adt_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret = 0;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -200,7 +199,7 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
 	return x->first_ip == y->first_ip &&
 	       x->last_ip == y->last_ip &&
 	       x->netmask == y->netmask &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -240,8 +239,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (map->dsize) {
-		map->extensions = ip_set_alloc(map->dsize * elements);
+	if (set->dsize) {
+		map->extensions = ip_set_alloc(set->dsize * elements);
 		if (!map->extensions) {
 			kfree(map->members);
 			return false;
@@ -252,7 +251,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->elements = elements;
 	map->hosts = hosts;
 	map->netmask = netmask;
-	map->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	set->data = map;
 	set->family = NFPROTO_IPV4;
@@ -341,10 +340,10 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map->dsize = sizeof(struct bitmap_ipct_elem);
-			map->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct bitmap_ipct_elem);
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_ipct_elem, timeout);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipct_elem, counter);
 
 			if (!init_map_ip(set, map, first_ip, last_ip,
@@ -353,14 +352,14 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 				return -ENOMEM;
 			}
 
-			map->timeout = ip_set_timeout_uget(
+			set->timeout = ip_set_timeout_uget(
 				tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 
 			bitmap_ip_gc_init(set, bitmap_ip_gc);
 		} else {
-			map->dsize = sizeof(struct bitmap_ipc_elem);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->dsize = sizeof(struct bitmap_ipc_elem);
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipc_elem, counter);
 
 			if (!init_map_ip(set, map, first_ip, last_ip,
@@ -370,8 +369,8 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map->dsize = sizeof(struct bitmap_ipt_elem);
-		map->offset[IPSET_EXT_ID_TIMEOUT] =
+		set->dsize = sizeof(struct bitmap_ipt_elem);
+		set->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_ipt_elem, timeout);
 
 		if (!init_map_ip(set, map, first_ip, last_ip,
@@ -380,12 +379,12 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			return -ENOMEM;
 		}
 
-		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 
 		bitmap_ip_gc_init(set, bitmap_ip_gc);
 	} else {
-		map->dsize = 0;
+		set->dsize = 0;
 		if (!init_map_ip(set, map, first_ip, last_ip,
 				 elements, hosts, netmask)) {
 			kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index e711875ea452..74576cb19264 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -48,11 +48,8 @@ struct bitmap_ipmac {
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
-	u32 timeout;		/* timeout value */
-	struct timer_list gc;	/* garbage collector */
 	size_t memsize;		/* members size */
-	size_t dsize;		/* size of element */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
+	struct timer_list gc;	/* garbage collector */
 };
 
 /* ADT structure for generic function args */
@@ -82,13 +79,13 @@ get_elem(void *extensions, u16 id, size_t dsize)
 
 static inline int
 bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
-		     const struct bitmap_ipmac *map)
+		     const struct bitmap_ipmac *map, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem;
 
 	if (!test_bit(e->id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, e->id, map->dsize);
+	elem = get_elem(map->extensions, e->id, dsize);
 	if (elem->filled == MAC_FILLED)
 		return e->ether == NULL ||
 		       ether_addr_equal(e->ether, elem->ether);
@@ -97,13 +94,13 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
 }
 
 static inline int
-bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map)
+bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem;
 
 	if (!test_bit(id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, id, map->dsize);
+	elem = get_elem(map->extensions, id, dsize);
 	/* Timer not started for the incomplete elements */
 	return elem->filled == MAC_FILLED;
 }
@@ -117,13 +114,13 @@ bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
 static inline int
 bitmap_ipmac_add_timeout(unsigned long *timeout,
 			 const struct bitmap_ipmac_adt_elem *e,
-			 const struct ip_set_ext *ext,
+			 const struct ip_set_ext *ext, struct ip_set *set,
 			 struct bitmap_ipmac *map, int mode)
 {
 	u32 t = ext->timeout;
 
 	if (mode == IPSET_ADD_START_STORED_TIMEOUT) {
-		if (t == map->timeout)
+		if (t == set->timeout)
 			/* Timeout was not specified, get stored one */
 			t = *timeout;
 		ip_set_timeout_set(timeout, t);
@@ -142,11 +139,11 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
 
 static inline int
 bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
-		    struct bitmap_ipmac *map, u32 flags)
+		    struct bitmap_ipmac *map, u32 flags, size_t dsize)
 {
 	struct bitmap_ipmac_elem *elem;
 
-	elem = get_elem(map->extensions, e->id, map->dsize);
+	elem = get_elem(map->extensions, e->id, dsize);
 	if (test_and_set_bit(e->id, map->members)) {
 		if (elem->filled == MAC_FILLED) {
 			if (e->ether && (flags & IPSET_FLAG_EXIST))
@@ -179,10 +176,11 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
 }
 
 static inline unsigned long
-ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout)
+ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout,
+		      size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, map->dsize);
+		get_elem(map->extensions, id, dsize);
 
 	return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) :
 					    *timeout;
@@ -190,10 +188,10 @@ ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout)
 
 static inline int
 bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
-		     u32 id)
+		     u32 id, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, map->dsize);
+		get_elem(map->extensions, id, dsize);
 
 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			       htonl(map->first_ip + id)) ||
@@ -216,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_ipmac_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
 	/* MAC can be src only */
@@ -245,7 +243,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_ipmac_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0;
 	int ret = 0;
 
@@ -285,7 +283,7 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
 
 	return x->first_ip == y->first_ip &&
 	       x->last_ip == y->last_ip &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -330,11 +328,11 @@ static bool
 init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	       u32 first_ip, u32 last_ip, u32 elements)
 {
-	map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);
+	map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize);
 	if (!map->members)
 		return false;
-	if (map->dsize) {
-		map->extensions = ip_set_alloc(map->dsize * elements);
+	if (set->dsize) {
+		map->extensions = ip_set_alloc(set->dsize * elements);
 		if (!map->extensions) {
 			kfree(map->members);
 			return false;
@@ -343,7 +341,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
-	map->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	set->data = map;
 	set->family = NFPROTO_IPV4;
@@ -404,10 +402,10 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map->dsize = sizeof(struct bitmap_ipmacct_elem);
-			map->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct bitmap_ipmacct_elem);
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_ipmacct_elem, timeout);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipmacct_elem, counter);
 
 			if (!init_map_ipmac(set, map, first_ip, last_ip,
@@ -415,13 +413,13 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 				kfree(map);
 				return -ENOMEM;
 			}
-			map->timeout = ip_set_timeout_uget(
+			set->timeout = ip_set_timeout_uget(
 				tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 			bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
 		} else {
-			map->dsize = sizeof(struct bitmap_ipmacc_elem);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->dsize = sizeof(struct bitmap_ipmacc_elem);
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipmacc_elem, counter);
 
 			if (!init_map_ipmac(set, map, first_ip, last_ip,
@@ -431,19 +429,19 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map->dsize = sizeof(struct bitmap_ipmact_elem);
-		map->offset[IPSET_EXT_ID_TIMEOUT] =
+		set->dsize = sizeof(struct bitmap_ipmact_elem);
+		set->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_ipmact_elem, timeout);
 
 		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
 			kfree(map);
 			return -ENOMEM;
 		}
-		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
 	} else {
-		map->dsize = sizeof(struct bitmap_ipmac_elem);
+		set->dsize = sizeof(struct bitmap_ipmac_elem);
 
 		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
 			kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index bebc137a5737..71da31935499 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -38,9 +38,6 @@ struct bitmap_port {
 	u16 last_port;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
-	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
-	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
 };
 
@@ -59,20 +56,20 @@ port_to_id(const struct bitmap_port *m, u16 port)
 
 static inline int
 bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
-		    const struct bitmap_port *map)
+		    const struct bitmap_port *map, size_t dsize)
 {
 	return !!test_bit(e->id, map->members);
 }
 
 static inline int
-bitmap_port_gc_test(u16 id, const struct bitmap_port *map)
+bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
 {
 	return !!test_bit(id, map->members);
 }
 
 static inline int
 bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
-		   struct bitmap_port *map, u32 flags)
+		   struct bitmap_port *map, u32 flags, size_t dsize)
 {
 	return !!test_and_set_bit(e->id, map->members);
 }
@@ -85,7 +82,8 @@ bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
 }
 
 static inline int
-bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id)
+bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
+		    size_t dsize)
 {
 	return nla_put_net16(skb, IPSET_ATTR_PORT,
 			     htons(map->first_port + id));
@@ -106,7 +104,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct bitmap_port *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_port_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	__be16 __port;
 	u16 port = 0;
 
@@ -131,7 +129,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct bitmap_port *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_port_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port;	/* wraparound */
 	u16 port_to;
 	int ret = 0;
@@ -191,7 +189,7 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
 
 	return x->first_port == y->first_port &&
 	       x->last_port == y->last_port &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -230,8 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (map->dsize) {
-		map->extensions = ip_set_alloc(map->dsize * map->elements);
+	if (set->dsize) {
+		map->extensions = ip_set_alloc(set->dsize * map->elements);
 		if (!map->extensions) {
 			kfree(map->members);
 			return false;
@@ -239,7 +237,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	}
 	map->first_port = first_port;
 	map->last_port = last_port;
-	map->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	set->data = map;
 	set->family = NFPROTO_UNSPEC;
@@ -281,23 +279,23 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map->dsize = sizeof(struct bitmap_portct_elem);
-			map->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct bitmap_portct_elem);
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_portct_elem, timeout);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_portct_elem, counter);
 			if (!init_map_port(set, map, first_port, last_port)) {
 				kfree(map);
 				return -ENOMEM;
 			}
 
-			map->timeout =
+			set->timeout =
 				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 			bitmap_port_gc_init(set, bitmap_port_gc);
 		} else {
-			map->dsize = sizeof(struct bitmap_portc_elem);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->dsize = sizeof(struct bitmap_portc_elem);
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_portc_elem, counter);
 			if (!init_map_port(set, map, first_port, last_port)) {
 				kfree(map);
@@ -305,19 +303,19 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map->dsize = sizeof(struct bitmap_portt_elem);
-		map->offset[IPSET_EXT_ID_TIMEOUT] =
+		set->dsize = sizeof(struct bitmap_portt_elem);
+		set->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_portt_elem, timeout);
 		if (!init_map_port(set, map, first_port, last_port)) {
 			kfree(map);
 			return -ENOMEM;
 		}
 
-		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		bitmap_port_gc_init(set, bitmap_port_gc);
 	} else {
-		map->dsize = 0;
+		set->dsize = 0;
 		if (!init_map_port(set, map, first_port, last_port)) {
 			kfree(map);
 			return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index e4db9250f337..0cb840e1f8ae 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -178,11 +178,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #define NLEN(family)		0
 #endif /* IP_SET_HASH_WITH_NETS */
 
-#define ext_timeout(e, h)	\
-(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT])
-#define ext_counter(e, h)	\
-(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER])
-
 #endif /* _IP_SET_HASH_GEN_H */
 
 /* Family dependent templates */
@@ -276,9 +271,6 @@ struct htype {
 	u32 maxelem;		/* max elements in the hash */
 	u32 elements;		/* current element (vs timeout) */
 	u32 initval;		/* random jhash init value */
-	u32 timeout;		/* timeout value, if enabled */
-	size_t dsize;		/* data struct size */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	struct timer_list gc;	/* garbage collection when timeout enabled */
 	struct mtype_elem next; /* temporary storage for uadd */
 #ifdef IP_SET_HASH_WITH_MULTI
@@ -351,7 +343,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 /* Calculate the actual memory size of the set data */
 static size_t
 mtype_ahash_memsize(const struct htype *h, const struct htable *t,
-		    u8 nets_length)
+		    u8 nets_length, size_t dsize)
 {
 	u32 i;
 	size_t memsize = sizeof(*h)
@@ -362,7 +354,7 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t,
 			 + jhash_size(t->htable_bits) * sizeof(struct hbucket);
 
 	for (i = 0; i < jhash_size(t->htable_bits); i++)
-		memsize += t->bucket[i].size * h->dsize;
+		memsize += t->bucket[i].size * dsize;
 
 	return memsize;
 }
@@ -417,10 +409,10 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	init_timer(&h->gc);
 	h->gc.data = (unsigned long) set;
 	h->gc.function = gc;
-	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+	h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&h->gc);
 	pr_debug("gc initialized, run in every %u\n",
-		 IPSET_GC_PERIOD(h->timeout));
+		 IPSET_GC_PERIOD(set->timeout));
 }
 
 static bool
@@ -431,7 +423,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 
 	/* Resizing changes htable_bits, so we ignore it */
 	return x->maxelem == y->maxelem &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 #ifdef IP_SET_HASH_WITH_NETMASK
 	       x->netmask == y->netmask &&
 #endif
@@ -444,7 +436,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 
 /* Delete expired elements from the hashtable */
 static void
-mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
+mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 {
 	struct htable *t;
 	struct hbucket *n;
@@ -458,7 +450,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
 		n = hbucket(t, i);
 		for (j = 0; j < n->pos; j++) {
 			data = ahash_data(n, j, dsize);
-			if (ip_set_timeout_expired(ext_timeout(data, h))) {
+			if (ip_set_timeout_expired(ext_timeout(data, set))) {
 				pr_debug("expired %u/%u\n", i, j);
 #ifdef IP_SET_HASH_WITH_NETS
 				mtype_del_cidr(h, CIDR(data->cidr),
@@ -497,10 +489,10 @@ mtype_gc(unsigned long ul_set)
 
 	pr_debug("called\n");
 	write_lock_bh(&set->lock);
-	mtype_expire(h, NLEN(set->family), h->dsize);
+	mtype_expire(set, h, NLEN(set->family), set->dsize);
 	write_unlock_bh(&set->lock);
 
-	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+	h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&h->gc);
 }
 
@@ -526,7 +518,7 @@ mtype_resize(struct ip_set *set, bool retried)
 	if (SET_WITH_TIMEOUT(set) && !retried) {
 		i = h->elements;
 		write_lock_bh(&set->lock);
-		mtype_expire(set->data, NLEN(set->family), h->dsize);
+		mtype_expire(set, set->data, NLEN(set->family), set->dsize);
 		write_unlock_bh(&set->lock);
 		if (h->elements < i)
 			return 0;
@@ -553,13 +545,13 @@ retry:
 	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
 		n = hbucket(orig, i);
 		for (j = 0; j < n->pos; j++) {
-			data = ahash_data(n, j, h->dsize);
+			data = ahash_data(n, j, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 			flags = 0;
 			mtype_data_reset_flags(data, &flags);
 #endif
 			m = hbucket(t, HKEY(data, h->initval, htable_bits));
-			ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize);
+			ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize);
 			if (ret < 0) {
 #ifdef IP_SET_HASH_WITH_NETS
 				mtype_data_reset_flags(data, &flags);
@@ -570,8 +562,8 @@ retry:
 					goto retry;
 				return ret;
 			}
-			d = ahash_data(m, m->pos++, h->dsize);
-			memcpy(d, data, h->dsize);
+			d = ahash_data(m, m->pos++, set->dsize);
+			memcpy(d, data, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 			mtype_data_reset_flags(d, &flags);
 #endif
@@ -609,7 +601,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
 		/* FIXME: when set is full, we slow down here */
-		mtype_expire(h, NLEN(set->family), h->dsize);
+		mtype_expire(set, h, NLEN(set->family), set->dsize);
 
 	if (h->elements >= h->maxelem) {
 		if (net_ratelimit())
@@ -623,11 +615,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	key = HKEY(value, h->initval, t->htable_bits);
 	n = hbucket(t, key);
 	for (i = 0; i < n->pos; i++) {
-		data = ahash_data(n, i, h->dsize);
+		data = ahash_data(n, i, set->dsize);
 		if (mtype_data_equal(data, d, &multi)) {
 			if (flag_exist ||
 			    (SET_WITH_TIMEOUT(set) &&
-			     ip_set_timeout_expired(ext_timeout(data, h)))) {
+			     ip_set_timeout_expired(ext_timeout(data, set)))) {
 				/* Just the extensions could be overwritten */
 				j = i;
 				goto reuse_slot;
@@ -638,14 +630,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		}
 		/* Reuse first timed out entry */
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(data, h)) &&
+		    ip_set_timeout_expired(ext_timeout(data, set)) &&
 		    j != AHASH_MAX(h) + 1)
 			j = i;
 	}
 reuse_slot:
 	if (j != AHASH_MAX(h) + 1) {
 		/* Fill out reused slot */
-		data = ahash_data(n, j, h->dsize);
+		data = ahash_data(n, j, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 		mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0);
 		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
@@ -653,13 +645,13 @@ reuse_slot:
 	} else {
 		/* Use/create a new slot */
 		TUNE_AHASH_MAX(h, multi);
-		ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize);
+		ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize);
 		if (ret != 0) {
 			if (ret == -EAGAIN)
 				mtype_data_next(&h->next, d);
 			goto out;
 		}
-		data = ahash_data(n, n->pos++, h->dsize);
+		data = ahash_data(n, n->pos++, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
@@ -670,9 +662,9 @@ reuse_slot:
 	mtype_data_set_flags(data, flags);
 #endif
 	if (SET_WITH_TIMEOUT(set))
-		ip_set_timeout_set(ext_timeout(data, h), ext->timeout);
+		ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
 	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(data, h), ext);
+		ip_set_init_counter(ext_counter(data, set), ext);
 
 out:
 	rcu_read_unlock_bh();
@@ -699,16 +691,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	key = HKEY(value, h->initval, t->htable_bits);
 	n = hbucket(t, key);
 	for (i = 0; i < n->pos; i++) {
-		data = ahash_data(n, i, h->dsize);
+		data = ahash_data(n, i, set->dsize);
 		if (!mtype_data_equal(data, d, &multi))
 			continue;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(data, h)))
+		    ip_set_timeout_expired(ext_timeout(data, set)))
 			goto out;
 		if (i != n->pos - 1)
 			/* Not last one */
-			memcpy(data, ahash_data(n, n->pos - 1, h->dsize),
-			       h->dsize);
+			memcpy(data, ahash_data(n, n->pos - 1, set->dsize),
+			       set->dsize);
 
 		n->pos--;
 		h->elements--;
@@ -717,14 +709,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 #endif
 		if (n->pos + AHASH_INIT_SIZE < n->size) {
 			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
-					    * h->dsize,
+					    * set->dsize,
 					    GFP_ATOMIC);
 			if (!tmp) {
 				ret = 0;
 				goto out;
 			}
 			n->size -= AHASH_INIT_SIZE;
-			memcpy(tmp, n->value, n->size * h->dsize);
+			memcpy(tmp, n->value, n->size * set->dsize);
 			kfree(n->value);
 			n->value = tmp;
 		}
@@ -742,8 +734,7 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
 		 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
 {
 	if (SET_WITH_COUNTER(set))
-		ip_set_update_counter(ext_counter(data,
-						  (struct htype *)(set->data)),
+		ip_set_update_counter(ext_counter(data, set),
 				      ext, mext, flags);
 	return mtype_do_data_match(data);
 }
@@ -770,12 +761,12 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 		key = HKEY(d, h->initval, t->htable_bits);
 		n = hbucket(t, key);
 		for (i = 0; i < n->pos; i++) {
-			data = ahash_data(n, i, h->dsize);
+			data = ahash_data(n, i, set->dsize);
 			if (!mtype_data_equal(data, d, &multi))
 				continue;
 			if (SET_WITH_TIMEOUT(set)) {
 				if (!ip_set_timeout_expired(
-							ext_timeout(data, h)))
+						ext_timeout(data, set)))
 					return mtype_data_match(data, ext,
 								mext, set,
 								flags);
@@ -818,10 +809,10 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	key = HKEY(d, h->initval, t->htable_bits);
 	n = hbucket(t, key);
 	for (i = 0; i < n->pos; i++) {
-		data = ahash_data(n, i, h->dsize);
+		data = ahash_data(n, i, set->dsize);
 		if (mtype_data_equal(data, d, &multi) &&
 		    !(SET_WITH_TIMEOUT(set) &&
-		      ip_set_timeout_expired(ext_timeout(data, h)))) {
+		      ip_set_timeout_expired(ext_timeout(data, set)))) {
 			ret = mtype_data_match(data, ext, mext, set, flags);
 			goto out;
 		}
@@ -841,7 +832,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	size_t memsize;
 
 	t = rcu_dereference_bh_nfnl(h->table);
-	memsize = mtype_ahash_memsize(h, t, NLEN(set->family));
+	memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
@@ -858,7 +849,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
 	    ((set->extensions & IPSET_EXT_TIMEOUT) &&
-	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) ||
+	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) ||
 	    ((set->extensions & IPSET_EXT_COUNTER) &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
 			   htonl(IPSET_FLAG_WITH_COUNTERS))))
@@ -894,9 +885,9 @@ mtype_list(const struct ip_set *set,
 		n = hbucket(t, cb->args[2]);
 		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
 		for (i = 0; i < n->pos; i++) {
-			e = ahash_data(n, i, h->dsize);
+			e = ahash_data(n, i, set->dsize);
 			if (SET_WITH_TIMEOUT(set) &&
-			    ip_set_timeout_expired(ext_timeout(e, h)))
+			    ip_set_timeout_expired(ext_timeout(e, set)))
 				continue;
 			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
 				 cb->args[2], n, i, e);
@@ -913,10 +904,10 @@ mtype_list(const struct ip_set *set,
 			if (SET_WITH_TIMEOUT(set) &&
 			    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
 					  htonl(ip_set_timeout_get(
-						ext_timeout(e, h)))))
+						ext_timeout(e, set)))))
 				goto nla_put_failure;
 			if (SET_WITH_COUNTER(set) &&
-			    ip_set_put_counter(skb, ext_counter(e, h)))
+			    ip_set_put_counter(skb, ext_counter(e, set)))
 				goto nla_put_failure;
 			ipset_nest_end(skb, nested);
 		}
@@ -1026,7 +1017,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	h->netmask = netmask;
 #endif
 	get_random_bytes(&h->initval, sizeof(h->initval));
-	h->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	hbits = htable_bits(hashsize);
 	hsize = htable_size(hbits);
@@ -1053,30 +1044,30 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
-			h->timeout =
+			set->timeout =
 				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 			if (set->family == NFPROTO_IPV4) {
-				h->dsize = sizeof(struct
+				set->dsize = sizeof(struct
 					IPSET_TOKEN(HTYPE, 4ct_elem));
-				h->offset[IPSET_EXT_ID_TIMEOUT] =
+				set->offset[IPSET_EXT_ID_TIMEOUT] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4ct_elem),
 						timeout);
-				h->offset[IPSET_EXT_ID_COUNTER] =
+				set->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4ct_elem),
 						counter);
 				IPSET_TOKEN(HTYPE, 4_gc_init)(set,
 					IPSET_TOKEN(HTYPE, 4_gc));
 			} else {
-				h->dsize = sizeof(struct
+				set->dsize = sizeof(struct
 					IPSET_TOKEN(HTYPE, 6ct_elem));
-				h->offset[IPSET_EXT_ID_TIMEOUT] =
+				set->offset[IPSET_EXT_ID_TIMEOUT] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6ct_elem),
 						timeout);
-				h->offset[IPSET_EXT_ID_COUNTER] =
+				set->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6ct_elem),
 						counter);
@@ -1085,36 +1076,36 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			}
 		} else {
 			if (set->family == NFPROTO_IPV4) {
-				h->dsize =
+				set->dsize =
 					sizeof(struct
 						IPSET_TOKEN(HTYPE, 4c_elem));
-				h->offset[IPSET_EXT_ID_COUNTER] =
+				set->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4c_elem),
 						counter);
 			} else {
-				h->dsize =
+				set->dsize =
 					sizeof(struct
 						IPSET_TOKEN(HTYPE, 6c_elem));
-				h->offset[IPSET_EXT_ID_COUNTER] =
+				set->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6c_elem),
 						counter);
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		if (set->family == NFPROTO_IPV4) {
-			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
-			h->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem),
 					 timeout);
 			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
 				IPSET_TOKEN(HTYPE, 4_gc));
 		} else {
-			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
-			h->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem),
 					 timeout);
 			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
@@ -1122,9 +1113,9 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		}
 	} else {
 		if (set->family == NFPROTO_IPV4)
-			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem));
+			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem));
 		else
-			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem));
+			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem));
 	}
 
 	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 260c9a80d8a5..bbde7c304622 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -99,7 +99,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip4_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	__be32 ip;
 
 	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
@@ -118,7 +118,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip4_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, hosts;
 	int ret = 0;
 
@@ -253,7 +253,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip6_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
 	hash_ip6_netmask(&e.ip, h->netmask);
@@ -270,7 +270,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip6_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 64caad35a391..dd175d6f3965 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -116,10 +116,9 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
 		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -136,7 +135,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip, ip_to = 0, p = 0, port, port_to;
 	bool with_ports = false;
 	int ret;
@@ -306,10 +305,9 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
 		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -326,7 +324,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	int ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 2873bbc20d7a..87a2cfab2568 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -120,10 +120,9 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		    const struct xt_action_param *par,
 		    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -141,7 +140,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip, ip_to = 0, p = 0, port, port_to;
 	bool with_ports = false;
 	int ret;
@@ -319,10 +318,9 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		    const struct xt_action_param *par,
 		    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -340,7 +338,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	int ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 6ce5a8e2c44e..0b9a28d7c740 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -172,7 +172,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_ipportnet4_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -195,7 +195,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, p = 0, port, port_to;
 	u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
 	bool with_ports = false;
@@ -306,9 +306,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 						       : port;
 		for (; p <= port_to; p++) {
 			e.port = htons(p);
-			ip2 = retried
-			      && ip == ntohl(h->next.ip)
-			      && p == ntohs(h->next.port)
+			ip2 = retried &&
+			      ip == ntohl(h->next.ip) &&
+			      p == ntohs(h->next.port)
 				? ntohl(h->next.ip2) : ip2_from;
 			while (!after(ip2, ip2_to)) {
 				e.ip2 = htonl(ip2);
@@ -456,7 +456,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_ipportnet6_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -479,7 +479,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	u8 cidr;
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index ec1c7dc10489..1d4caa50dacb 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -145,7 +145,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_net4_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (e.cidr == 0)
 		return -EINVAL;
@@ -165,7 +165,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net4_elem e = { .cidr = HOST_MASK };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, last;
 	int ret;
 
@@ -340,7 +340,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_net6_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (e.cidr == 0)
 		return -EINVAL;
@@ -357,10 +357,9 @@ static int
 hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
 	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net6_elem e = { .cidr = HOST_MASK };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 814b4e31a7f8..2f0ffe35c408 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -268,7 +268,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	int ret;
 
 	if (e.cidr == 0)
@@ -319,7 +319,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, last;
 	char iface[IFNAMSIZ];
 	int ret;
@@ -537,7 +537,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	int ret;
 
 	if (e.cidr == 0)
@@ -584,7 +584,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	char iface[IFNAMSIZ];
 	int ret;
 
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 3bd923d3f179..cab236625f97 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -164,7 +164,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netport4_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -186,7 +186,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to, p = 0, ip = 0, ip_to = 0, last;
 	bool with_ports = false;
 	u8 cidr;
@@ -409,7 +409,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netport6_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -431,7 +431,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport6_elem e = { .cidr = HOST_MASK  - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	u8 cidr;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 0ed19b5e675e..f22d05d6e366 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -58,24 +58,13 @@ struct set_adt_elem {
 
 /* Type structure */
 struct list_set {
-	size_t dsize;		/* element size */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u32 size;		/* size of set list array */
-	u32 timeout;		/* timeout value */
 	struct timer_list gc;	/* garbage collection */
 	struct set_elem members[0]; /* the set members */
 };
 
-static inline struct set_elem *
-list_set_elem(const struct list_set *map, u32 id)
-{
-	return (struct set_elem *)((void *)map->members + id * map->dsize);
-}
-
-#define ext_timeout(e, m)	\
-(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT])
-#define ext_counter(e, m)	\
-(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER])
+#define list_set_elem(set, map, id)	\
+	(struct set_elem *)((void *)(map)->members + (id) * (set)->dsize)
 
 static int
 list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
@@ -92,16 +81,16 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
 	if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
 		opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		ret = ip_set_test(e->id, skb, par, opt);
 		if (ret > 0) {
 			if (SET_WITH_COUNTER(set))
-				ip_set_update_counter(ext_counter(e, map),
+				ip_set_update_counter(ext_counter(e, set),
 						      ext, &opt->ext,
 						      cmdflags);
 			return ret;
@@ -121,11 +110,11 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		ret = ip_set_add(e->id, skb, par, opt);
 		if (ret == 0)
@@ -145,11 +134,11 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		ret = ip_set_del(e->id, skb, par, opt);
 		if (ret == 0)
@@ -163,8 +152,7 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
 	      const struct xt_action_param *par,
 	      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	struct list_set *map = set->data;
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	switch (adt) {
 	case IPSET_TEST:
@@ -188,10 +176,10 @@ id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)
 	if (i >= map->size)
 		return 0;
 
-	e = list_set_elem(map, i);
+	e = list_set_elem(set, map, i);
 	return !!(e->id == id &&
 		 !(SET_WITH_TIMEOUT(set) &&
-		   ip_set_timeout_expired(ext_timeout(e, map))));
+		   ip_set_timeout_expired(ext_timeout(e, set))));
 }
 
 static int
@@ -199,28 +187,29 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
 	     const struct ip_set_ext *ext)
 {
 	struct list_set *map = set->data;
-	struct set_elem *e = list_set_elem(map, i);
+	struct set_elem *e = list_set_elem(set, map, i);
 
 	if (e->id != IPSET_INVALID_ID) {
 		if (i == map->size - 1)
 			/* Last element replaced: e.g. add new,before,last */
 			ip_set_put_byindex(e->id);
 		else {
-			struct set_elem *x = list_set_elem(map, map->size - 1);
+			struct set_elem *x = list_set_elem(set, map,
+							   map->size - 1);
 
 			/* Last element pushed off */
 			if (x->id != IPSET_INVALID_ID)
 				ip_set_put_byindex(x->id);
-			memmove(list_set_elem(map, i + 1), e,
-				map->dsize * (map->size - (i + 1)));
+			memmove(list_set_elem(set, map, i + 1), e,
+				set->dsize * (map->size - (i + 1)));
 		}
 	}
 
 	e->id = d->id;
 	if (SET_WITH_TIMEOUT(set))
-		ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+		ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
 	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(e, map), ext);
+		ip_set_init_counter(ext_counter(e, set), ext);
 	return 0;
 }
 
@@ -228,16 +217,16 @@ static int
 list_set_del(struct ip_set *set, u32 i)
 {
 	struct list_set *map = set->data;
-	struct set_elem *e = list_set_elem(map, i);
+	struct set_elem *e = list_set_elem(set, map, i);
 
 	ip_set_put_byindex(e->id);
 
 	if (i < map->size - 1)
-		memmove(e, list_set_elem(map, i + 1),
-			map->dsize * (map->size - (i + 1)));
+		memmove(e, list_set_elem(set, map, i + 1),
+			set->dsize * (map->size - (i + 1)));
 
 	/* Last element */
-	e = list_set_elem(map, map->size - 1);
+	e = list_set_elem(set, map, map->size - 1);
 	e->id = IPSET_INVALID_ID;
 	return 0;
 }
@@ -250,9 +239,9 @@ set_cleanup_entries(struct ip_set *set)
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			list_set_del(set, i);
 	}
 }
@@ -268,11 +257,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, map)))
+			 ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		else if (e->id != d->id)
 			continue;
@@ -301,11 +290,11 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	/* Check already added element */
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto insert;
 		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, map)))
+			 ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		else if (e->id != d->id)
 			continue;
@@ -320,9 +309,9 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 			return -IPSET_ERR_EXIST;
 		/* Update extensions */
 		if (SET_WITH_TIMEOUT(set))
-			ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+			ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
 		if (SET_WITH_COUNTER(set))
-			ip_set_init_counter(ext_counter(e, map), ext);
+			ip_set_init_counter(ext_counter(e, set), ext);
 		/* Set is already added to the list */
 		ip_set_put_byindex(d->id);
 		return 0;
@@ -330,7 +319,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 insert:
 	ret = -IPSET_ERR_LIST_FULL;
 	for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
 				: list_set_add(set, i, d, ext);
@@ -355,12 +344,12 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return d->before != 0 ? -IPSET_ERR_REF_EXIST
 					      : -IPSET_ERR_EXIST;
 		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, map)))
+			 ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		else if (e->id != d->id)
 			continue;
@@ -383,10 +372,9 @@ static int
 list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	struct list_set *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct set_adt_elem e = { .refid = IPSET_INVALID_ID };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	struct ip_set *s;
 	int ret = 0;
 
@@ -454,7 +442,7 @@ list_set_flush(struct ip_set *set)
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID) {
 			ip_set_put_byindex(e->id);
 			e->id = IPSET_INVALID_ID;
@@ -486,13 +474,13 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
 	    (SET_WITH_TIMEOUT(set) &&
-	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
+	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) ||
 	    (SET_WITH_COUNTER(set) &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
 			   htonl(IPSET_FLAG_WITH_COUNTERS))) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
-			  htonl(sizeof(*map) + map->size * map->dsize)))
+			  htonl(sizeof(*map) + map->size * set->dsize)))
 		goto nla_put_failure;
 	ipset_nest_end(skb, nested);
 
@@ -515,11 +503,11 @@ list_set_list(const struct ip_set *set,
 		return -EMSGSIZE;
 	for (; cb->args[2] < map->size; cb->args[2]++) {
 		i = cb->args[2];
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto finish;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested) {
@@ -535,10 +523,10 @@ list_set_list(const struct ip_set *set,
 		if (SET_WITH_TIMEOUT(set) &&
 		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
 				  htonl(ip_set_timeout_get(
-						ext_timeout(e, map)))))
+						ext_timeout(e, set)))))
 			goto nla_put_failure;
 		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(e, map)))
+		    ip_set_put_counter(skb, ext_counter(e, set)))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
@@ -565,7 +553,7 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
 	const struct list_set *y = b->data;
 
 	return x->size == y->size &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -594,7 +582,7 @@ list_set_gc(unsigned long ul_set)
 	set_cleanup_entries(set);
 	write_unlock_bh(&set->lock);
 
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
@@ -606,7 +594,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	init_timer(&map->gc);
 	map->gc.data = (unsigned long) set;
 	map->gc.function = gc;
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
@@ -625,12 +613,12 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,
 		return NULL;
 
 	map->size = size;
-	map->dsize = dsize;
-	map->timeout = timeout;
+	set->dsize = dsize;
+	set->timeout = timeout;
 	set->data = map;
 
 	for (i = 0; i < size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		e->id = IPSET_INVALID_ID;
 	}
 
@@ -667,9 +655,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			if (!map)
 				return -ENOMEM;
 			set->extensions |= IPSET_EXT_TIMEOUT;
-			map->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct setct_elem, timeout);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct setct_elem, counter);
 			list_set_gc_init(set, list_set_gc);
 		} else {
@@ -677,7 +665,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 					    sizeof(struct setc_elem), 0);
 			if (!map)
 				return -ENOMEM;
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct setc_elem, counter);
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
@@ -686,7 +674,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		if (!map)
 			return -ENOMEM;
 		set->extensions |= IPSET_EXT_TIMEOUT;
-		map->offset[IPSET_EXT_ID_TIMEOUT] =
+		set->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct sett_elem, timeout);
 		list_set_gc_init(set, list_set_gc);
 	} else {
-- 
cgit v1.2.3


From 03c8b234e61a9a3aab8d970b3bf681934ecfe443 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Sep 2013 00:43:52 +0200
Subject: netfilter: ipset: Generalize extensions support

Get rid of the structure based extensions and introduce a blob for
the extensions. Thus we can support more extension types easily.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      | 13 +++++
 net/netfilter/ipset/ip_set_bitmap_ip.c      | 81 +++----------------------
 net/netfilter/ipset/ip_set_bitmap_ipmac.c   | 91 +++--------------------------
 net/netfilter/ipset/ip_set_bitmap_port.c    | 71 ++--------------------
 net/netfilter/ipset/ip_set_core.c           | 46 +++++++++++++++
 net/netfilter/ipset/ip_set_hash_gen.h       | 86 ++++-----------------------
 net/netfilter/ipset/ip_set_hash_ip.c        | 36 +-----------
 net/netfilter/ipset/ip_set_hash_ipport.c    | 54 +----------------
 net/netfilter/ipset/ip_set_hash_ipportip.c  | 60 +------------------
 net/netfilter/ipset/ip_set_hash_ipportnet.c | 66 +--------------------
 net/netfilter/ipset/ip_set_hash_net.c       | 54 +----------------
 net/netfilter/ipset/ip_set_hash_netiface.c  | 68 +--------------------
 net/netfilter/ipset/ip_set_hash_netport.c   | 60 +------------------
 net/netfilter/ipset/ip_set_list_set.c       | 81 ++++---------------------
 14 files changed, 118 insertions(+), 749 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 992a2f58dbd3..66d6bd404d64 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -66,6 +66,17 @@ enum ip_set_ext_id {
 	IPSET_EXT_ID_MAX,
 };
 
+/* Extension type */
+struct ip_set_ext_type {
+	enum ip_set_extension type;
+	enum ipset_cadt_flags flag;
+	/* Size and minimal alignment */
+	u8 len;
+	u8 align;
+};
+
+extern const struct ip_set_ext_type ip_set_extensions[];
+
 struct ip_set_ext {
 	u64 packets;
 	u64 bytes;
@@ -283,6 +294,8 @@ extern void *ip_set_alloc(size_t size);
 extern void ip_set_free(void *members);
 extern int ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr);
 extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr);
+extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
+			      size_t len);
 extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 				 struct ip_set_ext *ext);
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 363022edb8fb..94d985457c51 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -208,25 +208,6 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
 struct bitmap_ip_elem {
 };
 
-/* Timeout variant */
-
-struct bitmap_ipt_elem {
-	unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_ipc_elem {
-	struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_ipct_elem {
-	unsigned long timeout;
-	struct ip_set_counter counter;
-};
-
 #include "ip_set_bitmap_gen.h"
 
 /* Create bitmap:ip type of sets */
@@ -263,7 +244,7 @@ static int
 bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
 	struct bitmap_ip *map;
-	u32 first_ip = 0, last_ip = 0, hosts, cadt_flags = 0;
+	u32 first_ip = 0, last_ip = 0, hosts;
 	u64 elements;
 	u8 netmask = 32;
 	int ret;
@@ -335,61 +316,15 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ip;
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			set->dsize = sizeof(struct bitmap_ipct_elem);
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct bitmap_ipct_elem, timeout);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_ipct_elem, counter);
-
-			if (!init_map_ip(set, map, first_ip, last_ip,
-					 elements, hosts, netmask)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-
-			set->timeout = ip_set_timeout_uget(
-				tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-
-			bitmap_ip_gc_init(set, bitmap_ip_gc);
-		} else {
-			set->dsize = sizeof(struct bitmap_ipc_elem);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_ipc_elem, counter);
-
-			if (!init_map_ip(set, map, first_ip, last_ip,
-					 elements, hosts, netmask)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		set->dsize = sizeof(struct bitmap_ipt_elem);
-		set->offset[IPSET_EXT_ID_TIMEOUT] =
-			offsetof(struct bitmap_ipt_elem, timeout);
-
-		if (!init_map_ip(set, map, first_ip, last_ip,
-				 elements, hosts, netmask)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-
+	set->dsize = ip_set_elem_len(set, tb, 0);
+	if (!init_map_ip(set, map, first_ip, last_ip,
+			 elements, hosts, netmask)) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
-
 		bitmap_ip_gc_init(set, bitmap_ip_gc);
-	} else {
-		set->dsize = 0;
-		if (!init_map_ip(set, map, first_ip, last_ip,
-				 elements, hosts, netmask)) {
-			kfree(map);
-			return -ENOMEM;
-		}
 	}
 	return 0;
 }
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 74576cb19264..654a97bedfe9 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -289,37 +289,6 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
 
 /* Plain variant */
 
-/* Timeout variant */
-
-struct bitmap_ipmact_elem {
-	struct {
-		unsigned char ether[ETH_ALEN];
-		unsigned char filled;
-	} __attribute__ ((aligned));
-	unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_ipmacc_elem {
-	struct {
-		unsigned char ether[ETH_ALEN];
-		unsigned char filled;
-	} __attribute__ ((aligned));
-	struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_ipmacct_elem {
-	struct {
-		unsigned char ether[ETH_ALEN];
-		unsigned char filled;
-	} __attribute__ ((aligned));
-	unsigned long timeout;
-	struct ip_set_counter counter;
-};
-
 #include "ip_set_bitmap_gen.h"
 
 /* Create bitmap:ip,mac type of sets */
@@ -328,7 +297,7 @@ static bool
 init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	       u32 first_ip, u32 last_ip, u32 elements)
 {
-	map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize);
+	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
 	if (set->dsize) {
@@ -353,7 +322,7 @@ static int
 bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 		    u32 flags)
 {
-	u32 first_ip = 0, last_ip = 0, cadt_flags = 0;
+	u32 first_ip = 0, last_ip = 0;
 	u64 elements;
 	struct bitmap_ipmac *map;
 	int ret;
@@ -397,57 +366,15 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ipmac;
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			set->dsize = sizeof(struct bitmap_ipmacct_elem);
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct bitmap_ipmacct_elem, timeout);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_ipmacct_elem, counter);
-
-			if (!init_map_ipmac(set, map, first_ip, last_ip,
-					    elements)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-			set->timeout = ip_set_timeout_uget(
-				tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
-		} else {
-			set->dsize = sizeof(struct bitmap_ipmacc_elem);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_ipmacc_elem, counter);
-
-			if (!init_map_ipmac(set, map, first_ip, last_ip,
-					    elements)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		set->dsize = sizeof(struct bitmap_ipmact_elem);
-		set->offset[IPSET_EXT_ID_TIMEOUT] =
-			offsetof(struct bitmap_ipmact_elem, timeout);
-
-		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
-			kfree(map);
-			return -ENOMEM;
-		}
+	set->dsize = ip_set_elem_len(set, tb,
+				     sizeof(struct bitmap_ipmac_elem));
+	if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
 		bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
-	} else {
-		set->dsize = sizeof(struct bitmap_ipmac_elem);
-
-		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-		set->variant = &bitmap_ipmac;
 	}
 	return 0;
 }
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 71da31935499..1ef2f3186b80 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -198,25 +198,6 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
 struct bitmap_port_elem {
 };
 
-/* Timeout variant */
-
-struct bitmap_portt_elem {
-	unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_portc_elem {
-	struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_portct_elem {
-	unsigned long timeout;
-	struct ip_set_counter counter;
-};
-
 #include "ip_set_bitmap_gen.h"
 
 /* Create bitmap:ip type of sets */
@@ -250,7 +231,6 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
 	struct bitmap_port *map;
 	u16 first_port, last_port;
-	u32 cadt_flags = 0;
 
 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -274,53 +254,14 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	map->elements = last_port - first_port + 1;
 	map->memsize = map->elements * sizeof(unsigned long);
 	set->variant = &bitmap_port;
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			set->dsize = sizeof(struct bitmap_portct_elem);
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct bitmap_portct_elem, timeout);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_portct_elem, counter);
-			if (!init_map_port(set, map, first_port, last_port)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-
-			set->timeout =
-				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			bitmap_port_gc_init(set, bitmap_port_gc);
-		} else {
-			set->dsize = sizeof(struct bitmap_portc_elem);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_portc_elem, counter);
-			if (!init_map_port(set, map, first_port, last_port)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		set->dsize = sizeof(struct bitmap_portt_elem);
-		set->offset[IPSET_EXT_ID_TIMEOUT] =
-			offsetof(struct bitmap_portt_elem, timeout);
-		if (!init_map_port(set, map, first_port, last_port)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-
+	set->dsize = ip_set_elem_len(set, tb, 0);
+	if (!init_map_port(set, map, first_port, last_port)) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
 		bitmap_port_gc_init(set, bitmap_port_gc);
-	} else {
-		set->dsize = 0;
-		if (!init_map_port(set, map, first_port, last_port)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-
 	}
 	return 0;
 }
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 428c30a8586f..f35afed3814f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -315,6 +315,52 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 
+/* ipset data extension types, in size order */
+
+const struct ip_set_ext_type ip_set_extensions[] = {
+	[IPSET_EXT_ID_COUNTER] = {
+		.type	= IPSET_EXT_COUNTER,
+		.flag	= IPSET_FLAG_WITH_COUNTERS,
+		.len	= sizeof(struct ip_set_counter),
+		.align	= __alignof__(struct ip_set_counter),
+	},
+	[IPSET_EXT_ID_TIMEOUT] = {
+		.type	= IPSET_EXT_TIMEOUT,
+		.len	= sizeof(unsigned long),
+		.align	= __alignof__(unsigned long),
+	},
+};
+EXPORT_SYMBOL_GPL(ip_set_extensions);
+
+static inline bool
+add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
+{
+	return ip_set_extensions[id].flag ?
+		(flags & ip_set_extensions[id].flag) :
+		!!tb[IPSET_ATTR_TIMEOUT];
+}
+
+size_t
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+{
+	enum ip_set_ext_id id;
+	size_t offset = 0;
+	u32 cadt_flags = 0;
+
+	if (tb[IPSET_ATTR_CADT_FLAGS])
+		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
+		if (!add_extension(id, cadt_flags, tb))
+			continue;
+		offset += ALIGN(len + offset, ip_set_extensions[id].align);
+		set->offset[id] = offset;
+		set->extensions |= ip_set_extensions[id].type;
+		offset += ip_set_extensions[id].len;
+	}
+	return len + offset;
+}
+EXPORT_SYMBOL_GPL(ip_set_elem_len);
+
 int
 ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 		      struct ip_set_ext *ext)
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 0cb840e1f8ae..3999f1719f69 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -960,7 +960,6 @@ static int
 IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
 	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
-	u32 cadt_flags = 0;
 	u8 hbits;
 #ifdef IP_SET_HASH_WITH_NETMASK
 	u8 netmask;
@@ -1034,88 +1033,23 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	rcu_assign_pointer(h->table, t);
 
 	set->data = h;
-	if (set->family ==  NFPROTO_IPV4)
+	if (set->family ==  NFPROTO_IPV4) {
 		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
-	else
+		set->dsize = ip_set_elem_len(set, tb,
+				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+	} else {
 		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
-
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			set->timeout =
-				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			if (set->family == NFPROTO_IPV4) {
-				set->dsize = sizeof(struct
-					IPSET_TOKEN(HTYPE, 4ct_elem));
-				set->offset[IPSET_EXT_ID_TIMEOUT] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 4ct_elem),
-						timeout);
-				set->offset[IPSET_EXT_ID_COUNTER] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 4ct_elem),
-						counter);
-				IPSET_TOKEN(HTYPE, 4_gc_init)(set,
-					IPSET_TOKEN(HTYPE, 4_gc));
-			} else {
-				set->dsize = sizeof(struct
-					IPSET_TOKEN(HTYPE, 6ct_elem));
-				set->offset[IPSET_EXT_ID_TIMEOUT] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 6ct_elem),
-						timeout);
-				set->offset[IPSET_EXT_ID_COUNTER] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 6ct_elem),
-						counter);
-				IPSET_TOKEN(HTYPE, 6_gc_init)(set,
-					IPSET_TOKEN(HTYPE, 6_gc));
-			}
-		} else {
-			if (set->family == NFPROTO_IPV4) {
-				set->dsize =
-					sizeof(struct
-						IPSET_TOKEN(HTYPE, 4c_elem));
-				set->offset[IPSET_EXT_ID_COUNTER] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 4c_elem),
-						counter);
-			} else {
-				set->dsize =
-					sizeof(struct
-						IPSET_TOKEN(HTYPE, 6c_elem));
-				set->offset[IPSET_EXT_ID_COUNTER] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 6c_elem),
-						counter);
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
+		set->dsize = ip_set_elem_len(set, tb,
+				sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
-		if (set->family == NFPROTO_IPV4) {
-			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem),
-					 timeout);
+		if (set->family == NFPROTO_IPV4)
 			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
 				IPSET_TOKEN(HTYPE, 4_gc));
-		} else {
-			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem),
-					 timeout);
+		else
 			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
 				IPSET_TOKEN(HTYPE, 6_gc));
-		}
-	} else {
-		if (set->family == NFPROTO_IPV4)
-			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem));
-		else
-			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem));
 	}
 
 	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index bbde7c304622..a111ffe40b46 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -35,7 +35,7 @@ MODULE_ALIAS("ip_set_hash:ip");
 #define HTYPE		hash_ip
 #define IP_SET_HASH_WITH_NETMASK
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_ip4_elem {
@@ -43,22 +43,6 @@ struct hash_ip4_elem {
 	__be32 ip;
 };
 
-struct hash_ip4t_elem {
-	__be32 ip;
-	unsigned long timeout;
-};
-
-struct hash_ip4c_elem {
-	__be32 ip;
-	struct ip_set_counter counter;
-};
-
-struct hash_ip4ct_elem {
-	__be32 ip;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -178,29 +162,13 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 /* Member elements */
 struct hash_ip6_elem {
 	union nf_inet_addr ip;
 };
 
-struct hash_ip6t_elem {
-	union nf_inet_addr ip;
-	unsigned long timeout;
-};
-
-struct hash_ip6c_elem {
-	union nf_inet_addr ip;
-	struct ip_set_counter counter;
-};
-
-struct hash_ip6ct_elem {
-	union nf_inet_addr ip;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index dd175d6f3965..5dc735c4dac2 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port");
 /* Type specific function prefix */
 #define HTYPE		hash_ipport
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_ipport4_elem {
@@ -46,31 +46,6 @@ struct hash_ipport4_elem {
 	u8 padding;
 };
 
-struct hash_ipport4t_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipport4c_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipport4ct_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -221,7 +196,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_ipport6_elem {
 	union nf_inet_addr ip;
@@ -230,31 +205,6 @@ struct hash_ipport6_elem {
 	u8 padding;
 };
 
-struct hash_ipport6t_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipport6c_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipport6ct_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 87a2cfab2568..8c43dc7811cb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,ip");
 /* Type specific function prefix */
 #define HTYPE		hash_ipportip
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements  */
 struct hash_ipportip4_elem {
@@ -47,34 +47,6 @@ struct hash_ipportip4_elem {
 	u8 padding;
 };
 
-struct hash_ipportip4t_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipportip4c_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportip4ct_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 static inline bool
 hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
 			  const struct hash_ipportip4_elem *ip2,
@@ -230,7 +202,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_ipportip6_elem {
 	union nf_inet_addr ip;
@@ -240,34 +212,6 @@ struct hash_ipportip6_elem {
 	u8 padding;
 };
 
-struct hash_ipportip6t_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipportip6c_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportip6ct_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 0b9a28d7c740..34890452366c 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -46,7 +46,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,net");
 #define IP_SET_HASH_WITH_PROTO
 #define IP_SET_HASH_WITH_NETS
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_ipportnet4_elem {
@@ -58,37 +58,6 @@ struct hash_ipportnet4_elem {
 	u8 proto;
 };
 
-struct hash_ipportnet4t_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	unsigned long timeout;
-};
-
-struct hash_ipportnet4c_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportnet4ct_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -328,7 +297,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_ipportnet6_elem {
 	union nf_inet_addr ip;
@@ -339,37 +308,6 @@ struct hash_ipportnet6_elem {
 	u8 proto;
 };
 
-struct hash_ipportnet6t_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	unsigned long timeout;
-};
-
-struct hash_ipportnet6c_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportnet6ct_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1d4caa50dacb..d5598557f4a9 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:net");
 #define HTYPE		hash_net
 #define IP_SET_HASH_WITH_NETS
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements  */
 struct hash_net4_elem {
@@ -46,31 +46,6 @@ struct hash_net4_elem {
 	u8 cidr;
 };
 
-struct hash_net4t_elem {
-	__be32 ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	unsigned long timeout;
-};
-
-struct hash_net4c_elem {
-	__be32 ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-};
-
-struct hash_net4ct_elem {
-	__be32 ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -228,7 +203,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_net6_elem {
 	union nf_inet_addr ip;
@@ -237,31 +212,6 @@ struct hash_net6_elem {
 	u8 cidr;
 };
 
-struct hash_net6t_elem {
-	union nf_inet_addr ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	unsigned long timeout;
-};
-
-struct hash_net6c_elem {
-	union nf_inet_addr ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-};
-
-struct hash_net6ct_elem {
-	union nf_inet_addr ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 2f0ffe35c408..26703e9e5082 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -134,7 +134,7 @@ iface_add(struct rb_root *root, const char **iface)
 
 #define STREQ(a, b)	(strcmp(a, b) == 0)
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 struct hash_netiface4_elem_hashed {
 	__be32 ip;
@@ -144,7 +144,7 @@ struct hash_netiface4_elem_hashed {
 	u8 elem;
 };
 
-/* Member elements without timeout */
+/* Member elements */
 struct hash_netiface4_elem {
 	__be32 ip;
 	u8 physdev;
@@ -154,37 +154,6 @@ struct hash_netiface4_elem {
 	const char *iface;
 };
 
-struct hash_netiface4t_elem {
-	__be32 ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	unsigned long timeout;
-};
-
-struct hash_netiface4c_elem {
-	__be32 ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-};
-
-struct hash_netiface4ct_elem {
-	__be32 ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -399,7 +368,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_netiface6_elem_hashed {
 	union nf_inet_addr ip;
@@ -418,37 +387,6 @@ struct hash_netiface6_elem {
 	const char *iface;
 };
 
-struct hash_netiface6t_elem {
-	union nf_inet_addr ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	unsigned long timeout;
-};
-
-struct hash_netiface6c_elem {
-	union nf_inet_addr ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-};
-
-struct hash_netiface6ct_elem {
-	union nf_inet_addr ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index cab236625f97..45b6e91b0636 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -45,7 +45,7 @@ MODULE_ALIAS("ip_set_hash:net,port");
  */
 #define IP_SET_HASH_WITH_NETS_PACKED
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_netport4_elem {
@@ -56,34 +56,6 @@ struct hash_netport4_elem {
 	u8 nomatch:1;
 };
 
-struct hash_netport4t_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	unsigned long timeout;
-};
-
-struct hash_netport4c_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-};
-
-struct hash_netport4ct_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -287,7 +259,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_netport6_elem {
 	union nf_inet_addr ip;
@@ -297,34 +269,6 @@ struct hash_netport6_elem {
 	u8 nomatch:1;
 };
 
-struct hash_netport6t_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	unsigned long timeout;
-};
-
-struct hash_netport6c_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-};
-
-struct hash_netport6ct_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index f22d05d6e366..7fd11c79aff4 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -28,28 +28,6 @@ struct set_elem {
 	ip_set_id_t id;
 };
 
-struct sett_elem {
-	struct {
-		ip_set_id_t id;
-	} __attribute__ ((aligned));
-	unsigned long timeout;
-};
-
-struct setc_elem {
-	struct {
-		ip_set_id_t id;
-	} __attribute__ ((aligned));
-	struct ip_set_counter counter;
-};
-
-struct setct_elem {
-	struct {
-		ip_set_id_t id;
-	} __attribute__ ((aligned));
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 struct set_adt_elem {
 	ip_set_id_t id;
 	ip_set_id_t refid;
@@ -600,21 +578,18 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 
 /* Create list:set type of sets */
 
-static struct list_set *
-init_list_set(struct ip_set *set, u32 size, size_t dsize,
-	      unsigned long timeout)
+static bool
+init_list_set(struct ip_set *set, u32 size)
 {
 	struct list_set *map;
 	struct set_elem *e;
 	u32 i;
 
-	map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);
+	map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL);
 	if (!map)
-		return NULL;
+		return false;
 
 	map->size = size;
-	set->dsize = dsize;
-	set->timeout = timeout;
 	set->data = map;
 
 	for (i = 0; i < size; i++) {
@@ -622,15 +597,13 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,
 		e->id = IPSET_INVALID_ID;
 	}
 
-	return map;
+	return true;
 }
 
 static int
 list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
-	struct list_set *map;
-	u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0;
-	unsigned long timeout = 0;
+	u32 size = IP_SET_LIST_DEFAULT_SIZE;
 
 	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
@@ -642,45 +615,13 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	if (size < IP_SET_LIST_MIN_SIZE)
 		size = IP_SET_LIST_MIN_SIZE;
 
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (tb[IPSET_ATTR_TIMEOUT])
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	set->variant = &set_variant;
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map = init_list_set(set, size,
-					sizeof(struct setct_elem), timeout);
-			if (!map)
-				return -ENOMEM;
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct setct_elem, timeout);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct setct_elem, counter);
-			list_set_gc_init(set, list_set_gc);
-		} else {
-			map = init_list_set(set, size,
-					    sizeof(struct setc_elem), 0);
-			if (!map)
-				return -ENOMEM;
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct setc_elem, counter);
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map = init_list_set(set, size,
-				    sizeof(struct sett_elem), timeout);
-		if (!map)
-			return -ENOMEM;
-		set->extensions |= IPSET_EXT_TIMEOUT;
-		set->offset[IPSET_EXT_ID_TIMEOUT] =
-			offsetof(struct sett_elem, timeout);
+	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+	if (!init_list_set(set, size))
+		return -ENOMEM;
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		list_set_gc_init(set, list_set_gc);
-	} else {
-		map = init_list_set(set, size, sizeof(struct set_elem), 0);
-		if (!map)
-			return -ENOMEM;
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 40cd63bf33b21ef4d43776b1d49c605f876fe32c Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 9 Sep 2013 14:44:29 +0200
Subject: netfilter: ipset: Support extensions which need a per data destroy
 function

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h  | 22 +++++++---
 net/netfilter/ipset/ip_set_bitmap_gen.h | 38 ++++++++++++++----
 net/netfilter/ipset/ip_set_hash_gen.h   | 71 +++++++++++++++++++++------------
 net/netfilter/ipset/ip_set_list_set.c   | 19 ++++++---
 4 files changed, 107 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 66d6bd404d64..6372ee224fe8 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -49,11 +49,13 @@ enum ip_set_feature {
 
 /* Set extensions */
 enum ip_set_extension {
-	IPSET_EXT_NONE = 0,
-	IPSET_EXT_BIT_TIMEOUT = 1,
+	IPSET_EXT_BIT_TIMEOUT = 0,
 	IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT),
-	IPSET_EXT_BIT_COUNTER = 2,
+	IPSET_EXT_BIT_COUNTER = 1,
 	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
+	/* Mark set with an extension which needs to call destroy */
+	IPSET_EXT_BIT_DESTROY = 7,
+	IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
 };
 
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
@@ -68,6 +70,8 @@ enum ip_set_ext_id {
 
 /* Extension type */
 struct ip_set_ext_type {
+	/* Destroy extension private data (can be NULL) */
+	void (*destroy)(void *ext);
 	enum ip_set_extension type;
 	enum ipset_cadt_flags flag;
 	/* Size and minimal alignment */
@@ -88,13 +92,21 @@ struct ip_set_counter {
 	atomic64_t packets;
 };
 
+struct ip_set;
+
+static inline void
+ip_set_ext_destroy(struct ip_set *set, void *data)
+{
+	/* Check that the extension is enabled for the set and
+	 * call it's destroy function for its extension part in data.
+	 */
+}
+
 #define ext_timeout(e, s)	\
 (unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, s)	\
 (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
 
-struct ip_set;
-
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
 			   const struct ip_set_ext *ext,
 			   struct ip_set_ext *mext, u32 cmdflags);
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index f32ddbca3923..4515fe8b83dd 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -12,6 +12,7 @@
 #define mtype_gc_test		IPSET_TOKEN(MTYPE, _gc_test)
 #define mtype_is_filled		IPSET_TOKEN(MTYPE, _is_filled)
 #define mtype_do_add		IPSET_TOKEN(MTYPE, _do_add)
+#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
 #define mtype_do_del		IPSET_TOKEN(MTYPE, _do_del)
 #define mtype_do_list		IPSET_TOKEN(MTYPE, _do_list)
 #define mtype_do_head		IPSET_TOKEN(MTYPE, _do_head)
@@ -46,6 +47,17 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	add_timer(&map->gc);
 }
 
+static void
+mtype_ext_cleanup(struct ip_set *set)
+{
+	struct mtype *map = set->data;
+	u32 id;
+
+	for (id = 0; id < map->elements; id++)
+		if (test_bit(id, map->members))
+			ip_set_ext_destroy(set, get_ext(set, map, id));
+}
+
 static void
 mtype_destroy(struct ip_set *set)
 {
@@ -55,8 +67,11 @@ mtype_destroy(struct ip_set *set)
 		del_timer_sync(&map->gc);
 
 	ip_set_free(map->members);
-	if (set->dsize)
+	if (set->dsize) {
+		if (set->extensions & IPSET_EXT_DESTROY)
+			mtype_ext_cleanup(set);
 		ip_set_free(map->extensions);
+	}
 	kfree(map);
 
 	set->data = NULL;
@@ -67,6 +82,8 @@ mtype_flush(struct ip_set *set)
 {
 	struct mtype *map = set->data;
 
+	if (set->extensions & IPSET_EXT_DESTROY)
+		mtype_ext_cleanup(set);
 	memset(map->members, 0, map->memsize);
 }
 
@@ -132,6 +149,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 			ret = 0;
 		else if (!(flags & IPSET_FLAG_EXIST))
 			return -IPSET_ERR_EXIST;
+		/* Element is re-added, cleanup extensions */
+		ip_set_ext_destroy(set, x);
 	}
 
 	if (SET_WITH_TIMEOUT(set))
@@ -152,11 +171,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	const void *x = get_ext(set, map, e->id);
+	void *x = get_ext(set, map, e->id);
 
-	if (mtype_do_del(e, map) ||
-	    (SET_WITH_TIMEOUT(set) &&
-	     ip_set_timeout_expired(ext_timeout(x, set))))
+	if (mtype_do_del(e, map))
+		return -IPSET_ERR_EXIST;
+
+	ip_set_ext_destroy(set, x);
+	if (SET_WITH_TIMEOUT(set) &&
+	    ip_set_timeout_expired(ext_timeout(x, set)))
 		return -IPSET_ERR_EXIST;
 
 	return 0;
@@ -235,7 +257,7 @@ mtype_gc(unsigned long ul_set)
 {
 	struct ip_set *set = (struct ip_set *) ul_set;
 	struct mtype *map = set->data;
-	const void *x;
+	void *x;
 	u32 id;
 
 	/* We run parallel with other readers (test element)
@@ -244,8 +266,10 @@ mtype_gc(unsigned long ul_set)
 	for (id = 0; id < map->elements; id++)
 		if (mtype_gc_test(id, map, set->dsize)) {
 			x = get_ext(set, map, id);
-			if (ip_set_timeout_expired(ext_timeout(x, set)))
+			if (ip_set_timeout_expired(ext_timeout(x, set))) {
 				clear_bit(id, map->members);
+				ip_set_ext_destroy(set, x);
+			}
 		}
 	read_unlock_bh(&set->lock);
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 3999f1719f69..3c26e5b946f5 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -117,23 +117,6 @@ htable_bits(u32 hashsize)
 	return bits;
 }
 
-/* Destroy the hashtable part of the set */
-static void
-ahash_destroy(struct htable *t)
-{
-	struct hbucket *n;
-	u32 i;
-
-	for (i = 0; i < jhash_size(t->htable_bits); i++) {
-		n = hbucket(t, i);
-		if (n->size)
-			/* FIXME: use slab cache */
-			kfree(n->value);
-	}
-
-	ip_set_free(t);
-}
-
 static int
 hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 {
@@ -192,6 +175,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #undef mtype_data_next
 #undef mtype_elem
 
+#undef mtype_ahash_destroy
+#undef mtype_ext_cleanup
 #undef mtype_add_cidr
 #undef mtype_del_cidr
 #undef mtype_ahash_memsize
@@ -230,6 +215,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
 #define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
 #define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_ahash_destroy	IPSET_TOKEN(MTYPE, _ahash_destroy)
+#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
 #define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
 #define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
 #define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
@@ -359,6 +346,19 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t,
 	return memsize;
 }
 
+/* Get the ith element from the array block n */
+#define ahash_data(n, i, dsize)	\
+	((struct mtype_elem *)((n)->value + ((i) * (dsize))))
+
+static void
+mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
+{
+	int i;
+
+	for (i = 0; i < n->pos; i++)
+		ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
+}
+
 /* Flush a hash type of set: destroy all elements */
 static void
 mtype_flush(struct ip_set *set)
@@ -372,6 +372,8 @@ mtype_flush(struct ip_set *set)
 	for (i = 0; i < jhash_size(t->htable_bits); i++) {
 		n = hbucket(t, i);
 		if (n->size) {
+			if (set->extensions & IPSET_EXT_DESTROY)
+				mtype_ext_cleanup(set, n);
 			n->size = n->pos = 0;
 			/* FIXME: use slab cache */
 			kfree(n->value);
@@ -383,6 +385,26 @@ mtype_flush(struct ip_set *set)
 	h->elements = 0;
 }
 
+/* Destroy the hashtable part of the set */
+static void
+mtype_ahash_destroy(struct ip_set *set, struct htable *t)
+{
+	struct hbucket *n;
+	u32 i;
+
+	for (i = 0; i < jhash_size(t->htable_bits); i++) {
+		n = hbucket(t, i);
+		if (n->size) {
+			if (set->extensions & IPSET_EXT_DESTROY)
+				mtype_ext_cleanup(set, n);
+			/* FIXME: use slab cache */
+			kfree(n->value);
+		}
+	}
+
+	ip_set_free(t);
+}
+
 /* Destroy a hash type of set */
 static void
 mtype_destroy(struct ip_set *set)
@@ -392,7 +414,7 @@ mtype_destroy(struct ip_set *set)
 	if (set->extensions & IPSET_EXT_TIMEOUT)
 		del_timer_sync(&h->gc);
 
-	ahash_destroy(rcu_dereference_bh_nfnl(h->table));
+	mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table));
 #ifdef IP_SET_HASH_WITH_RBTREE
 	rbtree_destroy(&h->rbtree);
 #endif
@@ -430,10 +452,6 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 	       a->extensions == b->extensions;
 }
 
-/* Get the ith element from the array block n */
-#define ahash_data(n, i, dsize)	\
-	((struct mtype_elem *)((n)->value + ((i) * (dsize))))
-
 /* Delete expired elements from the hashtable */
 static void
 mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
@@ -456,6 +474,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 				mtype_del_cidr(h, CIDR(data->cidr),
 					       nets_length, 0);
 #endif
+				ip_set_ext_destroy(set, data);
 				if (j != n->pos - 1)
 					/* Not last one */
 					memcpy(data,
@@ -557,7 +576,7 @@ retry:
 				mtype_data_reset_flags(data, &flags);
 #endif
 				read_unlock_bh(&set->lock);
-				ahash_destroy(t);
+				mtype_ahash_destroy(set, t);
 				if (ret == -EAGAIN)
 					goto retry;
 				return ret;
@@ -578,7 +597,7 @@ retry:
 
 	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
 		 orig->htable_bits, orig, t->htable_bits, t);
-	ahash_destroy(orig);
+	mtype_ahash_destroy(set, orig);
 
 	return 0;
 }
@@ -642,6 +661,7 @@ reuse_slot:
 		mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0);
 		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
+		ip_set_ext_destroy(set, data);
 	} else {
 		/* Use/create a new slot */
 		TUNE_AHASH_MAX(h, multi);
@@ -707,6 +727,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 #ifdef IP_SET_HASH_WITH_NETS
 		mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
+		ip_set_ext_destroy(set, data);
 		if (n->pos + AHASH_INIT_SIZE < n->size) {
 			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
 					    * set->dsize,
@@ -1033,7 +1054,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	rcu_assign_pointer(h->table, t);
 
 	set->data = h;
-	if (set->family ==  NFPROTO_IPV4) {
+	if (set->family == NFPROTO_IPV4) {
 		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
 		set->dsize = ip_set_elem_len(set, tb,
 				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 7fd11c79aff4..e44986af1fc2 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -168,16 +168,19 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
 	struct set_elem *e = list_set_elem(set, map, i);
 
 	if (e->id != IPSET_INVALID_ID) {
-		if (i == map->size - 1)
+		if (i == map->size - 1) {
 			/* Last element replaced: e.g. add new,before,last */
 			ip_set_put_byindex(e->id);
-		else {
+			ip_set_ext_destroy(set, e);
+		} else {
 			struct set_elem *x = list_set_elem(set, map,
 							   map->size - 1);
 
 			/* Last element pushed off */
-			if (x->id != IPSET_INVALID_ID)
+			if (x->id != IPSET_INVALID_ID) {
 				ip_set_put_byindex(x->id);
+				ip_set_ext_destroy(set, x);
+			}
 			memmove(list_set_elem(set, map, i + 1), e,
 				set->dsize * (map->size - (i + 1)));
 		}
@@ -198,6 +201,7 @@ list_set_del(struct ip_set *set, u32 i)
 	struct set_elem *e = list_set_elem(set, map, i);
 
 	ip_set_put_byindex(e->id);
+	ip_set_ext_destroy(set, e);
 
 	if (i < map->size - 1)
 		memmove(e, list_set_elem(set, map, i + 1),
@@ -266,14 +270,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	bool flag_exist = flags & IPSET_FLAG_EXIST;
 	u32 i, ret = 0;
 
+	if (SET_WITH_TIMEOUT(set))
+		set_cleanup_entries(set);
+
 	/* Check already added element */
 	for (i = 0; i < map->size; i++) {
 		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto insert;
-		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, set)))
-			continue;
 		else if (e->id != d->id)
 			continue;
 
@@ -286,6 +290,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 			/* Can't re-add */
 			return -IPSET_ERR_EXIST;
 		/* Update extensions */
+		ip_set_ext_destroy(set, e);
+
 		if (SET_WITH_TIMEOUT(set))
 			ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
 		if (SET_WITH_COUNTER(set))
@@ -423,6 +429,7 @@ list_set_flush(struct ip_set *set)
 		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID) {
 			ip_set_put_byindex(e->id);
+			ip_set_ext_destroy(set, e);
 			e->id = IPSET_INVALID_ID;
 		}
 	}
-- 
cgit v1.2.3


From 68b63f08d22f23161c43cd2417104aa213ff877f Mon Sep 17 00:00:00 2001
From: Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
Date: Sun, 22 Sep 2013 20:56:30 +0200
Subject: netfilter: ipset: Support comments for ipset entries in the core.

This adds the core support for having comments on ipset entries.

The comments are stored as standard null-terminated strings in
dynamically allocated memory after being passed to the kernel. As a
result of this, code has been added to the generic destroy function to
iterate all extensions and call that extension's destroy task if the set
has that extension activated, and if such a task is defined.

Signed-off-by: Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h         | 51 +++++++++++++++++++----
 include/linux/netfilter/ipset/ip_set_comment.h | 57 ++++++++++++++++++++++++++
 include/uapi/linux/netfilter/ipset/ip_set.h    |  8 +++-
 net/netfilter/ipset/ip_set_core.c              | 14 +++++++
 4 files changed, 121 insertions(+), 9 deletions(-)
 create mode 100644 include/linux/netfilter/ipset/ip_set_comment.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 6372ee224fe8..407f84df6a47 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -53,6 +53,8 @@ enum ip_set_extension {
 	IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT),
 	IPSET_EXT_BIT_COUNTER = 1,
 	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
+	IPSET_EXT_BIT_COMMENT = 2,
+	IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
 	/* Mark set with an extension which needs to call destroy */
 	IPSET_EXT_BIT_DESTROY = 7,
 	IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
@@ -60,11 +62,13 @@ enum ip_set_extension {
 
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
+#define SET_WITH_COMMENT(s)	((s)->extensions & IPSET_EXT_COMMENT)
 
 /* Extension id, in size order */
 enum ip_set_ext_id {
 	IPSET_EXT_ID_COUNTER = 0,
 	IPSET_EXT_ID_TIMEOUT,
+	IPSET_EXT_ID_COMMENT,
 	IPSET_EXT_ID_MAX,
 };
 
@@ -85,6 +89,7 @@ struct ip_set_ext {
 	u64 packets;
 	u64 bytes;
 	u32 timeout;
+	char *comment;
 };
 
 struct ip_set_counter {
@@ -92,20 +97,19 @@ struct ip_set_counter {
 	atomic64_t packets;
 };
 
-struct ip_set;
+struct ip_set_comment {
+	char *str;
+};
 
-static inline void
-ip_set_ext_destroy(struct ip_set *set, void *data)
-{
-	/* Check that the extension is enabled for the set and
-	 * call it's destroy function for its extension part in data.
-	 */
-}
+struct ip_set;
 
 #define ext_timeout(e, s)	\
 (unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, s)	\
 (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
+#define ext_comment(e, s)	\
+(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])
+
 
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
 			   const struct ip_set_ext *ext,
@@ -222,6 +226,36 @@ struct ip_set {
 	void *data;
 };
 
+static inline void
+ip_set_ext_destroy(struct ip_set *set, void *data)
+{
+	/* Check that the extension is enabled for the set and
+	 * call it's destroy function for its extension part in data.
+	 */
+	if (SET_WITH_COMMENT(set))
+		ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(
+			ext_comment(data, set));
+}
+
+static inline int
+ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
+{
+	u32 cadt_flags = 0;
+
+	if (SET_WITH_TIMEOUT(set))
+		if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+					   htonl(set->timeout))))
+			return -EMSGSIZE;
+	if (SET_WITH_COUNTER(set))
+		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
+	if (SET_WITH_COMMENT(set))
+		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+
+	if (!cadt_flags)
+		return 0;
+	return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
+}
+
 static inline void
 ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
 {
@@ -425,6 +459,7 @@ bitmap_bytes(u32 a, u32 b)
 }
 
 #include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_comment.h>
 
 #define IP_SET_INIT_KEXT(skb, opt, set)			\
 	{ .bytes = (skb)->len, .packets = 1,		\
diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
new file mode 100644
index 000000000000..21217ea008d7
--- /dev/null
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -0,0 +1,57 @@
+#ifndef _IP_SET_COMMENT_H
+#define _IP_SET_COMMENT_H
+
+/* Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef __KERNEL__
+
+static inline char*
+ip_set_comment_uget(struct nlattr *tb)
+{
+	return nla_data(tb);
+}
+
+static inline void
+ip_set_init_comment(struct ip_set_comment *comment,
+		    const struct ip_set_ext *ext)
+{
+	size_t len = ext->comment ? strlen(ext->comment) : 0;
+
+	if (unlikely(comment->str)) {
+		kfree(comment->str);
+		comment->str = NULL;
+	}
+	if (!len)
+		return;
+	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
+		len = IPSET_MAX_COMMENT_SIZE;
+	comment->str = kzalloc(len + 1, GFP_ATOMIC);
+	if (unlikely(!comment->str))
+		return;
+	strlcpy(comment->str, ext->comment, len + 1);
+}
+
+static inline int
+ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
+{
+	if (!comment->str)
+		return 0;
+	return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
+}
+
+static inline void
+ip_set_comment_free(struct ip_set_comment *comment)
+{
+	if (unlikely(!comment->str))
+		return;
+	kfree(comment->str);
+	comment->str = NULL;
+}
+
+#endif
+#endif
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 2b61ac44dcc1..25d3b2f79c02 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -10,12 +10,14 @@
 #ifndef _UAPI_IP_SET_H
 #define _UAPI_IP_SET_H
 
-
 #include <linux/types.h>
 
 /* The protocol version */
 #define IPSET_PROTOCOL		6
 
+/* The maximum permissible comment length we will accept over netlink */
+#define IPSET_MAX_COMMENT_SIZE	255
+
 /* The max length of strings including NUL: set and type identifiers */
 #define IPSET_MAXNAMELEN	32
 
@@ -110,6 +112,7 @@ enum {
 	IPSET_ATTR_IFACE,
 	IPSET_ATTR_BYTES,
 	IPSET_ATTR_PACKETS,
+	IPSET_ATTR_COMMENT,
 	__IPSET_ATTR_ADT_MAX,
 };
 #define IPSET_ATTR_ADT_MAX	(__IPSET_ATTR_ADT_MAX - 1)
@@ -140,6 +143,7 @@ enum ipset_errno {
 	IPSET_ERR_IPADDR_IPV4,
 	IPSET_ERR_IPADDR_IPV6,
 	IPSET_ERR_COUNTER,
+	IPSET_ERR_COMMENT,
 
 	/* Type specific error codes */
 	IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -176,6 +180,8 @@ enum ipset_cadt_flags {
 	IPSET_FLAG_NOMATCH	= (1 << IPSET_FLAG_BIT_NOMATCH),
 	IPSET_FLAG_BIT_WITH_COUNTERS = 3,
 	IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS),
+	IPSET_FLAG_BIT_WITH_COMMENT = 4,
+	IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
 	IPSET_FLAG_CADT_MAX	= 15,
 };
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index f35afed3814f..3bf9a3d29dff 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -315,6 +315,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 
+typedef void (*destroyer)(void *);
 /* ipset data extension types, in size order */
 
 const struct ip_set_ext_type ip_set_extensions[] = {
@@ -329,6 +330,13 @@ const struct ip_set_ext_type ip_set_extensions[] = {
 		.len	= sizeof(unsigned long),
 		.align	= __alignof__(unsigned long),
 	},
+	[IPSET_EXT_ID_COMMENT] = {
+		.type	 = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
+		.flag	 = IPSET_FLAG_WITH_COMMENT,
+		.len	 = sizeof(struct ip_set_comment),
+		.align	 = __alignof__(struct ip_set_comment),
+		.destroy = (destroyer) ip_set_comment_free,
+	},
 };
 EXPORT_SYMBOL_GPL(ip_set_extensions);
 
@@ -380,6 +388,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 			ext->packets = be64_to_cpu(nla_get_be64(
 						   tb[IPSET_ATTR_PACKETS]));
 	}
+	if (tb[IPSET_ATTR_COMMENT]) {
+		if (!(set->extensions & IPSET_EXT_COMMENT))
+			return -IPSET_ERR_COMMENT;
+		ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
-- 
cgit v1.2.3


From 3fd986b3d99e3847f1cce6fc36043d0f16508e1d Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Wed, 25 Sep 2013 17:44:35 +0200
Subject: netfilter: ipset: Use a common function at listing the extensions

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h    | 21 +++++++++++++++++++++
 net/netfilter/ipset/ip_set_bitmap_gen.h   | 29 ++++++++++-------------------
 net/netfilter/ipset/ip_set_bitmap_ipmac.c | 11 -----------
 net/netfilter/ipset/ip_set_hash_gen.h     | 11 +----------
 net/netfilter/ipset/ip_set_list_set.c     | 11 +----------
 5 files changed, 33 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 407f84df6a47..da2a45acf74c 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -461,6 +461,27 @@ bitmap_bytes(u32 a, u32 b)
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_comment.h>
 
+static inline int
+ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
+		      const void *e, bool active)
+{
+	if (SET_WITH_TIMEOUT(set)) {
+		unsigned long *timeout = ext_timeout(e, set);
+
+		if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+			htonl(active ? ip_set_timeout_get(timeout)
+				: *timeout)))
+			return -EMSGSIZE;
+	}
+	if (SET_WITH_COUNTER(set) &&
+	    ip_set_put_counter(skb, ext_counter(e, set)))
+		return -EMSGSIZE;
+	if (SET_WITH_COMMENT(set) &&
+	    ip_set_put_comment(skb, ext_comment(e, set)))
+		return -EMSGSIZE;
+	return 0;
+}
+
 #define IP_SET_INIT_KEXT(skb, opt, set)			\
 	{ .bytes = (skb)->len, .packets = 1,		\
 	  .timeout = ip_set_adt_opt_timeout(opt, set) }
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 6167fc9d0efe..a13e15be7911 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -183,6 +183,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	return 0;
 }
 
+#ifndef IP_SET_BITMAP_STORED_TIMEOUT
+static inline bool
+mtype_is_filled(const struct mtype_elem *x)
+{
+	return true;
+}
+#endif
+
 static int
 mtype_list(const struct ip_set *set,
 	   struct sk_buff *skb, struct netlink_callback *cb)
@@ -215,25 +223,8 @@ mtype_list(const struct ip_set *set,
 		}
 		if (mtype_do_list(skb, map, id, set->dsize))
 			goto nla_put_failure;
-		if (SET_WITH_TIMEOUT(set)) {
-#ifdef IP_SET_BITMAP_STORED_TIMEOUT
-			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-					  htonl(ip_set_timeout_stored(map, id,
-							ext_timeout(x, set),
-							set->dsize))))
-				goto nla_put_failure;
-#else
-			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-					  htonl(ip_set_timeout_get(
-							ext_timeout(x, set)))))
-				goto nla_put_failure;
-#endif
-		}
-		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(x, set)))
-			goto nla_put_failure;
-		if (SET_WITH_COMMENT(set) &&
-		    ip_set_put_comment(skb, ext_comment(x, set)))
+		if (ip_set_put_extensions(skb, set, x,
+		    mtype_is_filled((const struct mtype_elem *) x)))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index fb4d163dea82..87a218f8ab5f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -176,17 +176,6 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
 	return !test_and_clear_bit(e->id, map->members);
 }
 
-static inline unsigned long
-ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout,
-		      size_t dsize)
-{
-	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, dsize);
-
-	return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) :
-					    *timeout;
-}
-
 static inline int
 bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
 		     u32 id, size_t dsize)
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 1cffeb977605..b4add206c603 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -961,16 +961,7 @@ mtype_list(const struct ip_set *set,
 			}
 			if (mtype_data_list(skb, e))
 				goto nla_put_failure;
-			if (SET_WITH_TIMEOUT(set) &&
-			    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-					  htonl(ip_set_timeout_get(
-						ext_timeout(e, set)))))
-				goto nla_put_failure;
-			if (SET_WITH_COUNTER(set) &&
-			    ip_set_put_counter(skb, ext_counter(e, set)))
-				goto nla_put_failure;
-			if (SET_WITH_COMMENT(set) &&
-			    ip_set_put_comment(skb, ext_comment(e, set)))
+			if (ip_set_put_extensions(skb, set, e, true))
 				goto nla_put_failure;
 			ipset_nest_end(skb, nested);
 		}
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e23f33c14435..ba4232e451f4 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -512,16 +512,7 @@ list_set_list(const struct ip_set *set,
 		if (nla_put_string(skb, IPSET_ATTR_NAME,
 				   ip_set_name_byindex(e->id)))
 			goto nla_put_failure;
-		if (SET_WITH_TIMEOUT(set) &&
-		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-				  htonl(ip_set_timeout_get(
-						ext_timeout(e, set)))))
-			goto nla_put_failure;
-		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(e, set)))
-			goto nla_put_failure;
-		if (SET_WITH_COMMENT(set) &&
-		    ip_set_put_comment(skb, ext_comment(e, set)))
+		if (ip_set_put_extensions(skb, set, e, true))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
-- 
cgit v1.2.3


From 1785e8f473082aa60d62c7165856cf6484077b99 Mon Sep 17 00:00:00 2001
From: Vitaly Lavrov <lve@guap.ru>
Date: Mon, 30 Sep 2013 17:07:02 +0200
Subject: netfiler: ipset: Add net namespace for ipset

This patch adds netns support for ipset.

Major changes were made in ip_set_core.c and ip_set.h.
Global variables are moved to per net namespace.
Added initialization code and the destruction of the network namespace ipset subsystem.
In the prototypes of public functions ip_set_* added parameter "struct net*".

The remaining corrections related to the change prototypes of public functions ip_set_*.

The patch for git://git.netfilter.org/ipset.git commit 6a4ec96c0b8caac5c35474e40e319704d92ca347

Signed-off-by: Vitaly Lavrov <lve@guap.ru>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h    |  16 +-
 net/netfilter/ipset/ip_set_bitmap_ip.c    |   3 +-
 net/netfilter/ipset/ip_set_bitmap_ipmac.c |   2 +-
 net/netfilter/ipset/ip_set_bitmap_port.c  |   3 +-
 net/netfilter/ipset/ip_set_core.c         | 288 +++++++++++++++++++-----------
 net/netfilter/ipset/ip_set_hash_gen.h     |   3 +-
 net/netfilter/ipset/ip_set_list_set.c     |  31 ++--
 net/netfilter/xt_set.c                    |  40 ++---
 net/sched/em_ipset.c                      |   7 +-
 9 files changed, 245 insertions(+), 148 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index da2a45acf74c..7967516adc0d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -184,7 +184,8 @@ struct ip_set_type {
 	u8 revision_min, revision_max;
 
 	/* Create set */
-	int (*create)(struct ip_set *set, struct nlattr *tb[], u32 flags);
+	int (*create)(struct net *net, struct ip_set *set,
+		      struct nlattr *tb[], u32 flags);
 
 	/* Attribute policies */
 	const struct nla_policy create_policy[IPSET_ATTR_CREATE_MAX + 1];
@@ -316,12 +317,13 @@ ip_set_init_counter(struct ip_set_counter *counter,
 }
 
 /* register and unregister set references */
-extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set);
-extern void ip_set_put_byindex(ip_set_id_t index);
-extern const char *ip_set_name_byindex(ip_set_id_t index);
-extern ip_set_id_t ip_set_nfnl_get(const char *name);
-extern ip_set_id_t ip_set_nfnl_get_byindex(ip_set_id_t index);
-extern void ip_set_nfnl_put(ip_set_id_t index);
+extern ip_set_id_t ip_set_get_byname(struct net *net,
+				     const char *name, struct ip_set **set);
+extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
+extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
+extern ip_set_id_t ip_set_nfnl_get(struct net *net, const char *name);
+extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
+extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
 
 /* API for iptables set match, and SET target */
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index faac124e2645..6f1f9f494808 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -242,7 +242,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 }
 
 static int
-bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+		 u32 flags)
 {
 	struct bitmap_ip *map;
 	u32 first_ip = 0, last_ip = 0, hosts;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 87a218f8ab5f..740eabededd9 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -309,7 +309,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 }
 
 static int
-bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
+bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		    u32 flags)
 {
 	u32 first_ip = 0, last_ip = 0;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 407a63caee6b..e7603c5b53d7 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -228,7 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 }
 
 static int
-bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+		   u32 flags)
 {
 	struct bitmap_port *map;
 	u16 first_port, last_port;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 3bf9a3d29dff..dc9284bdd2dd 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -17,6 +17,8 @@
 #include <linux/spinlock.h>
 #include <linux/rculist.h>
 #include <net/netlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
@@ -27,8 +29,17 @@ static LIST_HEAD(ip_set_type_list);		/* all registered set types */
 static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
 static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */
 
-static struct ip_set * __rcu *ip_set_list;	/* all individual sets */
-static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
+struct ip_set_net {
+	struct ip_set * __rcu *ip_set_list;	/* all individual sets */
+	ip_set_id_t	ip_set_max;	/* max number of sets */
+	int		is_deleted;	/* deleted by ip_set_net_exit */
+};
+static int ip_set_net_id __read_mostly;
+
+static inline struct ip_set_net *ip_set_pernet(struct net *net)
+{
+	return net_generic(net, ip_set_net_id);
+}
 
 #define IP_SET_INC	64
 #define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)
@@ -45,8 +56,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
 /* When the nfnl mutex is held: */
 #define nfnl_dereference(p)		\
 	rcu_dereference_protected(p, 1)
-#define nfnl_set(id)			\
-	nfnl_dereference(ip_set_list)[id]
+#define nfnl_set(inst, id)			\
+	nfnl_dereference((inst)->ip_set_list)[id]
 
 /*
  * The set types are implemented in modules and registered set types
@@ -434,13 +445,14 @@ __ip_set_put(struct ip_set *set)
  */
 
 static inline struct ip_set *
-ip_set_rcu_get(ip_set_id_t index)
+ip_set_rcu_get(struct net *net, ip_set_id_t index)
 {
 	struct ip_set *set;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
 	rcu_read_lock();
 	/* ip_set_list itself needs to be protected */
-	set = rcu_dereference(ip_set_list)[index];
+	set = rcu_dereference(inst->ip_set_list)[index];
 	rcu_read_unlock();
 
 	return set;
@@ -450,7 +462,8 @@ int
 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 	    const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(index);
+	struct ip_set *set = ip_set_rcu_get(
+			dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
 	BUG_ON(set == NULL);
@@ -488,7 +501,8 @@ int
 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(index);
+	struct ip_set *set = ip_set_rcu_get(
+			dev_net(par->in ? par->in : par->out), index);
 	int ret;
 
 	BUG_ON(set == NULL);
@@ -510,7 +524,8 @@ int
 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(index);
+	struct ip_set *set = ip_set_rcu_get(
+			dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
 	BUG_ON(set == NULL);
@@ -534,14 +549,15 @@ EXPORT_SYMBOL_GPL(ip_set_del);
  *
  */
 ip_set_id_t
-ip_set_get_byname(const char *name, struct ip_set **set)
+ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
 {
 	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
 	rcu_read_lock();
-	for (i = 0; i < ip_set_max; i++) {
-		s = rcu_dereference(ip_set_list)[i];
+	for (i = 0; i < inst->ip_set_max; i++) {
+		s = rcu_dereference(inst->ip_set_list)[i];
 		if (s != NULL && STREQ(s->name, name)) {
 			__ip_set_get(s);
 			index = i;
@@ -561,17 +577,26 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
  * to be valid, after calling this function.
  *
  */
-void
-ip_set_put_byindex(ip_set_id_t index)
+
+static inline void
+__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
 {
 	struct ip_set *set;
 
 	rcu_read_lock();
-	set = rcu_dereference(ip_set_list)[index];
+	set = rcu_dereference(inst->ip_set_list)[index];
 	if (set != NULL)
 		__ip_set_put(set);
 	rcu_read_unlock();
 }
+
+void
+ip_set_put_byindex(struct net *net, ip_set_id_t index)
+{
+	struct ip_set_net *inst = ip_set_pernet(net);
+
+	__ip_set_put_byindex(inst, index);
+}
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
 /*
@@ -582,9 +607,9 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex);
  *
  */
 const char *
-ip_set_name_byindex(ip_set_id_t index)
+ip_set_name_byindex(struct net *net, ip_set_id_t index)
 {
-	const struct ip_set *set = ip_set_rcu_get(index);
+	const struct ip_set *set = ip_set_rcu_get(net, index);
 
 	BUG_ON(set == NULL);
 	BUG_ON(set->ref == 0);
@@ -606,14 +631,15 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex);
  * The nfnl mutex is used in the function.
  */
 ip_set_id_t
-ip_set_nfnl_get(const char *name)
+ip_set_nfnl_get(struct net *net, const char *name)
 {
 	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-	for (i = 0; i < ip_set_max; i++) {
-		s = nfnl_set(i);
+	for (i = 0; i < inst->ip_set_max; i++) {
+		s = nfnl_set(inst, i);
 		if (s != NULL && STREQ(s->name, name)) {
 			__ip_set_get(s);
 			index = i;
@@ -633,15 +659,16 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
  * The nfnl mutex is used in the function.
  */
 ip_set_id_t
-ip_set_nfnl_get_byindex(ip_set_id_t index)
+ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
 {
 	struct ip_set *set;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
-	if (index > ip_set_max)
+	if (index > inst->ip_set_max)
 		return IPSET_INVALID_ID;
 
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-	set = nfnl_set(index);
+	set = nfnl_set(inst, index);
 	if (set)
 		__ip_set_get(set);
 	else
@@ -660,13 +687,17 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
  * The nfnl mutex is used in the function.
  */
 void
-ip_set_nfnl_put(ip_set_id_t index)
+ip_set_nfnl_put(struct net *net, ip_set_id_t index)
 {
 	struct ip_set *set;
+	struct ip_set_net *inst = ip_set_pernet(net);
+
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-	set = nfnl_set(index);
-	if (set != NULL)
-		__ip_set_put(set);
+	if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
+		set = nfnl_set(inst, index);
+		if (set != NULL)
+			__ip_set_put(set);
+	}
 	nfnl_unlock(NFNL_SUBSYS_IPSET);
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@@ -724,14 +755,14 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
 };
 
 static struct ip_set *
-find_set_and_id(const char *name, ip_set_id_t *id)
+find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
 {
 	struct ip_set *set = NULL;
 	ip_set_id_t i;
 
 	*id = IPSET_INVALID_ID;
-	for (i = 0; i < ip_set_max; i++) {
-		set = nfnl_set(i);
+	for (i = 0; i < inst->ip_set_max; i++) {
+		set = nfnl_set(inst, i);
 		if (set != NULL && STREQ(set->name, name)) {
 			*id = i;
 			break;
@@ -741,22 +772,23 @@ find_set_and_id(const char *name, ip_set_id_t *id)
 }
 
 static inline struct ip_set *
-find_set(const char *name)
+find_set(struct ip_set_net *inst, const char *name)
 {
 	ip_set_id_t id;
 
-	return find_set_and_id(name, &id);
+	return find_set_and_id(inst, name, &id);
 }
 
 static int
-find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
+find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
+	     struct ip_set **set)
 {
 	struct ip_set *s;
 	ip_set_id_t i;
 
 	*index = IPSET_INVALID_ID;
-	for (i = 0;  i < ip_set_max; i++) {
-		s = nfnl_set(i);
+	for (i = 0;  i < inst->ip_set_max; i++) {
+		s = nfnl_set(inst, i);
 		if (s == NULL) {
 			if (*index == IPSET_INVALID_ID)
 				*index = i;
@@ -785,6 +817,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
+	struct net *net = sock_net(ctnl);
+	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set, *clash = NULL;
 	ip_set_id_t index = IPSET_INVALID_ID;
 	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
@@ -843,7 +877,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		goto put_out;
 	}
 
-	ret = set->type->create(set, tb, flags);
+	ret = set->type->create(net, set, tb, flags);
 	if (ret != 0)
 		goto put_out;
 
@@ -854,7 +888,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	 * by the nfnl mutex. Find the first free index in ip_set_list
 	 * and check clashing.
 	 */
-	ret = find_free_id(set->name, &index, &clash);
+	ret = find_free_id(inst, set->name, &index, &clash);
 	if (ret == -EEXIST) {
 		/* If this is the same set and requested, ignore error */
 		if ((flags & IPSET_FLAG_EXIST) &&
@@ -867,9 +901,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		goto cleanup;
 	} else if (ret == -IPSET_ERR_MAX_SETS) {
 		struct ip_set **list, **tmp;
-		ip_set_id_t i = ip_set_max + IP_SET_INC;
+		ip_set_id_t i = inst->ip_set_max + IP_SET_INC;
 
-		if (i < ip_set_max || i == IPSET_INVALID_ID)
+		if (i < inst->ip_set_max || i == IPSET_INVALID_ID)
 			/* Wraparound */
 			goto cleanup;
 
@@ -877,14 +911,14 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		if (!list)
 			goto cleanup;
 		/* nfnl mutex is held, both lists are valid */
-		tmp = nfnl_dereference(ip_set_list);
-		memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max);
-		rcu_assign_pointer(ip_set_list, list);
+		tmp = nfnl_dereference(inst->ip_set_list);
+		memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
+		rcu_assign_pointer(inst->ip_set_list, list);
 		/* Make sure all current packets have passed through */
 		synchronize_net();
 		/* Use new list */
-		index = ip_set_max;
-		ip_set_max = i;
+		index = inst->ip_set_max;
+		inst->ip_set_max = i;
 		kfree(tmp);
 		ret = 0;
 	} else if (ret)
@@ -894,7 +928,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	 * Finally! Add our shiny new set to the list, and be done.
 	 */
 	pr_debug("create: '%s' created with index %u!\n", set->name, index);
-	nfnl_set(index) = set;
+	nfnl_set(inst, index) = set;
 
 	return ret;
 
@@ -917,12 +951,12 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
 };
 
 static void
-ip_set_destroy_set(ip_set_id_t index)
+ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
 {
-	struct ip_set *set = nfnl_set(index);
+	struct ip_set *set = nfnl_set(inst, index);
 
 	pr_debug("set: %s\n",  set->name);
-	nfnl_set(index) = NULL;
+	nfnl_set(inst, index) = NULL;
 
 	/* Must call it without holding any lock */
 	set->variant->destroy(set);
@@ -935,6 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	       const struct nlmsghdr *nlh,
 	       const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *s;
 	ip_set_id_t i;
 	int ret = 0;
@@ -954,21 +989,22 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	 */
 	read_lock_bh(&ip_set_ref_lock);
 	if (!attr[IPSET_ATTR_SETNAME]) {
-		for (i = 0; i < ip_set_max; i++) {
-			s = nfnl_set(i);
+		for (i = 0; i < inst->ip_set_max; i++) {
+			s = nfnl_set(inst, i);
 			if (s != NULL && s->ref) {
 				ret = -IPSET_ERR_BUSY;
 				goto out;
 			}
 		}
 		read_unlock_bh(&ip_set_ref_lock);
-		for (i = 0; i < ip_set_max; i++) {
-			s = nfnl_set(i);
+		for (i = 0; i < inst->ip_set_max; i++) {
+			s = nfnl_set(inst, i);
 			if (s != NULL)
-				ip_set_destroy_set(i);
+				ip_set_destroy_set(inst, i);
 		}
 	} else {
-		s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i);
+		s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+				    &i);
 		if (s == NULL) {
 			ret = -ENOENT;
 			goto out;
@@ -978,7 +1014,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 		}
 		read_unlock_bh(&ip_set_ref_lock);
 
-		ip_set_destroy_set(i);
+		ip_set_destroy_set(inst, i);
 	}
 	return 0;
 out:
@@ -1003,6 +1039,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh,
 	     const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *s;
 	ip_set_id_t i;
 
@@ -1010,13 +1047,13 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
 		return -IPSET_ERR_PROTOCOL;
 
 	if (!attr[IPSET_ATTR_SETNAME]) {
-		for (i = 0; i < ip_set_max; i++) {
-			s = nfnl_set(i);
+		for (i = 0; i < inst->ip_set_max; i++) {
+			s = nfnl_set(inst, i);
 			if (s != NULL)
 				ip_set_flush_set(s);
 		}
 	} else {
-		s = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+		s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 		if (s == NULL)
 			return -ENOENT;
 
@@ -1042,6 +1079,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set, *s;
 	const char *name2;
 	ip_set_id_t i;
@@ -1052,7 +1090,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME2] == NULL))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1063,8 +1101,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
-	for (i = 0; i < ip_set_max; i++) {
-		s = nfnl_set(i);
+	for (i = 0; i < inst->ip_set_max; i++) {
+		s = nfnl_set(inst, i);
 		if (s != NULL && STREQ(s->name, name2)) {
 			ret = -IPSET_ERR_EXIST_SETNAME2;
 			goto out;
@@ -1091,6 +1129,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *from, *to;
 	ip_set_id_t from_id, to_id;
 	char from_name[IPSET_MAXNAMELEN];
@@ -1100,11 +1139,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME2] == NULL))
 		return -IPSET_ERR_PROTOCOL;
 
-	from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id);
+	from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+			       &from_id);
 	if (from == NULL)
 		return -ENOENT;
 
-	to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id);
+	to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
+			     &to_id);
 	if (to == NULL)
 		return -IPSET_ERR_EXIST_SETNAME2;
 
@@ -1121,8 +1162,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 
 	write_lock_bh(&ip_set_ref_lock);
 	swap(from->ref, to->ref);
-	nfnl_set(from_id) = to;
-	nfnl_set(to_id) = from;
+	nfnl_set(inst, from_id) = to;
+	nfnl_set(inst, to_id) = from;
 	write_unlock_bh(&ip_set_ref_lock);
 
 	return 0;
@@ -1141,9 +1182,10 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 static int
 ip_set_dump_done(struct netlink_callback *cb)
 {
+	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 	if (cb->args[2]) {
-		pr_debug("release set %s\n", nfnl_set(cb->args[1])->name);
-		ip_set_put_byindex((ip_set_id_t) cb->args[1]);
+		pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name);
+		__ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]);
 	}
 	return 0;
 }
@@ -1169,6 +1211,7 @@ dump_init(struct netlink_callback *cb)
 	struct nlattr *attr = (void *)nlh + min_len;
 	u32 dump_type;
 	ip_set_id_t index;
+	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 
 	/* Second pass, so parser can't fail */
 	nla_parse(cda, IPSET_ATTR_CMD_MAX,
@@ -1182,7 +1225,7 @@ dump_init(struct netlink_callback *cb)
 	if (cda[IPSET_ATTR_SETNAME]) {
 		struct ip_set *set;
 
-		set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]),
+		set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
 				      &index);
 		if (set == NULL)
 			return -ENOENT;
@@ -1210,6 +1253,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
 	u32 dump_type, dump_flags;
 	int ret = 0;
+	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 
 	if (!cb->args[0]) {
 		ret = dump_init(cb);
@@ -1223,18 +1267,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		}
 	}
 
-	if (cb->args[1] >= ip_set_max)
+	if (cb->args[1] >= inst->ip_set_max)
 		goto out;
 
 	dump_type = DUMP_TYPE(cb->args[0]);
 	dump_flags = DUMP_FLAGS(cb->args[0]);
-	max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
+	max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max;
 dump_last:
 	pr_debug("args[0]: %u %u args[1]: %ld\n",
 		 dump_type, dump_flags, cb->args[1]);
 	for (; cb->args[1] < max; cb->args[1]++) {
 		index = (ip_set_id_t) cb->args[1];
-		set = nfnl_set(index);
+		set = nfnl_set(inst, index);
 		if (set == NULL) {
 			if (dump_type == DUMP_ONE) {
 				ret = -ENOENT;
@@ -1312,8 +1356,8 @@ next_set:
 release_refcount:
 	/* If there was an error or set is done, release set */
 	if (ret || !cb->args[2]) {
-		pr_debug("release set %s\n", nfnl_set(index)->name);
-		ip_set_put_byindex(index);
+		pr_debug("release set %s\n", nfnl_set(inst, index)->name);
+		__ip_set_put_byindex(inst, index);
 		cb->args[2] = 0;
 	}
 out:
@@ -1331,6 +1375,8 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+
 	if (unlikely(protocol_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
@@ -1338,6 +1384,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 		struct netlink_dump_control c = {
 			.dump = ip_set_dump_start,
 			.done = ip_set_dump_done,
+			.data = (void *)inst
 		};
 		return netlink_dump_start(ctnl, skb, nlh, &c);
 	}
@@ -1416,6 +1463,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
 	const struct nlattr *nla;
@@ -1434,7 +1482,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 		       attr[IPSET_ATTR_LINENO] == NULL))))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1470,6 +1518,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
 	const struct nlattr *nla;
@@ -1488,7 +1537,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 		       attr[IPSET_ATTR_LINENO] == NULL))))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1524,6 +1573,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh,
 	     const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
 	int ret = 0;
@@ -1534,7 +1584,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 		     !flag_nested(attr[IPSET_ATTR_DATA])))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1559,6 +1609,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	const struct ip_set *set;
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
@@ -1568,7 +1619,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME] == NULL))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1793,8 +1844,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 	unsigned int *op;
 	void *data;
 	int copylen = *len, ret = 0;
+	struct net *net = sock_net(sk);
+	struct ip_set_net *inst = ip_set_pernet(net);
 
-	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 	if (optval != SO_IP_SET)
 		return -EBADF;
@@ -1843,7 +1896,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		}
 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
 		nfnl_lock(NFNL_SUBSYS_IPSET);
-		find_set_and_id(req_get->set.name, &id);
+		find_set_and_id(inst, req_get->set.name, &id);
 		req_get->set.index = id;
 		nfnl_unlock(NFNL_SUBSYS_IPSET);
 		goto copy;
@@ -1858,10 +1911,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		}
 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
 		nfnl_lock(NFNL_SUBSYS_IPSET);
-		find_set_and_id(req_get->set.name, &id);
+		find_set_and_id(inst, req_get->set.name, &id);
 		req_get->set.index = id;
 		if (id != IPSET_INVALID_ID)
-			req_get->family = nfnl_set(id)->family;
+			req_get->family = nfnl_set(inst, id)->family;
 		nfnl_unlock(NFNL_SUBSYS_IPSET);
 		goto copy;
 	}
@@ -1870,12 +1923,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		struct ip_set *set;
 
 		if (*len != sizeof(struct ip_set_req_get_set) ||
-		    req_get->set.index >= ip_set_max) {
+		    req_get->set.index >= inst->ip_set_max) {
 			ret = -EINVAL;
 			goto done;
 		}
 		nfnl_lock(NFNL_SUBSYS_IPSET);
-		set = nfnl_set(req_get->set.index);
+		set = nfnl_set(inst, req_get->set.index);
 		strncpy(req_get->set.name, set ? set->name : "",
 			IPSET_MAXNAMELEN);
 		nfnl_unlock(NFNL_SUBSYS_IPSET);
@@ -1904,49 +1957,82 @@ static struct nf_sockopt_ops so_set __read_mostly = {
 	.owner		= THIS_MODULE,
 };
 
-static int __init
-ip_set_init(void)
+static int __net_init
+ip_set_net_init(struct net *net)
 {
+	struct ip_set_net *inst = ip_set_pernet(net);
+
 	struct ip_set **list;
-	int ret;
 
-	if (max_sets)
-		ip_set_max = max_sets;
-	if (ip_set_max >= IPSET_INVALID_ID)
-		ip_set_max = IPSET_INVALID_ID - 1;
+	inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
+	if (inst->ip_set_max >= IPSET_INVALID_ID)
+		inst->ip_set_max = IPSET_INVALID_ID - 1;
 
-	list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
+	list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL);
 	if (!list)
 		return -ENOMEM;
+	inst->is_deleted = 0;
+	rcu_assign_pointer(inst->ip_set_list, list);
+	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
+	return 0;
+}
+
+static void __net_exit
+ip_set_net_exit(struct net *net)
+{
+	struct ip_set_net *inst = ip_set_pernet(net);
+
+	struct ip_set *set = NULL;
+	ip_set_id_t i;
+
+	inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
+
+	for (i = 0; i < inst->ip_set_max; i++) {
+		set = nfnl_set(inst, i);
+		if (set != NULL)
+			ip_set_destroy_set(inst, i);
+	}
+	kfree(rcu_dereference_protected(inst->ip_set_list, 1));
+}
+
+static struct pernet_operations ip_set_net_ops = {
+	.init	= ip_set_net_init,
+	.exit   = ip_set_net_exit,
+	.id	= &ip_set_net_id,
+	.size	= sizeof(struct ip_set_net)
+};
+
 
-	rcu_assign_pointer(ip_set_list, list);
-	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+static int __init
+ip_set_init(void)
+{
+	int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
 	if (ret != 0) {
 		pr_err("ip_set: cannot register with nfnetlink.\n");
-		kfree(list);
 		return ret;
 	}
 	ret = nf_register_sockopt(&so_set);
 	if (ret != 0) {
 		pr_err("SO_SET registry failed: %d\n", ret);
 		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-		kfree(list);
 		return ret;
 	}
-
-	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
+	ret = register_pernet_subsys(&ip_set_net_ops);
+	if (ret) {
+		pr_err("ip_set: cannot register pernet_subsys.\n");
+		nf_unregister_sockopt(&so_set);
+		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+		return ret;
+	}
 	return 0;
 }
 
 static void __exit
 ip_set_fini(void)
 {
-	struct ip_set **list = rcu_dereference_protected(ip_set_list, 1);
-
-	/* There can't be any existing set */
+	unregister_pernet_subsys(&ip_set_net_ops);
 	nf_unregister_sockopt(&so_set);
 	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-	kfree(list);
 	pr_debug("these are the famous last words\n");
 }
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index b4add206c603..6a80dbd30df7 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1011,7 +1011,8 @@ static const struct ip_set_type_variant mtype_variant = {
 
 #ifdef IP_SET_EMIT_CREATE
 static int
-IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
+IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
+			    struct nlattr *tb[], u32 flags)
 {
 	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
 	u8 hbits;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index ba4232e451f4..ec6f6d15dded 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -39,6 +39,7 @@ struct set_adt_elem {
 struct list_set {
 	u32 size;		/* size of set list array */
 	struct timer_list gc;	/* garbage collection */
+	struct net *net;	/* namespace */
 	struct set_elem members[0]; /* the set members */
 };
 
@@ -171,7 +172,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
 	if (e->id != IPSET_INVALID_ID) {
 		if (i == map->size - 1) {
 			/* Last element replaced: e.g. add new,before,last */
-			ip_set_put_byindex(e->id);
+			ip_set_put_byindex(map->net, e->id);
 			ip_set_ext_destroy(set, e);
 		} else {
 			struct set_elem *x = list_set_elem(set, map,
@@ -179,7 +180,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
 
 			/* Last element pushed off */
 			if (x->id != IPSET_INVALID_ID) {
-				ip_set_put_byindex(x->id);
+				ip_set_put_byindex(map->net, x->id);
 				ip_set_ext_destroy(set, x);
 			}
 			memmove(list_set_elem(set, map, i + 1), e,
@@ -205,7 +206,7 @@ list_set_del(struct ip_set *set, u32 i)
 	struct list_set *map = set->data;
 	struct set_elem *e = list_set_elem(set, map, i);
 
-	ip_set_put_byindex(e->id);
+	ip_set_put_byindex(map->net, e->id);
 	ip_set_ext_destroy(set, e);
 
 	if (i < map->size - 1)
@@ -307,7 +308,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		if (SET_WITH_COMMENT(set))
 			ip_set_init_comment(ext_comment(e, set), ext);
 		/* Set is already added to the list */
-		ip_set_put_byindex(d->id);
+		ip_set_put_byindex(map->net, d->id);
 		return 0;
 	}
 insert:
@@ -366,6 +367,7 @@ static int
 list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
+	struct list_set *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct set_adt_elem e = { .refid = IPSET_INVALID_ID };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -385,7 +387,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
-	e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
+	e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s);
 	if (e.id == IPSET_INVALID_ID)
 		return -IPSET_ERR_NAME;
 	/* "Loop detection" */
@@ -405,7 +407,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (tb[IPSET_ATTR_NAMEREF]) {
-		e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
+		e.refid = ip_set_get_byname(map->net,
+					    nla_data(tb[IPSET_ATTR_NAMEREF]),
 					    &s);
 		if (e.refid == IPSET_INVALID_ID) {
 			ret = -IPSET_ERR_NAMEREF;
@@ -421,9 +424,9 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 
 finish:
 	if (e.refid != IPSET_INVALID_ID)
-		ip_set_put_byindex(e.refid);
+		ip_set_put_byindex(map->net, e.refid);
 	if (adt != IPSET_ADD || ret)
-		ip_set_put_byindex(e.id);
+		ip_set_put_byindex(map->net, e.id);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
 }
@@ -438,7 +441,7 @@ list_set_flush(struct ip_set *set)
 	for (i = 0; i < map->size; i++) {
 		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID) {
-			ip_set_put_byindex(e->id);
+			ip_set_put_byindex(map->net, e->id);
 			ip_set_ext_destroy(set, e);
 			e->id = IPSET_INVALID_ID;
 		}
@@ -510,7 +513,7 @@ list_set_list(const struct ip_set *set,
 				goto nla_put_failure;
 		}
 		if (nla_put_string(skb, IPSET_ATTR_NAME,
-				   ip_set_name_byindex(e->id)))
+				   ip_set_name_byindex(map->net, e->id)))
 			goto nla_put_failure;
 		if (ip_set_put_extensions(skb, set, e, true))
 			goto nla_put_failure;
@@ -587,7 +590,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 /* Create list:set type of sets */
 
 static bool
-init_list_set(struct ip_set *set, u32 size)
+init_list_set(struct net *net, struct ip_set *set, u32 size)
 {
 	struct list_set *map;
 	struct set_elem *e;
@@ -598,6 +601,7 @@ init_list_set(struct ip_set *set, u32 size)
 		return false;
 
 	map->size = size;
+	map->net = net;
 	set->data = map;
 
 	for (i = 0; i < size; i++) {
@@ -609,7 +613,8 @@ init_list_set(struct ip_set *set, u32 size)
 }
 
 static int
-list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+		u32 flags)
 {
 	u32 size = IP_SET_LIST_DEFAULT_SIZE;
 
@@ -625,7 +630,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 
 	set->variant = &set_variant;
 	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
-	if (!init_list_set(set, size))
+	if (!init_list_set(net, set, size))
 		return -ENOMEM;
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 2095488684ff..e7c4e0e01ff5 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -81,7 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
 	struct xt_set_info_match_v0 *info = par->matchinfo;
 	ip_set_id_t index;
 
-	index = ip_set_nfnl_get_byindex(info->match_set.index);
+	index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
 
 	if (index == IPSET_INVALID_ID) {
 		pr_warning("Cannot find set indentified by id %u to match\n",
@@ -91,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
 	if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
 		pr_warning("Protocol error: set match dimension "
 			   "is over the limit!\n");
-		ip_set_nfnl_put(info->match_set.index);
+		ip_set_nfnl_put(par->net, info->match_set.index);
 		return -ERANGE;
 	}
 
@@ -106,7 +106,7 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par)
 {
 	struct xt_set_info_match_v0 *info = par->matchinfo;
 
-	ip_set_nfnl_put(info->match_set.index);
+	ip_set_nfnl_put(par->net, info->match_set.index);
 }
 
 /* Revision 1 match */
@@ -131,7 +131,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par)
 	struct xt_set_info_match_v1 *info = par->matchinfo;
 	ip_set_id_t index;
 
-	index = ip_set_nfnl_get_byindex(info->match_set.index);
+	index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
 
 	if (index == IPSET_INVALID_ID) {
 		pr_warning("Cannot find set indentified by id %u to match\n",
@@ -141,7 +141,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par)
 	if (info->match_set.dim > IPSET_DIM_MAX) {
 		pr_warning("Protocol error: set match dimension "
 			   "is over the limit!\n");
-		ip_set_nfnl_put(info->match_set.index);
+		ip_set_nfnl_put(par->net, info->match_set.index);
 		return -ERANGE;
 	}
 
@@ -153,7 +153,7 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par)
 {
 	struct xt_set_info_match_v1 *info = par->matchinfo;
 
-	ip_set_nfnl_put(info->match_set.index);
+	ip_set_nfnl_put(par->net, info->match_set.index);
 }
 
 /* Revision 3 match */
@@ -228,7 +228,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 	ip_set_id_t index;
 
 	if (info->add_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->add_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find add_set index %u as target\n",
 				   info->add_set.index);
@@ -237,12 +237,12 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 	}
 
 	if (info->del_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->del_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
 			if (info->add_set.index != IPSET_INVALID_ID)
-				ip_set_nfnl_put(info->add_set.index);
+				ip_set_nfnl_put(par->net, info->add_set.index);
 			return -ENOENT;
 		}
 	}
@@ -251,9 +251,9 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
 		if (info->add_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->add_set.index);
+			ip_set_nfnl_put(par->net, info->add_set.index);
 		if (info->del_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->del_set.index);
+			ip_set_nfnl_put(par->net, info->del_set.index);
 		return -ERANGE;
 	}
 
@@ -270,9 +270,9 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par)
 	const struct xt_set_info_target_v0 *info = par->targinfo;
 
 	if (info->add_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->add_set.index);
+		ip_set_nfnl_put(par->net, info->add_set.index);
 	if (info->del_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->del_set.index);
+		ip_set_nfnl_put(par->net, info->del_set.index);
 }
 
 /* Revision 1 target */
@@ -301,7 +301,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
 	ip_set_id_t index;
 
 	if (info->add_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->add_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find add_set index %u as target\n",
 				   info->add_set.index);
@@ -310,12 +310,12 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
 	}
 
 	if (info->del_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->del_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
 			if (info->add_set.index != IPSET_INVALID_ID)
-				ip_set_nfnl_put(info->add_set.index);
+				ip_set_nfnl_put(par->net, info->add_set.index);
 			return -ENOENT;
 		}
 	}
@@ -324,9 +324,9 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
 		if (info->add_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->add_set.index);
+			ip_set_nfnl_put(par->net, info->add_set.index);
 		if (info->del_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->del_set.index);
+			ip_set_nfnl_put(par->net, info->del_set.index);
 		return -ERANGE;
 	}
 
@@ -339,9 +339,9 @@ set_target_v1_destroy(const struct xt_tgdtor_param *par)
 	const struct xt_set_info_target_v1 *info = par->targinfo;
 
 	if (info->add_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->add_set.index);
+		ip_set_nfnl_put(par->net, info->add_set.index);
 	if (info->del_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->del_set.index);
+		ip_set_nfnl_put(par->net, info->del_set.index);
 }
 
 /* Revision 2 target */
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 938b7cbf5627..1ac41d3de5c3 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -24,11 +24,12 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
 {
 	struct xt_set_info *set = data;
 	ip_set_id_t index;
+	struct net *net = qdisc_dev(tp->q)->nd_net;
 
 	if (data_len != sizeof(*set))
 		return -EINVAL;
 
-	index = ip_set_nfnl_get_byindex(set->index);
+	index = ip_set_nfnl_get_byindex(net, set->index);
 	if (index == IPSET_INVALID_ID)
 		return -ENOENT;
 
@@ -37,7 +38,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
 	if (em->data)
 		return 0;
 
-	ip_set_nfnl_put(index);
+	ip_set_nfnl_put(net, index);
 	return -ENOMEM;
 }
 
@@ -45,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em)
 {
 	const struct xt_set_info *set = (const void *) em->data;
 	if (set) {
-		ip_set_nfnl_put(set->index);
+		ip_set_nfnl_put(qdisc_dev(p->q)->nd_net, set->index);
 		kfree((void *) em->data);
 	}
 }
-- 
cgit v1.2.3


From 36a8f39e05ccc308a5619a7edb5ad6e15ee82ff6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 29 Sep 2013 01:21:32 -0700
Subject: net: skb_is_gso_v6() requires skb_is_gso()

bnx2x makes a dangerous use of skb_is_gso_v6().

It should first make sure skb is a gso packet

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Acked-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 18 ++++++++++--------
 include/linux/skbuff.h                          |  1 +
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 61726af1de6e..0c7fb1ed1261 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3256,14 +3256,16 @@ static u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
 	if (prot == IPPROTO_TCP)
 		rc |= XMIT_CSUM_TCP;
 
-	if (skb_is_gso_v6(skb)) {
-		rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP);
-		if (rc & XMIT_CSUM_ENC)
-			rc |= XMIT_GSO_ENC_V6;
-	} else if (skb_is_gso(skb)) {
-		rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP);
-		if (rc & XMIT_CSUM_ENC)
-			rc |= XMIT_GSO_ENC_V4;
+	if (skb_is_gso(skb)) {
+		if (skb_is_gso_v6(skb)) {
+			rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP);
+			if (rc & XMIT_CSUM_ENC)
+				rc |= XMIT_GSO_ENC_V6;
+		} else {
+			rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP);
+			if (rc & XMIT_CSUM_ENC)
+				rc |= XMIT_GSO_ENC_V4;
+		}
 	}
 
 	return rc;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6d56840e561e..d72d71efa7a3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2755,6 +2755,7 @@ static inline bool skb_is_gso(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_size;
 }
 
+/* Note: Should be called only if skb_is_gso(skb) is true */
 static inline bool skb_is_gso_v6(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
-- 
cgit v1.2.3


From 5eb7906b47dcd906b3ffd811e689e0de4a6b1b6a Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Thu, 29 Aug 2013 15:03:14 +0300
Subject: ieee80211: fix vht cap definitions

VHT_CAP_BEAMFORMER_ANTENNAS cap is actually defined in the draft as
VHT_CAP_BEAMFORMEE_STS_MAX, and its size is 3 bits long.

VHT_CAP_SOUNDING_DIMENSIONS is also 3 bits long.

Fix the definitions and change the cap masking accordingly.

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 4 ++--
 net/mac80211/vht.c        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a5b598a79bec..7c1e1ebc0e23 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1391,8 +1391,8 @@ struct ieee80211_vht_operation {
 #define IEEE80211_VHT_CAP_RXSTBC_MASK				0x00000700
 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE			0x00000800
 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE			0x00001000
-#define IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX		0x00006000
-#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX		0x00030000
+#define IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX			0x0000e000
+#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX		0x00070000
 #define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE			0x00080000
 #define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE			0x00100000
 #define IEEE80211_VHT_CAP_VHT_TXOP_PS				0x00200000
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 97c289414e32..de0112785aae 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -185,13 +185,13 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 	if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) {
 		vht_cap->cap |= cap_info &
 				(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
-				 IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX |
 				 IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX);
 	}
 
 	if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)
 		vht_cap->cap |= cap_info &
-				IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
+				(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+				 IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX);
 
 	if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)
 		vht_cap->cap |= cap_info &
-- 
cgit v1.2.3


From 180cf72f56fab2810e00497c087c7126bfe53c85 Mon Sep 17 00:00:00 2001
From: "holger@eitzenberger.org" <holger@eitzenberger.org>
Date: Mon, 30 Sep 2013 17:07:28 +0200
Subject: netfilter: nf_ct_sip: consolidate NAT hook functions

There are currently seven different NAT hooks used in both
nf_conntrack_sip and nf_nat_sip, each of the hooks is exported in
nf_conntrack_sip, then set from the nf_nat_sip NAT helper.

And because each of them is exported there is quite some overhead
introduced due of this.

By introducing nf_nat_sip_hooks I am able to reduce both text/data
somewhat.  For nf_conntrack_sip e. g. I get

        text             data              bss              dec
old    15243             5256               32            20531
new    15010             5192               32            20234

Signed-off-by: Holger Eitzenberger <holger@eitzenberger.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_sip.h | 107 +++++++++++++-----------
 net/netfilter/nf_conntrack_sip.c           | 127 ++++++++---------------------
 net/netfilter/nf_nat_sip.c                 |  35 ++++----
 3 files changed, 107 insertions(+), 162 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index ba7f571a2b1c..4cb71551f611 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -107,55 +107,64 @@ enum sdp_header_types {
 	SDP_HDR_MEDIA,
 };
 
-extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
-				       unsigned int protoff,
-				       unsigned int dataoff,
-				       const char **dptr,
-				       unsigned int *datalen);
-extern void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb,
-					  unsigned int protoff, s16 off);
-extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
-					      unsigned int protoff,
-					      unsigned int dataoff,
-					      const char **dptr,
-					      unsigned int *datalen,
-					      struct nf_conntrack_expect *exp,
-					      unsigned int matchoff,
-					      unsigned int matchlen);
-extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb,
-					    unsigned int protoff,
-					    unsigned int dataoff,
-					    const char **dptr,
-					    unsigned int *datalen,
-					    unsigned int sdpoff,
-					    enum sdp_header_types type,
-					    enum sdp_header_types term,
-					    const union nf_inet_addr *addr);
-extern unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
-					    unsigned int protoff,
-					    unsigned int dataoff,
-					    const char **dptr,
-					    unsigned int *datalen,
-					    unsigned int matchoff,
-					    unsigned int matchlen,
-					    u_int16_t port);
-extern unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
-					       unsigned int protoff,
-					       unsigned int dataoff,
-					       const char **dptr,
-					       unsigned int *datalen,
-					       unsigned int sdpoff,
-					       const union nf_inet_addr *addr);
-extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
-					     unsigned int protoff,
-					     unsigned int dataoff,
-					     const char **dptr,
-					     unsigned int *datalen,
-					     struct nf_conntrack_expect *rtp_exp,
-					     struct nf_conntrack_expect *rtcp_exp,
-					     unsigned int mediaoff,
-					     unsigned int medialen,
-					     union nf_inet_addr *rtp_addr);
+struct nf_nat_sip_hooks {
+	unsigned int (*msg)(struct sk_buff *skb,
+			    unsigned int protoff,
+			    unsigned int dataoff,
+			    const char **dptr,
+			    unsigned int *datalen);
+
+	void (*seq_adjust)(struct sk_buff *skb,
+			   unsigned int protoff, s16 off);
+
+	unsigned int (*expect)(struct sk_buff *skb,
+			       unsigned int protoff,
+			       unsigned int dataoff,
+			       const char **dptr,
+			       unsigned int *datalen,
+			       struct nf_conntrack_expect *exp,
+			       unsigned int matchoff,
+			       unsigned int matchlen);
+
+	unsigned int (*sdp_addr)(struct sk_buff *skb,
+				 unsigned int protoff,
+				 unsigned int dataoff,
+				 const char **dptr,
+				 unsigned int *datalen,
+				 unsigned int sdpoff,
+				 enum sdp_header_types type,
+				 enum sdp_header_types term,
+				 const union nf_inet_addr *addr);
+
+	unsigned int (*sdp_port)(struct sk_buff *skb,
+				 unsigned int protoff,
+				 unsigned int dataoff,
+				 const char **dptr,
+				 unsigned int *datalen,
+				 unsigned int matchoff,
+				 unsigned int matchlen,
+				 u_int16_t port);
+
+	unsigned int (*sdp_session)(struct sk_buff *skb,
+				    unsigned int protoff,
+				    unsigned int dataoff,
+				    const char **dptr,
+				    unsigned int *datalen,
+				    unsigned int sdpoff,
+				    const union nf_inet_addr *addr);
+
+	unsigned int (*sdp_media)(struct sk_buff *skb,
+				  unsigned int protoff,
+				  unsigned int dataoff,
+				  const char **dptr,
+				  unsigned int *datalen,
+				  struct nf_conntrack_expect *rtp_exp,
+				  struct nf_conntrack_expect *rtcp_exp,
+				  unsigned int mediaoff,
+				  unsigned int medialen,
+				  union nf_inet_addr *rtp_addr);
+};
+extern const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
 
 extern int ct_sip_parse_request(const struct nf_conn *ct,
 				const char *dptr, unsigned int datalen,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 5ed8c441dffd..466410eaa482 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -52,66 +52,8 @@ module_param(sip_direct_media, int, 0600);
 MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
 				   "endpoints only (default 1)");
 
-unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff,
-				unsigned int dataoff, const char **dptr,
-				unsigned int *datalen) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
-
-void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff,
-				   s16 off) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
-
-unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
-				       unsigned int protoff,
-				       unsigned int dataoff,
-				       const char **dptr,
-				       unsigned int *datalen,
-				       struct nf_conntrack_expect *exp,
-				       unsigned int matchoff,
-				       unsigned int matchlen) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
-
-unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff,
-				     unsigned int dataoff,
-				     const char **dptr,
-				     unsigned int *datalen,
-				     unsigned int sdpoff,
-				     enum sdp_header_types type,
-				     enum sdp_header_types term,
-				     const union nf_inet_addr *addr)
-				     __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
-
-unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff,
-				     unsigned int dataoff,
-				     const char **dptr,
-				     unsigned int *datalen,
-				     unsigned int matchoff,
-				     unsigned int matchlen,
-				     u_int16_t port) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
-
-unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
-					unsigned int protoff,
-					unsigned int dataoff,
-					const char **dptr,
-					unsigned int *datalen,
-					unsigned int sdpoff,
-					const union nf_inet_addr *addr)
-					__read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
-
-unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff,
-				      unsigned int dataoff,
-				      const char **dptr,
-				      unsigned int *datalen,
-				      struct nf_conntrack_expect *rtp_exp,
-				      struct nf_conntrack_expect *rtcp_exp,
-				      unsigned int mediaoff,
-				      unsigned int medialen,
-				      union nf_inet_addr *rtp_addr)
-				      __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook);
+const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
+EXPORT_SYMBOL_GPL(nf_nat_sip_hooks);
 
 static int string_len(const struct nf_conn *ct, const char *dptr,
 		      const char *limit, int *shift)
@@ -914,8 +856,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 	int direct_rtp = 0, skip_expect = 0, ret = NF_DROP;
 	u_int16_t base_port;
 	__be16 rtp_port, rtcp_port;
-	typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port;
-	typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media;
+	const struct nf_nat_sip_hooks *hooks;
 
 	saddr = NULL;
 	if (sip_direct_media) {
@@ -972,9 +913,9 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 	rtcp_port = htons(base_port + 1);
 
 	if (direct_rtp) {
-		nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
-		if (nf_nat_sdp_port &&
-		    !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
+		hooks = rcu_dereference(nf_nat_sip_hooks);
+		if (hooks &&
+		    !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen,
 				     mediaoff, medialen, ntohs(rtp_port)))
 			goto err1;
 	}
@@ -996,10 +937,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 	nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr,
 			  IPPROTO_UDP, NULL, &rtcp_port);
 
-	nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
-	if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
-		ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen,
-				       rtp_exp, rtcp_exp,
+	hooks = rcu_dereference(nf_nat_sip_hooks);
+	if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp)
+		ret = hooks->sdp_media(skb, protoff, dataoff, dptr,
+				       datalen, rtp_exp, rtcp_exp,
 				       mediaoff, medialen, daddr);
 	else {
 		if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -1053,13 +994,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 	unsigned int caddr_len, maddr_len;
 	unsigned int i;
 	union nf_inet_addr caddr, maddr, rtp_addr;
+	const struct nf_nat_sip_hooks *hooks;
 	unsigned int port;
 	const struct sdp_media_type *t;
 	int ret = NF_ACCEPT;
-	typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr;
-	typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session;
 
-	nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook);
+	hooks = rcu_dereference(nf_nat_sip_hooks);
 
 	/* Find beginning of session description */
 	if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
@@ -1127,10 +1067,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 		}
 
 		/* Update media connection address if present */
-		if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
-			ret = nf_nat_sdp_addr(skb, protoff, dataoff,
+		if (maddr_len && hooks && ct->status & IPS_NAT_MASK) {
+			ret = hooks->sdp_addr(skb, protoff, dataoff,
 					      dptr, datalen, mediaoff,
-					      SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
+					      SDP_HDR_CONNECTION,
+					      SDP_HDR_MEDIA,
 					      &rtp_addr);
 			if (ret != NF_ACCEPT) {
 				nf_ct_helper_log(skb, ct, "cannot mangle SDP");
@@ -1141,10 +1082,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 	}
 
 	/* Update session connection and owner addresses */
-	nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
-	if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sdp_session(skb, protoff, dataoff,
-					 dptr, datalen, sdpoff, &rtp_addr);
+	hooks = rcu_dereference(nf_nat_sip_hooks);
+	if (hooks && ct->status & IPS_NAT_MASK)
+		ret = hooks->sdp_session(skb, protoff, dataoff,
+					 dptr, datalen, sdpoff,
+					 &rtp_addr);
 
 	return ret;
 }
@@ -1244,11 +1186,11 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
 	unsigned int matchoff, matchlen;
 	struct nf_conntrack_expect *exp;
 	union nf_inet_addr *saddr, daddr;
+	const struct nf_nat_sip_hooks *hooks;
 	__be16 port;
 	u8 proto;
 	unsigned int expires = 0;
 	int ret;
-	typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect;
 
 	/* Expected connections can not register again. */
 	if (ct->status & IPS_EXPECTED)
@@ -1311,10 +1253,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
 	exp->helper = nfct_help(ct)->helper;
 	exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
 
-	nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
-	if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen,
-					exp, matchoff, matchlen);
+	hooks = rcu_dereference(nf_nat_sip_hooks);
+	if (hooks && ct->status & IPS_NAT_MASK)
+		ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
+				    exp, matchoff, matchlen);
 	else {
 		if (nf_ct_expect_related(exp) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
@@ -1517,7 +1459,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
 			   unsigned int protoff, unsigned int dataoff,
 			   const char **dptr, unsigned int *datalen)
 {
-	typeof(nf_nat_sip_hook) nf_nat_sip;
+	const struct nf_nat_sip_hooks *hooks;
 	int ret;
 
 	if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
@@ -1526,9 +1468,9 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
 		ret = process_sip_response(skb, protoff, dataoff, dptr, datalen);
 
 	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
-		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
-		if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff,
-					      dptr, datalen)) {
+		hooks = rcu_dereference(nf_nat_sip_hooks);
+		if (hooks && !hooks->msg(skb, protoff, dataoff,
+					 dptr, datalen)) {
 			nf_ct_helper_log(skb, ct, "cannot NAT SIP message");
 			ret = NF_DROP;
 		}
@@ -1548,7 +1490,6 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 	s16 diff, tdiff = 0;
 	int ret = NF_ACCEPT;
 	bool term;
-	typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
 
 	if (ctinfo != IP_CT_ESTABLISHED &&
 	    ctinfo != IP_CT_ESTABLISHED_REPLY)
@@ -1612,9 +1553,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 	}
 
 	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
-		nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook);
-		if (nf_nat_sip_seq_adjust)
-			nf_nat_sip_seq_adjust(skb, protoff, tdiff);
+		const struct nf_nat_sip_hooks *hooks;
+
+		hooks = rcu_dereference(nf_nat_sip_hooks);
+		if (hooks)
+			hooks->seq_adjust(skb, protoff, tdiff);
 	}
 
 	return ret;
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index f9790405b7ff..b4d691db955e 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -625,33 +625,26 @@ static struct nf_ct_helper_expectfn sip_nat = {
 
 static void __exit nf_nat_sip_fini(void)
 {
-	RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
+	RCU_INIT_POINTER(nf_nat_sip_hooks, NULL);
+
 	nf_ct_helper_expectfn_unregister(&sip_nat);
 	synchronize_rcu();
 }
 
+static const struct nf_nat_sip_hooks sip_hooks = {
+	.msg		= nf_nat_sip,
+	.seq_adjust	= nf_nat_sip_seq_adjust,
+	.expect		= nf_nat_sip_expect,
+	.sdp_addr	= nf_nat_sdp_addr,
+	.sdp_port	= nf_nat_sdp_port,
+	.sdp_session	= nf_nat_sdp_session,
+	.sdp_media	= nf_nat_sdp_media,
+};
+
 static int __init nf_nat_sip_init(void)
 {
-	BUG_ON(nf_nat_sip_hook != NULL);
-	BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
-	BUG_ON(nf_nat_sip_expect_hook != NULL);
-	BUG_ON(nf_nat_sdp_addr_hook != NULL);
-	BUG_ON(nf_nat_sdp_port_hook != NULL);
-	BUG_ON(nf_nat_sdp_session_hook != NULL);
-	BUG_ON(nf_nat_sdp_media_hook != NULL);
-	RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip);
-	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust);
-	RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect);
-	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr);
-	RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port);
-	RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session);
-	RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media);
+	BUG_ON(nf_nat_sip_hooks != NULL);
+	RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks);
 	nf_ct_helper_expectfn_register(&sip_nat);
 	return 0;
 }
-- 
cgit v1.2.3


From 7d65f4a6553203da6a22097821d151fbbe7e4956 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 5 Sep 2013 15:49:45 +0200
Subject: irq: Consolidate do_softirq() arch overriden implementations

All arch overriden implementations of do_softirq() share the following
common code: disable irqs (to avoid races with the pending check),
check if there are softirqs pending, then execute __do_softirq() on
a specific stack.

Consolidate the common parts such that archs only worry about the
stack switch.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@au1.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@au1.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 arch/metag/kernel/irq.c    | 52 ++++++++++++++++--------------------------
 arch/parisc/kernel/irq.c   | 17 ++------------
 arch/powerpc/kernel/irq.c  | 17 +-------------
 arch/s390/kernel/irq.c     | 52 +++++++++++++++++-------------------------
 arch/sh/kernel/irq.c       | 57 +++++++++++++++++-----------------------------
 arch/sparc/kernel/irq_64.c | 31 ++++++++-----------------
 arch/x86/kernel/entry_64.S |  4 ++--
 arch/x86/kernel/irq_32.c   | 30 +++++++-----------------
 arch/x86/kernel/irq_64.c   | 21 -----------------
 include/linux/interrupt.h  | 11 +++++++++
 kernel/softirq.c           |  8 +++----
 11 files changed, 98 insertions(+), 202 deletions(-)

(limited to 'include/linux')

diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c
index 2a2c9d55187e..3b4b7f6c0950 100644
--- a/arch/metag/kernel/irq.c
+++ b/arch/metag/kernel/irq.c
@@ -159,44 +159,30 @@ void irq_ctx_exit(int cpu)
 
 extern asmlinkage void __do_softirq(void);
 
-asmlinkage void do_softirq(void)
+void do_softirq_own_stack(void)
 {
-	unsigned long flags;
 	struct thread_info *curctx;
 	union irq_ctx *irqctx;
 	u32 *isp;
 
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
-
-	if (local_softirq_pending()) {
-		curctx = current_thread_info();
-		irqctx = softirq_ctx[smp_processor_id()];
-		irqctx->tinfo.task = curctx->task;
-
-		/* build the stack frame on the softirq stack */
-		isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
-
-		asm volatile (
-			"MOV   D0.5,%0\n"
-			"SWAP  A0StP,D0.5\n"
-			"CALLR D1RtP,___do_softirq\n"
-			"MOV   A0StP,D0.5\n"
-			:
-			: "r" (isp)
-			: "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
-			  "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
-			  "D0.5"
-			);
-		/*
-		 * Shouldn't happen, we returned above if in_interrupt():
-		 */
-		WARN_ON_ONCE(softirq_count());
-	}
-
-	local_irq_restore(flags);
+	curctx = current_thread_info();
+	irqctx = softirq_ctx[smp_processor_id()];
+	irqctx->tinfo.task = curctx->task;
+
+	/* build the stack frame on the softirq stack */
+	isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
+
+	asm volatile (
+		"MOV   D0.5,%0\n"
+		"SWAP  A0StP,D0.5\n"
+		"CALLR D1RtP,___do_softirq\n"
+		"MOV   A0StP,D0.5\n"
+		:
+		: "r" (isp)
+		: "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
+		  "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
+		  "D0.5"
+		);
 }
 #endif
 
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 2e6443b1e922..ef5927685299 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -499,22 +499,9 @@ static void execute_on_irq_stack(void *func, unsigned long param1)
 	*irq_stack_in_use = 1;
 }
 
-asmlinkage void do_softirq(void)
+void do_softirq_own_stack(void)
 {
-	__u32 pending;
-	unsigned long flags;
-
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
-
-	pending = local_softirq_pending();
-
-	if (pending)
-		execute_on_irq_stack(__do_softirq, 0);
-
-	local_irq_restore(flags);
+	execute_on_irq_stack(__do_softirq, 0);
 }
 #endif /* CONFIG_IRQSTACKS */
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 57d286a78f86..5c4adfc6a6d0 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -593,7 +593,7 @@ void irq_ctx_init(void)
 	}
 }
 
-static inline void do_softirq_onstack(void)
+void do_softirq_own_stack(void)
 {
 	struct thread_info *curtp, *irqtp;
 
@@ -611,21 +611,6 @@ static inline void do_softirq_onstack(void)
 		set_bits(irqtp->flags, &curtp->flags);
 }
 
-void do_softirq(void)
-{
-	unsigned long flags;
-
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
-
-	if (local_softirq_pending())
-		do_softirq_onstack();
-
-	local_irq_restore(flags);
-}
-
 irq_hw_number_t virq_to_hw(unsigned int virq)
 {
 	struct irq_data *irq_data = irq_get_irq_data(virq);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 8ac2097f13d4..bb27a262c44a 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -157,39 +157,29 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 /*
  * Switch to the asynchronous interrupt stack for softirq execution.
  */
-asmlinkage void do_softirq(void)
+void do_softirq_own_stack(void)
 {
-	unsigned long flags, old, new;
-
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
-
-	if (local_softirq_pending()) {
-		/* Get current stack pointer. */
-		asm volatile("la %0,0(15)" : "=a" (old));
-		/* Check against async. stack address range. */
-		new = S390_lowcore.async_stack;
-		if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
-			/* Need to switch to the async. stack. */
-			new -= STACK_FRAME_OVERHEAD;
-			((struct stack_frame *) new)->back_chain = old;
-
-			asm volatile("   la    15,0(%0)\n"
-				     "   basr  14,%2\n"
-				     "   la    15,0(%1)\n"
-				     : : "a" (new), "a" (old),
-				         "a" (__do_softirq)
-				     : "0", "1", "2", "3", "4", "5", "14",
-				       "cc", "memory" );
-		} else {
-			/* We are already on the async stack. */
-			__do_softirq();
-		}
+	unsigned long old, new;
+
+	/* Get current stack pointer. */
+	asm volatile("la %0,0(15)" : "=a" (old));
+	/* Check against async. stack address range. */
+	new = S390_lowcore.async_stack;
+	if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
+		/* Need to switch to the async. stack. */
+		new -= STACK_FRAME_OVERHEAD;
+		((struct stack_frame *) new)->back_chain = old;
+		asm volatile("   la    15,0(%0)\n"
+			     "   basr  14,%2\n"
+			     "   la    15,0(%1)\n"
+			     : : "a" (new), "a" (old),
+			         "a" (__do_softirq)
+			     : "0", "1", "2", "3", "4", "5", "14",
+			       "cc", "memory" );
+	} else {
+		/* We are already on the async stack. */
+		__do_softirq();
 	}
-
-	local_irq_restore(flags);
 }
 
 /*
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 063af10ff3c1..0833736afa32 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -149,47 +149,32 @@ void irq_ctx_exit(int cpu)
 	hardirq_ctx[cpu] = NULL;
 }
 
-asmlinkage void do_softirq(void)
+void do_softirq_own_stack(void)
 {
-	unsigned long flags;
 	struct thread_info *curctx;
 	union irq_ctx *irqctx;
 	u32 *isp;
 
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
-
-	if (local_softirq_pending()) {
-		curctx = current_thread_info();
-		irqctx = softirq_ctx[smp_processor_id()];
-		irqctx->tinfo.task = curctx->task;
-		irqctx->tinfo.previous_sp = current_stack_pointer;
-
-		/* build the stack frame on the softirq stack */
-		isp = (u32 *)((char *)irqctx + sizeof(*irqctx));
-
-		__asm__ __volatile__ (
-			"mov	r15, r9		\n"
-			"jsr	@%0		\n"
-			/* switch to the softirq stack */
-			" mov	%1, r15		\n"
-			/* restore the thread stack */
-			"mov	r9, r15		\n"
-			: /* no outputs */
-			: "r" (__do_softirq), "r" (isp)
-			: "memory", "r0", "r1", "r2", "r3", "r4",
-			  "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
-		);
-
-		/*
-		 * Shouldn't happen, we returned above if in_interrupt():
-		 */
-		WARN_ON_ONCE(softirq_count());
-	}
-
-	local_irq_restore(flags);
+	curctx = current_thread_info();
+	irqctx = softirq_ctx[smp_processor_id()];
+	irqctx->tinfo.task = curctx->task;
+	irqctx->tinfo.previous_sp = current_stack_pointer;
+
+	/* build the stack frame on the softirq stack */
+	isp = (u32 *)((char *)irqctx + sizeof(*irqctx));
+
+	__asm__ __volatile__ (
+		"mov	r15, r9		\n"
+		"jsr	@%0		\n"
+		/* switch to the softirq stack */
+		" mov	%1, r15		\n"
+		/* restore the thread stack */
+		"mov	r9, r15		\n"
+		: /* no outputs */
+		: "r" (__do_softirq), "r" (isp)
+		: "memory", "r0", "r1", "r2", "r3", "r4",
+		  "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
+	);
 }
 #else
 static inline void handle_one_irq(unsigned int irq)
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index d4840cec2c55..666193f4e8bb 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -698,30 +698,19 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
-void do_softirq(void)
+void do_softirq_own_stack(void)
 {
-	unsigned long flags;
-
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
+	void *orig_sp, *sp = softirq_stack[smp_processor_id()];
 
-	if (local_softirq_pending()) {
-		void *orig_sp, *sp = softirq_stack[smp_processor_id()];
-
-		sp += THREAD_SIZE - 192 - STACK_BIAS;
-
-		__asm__ __volatile__("mov %%sp, %0\n\t"
-				     "mov %1, %%sp"
-				     : "=&r" (orig_sp)
-				     : "r" (sp));
-		__do_softirq();
-		__asm__ __volatile__("mov %0, %%sp"
-				     : : "r" (orig_sp));
-	}
+	sp += THREAD_SIZE - 192 - STACK_BIAS;
 
-	local_irq_restore(flags);
+	__asm__ __volatile__("mov %%sp, %0\n\t"
+			     "mov %1, %%sp"
+			     : "=&r" (orig_sp)
+			     : "r" (sp));
+	__do_softirq();
+	__asm__ __volatile__("mov %0, %%sp"
+			     : : "r" (orig_sp));
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b077f4cc225a..083da7c2f40d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1342,7 +1342,7 @@ bad_gs:
 	.previous
 
 /* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(call_softirq)
+ENTRY(do_softirq_own_stack)
 	CFI_STARTPROC
 	pushq_cfi %rbp
 	CFI_REL_OFFSET rbp,0
@@ -1359,7 +1359,7 @@ ENTRY(call_softirq)
 	decl PER_CPU_VAR(irq_count)
 	ret
 	CFI_ENDPROC
-END(call_softirq)
+END(do_softirq_own_stack)
 
 #ifdef CONFIG_XEN
 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 4186755f1d7c..8a5bb01dbc0e 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -149,35 +149,21 @@ void irq_ctx_init(int cpu)
 	       cpu, per_cpu(hardirq_ctx, cpu),  per_cpu(softirq_ctx, cpu));
 }
 
-asmlinkage void do_softirq(void)
+void do_softirq_own_stack(void)
 {
-	unsigned long flags;
 	struct thread_info *curctx;
 	union irq_ctx *irqctx;
 	u32 *isp;
 
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
-
-	if (local_softirq_pending()) {
-		curctx = current_thread_info();
-		irqctx = __this_cpu_read(softirq_ctx);
-		irqctx->tinfo.task = curctx->task;
-		irqctx->tinfo.previous_esp = current_stack_pointer;
-
-		/* build the stack frame on the softirq stack */
-		isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
+	curctx = current_thread_info();
+	irqctx = __this_cpu_read(softirq_ctx);
+	irqctx->tinfo.task = curctx->task;
+	irqctx->tinfo.previous_esp = current_stack_pointer;
 
-		call_on_stack(__do_softirq, isp);
-		/*
-		 * Shouldn't happen, we returned above if in_interrupt():
-		 */
-		WARN_ON_ONCE(softirq_count());
-	}
+	/* build the stack frame on the softirq stack */
+	isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
 
-	local_irq_restore(flags);
+	call_on_stack(__do_softirq, isp);
 }
 
 bool handle_irq(unsigned irq, struct pt_regs *regs)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index d04d3ecded62..4d1c746892eb 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -87,24 +87,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
 	generic_handle_irq_desc(irq, desc);
 	return true;
 }
-
-
-extern void call_softirq(void);
-
-asmlinkage void do_softirq(void)
-{
-	__u32 pending;
-	unsigned long flags;
-
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
-	pending = local_softirq_pending();
-	/* Switch to interrupt stack */
-	if (pending) {
-		call_softirq();
-		WARN_ON_ONCE(softirq_count());
-	}
-	local_irq_restore(flags);
-}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5e865b554940..c9e831dc80bc 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -19,6 +19,7 @@
 
 #include <linux/atomic.h>
 #include <asm/ptrace.h>
+#include <asm/irq.h>
 
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
@@ -374,6 +375,16 @@ struct softirq_action
 
 asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
+
+#ifdef __ARCH_HAS_DO_SOFTIRQ
+void do_softirq_own_stack(void);
+#else
+static inline void do_softirq_own_stack(void)
+{
+	__do_softirq();
+}
+#endif
+
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
 extern void __raise_softirq_irqoff(unsigned int nr);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index d7d498d8cc4f..26ee72725d29 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -29,7 +29,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/irq.h>
 
-#include <asm/irq.h>
 /*
    - No shared variables, all the data are CPU local.
    - If a softirq needs serialization, let it serialize itself
@@ -283,7 +282,7 @@ restart:
 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
 }
 
-#ifndef __ARCH_HAS_DO_SOFTIRQ
+
 
 asmlinkage void do_softirq(void)
 {
@@ -298,13 +297,12 @@ asmlinkage void do_softirq(void)
 	pending = local_softirq_pending();
 
 	if (pending)
-		__do_softirq();
+		do_softirq_own_stack();
 
+	WARN_ON_ONCE(softirq_count());
 	local_irq_restore(flags);
 }
 
-#endif
-
 /*
  * Enter an interrupt context.
  */
-- 
cgit v1.2.3


From cb2ffb26e67ef89c44f46e971440cda2f83ae236 Mon Sep 17 00:00:00 2001
From: Tom Gundersen <teg@jklm.no>
Date: Mon, 9 Sep 2013 20:18:27 +0200
Subject: cuse: add fix minor number to /dev/cuse

This allows udev (or more recently systemd-tmpfiles) to create /dev/cuse on
boot, in the same way as /dev/fuse is currently created, and the corresponding
module to be loaded on first access.

The corresponding functionalty was introduced for fuse in commit 578454f.

Signed-off-by: Tom Gundersen <teg@jklm.no>
Cc: Kay Sievers <kay@vrfy.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 Documentation/devices.txt  | 1 +
 fs/fuse/cuse.c             | 5 ++++-
 include/linux/miscdevice.h | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index 23721d3be3e6..80b72419ffd8 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -414,6 +414,7 @@ Your cooperation is appreciated.
 		200 = /dev/net/tun	TAP/TUN network device
 		201 = /dev/button/gulpb	Transmeta GULP-B buttons
 		202 = /dev/emd/ctl	Enhanced Metadisk RAID (EMD) control
+		203 = /dev/cuse		Cuse (character device in user-space)
 		204 = /dev/video/em8300		EM8300 DVD decoder control
 		205 = /dev/video/em8300_mv	EM8300 DVD decoder video
 		206 = /dev/video/em8300_ma	EM8300 DVD decoder audio
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index adbfd66b380f..24da581cb52b 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = {
 ATTRIBUTE_GROUPS(cuse_class_dev);
 
 static struct miscdevice cuse_miscdev = {
-	.minor		= MISC_DYNAMIC_MINOR,
+	.minor		= CUSE_MINOR,
 	.name		= "cuse",
 	.fops		= &cuse_channel_fops,
 };
 
+MODULE_ALIAS_MISCDEV(CUSE_MINOR);
+MODULE_ALIAS("devname:cuse");
+
 static int __init cuse_init(void)
 {
 	int i, rc;
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 09c2300ddb37..c8cf093d8b71 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -31,6 +31,7 @@
 #define I2O_MINOR		166
 #define MICROCODE_MINOR		184
 #define TUN_MINOR		200
+#define CUSE_MINOR		203
 #define MWAVE_MINOR		219	/* ACP/Mwave Modem */
 #define MPT_MINOR		220
 #define MPT2SAS_MINOR		221
-- 
cgit v1.2.3


From 1df3a401154add23826d714c808204633b3b1c31 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 18 Sep 2013 18:13:00 +0100
Subject: HID: Delay opening HID device

Don't call hid_open_device till there is actually an user. This saves
power by not opening underlying transport for HID. Also close device
if there are no active mfd client using HID sensor hub.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/hid/hid-sensor-hub.c   | 45 ++++++++++++++++++++++++++++++++----------
 include/linux/hid-sensor-hub.h | 18 +++++++++++++++++
 2 files changed, 53 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index 10e1581022cf..88fc5aefcd96 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -465,6 +465,39 @@ static int sensor_hub_raw_event(struct hid_device *hdev,
 	return 1;
 }
 
+int sensor_hub_device_open(struct hid_sensor_hub_device *hsdev)
+{
+	int ret = 0;
+	struct sensor_hub_data *data =  hid_get_drvdata(hsdev->hdev);
+
+	mutex_lock(&data->mutex);
+	if (!hsdev->ref_cnt) {
+		ret = hid_hw_open(hsdev->hdev);
+		if (ret) {
+			hid_err(hsdev->hdev, "failed to open hid device\n");
+			mutex_unlock(&data->mutex);
+			return ret;
+		}
+	}
+	hsdev->ref_cnt++;
+	mutex_unlock(&data->mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sensor_hub_device_open);
+
+void sensor_hub_device_close(struct hid_sensor_hub_device *hsdev)
+{
+	struct sensor_hub_data *data =  hid_get_drvdata(hsdev->hdev);
+
+	mutex_lock(&data->mutex);
+	hsdev->ref_cnt--;
+	if (!hsdev->ref_cnt)
+		hid_hw_close(hsdev->hdev);
+	mutex_unlock(&data->mutex);
+}
+EXPORT_SYMBOL_GPL(sensor_hub_device_close);
+
 static int sensor_hub_probe(struct hid_device *hdev,
 				const struct hid_device_id *id)
 {
@@ -506,12 +539,6 @@ static int sensor_hub_probe(struct hid_device *hdev,
 		hid_err(hdev, "hw start failed\n");
 		return ret;
 	}
-	ret = hid_hw_open(hdev);
-	if (ret) {
-		hid_err(hdev, "failed to open input interrupt pipe\n");
-		goto err_stop_hw;
-	}
-
 	INIT_LIST_HEAD(&sd->dyn_callback_list);
 	sd->hid_sensor_client_cnt = 0;
 	report_enum = &hdev->report_enum[HID_INPUT_REPORT];
@@ -520,7 +547,7 @@ static int sensor_hub_probe(struct hid_device *hdev,
 	if (dev_cnt > HID_MAX_PHY_DEVICES) {
 		hid_err(hdev, "Invalid Physical device count\n");
 		ret = -EINVAL;
-		goto err_close;
+		goto err_stop_hw;
 	}
 	sd->hid_sensor_hub_client_devs = kzalloc(dev_cnt *
 						sizeof(struct mfd_cell),
@@ -528,7 +555,7 @@ static int sensor_hub_probe(struct hid_device *hdev,
 	if (sd->hid_sensor_hub_client_devs == NULL) {
 		hid_err(hdev, "Failed to allocate memory for mfd cells\n");
 			ret = -ENOMEM;
-			goto err_close;
+			goto err_stop_hw;
 	}
 	list_for_each_entry(report, &report_enum->report_list, list) {
 		hid_dbg(hdev, "Report id:%x\n", report->id);
@@ -565,8 +592,6 @@ err_free_names:
 	for (i = 0; i < sd->hid_sensor_client_cnt ; ++i)
 		kfree(sd->hid_sensor_hub_client_devs[i].name);
 	kfree(sd->hid_sensor_hub_client_devs);
-err_close:
-	hid_hw_close(hdev);
 err_stop_hw:
 	hid_hw_stop(hdev);
 
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 32ba45158d39..a265af294ea4 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -47,11 +47,13 @@ struct hid_sensor_hub_attribute_info {
  * @hdev:		Stores the hid instance.
  * @vendor_id:		Vendor id of hub device.
  * @product_id:		Product id of hub device.
+ * @ref_cnt:		Number of MFD clients have opened this device
  */
 struct hid_sensor_hub_device {
 	struct hid_device *hdev;
 	u32 vendor_id;
 	u32 product_id;
+	int ref_cnt;
 };
 
 /**
@@ -74,6 +76,22 @@ struct hid_sensor_hub_callbacks {
 			 void *priv);
 };
 
+/**
+* sensor_hub_device_open() - Open hub device
+* @hsdev:	Hub device instance.
+*
+* Used to open hid device for sensor hub.
+*/
+int sensor_hub_device_open(struct hid_sensor_hub_device *hsdev);
+
+/**
+* sensor_hub_device_clode() - Close hub device
+* @hsdev:	Hub device instance.
+*
+* Used to clode hid device for sensor hub.
+*/
+void sensor_hub_device_close(struct hid_sensor_hub_device *hsdev);
+
 /* Registration functions */
 
 /**
-- 
cgit v1.2.3


From 90051ea774613ffc6b8aad3dc665c8505d6205a8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 25 Sep 2013 12:17:18 -0400
Subject: SUNRPC: Clean up - convert xprt_prepare_transmit to return a bool

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h |  2 +-
 net/sunrpc/clnt.c           |  6 ++----
 net/sunrpc/xprt.c           | 15 +++++++++------
 3 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index cec7b9b5e1bf..8097b9df6773 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -288,7 +288,7 @@ int			xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 int			xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
-int			xprt_prepare_transmit(struct rpc_task *task);
+bool			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
 void			xprt_end_transmit(struct rpc_task *task);
 int			xprt_adjust_timeout(struct rpc_rqst *req);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 77479606a971..fa50e42aabd3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1722,8 +1722,7 @@ call_transmit(struct rpc_task *task)
 	task->tk_action = call_status;
 	if (task->tk_status < 0)
 		return;
-	task->tk_status = xprt_prepare_transmit(task);
-	if (task->tk_status != 0)
+	if (!xprt_prepare_transmit(task))
 		return;
 	task->tk_action = call_transmit_status;
 	/* Encode here so that rpcsec_gss can use correct sequence number. */
@@ -1811,8 +1810,7 @@ call_bc_transmit(struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 
-	task->tk_status = xprt_prepare_transmit(task);
-	if (task->tk_status == -EAGAIN) {
+	if (!xprt_prepare_transmit(task)) {
 		/*
 		 * Could not reserve the transport. Try again after the
 		 * transport is released.
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8cc5c8bcad7f..2326af57b9b9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -864,24 +864,27 @@ static inline int xprt_has_timer(struct rpc_xprt *xprt)
  * @task: RPC task about to send a request
  *
  */
-int xprt_prepare_transmit(struct rpc_task *task)
+bool xprt_prepare_transmit(struct rpc_task *task)
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
 	struct rpc_xprt	*xprt = req->rq_xprt;
-	int err = 0;
+	bool ret = false;
 
 	dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);
 
 	spin_lock_bh(&xprt->transport_lock);
 	if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) {
-		err = req->rq_reply_bytes_recvd;
+		task->tk_status = req->rq_reply_bytes_recvd;
 		goto out_unlock;
 	}
-	if (!xprt->ops->reserve_xprt(xprt, task))
-		err = -EAGAIN;
+	if (!xprt->ops->reserve_xprt(xprt, task)) {
+		task->tk_status = -EAGAIN;
+		goto out_unlock;
+	}
+	ret = true;
 out_unlock:
 	spin_unlock_bh(&xprt->transport_lock);
-	return err;
+	return ret;
 }
 
 void xprt_end_transmit(struct rpc_task *task)
-- 
cgit v1.2.3


From 8a19a0b6cb2e2216afd68ef2047f30260cc8a220 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 24 Sep 2013 12:00:27 -0400
Subject: SUNRPC: Add RPC task and client level options to disable the resend
 timeout

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h  |  2 ++
 include/linux/sunrpc/sched.h |  1 +
 net/sunrpc/clnt.c            |  5 ++++-
 net/sunrpc/xprt.c            | 15 ++++++++++++---
 4 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6740801aa71a..943ee895f2d1 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -49,6 +49,7 @@ struct rpc_clnt {
 
 	unsigned int		cl_softrtry : 1,/* soft timeouts */
 				cl_discrtry : 1,/* disconnect before retry */
+				cl_noretranstimeo: 1,/* No retransmit timeouts */
 				cl_autobind : 1,/* use getport() */
 				cl_chatty   : 1;/* be verbose */
 
@@ -126,6 +127,7 @@ struct rpc_create_args {
 #define RPC_CLNT_CREATE_QUIET		(1UL << 6)
 #define RPC_CLNT_CREATE_INFINITE_SLOTS	(1UL << 7)
 #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT	(1UL << 8)
+#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT	(1UL << 9)
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 096ee58be11a..3a847de83fab 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -122,6 +122,7 @@ struct rpc_task_setup {
 #define RPC_TASK_SENT		0x0800		/* message was sent */
 #define RPC_TASK_TIMEOUT	0x1000		/* fail with ETIMEDOUT on timeout */
 #define RPC_TASK_NOCONNECT	0x2000		/* return ENOTCONN if not connected */
+#define RPC_TASK_NO_RETRANS_TIMEOUT	0x4000		/* wait forever for a reply */
 
 #define RPC_IS_ASYNC(t)		((t)->tk_flags & RPC_TASK_ASYNC)
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fa50e42aabd3..b58525009e40 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -772,6 +772,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
 		atomic_inc(&clnt->cl_count);
 		if (clnt->cl_softrtry)
 			task->tk_flags |= RPC_TASK_SOFT;
+		if (clnt->cl_noretranstimeo)
+			task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
 		if (sk_memalloc_socks()) {
 			struct rpc_xprt *xprt;
 
@@ -1898,7 +1900,8 @@ call_status(struct rpc_task *task)
 		rpc_delay(task, 3*HZ);
 	case -ETIMEDOUT:
 		task->tk_action = call_timeout;
-		if (task->tk_client->cl_discrtry)
+		if (!(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
+		    && task->tk_client->cl_discrtry)
 			xprt_conditional_disconnect(req->rq_xprt,
 					req->rq_connect_cookie);
 		break;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2326af57b9b9..d166d9947e36 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -873,9 +873,18 @@ bool xprt_prepare_transmit(struct rpc_task *task)
 	dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);
 
 	spin_lock_bh(&xprt->transport_lock);
-	if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) {
-		task->tk_status = req->rq_reply_bytes_recvd;
-		goto out_unlock;
+	if (!req->rq_bytes_sent) {
+		if (req->rq_reply_bytes_recvd) {
+			task->tk_status = req->rq_reply_bytes_recvd;
+			goto out_unlock;
+		}
+		if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
+		    && xprt_connected(xprt)
+		    && req->rq_connect_cookie == xprt->connect_cookie) {
+			xprt->ops->set_retrans_timeout(task);
+			rpc_sleep_on(&xprt->pending, task, xprt_timer);
+			goto out_unlock;
+		}
 	}
 	if (!xprt->ops->reserve_xprt(xprt, task)) {
 		task->tk_status = -EAGAIN;
-- 
cgit v1.2.3


From 99875249bfbfb6d9a2aba020ce65da2862d0dafa Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 24 Sep 2013 12:06:07 -0400
Subject: NFSv4: Ensure that we disable the resend timeout for NFSv4

The spec states that the client should not resend requests because
the server will disconnect if it needs to drop an RPC request.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 2 ++
 fs/nfs/nfs4client.c       | 1 +
 include/linux/nfs_fs_sb.h | 1 +
 3 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2dceee4db076..af0325864df6 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -590,6 +590,8 @@ int nfs_create_rpc_client(struct nfs_client *clp,
 
 	if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
 		args.flags |= RPC_CLNT_CREATE_DISCRTRY;
+	if (test_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags))
+		args.flags |= RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT;
 	if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 	if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags))
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index a860ab566d6e..511cdce6ecf2 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -368,6 +368,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	if (clp->cl_minorversion != 0)
 		__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
 	__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+	__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
 	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I);
 	if (error == -EINVAL)
 		error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b8cedced50c9..f9c0a6cb41e9 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -41,6 +41,7 @@ struct nfs_client {
 #define NFS_CS_DISCRTRY		1		/* - disconnect on RPC retry */
 #define NFS_CS_MIGRATION	2		/* - transparent state migr */
 #define NFS_CS_INFINITE_SLOTS	3		/* - don't limit TCP slots */
+#define NFS_CS_NO_RETRANS_TIMEOUT	4	/* - Disable retransmit timeouts */
 	struct sockaddr_storage	cl_addr;	/* server identifier */
 	size_t			cl_addrlen;
 	char *			cl_hostname;	/* hostname of server */
-- 
cgit v1.2.3


From 3713c0cfd06fa49729a12929a7ee8b7ad48f3c02 Mon Sep 17 00:00:00 2001
From: Soren Brinkmann <soren.brinkmann@xilinx.com>
Date: Wed, 18 Sep 2013 11:48:35 -0700
Subject: clockchips: Add FEAT_PERCPU clockevent flag

Add the flag CLOCK_EVT_FEAT_PERCPU which is supposed to be set for per
cpu clockevent devices.

Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Michal Simek <michal.simek@xilinx.com>
---
 include/linux/clockchips.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 0857922e8ad0..493aa021c7a9 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -60,6 +60,7 @@ enum clock_event_mode {
  * Core shall set the interrupt affinity dynamically in broadcast mode
  */
 #define CLOCK_EVT_FEAT_DYNIRQ		0x000020
+#define CLOCK_EVT_FEAT_PERCPU		0x000040
 
 /**
  * struct clock_event_device - clock event device descriptor
-- 
cgit v1.2.3


From 4bcef89f0c6ca1eb4f1a789c2a226f4c02656a4b Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sat, 28 Sep 2013 23:15:27 +0200
Subject: ssb: provide phy address for Gigabit Ethernet driver

Add a function to provide the phy address which should be used to the
Gigabit Ethernet driver connected to ssb.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Reviewed-by: Nithin Nayak Sujir <nsujir@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ssb/ssb_driver_gige.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h
index 86a12b0cb239..0688472500bb 100644
--- a/include/linux/ssb/ssb_driver_gige.h
+++ b/include/linux/ssb/ssb_driver_gige.h
@@ -108,6 +108,16 @@ static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
 	return 0;
 }
 
+/* Get the device phy address */
+static inline int ssb_gige_get_phyaddr(struct pci_dev *pdev)
+{
+	struct ssb_gige *dev = pdev_to_ssb_gige(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	return dev->dev->bus->sprom.et0phyaddr;
+}
+
 extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev,
 					  struct pci_dev *pdev);
 extern int ssb_gige_map_irq(struct ssb_device *sdev,
@@ -174,6 +184,10 @@ static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
 {
 	return -ENODEV;
 }
+static inline int ssb_gige_get_phyaddr(struct pci_dev *pdev)
+{
+	return -ENODEV;
+}
 
 #endif /* CONFIG_SSB_DRIVER_GIGE */
 #endif /* LINUX_SSB_DRIVER_GIGE_H_ */
-- 
cgit v1.2.3


From 5bc3db5c9ca8407f52918b6504d3b27230defedc Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Mon, 30 Sep 2013 21:30:22 -0700
Subject: tc: export tc_defact.h to userspace

Jamal sent patch to add tc user simple actions to iproute2
but required header was not being exported.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tc_act/tc_defact.h      | 19 -------------------
 include/uapi/linux/tc_act/Kbuild      |  1 +
 include/uapi/linux/tc_act/tc_defact.h | 19 +++++++++++++++++++
 3 files changed, 20 insertions(+), 19 deletions(-)
 delete mode 100644 include/linux/tc_act/tc_defact.h
 create mode 100644 include/uapi/linux/tc_act/tc_defact.h

(limited to 'include/linux')

diff --git a/include/linux/tc_act/tc_defact.h b/include/linux/tc_act/tc_defact.h
deleted file mode 100644
index 6f65d07c7ce2..000000000000
--- a/include/linux/tc_act/tc_defact.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __LINUX_TC_DEF_H
-#define __LINUX_TC_DEF_H
-
-#include <linux/pkt_cls.h>
-
-struct tc_defact {
-	tc_gen;
-};
-                                                                                
-enum {
-	TCA_DEF_UNSPEC,
-	TCA_DEF_TM,
-	TCA_DEF_PARMS,
-	TCA_DEF_DATA,
-	__TCA_DEF_MAX
-};
-#define TCA_DEF_MAX (__TCA_DEF_MAX - 1)
-
-#endif
diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild
index 0623ec4e728f..56f121605c99 100644
--- a/include/uapi/linux/tc_act/Kbuild
+++ b/include/uapi/linux/tc_act/Kbuild
@@ -1,5 +1,6 @@
 # UAPI Header export list
 header-y += tc_csum.h
+header-y += tc_defact.h
 header-y += tc_gact.h
 header-y += tc_ipt.h
 header-y += tc_mirred.h
diff --git a/include/uapi/linux/tc_act/tc_defact.h b/include/uapi/linux/tc_act/tc_defact.h
new file mode 100644
index 000000000000..17dddb40f740
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_defact.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_TC_DEF_H
+#define __LINUX_TC_DEF_H
+
+#include <linux/pkt_cls.h>
+
+struct tc_defact {
+	tc_gen;
+};
+
+enum {
+	TCA_DEF_UNSPEC,
+	TCA_DEF_TM,
+	TCA_DEF_PARMS,
+	TCA_DEF_DATA,
+	__TCA_DEF_MAX
+};
+#define TCA_DEF_MAX (__TCA_DEF_MAX - 1)
+
+#endif
-- 
cgit v1.2.3


From 0b3d8e087bbee2a4e3f479d538a7edd3f1d2950c Mon Sep 17 00:00:00 2001
From: Denis Kirjanov <kda@linux-powerpc.org>
Date: Wed, 2 Oct 2013 05:58:32 +0400
Subject: include/linux/skbuff.h: move CONFIG_XFRM check inside the
 skb_sec_path()

And thus we have only one function definition

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 71b1d9402fd3..1cd32f96055e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2708,17 +2708,14 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		  unsigned int num_tx_queues);
 
-#ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 {
+#ifdef CONFIG_XFRM
 	return skb->sp;
-}
 #else
-static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
-{
 	return NULL;
-}
 #endif
+}
 
 /* Keeps track of mac header offset relative to skb->head.
  * It is useful for TSO of Tunneling protocol. e.g. GRE.
-- 
cgit v1.2.3


From 05071aa864e84000759191438a4a9ff7ba2c360e Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 27 Sep 2013 16:34:27 +0200
Subject: spi: Add a spi_w8r16be() helper

This patch adds a new spi_w8r16be() helper, which is similar to spi_w8r16()
except that it converts the read data word from big endian to native endianness
before returning it. The reason for introducing this new helper is that for SPI
slave devices it is quite common that the read 16 bit data word is in big
endian. So users of spi_w8r16() have to convert the result to native endianness
manually. A second reason is that in this case the endianness of the return
value of spi_w8r16() depends on its sign. If it is negative (i.e. a error code)
it is already in native endianness, if it is positive it is in big endian. The
sparse code checker doesn't like this kind of mixed endianness and special
annotations are necessary to keep it quiet (E.g. casting to be16 using __force).
Doing the conversion to native endianness in the helper function does not
require such annotations since we are not mixing different endiannesses in the
same variable.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/spi/spi.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 887116dbce2c..0e0aebdeb56b 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -823,6 +823,33 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd)
 	return (status < 0) ? status : result;
 }
 
+/**
+ * spi_w8r16be - SPI synchronous 8 bit write followed by 16 bit big-endian read
+ * @spi: device with which data will be exchanged
+ * @cmd: command to be written before data is read back
+ * Context: can sleep
+ *
+ * This returns the (unsigned) sixteen bit number returned by the device in cpu
+ * endianness, or else a negative error code. Callable only from contexts that
+ * can sleep.
+ *
+ * This function is similar to spi_w8r16, with the exception that it will
+ * convert the read 16 bit data word from big-endian to native endianness.
+ *
+ */
+static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)
+
+{
+	ssize_t status;
+	__be16 result;
+
+	status = spi_write_then_read(spi, &cmd, 1, &result, 2);
+	if (status < 0)
+		return status;
+
+	return be16_to_cpu(result);
+}
+
 /*---------------------------------------------------------------------------*/
 
 /*
-- 
cgit v1.2.3


From 8daaa5f8261bffd2f6217a960f9182d0503a5c44 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 2 Oct 2013 10:14:18 -0500
Subject: kdb: Add support for external NMI handler to call KGDB/KDB

This patch adds a kgdb_nmicallin() interface that can be used by
external NMI handlers to call the KGDB/KDB handler.  The primary
need for this is for those types of NMI interrupts where all the
CPUs have already received the NMI signal.  Therefore no
send_IPI(NMI) is required, and in fact it will cause a 2nd
unhandled NMI to occur. This generates the "Dazed and Confuzed"
messages.

Since all the CPUs are getting the NMI at roughly the same time,
it's not guaranteed that the first CPU that hits the NMI handler
will manage to enter KGDB and set the dbg_master_lock before the
slaves start entering. The new argument "send_ready" was added
for KGDB to signal the NMI handler to release the slave CPUs for
entry into KGDB.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Jason Wessel <jason.wessel@windriver.com>
Reviewed-by: Dimitri Sivanich <sivanich@sgi.com>
Reviewed-by: Hedi Berriche <hedi@sgi.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Link: http://lkml.kernel.org/r/20131002151417.928886849@asylum.americas.sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kdb.h             |  1 +
 include/linux/kgdb.h            |  1 +
 kernel/debug/debug_core.c       | 32 ++++++++++++++++++++++++++++++--
 kernel/debug/debug_core.h       |  3 +++
 kernel/debug/kdb/kdb_debugger.c |  5 ++++-
 kernel/debug/kdb/kdb_main.c     |  3 +++
 6 files changed, 42 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 7f6fe6e015bc..290db1269c4c 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -109,6 +109,7 @@ typedef enum {
 	KDB_REASON_RECURSE,	/* Recursive entry to kdb;
 				 * regs probably valid */
 	KDB_REASON_SSTEP,	/* Single Step trap. - regs valid */
+	KDB_REASON_SYSTEM_NMI,	/* In NMI due to SYSTEM cmd; regs valid */
 } kdb_reason_t;
 
 extern int kdb_trap_printk;
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index c6e091bf39a5..dfb4f2ffdaa2 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -310,6 +310,7 @@ extern int
 kgdb_handle_exception(int ex_vector, int signo, int err_code,
 		      struct pt_regs *regs);
 extern int kgdb_nmicallback(int cpu, void *regs);
+extern int kgdb_nmicallin(int cpu, int trapnr, void *regs, atomic_t *snd_rdy);
 extern void gdbstub_exit(int status);
 
 extern int			kgdb_single_step;
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0506d447aed2..7d2f35e5df2f 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -575,8 +575,12 @@ return_normal:
 		raw_spin_lock(&dbg_slave_lock);
 
 #ifdef CONFIG_SMP
+	/* If send_ready set, slaves are already waiting */
+	if (ks->send_ready)
+		atomic_set(ks->send_ready, 1);
+
 	/* Signal the other CPUs to enter kgdb_wait() */
-	if ((!kgdb_single_step) && kgdb_do_roundup)
+	else if ((!kgdb_single_step) && kgdb_do_roundup)
 		kgdb_roundup_cpus(flags);
 #endif
 
@@ -678,11 +682,11 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
 	if (arch_kgdb_ops.enable_nmi)
 		arch_kgdb_ops.enable_nmi(0);
 
+	memset(ks, 0, sizeof(struct kgdb_state));
 	ks->cpu			= raw_smp_processor_id();
 	ks->ex_vector		= evector;
 	ks->signo		= signo;
 	ks->err_code		= ecode;
-	ks->kgdb_usethreadid	= 0;
 	ks->linux_regs		= regs;
 
 	if (kgdb_reenter_check(ks))
@@ -732,6 +736,30 @@ int kgdb_nmicallback(int cpu, void *regs)
 	return 1;
 }
 
+int kgdb_nmicallin(int cpu, int trapnr, void *regs, atomic_t *send_ready)
+{
+#ifdef CONFIG_SMP
+	if (!kgdb_io_ready(0) || !send_ready)
+		return 1;
+
+	if (kgdb_info[cpu].enter_kgdb == 0) {
+		struct kgdb_state kgdb_var;
+		struct kgdb_state *ks = &kgdb_var;
+
+		memset(ks, 0, sizeof(struct kgdb_state));
+		ks->cpu			= cpu;
+		ks->ex_vector		= trapnr;
+		ks->signo		= SIGTRAP;
+		ks->err_code		= KGDB_KDB_REASON_SYSTEM_NMI;
+		ks->linux_regs		= regs;
+		ks->send_ready		= send_ready;
+		kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER);
+		return 0;
+	}
+#endif
+	return 1;
+}
+
 static void kgdb_console_write(struct console *co, const char *s,
    unsigned count)
 {
diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h
index 2235967e78b0..572aa4f5677c 100644
--- a/kernel/debug/debug_core.h
+++ b/kernel/debug/debug_core.h
@@ -26,6 +26,7 @@ struct kgdb_state {
 	unsigned long		threadid;
 	long			kgdb_usethreadid;
 	struct pt_regs		*linux_regs;
+	atomic_t		*send_ready;
 };
 
 /* Exception state values */
@@ -74,11 +75,13 @@ extern int kdb_stub(struct kgdb_state *ks);
 extern int kdb_parse(const char *cmdstr);
 extern int kdb_common_init_state(struct kgdb_state *ks);
 extern int kdb_common_deinit_state(void);
+#define KGDB_KDB_REASON_SYSTEM_NMI KDB_REASON_SYSTEM_NMI
 #else /* ! CONFIG_KGDB_KDB */
 static inline int kdb_stub(struct kgdb_state *ks)
 {
 	return DBG_PASS_EVENT;
 }
+#define KGDB_KDB_REASON_SYSTEM_NMI 0
 #endif /* CONFIG_KGDB_KDB */
 
 #endif /* _DEBUG_CORE_H_ */
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index 328d18ef31e4..8859ca34dcfe 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -69,7 +69,10 @@ int kdb_stub(struct kgdb_state *ks)
 	if (atomic_read(&kgdb_setting_breakpoint))
 		reason = KDB_REASON_KEYBOARD;
 
-	if (in_nmi())
+	if (ks->err_code == KDB_REASON_SYSTEM_NMI && ks->signo == SIGTRAP)
+		reason = KDB_REASON_SYSTEM_NMI;
+
+	else if (in_nmi())
 		reason = KDB_REASON_NMI;
 
 	for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 00eb8f7fbf41..0b097c8a1e50 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1200,6 +1200,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
 			   instruction_pointer(regs));
 		kdb_dumpregs(regs);
 		break;
+	case KDB_REASON_SYSTEM_NMI:
+		kdb_printf("due to System NonMaskable Interrupt\n");
+		break;
 	case KDB_REASON_NMI:
 		kdb_printf("due to NonMaskable Interrupt @ "
 			   kdb_machreg_fmt "\n",
-- 
cgit v1.2.3


From 5080546682bae3d32734b18e281091684f0ebbe4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Oct 2013 04:29:50 -0700
Subject: inet: consolidate INET_TW_MATCH

TCP listener refactoring, part 2 :

We can use a generic lookup, sockets being in whatever state, if
we are sure all relevant fields are at the same place in all socket
types (ESTABLISH, TIME_WAIT, SYN_RECV)

This patch removes these macros :

 inet_addrpair, inet_addrpair, tw_addrpair, tw_portpair

And adds :

 sk_portpair, sk_addrpair, sk_daddr, sk_rcv_saddr

Then, INET_TW_MATCH() is really the same than INET_MATCH()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h             |  4 ++--
 include/net/inet_hashtables.h    | 26 ++++++++------------------
 include/net/inet_sock.h          |  2 --
 include/net/inet_timewait_sock.h |  8 --------
 include/net/sock.h               |  4 ++++
 net/ipv4/inet_connection_sock.c  | 11 +++++------
 net/ipv6/udp.c                   |  6 ++----
 7 files changed, 21 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 28ea38439313..b7f1f3bb346d 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -370,7 +370,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
 #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	\
-	((inet_sk(__sk)->inet_portpair == (__ports))		&&	\
+	(((__sk)->sk_portpair == (__ports))			&&	\
 	 ((__sk)->sk_family == AF_INET6)			&&	\
 	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&&	\
 	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&&	\
@@ -379,7 +379,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 	 net_eq(sock_net(__sk), (__net)))
 
 #define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	   \
-	((inet_twsk(__sk)->tw_portpair == (__ports))			&& \
+	(((__sk)->sk_portpair == (__ports))				&& \
 	 ((__sk)->sk_family == AF_INET6)				&& \
 	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))	&& \
 	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 594dfeead70f..10d6838378c3 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -302,35 +302,25 @@ static inline struct sock *inet_lookup_listener(struct net *net,
 				   ((__force __u64)(__be32)(__saddr)));
 #endif /* __BIG_ENDIAN */
 #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)	\
-	((inet_sk(__sk)->inet_portpair == (__ports))		&&	\
-	 (inet_sk(__sk)->inet_addrpair == (__cookie))		&&	\
+	(((__sk)->sk_portpair == (__ports))			&&	\
+	 ((__sk)->sk_addrpair == (__cookie))			&&	\
 	 (!(__sk)->sk_bound_dev_if	||				\
 	   ((__sk)->sk_bound_dev_if == (__dif))) 		&& 	\
 	 net_eq(sock_net(__sk), (__net)))
-#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\
-	((inet_twsk(__sk)->tw_portpair == (__ports))	&&		\
-	 (inet_twsk(__sk)->tw_addrpair == (__cookie))	&&		\
-	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif)))	&&		\
-	 net_eq(sock_net(__sk), (__net)))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
 #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
-	((inet_sk(__sk)->inet_portpair == (__ports))	&&		\
-	 (inet_sk(__sk)->inet_daddr	== (__saddr))	&&		\
-	 (inet_sk(__sk)->inet_rcv_saddr	== (__daddr))	&&		\
-	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif))) 	&&		\
-	 net_eq(sock_net(__sk), (__net)))
-#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
-	((inet_twsk(__sk)->tw_portpair == (__ports))	&&		\
-	 (inet_twsk(__sk)->tw_daddr	== (__saddr))	&&		\
-	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))	&&		\
+	(((__sk)->sk_portpair == (__ports))		&&		\
+	 ((__sk)->sk_daddr	== (__saddr))		&&		\
+	 ((__sk)->sk_rcv_saddr	== (__daddr))		&&		\
 	 (!(__sk)->sk_bound_dev_if	||				\
 	   ((__sk)->sk_bound_dev_if == (__dif))) 	&&		\
 	 net_eq(sock_net(__sk), (__net)))
 #endif /* 64-bit arch */
 
+#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\
+	INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)
+
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
  * not check it for lookups anymore, thanks Alexey. -DaveM
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f3141773c14d..6d9a7e6eb5a4 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -146,10 +146,8 @@ struct inet_sock {
 	/* Socket demultiplex comparisons on incoming packets. */
 #define inet_daddr		sk.__sk_common.skc_daddr
 #define inet_rcv_saddr		sk.__sk_common.skc_rcv_saddr
-#define inet_addrpair		sk.__sk_common.skc_addrpair
 #define inet_dport		sk.__sk_common.skc_dport
 #define inet_num		sk.__sk_common.skc_num
-#define inet_portpair		sk.__sk_common.skc_portpair
 
 	__be32			inet_saddr;
 	__s16			uc_ttl;
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 828200ab1125..48fd3561722c 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -112,10 +112,8 @@ struct inet_timewait_sock {
 #define tw_net			__tw_common.skc_net
 #define tw_daddr        	__tw_common.skc_daddr
 #define tw_rcv_saddr    	__tw_common.skc_rcv_saddr
-#define tw_addrpair		__tw_common.skc_addrpair
 #define tw_dport		__tw_common.skc_dport
 #define tw_num			__tw_common.skc_num
-#define tw_portpair		__tw_common.skc_portpair
 
 	int			tw_timeout;
 	volatile unsigned char	tw_substate;
@@ -189,12 +187,6 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
 	return (struct inet_timewait_sock *)sk;
 }
 
-static inline __be32 sk_rcv_saddr(const struct sock *sk)
-{
-/* both inet_sk() and inet_twsk() store rcv_saddr in skc_rcv_saddr */
-	return sk->__sk_common.skc_rcv_saddr;
-}
-
 void inet_twsk_put(struct inet_timewait_sock *tw);
 
 int inet_twsk_unhash(struct inet_timewait_sock *tw);
diff --git a/include/net/sock.h b/include/net/sock.h
index f0a44cc1ff9d..e3bf213be625 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -300,6 +300,10 @@ struct sock {
 #define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
 #define sk_dontcopy_end		__sk_common.skc_dontcopy_end
 #define sk_hash			__sk_common.skc_hash
+#define sk_portpair		__sk_common.skc_portpair
+#define sk_addrpair		__sk_common.skc_addrpair
+#define sk_daddr		__sk_common.skc_daddr
+#define sk_rcv_saddr		__sk_common.skc_rcv_saddr
 #define sk_family		__sk_common.skc_family
 #define sk_state		__sk_common.skc_state
 #define sk_reuse		__sk_common.skc_reuse
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7ac7aa11130e..56e82a4027b4 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -71,17 +71,16 @@ int inet_csk_bind_conflict(const struct sock *sk,
 			    (!reuseport || !sk2->sk_reuseport ||
 			    (sk2->sk_state != TCP_TIME_WAIT &&
 			     !uid_eq(uid, sock_i_uid(sk2))))) {
-				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
-				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
-				    sk2_rcv_saddr == sk_rcv_saddr(sk))
+
+				if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
+				    sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
 					break;
 			}
 			if (!relax && reuse && sk2->sk_reuse &&
 			    sk2->sk_state != TCP_LISTEN) {
-				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 
-				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
-				    sk2_rcv_saddr == sk_rcv_saddr(sk))
+				if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
+				    sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
 					break;
 			}
 		}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 72b7eaaf3ca0..8119791e8a95 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -57,8 +57,6 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
 	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
 	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
-	__be32 sk1_rcv_saddr = sk_rcv_saddr(sk);
-	__be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
 	int sk2_ipv6only = inet_v6_ipv6only(sk2);
 	int addr_type = ipv6_addr_type(sk_rcv_saddr6);
@@ -67,8 +65,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 	/* if both are mapped, treat as IPv4 */
 	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
 		return (!sk2_ipv6only &&
-			(!sk1_rcv_saddr || !sk2_rcv_saddr ||
-			  sk1_rcv_saddr == sk2_rcv_saddr));
+			(!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
+			  sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
 
 	if (addr_type2 == IPV6_ADDR_ANY &&
 	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
-- 
cgit v1.2.3


From 0acc2b321342aa813fa9fc485afb09fbc811f594 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Sun, 29 Sep 2013 10:51:06 +0200
Subject: i2c: Remove redundant 'driver' field from the i2c_client struct

The 'driver' field of the i2c_client struct is redundant. The same data can be
accessed through to_i2c_driver(client->dev.driver). The generated code for both
approaches in more or less the same.

E.g. on ARM the expression client->driver->command(...) generates

		...
		ldr     r3, [r0, #28]
		ldr     r3, [r3, #32]
		blx     r3
		...

and the expression to_i2c_driver(client->dev.driver)->command(...) generates

		...
		ldr     r3, [r0, #160]
    	ldr     r3, [r3, #-4]
    	blx     r3
		...

Other architectures will generate similar code.

All users of the 'driver' field outside of the I2C core have already been
converted. So this only leaves the core itself. This patch converts the
remaining few users in the I2C core and then removes the 'driver' field from the
i2c_client struct.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core.c  | 21 ++++++++++++---------
 drivers/i2c/i2c-smbus.c | 10 ++++++----
 include/linux/i2c.h     |  2 --
 3 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 29d3f045a2bf..430c001b3f1e 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -248,17 +248,16 @@ static int i2c_device_probe(struct device *dev)
 	driver = to_i2c_driver(dev->driver);
 	if (!driver->probe || !driver->id_table)
 		return -ENODEV;
-	client->driver = driver;
+
 	if (!device_can_wakeup(&client->dev))
 		device_init_wakeup(&client->dev,
 					client->flags & I2C_CLIENT_WAKE);
 	dev_dbg(dev, "probe\n");
 
 	status = driver->probe(client, i2c_match_id(driver->id_table, client));
-	if (status) {
-		client->driver = NULL;
+	if (status)
 		i2c_set_clientdata(client, NULL);
-	}
+
 	return status;
 }
 
@@ -279,10 +278,9 @@ static int i2c_device_remove(struct device *dev)
 		dev->driver = NULL;
 		status = 0;
 	}
-	if (status == 0) {
-		client->driver = NULL;
+	if (status == 0)
 		i2c_set_clientdata(client, NULL);
-	}
+
 	return status;
 }
 
@@ -1606,9 +1604,14 @@ static int i2c_cmd(struct device *dev, void *_arg)
 {
 	struct i2c_client	*client = i2c_verify_client(dev);
 	struct i2c_cmd_arg	*arg = _arg;
+	struct i2c_driver	*driver;
+
+	if (!client || !client->dev.driver)
+		return 0;
 
-	if (client && client->driver && client->driver->command)
-		client->driver->command(client, arg->cmd, arg->arg);
+	driver = to_i2c_driver(client->dev.driver);
+	if (driver->command)
+		driver->command(client, arg->cmd, arg->arg);
 	return 0;
 }
 
diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c
index 44d4c6071c15..c99b22987366 100644
--- a/drivers/i2c/i2c-smbus.c
+++ b/drivers/i2c/i2c-smbus.c
@@ -46,6 +46,7 @@ static int smbus_do_alert(struct device *dev, void *addrp)
 {
 	struct i2c_client *client = i2c_verify_client(dev);
 	struct alert_data *data = addrp;
+	struct i2c_driver *driver;
 
 	if (!client || client->addr != data->addr)
 		return 0;
@@ -54,12 +55,13 @@ static int smbus_do_alert(struct device *dev, void *addrp)
 
 	/*
 	 * Drivers should either disable alerts, or provide at least
-	 * a minimal handler.  Lock so client->driver won't change.
+	 * a minimal handler.  Lock so the driver won't change.
 	 */
 	device_lock(dev);
-	if (client->driver) {
-		if (client->driver->alert)
-			client->driver->alert(client, data->flag);
+	if (client->dev.driver) {
+		driver = to_i2c_driver(client->dev.driver);
+		if (driver->alert)
+			driver->alert(client, data->flag);
 		else
 			dev_warn(&client->dev, "no driver alert()!\n");
 	} else
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 2ab11dc38077..eff50e062be8 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -205,7 +205,6 @@ struct i2c_driver {
  * @name: Indicates the type of the device, usually a chip name that's
  *	generic enough to hide second-sourcing and compatible revisions.
  * @adapter: manages the bus segment hosting this I2C device
- * @driver: device's driver, hence pointer to access routines
  * @dev: Driver model device node for the slave.
  * @irq: indicates the IRQ generated by this device (if any)
  * @detected: member of an i2c_driver.clients list or i2c-core's
@@ -222,7 +221,6 @@ struct i2c_client {
 					/* _LOWER_ 7 bits		*/
 	char name[I2C_NAME_SIZE];
 	struct i2c_adapter *adapter;	/* the adapter we sit on	*/
-	struct i2c_driver *driver;	/* and our access routines	*/
 	struct device dev;		/* the device structure		*/
 	int irq;			/* irq issued by device		*/
 	struct list_head detected;
-- 
cgit v1.2.3


From 4ae1a5bd3fabd7f7f3575309c7a0d676fecf6303 Mon Sep 17 00:00:00 2001
From: Thomas Pugliese <thomas.pugliese@gmail.com>
Date: Tue, 1 Oct 2013 14:04:35 -0500
Subject: usb: wusbcore: Add isoc transfer type enum and packet definitions

This patch adds transfer type enum and packet definitions for
WA_XFER_ISO_PACKET_INFO and WA_XFER_ISO_PACKET_STATUS packets.

It also changes instances of __attribute__((packed)) to __packed to make
checkpatch.pl happy.

Signed-off-by: Thomas Pugliese <thomas.pugliese@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/wusb-wa.h | 45 +++++++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h
index 4ff744e2b678..9ae7e299bf77 100644
--- a/include/linux/usb/wusb-wa.h
+++ b/include/linux/usb/wusb-wa.h
@@ -142,7 +142,7 @@ enum wa_notif_type {
 struct wa_notif_hdr {
 	u8 bLength;
 	u8 bNotifyType;			/* enum wa_notif_type */
-} __attribute__((packed));
+} __packed;
 
 /**
  * HWA DN Received notification [(WUSB] section 8.5.4.2)
@@ -158,7 +158,7 @@ struct hwa_notif_dn {
 	u8 bSourceDeviceAddr;		/* from errata 2005/07 */
 	u8 bmAttributes;
 	struct wusb_dn_hdr dndata[];
-} __attribute__((packed));
+} __packed;
 
 /* [WUSB] section 8.3.3 */
 enum wa_xfer_type {
@@ -167,6 +167,8 @@ enum wa_xfer_type {
 	WA_XFER_TYPE_ISO = 0x82,
 	WA_XFER_RESULT = 0x83,
 	WA_XFER_ABORT = 0x84,
+	WA_XFER_ISO_PACKET_INFO = 0xA0,
+	WA_XFER_ISO_PACKET_STATUS = 0xA1,
 };
 
 /* [WUSB] section 8.3.3 */
@@ -177,28 +179,47 @@ struct wa_xfer_hdr {
 	__le32 dwTransferID;		/* Host-assigned ID */
 	__le32 dwTransferLength;	/* Length of data to xfer */
 	u8 bTransferSegment;
-} __attribute__((packed));
+} __packed;
 
 struct wa_xfer_ctl {
 	struct wa_xfer_hdr hdr;
 	u8 bmAttribute;
 	__le16 wReserved;
 	struct usb_ctrlrequest baSetupData;
-} __attribute__((packed));
+} __packed;
 
 struct wa_xfer_bi {
 	struct wa_xfer_hdr hdr;
 	u8 bReserved;
 	__le16 wReserved;
-} __attribute__((packed));
+} __packed;
 
+/* [WUSB] section 8.5.5 */
 struct wa_xfer_hwaiso {
 	struct wa_xfer_hdr hdr;
 	u8 bReserved;
 	__le16 wPresentationTime;
 	__le32 dwNumOfPackets;
-	/* FIXME: u8 pktdata[]? */
-} __attribute__((packed));
+} __packed;
+
+struct wa_xfer_packet_info_hwaiso {
+	__le16 wLength;
+	u8 bPacketType;
+	u8 bReserved;
+	__le16 PacketLength[0];
+} __packed;
+
+struct wa_xfer_packet_status_len_hwaiso {
+	__le16 PacketLength;
+	__le16 PacketStatus;
+} __packed;
+
+struct wa_xfer_packet_status_hwaiso {
+	__le16 wLength;
+	u8 bPacketType;
+	u8 bReserved;
+	struct wa_xfer_packet_status_len_hwaiso PacketStatus[0];
+} __packed;
 
 /* [WUSB] section 8.3.3.5 */
 struct wa_xfer_abort {
@@ -206,7 +227,7 @@ struct wa_xfer_abort {
 	u8 bRequestType;
 	__le16 wRPipe;			/* RPipe index */
 	__le32 dwTransferID;		/* Host-assigned ID */
-} __attribute__((packed));
+} __packed;
 
 /**
  * WA Transfer Complete notification ([WUSB] section 8.3.3.3)
@@ -216,7 +237,7 @@ struct wa_notif_xfer {
 	struct wa_notif_hdr hdr;
 	u8 bEndpoint;
 	u8 Reserved;
-} __attribute__((packed));
+} __packed;
 
 /** Transfer result basic codes [WUSB] table 8-15 */
 enum {
@@ -243,7 +264,7 @@ struct wa_xfer_result {
 	u8     bTransferSegment;
 	u8     bTransferStatus;
 	__le32 dwNumOfPackets;
-} __attribute__((packed));
+} __packed;
 
 /**
  * Wire Adapter Class Descriptor ([WUSB] section 8.5.2.7).
@@ -267,7 +288,7 @@ struct usb_wa_descriptor {
 	u8	bPwrOn2PwrGood;
 	u8	bNumMMCIEs;
 	u8	DeviceRemovable;	/* FIXME: in DWA this is up to 16 bytes */
-} __attribute__((packed));
+} __packed;
 
 /**
  * HWA Device Information Buffer (WUSB1.0[T8.54])
@@ -277,6 +298,6 @@ struct hwa_dev_info {
 	u8	bDeviceAddress;
 	__le16	wPHYRates;
 	u8	bmDeviceAttribute;
-} __attribute__((packed));
+} __packed;
 
 #endif /* #ifndef __LINUX_USB_WUSB_WA_H */
-- 
cgit v1.2.3


From 4fd06af96b9397fc54eb6b1a013a60c34693eef0 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Thu, 3 Oct 2013 18:12:30 +0300
Subject: usb: phy: omap-control: Get rid of platform data

omap-control device is present from OMAP4 onwards which
support device tree boots only. So get rid of platform data.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-omap-control.c   | 12 +++---------
 include/linux/usb/omap_control_usb.h |  4 ----
 2 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-omap-control.c b/drivers/usb/phy/phy-omap-control.c
index a4dda8e12562..977259252a8e 100644
--- a/drivers/usb/phy/phy-omap-control.c
+++ b/drivers/usb/phy/phy-omap-control.c
@@ -197,8 +197,6 @@ static int omap_control_usb_probe(struct platform_device *pdev)
 {
 	struct resource	*res;
 	struct device_node *np = pdev->dev.of_node;
-	struct omap_control_usb_platform_data *pdata =
-			dev_get_platdata(&pdev->dev);
 
 	control_usb = devm_kzalloc(&pdev->dev, sizeof(*control_usb),
 		GFP_KERNEL);
@@ -207,14 +205,10 @@ static int omap_control_usb_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	if (np) {
+	if (np)
 		of_property_read_u32(np, "ti,type", &control_usb->type);
-	} else if (pdata) {
-		control_usb->type = pdata->type;
-	} else {
-		dev_err(&pdev->dev, "no pdata present\n");
-		return -EINVAL;
-	}
+	else
+		return -EINVAL;	/* We only support DT boot */
 
 	control_usb->dev	= &pdev->dev;
 
diff --git a/include/linux/usb/omap_control_usb.h b/include/linux/usb/omap_control_usb.h
index 27b5b8c931b0..e2416b45169b 100644
--- a/include/linux/usb/omap_control_usb.h
+++ b/include/linux/usb/omap_control_usb.h
@@ -31,10 +31,6 @@ struct omap_control_usb {
 	u32 type;
 };
 
-struct omap_control_usb_platform_data {
-	u8 type;
-};
-
 enum omap_control_usb_mode {
 	USB_MODE_UNDEFINED = 0,
 	USB_MODE_HOST,
-- 
cgit v1.2.3


From 6cb9310a3290beb8c0d31703a2e76b90a10b4ca0 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Thu, 3 Oct 2013 18:12:31 +0300
Subject: usb: phy: omap: Add new device types and remove
 omap_control_usb3_phy_power()

Add support for new device types and in the process rid of "ti,type"
device tree property. The correct type of device will be determined
from the compatible string instead.

Introduce a compatible string for each device type. At the moment
we support 4 types OTGHS, USB2, PIPE3 (e.g. USB3) and DRA7USB2.

Update DT binding information to reflect these changes.

Also get rid of omap_control_usb3_phy_power(). Just one function
i.e. omap_control_usb_phy_power() will now take care of all PHY types.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/omap-usb.txt |  30 ++--
 drivers/usb/phy/phy-omap-control.c                 | 173 ++++++++++++---------
 drivers/usb/phy/phy-omap-usb3.c                    |   6 +-
 include/linux/usb/omap_control_usb.h               |  24 +--
 4 files changed, 130 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/usb/omap-usb.txt b/Documentation/devicetree/bindings/usb/omap-usb.txt
index 661cb06a9063..9add35c37ebc 100644
--- a/Documentation/devicetree/bindings/usb/omap-usb.txt
+++ b/Documentation/devicetree/bindings/usb/omap-usb.txt
@@ -83,22 +83,22 @@ omap_dwc3 {
 OMAP CONTROL USB
 
 Required properties:
- - compatible: Should be "ti,omap-control-usb"
+ - compatible: Should be one of
+ "ti,control-phy-otghs" - if it has otghs_control mailbox register as on OMAP4.
+ "ti,control-phy-usb2" - if it has Power down bit in control_dev_conf register
+			e.g. USB2_PHY on OMAP5.
+ "ti,control-phy-pipe3" - if it has DPLL and individual Rx & Tx power control
+			e.g. USB3 PHY and SATA PHY on OMAP5.
+ "ti,control-phy-dra7usb2" - if it has power down register like USB2 PHY on
+			DRA7 platform.
  - reg : Address and length of the register set for the device. It contains
-   the address of "control_dev_conf" and "otghs_control" or "phy_power_usb"
-   depending upon omap4 or omap5.
- - reg-names: The names of the register addresses corresponding to the registers
-   filled in "reg".
- - ti,type: This is used to differentiate whether the control module has
-   usb mailbox or usb3 phy power. omap4 has usb mailbox in control module to
-   notify events to the musb core and omap5 has usb3 phy power register to
-   power on usb3 phy. Should be "1" if it has mailbox and "2" if it has usb3
-   phy power.
+   the address of "otghs_control" for control-phy-otghs or "power" register
+   for other types.
+ - reg-names: should be "otghs_control" control-phy-otghs and "power" for
+   other types.
 
 omap_control_usb: omap-control-usb@4a002300 {
-	compatible = "ti,omap-control-usb";
-	reg = <0x4a002300 0x4>,
-	      <0x4a00233c 0x4>;
-	reg-names = "control_dev_conf", "otghs_control";
-	ti,type = <1>;
+	compatible = "ti,control-phy-otghs";
+	reg = <0x4a00233c 0x4>;
+	reg-names = "otghs_control";
 };
diff --git a/drivers/usb/phy/phy-omap-control.c b/drivers/usb/phy/phy-omap-control.c
index 977259252a8e..1c8a7c5ccb9b 100644
--- a/drivers/usb/phy/phy-omap-control.c
+++ b/drivers/usb/phy/phy-omap-control.c
@@ -20,6 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/clk.h>
@@ -46,61 +47,70 @@ struct device *omap_get_control_dev(void)
 EXPORT_SYMBOL_GPL(omap_get_control_dev);
 
 /**
- * omap_control_usb3_phy_power - power on/off the serializer using control
- *	module
+ * omap_control_usb_phy_power - power on/off the phy using control module reg
  * @dev: the control module device
- * @on: 0 to off and 1 to on based on powering on or off the PHY
- *
- * usb3 PHY driver should call this API to power on or off the PHY.
+ * @on: 0 or 1, based on powering on or off the PHY
  */
-void omap_control_usb3_phy_power(struct device *dev, bool on)
+void omap_control_usb_phy_power(struct device *dev, int on)
 {
 	u32 val;
 	unsigned long rate;
-	struct omap_control_usb	*control_usb = dev_get_drvdata(dev);
+	struct omap_control_usb	*control_usb;
 
-	if (control_usb->type != OMAP_CTRL_DEV_TYPE2)
+	if (IS_ERR(dev) || !dev) {
+		pr_err("%s: invalid device\n", __func__);
 		return;
+	}
 
-	rate = clk_get_rate(control_usb->sys_clk);
-	rate = rate/1000000;
-
-	val = readl(control_usb->phy_power);
-
-	if (on) {
-		val &= ~(OMAP_CTRL_USB_PWRCTL_CLK_CMD_MASK |
-			OMAP_CTRL_USB_PWRCTL_CLK_FREQ_MASK);
-		val |= OMAP_CTRL_USB3_PHY_TX_RX_POWERON <<
-			OMAP_CTRL_USB_PWRCTL_CLK_CMD_SHIFT;
-		val |= rate << OMAP_CTRL_USB_PWRCTL_CLK_FREQ_SHIFT;
-	} else {
-		val &= ~OMAP_CTRL_USB_PWRCTL_CLK_CMD_MASK;
-		val |= OMAP_CTRL_USB3_PHY_TX_RX_POWEROFF <<
-			OMAP_CTRL_USB_PWRCTL_CLK_CMD_SHIFT;
+	control_usb = dev_get_drvdata(dev);
+	if (!control_usb) {
+		dev_err(dev, "%s: invalid control usb device\n", __func__);
+		return;
 	}
 
-	writel(val, control_usb->phy_power);
-}
-EXPORT_SYMBOL_GPL(omap_control_usb3_phy_power);
+	if (control_usb->type == OMAP_CTRL_TYPE_OTGHS)
+		return;
 
-/**
- * omap_control_usb_phy_power - power on/off the phy using control module reg
- * @dev: the control module device
- * @on: 0 or 1, based on powering on or off the PHY
- */
-void omap_control_usb_phy_power(struct device *dev, int on)
-{
-	u32 val;
-	struct omap_control_usb	*control_usb = dev_get_drvdata(dev);
+	val = readl(control_usb->power);
+
+	switch (control_usb->type) {
+	case OMAP_CTRL_TYPE_USB2:
+		if (on)
+			val &= ~OMAP_CTRL_DEV_PHY_PD;
+		else
+			val |= OMAP_CTRL_DEV_PHY_PD;
+		break;
 
-	val = readl(control_usb->dev_conf);
+	case OMAP_CTRL_TYPE_PIPE3:
+		rate = clk_get_rate(control_usb->sys_clk);
+		rate = rate/1000000;
+
+		if (on) {
+			val &= ~(OMAP_CTRL_USB_PWRCTL_CLK_CMD_MASK |
+					OMAP_CTRL_USB_PWRCTL_CLK_FREQ_MASK);
+			val |= OMAP_CTRL_USB3_PHY_TX_RX_POWERON <<
+				OMAP_CTRL_USB_PWRCTL_CLK_CMD_SHIFT;
+			val |= rate << OMAP_CTRL_USB_PWRCTL_CLK_FREQ_SHIFT;
+		} else {
+			val &= ~OMAP_CTRL_USB_PWRCTL_CLK_CMD_MASK;
+			val |= OMAP_CTRL_USB3_PHY_TX_RX_POWEROFF <<
+				OMAP_CTRL_USB_PWRCTL_CLK_CMD_SHIFT;
+		}
+		break;
 
-	if (on)
-		val &= ~OMAP_CTRL_DEV_PHY_PD;
-	else
-		val |= OMAP_CTRL_DEV_PHY_PD;
+	case OMAP_CTRL_TYPE_DRA7USB2:
+		if (on)
+			val &= ~OMAP_CTRL_USB2_PHY_PD;
+		else
+			val |= OMAP_CTRL_USB2_PHY_PD;
+		break;
+	default:
+		dev_err(dev, "%s: type %d not recognized\n",
+					__func__, control_usb->type);
+		break;
+	}
 
-	writel(val, control_usb->dev_conf);
+	writel(val, control_usb->power);
 }
 EXPORT_SYMBOL_GPL(omap_control_usb_phy_power);
 
@@ -172,7 +182,7 @@ void omap_control_usb_set_mode(struct device *dev,
 {
 	struct omap_control_usb	*ctrl_usb;
 
-	if (IS_ERR(dev) || control_usb->type != OMAP_CTRL_DEV_TYPE1)
+	if (IS_ERR(dev) || control_usb->type != OMAP_CTRL_TYPE_OTGHS)
 		return;
 
 	ctrl_usb = dev_get_drvdata(dev);
@@ -193,10 +203,45 @@ void omap_control_usb_set_mode(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(omap_control_usb_set_mode);
 
+#ifdef CONFIG_OF
+
+static const enum omap_control_usb_type otghs_data = OMAP_CTRL_TYPE_OTGHS;
+static const enum omap_control_usb_type usb2_data = OMAP_CTRL_TYPE_USB2;
+static const enum omap_control_usb_type pipe3_data = OMAP_CTRL_TYPE_PIPE3;
+static const enum omap_control_usb_type dra7usb2_data = OMAP_CTRL_TYPE_DRA7USB2;
+
+static const struct of_device_id omap_control_usb_id_table[] = {
+	{
+		.compatible = "ti,control-phy-otghs",
+		.data = &otghs_data,
+	},
+	{
+		.compatible = "ti,control-phy-usb2",
+		.data = &usb2_data,
+	},
+	{
+		.compatible = "ti,control-phy-pipe3",
+		.data = &pipe3_data,
+	},
+	{
+		.compatible = "ti,control-phy-dra7usb2",
+		.data = &dra7usb2_data,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, omap_control_usb_id_table);
+#endif
+
+
 static int omap_control_usb_probe(struct platform_device *pdev)
 {
 	struct resource	*res;
-	struct device_node *np = pdev->dev.of_node;
+	const struct of_device_id *of_id;
+
+	of_id = of_match_device(of_match_ptr(omap_control_usb_id_table),
+								&pdev->dev);
+	if (!of_id)
+		return -EINVAL;
 
 	control_usb = devm_kzalloc(&pdev->dev, sizeof(*control_usb),
 		GFP_KERNEL);
@@ -205,36 +250,27 @@ static int omap_control_usb_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	if (np)
-		of_property_read_u32(np, "ti,type", &control_usb->type);
-	else
-		return -EINVAL;	/* We only support DT boot */
-
-	control_usb->dev	= &pdev->dev;
+	control_usb->dev = &pdev->dev;
+	control_usb->type = *(enum omap_control_usb_type *)of_id->data;
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-		"control_dev_conf");
-	control_usb->dev_conf = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(control_usb->dev_conf))
-		return PTR_ERR(control_usb->dev_conf);
-
-	if (control_usb->type == OMAP_CTRL_DEV_TYPE1) {
+	if (control_usb->type == OMAP_CTRL_TYPE_OTGHS) {
 		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 			"otghs_control");
 		control_usb->otghs_control = devm_ioremap_resource(
 			&pdev->dev, res);
 		if (IS_ERR(control_usb->otghs_control))
 			return PTR_ERR(control_usb->otghs_control);
-	}
-
-	if (control_usb->type == OMAP_CTRL_DEV_TYPE2) {
+	} else {
 		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-			"phy_power_usb");
-		control_usb->phy_power = devm_ioremap_resource(
-			&pdev->dev, res);
-		if (IS_ERR(control_usb->phy_power))
-			return PTR_ERR(control_usb->phy_power);
+				"power");
+		control_usb->power = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(control_usb->power)) {
+			dev_err(&pdev->dev, "Couldn't get power register\n");
+			return PTR_ERR(control_usb->power);
+		}
+	}
 
+	if (control_usb->type == OMAP_CTRL_TYPE_PIPE3) {
 		control_usb->sys_clk = devm_clk_get(control_usb->dev,
 			"sys_clkin");
 		if (IS_ERR(control_usb->sys_clk)) {
@@ -243,20 +279,11 @@ static int omap_control_usb_probe(struct platform_device *pdev)
 		}
 	}
 
-
 	dev_set_drvdata(control_usb->dev, control_usb);
 
 	return 0;
 }
 
-#ifdef CONFIG_OF
-static const struct of_device_id omap_control_usb_id_table[] = {
-	{ .compatible = "ti,omap-control-usb" },
-	{}
-};
-MODULE_DEVICE_TABLE(of, omap_control_usb_id_table);
-#endif
-
 static struct platform_driver omap_control_usb_driver = {
 	.probe		= omap_control_usb_probe,
 	.driver		= {
diff --git a/drivers/usb/phy/phy-omap-usb3.c b/drivers/usb/phy/phy-omap-usb3.c
index 4e8a0405f956..0824be42cd18 100644
--- a/drivers/usb/phy/phy-omap-usb3.c
+++ b/drivers/usb/phy/phy-omap-usb3.c
@@ -100,7 +100,7 @@ static int omap_usb3_suspend(struct usb_phy *x, int suspend)
 			udelay(1);
 		} while (--timeout);
 
-		omap_control_usb3_phy_power(phy->control_dev, 0);
+		omap_control_usb_phy_power(phy->control_dev, 0);
 
 		phy->is_suspended	= 1;
 	} else if (!suspend && phy->is_suspended) {
@@ -189,7 +189,7 @@ static int omap_usb3_init(struct usb_phy *x)
 	if (ret)
 		return ret;
 
-	omap_control_usb3_phy_power(phy->control_dev, 1);
+	omap_control_usb_phy_power(phy->control_dev, 1);
 
 	return 0;
 }
@@ -245,7 +245,7 @@ static int omap_usb3_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	omap_control_usb3_phy_power(phy->control_dev, 0);
+	omap_control_usb_phy_power(phy->control_dev, 0);
 	usb_add_phy_dev(&phy->phy);
 
 	platform_set_drvdata(pdev, phy);
diff --git a/include/linux/usb/omap_control_usb.h b/include/linux/usb/omap_control_usb.h
index e2416b45169b..61b889a9c67b 100644
--- a/include/linux/usb/omap_control_usb.h
+++ b/include/linux/usb/omap_control_usb.h
@@ -19,16 +19,23 @@
 #ifndef __OMAP_CONTROL_USB_H__
 #define __OMAP_CONTROL_USB_H__
 
+enum omap_control_usb_type {
+	OMAP_CTRL_TYPE_OTGHS = 1,	/* Mailbox OTGHS_CONTROL */
+	OMAP_CTRL_TYPE_USB2,	/* USB2_PHY, power down in CONTROL_DEV_CONF */
+	OMAP_CTRL_TYPE_PIPE3,	/* PIPE3 PHY, DPLL & seperate Rx/Tx power */
+	OMAP_CTRL_TYPE_DRA7USB2, /* USB2 PHY, power and power_aux e.g. DRA7 */
+};
+
 struct omap_control_usb {
 	struct device *dev;
 
-	u32 __iomem *dev_conf;
 	u32 __iomem *otghs_control;
-	u32 __iomem *phy_power;
+	u32 __iomem *power;
+	u32 __iomem *power_aux;
 
 	struct clk *sys_clk;
 
-	u32 type;
+	enum omap_control_usb_type type;
 };
 
 enum omap_control_usb_mode {
@@ -38,10 +45,6 @@ enum omap_control_usb_mode {
 	USB_MODE_DISCONNECT,
 };
 
-/* To differentiate ctrl module IP having either mailbox or USB3 PHY power */
-#define	OMAP_CTRL_DEV_TYPE1		0x1
-#define	OMAP_CTRL_DEV_TYPE2		0x2
-
 #define	OMAP_CTRL_DEV_PHY_PD		BIT(0)
 
 #define	OMAP_CTRL_DEV_AVALID		BIT(0)
@@ -59,10 +62,11 @@ enum omap_control_usb_mode {
 #define	OMAP_CTRL_USB3_PHY_TX_RX_POWERON	0x3
 #define	OMAP_CTRL_USB3_PHY_TX_RX_POWEROFF	0x0
 
+#define OMAP_CTRL_USB2_PHY_PD		BIT(28)
+
 #if IS_ENABLED(CONFIG_OMAP_CONTROL_USB)
 extern struct device *omap_get_control_dev(void);
 extern void omap_control_usb_phy_power(struct device *dev, int on);
-extern void omap_control_usb3_phy_power(struct device *dev, bool on);
 extern void omap_control_usb_set_mode(struct device *dev,
 	enum omap_control_usb_mode mode);
 #else
@@ -75,10 +79,6 @@ static inline void omap_control_usb_phy_power(struct device *dev, int on)
 {
 }
 
-static inline void omap_control_usb3_phy_power(struct device *dev, int on)
-{
-}
-
 static inline void omap_control_usb_set_mode(struct device *dev,
 	enum omap_control_usb_mode mode)
 {
-- 
cgit v1.2.3


From 8934d3e4d0e7aed1bd067529c667984d7929d92d Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Thu, 3 Oct 2013 18:12:34 +0300
Subject: usb: musb: omap2430: Don't use omap_get_control_dev()

omap_get_control_dev() is being deprecated as it doesn't support
multiple instances. As control device is present only from OMAP4
onwards which supports DT only, we use phandles to get the
reference to the control device.

Also get rid of "ti,has-mailbox" property as it is redundant and
we can determine that from whether "ctrl-module" property is present
or not. Get rid of has_mailbox from musb_hdrc_platform_data as well.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/omap-usb.txt |  4 ----
 drivers/usb/musb/omap2430.c                        | 25 ++++++++++++----------
 include/linux/usb/musb.h                           |  2 --
 3 files changed, 14 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/usb/omap-usb.txt b/Documentation/devicetree/bindings/usb/omap-usb.txt
index 9add35c37ebc..090e5e22bd2b 100644
--- a/Documentation/devicetree/bindings/usb/omap-usb.txt
+++ b/Documentation/devicetree/bindings/usb/omap-usb.txt
@@ -3,9 +3,6 @@ OMAP GLUE AND OTHER OMAP SPECIFIC COMPONENTS
 OMAP MUSB GLUE
  - compatible : Should be "ti,omap4-musb" or "ti,omap3-musb"
  - ti,hwmods : must be "usb_otg_hs"
- - ti,has-mailbox : to specify that omap uses an external mailbox
-   (in control module) to communicate with the musb core during device connect
-   and disconnect.
  - multipoint : Should be "1" indicating the musb controller supports
    multipoint. This is a MUSB configuration-specific setting.
  - num-eps : Specifies the number of endpoints. This is also a
@@ -31,7 +28,6 @@ SOC specific device node entry
 usb_otg_hs: usb_otg_hs@4a0ab000 {
 	compatible = "ti,omap4-musb";
 	ti,hwmods = "usb_otg_hs";
-	ti,has-mailbox;
 	multipoint = <1>;
 	num-eps = <16>;
 	ram-bits = <12>;
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index d0fc4d9c7b80..9eab645fed8b 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -38,6 +38,7 @@
 #include <linux/delay.h>
 #include <linux/usb/musb-omap.h>
 #include <linux/usb/omap_control_usb.h>
+#include <linux/of_platform.h>
 
 #include "musb_core.h"
 #include "omap2430.h"
@@ -524,8 +525,12 @@ static int omap2430_probe(struct platform_device *pdev)
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
 	glue->status			= OMAP_MUSB_UNKNOWN;
+	glue->control_otghs = ERR_PTR(-ENODEV);
 
 	if (np) {
+		struct device_node *control_node;
+		struct platform_device *control_pdev;
+
 		pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 		if (!pdata) {
 			dev_err(&pdev->dev,
@@ -554,22 +559,20 @@ static int omap2430_probe(struct platform_device *pdev)
 		of_property_read_u32(np, "ram-bits", (u32 *)&config->ram_bits);
 		of_property_read_u32(np, "power", (u32 *)&pdata->power);
 		config->multipoint = of_property_read_bool(np, "multipoint");
-		pdata->has_mailbox = of_property_read_bool(np,
-		    "ti,has-mailbox");
 
 		pdata->board_data	= data;
 		pdata->config		= config;
-	}
 
-	if (pdata->has_mailbox) {
-		glue->control_otghs = omap_get_control_dev();
-		if (IS_ERR(glue->control_otghs)) {
-			dev_vdbg(&pdev->dev, "Failed to get control device\n");
-			ret = PTR_ERR(glue->control_otghs);
-			goto err2;
+		control_node = of_parse_phandle(np, "ctrl-module", 0);
+		if (control_node) {
+			control_pdev = of_find_device_by_node(control_node);
+			if (!control_pdev) {
+				dev_err(&pdev->dev, "Failed to get control device\n");
+				ret = -EINVAL;
+				goto err2;
+			}
+			glue->control_otghs = &control_pdev->dev;
 		}
-	} else {
-		glue->control_otghs = ERR_PTR(-ENODEV);
 	}
 	pdata->platform_ops		= &omap2430_ops;
 
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 053c26841cc3..eb505250940a 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -99,8 +99,6 @@ struct musb_hdrc_platform_data {
 	/* MUSB_HOST, MUSB_PERIPHERAL, or MUSB_OTG */
 	u8		mode;
 
-	u8		has_mailbox:1;
-
 	/* for clk_get() */
 	const char	*clock;
 
-- 
cgit v1.2.3


From 0bb85dc2d3f7117b6686661aba4a88dedead0c8a Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Thu, 3 Oct 2013 18:12:35 +0300
Subject: usb: phy: omap: get rid of omap_get_control_dev()

This function was preventing us from supporting multiple
instances. Get rid of it. Since we support DT boots only,
users can get the control device phandle from the DT node.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-omap-control.c   | 31 ++++++++++---------------------
 include/linux/usb/omap_control_usb.h |  5 -----
 2 files changed, 10 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-omap-control.c b/drivers/usb/phy/phy-omap-control.c
index 1c8a7c5ccb9b..09c5ace1edd8 100644
--- a/drivers/usb/phy/phy-omap-control.c
+++ b/drivers/usb/phy/phy-omap-control.c
@@ -26,26 +26,6 @@
 #include <linux/clk.h>
 #include <linux/usb/omap_control_usb.h>
 
-static struct omap_control_usb *control_usb;
-
-/**
- * omap_get_control_dev - returns the device pointer for this control device
- *
- * This API should be called to get the device pointer for this control
- * module device. This device pointer should be used for called other
- * exported API's in this driver.
- *
- * To be used by PHY driver and glue driver.
- */
-struct device *omap_get_control_dev(void)
-{
-	if (!control_usb)
-		return ERR_PTR(-ENODEV);
-
-	return control_usb->dev;
-}
-EXPORT_SYMBOL_GPL(omap_get_control_dev);
-
 /**
  * omap_control_usb_phy_power - power on/off the phy using control module reg
  * @dev: the control module device
@@ -182,11 +162,19 @@ void omap_control_usb_set_mode(struct device *dev,
 {
 	struct omap_control_usb	*ctrl_usb;
 
-	if (IS_ERR(dev) || control_usb->type != OMAP_CTRL_TYPE_OTGHS)
+	if (IS_ERR(dev) || !dev)
 		return;
 
 	ctrl_usb = dev_get_drvdata(dev);
 
+	if (!ctrl_usb) {
+		dev_err(dev, "Invalid control usb device\n");
+		return;
+	}
+
+	if (ctrl_usb->type != OMAP_CTRL_TYPE_OTGHS)
+		return;
+
 	switch (mode) {
 	case USB_MODE_HOST:
 		omap_control_usb_host_mode(ctrl_usb);
@@ -237,6 +225,7 @@ static int omap_control_usb_probe(struct platform_device *pdev)
 {
 	struct resource	*res;
 	const struct of_device_id *of_id;
+	struct omap_control_usb *control_usb;
 
 	of_id = of_match_device(of_match_ptr(omap_control_usb_id_table),
 								&pdev->dev);
diff --git a/include/linux/usb/omap_control_usb.h b/include/linux/usb/omap_control_usb.h
index 61b889a9c67b..596b01918813 100644
--- a/include/linux/usb/omap_control_usb.h
+++ b/include/linux/usb/omap_control_usb.h
@@ -65,15 +65,10 @@ enum omap_control_usb_mode {
 #define OMAP_CTRL_USB2_PHY_PD		BIT(28)
 
 #if IS_ENABLED(CONFIG_OMAP_CONTROL_USB)
-extern struct device *omap_get_control_dev(void);
 extern void omap_control_usb_phy_power(struct device *dev, int on);
 extern void omap_control_usb_set_mode(struct device *dev,
 	enum omap_control_usb_mode mode);
 #else
-static inline struct device *omap_get_control_dev(void)
-{
-	return ERR_PTR(-ENODEV);
-}
 
 static inline void omap_control_usb_phy_power(struct device *dev, int on)
 {
-- 
cgit v1.2.3


From 5b8802143a4a3e38906879e78f2c1415c5b3db73 Mon Sep 17 00:00:00 2001
From: Weijie Yang <weijie.yang@samsung.com>
Date: Fri, 27 Sep 2013 17:09:07 +0800
Subject: fs/debugfs: add declaration for no CONFIG_DEBUG_FS

Two function declarations are absence if not define CONFIG_DEBUG_FS
in include/linux/debugfs.h

Signed-off-by: Weijie Yang <weijie.yang@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/debugfs.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 263489d0788d..4d0b4d1aa132 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -206,6 +206,12 @@ static inline struct dentry *debugfs_create_size_t(const char *name, umode_t mod
 	return ERR_PTR(-ENODEV);
 }
 
+static inline struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode,
+				     struct dentry *parent, atomic_t *value)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode,
 						 struct dentry *parent,
 						 u32 *value)
@@ -227,6 +233,12 @@ static inline struct dentry *debugfs_create_regset32(const char *name,
 	return ERR_PTR(-ENODEV);
 }
 
+static inline int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
+			 int nregs, void __iomem *base, char *prefix)
+{
+	return 0;
+}
+
 static inline bool debugfs_initialized(void)
 {
 	return false;
-- 
cgit v1.2.3


From 9886167d20c0720dcfb01e62cdff4d906b226f43 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 3 Oct 2013 16:02:23 +0200
Subject: perf: Fix perf_pmu_migrate_context

While auditing the list_entry usage due to a trinity bug I found that
perf_pmu_migrate_context violates the rules for
perf_event::event_entry.

The problem is that perf_event::event_entry is a RCU list element, and
hence we must wait for a full RCU grace period before re-using the
element after deletion.

Therefore the usage in perf_pmu_migrate_context() which re-uses the
entry immediately is broken. For now introduce another list_head into
perf_event for this specific usage.

This doesn't actually fix the trinity report because that never goes
through this code.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-mkj72lxagw1z8fvjm648iznw@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 24 +++++++++++++++++++++++-
 kernel/events/core.c       |  6 +++---
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 866e85c5eb94..c8ba627c1d60 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -294,9 +294,31 @@ struct ring_buffer;
  */
 struct perf_event {
 #ifdef CONFIG_PERF_EVENTS
-	struct list_head		group_entry;
+	/*
+	 * entry onto perf_event_context::event_list;
+	 *   modifications require ctx->lock
+	 *   RCU safe iterations.
+	 */
 	struct list_head		event_entry;
+
+	/*
+	 * XXX: group_entry and sibling_list should be mutually exclusive;
+	 * either you're a sibling on a group, or you're the group leader.
+	 * Rework the code to always use the same list element.
+	 *
+	 * Locked for modification by both ctx->mutex and ctx->lock; holding
+	 * either sufficies for read.
+	 */
+	struct list_head		group_entry;
 	struct list_head		sibling_list;
+
+	/*
+	 * We need storage to track the entries in perf_pmu_migrate_context; we
+	 * cannot use the event_entry because of RCU and we want to keep the
+	 * group in tact which avoids us using the other two entries.
+	 */
+	struct list_head		migrate_entry;
+
 	struct hlist_node		hlist_entry;
 	int				nr_siblings;
 	int				group_flags;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cb4238e85b38..d49a9d29334c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7234,15 +7234,15 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 		perf_remove_from_context(event);
 		unaccount_event_cpu(event, src_cpu);
 		put_ctx(src_ctx);
-		list_add(&event->event_entry, &events);
+		list_add(&event->migrate_entry, &events);
 	}
 	mutex_unlock(&src_ctx->mutex);
 
 	synchronize_rcu();
 
 	mutex_lock(&dst_ctx->mutex);
-	list_for_each_entry_safe(event, tmp, &events, event_entry) {
-		list_del(&event->event_entry);
+	list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+		list_del(&event->migrate_entry);
 		if (event->state >= PERF_EVENT_STATE_OFF)
 			event->state = PERF_EVENT_STATE_INACTIVE;
 		account_event_cpu(event, dst_cpu);
-- 
cgit v1.2.3


From fdfbbd07e91f8fe387140776f3fd94605f0c89e5 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 20 Sep 2013 07:40:39 -0700
Subject: perf: Add generic transaction flags

Add a generic qualifier for transaction events, as a new sample
type that returns a flag word. This is particularly useful
for qualifying aborts: to distinguish aborts which happen
due to asynchronous events (like conflicts caused by another
CPU) versus instructions that lead to an abort.

The tuning strategies are very different for those cases,
so it's important to distinguish them easily and early.

Since it's inconvenient and inflexible to filter for this
in the kernel we report all the events out and allow
some post processing in user space.

The flags are based on the Intel TSX events, but should be fairly
generic and mostly applicable to other HTM architectures too. In addition
to various flag words there's also reserved space to report an
program supplied abort code. For TSX this is used to distinguish specific
classes of aborts, like a lock busy abort when doing lock elision.

Flags:

Elision and generic transactions 		   (ELISION vs TRANSACTION)
(HLE vs RTM on TSX; IBM etc.  would likely only use TRANSACTION)
Aborts caused by current thread vs aborts caused by others (SYNC vs ASYNC)
Retryable transaction				   (RETRY)
Conflicts with other threads			   (CONFLICT)
Transaction write capacity overflow		   (CAPACITY WRITE)
Transaction read capacity overflow		   (CAPACITY READ)

Transactions implicitely aborted can also return an abort code.
This can be used to signal specific events to the profiler. A common
case is abort on lock busy in a RTM eliding library (code 0xff)
To handle this case we include the TSX abort code

Common example aborts in TSX would be:

- Data conflict with another thread on memory read.
                                      Flags: TRANSACTION|ASYNC|CONFLICT
- executing a WRMSR in a transaction. Flags: TRANSACTION|SYNC
- HLE transaction in user space is too large
                                      Flags: ELISION|SYNC|CAPACITY-WRITE

The only flag that is somewhat TSX specific is ELISION.

This adds the perf core glue needed for reporting the new flag word out.

v2: Add MEM/MISC
v3: Move transaction to the end
v4: Separate capacity-read/write and remove misc
v5: Remove _SAMPLE. Move abort flags to 32bit. Rename
    transaction to txn
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379688044-14173-2-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h      |  5 +++++
 include/uapi/linux/perf_event.h | 25 ++++++++++++++++++++++++-
 kernel/events/core.c            |  6 ++++++
 3 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8ba627c1d60..2e069d1288df 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -584,6 +584,10 @@ struct perf_sample_data {
 	struct perf_regs_user		regs_user;
 	u64				stack_user_size;
 	u64				weight;
+	/*
+	 * Transaction flags for abort events:
+	 */
+	u64				txn;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -599,6 +603,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->stack_user_size = 0;
 	data->weight = 0;
 	data->data_src.val = 0;
+	data->txn = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 009a655a5d35..da48837d617d 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -136,8 +136,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_WEIGHT			= 1U << 14,
 	PERF_SAMPLE_DATA_SRC			= 1U << 15,
 	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
+	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 
-	PERF_SAMPLE_MAX = 1U << 17,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 18,		/* non-ABI */
 };
 
 /*
@@ -180,6 +181,28 @@ enum perf_sample_regs_abi {
 	PERF_SAMPLE_REGS_ABI_64		= 2,
 };
 
+/*
+ * Values for the memory transaction event qualifier, mostly for
+ * abort events. Multiple bits can be set.
+ */
+enum {
+	PERF_TXN_ELISION        = (1 << 0), /* From elision */
+	PERF_TXN_TRANSACTION    = (1 << 1), /* From transaction */
+	PERF_TXN_SYNC           = (1 << 2), /* Instruction is related */
+	PERF_TXN_ASYNC          = (1 << 3), /* Instruction not related */
+	PERF_TXN_RETRY          = (1 << 4), /* Retry possible */
+	PERF_TXN_CONFLICT       = (1 << 5), /* Conflict abort */
+	PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+	PERF_TXN_CAPACITY_READ  = (1 << 7), /* Capacity read abort */
+
+	PERF_TXN_MAX	        = (1 << 8), /* non-ABI */
+
+	/* bits 32..63 are reserved for the abort code */
+
+	PERF_TXN_ABORT_MASK  = (0xffffffffULL << 32),
+	PERF_TXN_ABORT_SHIFT = 32,
+};
+
 /*
  * The format of the data returned by read() on a perf event fd,
  * as specified by attr.read_format:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b25d65ce7106..c716385f6483 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1201,6 +1201,9 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		size += sizeof(data->data_src.val);
 
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		size += sizeof(data->txn);
+
 	event->header_size = size;
 }
 
@@ -4572,6 +4575,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		perf_output_put(handle, data->data_src.val);
 
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		perf_output_put(handle, data->txn);
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
-- 
cgit v1.2.3


From 2f2a2b60adf368bacd6acd2116c01e32caf936c4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:18 +0200
Subject: sched/wait: Make the signal_pending() checks consistent

There's two patterns to check signals in the __wait_event*() macros:

  if (!signal_pending(current)) {
	schedule();
	continue;
  }
  ret = -ERESTARTSYS;
  break;

And the more natural:

  if (signal_pending(current)) {
	ret = -ERESTARTSYS;
	break;
  }
  schedule();

Change them all into the latter form.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092527.956416254@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tty.h  | 13 ++++++-------
 include/linux/wait.h | 35 ++++++++++++++++-------------------
 2 files changed, 22 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 64f864651d86..050372979076 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -686,14 +686,13 @@ do {									\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
-		if (!signal_pending(current)) {				\
-			tty_unlock(tty);					\
-			schedule();					\
-			tty_lock(tty);					\
-			continue;					\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
 		}							\
-		ret = -ERESTARTSYS;					\
-		break;							\
+		tty_unlock(tty);					\
+		schedule();						\
+		tty_lock(tty);						\
 	}								\
 	finish_wait(&wq, &__wait);					\
 } while (0)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a67fc1635592..ccf0c529fd37 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -261,12 +261,11 @@ do {									\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
-		if (!signal_pending(current)) {				\
-			schedule();					\
-			continue;					\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
 		}							\
-		ret = -ERESTARTSYS;					\
-		break;							\
+		schedule();						\
 	}								\
 	finish_wait(&wq, &__wait);					\
 } while (0)
@@ -302,14 +301,13 @@ do {									\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
-		if (!signal_pending(current)) {				\
-			ret = schedule_timeout(ret);			\
-			if (!ret)					\
-				break;					\
-			continue;					\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
 		}							\
-		ret = -ERESTARTSYS;					\
-		break;							\
+		ret = schedule_timeout(ret);				\
+		if (!ret)						\
+			break;						\
 	}								\
 	if (!ret && (condition))					\
 		ret = 1;						\
@@ -439,14 +437,13 @@ do {									\
 			finish_wait(&wq, &__wait);			\
 			break;						\
 		}							\
-		if (!signal_pending(current)) {				\
-			schedule();					\
-			continue;					\
-		}							\
-		ret = -ERESTARTSYS;					\
-		abort_exclusive_wait(&wq, &__wait, 			\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			abort_exclusive_wait(&wq, &__wait, 		\
 				TASK_INTERRUPTIBLE, NULL);		\
-		break;							\
+			break;						\
+		}							\
+		schedule();						\
 	}								\
 } while (0)
 
-- 
cgit v1.2.3


From 2953ef246b058989657e1e77b36b67566ac06f7b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:19 +0200
Subject: sched/wait: Change timeout logic

Commit 4c663cf ("wait: fix false timeouts when using
wait_event_timeout()") introduced an additional condition check after
a timeout but there's a few issues;

 - it forgot one site
 - it put the check after the main loop; not at the actual timeout
   check.

Cure both; by wrapping the condition (as suggested by Oleg), this
avoids double evaluation of 'condition' which could be quite big.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.028892896@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ccf0c529fd37..b2afd665e4ea 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -179,6 +179,14 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up_interruptible_sync_poll(x, m)				\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
 
+#define ___wait_cond_timeout(condition, ret)				\
+({									\
+ 	bool __cond = (condition);					\
+ 	if (__cond && !ret)						\
+ 		ret = 1;						\
+ 	__cond || !ret;							\
+})
+
 #define __wait_event(wq, condition) 					\
 do {									\
 	DEFINE_WAIT(__wait);						\
@@ -217,14 +225,10 @@ do {									\
 									\
 	for (;;) {							\
 		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
-		if (condition)						\
+		if (___wait_cond_timeout(condition, ret))		\
 			break;						\
 		ret = schedule_timeout(ret);				\
-		if (!ret)						\
-			break;						\
 	}								\
-	if (!ret && (condition))					\
-		ret = 1;						\
 	finish_wait(&wq, &__wait);					\
 } while (0)
 
@@ -299,18 +303,14 @@ do {									\
 									\
 	for (;;) {							\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
+		if (___wait_cond_timeout(condition, ret))		\
 			break;						\
 		if (signal_pending(current)) {				\
 			ret = -ERESTARTSYS;				\
 			break;						\
 		}							\
 		ret = schedule_timeout(ret);				\
-		if (!ret)						\
-			break;						\
 	}								\
-	if (!ret && (condition))					\
-		ret = 1;						\
 	finish_wait(&wq, &__wait);					\
 } while (0)
 
@@ -815,7 +815,7 @@ do {									\
 									\
 	for (;;) {							\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
+		if (___wait_cond_timeout(condition, ret))		\
 			break;						\
 		if (signal_pending(current)) {				\
 			ret = -ERESTARTSYS;				\
@@ -824,8 +824,6 @@ do {									\
 		spin_unlock_irq(&lock);					\
 		ret = schedule_timeout(ret);				\
 		spin_lock_irq(&lock);					\
-		if (!ret)						\
-			break;						\
 	}								\
 	finish_wait(&wq, &__wait);					\
 } while (0)
-- 
cgit v1.2.3


From bb632bc44970f75b66df102e831a4fc0692e9159 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:20 +0200
Subject: sched/wait: Change the wait_exclusive control flow

Purely a preparatory patch; it changes the control flow to match what
will soon be generated by generic code so that that patch can be a
unity transform.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.107994763@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index b2afd665e4ea..7d7819dafcc5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -428,23 +428,24 @@ do {									\
 
 #define __wait_event_interruptible_exclusive(wq, condition, ret)	\
 do {									\
+	__label__ __out;						\
 	DEFINE_WAIT(__wait);						\
 									\
 	for (;;) {							\
 		prepare_to_wait_exclusive(&wq, &__wait,			\
 					TASK_INTERRUPTIBLE);		\
-		if (condition) {					\
-			finish_wait(&wq, &__wait);			\
+		if (condition)						\
 			break;						\
-		}							\
 		if (signal_pending(current)) {				\
 			ret = -ERESTARTSYS;				\
 			abort_exclusive_wait(&wq, &__wait, 		\
 				TASK_INTERRUPTIBLE, NULL);		\
-			break;						\
+			goto __out;					\
 		}							\
 		schedule();						\
 	}								\
+	finish_wait(&wq, &__wait);					\
+__out:	;								\
 } while (0)
 
 #define wait_event_interruptible_exclusive(wq, condition)		\
-- 
cgit v1.2.3


From 41a1431b178c3b731d6dfc40b987528b333dd93e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:21 +0200
Subject: sched/wait: Introduce ___wait_event()

There's far too much duplication in the __wait_event macros; in order
to fix this introduce ___wait_event() a macro with the capability to
replace most other macros.

With the previous patches changing the various __wait_event*()
implementations to be more uniform; we can now collapse the lot
without also changing generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.181897111@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 7d7819dafcc5..29d0249e03ab 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -187,6 +187,42 @@ wait_queue_head_t *bit_waitqueue(void *, int);
  	__cond || !ret;							\
 })
 
+#define ___wait_signal_pending(state)					\
+	((state == TASK_INTERRUPTIBLE && signal_pending(current)) ||	\
+	 (state == TASK_KILLABLE && fatal_signal_pending(current)))
+
+#define ___wait_nop_ret		int ret __always_unused
+
+#define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
+do {									\
+	__label__ __out;						\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		if (exclusive)						\
+			prepare_to_wait_exclusive(&wq, &__wait, state); \
+		else							\
+			prepare_to_wait(&wq, &__wait, state);		\
+									\
+		if (condition)						\
+			break;						\
+									\
+		if (___wait_signal_pending(state)) {			\
+			ret = -ERESTARTSYS;				\
+			if (exclusive) {				\
+				abort_exclusive_wait(&wq, &__wait, 	\
+						     state, NULL); 	\
+				goto __out;				\
+			}						\
+			break;						\
+		}							\
+									\
+		cmd;							\
+	}								\
+	finish_wait(&wq, &__wait);					\
+__out:	;								\
+} while (0)
+
 #define __wait_event(wq, condition) 					\
 do {									\
 	DEFINE_WAIT(__wait);						\
-- 
cgit v1.2.3


From 854267f4384243b19c03a2942e84f06f2beb0952 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:22 +0200
Subject: sched/wait: Collapse __wait_event()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.254863348@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 29d0249e03ab..68e3a628e157 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -224,17 +224,8 @@ __out:	;								\
 } while (0)
 
 #define __wait_event(wq, condition) 					\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		schedule();						\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,		\
+		      ___wait_nop_ret, schedule())
 
 /**
  * wait_event - sleep until a condition gets true
-- 
cgit v1.2.3


From ddc1994b8217527e1818f690f17597fc9cedf81b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:23 +0200
Subject: sched/wait: Collapse __wait_event_timeout()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.325264677@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 68e3a628e157..546b94efc82e 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -247,17 +247,9 @@ do {									\
 } while (0)
 
 #define __wait_event_timeout(wq, condition, ret)			\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
-		if (___wait_cond_timeout(condition, ret))		\
-			break;						\
-		ret = schedule_timeout(ret);				\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, ___wait_cond_timeout(condition, ret), 	\
+		      TASK_UNINTERRUPTIBLE, 0, ret,			\
+		      ret = schedule_timeout(ret))
 
 /**
  * wait_event_timeout - sleep until a condition gets true or a timeout elapses
-- 
cgit v1.2.3


From f13f4c41c9cf9cd61c896e46e4e7ba2687e2af9c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:24 +0200
Subject: sched/wait: Collapse __wait_event_interruptible()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.396949919@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 546b94efc82e..39e4bbd2c735 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -277,21 +277,8 @@ do {									\
 })
 
 #define __wait_event_interruptible(wq, condition, ret)			\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		schedule();						\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	\
+		      schedule())
 
 /**
  * wait_event_interruptible - sleep until a condition gets true
-- 
cgit v1.2.3


From c2ebb1fb4eddf3d1d66fe31d1e89e83ee211b81c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:25 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_timeout()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.469616907@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 39e4bbd2c735..a79fb15c1dd4 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -304,21 +304,9 @@ do {									\
 })
 
 #define __wait_event_interruptible_timeout(wq, condition, ret)		\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (___wait_cond_timeout(condition, ret))		\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		ret = schedule_timeout(ret);				\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, ___wait_cond_timeout(condition, ret),		\
+		      TASK_INTERRUPTIBLE, 0, ret,			\
+		      ret = schedule_timeout(ret))
 
 /**
  * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
-- 
cgit v1.2.3


From 48c2521717b39cb6904941ec2847d9775669207a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:26 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_exclusive()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.541716442@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a79fb15c1dd4..c4ab172daac0 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -421,26 +421,8 @@ do {									\
 })
 
 #define __wait_event_interruptible_exclusive(wq, condition, ret)	\
-do {									\
-	__label__ __out;						\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait_exclusive(&wq, &__wait,			\
-					TASK_INTERRUPTIBLE);		\
-		if (condition)						\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			abort_exclusive_wait(&wq, &__wait, 		\
-				TASK_INTERRUPTIBLE, NULL);		\
-			goto __out;					\
-		}							\
-		schedule();						\
-	}								\
-	finish_wait(&wq, &__wait);					\
-__out:	;								\
-} while (0)
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, ret,	\
+		      schedule())
 
 #define wait_event_interruptible_exclusive(wq, condition)		\
 ({									\
-- 
cgit v1.2.3


From 13cb5042a4b80396f77cf5d599d2c002c57b89dc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:27 +0200
Subject: sched/wait: Collapse __wait_event_lock_irq()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.612813379@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index c4ab172daac0..d64918e09e3c 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -624,20 +624,12 @@ do {									\
 
 
 #define __wait_event_lock_irq(wq, condition, lock, cmd)			\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		spin_unlock_irq(&lock);					\
-		cmd;							\
-		schedule();						\
-		spin_lock_irq(&lock);					\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,		\
+		      ___wait_nop_ret,					\
+		      spin_unlock_irq(&lock);				\
+		      cmd;						\
+		      schedule();					\
+		      spin_lock_irq(&lock))
 
 /**
  * wait_event_lock_irq_cmd - sleep until a condition gets true. The
-- 
cgit v1.2.3


From 8fbd88fa1717601ef91ced49a32f24786b167065 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:28 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_lock_irq()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.686006009@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index d64918e09e3c..a577a85004ae 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -689,26 +689,12 @@ do {									\
 } while (0)
 
 
-#define __wait_event_interruptible_lock_irq(wq, condition,		\
-					    lock, ret, cmd)		\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		spin_unlock_irq(&lock);					\
-		cmd;							\
-		schedule();						\
-		spin_lock_irq(&lock);					\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+#define __wait_event_interruptible_lock_irq(wq, condition, lock, ret, cmd) \
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	   \
+		      spin_unlock_irq(&lock);				   \
+		      cmd;						   \
+		      schedule();					   \
+		      spin_lock_irq(&lock))
 
 /**
  * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
-- 
cgit v1.2.3


From a1dc6852ac5eecdcd3122ae01703183a3e88e979 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:29 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_lock_irq_timeout()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.759956109@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a577a85004ae..5d5408b08ba5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -763,25 +763,12 @@ do {									\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
-						    lock, ret)		\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (___wait_cond_timeout(condition, ret))		\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		spin_unlock_irq(&lock);					\
-		ret = schedule_timeout(ret);				\
-		spin_lock_irq(&lock);					\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+#define __wait_event_interruptible_lock_irq_timeout(wq, condition, lock, ret) \
+	___wait_event(wq, ___wait_cond_timeout(condition, ret),		      \
+		      TASK_INTERRUPTIBLE, 0, ret,	      		      \
+		      spin_unlock_irq(&lock);				      \
+		      ret = schedule_timeout(ret);			      \
+		      spin_lock_irq(&lock));
 
 /**
  * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses.
-- 
cgit v1.2.3


From 0d1e1c8a430450a3ce61a842cec64f9e2a9f3b05 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:30 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_tty()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.831085521@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tty.h | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 050372979076..6e803291028f 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -679,23 +679,10 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
 })
 
 #define __wait_event_interruptible_tty(tty, wq, condition, ret)		\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		tty_unlock(tty);					\
-		schedule();						\
-		tty_lock(tty);						\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	\
+			tty_unlock(tty);				\
+			schedule();					\
+			tty_lock(tty))
 
 #ifdef CONFIG_PROC_FS
 extern void proc_tty_register_driver(struct tty_driver *);
-- 
cgit v1.2.3


From cf7361fd961b6f0510572af6cf8ca3ffba07018b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:31 +0200
Subject: sched/wait: Collapse __wait_event_killable()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.898691966@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 5d5408b08ba5..ec3683ee0fc2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -582,22 +582,7 @@ do {									\
 
 
 #define __wait_event_killable(wq, condition, ret)			\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_KILLABLE);		\
-		if (condition)						\
-			break;						\
-		if (!fatal_signal_pending(current)) {			\
-			schedule();					\
-			continue;					\
-		}							\
-		ret = -ERESTARTSYS;					\
-		break;							\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_KILLABLE, 0, ret, schedule())
 
 /**
  * wait_event_killable - sleep until a condition gets true
-- 
cgit v1.2.3


From ebdc195f2ec68576876216081035293e37318e86 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:32 +0200
Subject: sched/wait: Collapse __wait_event_hrtimeout()

While not a whole-sale replacement like the others we can still reduce
the size of __wait_event_hrtimeout() considerably by noting that the
actual core of __wait_event_hrtimeout() is identical to what
___wait_event() generates.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.972793648@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ec3683ee0fc2..c065e8af9749 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -337,7 +337,6 @@ do {									\
 #define __wait_event_hrtimeout(wq, condition, timeout, state)		\
 ({									\
 	int __ret = 0;							\
-	DEFINE_WAIT(__wait);						\
 	struct hrtimer_sleeper __t;					\
 									\
 	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC,		\
@@ -348,25 +347,15 @@ do {									\
 				       current->timer_slack_ns,		\
 				       HRTIMER_MODE_REL);		\
 									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, state);			\
-		if (condition)						\
-			break;						\
-		if (state == TASK_INTERRUPTIBLE &&			\
-		    signal_pending(current)) {				\
-			__ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
+	___wait_event(wq, condition, state, 0, __ret,			\
 		if (!__t.task) {					\
 			__ret = -ETIME;					\
 			break;						\
 		}							\
-		schedule();						\
-	}								\
+		schedule());						\
 									\
 	hrtimer_cancel(&__t.timer);					\
 	destroy_hrtimer_on_stack(&__t.timer);				\
-	finish_wait(&wq, &__wait);					\
 	__ret;								\
 })
 
-- 
cgit v1.2.3


From 35a2af94c7ce7130ca292c68b1d27fcfdb648f6b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:33 +0200
Subject: sched/wait: Make the __wait_event*() interface more friendly

Change all __wait_event*() implementations to match the corresponding
wait_event*() signature for convenience.

In particular this does away with the weird 'ret' logic. Since there
are __wait_event*() users this requires we update them too.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092529.042563462@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/mips/kernel/rtlx.c         |  19 ++++---
 include/linux/tty.h             |  10 ++--
 include/linux/wait.h            | 113 +++++++++++++++++++---------------------
 net/irda/af_irda.c              |   5 +-
 net/netfilter/ipvs/ip_vs_sync.c |   7 +--
 5 files changed, 73 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c
index d763f11e35e2..2c12ea1668d1 100644
--- a/arch/mips/kernel/rtlx.c
+++ b/arch/mips/kernel/rtlx.c
@@ -172,8 +172,9 @@ int rtlx_open(int index, int can_sleep)
 	if (rtlx == NULL) {
 		if( (p = vpe_get_shared(tclimit)) == NULL) {
 		    if (can_sleep) {
-			__wait_event_interruptible(channel_wqs[index].lx_queue,
-				(p = vpe_get_shared(tclimit)), ret);
+			ret = __wait_event_interruptible(
+					channel_wqs[index].lx_queue,
+					(p = vpe_get_shared(tclimit)));
 			if (ret)
 				goto out_fail;
 		    } else {
@@ -263,11 +264,10 @@ unsigned int rtlx_read_poll(int index, int can_sleep)
 	/* data available to read? */
 	if (chan->lx_read == chan->lx_write) {
 		if (can_sleep) {
-			int ret = 0;
-
-			__wait_event_interruptible(channel_wqs[index].lx_queue,
+			int ret = __wait_event_interruptible(
+				channel_wqs[index].lx_queue,
 				(chan->lx_read != chan->lx_write) ||
-				sp_stopping, ret);
+				sp_stopping);
 			if (ret)
 				return ret;
 
@@ -440,14 +440,13 @@ static ssize_t file_write(struct file *file, const char __user * buffer,
 
 	/* any space left... */
 	if (!rtlx_write_poll(minor)) {
-		int ret = 0;
+		int ret;
 
 		if (file->f_flags & O_NONBLOCK)
 			return -EAGAIN;
 
-		__wait_event_interruptible(channel_wqs[minor].rt_queue,
-					   rtlx_write_poll(minor),
-					   ret);
+		ret = __wait_event_interruptible(channel_wqs[minor].rt_queue,
+					   rtlx_write_poll(minor));
 		if (ret)
 			return ret;
 	}
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 6e803291028f..633cac77f9f9 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -672,14 +672,14 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
 #define wait_event_interruptible_tty(tty, wq, condition)		\
 ({									\
 	int __ret = 0;							\
-	if (!(condition)) {						\
-		__wait_event_interruptible_tty(tty, wq, condition, __ret);	\
-	}								\
+	if (!(condition))						\
+		__ret = __wait_event_interruptible_tty(tty, wq,		\
+						       condition);	\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_tty(tty, wq, condition, ret)		\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	\
+#define __wait_event_interruptible_tty(tty, wq, condition)		\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 			tty_unlock(tty);				\
 			schedule();					\
 			tty_lock(tty))
diff --git a/include/linux/wait.h b/include/linux/wait.h
index c065e8af9749..bd4bd7b479b6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -179,24 +179,23 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up_interruptible_sync_poll(x, m)				\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
 
-#define ___wait_cond_timeout(condition, ret)				\
+#define ___wait_cond_timeout(condition)					\
 ({									\
  	bool __cond = (condition);					\
- 	if (__cond && !ret)						\
- 		ret = 1;						\
- 	__cond || !ret;							\
+ 	if (__cond && !__ret)						\
+ 		__ret = 1;						\
+ 	__cond || !__ret;						\
 })
 
 #define ___wait_signal_pending(state)					\
 	((state == TASK_INTERRUPTIBLE && signal_pending(current)) ||	\
 	 (state == TASK_KILLABLE && fatal_signal_pending(current)))
 
-#define ___wait_nop_ret		int ret __always_unused
-
 #define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
-do {									\
+({									\
 	__label__ __out;						\
 	DEFINE_WAIT(__wait);						\
+	long __ret = ret;						\
 									\
 	for (;;) {							\
 		if (exclusive)						\
@@ -208,7 +207,7 @@ do {									\
 			break;						\
 									\
 		if (___wait_signal_pending(state)) {			\
-			ret = -ERESTARTSYS;				\
+			__ret = -ERESTARTSYS;				\
 			if (exclusive) {				\
 				abort_exclusive_wait(&wq, &__wait, 	\
 						     state, NULL); 	\
@@ -220,12 +219,12 @@ do {									\
 		cmd;							\
 	}								\
 	finish_wait(&wq, &__wait);					\
-__out:	;								\
-} while (0)
+__out:	__ret;								\
+})
 
 #define __wait_event(wq, condition) 					\
-	___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,		\
-		      ___wait_nop_ret, schedule())
+	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    schedule())
 
 /**
  * wait_event - sleep until a condition gets true
@@ -246,10 +245,10 @@ do {									\
 	__wait_event(wq, condition);					\
 } while (0)
 
-#define __wait_event_timeout(wq, condition, ret)			\
-	___wait_event(wq, ___wait_cond_timeout(condition, ret), 	\
-		      TASK_UNINTERRUPTIBLE, 0, ret,			\
-		      ret = schedule_timeout(ret))
+#define __wait_event_timeout(wq, condition, timeout)			\
+	___wait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_UNINTERRUPTIBLE, 0, timeout,			\
+		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_timeout - sleep until a condition gets true or a timeout elapses
@@ -272,12 +271,12 @@ do {									\
 ({									\
 	long __ret = timeout;						\
 	if (!(condition)) 						\
-		__wait_event_timeout(wq, condition, __ret);		\
+		__ret = __wait_event_timeout(wq, condition, timeout);	\
 	__ret;								\
 })
 
-#define __wait_event_interruptible(wq, condition, ret)			\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	\
+#define __wait_event_interruptible(wq, condition)			\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 		      schedule())
 
 /**
@@ -299,14 +298,14 @@ do {									\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
-		__wait_event_interruptible(wq, condition, __ret);	\
+		__ret = __wait_event_interruptible(wq, condition);	\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_timeout(wq, condition, ret)		\
-	___wait_event(wq, ___wait_cond_timeout(condition, ret),		\
-		      TASK_INTERRUPTIBLE, 0, ret,			\
-		      ret = schedule_timeout(ret))
+#define __wait_event_interruptible_timeout(wq, condition, timeout)	\
+	___wait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_INTERRUPTIBLE, 0, timeout,			\
+		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
@@ -330,7 +329,8 @@ do {									\
 ({									\
 	long __ret = timeout;						\
 	if (!(condition))						\
-		__wait_event_interruptible_timeout(wq, condition, __ret); \
+		__ret = __wait_event_interruptible_timeout(wq, 		\
+						condition, timeout);	\
 	__ret;								\
 })
 
@@ -347,7 +347,7 @@ do {									\
 				       current->timer_slack_ns,		\
 				       HRTIMER_MODE_REL);		\
 									\
-	___wait_event(wq, condition, state, 0, __ret,			\
+	__ret = ___wait_event(wq, condition, state, 0, 0,		\
 		if (!__t.task) {					\
 			__ret = -ETIME;					\
 			break;						\
@@ -409,15 +409,15 @@ do {									\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_exclusive(wq, condition, ret)	\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, ret,	\
+#define __wait_event_interruptible_exclusive(wq, condition)		\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
 		      schedule())
 
 #define wait_event_interruptible_exclusive(wq, condition)		\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
-		__wait_event_interruptible_exclusive(wq, condition, __ret);\
+		__ret = __wait_event_interruptible_exclusive(wq, condition);\
 	__ret;								\
 })
 
@@ -570,8 +570,8 @@ do {									\
 
 
-#define __wait_event_killable(wq, condition, ret)			\
-	___wait_event(wq, condition, TASK_KILLABLE, 0, ret, schedule())
+#define __wait_event_killable(wq, condition)				\
+	___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
 
 /**
  * wait_event_killable - sleep until a condition gets true
@@ -592,18 +592,17 @@ do {									\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
-		__wait_event_killable(wq, condition, __ret);		\
+		__ret = __wait_event_killable(wq, condition);		\
 	__ret;								\
 })
 
 
 #define __wait_event_lock_irq(wq, condition, lock, cmd)			\
-	___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,		\
-		      ___wait_nop_ret,					\
-		      spin_unlock_irq(&lock);				\
-		      cmd;						\
-		      schedule();					\
-		      spin_lock_irq(&lock))
+	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    spin_unlock_irq(&lock);			\
+			    cmd;					\
+			    schedule();					\
+			    spin_lock_irq(&lock))
 
 /**
  * wait_event_lock_irq_cmd - sleep until a condition gets true. The
@@ -663,11 +662,11 @@ do {									\
 } while (0)
 
 
-#define __wait_event_interruptible_lock_irq(wq, condition, lock, ret, cmd) \
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	   \
-		      spin_unlock_irq(&lock);				   \
-		      cmd;						   \
-		      schedule();					   \
+#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd)	\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,	   	\
+		      spin_unlock_irq(&lock);				\
+		      cmd;						\
+		      schedule();					\
 		      spin_lock_irq(&lock))
 
 /**
@@ -698,10 +697,9 @@ do {									\
 #define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
 ({									\
 	int __ret = 0;							\
-									\
 	if (!(condition))						\
-		__wait_event_interruptible_lock_irq(wq, condition,	\
-						    lock, __ret, cmd);	\
+		__ret = __wait_event_interruptible_lock_irq(wq, 	\
+						condition, lock, cmd);	\
 	__ret;								\
 })
 
@@ -730,18 +728,18 @@ do {									\
 #define wait_event_interruptible_lock_irq(wq, condition, lock)		\
 ({									\
 	int __ret = 0;							\
-									\
 	if (!(condition))						\
-		__wait_event_interruptible_lock_irq(wq, condition,	\
-						    lock, __ret, );	\
+		__ret = __wait_event_interruptible_lock_irq(wq,		\
+						condition, lock,)	\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition, lock, ret) \
-	___wait_event(wq, ___wait_cond_timeout(condition, ret),		      \
-		      TASK_INTERRUPTIBLE, 0, ret,	      		      \
-		      spin_unlock_irq(&lock);				      \
-		      ret = schedule_timeout(ret);			      \
+#define __wait_event_interruptible_lock_irq_timeout(wq, condition, 	\
+						    lock, timeout) 	\
+	___wait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_INTERRUPTIBLE, 0, ret,	      		\
+		      spin_unlock_irq(&lock);				\
+		      __ret = schedule_timeout(__ret);			\
 		      spin_lock_irq(&lock));
 
 /**
@@ -771,11 +769,10 @@ do {									\
 #define wait_event_interruptible_lock_irq_timeout(wq, condition, lock,	\
 						  timeout)		\
 ({									\
-	int __ret = timeout;						\
-									\
+	long __ret = timeout;						\
 	if (!(condition))						\
-		__wait_event_interruptible_lock_irq_timeout(		\
-					wq, condition, lock, __ret);	\
+		__ret = __wait_event_interruptible_lock_irq_timeout(	\
+					wq, condition, lock, timeout);	\
 	__ret;								\
 })
 
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 0578d4fa00a9..0f676908d15b 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2563,9 +2563,8 @@ bed:
 				  jiffies + msecs_to_jiffies(val));
 
 			/* Wait for IR-LMP to call us back */
-			__wait_event_interruptible(self->query_wait,
-			      (self->cachedaddr != 0 || self->errno == -ETIME),
-						   err);
+			err = __wait_event_interruptible(self->query_wait,
+			      (self->cachedaddr != 0 || self->errno == -ETIME));
 
 			/* If watchdog is still activated, kill it! */
 			del_timer(&(self->watchdog));
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index f4484719f3e6..f63c2388f38d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1637,12 +1637,9 @@ static int sync_thread_master(void *data)
 			continue;
 		}
 		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
-			int ret = 0;
-
-			__wait_event_interruptible(*sk_sleep(sk),
+			int ret = __wait_event_interruptible(*sk_sleep(sk),
 						   sock_writeable(sk) ||
-						   kthread_should_stop(),
-						   ret);
+						   kthread_should_stop());
 			if (unlikely(kthread_should_stop()))
 				goto done;
 		}
-- 
cgit v1.2.3


From fb869b6e91a3ac235f237f73305ecf34cdc4969b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 4 Oct 2013 10:24:49 +0200
Subject: sched/wait: Clean up wait.h details a bit

Since we are changing wait.h profoundly, use the opportunity to:

 - add a sentence to explain what this file is about
 - remove whitespace noise
 - prettify weird looking line break fixup attempts
 - standardize type definition and initialization sequences
 - use consistent style details

No code is changed.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-O8dIie5swnctqpupakatvqyq@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 124 +++++++++++++++++++++++++--------------------------
 1 file changed, 60 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index bd4bd7b479b6..a2726c7dd244 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -1,7 +1,8 @@
 #ifndef _LINUX_WAIT_H
 #define _LINUX_WAIT_H
-
-
+/*
+ * Linux wait queue related types and methods
+ */
 #include <linux/list.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
@@ -13,27 +14,27 @@ typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, v
 int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
 
 struct __wait_queue {
-	unsigned int flags;
+	unsigned int		flags;
 #define WQ_FLAG_EXCLUSIVE	0x01
-	void *private;
-	wait_queue_func_t func;
-	struct list_head task_list;
+	void			*private;
+	wait_queue_func_t	func;
+	struct list_head	task_list;
 };
 
 struct wait_bit_key {
-	void *flags;
-	int bit_nr;
-#define WAIT_ATOMIC_T_BIT_NR -1
+	void			*flags;
+	int			bit_nr;
+#define WAIT_ATOMIC_T_BIT_NR	-1
 };
 
 struct wait_bit_queue {
-	struct wait_bit_key key;
-	wait_queue_t wait;
+	struct wait_bit_key	key;
+	wait_queue_t		wait;
 };
 
 struct __wait_queue_head {
-	spinlock_t lock;
-	struct list_head task_list;
+	spinlock_t		lock;
+	struct list_head	task_list;
 };
 typedef struct __wait_queue_head wait_queue_head_t;
 
@@ -84,17 +85,17 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct
 
 static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
 {
-	q->flags = 0;
-	q->private = p;
-	q->func = default_wake_function;
+	q->flags	= 0;
+	q->private	= p;
+	q->func		= default_wake_function;
 }
 
-static inline void init_waitqueue_func_entry(wait_queue_t *q,
-					wait_queue_func_t func)
+static inline void
+init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
 {
-	q->flags = 0;
-	q->private = NULL;
-	q->func = func;
+	q->flags	= 0;
+	q->private	= NULL;
+	q->func		= func;
 }
 
 static inline int waitqueue_active(wait_queue_head_t *q)
@@ -114,8 +115,8 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
 /*
  * Used for wake-one threads:
  */
-static inline void __add_wait_queue_exclusive(wait_queue_head_t *q,
-					      wait_queue_t *wait)
+static inline void
+__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
 {
 	wait->flags |= WQ_FLAG_EXCLUSIVE;
 	__add_wait_queue(q, wait);
@@ -127,23 +128,22 @@ static inline void __add_wait_queue_tail(wait_queue_head_t *head,
 	list_add_tail(&new->task_list, &head->task_list);
 }
 
-static inline void __add_wait_queue_tail_exclusive(wait_queue_head_t *q,
-					      wait_queue_t *wait)
+static inline void
+__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
 {
 	wait->flags |= WQ_FLAG_EXCLUSIVE;
 	__add_wait_queue_tail(q, wait);
 }
 
-static inline void __remove_wait_queue(wait_queue_head_t *head,
-							wait_queue_t *old)
+static inline void
+__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
 {
 	list_del(&old->task_list);
 }
 
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
-			void *key);
+void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_bit(wait_queue_head_t *, void *, int);
@@ -170,21 +170,21 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 /*
  * Wakeup macros to be used to report events to the targets.
  */
-#define wake_up_poll(x, m)				\
+#define wake_up_poll(x, m)						\
 	__wake_up(x, TASK_NORMAL, 1, (void *) (m))
-#define wake_up_locked_poll(x, m)				\
+#define wake_up_locked_poll(x, m)					\
 	__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
-#define wake_up_interruptible_poll(x, m)			\
+#define wake_up_interruptible_poll(x, m)				\
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
 #define wake_up_interruptible_sync_poll(x, m)				\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
 
 #define ___wait_cond_timeout(condition)					\
 ({									\
- 	bool __cond = (condition);					\
- 	if (__cond && !__ret)						\
- 		__ret = 1;						\
- 	__cond || !__ret;						\
+	bool __cond = (condition);					\
+	if (__cond && !__ret)						\
+		__ret = 1;						\
+	__cond || !__ret;						\
 })
 
 #define ___wait_signal_pending(state)					\
@@ -209,8 +209,8 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 		if (___wait_signal_pending(state)) {			\
 			__ret = -ERESTARTSYS;				\
 			if (exclusive) {				\
-				abort_exclusive_wait(&wq, &__wait, 	\
-						     state, NULL); 	\
+				abort_exclusive_wait(&wq, &__wait,	\
+						     state, NULL);	\
 				goto __out;				\
 			}						\
 			break;						\
@@ -222,7 +222,7 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 __out:	__ret;								\
 })
 
-#define __wait_event(wq, condition) 					\
+#define __wait_event(wq, condition)					\
 	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    schedule())
 
@@ -238,9 +238,9 @@ __out:	__ret;								\
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event(wq, condition) 					\
+#define wait_event(wq, condition)					\
 do {									\
-	if (condition)	 						\
+	if (condition)							\
 		break;							\
 	__wait_event(wq, condition);					\
 } while (0)
@@ -270,7 +270,7 @@ do {									\
 #define wait_event_timeout(wq, condition, timeout)			\
 ({									\
 	long __ret = timeout;						\
-	if (!(condition)) 						\
+	if (!(condition))						\
 		__ret = __wait_event_timeout(wq, condition, timeout);	\
 	__ret;								\
 })
@@ -329,7 +329,7 @@ do {									\
 ({									\
 	long __ret = timeout;						\
 	if (!(condition))						\
-		__ret = __wait_event_interruptible_timeout(wq, 		\
+		__ret = __wait_event_interruptible_timeout(wq,		\
 						condition, timeout);	\
 	__ret;								\
 })
@@ -569,7 +569,6 @@ do {									\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
 
 
-
 #define __wait_event_killable(wq, condition)				\
 	___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
 
@@ -663,7 +662,7 @@ do {									\
 
 
 #define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd)	\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,	   	\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 		      spin_unlock_irq(&lock);				\
 		      cmd;						\
 		      schedule();					\
@@ -698,7 +697,7 @@ do {									\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
-		__ret = __wait_event_interruptible_lock_irq(wq, 	\
+		__ret = __wait_event_interruptible_lock_irq(wq,		\
 						condition, lock, cmd);	\
 	__ret;								\
 })
@@ -734,18 +733,18 @@ do {									\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition, 	\
-						    lock, timeout) 	\
+#define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
+						    lock, timeout)	\
 	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, ret,	      		\
+		      TASK_INTERRUPTIBLE, 0, ret,			\
 		      spin_unlock_irq(&lock);				\
 		      __ret = schedule_timeout(__ret);			\
 		      spin_lock_irq(&lock));
 
 /**
- * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses.
- *		The condition is checked under the lock. This is expected
- *		to be called with the lock taken.
+ * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets
+ *		true or a timeout elapses. The condition is checked under
+ *		the lock. This is expected to be called with the lock taken.
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
@@ -783,11 +782,9 @@ do {									\
  * We plan to remove these interfaces.
  */
 extern void sleep_on(wait_queue_head_t *q);
-extern long sleep_on_timeout(wait_queue_head_t *q,
-				      signed long timeout);
+extern long sleep_on_timeout(wait_queue_head_t *q, signed long timeout);
 extern void interruptible_sleep_on(wait_queue_head_t *q);
-extern long interruptible_sleep_on_timeout(wait_queue_head_t *q,
-					   signed long timeout);
+extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long timeout);
 
 /*
  * Waitqueues which are removed from the waitqueue_head at wakeup time
@@ -795,8 +792,7 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q,
 void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
-			unsigned int mode, void *key);
+void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 
@@ -842,8 +838,8 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
  * One uses wait_on_bit() where one is waiting for the bit to clear,
  * but has no intention of setting it.
  */
-static inline int wait_on_bit(void *word, int bit,
-				int (*action)(void *), unsigned mode)
+static inline int
+wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode)
 {
 	if (!test_bit(bit, word))
 		return 0;
@@ -866,8 +862,8 @@ static inline int wait_on_bit(void *word, int bit,
  * One uses wait_on_bit_lock() where one is waiting for the bit to
  * clear with the intention of setting it, and when done, clearing it.
  */
-static inline int wait_on_bit_lock(void *word, int bit,
-				int (*action)(void *), unsigned mode)
+static inline int
+wait_on_bit_lock(void *word, int bit, int (*action)(void *), unsigned mode)
 {
 	if (!test_and_set_bit(bit, word))
 		return 0;
@@ -891,5 +887,5 @@ int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
 		return 0;
 	return out_of_line_wait_on_atomic_t(val, action, mode);
 }
-	
-#endif
+
+#endif /* _LINUX_WAIT_H */
-- 
cgit v1.2.3


From 8e933359ee2c3a861d5022b83110ce88ba3a2dda Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Tue, 24 Sep 2013 11:53:47 +0300
Subject: usb: phy: generic: Add gpio_reset to platform data

The GPIO number of the RESET line can be passed to the
driver using the gpio_reset member.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/usb_phy_gen_xceiv.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/usb_phy_gen_xceiv.h b/include/linux/usb/usb_phy_gen_xceiv.h
index f9a7e7bc925b..42f3b71a9775 100644
--- a/include/linux/usb/usb_phy_gen_xceiv.h
+++ b/include/linux/usb/usb_phy_gen_xceiv.h
@@ -9,7 +9,8 @@ struct usb_phy_gen_xceiv_platform_data {
 
 	/* if set fails with -EPROBE_DEFER if can't get regulator */
 	unsigned int needs_vcc:1;
-	unsigned int needs_reset:1;
+	unsigned int needs_reset:1;	/* deprecated */
+	int gpio_reset;
 };
 
 #if IS_ENABLED(CONFIG_NOP_USB_XCEIV)
-- 
cgit v1.2.3


From e4a49a6015efa6bd35f107640a497380d5e4ed48 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Fri, 4 Oct 2013 13:35:35 +0300
Subject: usb: remove intel_mid_otg.h

It's not used anymore.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/intel_mid_otg.h | 180 --------------------------------------
 1 file changed, 180 deletions(-)
 delete mode 100644 include/linux/usb/intel_mid_otg.h

(limited to 'include/linux')

diff --git a/include/linux/usb/intel_mid_otg.h b/include/linux/usb/intel_mid_otg.h
deleted file mode 100644
index 756cf5543ffd..000000000000
--- a/include/linux/usb/intel_mid_otg.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Intel MID (Langwell/Penwell) USB OTG Transceiver driver
- * Copyright (C) 2008 - 2010, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-
-#ifndef __INTEL_MID_OTG_H
-#define __INTEL_MID_OTG_H
-
-#include <linux/pm.h>
-#include <linux/usb/otg.h>
-#include <linux/notifier.h>
-
-struct intel_mid_otg_xceiv;
-
-/* This is a common data structure for Intel MID platform to
- * save values of the OTG state machine */
-struct otg_hsm {
-	/* Input */
-	int a_bus_resume;
-	int a_bus_suspend;
-	int a_conn;
-	int a_sess_vld;
-	int a_srp_det;
-	int a_vbus_vld;
-	int b_bus_resume;
-	int b_bus_suspend;
-	int b_conn;
-	int b_se0_srp;
-	int b_ssend_srp;
-	int b_sess_end;
-	int b_sess_vld;
-	int id;
-/* id values */
-#define ID_B		0x05
-#define ID_A		0x04
-#define ID_ACA_C	0x03
-#define ID_ACA_B	0x02
-#define ID_ACA_A	0x01
-	int power_up;
-	int adp_change;
-	int test_device;
-
-	/* Internal variables */
-	int a_set_b_hnp_en;
-	int b_srp_done;
-	int b_hnp_enable;
-	int hnp_poll_enable;
-
-	/* Timeout indicator for timers */
-	int a_wait_vrise_tmout;
-	int a_wait_bcon_tmout;
-	int a_aidl_bdis_tmout;
-	int a_bidl_adis_tmout;
-	int a_bidl_adis_tmr;
-	int a_wait_vfall_tmout;
-	int b_ase0_brst_tmout;
-	int b_bus_suspend_tmout;
-	int b_srp_init_tmout;
-	int b_srp_fail_tmout;
-	int b_srp_fail_tmr;
-	int b_adp_sense_tmout;
-
-	/* Informative variables */
-	int a_bus_drop;
-	int a_bus_req;
-	int a_clr_err;
-	int b_bus_req;
-	int a_suspend_req;
-	int b_bus_suspend_vld;
-
-	/* Output */
-	int drv_vbus;
-	int loc_conn;
-	int loc_sof;
-
-	/* Others */
-	int vbus_srp_up;
-};
-
-/* must provide ULPI access function to read/write registers implemented in
- * ULPI address space */
-struct iotg_ulpi_access_ops {
-	int	(*read)(struct intel_mid_otg_xceiv *iotg, u8 reg, u8 *val);
-	int	(*write)(struct intel_mid_otg_xceiv *iotg, u8 reg, u8 val);
-};
-
-#define OTG_A_DEVICE	0x0
-#define OTG_B_DEVICE	0x1
-
-/*
- * the Intel MID (Langwell/Penwell) otg transceiver driver needs to interact
- * with device and host drivers to implement the USB OTG related feature. More
- * function members are added based on usb_phy data structure for this
- * purpose.
- */
-struct intel_mid_otg_xceiv {
-	struct usb_phy		otg;
-	struct otg_hsm		hsm;
-
-	/* base address */
-	void __iomem		*base;
-
-	/* ops to access ulpi */
-	struct iotg_ulpi_access_ops	ulpi_ops;
-
-	/* atomic notifier for interrupt context */
-	struct atomic_notifier_head	iotg_notifier;
-
-	/* start/stop USB Host function */
-	int	(*start_host)(struct intel_mid_otg_xceiv *iotg);
-	int	(*stop_host)(struct intel_mid_otg_xceiv *iotg);
-
-	/* start/stop USB Peripheral function */
-	int	(*start_peripheral)(struct intel_mid_otg_xceiv *iotg);
-	int	(*stop_peripheral)(struct intel_mid_otg_xceiv *iotg);
-
-	/* start/stop ADP sense/probe function */
-	int	(*set_adp_probe)(struct intel_mid_otg_xceiv *iotg,
-					bool enabled, int dev);
-	int	(*set_adp_sense)(struct intel_mid_otg_xceiv *iotg,
-					bool enabled);
-
-#ifdef CONFIG_PM
-	/* suspend/resume USB host function */
-	int	(*suspend_host)(struct intel_mid_otg_xceiv *iotg,
-					pm_message_t message);
-	int	(*resume_host)(struct intel_mid_otg_xceiv *iotg);
-
-	int	(*suspend_peripheral)(struct intel_mid_otg_xceiv *iotg,
-					pm_message_t message);
-	int	(*resume_peripheral)(struct intel_mid_otg_xceiv *iotg);
-#endif
-
-};
-static inline
-struct intel_mid_otg_xceiv *otg_to_mid_xceiv(struct usb_phy *otg)
-{
-	return container_of(otg, struct intel_mid_otg_xceiv, otg);
-}
-
-#define MID_OTG_NOTIFY_CONNECT		0x0001
-#define MID_OTG_NOTIFY_DISCONN		0x0002
-#define MID_OTG_NOTIFY_HSUSPEND		0x0003
-#define MID_OTG_NOTIFY_HRESUME		0x0004
-#define MID_OTG_NOTIFY_CSUSPEND		0x0005
-#define MID_OTG_NOTIFY_CRESUME		0x0006
-#define MID_OTG_NOTIFY_HOSTADD		0x0007
-#define MID_OTG_NOTIFY_HOSTREMOVE	0x0008
-#define MID_OTG_NOTIFY_CLIENTADD	0x0009
-#define MID_OTG_NOTIFY_CLIENTREMOVE	0x000a
-
-static inline int
-intel_mid_otg_register_notifier(struct intel_mid_otg_xceiv *iotg,
-				struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&iotg->iotg_notifier, nb);
-}
-
-static inline void
-intel_mid_otg_unregister_notifier(struct intel_mid_otg_xceiv *iotg,
-				struct notifier_block *nb)
-{
-	atomic_notifier_chain_unregister(&iotg->iotg_notifier, nb);
-}
-
-#endif /* __INTEL_MID_OTG_H */
-- 
cgit v1.2.3


From bcc8edb52f05c1a9e75118d6b3bc04996a750593 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 5 Oct 2013 18:25:02 -0700
Subject: driver core: remove dev_attrs from struct class

Now that all in-kernel users of the dev_attrs field are converted to use
dev_groups, we can safely remove dev_attrs from struct class.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 38 +-------------------------------------
 include/linux/device.h |  2 --
 2 files changed, 1 insertion(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 319c2c594ac6..f67e86687ae2 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -455,35 +455,6 @@ static ssize_t online_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(online);
 
-static int device_add_attributes(struct device *dev,
-				 struct device_attribute *attrs)
-{
-	int error = 0;
-	int i;
-
-	if (attrs) {
-		for (i = 0; attrs[i].attr.name; i++) {
-			error = device_create_file(dev, &attrs[i]);
-			if (error)
-				break;
-		}
-		if (error)
-			while (--i >= 0)
-				device_remove_file(dev, &attrs[i]);
-	}
-	return error;
-}
-
-static void device_remove_attributes(struct device *dev,
-				     struct device_attribute *attrs)
-{
-	int i;
-
-	if (attrs)
-		for (i = 0; attrs[i].attr.name; i++)
-			device_remove_file(dev, &attrs[i]);
-}
-
 static int device_add_bin_attributes(struct device *dev,
 				     struct bin_attribute *attrs)
 {
@@ -534,12 +505,9 @@ static int device_add_attrs(struct device *dev)
 		error = device_add_groups(dev, class->dev_groups);
 		if (error)
 			return error;
-		error = device_add_attributes(dev, class->dev_attrs);
-		if (error)
-			goto err_remove_class_groups;
 		error = device_add_bin_attributes(dev, class->dev_bin_attrs);
 		if (error)
-			goto err_remove_class_attrs;
+			goto err_remove_class_groups;
 	}
 
 	if (type) {
@@ -566,9 +534,6 @@ static int device_add_attrs(struct device *dev)
  err_remove_class_bin_attrs:
 	if (class)
 		device_remove_bin_attributes(dev, class->dev_bin_attrs);
- err_remove_class_attrs:
-	if (class)
-		device_remove_attributes(dev, class->dev_attrs);
  err_remove_class_groups:
 	if (class)
 		device_remove_groups(dev, class->dev_groups);
@@ -588,7 +553,6 @@ static void device_remove_attrs(struct device *dev)
 		device_remove_groups(dev, type->groups);
 
 	if (class) {
-		device_remove_attributes(dev, class->dev_attrs);
 		device_remove_bin_attributes(dev, class->dev_bin_attrs);
 		device_remove_groups(dev, class->dev_groups);
 	}
diff --git a/include/linux/device.h b/include/linux/device.h
index e7f5b8585380..9ffe8561df75 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -325,7 +325,6 @@ int subsys_virtual_register(struct bus_type *subsys,
  * @owner:	The module owner.
  * @class_attrs: Default attributes of this class.
  * @dev_groups:	Default attributes of the devices that belong to the class.
- * @dev_attrs:	Default attributes of the devices belong to the class.
  * @dev_bin_attrs: Default binary attributes of the devices belong to the class.
  * @dev_kobj:	The kobject that represents this class and links it into the hierarchy.
  * @dev_uevent:	Called when a device is added, removed from this class, or a
@@ -354,7 +353,6 @@ struct class {
 	struct module		*owner;
 
 	struct class_attribute		*class_attrs;
-	struct device_attribute		*dev_attrs;	/* use dev_groups instead */
 	const struct attribute_group	**dev_groups;
 	struct bin_attribute		*dev_bin_attrs;
 	struct kobject			*dev_kobj;
-- 
cgit v1.2.3


From a6b01deda1e79259d2fe98fe68d41e4b7bad2783 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 5 Oct 2013 18:19:30 -0700
Subject: driver core: remove dev_bin_attrs from struct class

No in-kernel code is now using this, they have all be converted over to
using the bin_attrs support in attribute groups, so this field, and the
code in the driver core that was creating/remove the binary files can be
removed.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 41 ++---------------------------------------
 include/linux/device.h |  2 --
 2 files changed, 2 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index f67e86687ae2..bf35c557707f 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -455,35 +455,6 @@ static ssize_t online_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(online);
 
-static int device_add_bin_attributes(struct device *dev,
-				     struct bin_attribute *attrs)
-{
-	int error = 0;
-	int i;
-
-	if (attrs) {
-		for (i = 0; attrs[i].attr.name; i++) {
-			error = device_create_bin_file(dev, &attrs[i]);
-			if (error)
-				break;
-		}
-		if (error)
-			while (--i >= 0)
-				device_remove_bin_file(dev, &attrs[i]);
-	}
-	return error;
-}
-
-static void device_remove_bin_attributes(struct device *dev,
-					 struct bin_attribute *attrs)
-{
-	int i;
-
-	if (attrs)
-		for (i = 0; attrs[i].attr.name; i++)
-			device_remove_bin_file(dev, &attrs[i]);
-}
-
 int device_add_groups(struct device *dev, const struct attribute_group **groups)
 {
 	return sysfs_create_groups(&dev->kobj, groups);
@@ -505,15 +476,12 @@ static int device_add_attrs(struct device *dev)
 		error = device_add_groups(dev, class->dev_groups);
 		if (error)
 			return error;
-		error = device_add_bin_attributes(dev, class->dev_bin_attrs);
-		if (error)
-			goto err_remove_class_groups;
 	}
 
 	if (type) {
 		error = device_add_groups(dev, type->groups);
 		if (error)
-			goto err_remove_class_bin_attrs;
+			goto err_remove_class_groups;
 	}
 
 	error = device_add_groups(dev, dev->groups);
@@ -531,9 +499,6 @@ static int device_add_attrs(struct device *dev)
  err_remove_type_groups:
 	if (type)
 		device_remove_groups(dev, type->groups);
- err_remove_class_bin_attrs:
-	if (class)
-		device_remove_bin_attributes(dev, class->dev_bin_attrs);
  err_remove_class_groups:
 	if (class)
 		device_remove_groups(dev, class->dev_groups);
@@ -552,10 +517,8 @@ static void device_remove_attrs(struct device *dev)
 	if (type)
 		device_remove_groups(dev, type->groups);
 
-	if (class) {
-		device_remove_bin_attributes(dev, class->dev_bin_attrs);
+	if (class)
 		device_remove_groups(dev, class->dev_groups);
-	}
 }
 
 static ssize_t dev_show(struct device *dev, struct device_attribute *attr,
diff --git a/include/linux/device.h b/include/linux/device.h
index 9ffe8561df75..94638efa0bf8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -325,7 +325,6 @@ int subsys_virtual_register(struct bus_type *subsys,
  * @owner:	The module owner.
  * @class_attrs: Default attributes of this class.
  * @dev_groups:	Default attributes of the devices that belong to the class.
- * @dev_bin_attrs: Default binary attributes of the devices belong to the class.
  * @dev_kobj:	The kobject that represents this class and links it into the hierarchy.
  * @dev_uevent:	Called when a device is added, removed from this class, or a
  *		few other things that generate uevents to add the environment
@@ -354,7 +353,6 @@ struct class {
 
 	struct class_attribute		*class_attrs;
 	const struct attribute_group	**dev_groups;
-	struct bin_attribute		*dev_bin_attrs;
 	struct kobject			*dev_kobj;
 
 	int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env);
-- 
cgit v1.2.3


From 3573540cafa4296dd60f8be02f2aecaa31047525 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 2 Oct 2013 09:14:06 +0300
Subject: netif_set_xps_queue: make cpu mask const

virtio wants to pass in cpumask_of(cpu), make parameter
const to avoid build warnings.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 +++--
 net/core/dev.c            | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3de49aca4519..25f5d2d11e7c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2264,11 +2264,12 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 }
 
 #ifdef CONFIG_XPS
-extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
+extern int netif_set_xps_queue(struct net_device *dev,
+			       const struct cpumask *mask,
 			       u16 index);
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
-				      struct cpumask *mask,
+				      const struct cpumask *mask,
 				      u16 index)
 {
 	return 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index 65f829cfd928..3430b1ed12e5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1917,7 +1917,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 	return new_map;
 }
 
-int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+			u16 index)
 {
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
 	struct xps_map *map, *new_map;
-- 
cgit v1.2.3


From d45ed4a4e33ae103053c0a53d280014e7101bb5c Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 4 Oct 2013 00:14:06 -0700
Subject: net: fix unsafe set_memory_rw from softirq

on x86 system with net.core.bpf_jit_enable = 1

sudo tcpdump -i eth1 'tcp port 22'

causes the warning:
[   56.766097]  Possible unsafe locking scenario:
[   56.766097]
[   56.780146]        CPU0
[   56.786807]        ----
[   56.793188]   lock(&(&vb->lock)->rlock);
[   56.799593]   <Interrupt>
[   56.805889]     lock(&(&vb->lock)->rlock);
[   56.812266]
[   56.812266]  *** DEADLOCK ***
[   56.812266]
[   56.830670] 1 lock held by ksoftirqd/1/13:
[   56.836838]  #0:  (rcu_read_lock){.+.+..}, at: [<ffffffff8118f44c>] vm_unmap_aliases+0x8c/0x380
[   56.849757]
[   56.849757] stack backtrace:
[   56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45
[   56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[   56.882004]  ffffffff821944c0 ffff88080bbdb8c8 ffffffff8175a145 0000000000000007
[   56.895630]  ffff88080bbd5f40 ffff88080bbdb928 ffffffff81755b14 0000000000000001
[   56.909313]  ffff880800000001 ffff880800000000 ffffffff8101178f 0000000000000001
[   56.923006] Call Trace:
[   56.929532]  [<ffffffff8175a145>] dump_stack+0x55/0x76
[   56.936067]  [<ffffffff81755b14>] print_usage_bug+0x1f7/0x208
[   56.942445]  [<ffffffff8101178f>] ? save_stack_trace+0x2f/0x50
[   56.948932]  [<ffffffff810cc0a0>] ? check_usage_backwards+0x150/0x150
[   56.955470]  [<ffffffff810ccb52>] mark_lock+0x282/0x2c0
[   56.961945]  [<ffffffff810ccfed>] __lock_acquire+0x45d/0x1d50
[   56.968474]  [<ffffffff810cce6e>] ? __lock_acquire+0x2de/0x1d50
[   56.975140]  [<ffffffff81393bf5>] ? cpumask_next_and+0x55/0x90
[   56.981942]  [<ffffffff810cef72>] lock_acquire+0x92/0x1d0
[   56.988745]  [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380
[   56.995619]  [<ffffffff817628f1>] _raw_spin_lock+0x41/0x50
[   57.002493]  [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380
[   57.009447]  [<ffffffff8118f52a>] vm_unmap_aliases+0x16a/0x380
[   57.016477]  [<ffffffff8118f44c>] ? vm_unmap_aliases+0x8c/0x380
[   57.023607]  [<ffffffff810436b0>] change_page_attr_set_clr+0xc0/0x460
[   57.030818]  [<ffffffff810cfb8d>] ? trace_hardirqs_on+0xd/0x10
[   57.037896]  [<ffffffff811a8330>] ? kmem_cache_free+0xb0/0x2b0
[   57.044789]  [<ffffffff811b59c3>] ? free_object_rcu+0x93/0xa0
[   57.051720]  [<ffffffff81043d9f>] set_memory_rw+0x2f/0x40
[   57.058727]  [<ffffffff8104e17c>] bpf_jit_free+0x2c/0x40
[   57.065577]  [<ffffffff81642cba>] sk_filter_release_rcu+0x1a/0x30
[   57.072338]  [<ffffffff811108e2>] rcu_process_callbacks+0x202/0x7c0
[   57.078962]  [<ffffffff81057f17>] __do_softirq+0xf7/0x3f0
[   57.085373]  [<ffffffff81058245>] run_ksoftirqd+0x35/0x70

cannot reuse jited filter memory, since it's readonly,
so use original bpf insns memory to hold work_struct

defer kfree of sk_filter until jit completed freeing

tested on x86_64 and i386

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/net/bpf_jit_32.c       |  1 +
 arch/powerpc/net/bpf_jit_comp.c |  1 +
 arch/s390/net/bpf_jit_comp.c    |  4 +++-
 arch/sparc/net/bpf_jit_comp.c   |  1 +
 arch/x86/net/bpf_jit_comp.c     | 18 +++++++++++++-----
 include/linux/filter.h          | 15 +++++++++++----
 include/net/sock.h              |  6 ++----
 net/core/filter.c               |  8 ++++----
 8 files changed, 36 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index f50d223a0bd3..99b44e0e8d86 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -930,4 +930,5 @@ void bpf_jit_free(struct sk_filter *fp)
 {
 	if (fp->bpf_func != sk_run_filter)
 		module_free(NULL, fp->bpf_func);
+	kfree(fp);
 }
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index bf56e33f8257..2345bdb4d917 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -691,4 +691,5 @@ void bpf_jit_free(struct sk_filter *fp)
 {
 	if (fp->bpf_func != sk_run_filter)
 		module_free(NULL, fp->bpf_func);
+	kfree(fp);
 }
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 709239285869..a5df511e27a2 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp)
 	struct bpf_binary_header *header = (void *)addr;
 
 	if (fp->bpf_func == sk_run_filter)
-		return;
+		goto free_filter;
 	set_memory_rw(addr, header->pages);
 	module_free(NULL, header);
+free_filter:
+	kfree(fp);
 }
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 9c7be59e6f5a..218b6b23c378 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -808,4 +808,5 @@ void bpf_jit_free(struct sk_filter *fp)
 {
 	if (fp->bpf_func != sk_run_filter)
 		module_free(NULL, fp->bpf_func);
+	kfree(fp);
 }
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 79c216aa0e2b..516593e1ce33 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -772,13 +772,21 @@ out:
 	return;
 }
 
+static void bpf_jit_free_deferred(struct work_struct *work)
+{
+	struct sk_filter *fp = container_of(work, struct sk_filter, work);
+	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+	struct bpf_binary_header *header = (void *)addr;
+
+	set_memory_rw(addr, header->pages);
+	module_free(NULL, header);
+	kfree(fp);
+}
+
 void bpf_jit_free(struct sk_filter *fp)
 {
 	if (fp->bpf_func != sk_run_filter) {
-		unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
-		struct bpf_binary_header *header = (void *)addr;
-
-		set_memory_rw(addr, header->pages);
-		module_free(NULL, header);
+		INIT_WORK(&fp->work, bpf_jit_free_deferred);
+		schedule_work(&fp->work);
 	}
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a6ac84871d6d..ff4e40cd45b1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -6,6 +6,7 @@
 
 #include <linux/atomic.h>
 #include <linux/compat.h>
+#include <linux/workqueue.h>
 #include <uapi/linux/filter.h>
 
 #ifdef CONFIG_COMPAT
@@ -25,15 +26,19 @@ struct sk_filter
 {
 	atomic_t		refcnt;
 	unsigned int         	len;	/* Number of filter blocks */
+	struct rcu_head		rcu;
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
 					    const struct sock_filter *filter);
-	struct rcu_head		rcu;
-	struct sock_filter     	insns[0];
+	union {
+		struct sock_filter     	insns[0];
+		struct work_struct	work;
+	};
 };
 
-static inline unsigned int sk_filter_len(const struct sk_filter *fp)
+static inline unsigned int sk_filter_size(unsigned int proglen)
 {
-	return fp->len * sizeof(struct sock_filter) + sizeof(*fp);
+	return max(sizeof(struct sk_filter),
+		   offsetof(struct sk_filter, insns[proglen]));
 }
 
 extern int sk_filter(struct sock *sk, struct sk_buff *skb);
@@ -67,11 +72,13 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
 }
 #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
 #else
+#include <linux/slab.h>
 static inline void bpf_jit_compile(struct sk_filter *fp)
 {
 }
 static inline void bpf_jit_free(struct sk_filter *fp)
 {
+	kfree(fp);
 }
 #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 1d37a8086bed..808cbc2ec6c1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1630,16 +1630,14 @@ static inline void sk_filter_release(struct sk_filter *fp)
 
 static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
 {
-	unsigned int size = sk_filter_len(fp);
-
-	atomic_sub(size, &sk->sk_omem_alloc);
+	atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
 	sk_filter_release(fp);
 }
 
 static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 {
 	atomic_inc(&fp->refcnt);
-	atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc);
+	atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
 }
 
 /*
diff --git a/net/core/filter.c b/net/core/filter.c
index 6438f29ff266..01b780856db2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
 	bpf_jit_free(fp);
-	kfree(fp);
 }
 EXPORT_SYMBOL(sk_filter_release_rcu);
 
@@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL);
+	fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
 	if (!fp)
 		return -ENOMEM;
 	memcpy(fp->insns, fprog->filter, fsize);
@@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 {
 	struct sk_filter *fp, *old_fp;
 	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+	unsigned int sk_fsize = sk_filter_size(fprog->len);
 	int err;
 
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
@@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
 	if (!fp)
 		return -ENOMEM;
 	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
-		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
+		sock_kfree_s(sk, fp, sk_fsize);
 		return -EFAULT;
 	}
 
-- 
cgit v1.2.3


From 5cde282938915f36a2e6769b51c24c4159654859 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 5 Oct 2013 19:26:05 -0700
Subject: net: Separate the close_list and the unreg_list v2

Separate the unreg_list and the close_list in dev_close_many preventing
dev_close_many from permuting the unreg_list.  The permutations of the
unreg_list have resulted in cases where the loopback device is accessed
it has been freed in code such as dst_ifdown.  Resulting in subtle memory
corruption.

This is the second bug from sharing the storage between the close_list
and the unreg_list.  The issues that crop up with sharing are
apparently too subtle to show up in normal testing or usage, so let's
forget about being clever and use two separate lists.

v2: Make all callers pass in a close_list to dev_close_many

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 25 ++++++++++++++-----------
 net/sched/sch_generic.c   |  6 +++---
 3 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f5cd464271bf..6d77e0f3cc10 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1143,6 +1143,7 @@ struct net_device {
 	struct list_head	dev_list;
 	struct list_head	napi_list;
 	struct list_head	unreg_list;
+	struct list_head	close_list;
 
 	/* directly linked devices, like slaves for bonding */
 	struct {
diff --git a/net/core/dev.c b/net/core/dev.c
index c25db20a4246..fa0b2b06c1a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1307,7 +1307,7 @@ static int __dev_close_many(struct list_head *head)
 	ASSERT_RTNL();
 	might_sleep();
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
 		clear_bit(__LINK_STATE_START, &dev->state);
@@ -1323,7 +1323,7 @@ static int __dev_close_many(struct list_head *head)
 
 	dev_deactivate_many(head);
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		/*
@@ -1351,7 +1351,7 @@ static int __dev_close(struct net_device *dev)
 	/* Temporarily disable netpoll until the interface is down */
 	netpoll_rx_disable(dev);
 
-	list_add(&dev->unreg_list, &single);
+	list_add(&dev->close_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
 
@@ -1362,21 +1362,20 @@ static int __dev_close(struct net_device *dev)
 static int dev_close_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
-	LIST_HEAD(tmp_list);
 
-	list_for_each_entry_safe(dev, tmp, head, unreg_list)
+	/* Remove the devices that don't need to be closed */
+	list_for_each_entry_safe(dev, tmp, head, close_list)
 		if (!(dev->flags & IFF_UP))
-			list_move(&dev->unreg_list, &tmp_list);
+			list_del_init(&dev->close_list);
 
 	__dev_close_many(head);
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry_safe(dev, tmp, head, close_list) {
 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
 		call_netdevice_notifiers(NETDEV_DOWN, dev);
+		list_del_init(&dev->close_list);
 	}
 
-	/* rollback_registered_many needs the complete original list */
-	list_splice(&tmp_list, head);
 	return 0;
 }
 
@@ -1397,7 +1396,7 @@ int dev_close(struct net_device *dev)
 		/* Block netpoll rx while the interface is going down */
 		netpoll_rx_disable(dev);
 
-		list_add(&dev->unreg_list, &single);
+		list_add(&dev->close_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
 
@@ -5439,6 +5438,7 @@ static void net_set_todo(struct net_device *dev)
 static void rollback_registered_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
+	LIST_HEAD(close_head);
 
 	BUG_ON(dev_boot_phase);
 	ASSERT_RTNL();
@@ -5461,7 +5461,9 @@ static void rollback_registered_many(struct list_head *head)
 	}
 
 	/* If device is running, close it first. */
-	dev_close_many(head);
+	list_for_each_entry(dev, head, unreg_list)
+		list_add_tail(&dev->close_list, &close_head);
+	dev_close_many(&close_head);
 
 	list_for_each_entry(dev, head, unreg_list) {
 		/* And unlink it from device chain. */
@@ -6257,6 +6259,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->close_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
 	INIT_LIST_HEAD(&dev->adj_list.upper);
 	INIT_LIST_HEAD(&dev->adj_list.lower);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e7121d29c4bd..7fc899a943a8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -829,7 +829,7 @@ void dev_deactivate_many(struct list_head *head)
 	struct net_device *dev;
 	bool sync_needed = false;
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
 					 &noop_qdisc);
 		if (dev_ingress_queue(dev))
@@ -848,7 +848,7 @@ void dev_deactivate_many(struct list_head *head)
 		synchronize_net();
 
 	/* Wait for outstanding qdisc_run calls. */
-	list_for_each_entry(dev, head, unreg_list)
+	list_for_each_entry(dev, head, close_list)
 		while (some_qdisc_is_busy(dev))
 			yield();
 }
@@ -857,7 +857,7 @@ void dev_deactivate(struct net_device *dev)
 {
 	LIST_HEAD(single);
 
-	list_add(&dev->unreg_list, &single);
+	list_add(&dev->close_list, &single);
 	dev_deactivate_many(&single);
 	list_del(&single);
 }
-- 
cgit v1.2.3


From ddeccb8d6b5bbe2c1e3a29f8c74b52f170c2207d Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Tue, 8 Oct 2013 06:42:10 +0900
Subject: dmaengine: add driver for Samsung s3c24xx SoCs

This adds a new driver to support the s3c24xx dma using the dmaengine
and makes the old one in mach-s3c24xx obsolete in the long run.

Conceptually the s3c24xx-dma feels like a distant relative of the pl08x
with numerous virtual channels being mapped to a lot less physical ones.
The driver therefore borrows a lot from the amba-pl08x driver in this
regard. Functionality-wise the driver gains a memcpy ability in addition
to the slave_sg one.

The driver supports both the method for requesting the peripheral used
by SoCs before the S3C2443 and the different method for S3C2443 and later.

On earlier SoCs the hardware channels usable for specific peripherals is
constrainted while on later SoCs all channels can be used for any peripheral.

Tested on a s3c2416-based board, memcpy using the dmatest module and
slave_sg partially using the spi-s3c64xx driver.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 drivers/dma/Kconfig                       |   12 +
 drivers/dma/Makefile                      |    1 +
 drivers/dma/s3c24xx-dma.c                 | 1340 +++++++++++++++++++++++++++++
 include/linux/platform_data/dma-s3c24xx.h |   43 +
 4 files changed, 1396 insertions(+)
 create mode 100644 drivers/dma/s3c24xx-dma.c
 create mode 100644 include/linux/platform_data/dma-s3c24xx.h

(limited to 'include/linux')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 526ec77c7ba0..d639115aa875 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -154,6 +154,18 @@ config TEGRA20_APB_DMA
 	  This DMA controller transfers data from memory to peripheral fifo
 	  or vice versa. It does not support memory to memory data transfer.
 
+config S3C24XX_DMAC
+	tristate "Samsung S3C24XX DMA support"
+	depends on ARCH_S3C24XX && !S3C24XX_DMA
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support for the Samsung S3C24XX DMA controller driver. The
+	  DMA controller is having multiple DMA channels which can be
+	  configured for different peripherals like audio, UART, SPI.
+	  The DMA controller can transfer data from memory to peripheral,
+	  periphal to memory, periphal to periphal and memory to memory.
+
 source "drivers/dma/sh/Kconfig"
 
 config COH901318
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index db89035b3626..0ce2da97e429 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
+obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
 obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
new file mode 100644
index 000000000000..56c92538bc4c
--- /dev/null
+++ b/drivers/dma/s3c24xx-dma.c
@@ -0,0 +1,1340 @@
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ *
+ * based on amba-pl08x.c
+ *
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals
+ * that can be routed to any of the 4 to 8 hardware-channels.
+ *
+ * Therefore on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * Open items:
+ * - bursts
+ */
+
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/platform_data/dma-s3c24xx.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define MAX_DMA_CHANNELS	8
+
+#define S3C24XX_DISRC			0x00
+#define S3C24XX_DISRCC			0x04
+#define S3C24XX_DISRCC_INC_INCREMENT	0
+#define S3C24XX_DISRCC_INC_FIXED	BIT(0)
+#define S3C24XX_DISRCC_LOC_AHB		0
+#define S3C24XX_DISRCC_LOC_APB		BIT(1)
+
+#define S3C24XX_DIDST			0x08
+#define S3C24XX_DIDSTC			0x0c
+#define S3C24XX_DIDSTC_INC_INCREMENT	0
+#define S3C24XX_DIDSTC_INC_FIXED	BIT(0)
+#define S3C24XX_DIDSTC_LOC_AHB		0
+#define S3C24XX_DIDSTC_LOC_APB		BIT(1)
+#define S3C24XX_DIDSTC_INT_TC0		0
+#define S3C24XX_DIDSTC_INT_RELOAD	BIT(2)
+
+#define S3C24XX_DCON			0x10
+
+#define S3C24XX_DCON_TC_MASK		0xfffff
+#define S3C24XX_DCON_DSZ_BYTE		(0 << 20)
+#define S3C24XX_DCON_DSZ_HALFWORD	(1 << 20)
+#define S3C24XX_DCON_DSZ_WORD		(2 << 20)
+#define S3C24XX_DCON_DSZ_MASK		(3 << 20)
+#define S3C24XX_DCON_DSZ_SHIFT		20
+#define S3C24XX_DCON_AUTORELOAD		0
+#define S3C24XX_DCON_NORELOAD		BIT(22)
+#define S3C24XX_DCON_HWTRIG		BIT(23)
+#define S3C24XX_DCON_HWSRC_SHIFT	24
+#define S3C24XX_DCON_SERV_SINGLE	0
+#define S3C24XX_DCON_SERV_WHOLE		BIT(27)
+#define S3C24XX_DCON_TSZ_UNIT		0
+#define S3C24XX_DCON_TSZ_BURST4		BIT(28)
+#define S3C24XX_DCON_INT		BIT(29)
+#define S3C24XX_DCON_SYNC_PCLK		0
+#define S3C24XX_DCON_SYNC_HCLK		BIT(30)
+#define S3C24XX_DCON_DEMAND		0
+#define S3C24XX_DCON_HANDSHAKE		BIT(31)
+
+#define S3C24XX_DSTAT			0x14
+#define S3C24XX_DSTAT_STAT_BUSY		BIT(20)
+#define S3C24XX_DSTAT_CURRTC_MASK	0xfffff
+
+#define S3C24XX_DMASKTRIG		0x20
+#define S3C24XX_DMASKTRIG_SWTRIG	BIT(0)
+#define S3C24XX_DMASKTRIG_ON		BIT(1)
+#define S3C24XX_DMASKTRIG_STOP		BIT(2)
+
+#define S3C24XX_DMAREQSEL		0x24
+#define S3C24XX_DMAREQSEL_HW		BIT(0)
+
+/*
+ * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel
+ * for a DMA source. Instead only specific channels are valid.
+ * All of these SoCs have 4 physical channels and the number of request
+ * source bits is 3. Additionally we also need 1 bit to mark the channel
+ * as valid.
+ * Therefore we separate the chansel element of the channel data into 4
+ * parts of 4 bits each, to hold the information if the channel is valid
+ * and the hw request source to use.
+ *
+ * Example:
+ * SDI is valid on channels 0, 2 and 3 - with varying hw request sources.
+ * For it the chansel field would look like
+ *
+ * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1
+ * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2
+ * ((BIT(3) | 2) << 0 * 4)   // channel 0, with request source 2
+ */
+#define S3C24XX_CHANSEL_WIDTH		4
+#define S3C24XX_CHANSEL_VALID		BIT(3)
+#define S3C24XX_CHANSEL_REQ_MASK	7
+
+/*
+ * struct soc_data - vendor-specific config parameters for individual SoCs
+ * @stride: spacing between the registers of each channel
+ * @has_reqsel: does the controller use the newer requestselection mechanism
+ * @has_clocks: are controllable dma-clocks present
+ */
+struct soc_data {
+	int stride;
+	bool has_reqsel;
+	bool has_clocks;
+};
+
+/*
+ * enum s3c24xx_dma_chan_state - holds the virtual channel states
+ * @S3C24XX_DMA_CHAN_IDLE: the channel is idle
+ * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum s3c24xx_dma_chan_state {
+	S3C24XX_DMA_CHAN_IDLE,
+	S3C24XX_DMA_CHAN_RUNNING,
+	S3C24XX_DMA_CHAN_WAITING,
+};
+
+/*
+ * struct s3c24xx_sg - structure containing data per sg
+ * @src_addr: src address of sg
+ * @dst_addr: dst address of sg
+ * @len: transfer len in bytes
+ * @node: node for txd's dsg_list
+ */
+struct s3c24xx_sg {
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	size_t len;
+	struct list_head node;
+};
+
+/*
+ * struct s3c24xx_txd - wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @dsg_list: list of children sg's
+ * @at: sg currently being transfered
+ * @width: transfer width
+ * @disrcc: value for source control register
+ * @didstc: value for destination control register
+ * @dcon: base value for dcon register
+ */
+struct s3c24xx_txd {
+	struct virt_dma_desc vd;
+	struct list_head dsg_list;
+	struct list_head *at;
+	u8 width;
+	u32 disrcc;
+	u32 didstc;
+	u32 dcon;
+};
+
+struct s3c24xx_dma_chan;
+
+/*
+ * struct s3c24xx_dma_phy - holder for the physical channels
+ * @id: physical index to this channel
+ * @valid: does the channel have all required elements
+ * @base: virtual memory base (remapped) for the this channel
+ * @irq: interrupt for this channel
+ * @clk: clock for this channel
+ * @lock: a lock to use when altering an instance of this struct
+ * @serving: virtual channel currently being served by this physicalchannel
+ * @host: a pointer to the host (internal use)
+ */
+struct s3c24xx_dma_phy {
+	unsigned int			id;
+	bool				valid;
+	void __iomem			*base;
+	unsigned int			irq;
+	struct clk			*clk;
+	spinlock_t			lock;
+	struct s3c24xx_dma_chan		*serving;
+	struct s3c24xx_dma_engine	*host;
+};
+
+/*
+ * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel
+ * @id: the id of the channel
+ * @name: name of the channel
+ * @vc: wrappped virtual channel
+ * @phy: the physical channel utilized by this channel, if there is one
+ * @runtime_addr: address for RX/TX according to the runtime config
+ * @at: active transaction on this channel
+ * @lock: a lock for this channel data
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ */
+struct s3c24xx_dma_chan {
+	int id;
+	const char *name;
+	struct virt_dma_chan vc;
+	struct s3c24xx_dma_phy *phy;
+	struct dma_slave_config cfg;
+	struct s3c24xx_txd *at;
+	struct s3c24xx_dma_engine *host;
+	enum s3c24xx_dma_chan_state state;
+	bool slave;
+};
+
+/*
+ * struct s3c24xx_dma_engine - the local state holder for the S3C24XX
+ * @pdev: the corresponding platform device
+ * @pdata: platform data passed in from the platform/machine
+ * @base: virtual memory base (remapped)
+ * @slave: slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @phy_chans: array of data for the physical channels
+ */
+struct s3c24xx_dma_engine {
+	struct platform_device			*pdev;
+	const struct s3c24xx_dma_platdata	*pdata;
+	struct soc_data				*sdata;
+	void __iomem				*base;
+	struct dma_device			slave;
+	struct dma_device			memcpy;
+	struct s3c24xx_dma_phy			*phy_chans;
+};
+
+/*
+ * Physical channel handling
+ */
+
+/*
+ * Check whether a certain channel is busy or not.
+ */
+static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy)
+{
+	unsigned int val = readl(phy->base + S3C24XX_DSTAT);
+	return val & S3C24XX_DSTAT_STAT_BUSY;
+}
+
+static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan,
+				  struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	int phyvalid;
+
+	/* every phy is valid for memcopy channels */
+	if (!s3cchan->slave)
+		return true;
+
+	/* On newer variants all phys can be used for all virtual channels */
+	if (s3cdma->sdata->has_reqsel)
+		return true;
+
+	phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH));
+	return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
+ */
+static
+struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata;
+	struct s3c24xx_dma_phy *phy = NULL;
+	unsigned long flags;
+	int i;
+	int ret;
+
+	if (s3cchan->slave)
+		cdata = &pdata->channels[s3cchan->id];
+
+	for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
+		phy = &s3cdma->phy_chans[i];
+
+		if (!phy->valid)
+			continue;
+
+		if (!s3c24xx_dma_phy_valid(s3cchan, phy))
+			continue;
+
+		spin_lock_irqsave(&phy->lock, flags);
+
+		if (!phy->serving) {
+			phy->serving = s3cchan;
+			spin_unlock_irqrestore(&phy->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&phy->lock, flags);
+	}
+
+	/* No physical channel available, cope with it */
+	if (i == s3cdma->pdata->num_phy_channels) {
+		dev_warn(&s3cdma->pdev->dev, "no phy channel available\n");
+		return NULL;
+	}
+
+	/* start the phy clock */
+	if (s3cdma->sdata->has_clocks) {
+		ret = clk_enable(phy->clk);
+		if (ret) {
+			dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n",
+				phy->id, ret);
+			phy->serving = NULL;
+			return NULL;
+		}
+	}
+
+	return phy;
+}
+
+/*
+ * Mark the physical channel as free.
+ *
+ * This drops the link between the physical and virtual channel.
+ */
+static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = phy->host;
+
+	if (s3cdma->sdata->has_clocks)
+		clk_disable(phy->clk);
+
+	phy->serving = NULL;
+}
+
+/*
+ * Stops the channel by writing the stop bit.
+ * This should not be used for an on-going transfer, but as a method of
+ * shutting down a channel (eg, when it's no longer used) or terminating a
+ * transfer.
+ */
+static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy)
+{
+	writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Virtual channel handling
+ */
+
+static inline
+struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct s3c24xx_dma_chan, vc.chan);
+}
+
+static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct s3c24xx_txd *txd = s3cchan->at;
+	u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK;
+
+	return tc * txd->width;
+}
+
+static int s3c24xx_dma_set_runtime_config(struct s3c24xx_dma_chan *s3cchan,
+				  struct dma_slave_config *config)
+{
+	if (!s3cchan->slave)
+		return -EINVAL;
+
+	/* Reject definitely invalid configurations */
+	if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+	    config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+		return -EINVAL;
+
+	s3cchan->cfg = *config;
+
+	return 0;
+}
+
+/*
+ * Transfer handling
+ */
+
+static inline
+struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct s3c24xx_txd, vd.tx);
+}
+
+static struct s3c24xx_txd *s3c24xx_dma_get_txd(void)
+{
+	struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+
+	if (txd) {
+		INIT_LIST_HEAD(&txd->dsg_list);
+		txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD;
+	}
+
+	return txd;
+}
+
+static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_sg *dsg, *_dsg;
+
+	list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
+		list_del(&dsg->node);
+		kfree(dsg);
+	}
+
+	kfree(txd);
+}
+
+static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan,
+				       struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+	u32 dcon = txd->dcon;
+	u32 val;
+
+	/* transfer-size and -count from len and width */
+	switch (txd->width) {
+	case 1:
+		dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len;
+		break;
+	case 2:
+		dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2);
+		break;
+	case 4:
+		dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4);
+		break;
+	}
+
+	if (s3cchan->slave) {
+		struct s3c24xx_dma_channel *cdata =
+					&pdata->channels[s3cchan->id];
+
+		if (s3cdma->sdata->has_reqsel) {
+			writel_relaxed((cdata->chansel << 1) |
+							S3C24XX_DMAREQSEL_HW,
+					phy->base + S3C24XX_DMAREQSEL);
+		} else {
+			int csel = cdata->chansel >> (phy->id *
+							S3C24XX_CHANSEL_WIDTH);
+
+			csel &= S3C24XX_CHANSEL_REQ_MASK;
+			dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT;
+			dcon |= S3C24XX_DCON_HWTRIG;
+		}
+	} else {
+		if (s3cdma->sdata->has_reqsel)
+			writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL);
+	}
+
+	writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC);
+	writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC);
+	writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST);
+	writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC);
+	writel_relaxed(dcon, phy->base + S3C24XX_DCON);
+
+	val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG);
+	val &= ~S3C24XX_DMASKTRIG_STOP;
+	val |= S3C24XX_DMASKTRIG_ON;
+
+	/* trigger the dma operation for memcpy transfers */
+	if (!s3cchan->slave)
+		val |= S3C24XX_DMASKTRIG_SWTRIG;
+
+	writel(val, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Set the initial DMA register values and start first sg.
+ */
+static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc);
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+
+	list_del(&txd->vd.node);
+
+	s3cchan->at = txd;
+
+	/* Wait for channel inactive */
+	while (s3c24xx_dma_phy_busy(phy))
+		cpu_relax();
+
+	/* point to the first element of the sg list */
+	txd->at = txd->dsg_list.next;
+	s3c24xx_dma_start_next_sg(s3cchan, txd);
+}
+
+static void s3c24xx_dma_free_txd_list(struct s3c24xx_dma_engine *s3cdma,
+				struct s3c24xx_dma_chan *s3cchan)
+{
+	LIST_HEAD(head);
+
+	vchan_get_all_descriptors(&s3cchan->vc, &head);
+	vchan_dma_desc_free_list(&s3cchan->vc, &head);
+}
+
+/*
+ * Try to allocate a physical channel.  When successful, assign it to
+ * this virtual channel, and initiate the next descriptor.  The
+ * virtual channel lock must be held at this point.
+ */
+static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy;
+
+	phy = s3c24xx_dma_get_phy(s3cchan);
+	if (!phy) {
+		dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n",
+			s3cchan->name);
+		s3cchan->state = S3C24XX_DMA_CHAN_WAITING;
+		return;
+	}
+
+	dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy,
+	struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+
+	dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	/*
+	 * We do this without taking the lock; we're really only concerned
+	 * about whether this pointer is NULL or not, and we're guaranteed
+	 * that this will only be called when it _already_ is non-NULL.
+	 */
+	phy->serving = s3cchan;
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+/*
+ * Free a physical DMA channel, potentially reallocating it to another
+ * virtual channel if we have any pending.
+ */
+static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_chan *p, *next;
+
+retry:
+	next = NULL;
+
+	/* Find a waiting virtual channel for the next transfer. */
+	list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node)
+		if (p->state == S3C24XX_DMA_CHAN_WAITING) {
+			next = p;
+			break;
+		}
+
+	if (!next) {
+		list_for_each_entry(p, &s3cdma->slave.channels,
+				    vc.chan.device_node)
+			if (p->state == S3C24XX_DMA_CHAN_WAITING &&
+				      s3c24xx_dma_phy_valid(p, s3cchan->phy)) {
+				next = p;
+				break;
+			}
+	}
+
+	/* Ensure that the physical channel is stopped */
+	s3c24xx_dma_terminate_phy(s3cchan->phy);
+
+	if (next) {
+		bool success;
+
+		/*
+		 * Eww.  We know this isn't going to deadlock
+		 * but lockdep probably doesn't.
+		 */
+		spin_lock(&next->vc.lock);
+		/* Re-check the state now that we have the lock */
+		success = next->state == S3C24XX_DMA_CHAN_WAITING;
+		if (success)
+			s3c24xx_dma_phy_reassign_start(s3cchan->phy, next);
+		spin_unlock(&next->vc.lock);
+
+		/* If the state changed, try to find another channel */
+		if (!success)
+			goto retry;
+	} else {
+		/* No more jobs, so free up the physical channel */
+		s3c24xx_dma_put_phy(s3cchan->phy);
+	}
+
+	s3cchan->phy = NULL;
+	s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+}
+
+static void s3c24xx_dma_unmap_buffers(struct s3c24xx_txd *txd)
+{
+	struct device *dev = txd->vd.tx.chan->device->dev;
+	struct s3c24xx_sg *dsg;
+
+	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		if (txd->vd.tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_single(dev, dsg->src_addr, dsg->len,
+						DMA_TO_DEVICE);
+		else {
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_page(dev, dsg->src_addr, dsg->len,
+						DMA_TO_DEVICE);
+		}
+	}
+
+	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		if (txd->vd.tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_single(dev, dsg->dst_addr, dsg->len,
+						DMA_FROM_DEVICE);
+		else
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_page(dev, dsg->dst_addr, dsg->len,
+						DMA_FROM_DEVICE);
+	}
+}
+
+static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd)
+{
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan);
+
+	if (!s3cchan->slave)
+		s3c24xx_dma_unmap_buffers(txd);
+
+	s3c24xx_dma_free_txd(txd);
+}
+
+static irqreturn_t s3c24xx_dma_irq(int irq, void *data)
+{
+	struct s3c24xx_dma_phy *phy = data;
+	struct s3c24xx_dma_chan *s3cchan = phy->serving;
+	struct s3c24xx_txd *txd;
+
+	dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id);
+
+	/*
+	 * Interrupts happen to notify the completion of a transfer and the
+	 * channel should have moved into its stop state already on its own.
+	 * Therefore interrupts on channels not bound to a virtual channel
+	 * should never happen. Nevertheless send a terminate command to the
+	 * channel if the unlikely case happens.
+	 */
+	if (unlikely(!s3cchan)) {
+		dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n",
+			phy->id);
+
+		s3c24xx_dma_terminate_phy(phy);
+
+		return IRQ_HANDLED;
+	}
+
+	spin_lock(&s3cchan->vc.lock);
+	txd = s3cchan->at;
+	if (txd) {
+		/* when more sg's are in this txd, start the next one */
+		if (!list_is_last(txd->at, &txd->dsg_list)) {
+			txd->at = txd->at->next;
+			s3c24xx_dma_start_next_sg(s3cchan, txd);
+		} else {
+			s3cchan->at = NULL;
+			vchan_cookie_complete(&txd->vd);
+
+			/*
+			 * And start the next descriptor (if any),
+			 * otherwise free this channel.
+			 */
+			if (vchan_next_desc(&s3cchan->vc))
+				s3c24xx_dma_start_next_txd(s3cchan);
+			else
+				s3c24xx_dma_phy_free(s3cchan);
+		}
+	}
+	spin_unlock(&s3cchan->vc.lock);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * The DMA ENGINE API
+ */
+
+static int s3c24xx_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			 unsigned long arg)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+
+	switch (cmd) {
+	case DMA_SLAVE_CONFIG:
+		ret = s3c24xx_dma_set_runtime_config(s3cchan,
+					      (struct dma_slave_config *)arg);
+		break;
+	case DMA_TERMINATE_ALL:
+		if (!s3cchan->phy && !s3cchan->at) {
+			dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n",
+				s3cchan->id);
+			ret = -EINVAL;
+			break;
+		}
+
+		s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+
+		 /* Mark physical channel as free */
+		if (s3cchan->phy)
+			s3c24xx_dma_phy_free(s3cchan);
+
+		/* Dequeue current job */
+		if (s3cchan->at) {
+			s3c24xx_dma_desc_free(&s3cchan->at->vd);
+			s3cchan->at = NULL;
+		}
+
+		/* Dequeue jobs not yet fired as well */
+		s3c24xx_dma_free_txd_list(s3cdma, s3cchan);
+		break;
+	default:
+		/* Unknown command */
+		ret = -ENXIO;
+		break;
+	}
+
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	return ret;
+}
+
+static int s3c24xx_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	/* Ensure all queued descriptors are freed */
+	vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS) {
+		spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+		return ret;
+	}
+
+	/*
+	 * There's no point calculating the residue if there's
+	 * no txstate to store the value.
+	 */
+	if (!txstate) {
+		spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+		return ret;
+	}
+
+	vd = vchan_find_desc(&s3cchan->vc, cookie);
+	if (vd) {
+		/* On the issued list, so hasn't been processed yet */
+		txd = to_s3c24xx_txd(&vd->tx);
+
+		list_for_each_entry(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+	} else {
+		/*
+		 * Currently running, so sum over the pending sg's and
+		 * the currently active one.
+		 */
+		txd = s3cchan->at;
+
+		dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+		list_for_each_entry_from(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+
+		bytes += s3c24xx_dma_getbytes_chan(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	/*
+	 * This cookie not complete yet
+	 * Get number of bytes left in the active transactions and queue
+	 */
+	dma_set_residue(txstate, bytes);
+
+	/* Whether waiting or running, we're in progress */
+	return ret;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	int src_mod, dest_mod;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %d bytes from %s\n",
+			len, s3cchan->name);
+
+	if ((len & S3C24XX_DCON_TC_MASK) != len) {
+		dev_err(&s3cdma->pdev->dev, "memcpy size %d to large\n", len);
+		return NULL;
+	}
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+	if (!dsg) {
+		s3c24xx_dma_free_txd(txd);
+		return NULL;
+	}
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->src_addr = src;
+	dsg->dst_addr = dest;
+	dsg->len = len;
+
+	/*
+	 * Determine a suitable transfer width.
+	 * The DMA controller cannot fetch/store information which is not
+	 * naturally aligned on the bus, i.e., a 4 byte fetch must start at
+	 * an address divisible by 4 - more generally addr % width must be 0.
+	 */
+	src_mod = src % 4;
+	dest_mod = dest % 4;
+	switch (len % 4) {
+	case 0:
+		txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1;
+		break;
+	case 2:
+		txd->width = ((src_mod == 2 || src_mod == 0) &&
+			      (dest_mod == 2 || dest_mod == 0)) ? 2 : 1;
+		break;
+	default:
+		txd->width = 1;
+		break;
+	}
+
+	txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT;
+	txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT;
+	txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK |
+		     S3C24XX_DCON_SERV_WHOLE;
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct scatterlist *sg;
+	dma_addr_t slave_addr;
+	u32 hwcfg = 0;
+	int tmp;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n",
+			sg_dma_len(sgl), s3cchan->name);
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	if (cdata->handshake)
+		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+	switch (cdata->bus) {
+	case S3C24XX_DMA_APB:
+		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_APB;
+		break;
+	case S3C24XX_DMA_AHB:
+		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+		break;
+	}
+
+	/*
+	 * Always assume our peripheral desintation is a fixed
+	 * address in memory.
+	 */
+	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+	/*
+	 * Individual dma operations are requested by the slave,
+	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+	 */
+	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+			      S3C24XX_DISRCC_INC_INCREMENT;
+		txd->didstc = hwcfg;
+		slave_addr = s3cchan->cfg.dst_addr;
+		txd->width = s3cchan->cfg.dst_addr_width;
+	} else if (direction == DMA_DEV_TO_MEM) {
+		txd->disrcc = hwcfg;
+		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+			      S3C24XX_DIDSTC_INC_INCREMENT;
+		slave_addr = s3cchan->cfg.src_addr;
+		txd->width = s3cchan->cfg.src_addr_width;
+	} else {
+		s3c24xx_dma_free_txd(txd);
+		dev_err(&s3cdma->pdev->dev,
+			"direction %d unsupported\n", direction);
+		return NULL;
+	}
+
+	for_each_sg(sgl, sg, sg_len, tmp) {
+		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+		if (!dsg) {
+			s3c24xx_dma_free_txd(txd);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = sg_dma_len(sg);
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = sg_dma_address(sg);
+			dsg->dst_addr = slave_addr;
+		} else { /* DMA_DEV_TO_MEM */
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = sg_dma_address(sg);
+		}
+		break;
+	}
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+/*
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void s3c24xx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	if (vchan_issue_pending(&s3cchan->vc)) {
+		if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING)
+			s3c24xx_dma_phy_alloc_and_start(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+}
+
+/*
+ * Bringup and teardown
+ */
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ */
+static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma,
+		struct dma_device *dmadev, unsigned int channels, bool slave)
+{
+	struct s3c24xx_dma_chan *chan;
+	int i;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+
+	/*
+	 * Register as many many memcpy as we have physical channels,
+	 * we won't always be able to use all but the code will have
+	 * to cope with that situation.
+	 */
+	for (i = 0; i < channels; i++) {
+		chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL);
+		if (!chan) {
+			dev_err(dmadev->dev,
+				"%s no memory for channel\n", __func__);
+			return -ENOMEM;
+		}
+
+		chan->id = i;
+		chan->host = s3cdma;
+		chan->state = S3C24XX_DMA_CHAN_IDLE;
+
+		if (slave) {
+			chan->slave = true;
+			chan->name = kasprintf(GFP_KERNEL, "slave%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		} else {
+			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		}
+		dev_dbg(dmadev->dev,
+			 "initialize virtual channel \"%s\"\n",
+			 chan->name);
+
+		chan->vc.desc_free = s3c24xx_dma_desc_free;
+		vchan_init(&chan->vc, dmadev);
+	}
+	dev_info(dmadev->dev, "initialized %d virtual %s channels\n",
+		 i, slave ? "slave" : "memcpy");
+	return i;
+}
+
+static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev)
+{
+	struct s3c24xx_dma_chan *chan = NULL;
+	struct s3c24xx_dma_chan *next;
+
+	list_for_each_entry_safe(chan,
+				 next, &dmadev->channels, vc.chan.device_node)
+		list_del(&chan->vc.chan.device_node);
+}
+
+/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2412 = {
+	.stride = 0x40,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2443 = {
+	.stride = 0x100,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+static struct platform_device_id s3c24xx_dma_driver_ids[] = {
+	{
+		.name		= "s3c2412-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2412,
+	}, {
+		.name		= "s3c2443-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2443,
+	},
+	{ },
+};
+
+static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev)
+{
+	return (struct soc_data *)
+			 platform_get_device_id(pdev)->driver_data;
+}
+
+static int s3c24xx_dma_probe(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma;
+	struct soc_data *sdata;
+	struct resource *res;
+	int ret;
+	int i;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "platform data missing\n");
+		return -ENODEV;
+	}
+
+	/* Basic sanity check */
+	if (pdata->num_phy_channels > MAX_DMA_CHANNELS) {
+		dev_err(&pdev->dev, "to many dma channels %d, max %d\n",
+			pdata->num_phy_channels, MAX_DMA_CHANNELS);
+		return -EINVAL;
+	}
+
+	sdata = s3c24xx_dma_get_soc_data(pdev);
+	if (!sdata)
+		return -EINVAL;
+
+	s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL);
+	if (!s3cdma)
+		return -ENOMEM;
+
+	s3cdma->pdev = pdev;
+	s3cdma->pdata = pdata;
+	s3cdma->sdata = sdata;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	s3cdma->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(s3cdma->base))
+		return PTR_ERR(s3cdma->base);
+
+	s3cdma->phy_chans = devm_kzalloc(&pdev->dev,
+					      sizeof(struct s3c24xx_dma_phy) *
+							pdata->num_phy_channels,
+					      GFP_KERNEL);
+	if (!s3cdma->phy_chans)
+		return -ENOMEM;
+
+	/* aquire irqs and clocks for all physical channels */
+	for (i = 0; i < pdata->num_phy_channels; i++) {
+		struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+		char clk_name[6];
+
+		phy->id = i;
+		phy->base = s3cdma->base + (i * sdata->stride);
+		phy->host = s3cdma;
+
+		phy->irq = platform_get_irq(pdev, i);
+		if (phy->irq < 0) {
+			dev_err(&pdev->dev, "failed to get irq %d, err %d\n",
+				i, phy->irq);
+			continue;
+		}
+
+		ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq,
+				       0, pdev->name, phy);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n",
+				i, ret);
+			continue;
+		}
+
+		if (sdata->has_clocks) {
+			sprintf(clk_name, "dma.%d", i);
+			phy->clk = devm_clk_get(&pdev->dev, clk_name);
+			if (IS_ERR(phy->clk) && sdata->has_clocks) {
+				dev_err(&pdev->dev, "unable to aquire clock for channel %d, error %lu",
+					i, PTR_ERR(phy->clk));
+				continue;
+			}
+
+			ret = clk_prepare(phy->clk);
+			if (ret) {
+				dev_err(&pdev->dev, "clock for phy %d failed, error %d\n",
+					i, ret);
+				continue;
+			}
+		}
+
+		spin_lock_init(&phy->lock);
+		phy->valid = true;
+
+		dev_dbg(&pdev->dev, "physical channel %d is %s\n",
+			i, s3c24xx_dma_phy_busy(phy) ? "BUSY" : "FREE");
+	}
+
+	/* Initialize memcpy engine */
+	dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask);
+	s3cdma->memcpy.dev = &pdev->dev;
+	s3cdma->memcpy.device_alloc_chan_resources =
+					s3c24xx_dma_alloc_chan_resources;
+	s3cdma->memcpy.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy;
+	s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->memcpy.device_control = s3c24xx_dma_control;
+
+	/* Initialize slave engine for SoC internal dedicated peripherals */
+	dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
+	s3cdma->slave.dev = &pdev->dev;
+	s3cdma->slave.device_alloc_chan_resources =
+					s3c24xx_dma_alloc_chan_resources;
+	s3cdma->slave.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg;
+	s3cdma->slave.device_control = s3c24xx_dma_control;
+
+	/* Register as many memcpy channels as there are physical channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy,
+						pdata->num_phy_channels, false);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			 "%s failed to enumerate memcpy channels - %d\n",
+			 __func__, ret);
+		goto err_memcpy;
+	}
+
+	/* Register slave channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->slave,
+				pdata->num_channels, true);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			"%s failed to enumerate slave channels - %d\n",
+				__func__, ret);
+		goto err_slave;
+	}
+
+	ret = dma_async_device_register(&s3cdma->memcpy);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register memcpy as an async device - %d\n",
+			__func__, ret);
+		goto err_memcpy_reg;
+	}
+
+	ret = dma_async_device_register(&s3cdma->slave);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register slave as an async device - %d\n",
+			__func__, ret);
+		goto err_slave_reg;
+	}
+
+	platform_set_drvdata(pdev, s3cdma);
+	dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n",
+		 pdata->num_phy_channels);
+
+	return 0;
+
+err_slave_reg:
+	dma_async_device_unregister(&s3cdma->memcpy);
+err_memcpy_reg:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+err_slave:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+err_memcpy:
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return ret;
+}
+
+static int s3c24xx_dma_remove(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev);
+	struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev);
+	int i;
+
+	dma_async_device_unregister(&s3cdma->slave);
+	dma_async_device_unregister(&s3cdma->memcpy);
+
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return 0;
+}
+
+static struct platform_driver s3c24xx_dma_driver = {
+	.driver		= {
+		.name	= "s3c24xx-dma",
+		.owner	= THIS_MODULE,
+	},
+	.id_table	= s3c24xx_dma_driver_ids,
+	.probe		= s3c24xx_dma_probe,
+	.remove		= s3c24xx_dma_remove,
+};
+
+module_platform_driver(s3c24xx_dma_driver);
+
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct s3c24xx_dma_chan *s3cchan;
+
+	if (chan->device->dev->driver != &s3c24xx_dma_driver.driver)
+		return false;
+
+	s3cchan = to_s3c24xx_dma_chan(chan);
+
+	return s3cchan->id == (int)param;
+}
+EXPORT_SYMBOL(s3c24xx_dma_filter);
+
+MODULE_DESCRIPTION("S3C24XX DMA Driver");
+MODULE_AUTHOR("Heiko Stuebner");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h
new file mode 100644
index 000000000000..5a0cfffe3bbb
--- /dev/null
+++ b/include/linux/platform_data/dma-s3c24xx.h
@@ -0,0 +1,43 @@
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+enum s3c24xx_dma_bus {
+	S3C24XX_DMA_APB,
+	S3C24XX_DMA_AHB,
+};
+
+/**
+ * @bus: on which bus does the peripheral reside - AHB or APB.
+ * @handshake: is a handshake with the peripheral necessary
+ * @chansel: channel selection information, depending on variant; reqsel for
+ *	     s3c2443 and later and channel-selection map for earlier SoCs
+ *	     see CHANSEL doc in s3c2443-dma.c
+ */
+struct s3c24xx_dma_channel {
+	enum s3c24xx_dma_bus bus;
+	bool handshake;
+	u16 chansel;
+};
+
+/**
+ * struct s3c24xx_dma_platdata - platform specific settings
+ * @num_phy_channels: number of physical channels
+ * @channels: array of virtual channel descriptions
+ * @num_channels: number of virtual channels
+ */
+struct s3c24xx_dma_platdata {
+	int num_phy_channels;
+	struct s3c24xx_dma_channel *channels;
+	int num_channels;
+};
+
+struct dma_chan;
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param);
-- 
cgit v1.2.3


From f61027426a5bc7093aa8359a411b053a35bb4b68 Mon Sep 17 00:00:00 2001
From: Mike Turquette <mturquette@linaro.org>
Date: Mon, 7 Oct 2013 23:12:13 -0700
Subject: clk: of: helper for determining number of parent clocks

Walks the "clocks" array of parent clock phandles and returns the
number.

Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk.c            | 6 ++++++
 include/linux/clk-provider.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 32e2fed6d143..2cf2ea6b77a1 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2202,6 +2202,12 @@ struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec)
 	return clk;
 }
 
+int of_clk_get_parent_count(struct device_node *np)
+{
+	return of_count_phandle_with_args(np, "clocks", "#clock-cells");
+}
+EXPORT_SYMBOL_GPL(of_clk_get_parent_count);
+
 const char *of_clk_get_parent_name(struct device_node *np, int index)
 {
 	struct of_phandle_args clkspec;
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 73bdb69f0c08..7e59253b8603 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -472,6 +472,7 @@ void of_clk_del_provider(struct device_node *np);
 struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec,
 				  void *data);
 struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data);
+int of_clk_get_parent_count(struct device_node *np);
 const char *of_clk_get_parent_name(struct device_node *np, int index);
 
 void of_clk_init(const struct of_device_id *matches);
-- 
cgit v1.2.3


From efe4208f47f907b86f528788da711e8ab9dea44d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 3 Oct 2013 15:42:29 -0700
Subject: ipv6: make lookups simpler and faster

TCP listener refactoring, part 4 :

To speed up inet lookups, we moved IPv4 addresses from inet to struct
sock_common

Now is time to do the same for IPv6, because it permits us to have fast
lookups for all kind of sockets, including upcoming SYN_RECV.

Getting IPv6 addresses in TCP lookups currently requires two extra cache
lines, plus a dereference (and memory stall).

inet6_sk(sk) does the dereference of inet_sk(__sk)->pinet6

This patch is way bigger than its IPv4 counter part, because for IPv4,
we could add aliases (inet_daddr, inet_rcv_saddr), while on IPv6,
it's not doable easily.

inet6_sk(sk)->daddr becomes sk->sk_v6_daddr
inet6_sk(sk)->rcv_saddr becomes sk->sk_v6_rcv_saddr

And timewait socket also have tw->tw_v6_daddr & tw->tw_v6_rcv_saddr
at the same offset.

We get rid of INET6_TW_MATCH() as INET6_MATCH() is now the generic
macro.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h                           | 46 +++-----------------
 include/net/inet6_hashtables.h                 |  5 +--
 include/net/inet_timewait_sock.h               |  4 +-
 include/net/ip.h                               |  2 +-
 include/net/ip6_checksum.h                     |  2 +-
 include/net/sock.h                             |  9 ++++
 net/dccp/ipv6.c                                | 24 +++++------
 net/dccp/ipv6.h                                |  1 -
 net/dccp/minisocks.c                           |  7 +---
 net/ipv4/inet_diag.c                           | 35 +++++++---------
 net/ipv4/ping.c                                | 15 ++++---
 net/ipv4/tcp_metrics.c                         | 10 ++---
 net/ipv4/tcp_minisocks.c                       |  7 +---
 net/ipv4/tcp_probe.c                           | 29 +++++--------
 net/ipv4/tcp_timer.c                           |  3 +-
 net/ipv6/af_inet6.c                            | 10 ++---
 net/ipv6/datagram.c                            | 25 ++++++-----
 net/ipv6/inet6_connection_sock.c               |  7 ++--
 net/ipv6/inet6_hashtables.c                    | 58 +++++++++-----------------
 net/ipv6/ipv6_sockglue.c                       |  7 ++--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  4 +-
 net/ipv6/ping.c                                |  2 +-
 net/ipv6/raw.c                                 | 17 ++++----
 net/ipv6/tcp_ipv6.c                            | 44 ++++++++++---------
 net/ipv6/udp.c                                 | 48 ++++++++++-----------
 net/l2tp/l2tp_core.c                           | 10 ++---
 net/l2tp/l2tp_debugfs.c                        |  5 ++-
 net/l2tp/l2tp_ip6.c                            | 16 +++----
 net/l2tp/l2tp_netlink.c                        |  4 +-
 net/l2tp/l2tp_ppp.c                            | 12 +++---
 net/netfilter/xt_TPROXY.c                      |  2 +-
 net/netfilter/xt_socket.c                      |  2 +-
 net/sctp/ipv6.c                                | 22 +++++-----
 net/sunrpc/svcsock.c                           |  2 +-
 security/lsm_audit.c                           |  5 +--
 35 files changed, 213 insertions(+), 288 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index b7f1f3bb346d..35f6c1b562c4 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -141,8 +141,6 @@ struct ipv6_fl_socklist;
  */
 struct ipv6_pinfo {
 	struct in6_addr 	saddr;
-	struct in6_addr 	rcv_saddr;
-	struct in6_addr		daddr;
 	struct in6_pktinfo	sticky_pktinfo;
 	const struct in6_addr		*daddr_cache;
 #ifdef CONFIG_IPV6_SUBTREES
@@ -256,22 +254,10 @@ struct tcp6_sock {
 
 extern int inet6_sk_rebuild_header(struct sock *sk);
 
-struct inet6_timewait_sock {
-	struct in6_addr tw_v6_daddr;
-	struct in6_addr	tw_v6_rcv_saddr;
-};
-
 struct tcp6_timewait_sock {
 	struct tcp_timewait_sock   tcp6tw_tcp;
-	struct inet6_timewait_sock tcp6tw_inet6;
 };
 
-static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk)
-{
-	return (struct inet6_timewait_sock *)(((u8 *)sk) +
-					      inet_twsk(sk)->tw_ipv6_offset);
-}
-
 #if IS_ENABLED(CONFIG_IPV6)
 static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
 {
@@ -321,21 +307,11 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
 #define __ipv6_only_sock(sk)	(inet6_sk(sk)->ipv6only)
 #define ipv6_only_sock(sk)	((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk))
 
-static inline u16 inet6_tw_offset(const struct proto *prot)
-{
-	return prot->twsk_prot->twsk_obj_size -
-			sizeof(struct inet6_timewait_sock);
-}
-
-static inline struct in6_addr *__inet6_rcv_saddr(const struct sock *sk)
+static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk)
 {
-	return likely(sk->sk_state != TCP_TIME_WAIT) ?
-		&inet6_sk(sk)->rcv_saddr : &inet6_twsk(sk)->tw_v6_rcv_saddr;
-}
-
-static inline struct in6_addr *inet6_rcv_saddr(const struct sock *sk)
-{
-	return sk->sk_family == AF_INET6 ? __inet6_rcv_saddr(sk) : NULL;
+	if (sk->sk_family == AF_INET6)
+		return &sk->sk_v6_rcv_saddr;
+	return NULL;
 }
 
 static inline int inet_v6_ipv6only(const struct sock *sk)
@@ -363,7 +339,6 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 	return NULL;
 }
 
-#define __inet6_rcv_saddr(__sk)	NULL
 #define inet6_rcv_saddr(__sk)	NULL
 #define tcp_twsk_ipv6only(__sk)		0
 #define inet_v6_ipv6only(__sk)		0
@@ -372,19 +347,10 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_portpair == (__ports))			&&	\
 	 ((__sk)->sk_family == AF_INET6)			&&	\
-	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&&	\
-	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&&	\
+	 ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr))		&&	\
+	 ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr))	&&	\
 	 (!(__sk)->sk_bound_dev_if	||				\
 	   ((__sk)->sk_bound_dev_if == (__dif))) 		&&	\
 	 net_eq(sock_net(__sk), (__net)))
 
-#define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	   \
-	(((__sk)->sk_portpair == (__ports))				&& \
-	 ((__sk)->sk_family == AF_INET6)				&& \
-	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))	&& \
-	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \
-	 (!(__sk)->sk_bound_dev_if	||				   \
-	  ((__sk)->sk_bound_dev_if == (__dif)))				&& \
-	 net_eq(sock_net(__sk), (__net)))
-
 #endif /* _IPV6_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index f52fa88feb64..a105d1a2fc00 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -43,9 +43,8 @@ static inline unsigned int inet6_ehashfn(struct net *net,
 static inline int inet6_sk_ehashfn(const struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct in6_addr *laddr = &np->rcv_saddr;
-	const struct in6_addr *faddr = &np->daddr;
+	const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
+	const struct in6_addr *faddr = &sk->sk_v6_daddr;
 	const __u16 lport = inet->inet_num;
 	const __be16 fport = inet->inet_dport;
 	struct net *net = sock_net(sk);
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index de9e3ab7d43d..b647c6270eb7 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -116,7 +116,9 @@ struct inet_timewait_sock {
 #define tw_prot			__tw_common.skc_prot
 #define tw_net			__tw_common.skc_net
 #define tw_daddr        	__tw_common.skc_daddr
+#define tw_v6_daddr		__tw_common.skc_v6_daddr
 #define tw_rcv_saddr    	__tw_common.skc_rcv_saddr
+#define tw_v6_rcv_saddr    	__tw_common.skc_v6_rcv_saddr
 #define tw_dport		__tw_common.skc_dport
 #define tw_num			__tw_common.skc_num
 
@@ -133,7 +135,7 @@ struct inet_timewait_sock {
 				tw_transparent  : 1,
 				tw_pad		: 6,	/* 6 bits hole */
 				tw_tos		: 8,
-				tw_ipv6_offset  : 16;
+				tw_pad2		: 16 /* 16 bits hole */
 	kmemcheck_bitfield_end(flags);
 	u32			tw_ttd;
 	struct inet_bind_bucket	*tw_tb;
diff --git a/include/net/ip.h b/include/net/ip.h
index b39ebe5339ac..217bc5bfc6c6 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -374,7 +374,7 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
 		memset(&np->saddr, 0, sizeof(np->saddr));
-		memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
+		memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr));
 	}
 #endif
 }
diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index 7686e3f5033d..1944406949ba 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h
@@ -70,7 +70,7 @@ static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 
-	__tcp_v6_send_check(skb, &np->saddr, &np->daddr);
+	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
 }
 
 int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto);
diff --git a/include/net/sock.h b/include/net/sock.h
index 3f3e48c4704d..7e50df5c71d4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -191,6 +191,12 @@ struct sock_common {
 #ifdef CONFIG_NET_NS
 	struct net	 	*skc_net;
 #endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+	struct in6_addr		skc_v6_daddr;
+	struct in6_addr		skc_v6_rcv_saddr;
+#endif
+
 	/*
 	 * fields between dontcopy_begin/dontcopy_end
 	 * are not copied in sock_copy()
@@ -314,6 +320,9 @@ struct sock {
 #define sk_bind_node		__sk_common.skc_bind_node
 #define sk_prot			__sk_common.skc_prot
 #define sk_net			__sk_common.skc_net
+#define sk_v6_daddr		__sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr	__sk_common.skc_v6_rcv_saddr
+
 	socket_lock_t		sk_lock;
 	struct sk_buff_head	sk_receive_queue;
 	/*
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6cf9f7782ad4..7f075b83128a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -67,7 +67,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
 	struct dccp_hdr *dh = dccp_hdr(skb);
 
 	dccp_csum_outgoing(skb);
-	dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
+	dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
 }
 
 static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
@@ -467,11 +467,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
 
 		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
-		newnp->rcv_saddr = newnp->saddr;
+		newsk->sk_v6_rcv_saddr = newnp->saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
@@ -538,9 +538,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	newnp->daddr = ireq6->rmt_addr;
+	newsk->sk_v6_daddr = ireq6->rmt_addr;
 	newnp->saddr = ireq6->loc_addr;
-	newnp->rcv_saddr = ireq6->loc_addr;
+	newsk->sk_v6_rcv_saddr = ireq6->loc_addr;
 	newsk->sk_bound_dev_if = ireq6->iif;
 
 	/* Now IPv6 options...
@@ -885,7 +885,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			return -EINVAL;
 	}
 
-	np->daddr = usin->sin6_addr;
+	sk->sk_v6_daddr = usin->sin6_addr;
 	np->flow_label = fl6.flowlabel;
 
 	/*
@@ -915,16 +915,16 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			goto failure;
 		}
 		ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
-		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr);
+		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr);
 
 		return err;
 	}
 
-	if (!ipv6_addr_any(&np->rcv_saddr))
-		saddr = &np->rcv_saddr;
+	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+		saddr = &sk->sk_v6_rcv_saddr;
 
 	fl6.flowi6_proto = IPPROTO_DCCP;
-	fl6.daddr = np->daddr;
+	fl6.daddr = sk->sk_v6_daddr;
 	fl6.saddr = saddr ? *saddr : np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.fl6_dport = usin->sin6_port;
@@ -941,7 +941,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (saddr == NULL) {
 		saddr = &fl6.saddr;
-		np->rcv_saddr = *saddr;
+		sk->sk_v6_rcv_saddr = *saddr;
 	}
 
 	/* set the source address */
@@ -963,7 +963,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		goto late_failure;
 
 	dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
-						      np->daddr.s6_addr32,
+						      sk->sk_v6_daddr.s6_addr32,
 						      inet->inet_sport,
 						      inet->inet_dport);
 	err = dccp_connect(sk);
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 6eef81fdbe56..6604fc3fe953 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -30,7 +30,6 @@ struct dccp6_request_sock {
 
 struct dccp6_timewait_sock {
 	struct inet_timewait_sock   inet;
-	struct inet6_timewait_sock  tw6;
 };
 
 #endif /* _DCCP_IPV6_H */
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 662071b249cc..32e80d96d4c0 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -56,12 +56,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
 			const struct ipv6_pinfo *np = inet6_sk(sk);
-			struct inet6_timewait_sock *tw6;
 
-			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
-			tw6 = inet6_twsk((struct sock *)tw);
-			tw6->tw_v6_daddr = np->daddr;
-			tw6->tw_v6_rcv_saddr = np->rcv_saddr;
+			tw->tw_v6_daddr = sk->sk_v6_daddr;
+			tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8e1e40653357..ecc179d676e4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -121,13 +121,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 
 #if IS_ENABLED(CONFIG_IPV6)
 	if (r->idiag_family == AF_INET6) {
-		const struct ipv6_pinfo *np = inet6_sk(sk);
 
-		*(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
-		*(struct in6_addr *)r->id.idiag_dst = np->daddr;
+		*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
+		*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
 
 		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
-			if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0)
+			if (nla_put_u8(skb, INET_DIAG_TCLASS,
+				       inet6_sk(sk)->tclass) < 0)
 				goto errout;
 	}
 #endif
@@ -255,11 +255,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 	r->idiag_inode	      = 0;
 #if IS_ENABLED(CONFIG_IPV6)
 	if (tw->tw_family == AF_INET6) {
-		const struct inet6_timewait_sock *tw6 =
-						inet6_twsk((struct sock *)tw);
-
-		*(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
-		*(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
+		*(struct in6_addr *)r->id.idiag_src = tw->tw_v6_rcv_saddr;
+		*(struct in6_addr *)r->id.idiag_dst = tw->tw_v6_daddr;
 	}
 #endif
 
@@ -273,10 +270,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			const struct nlmsghdr *unlh)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
-		return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
-					   skb, r, portid, seq, nlmsg_flags,
-					   unlh);
-	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh);
+		return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq,
+					   nlmsg_flags, unlh);
+
+	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
+				  nlmsg_flags, unlh);
 }
 
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
@@ -489,10 +487,9 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
 	entry.family = sk->sk_family;
 #if IS_ENABLED(CONFIG_IPV6)
 	if (entry.family == AF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
 
-		entry.saddr = np->rcv_saddr.s6_addr32;
-		entry.daddr = np->daddr.s6_addr32;
+		entry.saddr = sk->sk_v6_rcv_saddr.s6_addr32;
+		entry.daddr = sk->sk_v6_daddr.s6_addr32;
 	} else
 #endif
 	{
@@ -649,10 +646,8 @@ static int inet_twsk_diag_dump(struct sock *sk,
 		entry.family = tw->tw_family;
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == AF_INET6) {
-			struct inet6_timewait_sock *tw6 =
-						inet6_twsk((struct sock *)tw);
-			entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
-			entry.daddr = tw6->tw_v6_daddr.s6_addr32;
+			entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32;
+			entry.daddr = tw->tw_v6_daddr.s6_addr32;
 		} else
 #endif
 		{
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index a62610443152..ccefc07beacd 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -202,15 +202,14 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
 #if IS_ENABLED(CONFIG_IPV6)
 		} else if (skb->protocol == htons(ETH_P_IPV6) &&
 			   sk->sk_family == AF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
 
 			pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
 				 (int) isk->inet_num,
-				 &inet6_sk(sk)->rcv_saddr,
+				 &sk->sk_v6_rcv_saddr,
 				 sk->sk_bound_dev_if);
 
-			if (!ipv6_addr_any(&np->rcv_saddr) &&
-			    !ipv6_addr_equal(&np->rcv_saddr,
+			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
+			    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr,
 					     &ipv6_hdr(skb)->daddr))
 				continue;
 #endif
@@ -362,7 +361,7 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
 	} else if (saddr->sa_family == AF_INET6) {
 		struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
 		struct ipv6_pinfo *np = inet6_sk(sk);
-		np->rcv_saddr = np->saddr = addr->sin6_addr;
+		sk->sk_v6_rcv_saddr = np->saddr = addr->sin6_addr;
 #endif
 	}
 }
@@ -376,7 +375,7 @@ static void ping_clear_saddr(struct sock *sk, int dif)
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (sk->sk_family == AF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
-		memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
+		memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr));
 		memset(&np->saddr, 0, sizeof(np->saddr));
 #endif
 	}
@@ -418,7 +417,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	err = 0;
 	if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) ||
 	    (sk->sk_family == AF_INET6 &&
-	     !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)))
+	     !ipv6_addr_any(&sk->sk_v6_rcv_saddr)))
 		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
 
 	if (snum)
@@ -429,7 +428,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == AF_INET6)
-		memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr));
+		memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
 #endif
 
 	sk_dst_reset(sk);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 52f3c6b971d2..27535fd5ea10 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -240,7 +240,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
 
 static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
 {
-	struct inet6_timewait_sock *tw6;
 	struct tcp_metrics_block *tm;
 	struct inetpeer_addr addr;
 	unsigned int hash;
@@ -253,9 +252,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock
 		hash = (__force unsigned int) addr.addr.a4;
 		break;
 	case AF_INET6:
-		tw6 = inet6_twsk((struct sock *)tw);
-		*(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr;
-		hash = ipv6_addr_hash(&tw6->tw_v6_daddr);
+		*(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr;
+		hash = ipv6_addr_hash(&tw->tw_v6_daddr);
 		break;
 	default:
 		return NULL;
@@ -289,8 +287,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
 		hash = (__force unsigned int) addr.addr.a4;
 		break;
 	case AF_INET6:
-		*(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr;
-		hash = ipv6_addr_hash(&inet6_sk(sk)->daddr);
+		*(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr;
+		hash = ipv6_addr_hash(&sk->sk_v6_daddr);
 		break;
 	default:
 		return NULL;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 58a3e69aef64..97b684159861 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -293,12 +293,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
-			struct inet6_timewait_sock *tw6;
 
-			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
-			tw6 = inet6_twsk((struct sock *)tw);
-			tw6->tw_v6_daddr = np->daddr;
-			tw6->tw_v6_rcv_saddr = np->rcv_saddr;
+			tw->tw_v6_daddr = sk->sk_v6_daddr;
+			tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 			tw->tw_tclass = np->tclass;
 			tw->tw_ipv6only = np->ipv6only;
 		}
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 611beab38a00..8b97d71e193b 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -101,22 +101,6 @@ static inline int tcp_probe_avail(void)
 		si4.sin_addr.s_addr = inet->inet_##mem##addr;	\
 	} while (0)						\
 
-#if IS_ENABLED(CONFIG_IPV6)
-#define tcp_probe_copy_fl_to_si6(inet, si6, mem)		\
-	do {							\
-		struct ipv6_pinfo *pi6 = inet->pinet6;		\
-		si6.sin6_family = AF_INET6;			\
-		si6.sin6_port = inet->inet_##mem##port;		\
-		si6.sin6_addr = pi6->mem##addr;			\
-		si6.sin6_flowinfo = 0; /* No need here. */	\
-		si6.sin6_scope_id = 0;	/* No need here. */	\
-	} while (0)
-#else
-#define tcp_probe_copy_fl_to_si6(fl, si6, mem)			\
-	do {							\
-		memset(&si6, 0, sizeof(si6));			\
-	} while (0)
-#endif
 
 /*
  * Hook inserted to be called before each receive packet.
@@ -147,8 +131,17 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
 				break;
 			case AF_INET6:
-				tcp_probe_copy_fl_to_si6(inet, p->src.v6, s);
-				tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d);
+				memset(&p->src.v6, 0, sizeof(p->src.v6));
+				memset(&p->dst.v6, 0, sizeof(p->dst.v6));
+#if IS_ENABLED(CONFIG_IPV6)
+				p->src.v6.sin6_family = AF_INET6;
+				p->src.v6.sin6_port = inet->inet_sport;
+				p->src.v6.sin6_addr = inet6_sk(sk)->saddr;
+
+				p->dst.v6.sin6_family = AF_INET6;
+				p->dst.v6.sin6_port = inet->inet_dport;
+				p->dst.v6.sin6_addr = sk->sk_v6_daddr;
+#endif
 				break;
 			default:
 				BUG();
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4b85e6f636c9..af07b5b23ebf 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -374,9 +374,8 @@ void tcp_retransmit_timer(struct sock *sk)
 		}
 #if IS_ENABLED(CONFIG_IPV6)
 		else if (sk->sk_family == AF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
 			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
-				       &np->daddr,
+				       &sk->sk_v6_daddr,
 				       ntohs(inet->inet_dport), inet->inet_num,
 				       tp->snd_una, tp->snd_nxt);
 		}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 4966b124dc2e..a2cb07cd3850 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -364,7 +364,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	inet->inet_rcv_saddr = v4addr;
 	inet->inet_saddr = v4addr;
 
-	np->rcv_saddr = addr->sin6_addr;
+	sk->sk_v6_rcv_saddr = addr->sin6_addr;
 
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
 		np->saddr = addr->sin6_addr;
@@ -461,14 +461,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		    peer == 1)
 			return -ENOTCONN;
 		sin->sin6_port = inet->inet_dport;
-		sin->sin6_addr = np->daddr;
+		sin->sin6_addr = sk->sk_v6_daddr;
 		if (np->sndflow)
 			sin->sin6_flowinfo = np->flow_label;
 	} else {
-		if (ipv6_addr_any(&np->rcv_saddr))
+		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
 			sin->sin6_addr = np->saddr;
 		else
-			sin->sin6_addr = np->rcv_saddr;
+			sin->sin6_addr = sk->sk_v6_rcv_saddr;
 
 		sin->sin6_port = inet->inet_sport;
 	}
@@ -655,7 +655,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = sk->sk_protocol;
-		fl6.daddr = np->daddr;
+		fl6.daddr = sk->sk_v6_daddr;
 		fl6.saddr = np->saddr;
 		fl6.flowlabel = np->flow_label;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 48b6bd2a9a14..a454b0ff57c7 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -107,16 +107,16 @@ ipv4_connected:
 		if (err)
 			goto out;
 
-		ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
+		ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr);
 
 		if (ipv6_addr_any(&np->saddr) ||
 		    ipv6_mapped_addr_any(&np->saddr))
 			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 
-		if (ipv6_addr_any(&np->rcv_saddr) ||
-		    ipv6_mapped_addr_any(&np->rcv_saddr)) {
+		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) ||
+		    ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) {
 			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
-					       &np->rcv_saddr);
+					       &sk->sk_v6_rcv_saddr);
 			if (sk->sk_prot->rehash)
 				sk->sk_prot->rehash(sk);
 		}
@@ -145,7 +145,7 @@ ipv4_connected:
 		}
 	}
 
-	np->daddr = *daddr;
+	sk->sk_v6_daddr = *daddr;
 	np->flow_label = fl6.flowlabel;
 
 	inet->inet_dport = usin->sin6_port;
@@ -156,7 +156,7 @@ ipv4_connected:
 	 */
 
 	fl6.flowi6_proto = sk->sk_protocol;
-	fl6.daddr = np->daddr;
+	fl6.daddr = sk->sk_v6_daddr;
 	fl6.saddr = np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
@@ -183,16 +183,16 @@ ipv4_connected:
 	if (ipv6_addr_any(&np->saddr))
 		np->saddr = fl6.saddr;
 
-	if (ipv6_addr_any(&np->rcv_saddr)) {
-		np->rcv_saddr = fl6.saddr;
+	if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+		sk->sk_v6_rcv_saddr = fl6.saddr;
 		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 		if (sk->sk_prot->rehash)
 			sk->sk_prot->rehash(sk);
 	}
 
 	ip6_dst_store(sk, dst,
-		      ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
-		      &np->daddr : NULL,
+		      ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+		      &sk->sk_v6_daddr : NULL,
 #ifdef CONFIG_IPV6_SUBTREES
 		      ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
 		      &np->saddr :
@@ -883,11 +883,10 @@ EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
 void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
 			     __u16 srcp, __u16 destp, int bucket)
 {
-	struct ipv6_pinfo *np = inet6_sk(sp);
 	const struct in6_addr *dest, *src;
 
-	dest  = &np->daddr;
-	src   = &np->rcv_saddr;
+	dest  = &sp->sk_v6_daddr;
+	src   = &sp->sk_v6_rcv_saddr;
 	seq_printf(seq,
 		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e4311cbc8b4e..b7400b480e74 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -165,11 +165,10 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
 
 void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 {
-	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
 
 	sin6->sin6_family = AF_INET6;
-	sin6->sin6_addr = np->daddr;
+	sin6->sin6_addr = sk->sk_v6_daddr;
 	sin6->sin6_port	= inet_sk(sk)->inet_dport;
 	/* We do not store received flowlabel for TCP */
 	sin6->sin6_flowinfo = 0;
@@ -203,7 +202,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
 
 	memset(fl6, 0, sizeof(*fl6));
 	fl6->flowi6_proto = sk->sk_protocol;
-	fl6->daddr = np->daddr;
+	fl6->daddr = sk->sk_v6_daddr;
 	fl6->saddr = np->saddr;
 	fl6->flowlabel = np->flow_label;
 	IP6_ECN_flow_xmit(sk, fl6->flowlabel);
@@ -245,7 +244,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 	skb_dst_set_noref(skb, dst);
 
 	/* Restore final destination back after routing done */
-	fl6.daddr = np->daddr;
+	fl6.daddr = sk->sk_v6_daddr;
 
 	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 	rcu_read_unlock();
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 46440777e1c5..842d833dfc18 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -89,30 +89,16 @@ begin:
 	sk_nulls_for_each_rcu(sk, node, &head->chain) {
 		if (sk->sk_hash != hash)
 			continue;
-		if (sk->sk_state == TCP_TIME_WAIT) {
-			if (!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif))
-				continue;
-		} else {
-			if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
-				continue;
-		}
+		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+			continue;
 		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
 			goto out;
 
-		if (sk->sk_state == TCP_TIME_WAIT) {
-			if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
-						     ports, dif))) {
-				sock_gen_put(sk);
-				goto begin;
-			}
-		} else {
-			if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
-						  ports, dif))) {
-				sock_put(sk);
-				goto begin;
-			}
-		goto found;
+		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+			sock_gen_put(sk);
+			goto begin;
 		}
+		goto found;
 	}
 	if (get_nulls_value(node) != slot)
 		goto begin;
@@ -133,11 +119,10 @@ static inline int compute_score(struct sock *sk, struct net *net,
 
 	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
 	    sk->sk_family == PF_INET6) {
-		const struct ipv6_pinfo *np = inet6_sk(sk);
 
 		score = 1;
-		if (!ipv6_addr_any(&np->rcv_saddr)) {
-			if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
 				return -1;
 			score++;
 		}
@@ -229,9 +214,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct in6_addr *daddr = &np->rcv_saddr;
-	const struct in6_addr *saddr = &np->daddr;
+	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+	const struct in6_addr *saddr = &sk->sk_v6_daddr;
 	const int dif = sk->sk_bound_dev_if;
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	struct net *net = sock_net(sk);
@@ -250,23 +234,19 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 		if (sk2->sk_hash != hash)
 			continue;
 
-		if (sk2->sk_state == TCP_TIME_WAIT) {
-			if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
-						  ports, dif))) {
+		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+			if (sk2->sk_state == TCP_TIME_WAIT) {
 				tw = inet_twsk(sk2);
 				if (twsk_unique(sk, sk2, twp))
-					goto unique;
-				else
-					goto not_unique;
+					break;
 			}
-		}
-		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
 			goto not_unique;
+		}
 	}
 
-unique:
 	/* Must record num and sport now. Otherwise we will see
-	 * in hash table socket with a funny identity. */
+	 * in hash table socket with a funny identity.
+	 */
 	inet->inet_num = lport;
 	inet->inet_sport = htons(lport);
 	sk->sk_hash = hash;
@@ -299,9 +279,9 @@ not_unique:
 static inline u32 inet6_sk_port_offset(const struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
-					  np->daddr.s6_addr32,
+
+	return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
+					  sk->sk_v6_daddr.s6_addr32,
 					  inet->inet_dport);
 }
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d1e2e8ef29c5..4919a8e6063e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 			}
 
 			if (ipv6_only_sock(sk) ||
-			    !ipv6_addr_v4mapped(&np->daddr)) {
+			    !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
 				retv = -EADDRNOTAVAIL;
 				break;
 			}
@@ -1011,7 +1011,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				struct in6_pktinfo src_info;
 				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
 					np->sticky_pktinfo.ipi6_ifindex;
-				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+				src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxhlim) {
@@ -1026,7 +1026,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				struct in6_pktinfo src_info;
 				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
 					np->sticky_pktinfo.ipi6_ifindex;
-				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+				src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr :
+								     np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxohlim) {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index d6e4dd8b58df..54b75ead5a69 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -297,9 +297,9 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
 	struct nf_conn *ct;
 
-	tuple.src.u3.in6 = inet6->rcv_saddr;
+	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
 	tuple.src.u.tcp.port = inet->inet_sport;
-	tuple.dst.u3.in6 = inet6->daddr;
+	tuple.dst.u3.in6 = sk->sk_v6_daddr;
 	tuple.dst.u.tcp.port = inet->inet_dport;
 	tuple.dst.protonum = sk->sk_protocol;
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 18f19df4189f..8815e31a87fe 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -116,7 +116,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 	}
 
 	if (!iif)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a4ed2416399e..3c00842b0079 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -77,20 +77,19 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 
 	sk_for_each_from(sk)
 		if (inet_sk(sk)->inet_num == num) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
 
 			if (!net_eq(sock_net(sk), net))
 				continue;
 
-			if (!ipv6_addr_any(&np->daddr) &&
-			    !ipv6_addr_equal(&np->daddr, rmt_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
 			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
 				continue;
 
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+				if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
 					goto found;
 				if (is_multicast &&
 				    inet6_mc_check(sk, loc_addr, rmt_addr))
@@ -302,7 +301,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
-	np->rcv_saddr = addr->sin6_addr;
+	sk->sk_v6_rcv_saddr = addr->sin6_addr;
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
 		np->saddr = addr->sin6_addr;
 	err = 0;
@@ -804,8 +803,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		 * sk->sk_dst_cache.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    ipv6_addr_equal(daddr, &np->daddr))
-			daddr = &np->daddr;
+		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+			daddr = &sk->sk_v6_daddr;
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
@@ -816,7 +815,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 			return -EDESTADDRREQ;
 
 		proto = inet->inet_num;
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 		fl6.flowlabel = np->flow_label;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 528e61afaf5e..541dfc40c7b3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -192,13 +192,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	}
 
 	if (tp->rx_opt.ts_recent_stamp &&
-	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
+	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
 		tp->rx_opt.ts_recent = 0;
 		tp->rx_opt.ts_recent_stamp = 0;
 		tp->write_seq = 0;
 	}
 
-	np->daddr = usin->sin6_addr;
+	sk->sk_v6_daddr = usin->sin6_addr;
 	np->flow_label = fl6.flowlabel;
 
 	/*
@@ -237,17 +237,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		} else {
 			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
-					       &np->rcv_saddr);
+					       &sk->sk_v6_rcv_saddr);
 		}
 
 		return err;
 	}
 
-	if (!ipv6_addr_any(&np->rcv_saddr))
-		saddr = &np->rcv_saddr;
+	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+		saddr = &sk->sk_v6_rcv_saddr;
 
 	fl6.flowi6_proto = IPPROTO_TCP;
-	fl6.daddr = np->daddr;
+	fl6.daddr = sk->sk_v6_daddr;
 	fl6.saddr = saddr ? *saddr : np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
@@ -266,7 +266,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (saddr == NULL) {
 		saddr = &fl6.saddr;
-		np->rcv_saddr = *saddr;
+		sk->sk_v6_rcv_saddr = *saddr;
 	}
 
 	/* set the source address */
@@ -279,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	rt = (struct rt6_info *) dst;
 	if (tcp_death_row.sysctl_tw_recycle &&
 	    !tp->rx_opt.ts_recent_stamp &&
-	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
+	    ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
 		tcp_fetch_timewait_stamp(sk, dst);
 
 	icsk->icsk_ext_hdr_len = 0;
@@ -298,7 +298,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (!tp->write_seq && likely(!tp->repair))
 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
-							     np->daddr.s6_addr32,
+							     sk->sk_v6_daddr.s6_addr32,
 							     inet->inet_sport,
 							     inet->inet_dport);
 
@@ -515,7 +515,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
 						struct sock *addr_sk)
 {
-	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
+	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
 }
 
 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
@@ -621,7 +621,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 
 	if (sk) {
 		saddr = &inet6_sk(sk)->saddr;
-		daddr = &inet6_sk(sk)->daddr;
+		daddr = &sk->sk_v6_daddr;
 	} else if (req) {
 		saddr = &inet6_rsk(req)->loc_addr;
 		daddr = &inet6_rsk(req)->rmt_addr;
@@ -1116,11 +1116,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
 
 		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
-		newnp->rcv_saddr = newnp->saddr;
+		newsk->sk_v6_rcv_saddr = newnp->saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1185,9 +1185,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	newnp->daddr = treq->rmt_addr;
+	newsk->sk_v6_daddr = treq->rmt_addr;
 	newnp->saddr = treq->loc_addr;
-	newnp->rcv_saddr = treq->loc_addr;
+	newsk->sk_v6_rcv_saddr = treq->loc_addr;
 	newsk->sk_bound_dev_if = treq->iif;
 
 	/* Now IPv6 options...
@@ -1244,13 +1244,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
-	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
+	if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) {
 		/* We're using one, so create a matching key
 		 * on the newsk structure. If we fail to get
 		 * memory, then we end up not copying the key
 		 * across. Shucks.
 		 */
-		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
+		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
 			       AF_INET6, key->key, key->keylen,
 			       sk_gfp_atomic(sk, GFP_ATOMIC));
 	}
@@ -1758,10 +1758,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 	const struct inet_sock *inet = inet_sk(sp);
 	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
-	const struct ipv6_pinfo *np = inet6_sk(sp);
 
-	dest  = &np->daddr;
-	src   = &np->rcv_saddr;
+	dest  = &sp->sk_v6_daddr;
+	src   = &sp->sk_v6_rcv_saddr;
 	destp = ntohs(inet->inet_dport);
 	srcp  = ntohs(inet->inet_sport);
 
@@ -1810,11 +1809,10 @@ static void get_timewait6_sock(struct seq_file *seq,
 {
 	const struct in6_addr *dest, *src;
 	__u16 destp, srcp;
-	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
 	s32 delta = tw->tw_ttd - inet_tw_time_stamp();
 
-	dest = &tw6->tw_v6_daddr;
-	src  = &tw6->tw_v6_rcv_saddr;
+	dest = &tw->tw_v6_daddr;
+	src  = &tw->tw_v6_rcv_saddr;
 	destp = ntohs(tw->tw_dport);
 	srcp  = ntohs(tw->tw_sport);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 37532478e3ba..b496de19a341 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -55,11 +55,10 @@
 
 int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
-	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
 	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
 	int sk2_ipv6only = inet_v6_ipv6only(sk2);
-	int addr_type = ipv6_addr_type(sk_rcv_saddr6);
+	int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
 	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
 
 	/* if both are mapped, treat as IPv4 */
@@ -77,7 +76,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 		return 1;
 
 	if (sk2_rcv_saddr6 &&
-	    ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
+	    ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
 		return 1;
 
 	return 0;
@@ -105,7 +104,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
 	unsigned int hash2_nulladdr =
 		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
 	unsigned int hash2_partial =
-		udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
+		udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
 
 	/* precompute partial secondary hash */
 	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -115,7 +114,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
 static void udp_v6_rehash(struct sock *sk)
 {
 	u16 new_hash = udp6_portaddr_hash(sock_net(sk),
-					  &inet6_sk(sk)->rcv_saddr,
+					  &sk->sk_v6_rcv_saddr,
 					  inet_sk(sk)->inet_num);
 
 	udp_lib_rehash(sk, new_hash);
@@ -131,7 +130,6 @@ static inline int compute_score(struct sock *sk, struct net *net,
 
 	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
 			sk->sk_family == PF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
 		struct inet_sock *inet = inet_sk(sk);
 
 		score = 0;
@@ -140,13 +138,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
 				return -1;
 			score++;
 		}
-		if (!ipv6_addr_any(&np->rcv_saddr)) {
-			if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
 				return -1;
 			score++;
 		}
-		if (!ipv6_addr_any(&np->daddr)) {
-			if (!ipv6_addr_equal(&np->daddr, saddr))
+		if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
 				return -1;
 			score++;
 		}
@@ -169,10 +167,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 
 	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
 			sk->sk_family == PF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+		if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
 			return -1;
 		score = 0;
 		if (inet->inet_dport) {
@@ -180,8 +177,8 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 				return -1;
 			score++;
 		}
-		if (!ipv6_addr_any(&np->daddr)) {
-			if (!ipv6_addr_equal(&np->daddr, saddr))
+		if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
 				return -1;
 			score++;
 		}
@@ -549,7 +546,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
 
-	if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) {
+	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
 		sock_rps_save_rxhash(sk, skb);
 		sk_mark_napi_id(sk, skb);
 	}
@@ -690,20 +687,19 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
 
 		if (udp_sk(s)->udp_port_hash == num &&
 		    s->sk_family == PF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(s);
 			if (inet->inet_dport) {
 				if (inet->inet_dport != rmt_port)
 					continue;
 			}
-			if (!ipv6_addr_any(&np->daddr) &&
-			    !ipv6_addr_equal(&np->daddr, rmt_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
 			if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
 				continue;
 
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+				if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
 					continue;
 			}
 			if (!inet6_mc_check(s, loc_addr, rmt_addr))
@@ -1063,7 +1059,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	} else if (!up->pending) {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 	} else
 		daddr = NULL;
 
@@ -1133,8 +1129,8 @@ do_udp_sendmsg:
 		 * sk->sk_dst_cache.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    ipv6_addr_equal(daddr, &np->daddr))
-			daddr = &np->daddr;
+		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+			daddr = &sk->sk_v6_daddr;
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
@@ -1145,7 +1141,7 @@ do_udp_sendmsg:
 			return -EDESTADDRREQ;
 
 		fl6.fl6_dport = inet->inet_dport;
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 		fl6.flowlabel = np->flow_label;
 		connected = 1;
 	}
@@ -1261,8 +1257,8 @@ do_append_data:
 	if (dst) {
 		if (connected) {
 			ip6_dst_store(sk, dst,
-				      ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
-				      &np->daddr : NULL,
+				      ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+				      &sk->sk_v6_daddr : NULL,
 #ifdef CONFIG_IPV6_SUBTREES
 				      ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
 				      &np->saddr :
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index b076e8309bc2..9af77d9c0ec9 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1181,7 +1181,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
 	    !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
 		__wsum csum = skb_checksum(skb, 0, udp_len, 0);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		uh->check = csum_ipv6_magic(&np->saddr, &np->daddr, udp_len,
+		uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len,
 					    IPPROTO_UDP, csum);
 		if (uh->check == 0)
 			uh->check = CSUM_MANGLED_0;
@@ -1189,7 +1189,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_ipv6_magic(&np->saddr, &np->daddr,
+		uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr,
 					     udp_len, IPPROTO_UDP, 0);
 	}
 }
@@ -1713,13 +1713,13 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
 		if (ipv6_addr_v4mapped(&np->saddr) &&
-		    ipv6_addr_v4mapped(&np->daddr)) {
+		    ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
 			struct inet_sock *inet = inet_sk(sk);
 
 			tunnel->v4mapped = true;
 			inet->inet_saddr = np->saddr.s6_addr32[3];
-			inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3];
-			inet->inet_daddr = np->daddr.s6_addr32[3];
+			inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3];
+			inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3];
 		} else {
 			tunnel->v4mapped = false;
 		}
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 072d7202e182..2d6760a2ae34 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -127,9 +127,10 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tunnel->sock->sk_family == AF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
+			const struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
+
 			seq_printf(m, " from %pI6c to %pI6c\n",
-				&np->saddr, &np->daddr);
+				&np->saddr, &tunnel->sock->sk_v6_daddr);
 		} else
 #endif
 		seq_printf(m, " from %pI4 to %pI4\n",
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index b8a6039314e8..cfd65304be60 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -63,7 +63,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 	struct sock *sk;
 
 	sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
-		struct in6_addr *addr = inet6_rcv_saddr(sk);
+		const struct in6_addr *addr = inet6_rcv_saddr(sk);
 		struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
 
 		if (l2tp == NULL)
@@ -331,7 +331,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rcu_read_unlock();
 
 	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
-	np->rcv_saddr = addr->l2tp_addr;
+	sk->sk_v6_rcv_saddr = addr->l2tp_addr;
 	np->saddr = addr->l2tp_addr;
 
 	l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id;
@@ -421,14 +421,14 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
 		if (!lsk->peer_conn_id)
 			return -ENOTCONN;
 		lsa->l2tp_conn_id = lsk->peer_conn_id;
-		lsa->l2tp_addr = np->daddr;
+		lsa->l2tp_addr = sk->sk_v6_daddr;
 		if (np->sndflow)
 			lsa->l2tp_flowinfo = np->flow_label;
 	} else {
-		if (ipv6_addr_any(&np->rcv_saddr))
+		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
 			lsa->l2tp_addr = np->saddr;
 		else
-			lsa->l2tp_addr = np->rcv_saddr;
+			lsa->l2tp_addr = sk->sk_v6_rcv_saddr;
 
 		lsa->l2tp_conn_id = lsk->conn_id;
 	}
@@ -537,8 +537,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		 * sk->sk_dst_cache.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    ipv6_addr_equal(daddr, &np->daddr))
-			daddr = &np->daddr;
+		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+			daddr = &sk->sk_v6_daddr;
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    lsa->l2tp_scope_id &&
@@ -548,7 +548,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 		fl6.flowlabel = np->flow_label;
 	}
 
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 0825ff26e113..be446d517bc9 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -306,8 +306,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 		if (np) {
 			if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr),
 				    &np->saddr) ||
-			    nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(np->daddr),
-				    &np->daddr))
+			    nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr),
+				    &sk->sk_v6_daddr))
 				goto nla_put_failure;
 		} else
 #endif
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 5ebee2ded9e9..f0a7adaef2ea 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -906,8 +906,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if ((tunnel->version == 2) &&
 		   (tunnel->sock->sk_family == AF_INET6)) {
-		struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
 		struct sockaddr_pppol2tpin6 sp;
+
 		len = sizeof(sp);
 		memset(&sp, 0, len);
 		sp.sa_family	= AF_PPPOX;
@@ -920,13 +920,13 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 		sp.pppol2tp.d_session = session->peer_session_id;
 		sp.pppol2tp.addr.sin6_family = AF_INET6;
 		sp.pppol2tp.addr.sin6_port = inet->inet_dport;
-		memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr,
-		       sizeof(np->daddr));
+		memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr,
+		       sizeof(tunnel->sock->sk_v6_daddr));
 		memcpy(uaddr, &sp, len);
 	} else if ((tunnel->version == 3) &&
 		   (tunnel->sock->sk_family == AF_INET6)) {
-		struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
 		struct sockaddr_pppol2tpv3in6 sp;
+
 		len = sizeof(sp);
 		memset(&sp, 0, len);
 		sp.sa_family	= AF_PPPOX;
@@ -939,8 +939,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 		sp.pppol2tp.d_session = session->peer_session_id;
 		sp.pppol2tp.addr.sin6_family = AF_INET6;
 		sp.pppol2tp.addr.sin6_port = inet->inet_dport;
-		memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr,
-		       sizeof(np->daddr));
+		memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr,
+		       sizeof(tunnel->sock->sk_v6_daddr));
 		memcpy(uaddr, &sp, len);
 #endif
 	} else if (tunnel->version == 3) {
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 5d8a3a3cd5a7..ef8a926752a9 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -200,7 +200,7 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
 				     in->ifindex);
 		if (sk) {
 			int connected = (sk->sk_state == TCP_ESTABLISHED);
-			int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
+			int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
 
 			/* NOTE: we return listeners even if bound to
 			 * 0.0.0.0, those are filtered out in
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 06df2b9110f5..3dd0e374bc2b 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -370,7 +370,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 		 */
 		wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
 			    sk->sk_state != TCP_TIME_WAIT &&
-			    ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
+			    ipv6_addr_any(&sk->sk_v6_rcv_saddr));
 
 		/* Ignore non-transparent sockets,
 		   if XT_SOCKET_TRANSPARENT is used */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e7b2d4fe2b6a..f6334aa19151 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -426,20 +426,20 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
 {
 	addr->v6.sin6_family = AF_INET6;
 	addr->v6.sin6_port = 0;
-	addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
+	addr->v6.sin6_addr = sk->sk_v6_rcv_saddr;
 }
 
 /* Initialize sk->sk_rcv_saddr from sctp_addr. */
 static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
 {
 	if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
-		inet6_sk(sk)->rcv_saddr.s6_addr32[0] = 0;
-		inet6_sk(sk)->rcv_saddr.s6_addr32[1] = 0;
-		inet6_sk(sk)->rcv_saddr.s6_addr32[2] = htonl(0x0000ffff);
-		inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
+		sk->sk_v6_rcv_saddr.s6_addr32[0] = 0;
+		sk->sk_v6_rcv_saddr.s6_addr32[1] = 0;
+		sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff);
+		sk->sk_v6_rcv_saddr.s6_addr32[3] =
 			addr->v4.sin_addr.s_addr;
 	} else {
-		inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr;
+		sk->sk_v6_rcv_saddr = addr->v6.sin6_addr;
 	}
 }
 
@@ -447,12 +447,12 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
 static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
 {
 	if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
-		inet6_sk(sk)->daddr.s6_addr32[0] = 0;
-		inet6_sk(sk)->daddr.s6_addr32[1] = 0;
-		inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
-		inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
+		sk->sk_v6_daddr.s6_addr32[0] = 0;
+		sk->sk_v6_daddr.s6_addr32[1] = 0;
+		sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff);
+		sk->sk_v6_daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
 	} else {
-		inet6_sk(sk)->daddr = addr->v6.sin6_addr;
+		sk->sk_v6_daddr = addr->v6.sin6_addr;
 	}
 }
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9c9caaa5e0d3..0045c7cf1e9e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -294,7 +294,7 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
 	case PF_INET6:
 		len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
 				proto_name,
-				&inet6_sk(sk)->rcv_saddr,
+				&sk->sk_v6_rcv_saddr,
 				inet_sk(sk)->inet_num);
 		break;
 	default:
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 8d8d97dbb389..80554fcf9fcc 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -304,12 +304,11 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 			}
 			case AF_INET6: {
 				struct inet_sock *inet = inet_sk(sk);
-				struct ipv6_pinfo *inet6 = inet6_sk(sk);
 
-				print_ipv6_addr(ab, &inet6->rcv_saddr,
+				print_ipv6_addr(ab, &sk->sk_v6_rcv_saddr,
 						inet->inet_sport,
 						"laddr", "lport");
-				print_ipv6_addr(ab, &inet6->daddr,
+				print_ipv6_addr(ab, &sk->sk_v6_daddr,
 						inet->inet_dport,
 						"faddr", "fport");
 				break;
-- 
cgit v1.2.3


From b726b7dfb400c937546fa91cf8523dcb1aa2fc6e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:53 +0100
Subject: Revert "mm: sched: numa: Delay PTE scanning until a task is scheduled
 on a new node"

PTE scanning and NUMA hinting fault handling is expensive so commit
5bca2303 ("mm: sched: numa: Delay PTE scanning until a task is scheduled
on a new node") deferred the PTE scan until a task had been scheduled on
another node. The problem is that in the purely shared memory case that
this may never happen and no NUMA hinting fault information will be
captured. We are not ruling out the possibility that something better
can be done here but for now, this patch needs to be reverted and depend
entirely on the scan_delay to avoid punishing short-lived processes.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-16-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm_types.h | 10 ----------
 kernel/fork.c            |  3 ---
 kernel/sched/fair.c      | 18 ------------------
 kernel/sched/features.h  |  4 +---
 4 files changed, 1 insertion(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d9851eeb6e1d..b7adf1d4310c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -428,20 +428,10 @@ struct mm_struct {
 
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
-
-	/*
-	 * The first node a task was scheduled on. If a task runs on
-	 * a different node than Make PTE Scan Go Now.
-	 */
-	int first_nid;
 #endif
 	struct uprobes_state uprobes_state;
 };
 
-/* first nid will either be a valid NID or one of these values */
-#define NUMA_PTE_SCAN_INIT	-1
-#define NUMA_PTE_SCAN_ACTIVE	-2
-
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/kernel/fork.c b/kernel/fork.c
index 086fe73ad6bd..7192d91b5415 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -816,9 +816,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	mm->pmd_huge_pte = NULL;
-#endif
-#ifdef CONFIG_NUMA_BALANCING
-	mm->first_nid = NUMA_PTE_SCAN_INIT;
 #endif
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 464207fc9eef..49b11faa2961 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -900,24 +900,6 @@ void task_numa_work(struct callback_head *work)
 	if (p->flags & PF_EXITING)
 		return;
 
-	/*
-	 * We do not care about task placement until a task runs on a node
-	 * other than the first one used by the address space. This is
-	 * largely because migrations are driven by what CPU the task
-	 * is running on. If it's never scheduled on another node, it'll
-	 * not migrate so why bother trapping the fault.
-	 */
-	if (mm->first_nid == NUMA_PTE_SCAN_INIT)
-		mm->first_nid = numa_node_id();
-	if (mm->first_nid != NUMA_PTE_SCAN_ACTIVE) {
-		/* Are we running on a new node yet? */
-		if (numa_node_id() == mm->first_nid &&
-		    !sched_feat_numa(NUMA_FORCE))
-			return;
-
-		mm->first_nid = NUMA_PTE_SCAN_ACTIVE;
-	}
-
 	/*
 	 * Reset the scan period if enough time has gone by. Objective is that
 	 * scanning will be reduced if pages are properly placed. As tasks
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 99399f8e4799..cba5c616a157 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -63,10 +63,8 @@ SCHED_FEAT(LB_MIN, false)
 /*
  * Apply the automatic NUMA scheduling policy. Enabled automatically
  * at runtime if running on a NUMA machine. Can be controlled via
- * numa_balancing=. Allow PTE scanning to be forced on UMA machines
- * for debugging the core machinery.
+ * numa_balancing=
  */
 #ifdef CONFIG_NUMA_BALANCING
 SCHED_FEAT(NUMA,	false)
-SCHED_FEAT(NUMA_FORCE,	false)
 #endif
-- 
cgit v1.2.3


From 598f0ec0bc996e90a806ee9564af919ea5aad401 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:55 +0100
Subject: sched/numa: Set the scan rate proportional to the memory usage of the
 task being scanned

The NUMA PTE scan rate is controlled with a combination of the
numa_balancing_scan_period_min, numa_balancing_scan_period_max and
numa_balancing_scan_size. This scan rate is independent of the size
of the task and as an aside it is further complicated by the fact that
numa_balancing_scan_size controls how many pages are marked pte_numa and
not how much virtual memory is scanned.

In combination, it is almost impossible to meaningfully tune the min and
max scan periods and reasoning about performance is complex when the time
to complete a full scan is is partially a function of the tasks memory
size. This patch alters the semantic of the min and max tunables to be
about tuning the length time it takes to complete a scan of a tasks occupied
virtual address space. Conceptually this is a lot easier to understand. There
is a "sanity" check to ensure the scan rate is never extremely fast based on
the amount of virtual memory that should be scanned in a second. The default
of 2.5G seems arbitrary but it is to have the maximum scan rate after the
patch roughly match the maximum scan rate before the patch was applied.

On a similar note, numa_scan_period is in milliseconds and not
jiffies. Properly placed pages slow the scanning rate but adding 10 jiffies
to numa_scan_period means that the rate scanning slows depends on HZ which
is confusing. Get rid of the jiffies_to_msec conversion and treat it as ms.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-18-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/sysctl/kernel.txt | 11 +++---
 include/linux/sched.h           |  1 +
 kernel/sched/fair.c             | 88 +++++++++++++++++++++++++++++++++++------
 3 files changed, 83 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 1428c6659254..8cd7e5fc79da 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -403,15 +403,16 @@ workload pattern changes and minimises performance impact due to remote
 memory accesses. These sysctls control the thresholds for scan delays and
 the number of pages scanned.
 
-numa_balancing_scan_period_min_ms is the minimum delay in milliseconds
-between scans. It effectively controls the maximum scanning rate for
-each task.
+numa_balancing_scan_period_min_ms is the minimum time in milliseconds to
+scan a tasks virtual memory. It effectively controls the maximum scanning
+rate for each task.
 
 numa_balancing_scan_delay_ms is the starting "scan delay" used for a task
 when it initially forks.
 
-numa_balancing_scan_period_max_ms is the maximum delay between scans. It
-effectively controls the minimum scanning rate for each task.
+numa_balancing_scan_period_max_ms is the maximum time in milliseconds to
+scan a tasks virtual memory. It effectively controls the minimum scanning
+rate for each task.
 
 numa_balancing_scan_size_mb is how many megabytes worth of pages are
 scanned for a given scan.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2ac5285db434..fdcb4c855072 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1339,6 +1339,7 @@ struct task_struct {
 	int numa_scan_seq;
 	int numa_migrate_seq;
 	unsigned int numa_scan_period;
+	unsigned int numa_scan_period_max;
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
 #endif /* CONFIG_NUMA_BALANCING */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0966f0c16f1b..e08d757720de 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -818,11 +818,13 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 #ifdef CONFIG_NUMA_BALANCING
 /*
- * numa task sample period in ms
+ * Approximate time to scan a full NUMA task in ms. The task scan period is
+ * calculated based on the tasks virtual memory size and
+ * numa_balancing_scan_size.
  */
-unsigned int sysctl_numa_balancing_scan_period_min = 100;
-unsigned int sysctl_numa_balancing_scan_period_max = 100*50;
-unsigned int sysctl_numa_balancing_scan_period_reset = 100*600;
+unsigned int sysctl_numa_balancing_scan_period_min = 1000;
+unsigned int sysctl_numa_balancing_scan_period_max = 60000;
+unsigned int sysctl_numa_balancing_scan_period_reset = 60000;
 
 /* Portion of address space to scan in MB */
 unsigned int sysctl_numa_balancing_scan_size = 256;
@@ -830,6 +832,51 @@ unsigned int sysctl_numa_balancing_scan_size = 256;
 /* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */
 unsigned int sysctl_numa_balancing_scan_delay = 1000;
 
+static unsigned int task_nr_scan_windows(struct task_struct *p)
+{
+	unsigned long rss = 0;
+	unsigned long nr_scan_pages;
+
+	/*
+	 * Calculations based on RSS as non-present and empty pages are skipped
+	 * by the PTE scanner and NUMA hinting faults should be trapped based
+	 * on resident pages
+	 */
+	nr_scan_pages = sysctl_numa_balancing_scan_size << (20 - PAGE_SHIFT);
+	rss = get_mm_rss(p->mm);
+	if (!rss)
+		rss = nr_scan_pages;
+
+	rss = round_up(rss, nr_scan_pages);
+	return rss / nr_scan_pages;
+}
+
+/* For sanitys sake, never scan more PTEs than MAX_SCAN_WINDOW MB/sec. */
+#define MAX_SCAN_WINDOW 2560
+
+static unsigned int task_scan_min(struct task_struct *p)
+{
+	unsigned int scan, floor;
+	unsigned int windows = 1;
+
+	if (sysctl_numa_balancing_scan_size < MAX_SCAN_WINDOW)
+		windows = MAX_SCAN_WINDOW / sysctl_numa_balancing_scan_size;
+	floor = 1000 / windows;
+
+	scan = sysctl_numa_balancing_scan_period_min / task_nr_scan_windows(p);
+	return max_t(unsigned int, floor, scan);
+}
+
+static unsigned int task_scan_max(struct task_struct *p)
+{
+	unsigned int smin = task_scan_min(p);
+	unsigned int smax;
+
+	/* Watch for min being lower than max due to floor calculations */
+	smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
+	return max(smin, smax);
+}
+
 static void task_numa_placement(struct task_struct *p)
 {
 	int seq;
@@ -840,6 +887,7 @@ static void task_numa_placement(struct task_struct *p)
 	if (p->numa_scan_seq == seq)
 		return;
 	p->numa_scan_seq = seq;
+	p->numa_scan_period_max = task_scan_max(p);
 
 	/* FIXME: Scheduling placement policy hints go here */
 }
@@ -860,9 +908,14 @@ void task_numa_fault(int node, int pages, bool migrated)
 	 * If pages are properly placed (did not migrate) then scan slower.
 	 * This is reset periodically in case of phase changes
 	 */
-        if (!migrated)
-		p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
-			p->numa_scan_period + jiffies_to_msecs(10));
+	if (!migrated) {
+		/* Initialise if necessary */
+		if (!p->numa_scan_period_max)
+			p->numa_scan_period_max = task_scan_max(p);
+
+		p->numa_scan_period = min(p->numa_scan_period_max,
+			p->numa_scan_period + 10);
+	}
 
 	task_numa_placement(p);
 }
@@ -884,6 +937,7 @@ void task_numa_work(struct callback_head *work)
 	struct mm_struct *mm = p->mm;
 	struct vm_area_struct *vma;
 	unsigned long start, end;
+	unsigned long nr_pte_updates = 0;
 	long pages;
 
 	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
@@ -915,7 +969,7 @@ void task_numa_work(struct callback_head *work)
 	 */
 	migrate = mm->numa_next_reset;
 	if (time_after(now, migrate)) {
-		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+		p->numa_scan_period = task_scan_min(p);
 		next_scan = now + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
 		xchg(&mm->numa_next_reset, next_scan);
 	}
@@ -927,8 +981,10 @@ void task_numa_work(struct callback_head *work)
 	if (time_before(now, migrate))
 		return;
 
-	if (p->numa_scan_period == 0)
-		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+	if (p->numa_scan_period == 0) {
+		p->numa_scan_period_max = task_scan_max(p);
+		p->numa_scan_period = task_scan_min(p);
+	}
 
 	next_scan = now + msecs_to_jiffies(p->numa_scan_period);
 	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
@@ -965,7 +1021,15 @@ void task_numa_work(struct callback_head *work)
 			start = max(start, vma->vm_start);
 			end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);
 			end = min(end, vma->vm_end);
-			pages -= change_prot_numa(vma, start, end);
+			nr_pte_updates += change_prot_numa(vma, start, end);
+
+			/*
+			 * Scan sysctl_numa_balancing_scan_size but ensure that
+			 * at least one PTE is updated so that unused virtual
+			 * address space is quickly skipped.
+			 */
+			if (nr_pte_updates)
+				pages -= (end - start) >> PAGE_SHIFT;
 
 			start = end;
 			if (pages <= 0)
@@ -1012,7 +1076,7 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
 
 	if (now - curr->node_stamp > period) {
 		if (!curr->node_stamp)
-			curr->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+			curr->numa_scan_period = task_scan_min(curr);
 		curr->node_stamp += period;
 
 		if (!time_before(jiffies, curr->mm->numa_next_scan)) {
-- 
cgit v1.2.3


From f809ca9a554dda49fb264c79e31c722e0b063ff8 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:57 +0100
Subject: sched/numa: Track NUMA hinting faults on per-node basis

This patch tracks what nodes numa hinting faults were incurred on.
This information is later used to schedule a task on the node storing
the pages most frequently faulted by the task.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-20-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  2 ++
 kernel/sched/core.c   |  3 +++
 kernel/sched/fair.c   | 11 ++++++++++-
 kernel/sched/sched.h  | 12 ++++++++++++
 4 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fdcb4c855072..a810e95bca2b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1342,6 +1342,8 @@ struct task_struct {
 	unsigned int numa_scan_period_max;
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
+
+	unsigned long *numa_faults;
 #endif /* CONFIG_NUMA_BALANCING */
 
 	struct rcu_head rcu;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index aee7e4dcbbf3..6808d35fd7ed 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1634,6 +1634,7 @@ static void __sched_fork(struct task_struct *p)
 	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_work.next = &p->numa_work;
+	p->numa_faults = NULL;
 #endif /* CONFIG_NUMA_BALANCING */
 }
 
@@ -1892,6 +1893,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
+		task_numa_free(prev);
+
 		/*
 		 * Remove function-return probe instances associated with this
 		 * task and put them back on the free list.
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c6c330245f7f..0bb3e0aa110b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -902,7 +902,14 @@ void task_numa_fault(int node, int pages, bool migrated)
 	if (!numabalancing_enabled)
 		return;
 
-	/* FIXME: Allocate task-specific structure for placement policy here */
+	/* Allocate buffer to track faults on a per-node basis */
+	if (unlikely(!p->numa_faults)) {
+		int size = sizeof(*p->numa_faults) * nr_node_ids;
+
+		p->numa_faults = kzalloc(size, GFP_KERNEL|__GFP_NOWARN);
+		if (!p->numa_faults)
+			return;
+	}
 
 	/*
 	 * If pages are properly placed (did not migrate) then scan slower.
@@ -918,6 +925,8 @@ void task_numa_fault(int node, int pages, bool migrated)
 	}
 
 	task_numa_placement(p);
+
+	p->numa_faults[node] += pages;
 }
 
 static void reset_ptenuma_scan(struct task_struct *p)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e82484db7699..199099c7aa22 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -6,6 +6,7 @@
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
 #include <linux/tick.h>
+#include <linux/slab.h>
 
 #include "cpupri.h"
 #include "cpuacct.h"
@@ -555,6 +556,17 @@ static inline u64 rq_clock_task(struct rq *rq)
 	return rq->clock_task;
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline void task_numa_free(struct task_struct *p)
+{
+	kfree(p->numa_faults);
+}
+#else /* CONFIG_NUMA_BALANCING */
+static inline void task_numa_free(struct task_struct *p)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 #ifdef CONFIG_SMP
 
 #define rcu_dereference_check_sched_domain(p) \
-- 
cgit v1.2.3


From 688b7585d16ab57a17aa4422a3b290b3a55fa679 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:58 +0100
Subject: sched/numa: Select a preferred node with the most numa hinting faults

This patch selects a preferred node for a task to run on based on the
NUMA hinting faults. This information is later used to migrate tasks
towards the node during balancing.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-21-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  1 +
 kernel/sched/core.c   |  1 +
 kernel/sched/fair.c   | 17 +++++++++++++++--
 3 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a810e95bca2b..b1fc75e7187b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1344,6 +1344,7 @@ struct task_struct {
 	struct callback_head numa_work;
 
 	unsigned long *numa_faults;
+	int numa_preferred_nid;
 #endif /* CONFIG_NUMA_BALANCING */
 
 	struct rcu_head rcu;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6808d35fd7ed..d15cd70f85b5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1633,6 +1633,7 @@ static void __sched_fork(struct task_struct *p)
 	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
 	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
+	p->numa_preferred_nid = -1;
 	p->numa_work.next = &p->numa_work;
 	p->numa_faults = NULL;
 #endif /* CONFIG_NUMA_BALANCING */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0bb3e0aa110b..9efd34f63e81 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -879,7 +879,8 @@ static unsigned int task_scan_max(struct task_struct *p)
 
 static void task_numa_placement(struct task_struct *p)
 {
-	int seq;
+	int seq, nid, max_nid = -1;
+	unsigned long max_faults = 0;
 
 	if (!p->mm)	/* for example, ksmd faulting in a user's mm */
 		return;
@@ -889,7 +890,19 @@ static void task_numa_placement(struct task_struct *p)
 	p->numa_scan_seq = seq;
 	p->numa_scan_period_max = task_scan_max(p);
 
-	/* FIXME: Scheduling placement policy hints go here */
+	/* Find the node with the highest number of faults */
+	for_each_online_node(nid) {
+		unsigned long faults = p->numa_faults[nid];
+		p->numa_faults[nid] >>= 1;
+		if (faults > max_faults) {
+			max_faults = faults;
+			max_nid = nid;
+		}
+	}
+
+	/* Update the tasks preferred node if necessary */
+	if (max_faults && max_nid != p->numa_preferred_nid)
+		p->numa_preferred_nid = max_nid;
 }
 
 /*
-- 
cgit v1.2.3


From 745d61476ddb737aad3495fa6d9a8f8c2ee59f86 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:59 +0100
Subject: sched/numa: Update NUMA hinting faults once per scan

NUMA hinting fault counts and placement decisions are both recorded in the
same array which distorts the samples in an unpredictable fashion. The values
linearly accumulate during the scan and then decay creating a sawtooth-like
pattern in the per-node counts. It also means that placement decisions are
time sensitive. At best it means that it is very difficult to state that
the buffer holds a decaying average of past faulting behaviour. At worst,
it can confuse the load balancer if it sees one node with an artifically high
count due to very recent faulting activity and may create a bouncing effect.

This patch adds a second array. numa_faults stores the historical data
which is used for placement decisions. numa_faults_buffer holds the
fault activity during the current scan window. When the scan completes,
numa_faults decays and the values from numa_faults_buffer are copied
across.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-22-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 13 +++++++++++++
 kernel/sched/core.c   |  1 +
 kernel/sched/fair.c   | 16 +++++++++++++---
 3 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b1fc75e7187b..a463bc3ad437 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1343,7 +1343,20 @@ struct task_struct {
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
 
+	/*
+	 * Exponential decaying average of faults on a per-node basis.
+	 * Scheduling placement decisions are made based on the these counts.
+	 * The values remain static for the duration of a PTE scan
+	 */
 	unsigned long *numa_faults;
+
+	/*
+	 * numa_faults_buffer records faults per node during the current
+	 * scan window. When the scan completes, the counts in numa_faults
+	 * decay and these values are copied.
+	 */
+	unsigned long *numa_faults_buffer;
+
 	int numa_preferred_nid;
 #endif /* CONFIG_NUMA_BALANCING */
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d15cd70f85b5..064a0af44540 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1636,6 +1636,7 @@ static void __sched_fork(struct task_struct *p)
 	p->numa_preferred_nid = -1;
 	p->numa_work.next = &p->numa_work;
 	p->numa_faults = NULL;
+	p->numa_faults_buffer = NULL;
 #endif /* CONFIG_NUMA_BALANCING */
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9efd34f63e81..3abc651bc38a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -892,8 +892,14 @@ static void task_numa_placement(struct task_struct *p)
 
 	/* Find the node with the highest number of faults */
 	for_each_online_node(nid) {
-		unsigned long faults = p->numa_faults[nid];
+		unsigned long faults;
+
+		/* Decay existing window and copy faults since last scan */
 		p->numa_faults[nid] >>= 1;
+		p->numa_faults[nid] += p->numa_faults_buffer[nid];
+		p->numa_faults_buffer[nid] = 0;
+
+		faults = p->numa_faults[nid];
 		if (faults > max_faults) {
 			max_faults = faults;
 			max_nid = nid;
@@ -919,9 +925,13 @@ void task_numa_fault(int node, int pages, bool migrated)
 	if (unlikely(!p->numa_faults)) {
 		int size = sizeof(*p->numa_faults) * nr_node_ids;
 
-		p->numa_faults = kzalloc(size, GFP_KERNEL|__GFP_NOWARN);
+		/* numa_faults and numa_faults_buffer share the allocation */
+		p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN);
 		if (!p->numa_faults)
 			return;
+
+		BUG_ON(p->numa_faults_buffer);
+		p->numa_faults_buffer = p->numa_faults + nr_node_ids;
 	}
 
 	/*
@@ -939,7 +949,7 @@ void task_numa_fault(int node, int pages, bool migrated)
 
 	task_numa_placement(p);
 
-	p->numa_faults[node] += pages;
+	p->numa_faults_buffer[node] += pages;
 }
 
 static void reset_ptenuma_scan(struct task_struct *p)
-- 
cgit v1.2.3


From 3a7053b3224f4a8b0e8184166190076593621617 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:00 +0100
Subject: sched/numa: Favour moving tasks towards the preferred node

This patch favours moving tasks towards NUMA node that recorded a higher
number of NUMA faults during active load balancing.  Ideally this is
self-reinforcing as the longer the task runs on that node, the more faults
it should incur causing task_numa_placement to keep the task running on that
node. In reality a big weakness is that the nodes CPUs can be overloaded
and it would be more efficient to queue tasks on an idle node and migrate
to the new node. This would require additional smarts in the balancer so
for now the balancer will simply prefer to place the task on the preferred
node for a PTE scans which is controlled by the numa_balancing_settle_count
sysctl. Once the settle_count number of scans has complete the schedule
is free to place the task on an alternative node if the load is imbalanced.

[srikar@linux.vnet.ibm.com: Fixed statistics]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
[ Tunable and use higher faults instead of preferred. ]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-23-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/sysctl/kernel.txt |  8 +++++-
 include/linux/sched.h           |  1 +
 kernel/sched/core.c             |  3 +-
 kernel/sched/fair.c             | 63 ++++++++++++++++++++++++++++++++++++++---
 kernel/sched/features.h         |  7 +++++
 kernel/sysctl.c                 |  7 +++++
 6 files changed, 83 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 8cd7e5fc79da..d48bca45b6f2 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -375,7 +375,8 @@ feature should be disabled. Otherwise, if the system overhead from the
 feature is too high then the rate the kernel samples for NUMA hinting
 faults may be controlled by the numa_balancing_scan_period_min_ms,
 numa_balancing_scan_delay_ms, numa_balancing_scan_period_reset,
-numa_balancing_scan_period_max_ms and numa_balancing_scan_size_mb sysctls.
+numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb and
+numa_balancing_settle_count sysctls.
 
 ==============================================================
 
@@ -420,6 +421,11 @@ scanned for a given scan.
 numa_balancing_scan_period_reset is a blunt instrument that controls how
 often a tasks scan delay is reset to detect sudden changes in task behaviour.
 
+numa_balancing_settle_count is how many scan periods must complete before
+the schedule balancer stops pushing the task towards a preferred node. This
+gives the scheduler a chance to place the task on an alternative node if the
+preferred node is overloaded.
+
 ==============================================================
 
 osrelease, ostype & version:
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a463bc3ad437..aecdc5a18773 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -777,6 +777,7 @@ enum cpu_idle_type {
 #define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
 #define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
+#define SD_NUMA			0x4000	/* cross-node balancing */
 
 extern int __weak arch_sd_sibiling_asym_packing(void);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 064a0af44540..b7e6b6f9c5f6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1631,7 +1631,7 @@ static void __sched_fork(struct task_struct *p)
 
 	p->node_stamp = 0ULL;
 	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
-	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
+	p->numa_migrate_seq = 0;
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_preferred_nid = -1;
 	p->numa_work.next = &p->numa_work;
@@ -5656,6 +5656,7 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
 					| 0*SD_SHARE_PKG_RESOURCES
 					| 1*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
+					| 1*SD_NUMA
 					| sd_local_flags(level)
 					,
 		.last_balance		= jiffies,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3abc651bc38a..6ffddca687fe 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -877,6 +877,15 @@ static unsigned int task_scan_max(struct task_struct *p)
 	return max(smin, smax);
 }
 
+/*
+ * Once a preferred node is selected the scheduler balancer will prefer moving
+ * a task to that node for sysctl_numa_balancing_settle_count number of PTE
+ * scans. This will give the process the chance to accumulate more faults on
+ * the preferred node but still allow the scheduler to move the task again if
+ * the nodes CPUs are overloaded.
+ */
+unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3;
+
 static void task_numa_placement(struct task_struct *p)
 {
 	int seq, nid, max_nid = -1;
@@ -888,6 +897,7 @@ static void task_numa_placement(struct task_struct *p)
 	if (p->numa_scan_seq == seq)
 		return;
 	p->numa_scan_seq = seq;
+	p->numa_migrate_seq++;
 	p->numa_scan_period_max = task_scan_max(p);
 
 	/* Find the node with the highest number of faults */
@@ -907,8 +917,10 @@ static void task_numa_placement(struct task_struct *p)
 	}
 
 	/* Update the tasks preferred node if necessary */
-	if (max_faults && max_nid != p->numa_preferred_nid)
+	if (max_faults && max_nid != p->numa_preferred_nid) {
 		p->numa_preferred_nid = max_nid;
+		p->numa_migrate_seq = 0;
+	}
 }
 
 /*
@@ -4071,6 +4083,38 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 	return delta < (s64)sysctl_sched_migration_cost;
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+/* Returns true if the destination node has incurred more faults */
+static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
+{
+	int src_nid, dst_nid;
+
+	if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults ||
+	    !(env->sd->flags & SD_NUMA)) {
+		return false;
+	}
+
+	src_nid = cpu_to_node(env->src_cpu);
+	dst_nid = cpu_to_node(env->dst_cpu);
+
+	if (src_nid == dst_nid ||
+	    p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
+		return false;
+
+	if (dst_nid == p->numa_preferred_nid ||
+	    p->numa_faults[dst_nid] > p->numa_faults[src_nid])
+		return true;
+
+	return false;
+}
+#else
+static inline bool migrate_improves_locality(struct task_struct *p,
+					     struct lb_env *env)
+{
+	return false;
+}
+#endif
+
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
@@ -4128,11 +4172,22 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 
 	/*
 	 * Aggressive migration if:
-	 * 1) task is cache cold, or
-	 * 2) too many balance attempts have failed.
+	 * 1) destination numa is preferred
+	 * 2) task is cache cold, or
+	 * 3) too many balance attempts have failed.
 	 */
-
 	tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq), env->sd);
+
+	if (migrate_improves_locality(p, env)) {
+#ifdef CONFIG_SCHEDSTATS
+		if (tsk_cache_hot) {
+			schedstat_inc(env->sd, lb_hot_gained[env->idle]);
+			schedstat_inc(p, se.statistics.nr_forced_migrations);
+		}
+#endif
+		return 1;
+	}
+
 	if (!tsk_cache_hot ||
 		env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index cba5c616a157..d9278ce2c4b4 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -67,4 +67,11 @@ SCHED_FEAT(LB_MIN, false)
  */
 #ifdef CONFIG_NUMA_BALANCING
 SCHED_FEAT(NUMA,	false)
+
+/*
+ * NUMA_FAVOUR_HIGHER will favor moving tasks towards nodes where a
+ * higher number of hinting faults are recorded during active load
+ * balancing.
+ */
+SCHED_FEAT(NUMA_FAVOUR_HIGHER, true)
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b2f06f3c6a3f..42f616a74f40 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -391,6 +391,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname       = "numa_balancing_settle_count",
+		.data           = &sysctl_numa_balancing_settle_count,
+		.maxlen         = sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec,
+	},
 #endif /* CONFIG_NUMA_BALANCING */
 #endif /* CONFIG_SCHED_DEBUG */
 	{
-- 
cgit v1.2.3


From ac8e895bd260cb8bb19ade6a3abd44e7abe9a01d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:03 +0100
Subject: sched/numa: Add infrastructure for split shared/private accounting of
 NUMA hinting faults

Ideally it would be possible to distinguish between NUMA hinting faults
that are private to a task and those that are shared.  This patch prepares
infrastructure for separately accounting shared and private faults by
allocating the necessary buffers and passing in relevant information. For
now, all faults are treated as private and detection will be introduced
later.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-26-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  5 +++--
 kernel/sched/fair.c   | 46 +++++++++++++++++++++++++++++++++++-----------
 mm/huge_memory.c      |  5 +++--
 mm/memory.c           |  8 ++++++--
 4 files changed, 47 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index aecdc5a18773..d946195eec10 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1445,10 +1445,11 @@ struct task_struct {
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
 extern void set_numabalancing_state(bool enabled);
 #else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_node, int node, int pages,
+				   bool migrated)
 {
 }
 static inline void set_numabalancing_state(bool enabled)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8b15e9e1d1b8..89eeb89fd99a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -886,6 +886,20 @@ static unsigned int task_scan_max(struct task_struct *p)
  */
 unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3;
 
+static inline int task_faults_idx(int nid, int priv)
+{
+	return 2 * nid + priv;
+}
+
+static inline unsigned long task_faults(struct task_struct *p, int nid)
+{
+	if (!p->numa_faults)
+		return 0;
+
+	return p->numa_faults[task_faults_idx(nid, 0)] +
+		p->numa_faults[task_faults_idx(nid, 1)];
+}
+
 static unsigned long weighted_cpuload(const int cpu);
 
 
@@ -928,13 +942,19 @@ static void task_numa_placement(struct task_struct *p)
 	/* Find the node with the highest number of faults */
 	for_each_online_node(nid) {
 		unsigned long faults;
+		int priv, i;
 
-		/* Decay existing window and copy faults since last scan */
-		p->numa_faults[nid] >>= 1;
-		p->numa_faults[nid] += p->numa_faults_buffer[nid];
-		p->numa_faults_buffer[nid] = 0;
+		for (priv = 0; priv < 2; priv++) {
+			i = task_faults_idx(nid, priv);
 
-		faults = p->numa_faults[nid];
+			/* Decay existing window, copy faults since last scan */
+			p->numa_faults[i] >>= 1;
+			p->numa_faults[i] += p->numa_faults_buffer[i];
+			p->numa_faults_buffer[i] = 0;
+		}
+
+		/* Find maximum private faults */
+		faults = p->numa_faults[task_faults_idx(nid, 1)];
 		if (faults > max_faults) {
 			max_faults = faults;
 			max_nid = nid;
@@ -970,16 +990,20 @@ static void task_numa_placement(struct task_struct *p)
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int node, int pages, bool migrated)
+void task_numa_fault(int last_nid, int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;
+	int priv;
 
 	if (!numabalancing_enabled)
 		return;
 
+	/* For now, do not attempt to detect private/shared accesses */
+	priv = 1;
+
 	/* Allocate buffer to track faults on a per-node basis */
 	if (unlikely(!p->numa_faults)) {
-		int size = sizeof(*p->numa_faults) * nr_node_ids;
+		int size = sizeof(*p->numa_faults) * 2 * nr_node_ids;
 
 		/* numa_faults and numa_faults_buffer share the allocation */
 		p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN);
@@ -987,7 +1011,7 @@ void task_numa_fault(int node, int pages, bool migrated)
 			return;
 
 		BUG_ON(p->numa_faults_buffer);
-		p->numa_faults_buffer = p->numa_faults + nr_node_ids;
+		p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids);
 	}
 
 	/*
@@ -1005,7 +1029,7 @@ void task_numa_fault(int node, int pages, bool migrated)
 
 	task_numa_placement(p);
 
-	p->numa_faults_buffer[node] += pages;
+	p->numa_faults_buffer[task_faults_idx(node, priv)] += pages;
 }
 
 static void reset_ptenuma_scan(struct task_struct *p)
@@ -4146,7 +4170,7 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 		return false;
 
 	if (dst_nid == p->numa_preferred_nid ||
-	    p->numa_faults[dst_nid] > p->numa_faults[src_nid])
+	    task_faults(p, dst_nid) > task_faults(p, src_nid))
 		return true;
 
 	return false;
@@ -4170,7 +4194,7 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 	    p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
 		return false;
 
-	if (p->numa_faults[dst_nid] < p->numa_faults[src_nid])
+	if (task_faults(p, dst_nid) < task_faults(p, src_nid))
 		return true;
 
 	return false;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8677dbf31c2e..914216733e0a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1282,7 +1282,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int page_nid = -1, this_nid = numa_node_id();
-	int target_nid;
+	int target_nid, last_nid = -1;
 	bool page_locked;
 	bool migrated = false;
 
@@ -1293,6 +1293,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page = pmd_page(pmd);
 	BUG_ON(is_huge_zero_page(page));
 	page_nid = page_to_nid(page);
+	last_nid = page_nid_last(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
 	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
@@ -1361,7 +1362,7 @@ out:
 		page_unlock_anon_vma_read(anon_vma);
 
 	if (page_nid != -1)
-		task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+		task_numa_fault(last_nid, page_nid, HPAGE_PMD_NR, migrated);
 
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index ed51f15136ee..24bc9b848af6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3536,6 +3536,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page = NULL;
 	spinlock_t *ptl;
 	int page_nid = -1;
+	int last_nid;
 	int target_nid;
 	bool migrated = false;
 
@@ -3566,6 +3567,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 	BUG_ON(is_zero_pfn(page_to_pfn(page)));
 
+	last_nid = page_nid_last(page);
 	page_nid = page_to_nid(page);
 	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
@@ -3581,7 +3583,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 out:
 	if (page_nid != -1)
-		task_numa_fault(page_nid, 1, migrated);
+		task_numa_fault(last_nid, page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3596,6 +3598,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
+	int last_nid;
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3643,6 +3646,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
 
+		last_nid = page_nid_last(page);
 		page_nid = page_to_nid(page);
 		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 		pte_unmap_unlock(pte, ptl);
@@ -3655,7 +3659,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 
 		if (page_nid != -1)
-			task_numa_fault(page_nid, 1, migrated);
+			task_numa_fault(last_nid, page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
-- 
cgit v1.2.3


From 1bc115d87dffd1c43bdc3c9c9d1e3a51c195d18e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:05 +0100
Subject: mm: numa: Scan pages with elevated page_mapcount

Currently automatic NUMA balancing is unable to distinguish between false
shared versus private pages except by ignoring pages with an elevated
page_mapcount entirely. This avoids shared pages bouncing between the
nodes whose task is using them but that is ignored quite a lot of data.

This patch kicks away the training wheels in preparation for adding support
for identifying shared/private pages is now in place. The ordering is so
that the impact of the shared/private detection can be easily measured. Note
that the patch does not migrate shared, file-backed within vmas marked
VM_EXEC as these are generally shared library pages. Migrating such pages
is not beneficial as there is an expectation they are read-shared between
caches and iTLB and iCache pressure is generally low.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-28-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/migrate.h |  7 ++++---
 mm/huge_memory.c        | 12 +++++-------
 mm/memory.c             |  7 ++-----
 mm/migrate.c            | 17 ++++++-----------
 mm/mprotect.c           |  4 +---
 5 files changed, 18 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 8d3c57fdf221..f5096b58b20d 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -90,11 +90,12 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
-extern int migrate_misplaced_page(struct page *page, int node);
-extern int migrate_misplaced_page(struct page *page, int node);
+extern int migrate_misplaced_page(struct page *page,
+				  struct vm_area_struct *vma, int node);
 extern bool migrate_ratelimited(int node);
 #else
-static inline int migrate_misplaced_page(struct page *page, int node)
+static inline int migrate_misplaced_page(struct page *page,
+					 struct vm_area_struct *vma, int node)
 {
 	return -EAGAIN; /* can't migrate now */
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 914216733e0a..2a28c2c6c165 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1484,14 +1484,12 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			struct page *page = pmd_page(*pmd);
 
 			/*
-			 * Only check non-shared pages. Do not trap faults
-			 * against the zero page. The read-only data is likely
-			 * to be read-cached on the local CPU cache and it is
-			 * less useful to know about local vs remote hits on
-			 * the zero page.
+			 * Do not trap faults against the zero page. The
+			 * read-only data is likely to be read-cached on the
+			 * local CPU cache and it is less useful to know about
+			 * local vs remote hits on the zero page.
 			 */
-			if (page_mapcount(page) == 1 &&
-			    !is_huge_zero_page(page) &&
+			if (!is_huge_zero_page(page) &&
 			    !pmd_numa(*pmd)) {
 				entry = pmdp_get_and_clear(mm, addr, pmd);
 				entry = pmd_mknuma(entry);
diff --git a/mm/memory.c b/mm/memory.c
index 24bc9b848af6..3e3b4b8b6c41 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3577,7 +3577,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	/* Migrate to the requested node */
-	migrated = migrate_misplaced_page(page, target_nid);
+	migrated = migrate_misplaced_page(page, vma, target_nid);
 	if (migrated)
 		page_nid = target_nid;
 
@@ -3642,16 +3642,13 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = vm_normal_page(vma, addr, pteval);
 		if (unlikely(!page))
 			continue;
-		/* only check non-shared pages */
-		if (unlikely(page_mapcount(page) != 1))
-			continue;
 
 		last_nid = page_nid_last(page);
 		page_nid = page_to_nid(page);
 		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 		pte_unmap_unlock(pte, ptl);
 		if (target_nid != -1) {
-			migrated = migrate_misplaced_page(page, target_nid);
+			migrated = migrate_misplaced_page(page, vma, target_nid);
 			if (migrated)
 				page_nid = target_nid;
 		} else {
diff --git a/mm/migrate.c b/mm/migrate.c
index 7bd90d3b16bb..fcba2f46bb80 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1599,7 +1599,8 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
  * node. Caller is expected to have an elevated reference count on
  * the page that will be dropped by this function before returning.
  */
-int migrate_misplaced_page(struct page *page, int node)
+int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
+			   int node)
 {
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated;
@@ -1607,10 +1608,11 @@ int migrate_misplaced_page(struct page *page, int node)
 	LIST_HEAD(migratepages);
 
 	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
+	 * Don't migrate file pages that are mapped in multiple processes
+	 * with execute permissions as they are probably shared libraries.
 	 */
-	if (page_mapcount(page) != 1)
+	if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
+	    (vma->vm_flags & VM_EXEC))
 		goto out;
 
 	/*
@@ -1660,13 +1662,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
 
-	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
-	 */
-	if (page_mapcount(page) != 1)
-		goto out_dropref;
-
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
 	 * Optimal placement is no good if the memory bus is saturated and
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 2da33dca6134..41e02923fcd9 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -69,9 +69,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 					if (last_nid != this_nid)
 						all_same_node = false;
 
-					/* only check non-shared pages */
-					if (!pte_numa(oldpte) &&
-					    page_mapcount(page) == 1) {
+					if (!pte_numa(oldpte)) {
 						ptent = pte_mknuma(ptent);
 						updated = true;
 					}
-- 
cgit v1.2.3


From b795854b1fa70f6aee923ae5df74ff7afeaddcaa Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:07 +0100
Subject: sched/numa: Set preferred NUMA node based on number of private faults

Ideally it would be possible to distinguish between NUMA hinting faults that
are private to a task and those that are shared. If treated identically
there is a risk that shared pages bounce between nodes depending on
the order they are referenced by tasks. Ultimately what is desirable is
that task private pages remain local to the task while shared pages are
interleaved between sharing tasks running on different nodes to give good
average performance. This is further complicated by THP as even
applications that partition their data may not be partitioning on a huge
page boundary.

To start with, this patch assumes that multi-threaded or multi-process
applications partition their data and that in general the private accesses
are more important for cpu->memory locality in the general case. Also,
no new infrastructure is required to treat private pages properly but
interleaving for shared pages requires additional infrastructure.

To detect private accesses the pid of the last accessing task is required
but the storage requirements are a high. This patch borrows heavily from
Ingo Molnar's patch "numa, mm, sched: Implement last-CPU+PID hash tracking"
to encode some bits from the last accessing task in the page flags as
well as the node information. Collisions will occur but it is better than
just depending on the node information. Node information is then used to
determine if a page needs to migrate. The PID information is used to detect
private/shared accesses. The preferred NUMA node is selected based on where
the maximum number of approximately private faults were measured. Shared
faults are not taken into consideration for a few reasons.

First, if there are many tasks sharing the page then they'll all move
towards the same node. The node will be compute overloaded and then
scheduled away later only to bounce back again. Alternatively the shared
tasks would just bounce around nodes because the fault information is
effectively noise. Either way accounting for shared faults the same as
private faults can result in lower performance overall.

The second reason is based on a hypothetical workload that has a small
number of very important, heavily accessed private pages but a large shared
array. The shared array would dominate the number of faults and be selected
as a preferred node even though it's the wrong decision.

The third reason is that multiple threads in a process will race each
other to fault the shared page making the fault information unreliable.

Signed-off-by: Mel Gorman <mgorman@suse.de>
[ Fix complication error when !NUMA_BALANCING. ]
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-30-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm.h                | 89 +++++++++++++++++++++++++++++----------
 include/linux/mm_types.h          |  4 +-
 include/linux/page-flags-layout.h | 28 +++++++-----
 kernel/sched/fair.c               | 12 ++++--
 mm/huge_memory.c                  |  8 ++--
 mm/memory.c                       | 16 +++----
 mm/mempolicy.c                    |  8 ++--
 mm/migrate.c                      |  4 +-
 mm/mm_init.c                      | 18 ++++----
 mm/mmzone.c                       | 14 +++---
 mm/mprotect.c                     | 26 ++++++++----
 mm/page_alloc.c                   |  4 +-
 12 files changed, 149 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55ee8855..bb412ce2a8b5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -581,11 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
  * sets it, so none of the operations on it need to be atomic.
  */
 
-/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NID] | ... | FLAGS | */
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NIDPID] | ... | FLAGS | */
 #define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
-#define LAST_NID_PGOFF		(ZONES_PGOFF - LAST_NID_WIDTH)
+#define LAST_NIDPID_PGOFF	(ZONES_PGOFF - LAST_NIDPID_WIDTH)
 
 /*
  * Define the bit shifts to access each section.  For non-existent
@@ -595,7 +595,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
 #define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
 #define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
-#define LAST_NID_PGSHIFT	(LAST_NID_PGOFF * (LAST_NID_WIDTH != 0))
+#define LAST_NIDPID_PGSHIFT	(LAST_NIDPID_PGOFF * (LAST_NIDPID_WIDTH != 0))
 
 /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
 #ifdef NODE_NOT_IN_PAGE_FLAGS
@@ -617,7 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
 #define NODES_MASK		((1UL << NODES_WIDTH) - 1)
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_NID_MASK		((1UL << LAST_NID_WIDTH) - 1)
+#define LAST_NIDPID_MASK	((1UL << LAST_NIDPID_WIDTH) - 1)
 #define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)
 
 static inline enum zone_type page_zonenum(const struct page *page)
@@ -661,48 +661,93 @@ static inline int page_to_nid(const struct page *page)
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-static inline int page_nid_xchg_last(struct page *page, int nid)
+static inline int nid_pid_to_nidpid(int nid, int pid)
 {
-	return xchg(&page->_last_nid, nid);
+	return ((nid & LAST__NID_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
 }
 
-static inline int page_nid_last(struct page *page)
+static inline int nidpid_to_pid(int nidpid)
 {
-	return page->_last_nid;
+	return nidpid & LAST__PID_MASK;
 }
-static inline void page_nid_reset_last(struct page *page)
+
+static inline int nidpid_to_nid(int nidpid)
+{
+	return (nidpid >> LAST__PID_SHIFT) & LAST__NID_MASK;
+}
+
+static inline bool nidpid_pid_unset(int nidpid)
+{
+	return nidpid_to_pid(nidpid) == (-1 & LAST__PID_MASK);
+}
+
+static inline bool nidpid_nid_unset(int nidpid)
 {
-	page->_last_nid = -1;
+	return nidpid_to_nid(nidpid) == (-1 & LAST__NID_MASK);
+}
+
+#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
+static inline int page_nidpid_xchg_last(struct page *page, int nid)
+{
+	return xchg(&page->_last_nidpid, nid);
+}
+
+static inline int page_nidpid_last(struct page *page)
+{
+	return page->_last_nidpid;
+}
+static inline void page_nidpid_reset_last(struct page *page)
+{
+	page->_last_nidpid = -1;
 }
 #else
-static inline int page_nid_last(struct page *page)
+static inline int page_nidpid_last(struct page *page)
 {
-	return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK;
+	return (page->flags >> LAST_NIDPID_PGSHIFT) & LAST_NIDPID_MASK;
 }
 
-extern int page_nid_xchg_last(struct page *page, int nid);
+extern int page_nidpid_xchg_last(struct page *page, int nidpid);
 
-static inline void page_nid_reset_last(struct page *page)
+static inline void page_nidpid_reset_last(struct page *page)
 {
-	int nid = (1 << LAST_NID_SHIFT) - 1;
+	int nidpid = (1 << LAST_NIDPID_SHIFT) - 1;
 
-	page->flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
-	page->flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
+	page->flags &= ~(LAST_NIDPID_MASK << LAST_NIDPID_PGSHIFT);
+	page->flags |= (nidpid & LAST_NIDPID_MASK) << LAST_NIDPID_PGSHIFT;
 }
-#endif /* LAST_NID_NOT_IN_PAGE_FLAGS */
+#endif /* LAST_NIDPID_NOT_IN_PAGE_FLAGS */
 #else
-static inline int page_nid_xchg_last(struct page *page, int nid)
+static inline int page_nidpid_xchg_last(struct page *page, int nidpid)
 {
 	return page_to_nid(page);
 }
 
-static inline int page_nid_last(struct page *page)
+static inline int page_nidpid_last(struct page *page)
 {
 	return page_to_nid(page);
 }
 
-static inline void page_nid_reset_last(struct page *page)
+static inline int nidpid_to_nid(int nidpid)
+{
+	return -1;
+}
+
+static inline int nidpid_to_pid(int nidpid)
+{
+	return -1;
+}
+
+static inline int nid_pid_to_nidpid(int nid, int pid)
+{
+	return -1;
+}
+
+static inline bool nidpid_pid_unset(int nidpid)
+{
+	return 1;
+}
+
+static inline void page_nidpid_reset_last(struct page *page)
 {
 }
 #endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b7adf1d4310c..38a902a6d1e3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -174,8 +174,8 @@ struct page {
 	void *shadow;
 #endif
 
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-	int _last_nid;
+#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
+	int _last_nidpid;
 #endif
 }
 /*
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 93506a114034..02bc9184f16b 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -38,10 +38,10 @@
  * The last is when there is insufficient space in page->flags and a separate
  * lookup is necessary.
  *
- * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE |          ... | FLAGS |
- *         " plus space for last_nid: |       NODE     | ZONE | LAST_NID ... | FLAGS |
- * classic sparse with space for node:| SECTION | NODE | ZONE |          ... | FLAGS |
- *         " plus space for last_nid: | SECTION | NODE | ZONE | LAST_NID ... | FLAGS |
+ * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE |             ... | FLAGS |
+ *      " plus space for last_nidpid: |       NODE     | ZONE | LAST_NIDPID ... | FLAGS |
+ * classic sparse with space for node:| SECTION | NODE | ZONE |             ... | FLAGS |
+ *      " plus space for last_nidpid: | SECTION | NODE | ZONE | LAST_NIDPID ... | FLAGS |
  * classic sparse no space for node:  | SECTION |     ZONE    | ... | FLAGS |
  */
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -62,15 +62,21 @@
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
-#define LAST_NID_SHIFT NODES_SHIFT
+#define LAST__PID_SHIFT 8
+#define LAST__PID_MASK  ((1 << LAST__PID_SHIFT)-1)
+
+#define LAST__NID_SHIFT NODES_SHIFT
+#define LAST__NID_MASK  ((1 << LAST__NID_SHIFT)-1)
+
+#define LAST_NIDPID_SHIFT (LAST__PID_SHIFT+LAST__NID_SHIFT)
 #else
-#define LAST_NID_SHIFT 0
+#define LAST_NIDPID_SHIFT 0
 #endif
 
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
-#define LAST_NID_WIDTH LAST_NID_SHIFT
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NIDPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#define LAST_NIDPID_WIDTH LAST_NIDPID_SHIFT
 #else
-#define LAST_NID_WIDTH 0
+#define LAST_NIDPID_WIDTH 0
 #endif
 
 /*
@@ -81,8 +87,8 @@
 #define NODE_NOT_IN_PAGE_FLAGS
 #endif
 
-#if defined(CONFIG_NUMA_BALANCING) && LAST_NID_WIDTH == 0
-#define LAST_NID_NOT_IN_PAGE_FLAGS
+#if defined(CONFIG_NUMA_BALANCING) && LAST_NIDPID_WIDTH == 0
+#define LAST_NIDPID_NOT_IN_PAGE_FLAGS
 #endif
 
 #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 862d20d02e5c..b1de7c55e9f7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -988,7 +988,7 @@ static void task_numa_placement(struct task_struct *p)
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int last_nid, int node, int pages, bool migrated)
+void task_numa_fault(int last_nidpid, int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;
 	int priv;
@@ -1000,8 +1000,14 @@ void task_numa_fault(int last_nid, int node, int pages, bool migrated)
 	if (!p->mm)
 		return;
 
-	/* For now, do not attempt to detect private/shared accesses */
-	priv = 1;
+	/*
+	 * First accesses are treated as private, otherwise consider accesses
+	 * to be private if the accessing pid has not changed
+	 */
+	if (!nidpid_pid_unset(last_nidpid))
+		priv = ((p->pid & LAST__PID_MASK) == nidpid_to_pid(last_nidpid));
+	else
+		priv = 1;
 
 	/* Allocate buffer to track faults on a per-node basis */
 	if (unlikely(!p->numa_faults)) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a28c2c6c165..0baf0e4d5203 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1282,7 +1282,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int page_nid = -1, this_nid = numa_node_id();
-	int target_nid, last_nid = -1;
+	int target_nid, last_nidpid = -1;
 	bool page_locked;
 	bool migrated = false;
 
@@ -1293,7 +1293,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page = pmd_page(pmd);
 	BUG_ON(is_huge_zero_page(page));
 	page_nid = page_to_nid(page);
-	last_nid = page_nid_last(page);
+	last_nidpid = page_nidpid_last(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
 	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
@@ -1362,7 +1362,7 @@ out:
 		page_unlock_anon_vma_read(anon_vma);
 
 	if (page_nid != -1)
-		task_numa_fault(last_nid, page_nid, HPAGE_PMD_NR, migrated);
+		task_numa_fault(last_nidpid, page_nid, HPAGE_PMD_NR, migrated);
 
 	return 0;
 }
@@ -1682,7 +1682,7 @@ static void __split_huge_page_refcount(struct page *page,
 		page_tail->mapping = page->mapping;
 
 		page_tail->index = page->index + i;
-		page_nid_xchg_last(page_tail, page_nid_last(page));
+		page_nidpid_xchg_last(page_tail, page_nidpid_last(page));
 
 		BUG_ON(!PageAnon(page_tail));
 		BUG_ON(!PageUptodate(page_tail));
diff --git a/mm/memory.c b/mm/memory.c
index 3e3b4b8b6c41..cc7f20691c82 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,8 +69,8 @@
 
 #include "internal.h"
 
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid.
+#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
+#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nidpid.
 #endif
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -3536,7 +3536,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page = NULL;
 	spinlock_t *ptl;
 	int page_nid = -1;
-	int last_nid;
+	int last_nidpid;
 	int target_nid;
 	bool migrated = false;
 
@@ -3567,7 +3567,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 	BUG_ON(is_zero_pfn(page_to_pfn(page)));
 
-	last_nid = page_nid_last(page);
+	last_nidpid = page_nidpid_last(page);
 	page_nid = page_to_nid(page);
 	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
@@ -3583,7 +3583,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 out:
 	if (page_nid != -1)
-		task_numa_fault(last_nid, page_nid, 1, migrated);
+		task_numa_fault(last_nidpid, page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3598,7 +3598,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
-	int last_nid;
+	int last_nidpid;
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3643,7 +3643,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(!page))
 			continue;
 
-		last_nid = page_nid_last(page);
+		last_nidpid = page_nidpid_last(page);
 		page_nid = page_to_nid(page);
 		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 		pte_unmap_unlock(pte, ptl);
@@ -3656,7 +3656,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 
 		if (page_nid != -1)
-			task_numa_fault(last_nid, page_nid, 1, migrated);
+			task_numa_fault(last_nidpid, page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 04729647f359..aff1f1ed3dc5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2348,9 +2348,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 
 	/* Migrate the page towards the node whose CPU is referencing it */
 	if (pol->flags & MPOL_F_MORON) {
-		int last_nid;
+		int last_nidpid;
+		int this_nidpid;
 
 		polnid = numa_node_id();
+		this_nidpid = nid_pid_to_nidpid(polnid, current->pid);
 
 		/*
 		 * Multi-stage node selection is used in conjunction
@@ -2373,8 +2375,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 		 * it less likely we act on an unlikely task<->page
 		 * relation.
 		 */
-		last_nid = page_nid_xchg_last(page, polnid);
-		if (last_nid != polnid)
+		last_nidpid = page_nidpid_xchg_last(page, this_nidpid);
+		if (!nidpid_pid_unset(last_nidpid) && nidpid_to_nid(last_nidpid) != polnid)
 			goto out;
 	}
 
diff --git a/mm/migrate.c b/mm/migrate.c
index fcba2f46bb80..025d1e3d2ad2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1498,7 +1498,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 					  __GFP_NOWARN) &
 					 ~GFP_IOFS, 0);
 	if (newpage)
-		page_nid_xchg_last(newpage, page_nid_last(page));
+		page_nidpid_xchg_last(newpage, page_nidpid_last(page));
 
 	return newpage;
 }
@@ -1675,7 +1675,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	if (!new_page)
 		goto out_fail;
 
-	page_nid_xchg_last(new_page, page_nid_last(page));
+	page_nidpid_xchg_last(new_page, page_nidpid_last(page));
 
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated) {
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 633c08863fd8..467de579784b 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -71,26 +71,26 @@ void __init mminit_verify_pageflags_layout(void)
 	unsigned long or_mask, add_mask;
 
 	shift = 8 * sizeof(unsigned long);
-	width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - LAST_NID_SHIFT;
+	width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - LAST_NIDPID_SHIFT;
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
-		"Section %d Node %d Zone %d Lastnid %d Flags %d\n",
+		"Section %d Node %d Zone %d Lastnidpid %d Flags %d\n",
 		SECTIONS_WIDTH,
 		NODES_WIDTH,
 		ZONES_WIDTH,
-		LAST_NID_WIDTH,
+		LAST_NIDPID_WIDTH,
 		NR_PAGEFLAGS);
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
-		"Section %d Node %d Zone %d Lastnid %d\n",
+		"Section %d Node %d Zone %d Lastnidpid %d\n",
 		SECTIONS_SHIFT,
 		NODES_SHIFT,
 		ZONES_SHIFT,
-		LAST_NID_SHIFT);
+		LAST_NIDPID_SHIFT);
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_pgshifts",
-		"Section %lu Node %lu Zone %lu Lastnid %lu\n",
+		"Section %lu Node %lu Zone %lu Lastnidpid %lu\n",
 		(unsigned long)SECTIONS_PGSHIFT,
 		(unsigned long)NODES_PGSHIFT,
 		(unsigned long)ZONES_PGSHIFT,
-		(unsigned long)LAST_NID_PGSHIFT);
+		(unsigned long)LAST_NIDPID_PGSHIFT);
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodezoneid",
 		"Node/Zone ID: %lu -> %lu\n",
 		(unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT),
@@ -102,9 +102,9 @@ void __init mminit_verify_pageflags_layout(void)
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
 		"Node not in page flags");
 #endif
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
+#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
-		"Last nid not in page flags");
+		"Last nidpid not in page flags");
 #endif
 
 	if (SECTIONS_WIDTH) {
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 2ac0afbd68f3..25bb477deb26 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -97,20 +97,20 @@ void lruvec_init(struct lruvec *lruvec)
 		INIT_LIST_HEAD(&lruvec->lists[lru]);
 }
 
-#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_NID_NOT_IN_PAGE_FLAGS)
-int page_nid_xchg_last(struct page *page, int nid)
+#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_NIDPID_NOT_IN_PAGE_FLAGS)
+int page_nidpid_xchg_last(struct page *page, int nidpid)
 {
 	unsigned long old_flags, flags;
-	int last_nid;
+	int last_nidpid;
 
 	do {
 		old_flags = flags = page->flags;
-		last_nid = page_nid_last(page);
+		last_nidpid = page_nidpid_last(page);
 
-		flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
-		flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
+		flags &= ~(LAST_NIDPID_MASK << LAST_NIDPID_PGSHIFT);
+		flags |= (nidpid & LAST_NIDPID_MASK) << LAST_NIDPID_PGSHIFT;
 	} while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags));
 
-	return last_nid;
+	return last_nidpid;
 }
 #endif
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 41e02923fcd9..f0b087d1069c 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,14 +37,15 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa, bool *ret_all_same_node)
+		int dirty_accountable, int prot_numa, bool *ret_all_same_nidpid)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
-	bool all_same_node = true;
+	bool all_same_nidpid = true;
 	int last_nid = -1;
+	int last_pid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -63,11 +64,18 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 				page = vm_normal_page(vma, addr, oldpte);
 				if (page) {
-					int this_nid = page_to_nid(page);
+					int nidpid = page_nidpid_last(page);
+					int this_nid = nidpid_to_nid(nidpid);
+					int this_pid = nidpid_to_pid(nidpid);
+
 					if (last_nid == -1)
 						last_nid = this_nid;
-					if (last_nid != this_nid)
-						all_same_node = false;
+					if (last_pid == -1)
+						last_pid = this_pid;
+					if (last_nid != this_nid ||
+					    last_pid != this_pid) {
+						all_same_nidpid = false;
+					}
 
 					if (!pte_numa(oldpte)) {
 						ptent = pte_mknuma(ptent);
@@ -107,7 +115,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-	*ret_all_same_node = all_same_node;
+	*ret_all_same_nidpid = all_same_nidpid;
 	return pages;
 }
 
@@ -134,7 +142,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
-	bool all_same_node;
+	bool all_same_nidpid;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -158,7 +166,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		pages += change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa, &all_same_node);
+				 dirty_accountable, prot_numa, &all_same_nidpid);
 
 		/*
 		 * If we are changing protections for NUMA hinting faults then
@@ -166,7 +174,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		 * node. This allows a regular PMD to be handled as one fault
 		 * and effectively batches the taking of the PTL
 		 */
-		if (prot_numa && all_same_node)
+		if (prot_numa && all_same_nidpid)
 			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dd886fac451a..89bedd0e4cad 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -626,7 +626,7 @@ static inline int free_pages_check(struct page *page)
 		bad_page(page);
 		return 1;
 	}
-	page_nid_reset_last(page);
+	page_nidpid_reset_last(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -4015,7 +4015,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		page_mapcount_reset(page);
-		page_nid_reset_last(page);
+		page_nidpid_reset_last(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
-- 
cgit v1.2.3


From fc3147245d193bd0f57307859c698fa28a20b0fe Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:09 +0100
Subject: mm: numa: Limit NUMA scanning to migrate-on-fault VMAs

There is a 90% regression observed with a large Oracle performance test
on a 4 node system. Profiles indicated that the overhead was due to
contention on sp_lock when looking up shared memory policies. These
policies do not have the appropriate flags to allow them to be
automatically balanced so trapping faults on them is pointless. This
patch skips VMAs that do not have MPOL_F_MOF set.

[riel@redhat.com: Initial patch]

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reported-and-tested-by: Joe Mario <jmario@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-32-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mempolicy.h |  1 +
 kernel/sched/fair.c       |  2 +-
 mm/mempolicy.c            | 24 ++++++++++++++++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index da6716b9e3fe..ea4d2495c646 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -136,6 +136,7 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 
 struct mempolicy *get_vma_policy(struct task_struct *tsk,
 		struct vm_area_struct *vma, unsigned long addr);
+bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 61ec0d4765b9..d98175d5c2c6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1130,7 +1130,7 @@ void task_numa_work(struct callback_head *work)
 		vma = mm->mmap;
 	}
 	for (; vma; vma = vma->vm_next) {
-		if (!vma_migratable(vma))
+		if (!vma_migratable(vma) || !vma_policy_mof(p, vma))
 			continue;
 
 		do {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 196d8da2b657..0e895a2eed5f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1679,6 +1679,30 @@ struct mempolicy *get_vma_policy(struct task_struct *task,
 	return pol;
 }
 
+bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma)
+{
+	struct mempolicy *pol = get_task_policy(task);
+	if (vma) {
+		if (vma->vm_ops && vma->vm_ops->get_policy) {
+			bool ret = false;
+
+			pol = vma->vm_ops->get_policy(vma, vma->vm_start);
+			if (pol && (pol->flags & MPOL_F_MOF))
+				ret = true;
+			mpol_cond_put(pol);
+
+			return ret;
+		} else if (vma->vm_policy) {
+			pol = vma->vm_policy;
+		}
+	}
+
+	if (!pol)
+		return default_policy.flags & MPOL_F_MOF;
+
+	return pol->flags & MPOL_F_MOF;
+}
+
 static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
 {
 	enum zone_type dynamic_policy_zone = policy_zone;
-- 
cgit v1.2.3


From 6b9a7460b6baf6c77fc3d23d927ddfc3f3f05bf3 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:11 +0100
Subject: sched/numa: Retry migration of tasks to CPU on a preferred node

When a preferred node is selected for a tasks there is an attempt to migrate
the task to a CPU there. This may fail in which case the task will only
migrate if the active load balancer takes action. This may never happen if
the conditions are not right. This patch will check at NUMA hinting fault
time if another attempt should be made to migrate the task. It will only
make an attempt once every five seconds.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-34-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  1 +
 kernel/sched/fair.c   | 30 +++++++++++++++++++++++-------
 2 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d946195eec10..14251a8ff2ea 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1341,6 +1341,7 @@ struct task_struct {
 	int numa_migrate_seq;
 	unsigned int numa_scan_period;
 	unsigned int numa_scan_period_max;
+	unsigned long numa_migrate_retry;
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 51a760081193..f84ac3fb581b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1011,6 +1011,23 @@ migrate:
 	return migrate_task_to(p, env.best_cpu);
 }
 
+/* Attempt to migrate a task to a CPU on the preferred node. */
+static void numa_migrate_preferred(struct task_struct *p)
+{
+	/* Success if task is already running on preferred CPU */
+	p->numa_migrate_retry = 0;
+	if (cpu_to_node(task_cpu(p)) == p->numa_preferred_nid)
+		return;
+
+	/* This task has no NUMA fault statistics yet */
+	if (unlikely(p->numa_preferred_nid == -1))
+		return;
+
+	/* Otherwise, try migrate to a CPU on the preferred node */
+	if (task_numa_migrate(p) != 0)
+		p->numa_migrate_retry = jiffies + HZ*5;
+}
+
 static void task_numa_placement(struct task_struct *p)
 {
 	int seq, nid, max_nid = -1;
@@ -1045,17 +1062,12 @@ static void task_numa_placement(struct task_struct *p)
 		}
 	}
 
-	/*
-	 * Record the preferred node as the node with the most faults,
-	 * requeue the task to be running on the idlest CPU on the
-	 * preferred node and reset the scanning rate to recheck
-	 * the working set placement.
-	 */
+	/* Preferred node as the node with the most faults */
 	if (max_faults && max_nid != p->numa_preferred_nid) {
 		/* Update the preferred nid and migrate task if possible */
 		p->numa_preferred_nid = max_nid;
 		p->numa_migrate_seq = 1;
-		task_numa_migrate(p);
+		numa_migrate_preferred(p);
 	}
 }
 
@@ -1111,6 +1123,10 @@ void task_numa_fault(int last_nidpid, int node, int pages, bool migrated)
 
 	task_numa_placement(p);
 
+	/* Retry task to preferred node migration if it previously failed */
+	if (p->numa_migrate_retry && time_after(jiffies, p->numa_migrate_retry))
+		numa_migrate_preferred(p);
+
 	p->numa_faults_buffer[task_faults_idx(node, priv)] += pages;
 }
 
-- 
cgit v1.2.3


From 1be0bd77c5dd7c903f46abf52f9a3650face3c1d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:15 +0100
Subject: stop_machine: Introduce stop_two_cpus()

Introduce stop_two_cpus() in order to allow controlled swapping of two
tasks. It repurposes the stop_machine() state machine but only stops
the two cpus which we can do with on-stack structures and avoid
machine wide synchronization issues.

The ordering of CPUs is important to avoid deadlocks. If unordered then
two cpus calling stop_two_cpus on each other simultaneously would attempt
to queue in the opposite order on each CPU causing an AB-BA style deadlock.
By always having the lowest number CPU doing the queueing of works, we can
guarantee that works are always queued in the same order, and deadlocks
are avoided.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
[ Implemented deadlock avoidance. ]
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/1381141781-10992-38-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/stop_machine.h |   1 +
 kernel/stop_machine.c        | 272 +++++++++++++++++++++++++++----------------
 2 files changed, 175 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 3b5e910d14ca..d2abbdb8c6aa 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -28,6 +28,7 @@ struct cpu_stop_work {
 };
 
 int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
+int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg);
 void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 			 struct cpu_stop_work *work_buf);
 int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c09f2955ae30..32a6c44d8f78 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -115,6 +115,166 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
 	return done.executed ? done.ret : -ENOENT;
 }
 
+/* This controls the threads on each CPU. */
+enum multi_stop_state {
+	/* Dummy starting state for thread. */
+	MULTI_STOP_NONE,
+	/* Awaiting everyone to be scheduled. */
+	MULTI_STOP_PREPARE,
+	/* Disable interrupts. */
+	MULTI_STOP_DISABLE_IRQ,
+	/* Run the function */
+	MULTI_STOP_RUN,
+	/* Exit */
+	MULTI_STOP_EXIT,
+};
+
+struct multi_stop_data {
+	int			(*fn)(void *);
+	void			*data;
+	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+	unsigned int		num_threads;
+	const struct cpumask	*active_cpus;
+
+	enum multi_stop_state	state;
+	atomic_t		thread_ack;
+};
+
+static void set_state(struct multi_stop_data *msdata,
+		      enum multi_stop_state newstate)
+{
+	/* Reset ack counter. */
+	atomic_set(&msdata->thread_ack, msdata->num_threads);
+	smp_wmb();
+	msdata->state = newstate;
+}
+
+/* Last one to ack a state moves to the next state. */
+static void ack_state(struct multi_stop_data *msdata)
+{
+	if (atomic_dec_and_test(&msdata->thread_ack))
+		set_state(msdata, msdata->state + 1);
+}
+
+/* This is the cpu_stop function which stops the CPU. */
+static int multi_cpu_stop(void *data)
+{
+	struct multi_stop_data *msdata = data;
+	enum multi_stop_state curstate = MULTI_STOP_NONE;
+	int cpu = smp_processor_id(), err = 0;
+	unsigned long flags;
+	bool is_active;
+
+	/*
+	 * When called from stop_machine_from_inactive_cpu(), irq might
+	 * already be disabled.  Save the state and restore it on exit.
+	 */
+	local_save_flags(flags);
+
+	if (!msdata->active_cpus)
+		is_active = cpu == cpumask_first(cpu_online_mask);
+	else
+		is_active = cpumask_test_cpu(cpu, msdata->active_cpus);
+
+	/* Simple state machine */
+	do {
+		/* Chill out and ensure we re-read multi_stop_state. */
+		cpu_relax();
+		if (msdata->state != curstate) {
+			curstate = msdata->state;
+			switch (curstate) {
+			case MULTI_STOP_DISABLE_IRQ:
+				local_irq_disable();
+				hard_irq_disable();
+				break;
+			case MULTI_STOP_RUN:
+				if (is_active)
+					err = msdata->fn(msdata->data);
+				break;
+			default:
+				break;
+			}
+			ack_state(msdata);
+		}
+	} while (curstate != MULTI_STOP_EXIT);
+
+	local_irq_restore(flags);
+	return err;
+}
+
+struct irq_cpu_stop_queue_work_info {
+	int cpu1;
+	int cpu2;
+	struct cpu_stop_work *work1;
+	struct cpu_stop_work *work2;
+};
+
+/*
+ * This function is always run with irqs and preemption disabled.
+ * This guarantees that both work1 and work2 get queued, before
+ * our local migrate thread gets the chance to preempt us.
+ */
+static void irq_cpu_stop_queue_work(void *arg)
+{
+	struct irq_cpu_stop_queue_work_info *info = arg;
+	cpu_stop_queue_work(info->cpu1, info->work1);
+	cpu_stop_queue_work(info->cpu2, info->work2);
+}
+
+/**
+ * stop_two_cpus - stops two cpus
+ * @cpu1: the cpu to stop
+ * @cpu2: the other cpu to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Stops both the current and specified CPU and runs @fn on one of them.
+ *
+ * returns when both are completed.
+ */
+int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
+{
+	int call_cpu;
+	struct cpu_stop_done done;
+	struct cpu_stop_work work1, work2;
+	struct irq_cpu_stop_queue_work_info call_args;
+	struct multi_stop_data msdata = {
+		.fn = fn,
+		.data = arg,
+		.num_threads = 2,
+		.active_cpus = cpumask_of(cpu1),
+	};
+
+	work1 = work2 = (struct cpu_stop_work){
+		.fn = multi_cpu_stop,
+		.arg = &msdata,
+		.done = &done
+	};
+
+	call_args = (struct irq_cpu_stop_queue_work_info){
+		.cpu1 = cpu1,
+		.cpu2 = cpu2,
+		.work1 = &work1,
+		.work2 = &work2,
+	};
+
+	cpu_stop_init_done(&done, 2);
+	set_state(&msdata, MULTI_STOP_PREPARE);
+
+	/*
+	 * Queuing needs to be done by the lowest numbered CPU, to ensure
+	 * that works are always queued in the same order on every CPU.
+	 * This prevents deadlocks.
+	 */
+	call_cpu = min(cpu1, cpu2);
+
+	smp_call_function_single(call_cpu, &irq_cpu_stop_queue_work,
+				 &call_args, 0);
+
+	wait_for_completion(&done.completion);
+	return done.executed ? done.ret : -ENOENT;
+}
+
 /**
  * stop_one_cpu_nowait - stop a cpu but don't wait for completion
  * @cpu: cpu to stop
@@ -359,98 +519,14 @@ early_initcall(cpu_stop_init);
 
 #ifdef CONFIG_STOP_MACHINE
 
-/* This controls the threads on each CPU. */
-enum stopmachine_state {
-	/* Dummy starting state for thread. */
-	STOPMACHINE_NONE,
-	/* Awaiting everyone to be scheduled. */
-	STOPMACHINE_PREPARE,
-	/* Disable interrupts. */
-	STOPMACHINE_DISABLE_IRQ,
-	/* Run the function */
-	STOPMACHINE_RUN,
-	/* Exit */
-	STOPMACHINE_EXIT,
-};
-
-struct stop_machine_data {
-	int			(*fn)(void *);
-	void			*data;
-	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
-	unsigned int		num_threads;
-	const struct cpumask	*active_cpus;
-
-	enum stopmachine_state	state;
-	atomic_t		thread_ack;
-};
-
-static void set_state(struct stop_machine_data *smdata,
-		      enum stopmachine_state newstate)
-{
-	/* Reset ack counter. */
-	atomic_set(&smdata->thread_ack, smdata->num_threads);
-	smp_wmb();
-	smdata->state = newstate;
-}
-
-/* Last one to ack a state moves to the next state. */
-static void ack_state(struct stop_machine_data *smdata)
-{
-	if (atomic_dec_and_test(&smdata->thread_ack))
-		set_state(smdata, smdata->state + 1);
-}
-
-/* This is the cpu_stop function which stops the CPU. */
-static int stop_machine_cpu_stop(void *data)
-{
-	struct stop_machine_data *smdata = data;
-	enum stopmachine_state curstate = STOPMACHINE_NONE;
-	int cpu = smp_processor_id(), err = 0;
-	unsigned long flags;
-	bool is_active;
-
-	/*
-	 * When called from stop_machine_from_inactive_cpu(), irq might
-	 * already be disabled.  Save the state and restore it on exit.
-	 */
-	local_save_flags(flags);
-
-	if (!smdata->active_cpus)
-		is_active = cpu == cpumask_first(cpu_online_mask);
-	else
-		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
-
-	/* Simple state machine */
-	do {
-		/* Chill out and ensure we re-read stopmachine_state. */
-		cpu_relax();
-		if (smdata->state != curstate) {
-			curstate = smdata->state;
-			switch (curstate) {
-			case STOPMACHINE_DISABLE_IRQ:
-				local_irq_disable();
-				hard_irq_disable();
-				break;
-			case STOPMACHINE_RUN:
-				if (is_active)
-					err = smdata->fn(smdata->data);
-				break;
-			default:
-				break;
-			}
-			ack_state(smdata);
-		}
-	} while (curstate != STOPMACHINE_EXIT);
-
-	local_irq_restore(flags);
-	return err;
-}
-
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-	struct stop_machine_data smdata = { .fn = fn, .data = data,
-					    .num_threads = num_online_cpus(),
-					    .active_cpus = cpus };
+	struct multi_stop_data msdata = {
+		.fn = fn,
+		.data = data,
+		.num_threads = num_online_cpus(),
+		.active_cpus = cpus,
+	};
 
 	if (!stop_machine_initialized) {
 		/*
@@ -461,7 +537,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 		unsigned long flags;
 		int ret;
 
-		WARN_ON_ONCE(smdata.num_threads != 1);
+		WARN_ON_ONCE(msdata.num_threads != 1);
 
 		local_irq_save(flags);
 		hard_irq_disable();
@@ -472,8 +548,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 	}
 
 	/* Set the initial state and stop all online cpus. */
-	set_state(&smdata, STOPMACHINE_PREPARE);
-	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
+	set_state(&msdata, MULTI_STOP_PREPARE);
+	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
@@ -513,25 +589,25 @@ EXPORT_SYMBOL_GPL(stop_machine);
 int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
 				  const struct cpumask *cpus)
 {
-	struct stop_machine_data smdata = { .fn = fn, .data = data,
+	struct multi_stop_data msdata = { .fn = fn, .data = data,
 					    .active_cpus = cpus };
 	struct cpu_stop_done done;
 	int ret;
 
 	/* Local CPU must be inactive and CPU hotplug in progress. */
 	BUG_ON(cpu_active(raw_smp_processor_id()));
-	smdata.num_threads = num_active_cpus() + 1;	/* +1 for local */
+	msdata.num_threads = num_active_cpus() + 1;	/* +1 for local */
 
 	/* No proper task established and can't sleep - busy wait for lock. */
 	while (!mutex_trylock(&stop_cpus_mutex))
 		cpu_relax();
 
 	/* Schedule work on other CPUs and execute directly for local CPU */
-	set_state(&smdata, STOPMACHINE_PREPARE);
+	set_state(&msdata, MULTI_STOP_PREPARE);
 	cpu_stop_init_done(&done, num_active_cpus());
-	queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
+	queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
 			     &done);
-	ret = stop_machine_cpu_stop(&smdata);
+	ret = multi_cpu_stop(&msdata);
 
 	/* Busy wait for completion. */
 	while (!completion_done(&done.completion))
-- 
cgit v1.2.3


From ac66f5477239ebd3c4e2cbf2f591ef387aa09884 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:16 +0100
Subject: sched/numa: Introduce migrate_swap()

Use the new stop_two_cpus() to implement migrate_swap(), a function that
flips two tasks between their respective cpus.

I'm fairly sure there's a less crude way than employing the stop_two_cpus()
method, but everything I tried either got horribly fragile and/or complex. So
keep it simple for now.

The notable detail is how we 'migrate' tasks that aren't runnable
anymore. We'll make it appear like we migrated them before they went to
sleep. The sole difference is the previous cpu in the wakeup path, so we
override this.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/1381141781-10992-39-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h    |   2 +
 kernel/sched/core.c      | 106 ++++++++++++++++++++++++++++++++++++++++++++---
 kernel/sched/fair.c      |   3 +-
 kernel/sched/idle_task.c |   2 +-
 kernel/sched/rt.c        |   5 +--
 kernel/sched/sched.h     |   4 +-
 kernel/sched/stop_task.c |   2 +-
 7 files changed, 110 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 14251a8ff2ea..b6619792bb13 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1043,6 +1043,8 @@ struct task_struct {
 	struct task_struct *last_wakee;
 	unsigned long wakee_flips;
 	unsigned long wakee_flip_decay_ts;
+
+	int wake_cpu;
 #endif
 	int on_rq;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9060a7f4e9ed..32a2b29c2610 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1013,6 +1013,102 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	__set_task_cpu(p, new_cpu);
 }
 
+static void __migrate_swap_task(struct task_struct *p, int cpu)
+{
+	if (p->on_rq) {
+		struct rq *src_rq, *dst_rq;
+
+		src_rq = task_rq(p);
+		dst_rq = cpu_rq(cpu);
+
+		deactivate_task(src_rq, p, 0);
+		set_task_cpu(p, cpu);
+		activate_task(dst_rq, p, 0);
+		check_preempt_curr(dst_rq, p, 0);
+	} else {
+		/*
+		 * Task isn't running anymore; make it appear like we migrated
+		 * it before it went to sleep. This means on wakeup we make the
+		 * previous cpu our targer instead of where it really is.
+		 */
+		p->wake_cpu = cpu;
+	}
+}
+
+struct migration_swap_arg {
+	struct task_struct *src_task, *dst_task;
+	int src_cpu, dst_cpu;
+};
+
+static int migrate_swap_stop(void *data)
+{
+	struct migration_swap_arg *arg = data;
+	struct rq *src_rq, *dst_rq;
+	int ret = -EAGAIN;
+
+	src_rq = cpu_rq(arg->src_cpu);
+	dst_rq = cpu_rq(arg->dst_cpu);
+
+	double_rq_lock(src_rq, dst_rq);
+	if (task_cpu(arg->dst_task) != arg->dst_cpu)
+		goto unlock;
+
+	if (task_cpu(arg->src_task) != arg->src_cpu)
+		goto unlock;
+
+	if (!cpumask_test_cpu(arg->dst_cpu, tsk_cpus_allowed(arg->src_task)))
+		goto unlock;
+
+	if (!cpumask_test_cpu(arg->src_cpu, tsk_cpus_allowed(arg->dst_task)))
+		goto unlock;
+
+	__migrate_swap_task(arg->src_task, arg->dst_cpu);
+	__migrate_swap_task(arg->dst_task, arg->src_cpu);
+
+	ret = 0;
+
+unlock:
+	double_rq_unlock(src_rq, dst_rq);
+
+	return ret;
+}
+
+/*
+ * Cross migrate two tasks
+ */
+int migrate_swap(struct task_struct *cur, struct task_struct *p)
+{
+	struct migration_swap_arg arg;
+	int ret = -EINVAL;
+
+	get_online_cpus();
+
+	arg = (struct migration_swap_arg){
+		.src_task = cur,
+		.src_cpu = task_cpu(cur),
+		.dst_task = p,
+		.dst_cpu = task_cpu(p),
+	};
+
+	if (arg.src_cpu == arg.dst_cpu)
+		goto out;
+
+	if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
+		goto out;
+
+	if (!cpumask_test_cpu(arg.dst_cpu, tsk_cpus_allowed(arg.src_task)))
+		goto out;
+
+	if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task)))
+		goto out;
+
+	ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
+
+out:
+	put_online_cpus();
+	return ret;
+}
+
 struct migration_arg {
 	struct task_struct *task;
 	int dest_cpu;
@@ -1232,9 +1328,9 @@ out:
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
  */
 static inline
-int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 {
-	int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+	cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -1518,7 +1614,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	if (p->sched_class->task_waking)
 		p->sched_class->task_waking(p);
 
-	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 	if (task_cpu(p) != cpu) {
 		wake_flags |= WF_MIGRATED;
 		set_task_cpu(p, cpu);
@@ -1752,7 +1848,7 @@ void wake_up_new_task(struct task_struct *p)
 	 *  - cpus_allowed can change in the fork path
 	 *  - any previously selected cpu might disappear through hotplug
 	 */
-	set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
+	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
 
 	/* Initialize new task's runnable average */
@@ -2080,7 +2176,7 @@ void sched_exec(void)
 	int dest_cpu;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
+	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
 	if (dest_cpu == smp_processor_id())
 		goto unlock;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fbc0c84a8a04..b1e5061287ab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3706,11 +3706,10 @@ done:
  * preempt must be disabled.
  */
 static int
-select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
+select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
-	int prev_cpu = task_cpu(p);
 	int new_cpu = cpu;
 	int want_affine = 0;
 	int sync = wake_flags & WF_SYNC;
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index d8da01008d39..516c3d9ceea1 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -9,7 +9,7 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
+select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	return task_cpu(p); /* IDLE tasks as never migrated */
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ceebfba0a1dd..e9304cdc26fe 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1169,13 +1169,10 @@ static void yield_task_rt(struct rq *rq)
 static int find_lowest_rq(struct task_struct *task);
 
 static int
-select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
+select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	struct task_struct *curr;
 	struct rq *rq;
-	int cpu;
-
-	cpu = task_cpu(p);
 
 	if (p->nr_cpus_allowed == 1)
 		goto out;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 66458c902d84..4dc92d016aef 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -558,6 +558,7 @@ static inline u64 rq_clock_task(struct rq *rq)
 
 #ifdef CONFIG_NUMA_BALANCING
 extern int migrate_task_to(struct task_struct *p, int cpu);
+extern int migrate_swap(struct task_struct *, struct task_struct *);
 static inline void task_numa_free(struct task_struct *p)
 {
 	kfree(p->numa_faults);
@@ -736,6 +737,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 	 */
 	smp_wmb();
 	task_thread_info(p)->cpu = cpu;
+	p->wake_cpu = cpu;
 #endif
 }
 
@@ -991,7 +993,7 @@ struct sched_class {
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
-	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
 	void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
 
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index e08fbeeb54b9..47197de8abd9 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -11,7 +11,7 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_stop(struct task_struct *p, int sd_flag, int flags)
+select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	return task_cpu(p); /* stop tasks as never migrate */
 }
-- 
cgit v1.2.3


From 90572890d202527c366aa9489b32404e88a7c020 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:20 +0100
Subject: mm: numa: Change page last {nid,pid} into {cpu,pid}

Change the per page last fault tracking to use cpu,pid instead of
nid,pid. This will allow us to try and lookup the alternate task more
easily. Note that even though it is the cpu that is store in the page
flags that the mpol_misplaced decision is still based on the node.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-43-git-send-email-mgorman@suse.de
[ Fixed build failure on 32-bit systems. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm.h                | 90 ++++++++++++++++++++++-----------------
 include/linux/mm_types.h          |  4 +-
 include/linux/page-flags-layout.h | 22 +++++-----
 kernel/bounds.c                   |  4 ++
 kernel/sched/fair.c               |  6 +--
 mm/huge_memory.c                  |  8 ++--
 mm/memory.c                       | 16 +++----
 mm/mempolicy.c                    | 16 ++++---
 mm/migrate.c                      |  4 +-
 mm/mm_init.c                      | 18 ++++----
 mm/mmzone.c                       | 14 +++---
 mm/mprotect.c                     | 28 ++++++------
 mm/page_alloc.c                   |  4 +-
 13 files changed, 125 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bb412ce2a8b5..ce464cd4777e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -581,11 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
  * sets it, so none of the operations on it need to be atomic.
  */
 
-/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NIDPID] | ... | FLAGS | */
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
 #define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
-#define LAST_NIDPID_PGOFF	(ZONES_PGOFF - LAST_NIDPID_WIDTH)
+#define LAST_CPUPID_PGOFF	(ZONES_PGOFF - LAST_CPUPID_WIDTH)
 
 /*
  * Define the bit shifts to access each section.  For non-existent
@@ -595,7 +595,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
 #define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
 #define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
-#define LAST_NIDPID_PGSHIFT	(LAST_NIDPID_PGOFF * (LAST_NIDPID_WIDTH != 0))
+#define LAST_CPUPID_PGSHIFT	(LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
 
 /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
 #ifdef NODE_NOT_IN_PAGE_FLAGS
@@ -617,7 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
 #define NODES_MASK		((1UL << NODES_WIDTH) - 1)
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_NIDPID_MASK	((1UL << LAST_NIDPID_WIDTH) - 1)
+#define LAST_CPUPID_MASK	((1UL << LAST_CPUPID_WIDTH) - 1)
 #define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)
 
 static inline enum zone_type page_zonenum(const struct page *page)
@@ -661,96 +661,106 @@ static inline int page_to_nid(const struct page *page)
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
-static inline int nid_pid_to_nidpid(int nid, int pid)
+static inline int cpu_pid_to_cpupid(int cpu, int pid)
 {
-	return ((nid & LAST__NID_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
+	return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
 }
 
-static inline int nidpid_to_pid(int nidpid)
+static inline int cpupid_to_pid(int cpupid)
 {
-	return nidpid & LAST__PID_MASK;
+	return cpupid & LAST__PID_MASK;
 }
 
-static inline int nidpid_to_nid(int nidpid)
+static inline int cpupid_to_cpu(int cpupid)
 {
-	return (nidpid >> LAST__PID_SHIFT) & LAST__NID_MASK;
+	return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK;
 }
 
-static inline bool nidpid_pid_unset(int nidpid)
+static inline int cpupid_to_nid(int cpupid)
 {
-	return nidpid_to_pid(nidpid) == (-1 & LAST__PID_MASK);
+	return cpu_to_node(cpupid_to_cpu(cpupid));
 }
 
-static inline bool nidpid_nid_unset(int nidpid)
+static inline bool cpupid_pid_unset(int cpupid)
 {
-	return nidpid_to_nid(nidpid) == (-1 & LAST__NID_MASK);
+	return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK);
 }
 
-#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
-static inline int page_nidpid_xchg_last(struct page *page, int nid)
+static inline bool cpupid_cpu_unset(int cpupid)
 {
-	return xchg(&page->_last_nidpid, nid);
+	return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK);
 }
 
-static inline int page_nidpid_last(struct page *page)
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
-	return page->_last_nidpid;
+	return xchg(&page->_last_cpupid, cpupid);
 }
-static inline void page_nidpid_reset_last(struct page *page)
+
+static inline int page_cpupid_last(struct page *page)
+{
+	return page->_last_cpupid;
+}
+static inline void page_cpupid_reset_last(struct page *page)
 {
-	page->_last_nidpid = -1;
+	page->_last_cpupid = -1;
 }
 #else
-static inline int page_nidpid_last(struct page *page)
+static inline int page_cpupid_last(struct page *page)
 {
-	return (page->flags >> LAST_NIDPID_PGSHIFT) & LAST_NIDPID_MASK;
+	return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
 }
 
-extern int page_nidpid_xchg_last(struct page *page, int nidpid);
+extern int page_cpupid_xchg_last(struct page *page, int cpupid);
 
-static inline void page_nidpid_reset_last(struct page *page)
+static inline void page_cpupid_reset_last(struct page *page)
 {
-	int nidpid = (1 << LAST_NIDPID_SHIFT) - 1;
+	int cpupid = (1 << LAST_CPUPID_SHIFT) - 1;
 
-	page->flags &= ~(LAST_NIDPID_MASK << LAST_NIDPID_PGSHIFT);
-	page->flags |= (nidpid & LAST_NIDPID_MASK) << LAST_NIDPID_PGSHIFT;
+	page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
+	page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
 }
-#endif /* LAST_NIDPID_NOT_IN_PAGE_FLAGS */
-#else
-static inline int page_nidpid_xchg_last(struct page *page, int nidpid)
+#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
+#else /* !CONFIG_NUMA_BALANCING */
+static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
-	return page_to_nid(page);
+	return page_to_nid(page); /* XXX */
 }
 
-static inline int page_nidpid_last(struct page *page)
+static inline int page_cpupid_last(struct page *page)
 {
-	return page_to_nid(page);
+	return page_to_nid(page); /* XXX */
 }
 
-static inline int nidpid_to_nid(int nidpid)
+static inline int cpupid_to_nid(int cpupid)
 {
 	return -1;
 }
 
-static inline int nidpid_to_pid(int nidpid)
+static inline int cpupid_to_pid(int cpupid)
 {
 	return -1;
 }
 
-static inline int nid_pid_to_nidpid(int nid, int pid)
+static inline int cpupid_to_cpu(int cpupid)
 {
 	return -1;
 }
 
-static inline bool nidpid_pid_unset(int nidpid)
+static inline int cpu_pid_to_cpupid(int nid, int pid)
+{
+	return -1;
+}
+
+static inline bool cpupid_pid_unset(int cpupid)
 {
 	return 1;
 }
 
-static inline void page_nidpid_reset_last(struct page *page)
+static inline void page_cpupid_reset_last(struct page *page)
 {
 }
-#endif
+#endif /* CONFIG_NUMA_BALANCING */
 
 static inline struct zone *page_zone(const struct page *page)
 {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 38a902a6d1e3..a30f9ca66557 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -174,8 +174,8 @@ struct page {
 	void *shadow;
 #endif
 
-#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
-	int _last_nidpid;
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+	int _last_cpupid;
 #endif
 }
 /*
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 02bc9184f16b..da523661500a 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -39,9 +39,9 @@
  * lookup is necessary.
  *
  * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE |             ... | FLAGS |
- *      " plus space for last_nidpid: |       NODE     | ZONE | LAST_NIDPID ... | FLAGS |
+ *      " plus space for last_cpupid: |       NODE     | ZONE | LAST_CPUPID ... | FLAGS |
  * classic sparse with space for node:| SECTION | NODE | ZONE |             ... | FLAGS |
- *      " plus space for last_nidpid: | SECTION | NODE | ZONE | LAST_NIDPID ... | FLAGS |
+ *      " plus space for last_cpupid: | SECTION | NODE | ZONE | LAST_CPUPID ... | FLAGS |
  * classic sparse no space for node:  | SECTION |     ZONE    | ... | FLAGS |
  */
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -65,18 +65,18 @@
 #define LAST__PID_SHIFT 8
 #define LAST__PID_MASK  ((1 << LAST__PID_SHIFT)-1)
 
-#define LAST__NID_SHIFT NODES_SHIFT
-#define LAST__NID_MASK  ((1 << LAST__NID_SHIFT)-1)
+#define LAST__CPU_SHIFT NR_CPUS_BITS
+#define LAST__CPU_MASK  ((1 << LAST__CPU_SHIFT)-1)
 
-#define LAST_NIDPID_SHIFT (LAST__PID_SHIFT+LAST__NID_SHIFT)
+#define LAST_CPUPID_SHIFT (LAST__PID_SHIFT+LAST__CPU_SHIFT)
 #else
-#define LAST_NIDPID_SHIFT 0
+#define LAST_CPUPID_SHIFT 0
 #endif
 
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NIDPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
-#define LAST_NIDPID_WIDTH LAST_NIDPID_SHIFT
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
 #else
-#define LAST_NIDPID_WIDTH 0
+#define LAST_CPUPID_WIDTH 0
 #endif
 
 /*
@@ -87,8 +87,8 @@
 #define NODE_NOT_IN_PAGE_FLAGS
 #endif
 
-#if defined(CONFIG_NUMA_BALANCING) && LAST_NIDPID_WIDTH == 0
-#define LAST_NIDPID_NOT_IN_PAGE_FLAGS
+#if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0
+#define LAST_CPUPID_NOT_IN_PAGE_FLAGS
 #endif
 
 #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 0c9b862292b2..e8ca97b5c386 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -10,6 +10,7 @@
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
 #include <linux/page_cgroup.h>
+#include <linux/log2.h>
 
 void foo(void)
 {
@@ -17,5 +18,8 @@ void foo(void)
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
 	DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
+#ifdef CONFIG_SMP
+	DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
+#endif
 	/* End of constants */
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa561c8dc899..dbe0f628efa3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1210,7 +1210,7 @@ static void task_numa_placement(struct task_struct *p)
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int last_nidpid, int node, int pages, bool migrated)
+void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;
 	int priv;
@@ -1226,8 +1226,8 @@ void task_numa_fault(int last_nidpid, int node, int pages, bool migrated)
 	 * First accesses are treated as private, otherwise consider accesses
 	 * to be private if the accessing pid has not changed
 	 */
-	if (!nidpid_pid_unset(last_nidpid))
-		priv = ((p->pid & LAST__PID_MASK) == nidpid_to_pid(last_nidpid));
+	if (!cpupid_pid_unset(last_cpupid))
+		priv = ((p->pid & LAST__PID_MASK) == cpupid_to_pid(last_cpupid));
 	else
 		priv = 1;
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0baf0e4d5203..becf92ca54f3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1282,7 +1282,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int page_nid = -1, this_nid = numa_node_id();
-	int target_nid, last_nidpid = -1;
+	int target_nid, last_cpupid = -1;
 	bool page_locked;
 	bool migrated = false;
 
@@ -1293,7 +1293,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page = pmd_page(pmd);
 	BUG_ON(is_huge_zero_page(page));
 	page_nid = page_to_nid(page);
-	last_nidpid = page_nidpid_last(page);
+	last_cpupid = page_cpupid_last(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
 	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
@@ -1362,7 +1362,7 @@ out:
 		page_unlock_anon_vma_read(anon_vma);
 
 	if (page_nid != -1)
-		task_numa_fault(last_nidpid, page_nid, HPAGE_PMD_NR, migrated);
+		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, migrated);
 
 	return 0;
 }
@@ -1682,7 +1682,7 @@ static void __split_huge_page_refcount(struct page *page,
 		page_tail->mapping = page->mapping;
 
 		page_tail->index = page->index + i;
-		page_nidpid_xchg_last(page_tail, page_nidpid_last(page));
+		page_cpupid_xchg_last(page_tail, page_cpupid_last(page));
 
 		BUG_ON(!PageAnon(page_tail));
 		BUG_ON(!PageUptodate(page_tail));
diff --git a/mm/memory.c b/mm/memory.c
index cc7f20691c82..5162e6d0d652 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,8 +69,8 @@
 
 #include "internal.h"
 
-#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
-#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nidpid.
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
 #endif
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -3536,7 +3536,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page = NULL;
 	spinlock_t *ptl;
 	int page_nid = -1;
-	int last_nidpid;
+	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
 
@@ -3567,7 +3567,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 	BUG_ON(is_zero_pfn(page_to_pfn(page)));
 
-	last_nidpid = page_nidpid_last(page);
+	last_cpupid = page_cpupid_last(page);
 	page_nid = page_to_nid(page);
 	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
@@ -3583,7 +3583,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 out:
 	if (page_nid != -1)
-		task_numa_fault(last_nidpid, page_nid, 1, migrated);
+		task_numa_fault(last_cpupid, page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3598,7 +3598,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
-	int last_nidpid;
+	int last_cpupid;
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3643,7 +3643,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(!page))
 			continue;
 
-		last_nidpid = page_nidpid_last(page);
+		last_cpupid = page_cpupid_last(page);
 		page_nid = page_to_nid(page);
 		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 		pte_unmap_unlock(pte, ptl);
@@ -3656,7 +3656,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 
 		if (page_nid != -1)
-			task_numa_fault(last_nidpid, page_nid, 1, migrated);
+			task_numa_fault(last_cpupid, page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0e895a2eed5f..a5867ef24bda 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2324,6 +2324,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 	struct zone *zone;
 	int curnid = page_to_nid(page);
 	unsigned long pgoff;
+	int thiscpu = raw_smp_processor_id();
+	int thisnid = cpu_to_node(thiscpu);
 	int polnid = -1;
 	int ret = -1;
 
@@ -2372,11 +2374,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 
 	/* Migrate the page towards the node whose CPU is referencing it */
 	if (pol->flags & MPOL_F_MORON) {
-		int last_nidpid;
-		int this_nidpid;
+		int last_cpupid;
+		int this_cpupid;
 
-		polnid = numa_node_id();
-		this_nidpid = nid_pid_to_nidpid(polnid, current->pid);
+		polnid = thisnid;
+		this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid);
 
 		/*
 		 * Multi-stage node selection is used in conjunction
@@ -2399,8 +2401,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 		 * it less likely we act on an unlikely task<->page
 		 * relation.
 		 */
-		last_nidpid = page_nidpid_xchg_last(page, this_nidpid);
-		if (!nidpid_pid_unset(last_nidpid) && nidpid_to_nid(last_nidpid) != polnid)
+		last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+		if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid)
 			goto out;
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -2410,7 +2412,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 		 * This way a short and temporary process migration will
 		 * not cause excessive memory migration.
 		 */
-		if (polnid != current->numa_preferred_nid &&
+		if (thisnid != current->numa_preferred_nid &&
 				!current->numa_migrate_seq)
 			goto out;
 #endif
diff --git a/mm/migrate.c b/mm/migrate.c
index 025d1e3d2ad2..ff537749d3b4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1498,7 +1498,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 					  __GFP_NOWARN) &
 					 ~GFP_IOFS, 0);
 	if (newpage)
-		page_nidpid_xchg_last(newpage, page_nidpid_last(page));
+		page_cpupid_xchg_last(newpage, page_cpupid_last(page));
 
 	return newpage;
 }
@@ -1675,7 +1675,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	if (!new_page)
 		goto out_fail;
 
-	page_nidpid_xchg_last(new_page, page_nidpid_last(page));
+	page_cpupid_xchg_last(new_page, page_cpupid_last(page));
 
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated) {
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 467de579784b..68562e92d50c 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -71,26 +71,26 @@ void __init mminit_verify_pageflags_layout(void)
 	unsigned long or_mask, add_mask;
 
 	shift = 8 * sizeof(unsigned long);
-	width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - LAST_NIDPID_SHIFT;
+	width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - LAST_CPUPID_SHIFT;
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
-		"Section %d Node %d Zone %d Lastnidpid %d Flags %d\n",
+		"Section %d Node %d Zone %d Lastcpupid %d Flags %d\n",
 		SECTIONS_WIDTH,
 		NODES_WIDTH,
 		ZONES_WIDTH,
-		LAST_NIDPID_WIDTH,
+		LAST_CPUPID_WIDTH,
 		NR_PAGEFLAGS);
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
-		"Section %d Node %d Zone %d Lastnidpid %d\n",
+		"Section %d Node %d Zone %d Lastcpupid %d\n",
 		SECTIONS_SHIFT,
 		NODES_SHIFT,
 		ZONES_SHIFT,
-		LAST_NIDPID_SHIFT);
+		LAST_CPUPID_SHIFT);
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_pgshifts",
-		"Section %lu Node %lu Zone %lu Lastnidpid %lu\n",
+		"Section %lu Node %lu Zone %lu Lastcpupid %lu\n",
 		(unsigned long)SECTIONS_PGSHIFT,
 		(unsigned long)NODES_PGSHIFT,
 		(unsigned long)ZONES_PGSHIFT,
-		(unsigned long)LAST_NIDPID_PGSHIFT);
+		(unsigned long)LAST_CPUPID_PGSHIFT);
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodezoneid",
 		"Node/Zone ID: %lu -> %lu\n",
 		(unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT),
@@ -102,9 +102,9 @@ void __init mminit_verify_pageflags_layout(void)
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
 		"Node not in page flags");
 #endif
-#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
-		"Last nidpid not in page flags");
+		"Last cpupid not in page flags");
 #endif
 
 	if (SECTIONS_WIDTH) {
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 25bb477deb26..bf34fb8556db 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -97,20 +97,20 @@ void lruvec_init(struct lruvec *lruvec)
 		INIT_LIST_HEAD(&lruvec->lists[lru]);
 }
 
-#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_NIDPID_NOT_IN_PAGE_FLAGS)
-int page_nidpid_xchg_last(struct page *page, int nidpid)
+#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
+int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
 	unsigned long old_flags, flags;
-	int last_nidpid;
+	int last_cpupid;
 
 	do {
 		old_flags = flags = page->flags;
-		last_nidpid = page_nidpid_last(page);
+		last_cpupid = page_cpupid_last(page);
 
-		flags &= ~(LAST_NIDPID_MASK << LAST_NIDPID_PGSHIFT);
-		flags |= (nidpid & LAST_NIDPID_MASK) << LAST_NIDPID_PGSHIFT;
+		flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
+		flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
 	} while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags));
 
-	return last_nidpid;
+	return last_cpupid;
 }
 #endif
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 5aae39017d6d..9a74855f1241 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,14 +37,14 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa, bool *ret_all_same_nidpid)
+		int dirty_accountable, int prot_numa, bool *ret_all_same_cpupid)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
-	bool all_same_nidpid = true;
-	int last_nid = -1;
+	bool all_same_cpupid = true;
+	int last_cpu = -1;
 	int last_pid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
@@ -64,17 +64,17 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 				page = vm_normal_page(vma, addr, oldpte);
 				if (page) {
-					int nidpid = page_nidpid_last(page);
-					int this_nid = nidpid_to_nid(nidpid);
-					int this_pid = nidpid_to_pid(nidpid);
+					int cpupid = page_cpupid_last(page);
+					int this_cpu = cpupid_to_cpu(cpupid);
+					int this_pid = cpupid_to_pid(cpupid);
 
-					if (last_nid == -1)
-						last_nid = this_nid;
+					if (last_cpu == -1)
+						last_cpu = this_cpu;
 					if (last_pid == -1)
 						last_pid = this_pid;
-					if (last_nid != this_nid ||
+					if (last_cpu != this_cpu ||
 					    last_pid != this_pid) {
-						all_same_nidpid = false;
+						all_same_cpupid = false;
 					}
 
 					if (!pte_numa(oldpte)) {
@@ -115,7 +115,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-	*ret_all_same_nidpid = all_same_nidpid;
+	*ret_all_same_cpupid = all_same_cpupid;
 	return pages;
 }
 
@@ -142,7 +142,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
-	bool all_same_nidpid;
+	bool all_same_cpupid;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -168,7 +168,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa, &all_same_nidpid);
+				 dirty_accountable, prot_numa, &all_same_cpupid);
 		pages += this_pages;
 
 		/*
@@ -177,7 +177,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		 * node. This allows a regular PMD to be handled as one fault
 		 * and effectively batches the taking of the PTL
 		 */
-		if (prot_numa && this_pages && all_same_nidpid)
+		if (prot_numa && this_pages && all_same_cpupid)
 			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 89bedd0e4cad..73d812f16dde 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -626,7 +626,7 @@ static inline int free_pages_check(struct page *page)
 		bad_page(page);
 		return 1;
 	}
-	page_nidpid_reset_last(page);
+	page_cpupid_reset_last(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -4015,7 +4015,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		page_mapcount_reset(page);
-		page_nidpid_reset_last(page);
+		page_cpupid_reset_last(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
-- 
cgit v1.2.3


From 8c8a743c5087bac9caac8155b8f3b367e75cdd0b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:21 +0100
Subject: sched/numa: Use {cpu, pid} to create task groups for shared faults

While parallel applications tend to align their data on the cache
boundary, they tend not to align on the page or THP boundary.
Consequently tasks that partition their data can still "false-share"
pages presenting a problem for optimal NUMA placement.

This patch uses NUMA hinting faults to chain tasks together into
numa_groups. As well as storing the NID a task was running on when
accessing a page a truncated representation of the faulting PID is
stored. If subsequent faults are from different PIDs it is reasonable
to assume that those two tasks share a page and are candidates for
being grouped together. Note that this patch makes no scheduling
decisions based on the grouping information.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-44-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm.h    |  11 ++++
 include/linux/sched.h |   3 +
 kernel/sched/core.c   |   3 +
 kernel/sched/fair.c   | 165 +++++++++++++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h  |   5 +-
 mm/memory.c           |   8 +++
 6 files changed, 182 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ce464cd4777e..81443d557a2e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -691,6 +691,12 @@ static inline bool cpupid_cpu_unset(int cpupid)
 	return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK);
 }
 
+static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
+{
+	return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid);
+}
+
+#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)
 #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
 static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
@@ -760,6 +766,11 @@ static inline bool cpupid_pid_unset(int cpupid)
 static inline void page_cpupid_reset_last(struct page *page)
 {
 }
+
+static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
+{
+	return false;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static inline struct zone *page_zone(const struct page *page)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b6619792bb13..f587ded5c148 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1347,6 +1347,9 @@ struct task_struct {
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
 
+	struct list_head numa_entry;
+	struct numa_group *numa_group;
+
 	/*
 	 * Exponential decaying average of faults on a per-node basis.
 	 * Scheduling placement decisions are made based on the these counts.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1fe59da280e3..51092d5cc64c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1733,6 +1733,9 @@ static void __sched_fork(struct task_struct *p)
 	p->numa_work.next = &p->numa_work;
 	p->numa_faults = NULL;
 	p->numa_faults_buffer = NULL;
+
+	INIT_LIST_HEAD(&p->numa_entry);
+	p->numa_group = NULL;
 #endif /* CONFIG_NUMA_BALANCING */
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dbe0f628efa3..85565053a6ed 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -888,6 +888,17 @@ static unsigned int task_scan_max(struct task_struct *p)
  */
 unsigned int sysctl_numa_balancing_settle_count __read_mostly = 4;
 
+struct numa_group {
+	atomic_t refcount;
+
+	spinlock_t lock; /* nr_tasks, tasks */
+	int nr_tasks;
+	struct list_head task_list;
+
+	struct rcu_head rcu;
+	atomic_long_t faults[0];
+};
+
 static inline int task_faults_idx(int nid, int priv)
 {
 	return 2 * nid + priv;
@@ -1182,7 +1193,10 @@ static void task_numa_placement(struct task_struct *p)
 		int priv, i;
 
 		for (priv = 0; priv < 2; priv++) {
+			long diff;
+
 			i = task_faults_idx(nid, priv);
+			diff = -p->numa_faults[i];
 
 			/* Decay existing window, copy faults since last scan */
 			p->numa_faults[i] >>= 1;
@@ -1190,6 +1204,11 @@ static void task_numa_placement(struct task_struct *p)
 			p->numa_faults_buffer[i] = 0;
 
 			faults += p->numa_faults[i];
+			diff += p->numa_faults[i];
+			if (p->numa_group) {
+				/* safe because we can only change our own group */
+				atomic_long_add(diff, &p->numa_group->faults[i]);
+			}
 		}
 
 		if (faults > max_faults) {
@@ -1207,6 +1226,131 @@ static void task_numa_placement(struct task_struct *p)
 	}
 }
 
+static inline int get_numa_group(struct numa_group *grp)
+{
+	return atomic_inc_not_zero(&grp->refcount);
+}
+
+static inline void put_numa_group(struct numa_group *grp)
+{
+	if (atomic_dec_and_test(&grp->refcount))
+		kfree_rcu(grp, rcu);
+}
+
+static void double_lock(spinlock_t *l1, spinlock_t *l2)
+{
+	if (l1 > l2)
+		swap(l1, l2);
+
+	spin_lock(l1);
+	spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
+}
+
+static void task_numa_group(struct task_struct *p, int cpupid)
+{
+	struct numa_group *grp, *my_grp;
+	struct task_struct *tsk;
+	bool join = false;
+	int cpu = cpupid_to_cpu(cpupid);
+	int i;
+
+	if (unlikely(!p->numa_group)) {
+		unsigned int size = sizeof(struct numa_group) +
+				    2*nr_node_ids*sizeof(atomic_long_t);
+
+		grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+		if (!grp)
+			return;
+
+		atomic_set(&grp->refcount, 1);
+		spin_lock_init(&grp->lock);
+		INIT_LIST_HEAD(&grp->task_list);
+
+		for (i = 0; i < 2*nr_node_ids; i++)
+			atomic_long_set(&grp->faults[i], p->numa_faults[i]);
+
+		list_add(&p->numa_entry, &grp->task_list);
+		grp->nr_tasks++;
+		rcu_assign_pointer(p->numa_group, grp);
+	}
+
+	rcu_read_lock();
+	tsk = ACCESS_ONCE(cpu_rq(cpu)->curr);
+
+	if (!cpupid_match_pid(tsk, cpupid))
+		goto unlock;
+
+	grp = rcu_dereference(tsk->numa_group);
+	if (!grp)
+		goto unlock;
+
+	my_grp = p->numa_group;
+	if (grp == my_grp)
+		goto unlock;
+
+	/*
+	 * Only join the other group if its bigger; if we're the bigger group,
+	 * the other task will join us.
+	 */
+	if (my_grp->nr_tasks > grp->nr_tasks)
+		goto unlock;
+
+	/*
+	 * Tie-break on the grp address.
+	 */
+	if (my_grp->nr_tasks == grp->nr_tasks && my_grp > grp)
+		goto unlock;
+
+	if (!get_numa_group(grp))
+		goto unlock;
+
+	join = true;
+
+unlock:
+	rcu_read_unlock();
+
+	if (!join)
+		return;
+
+	for (i = 0; i < 2*nr_node_ids; i++) {
+		atomic_long_sub(p->numa_faults[i], &my_grp->faults[i]);
+		atomic_long_add(p->numa_faults[i], &grp->faults[i]);
+	}
+
+	double_lock(&my_grp->lock, &grp->lock);
+
+	list_move(&p->numa_entry, &grp->task_list);
+	my_grp->nr_tasks--;
+	grp->nr_tasks++;
+
+	spin_unlock(&my_grp->lock);
+	spin_unlock(&grp->lock);
+
+	rcu_assign_pointer(p->numa_group, grp);
+
+	put_numa_group(my_grp);
+}
+
+void task_numa_free(struct task_struct *p)
+{
+	struct numa_group *grp = p->numa_group;
+	int i;
+
+	if (grp) {
+		for (i = 0; i < 2*nr_node_ids; i++)
+			atomic_long_sub(p->numa_faults[i], &grp->faults[i]);
+
+		spin_lock(&grp->lock);
+		list_del(&p->numa_entry);
+		grp->nr_tasks--;
+		spin_unlock(&grp->lock);
+		rcu_assign_pointer(p->numa_group, NULL);
+		put_numa_group(grp);
+	}
+
+	kfree(p->numa_faults);
+}
+
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
@@ -1222,15 +1366,6 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
 	if (!p->mm)
 		return;
 
-	/*
-	 * First accesses are treated as private, otherwise consider accesses
-	 * to be private if the accessing pid has not changed
-	 */
-	if (!cpupid_pid_unset(last_cpupid))
-		priv = ((p->pid & LAST__PID_MASK) == cpupid_to_pid(last_cpupid));
-	else
-		priv = 1;
-
 	/* Allocate buffer to track faults on a per-node basis */
 	if (unlikely(!p->numa_faults)) {
 		int size = sizeof(*p->numa_faults) * 2 * nr_node_ids;
@@ -1244,6 +1379,18 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
 		p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids);
 	}
 
+	/*
+	 * First accesses are treated as private, otherwise consider accesses
+	 * to be private if the accessing pid has not changed
+	 */
+	if (unlikely(last_cpupid == (-1 & LAST_CPUPID_MASK))) {
+		priv = 1;
+	} else {
+		priv = cpupid_match_pid(p, last_cpupid);
+		if (!priv)
+			task_numa_group(p, last_cpupid);
+	}
+
 	/*
 	 * If pages are properly placed (did not migrate) then scan slower.
 	 * This is reset periodically in case of phase changes
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 691e96964dcc..8037b10a256f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -559,10 +559,7 @@ static inline u64 rq_clock_task(struct rq *rq)
 #ifdef CONFIG_NUMA_BALANCING
 extern int migrate_task_to(struct task_struct *p, int cpu);
 extern int migrate_swap(struct task_struct *, struct task_struct *);
-static inline void task_numa_free(struct task_struct *p)
-{
-	kfree(p->numa_faults);
-}
+extern void task_numa_free(struct task_struct *p);
 #else /* CONFIG_NUMA_BALANCING */
 static inline void task_numa_free(struct task_struct *p)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 5162e6d0d652..c57efa25cdbb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2719,6 +2719,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		get_page(dirty_page);
 
 reuse:
+		/*
+		 * Clear the pages cpupid information as the existing
+		 * information potentially belongs to a now completely
+		 * unrelated process.
+		 */
+		if (old_page)
+			page_cpupid_xchg_last(old_page, (1 << LAST_CPUPID_SHIFT) - 1);
+
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-- 
cgit v1.2.3


From e29cf08b05dc0b8151d65704d96d525a9e179a6b Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:22 +0100
Subject: sched/numa: Report a NUMA task group ID

It is desirable to model from userspace how the scheduler groups tasks
over time. This patch adds an ID to the numa_group and reports it via
/proc/PID/status.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-45-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/proc/array.c       | 2 ++
 include/linux/sched.h | 5 +++++
 kernel/sched/fair.c   | 7 +++++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index cbd0f1b324b9..1bd2077187fd 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -183,6 +183,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	seq_printf(m,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
+		"Ngid:\t%d\n"
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
@@ -190,6 +191,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
 		task_tgid_nr_ns(p, ns),
+		task_numa_group_id(p),
 		pid_nr_ns(pid, ns),
 		ppid, tpid,
 		from_kuid_munged(user_ns, cred->uid),
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f587ded5c148..b0b343b1ba64 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1452,12 +1452,17 @@ struct task_struct {
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
 				   bool migrated)
 {
 }
+static inline pid_t task_numa_group_id(struct task_struct *p)
+{
+	return 0;
+}
 static inline void set_numabalancing_state(bool enabled)
 {
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 85565053a6ed..5bd309c035c7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -893,12 +893,18 @@ struct numa_group {
 
 	spinlock_t lock; /* nr_tasks, tasks */
 	int nr_tasks;
+	pid_t gid;
 	struct list_head task_list;
 
 	struct rcu_head rcu;
 	atomic_long_t faults[0];
 };
 
+pid_t task_numa_group_id(struct task_struct *p)
+{
+	return p->numa_group ? p->numa_group->gid : 0;
+}
+
 static inline int task_faults_idx(int nid, int priv)
 {
 	return 2 * nid + priv;
@@ -1265,6 +1271,7 @@ static void task_numa_group(struct task_struct *p, int cpupid)
 		atomic_set(&grp->refcount, 1);
 		spin_lock_init(&grp->lock);
 		INIT_LIST_HEAD(&grp->task_list);
+		grp->gid = p->pid;
 
 		for (i = 0; i < 2*nr_node_ids; i++)
 			atomic_long_set(&grp->faults[i], p->numa_faults[i]);
-- 
cgit v1.2.3


From 6688cc05473b36a0a3d3971e1adf1712919b32eb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:24 +0100
Subject: mm: numa: Do not group on RO pages

And here's a little something to make sure not the whole world ends up
in a single group.

As while we don't migrate shared executable pages, we do scan/fault on
them. And since everybody links to libc, everybody ends up in the same
group.

Suggested-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-47-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  7 +++++--
 kernel/sched/fair.c   |  5 +++--
 mm/huge_memory.c      | 15 +++++++++++++--
 mm/memory.c           | 30 ++++++++++++++++++++++++++----
 4 files changed, 47 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0b343b1ba64..ff543851a18a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1450,13 +1450,16 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#define TNF_MIGRATED	0x01
+#define TNF_NO_GROUP	0x02
+
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
-				   bool migrated)
+				   int flags)
 {
 }
 static inline pid_t task_numa_group_id(struct task_struct *p)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5bd309c035c7..35661b8afb4e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1361,9 +1361,10 @@ void task_numa_free(struct task_struct *p)
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
+void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 {
 	struct task_struct *p = current;
+	bool migrated = flags & TNF_MIGRATED;
 	int priv;
 
 	if (!numabalancing_enabled)
@@ -1394,7 +1395,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
 		priv = 1;
 	} else {
 		priv = cpupid_match_pid(p, last_cpupid);
-		if (!priv)
+		if (!priv && !(flags & TNF_NO_GROUP))
 			task_numa_group(p, last_cpupid);
 	}
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index becf92ca54f3..7ab4e32afe12 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1285,6 +1285,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int target_nid, last_cpupid = -1;
 	bool page_locked;
 	bool migrated = false;
+	int flags = 0;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1298,6 +1299,14 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
+	/*
+	 * Avoid grouping on DSO/COW pages in specific and RO pages
+	 * in general, RO pages shouldn't hurt as much anyway since
+	 * they can be in shared cache state.
+	 */
+	if (!pmd_write(pmd))
+		flags |= TNF_NO_GROUP;
+
 	/*
 	 * Acquire the page lock to serialise THP migrations but avoid dropping
 	 * page_table_lock if at all possible
@@ -1343,8 +1352,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (migrated)
+	if (migrated) {
+		flags |= TNF_MIGRATED;
 		page_nid = target_nid;
+	}
 
 	goto out;
 clear_pmdnuma:
@@ -1362,7 +1373,7 @@ out:
 		page_unlock_anon_vma_read(anon_vma);
 
 	if (page_nid != -1)
-		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, migrated);
+		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
 
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index c57efa25cdbb..eba846bcf124 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3547,6 +3547,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
+	int flags = 0;
 
 	/*
 	* The "pte" at this point cannot be used safely without
@@ -3575,6 +3576,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 	BUG_ON(is_zero_pfn(page_to_pfn(page)));
 
+	/*
+	 * Avoid grouping on DSO/COW pages in specific and RO pages
+	 * in general, RO pages shouldn't hurt as much anyway since
+	 * they can be in shared cache state.
+	 */
+	if (!pte_write(pte))
+		flags |= TNF_NO_GROUP;
+
 	last_cpupid = page_cpupid_last(page);
 	page_nid = page_to_nid(page);
 	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
@@ -3586,12 +3595,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, vma, target_nid);
-	if (migrated)
+	if (migrated) {
 		page_nid = target_nid;
+		flags |= TNF_MIGRATED;
+	}
 
 out:
 	if (page_nid != -1)
-		task_numa_fault(last_cpupid, page_nid, 1, migrated);
+		task_numa_fault(last_cpupid, page_nid, 1, flags);
 	return 0;
 }
 
@@ -3632,6 +3643,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		int page_nid = -1;
 		int target_nid;
 		bool migrated = false;
+		int flags = 0;
 
 		if (!pte_present(pteval))
 			continue;
@@ -3651,20 +3663,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(!page))
 			continue;
 
+		/*
+		 * Avoid grouping on DSO/COW pages in specific and RO pages
+		 * in general, RO pages shouldn't hurt as much anyway since
+		 * they can be in shared cache state.
+		 */
+		if (!pte_write(pteval))
+			flags |= TNF_NO_GROUP;
+
 		last_cpupid = page_cpupid_last(page);
 		page_nid = page_to_nid(page);
 		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 		pte_unmap_unlock(pte, ptl);
 		if (target_nid != -1) {
 			migrated = migrate_misplaced_page(page, vma, target_nid);
-			if (migrated)
+			if (migrated) {
 				page_nid = target_nid;
+				flags |= TNF_MIGRATED;
+			}
 		} else {
 			put_page(page);
 		}
 
 		if (page_nid != -1)
-			task_numa_fault(last_cpupid, page_nid, 1, migrated);
+			task_numa_fault(last_cpupid, page_nid, 1, flags);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
-- 
cgit v1.2.3


From 5e1576ed0e54d419286a8096133029062b6ad456 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:26 +0100
Subject: sched/numa: Stay on the same node if CLONE_VM

A newly spawned thread inside a process should stay on the same
NUMA node as its parent. This prevents processes from being "torn"
across multiple NUMA nodes every time they spawn a new thread.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-49-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  2 +-
 kernel/fork.c         |  2 +-
 kernel/sched/core.c   | 14 +++++++++-----
 3 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff543851a18a..8563e3dd5c0f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2021,7 +2021,7 @@ extern void wake_up_new_task(struct task_struct *tsk);
 #else
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void sched_fork(struct task_struct *p);
+extern void sched_fork(unsigned long clone_flags, struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
diff --git a/kernel/fork.c b/kernel/fork.c
index 7192d91b5415..c93be06dee87 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1310,7 +1310,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
-	sched_fork(p);
+	sched_fork(clone_flags, p);
 
 	retval = perf_event_init_task(p);
 	if (retval)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 51092d5cc64c..3e2c893df173 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1696,7 +1696,7 @@ int wake_up_state(struct task_struct *p, unsigned int state)
  *
  * __sched_fork() is basic setup used by init_idle() too:
  */
-static void __sched_fork(struct task_struct *p)
+static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 {
 	p->on_rq			= 0;
 
@@ -1725,11 +1725,15 @@ static void __sched_fork(struct task_struct *p)
 		p->mm->numa_scan_seq = 0;
 	}
 
+	if (clone_flags & CLONE_VM)
+		p->numa_preferred_nid = current->numa_preferred_nid;
+	else
+		p->numa_preferred_nid = -1;
+
 	p->node_stamp = 0ULL;
 	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
 	p->numa_migrate_seq = 1;
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
-	p->numa_preferred_nid = -1;
 	p->numa_work.next = &p->numa_work;
 	p->numa_faults = NULL;
 	p->numa_faults_buffer = NULL;
@@ -1761,12 +1765,12 @@ void set_numabalancing_state(bool enabled)
 /*
  * fork()/clone()-time setup:
  */
-void sched_fork(struct task_struct *p)
+void sched_fork(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long flags;
 	int cpu = get_cpu();
 
-	__sched_fork(p);
+	__sched_fork(clone_flags, p);
 	/*
 	 * We mark the process as running here. This guarantees that
 	 * nobody will actually run it, and a signal or other external
@@ -4287,7 +4291,7 @@ void init_idle(struct task_struct *idle, int cpu)
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
 
-	__sched_fork(idle);
+	__sched_fork(0, idle);
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 
-- 
cgit v1.2.3


From 83e1d2cd9eabec5164afea295ff06b941ae8e4a9 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:27 +0100
Subject: sched/numa: Use group fault statistics in numa placement

This patch uses the fraction of faults on a particular node for both task
and group, to figure out the best node to place a task.  If the task and
group statistics disagree on what the preferred node should be then a full
rescan will select the node with the best combined weight.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-50-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |   1 +
 kernel/sched/fair.c   | 124 +++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 108 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8563e3dd5c0f..724482200b83 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1356,6 +1356,7 @@ struct task_struct {
 	 * The values remain static for the duration of a PTE scan
 	 */
 	unsigned long *numa_faults;
+	unsigned long total_numa_faults;
 
 	/*
 	 * numa_faults_buffer records faults per node during the current
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 35661b8afb4e..4c40e13310e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -897,6 +897,7 @@ struct numa_group {
 	struct list_head task_list;
 
 	struct rcu_head rcu;
+	atomic_long_t total_faults;
 	atomic_long_t faults[0];
 };
 
@@ -919,6 +920,51 @@ static inline unsigned long task_faults(struct task_struct *p, int nid)
 		p->numa_faults[task_faults_idx(nid, 1)];
 }
 
+static inline unsigned long group_faults(struct task_struct *p, int nid)
+{
+	if (!p->numa_group)
+		return 0;
+
+	return atomic_long_read(&p->numa_group->faults[2*nid]) +
+	       atomic_long_read(&p->numa_group->faults[2*nid+1]);
+}
+
+/*
+ * These return the fraction of accesses done by a particular task, or
+ * task group, on a particular numa node.  The group weight is given a
+ * larger multiplier, in order to group tasks together that are almost
+ * evenly spread out between numa nodes.
+ */
+static inline unsigned long task_weight(struct task_struct *p, int nid)
+{
+	unsigned long total_faults;
+
+	if (!p->numa_faults)
+		return 0;
+
+	total_faults = p->total_numa_faults;
+
+	if (!total_faults)
+		return 0;
+
+	return 1000 * task_faults(p, nid) / total_faults;
+}
+
+static inline unsigned long group_weight(struct task_struct *p, int nid)
+{
+	unsigned long total_faults;
+
+	if (!p->numa_group)
+		return 0;
+
+	total_faults = atomic_long_read(&p->numa_group->total_faults);
+
+	if (!total_faults)
+		return 0;
+
+	return 1200 * group_faults(p, nid) / total_faults;
+}
+
 static unsigned long weighted_cpuload(const int cpu);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
@@ -1018,8 +1064,10 @@ static void task_numa_compare(struct task_numa_env *env, long imp)
 		if (!cpumask_test_cpu(env->src_cpu, tsk_cpus_allowed(cur)))
 			goto unlock;
 
-		imp += task_faults(cur, env->src_nid) -
-		       task_faults(cur, env->dst_nid);
+		imp += task_weight(cur, env->src_nid) +
+		       group_weight(cur, env->src_nid) -
+		       task_weight(cur, env->dst_nid) -
+		       group_weight(cur, env->dst_nid);
 	}
 
 	if (imp < env->best_imp)
@@ -1098,7 +1146,7 @@ static int task_numa_migrate(struct task_struct *p)
 		.best_cpu = -1
 	};
 	struct sched_domain *sd;
-	unsigned long faults;
+	unsigned long weight;
 	int nid, ret;
 	long imp;
 
@@ -1115,10 +1163,10 @@ static int task_numa_migrate(struct task_struct *p)
 	env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
 	rcu_read_unlock();
 
-	faults = task_faults(p, env.src_nid);
+	weight = task_weight(p, env.src_nid) + group_weight(p, env.src_nid);
 	update_numa_stats(&env.src_stats, env.src_nid);
 	env.dst_nid = p->numa_preferred_nid;
-	imp = task_faults(env.p, env.dst_nid) - faults;
+	imp = task_weight(p, env.dst_nid) + group_weight(p, env.dst_nid) - weight;
 	update_numa_stats(&env.dst_stats, env.dst_nid);
 
 	/* If the preferred nid has capacity, try to use it. */
@@ -1131,8 +1179,8 @@ static int task_numa_migrate(struct task_struct *p)
 			if (nid == env.src_nid || nid == p->numa_preferred_nid)
 				continue;
 
-			/* Only consider nodes that recorded more faults */
-			imp = task_faults(env.p, nid) - faults;
+			/* Only consider nodes where both task and groups benefit */
+			imp = task_weight(p, nid) + group_weight(p, nid) - weight;
 			if (imp < 0)
 				continue;
 
@@ -1183,8 +1231,8 @@ static void numa_migrate_preferred(struct task_struct *p)
 
 static void task_numa_placement(struct task_struct *p)
 {
-	int seq, nid, max_nid = -1;
-	unsigned long max_faults = 0;
+	int seq, nid, max_nid = -1, max_group_nid = -1;
+	unsigned long max_faults = 0, max_group_faults = 0;
 
 	seq = ACCESS_ONCE(p->mm->numa_scan_seq);
 	if (p->numa_scan_seq == seq)
@@ -1195,7 +1243,7 @@ static void task_numa_placement(struct task_struct *p)
 
 	/* Find the node with the highest number of faults */
 	for_each_online_node(nid) {
-		unsigned long faults = 0;
+		unsigned long faults = 0, group_faults = 0;
 		int priv, i;
 
 		for (priv = 0; priv < 2; priv++) {
@@ -1211,9 +1259,12 @@ static void task_numa_placement(struct task_struct *p)
 
 			faults += p->numa_faults[i];
 			diff += p->numa_faults[i];
+			p->total_numa_faults += diff;
 			if (p->numa_group) {
 				/* safe because we can only change our own group */
 				atomic_long_add(diff, &p->numa_group->faults[i]);
+				atomic_long_add(diff, &p->numa_group->total_faults);
+				group_faults += atomic_long_read(&p->numa_group->faults[i]);
 			}
 		}
 
@@ -1221,6 +1272,27 @@ static void task_numa_placement(struct task_struct *p)
 			max_faults = faults;
 			max_nid = nid;
 		}
+
+		if (group_faults > max_group_faults) {
+			max_group_faults = group_faults;
+			max_group_nid = nid;
+		}
+	}
+
+	/*
+	 * If the preferred task and group nids are different,
+	 * iterate over the nodes again to find the best place.
+	 */
+	if (p->numa_group && max_nid != max_group_nid) {
+		unsigned long weight, max_weight = 0;
+
+		for_each_online_node(nid) {
+			weight = task_weight(p, nid) + group_weight(p, nid);
+			if (weight > max_weight) {
+				max_weight = weight;
+				max_nid = nid;
+			}
+		}
 	}
 
 	/* Preferred node as the node with the most faults */
@@ -1276,6 +1348,8 @@ static void task_numa_group(struct task_struct *p, int cpupid)
 		for (i = 0; i < 2*nr_node_ids; i++)
 			atomic_long_set(&grp->faults[i], p->numa_faults[i]);
 
+		atomic_long_set(&grp->total_faults, p->total_numa_faults);
+
 		list_add(&p->numa_entry, &grp->task_list);
 		grp->nr_tasks++;
 		rcu_assign_pointer(p->numa_group, grp);
@@ -1323,6 +1397,8 @@ unlock:
 		atomic_long_sub(p->numa_faults[i], &my_grp->faults[i]);
 		atomic_long_add(p->numa_faults[i], &grp->faults[i]);
 	}
+	atomic_long_sub(p->total_numa_faults, &my_grp->total_faults);
+	atomic_long_add(p->total_numa_faults, &grp->total_faults);
 
 	double_lock(&my_grp->lock, &grp->lock);
 
@@ -1347,6 +1423,8 @@ void task_numa_free(struct task_struct *p)
 		for (i = 0; i < 2*nr_node_ids; i++)
 			atomic_long_sub(p->numa_faults[i], &grp->faults[i]);
 
+		atomic_long_sub(p->total_numa_faults, &grp->total_faults);
+
 		spin_lock(&grp->lock);
 		list_del(&p->numa_entry);
 		grp->nr_tasks--;
@@ -1385,6 +1463,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 
 		BUG_ON(p->numa_faults_buffer);
 		p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids);
+		p->total_numa_faults = 0;
 	}
 
 	/*
@@ -4572,12 +4651,17 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 	src_nid = cpu_to_node(env->src_cpu);
 	dst_nid = cpu_to_node(env->dst_cpu);
 
-	if (src_nid == dst_nid ||
-	    p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
+	if (src_nid == dst_nid)
 		return false;
 
-	if (dst_nid == p->numa_preferred_nid ||
-	    task_faults(p, dst_nid) > task_faults(p, src_nid))
+	/* Always encourage migration to the preferred node. */
+	if (dst_nid == p->numa_preferred_nid)
+		return true;
+
+	/* After the task has settled, check if the new node is better. */
+	if (p->numa_migrate_seq >= sysctl_numa_balancing_settle_count &&
+			task_weight(p, dst_nid) + group_weight(p, dst_nid) >
+			task_weight(p, src_nid) + group_weight(p, src_nid))
 		return true;
 
 	return false;
@@ -4597,11 +4681,17 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 	src_nid = cpu_to_node(env->src_cpu);
 	dst_nid = cpu_to_node(env->dst_cpu);
 
-	if (src_nid == dst_nid ||
-	    p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
+	if (src_nid == dst_nid)
 		return false;
 
-	if (task_faults(p, dst_nid) < task_faults(p, src_nid))
+	/* Migrating away from the preferred node is always bad. */
+	if (src_nid == p->numa_preferred_nid)
+		return true;
+
+	/* After the task has settled, check if the new node is worse. */
+	if (p->numa_migrate_seq >= sysctl_numa_balancing_settle_count &&
+			task_weight(p, dst_nid) + group_weight(p, dst_nid) <
+			task_weight(p, src_nid) + group_weight(p, src_nid))
 		return true;
 
 	return false;
-- 
cgit v1.2.3


From 82727018b0d33d188e9916bcf76f18387484cb04 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:28 +0100
Subject: sched/numa: Call task_numa_free() from do_execve()

It is possible for a task in a numa group to call exec, and
have the new (unrelated) executable inherit the numa group
association from its former self.

This has the potential to break numa grouping, and is trivial
to fix.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-51-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/exec.c             | 1 +
 include/linux/sched.h | 4 ++++
 kernel/sched/fair.c   | 9 ++++++++-
 kernel/sched/sched.h  | 5 -----
 4 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 8875dd10ae7a..2ea437e5acf4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1547,6 +1547,7 @@ static int do_execve_common(const char *filename,
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
 	acct_update_integrals(current);
+	task_numa_free(current);
 	free_bprm(bprm);
 	if (displaced)
 		put_files_struct(displaced);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 724482200b83..f6385107c352 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1458,6 +1458,7 @@ struct task_struct {
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
+extern void task_numa_free(struct task_struct *p);
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
 				   int flags)
@@ -1470,6 +1471,9 @@ static inline pid_t task_numa_group_id(struct task_struct *p)
 static inline void set_numabalancing_state(bool enabled)
 {
 }
+static inline void task_numa_free(struct task_struct *p)
+{
+}
 #endif
 
 static inline struct pid *task_pid(struct task_struct *task)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4c40e13310e9..c4df2de6ca4a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1418,6 +1418,7 @@ void task_numa_free(struct task_struct *p)
 {
 	struct numa_group *grp = p->numa_group;
 	int i;
+	void *numa_faults = p->numa_faults;
 
 	if (grp) {
 		for (i = 0; i < 2*nr_node_ids; i++)
@@ -1433,7 +1434,9 @@ void task_numa_free(struct task_struct *p)
 		put_numa_group(grp);
 	}
 
-	kfree(p->numa_faults);
+	p->numa_faults = NULL;
+	p->numa_faults_buffer = NULL;
+	kfree(numa_faults);
 }
 
 /*
@@ -1452,6 +1455,10 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 	if (!p->mm)
 		return;
 
+	/* Do not worry about placement if exiting */
+	if (p->state == TASK_DEAD)
+		return;
+
 	/* Allocate buffer to track faults on a per-node basis */
 	if (unlikely(!p->numa_faults)) {
 		int size = sizeof(*p->numa_faults) * 2 * nr_node_ids;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8037b10a256f..eeb1923812a1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -559,11 +559,6 @@ static inline u64 rq_clock_task(struct rq *rq)
 #ifdef CONFIG_NUMA_BALANCING
 extern int migrate_task_to(struct task_struct *p, int cpu);
 extern int migrate_swap(struct task_struct *, struct task_struct *);
-extern void task_numa_free(struct task_struct *p);
-#else /* CONFIG_NUMA_BALANCING */
-static inline void task_numa_free(struct task_struct *p)
-{
-}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From b32e86b4301e345611f0446265f782a229faadf6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 7 Oct 2013 11:29:30 +0100
Subject: sched/numa: Add debugging

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/1381141781-10992-53-git-send-email-mgorman@suse.de
---
 include/linux/sched.h |  6 ++++++
 kernel/sched/debug.c  | 60 +++++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/fair.c   |  5 ++++-
 3 files changed, 68 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f6385107c352..1127a46ac3d2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1366,6 +1366,7 @@ struct task_struct {
 	unsigned long *numa_faults_buffer;
 
 	int numa_preferred_nid;
+	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
 	struct rcu_head rcu;
@@ -2661,6 +2662,11 @@ static inline unsigned int task_cpu(const struct task_struct *p)
 	return task_thread_info(p)->cpu;
 }
 
+static inline int task_node(const struct task_struct *p)
+{
+	return cpu_to_node(task_cpu(p));
+}
+
 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
 
 #else
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 196559994f7c..e6ba5e31c7ca 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -15,6 +15,7 @@
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
+#include <linux/mempolicy.h>
 
 #include "sched.h"
 
@@ -137,6 +138,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
 		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	SEQ_printf(m, " %d", cpu_to_node(task_cpu(p)));
+#endif
 #ifdef CONFIG_CGROUP_SCHED
 	SEQ_printf(m, " %s", task_group_path(task_group(p)));
 #endif
@@ -159,7 +163,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 	read_lock_irqsave(&tasklist_lock, flags);
 
 	do_each_thread(g, p) {
-		if (!p->on_rq || task_cpu(p) != rq_cpu)
+		if (task_cpu(p) != rq_cpu)
 			continue;
 
 		print_task(m, rq, p);
@@ -345,7 +349,7 @@ static void sched_debug_header(struct seq_file *m)
 	cpu_clk = local_clock();
 	local_irq_restore(flags);
 
-	SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
+	SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
@@ -488,6 +492,56 @@ static int __init init_sched_debug_procfs(void)
 
 __initcall(init_sched_debug_procfs);
 
+#define __P(F) \
+	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
+#define P(F) \
+	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
+#define __PN(F) \
+	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
+#define PN(F) \
+	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
+
+
+static void sched_show_numa(struct task_struct *p, struct seq_file *m)
+{
+#ifdef CONFIG_NUMA_BALANCING
+	struct mempolicy *pol;
+	int node, i;
+
+	if (p->mm)
+		P(mm->numa_scan_seq);
+
+	task_lock(p);
+	pol = p->mempolicy;
+	if (pol && !(pol->flags & MPOL_F_MORON))
+		pol = NULL;
+	mpol_get(pol);
+	task_unlock(p);
+
+	SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0));
+
+	for_each_online_node(node) {
+		for (i = 0; i < 2; i++) {
+			unsigned long nr_faults = -1;
+			int cpu_current, home_node;
+
+			if (p->numa_faults)
+				nr_faults = p->numa_faults[2*node + i];
+
+			cpu_current = !i ? (task_node(p) == node) :
+				(pol && node_isset(node, pol->v.nodes));
+
+			home_node = (p->numa_preferred_nid == node);
+
+			SEQ_printf(m, "numa_faults, %d, %d, %d, %d, %ld\n",
+				i, node, cpu_current, home_node, nr_faults);
+		}
+	}
+
+	mpol_put(pol);
+#endif
+}
+
 void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 {
 	unsigned long nr_switches;
@@ -591,6 +645,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 		SEQ_printf(m, "%-45s:%21Ld\n",
 			   "clock-delta", (long long)(t1-t0));
 	}
+
+	sched_show_numa(p, m);
 }
 
 void proc_sched_set_task(struct task_struct *p)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 147349987bfe..2876a37cdfc4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1137,7 +1137,7 @@ static int task_numa_migrate(struct task_struct *p)
 		.p = p,
 
 		.src_cpu = task_cpu(p),
-		.src_nid = cpu_to_node(task_cpu(p)),
+		.src_nid = task_node(p),
 
 		.imbalance_pct = 112,
 
@@ -1515,6 +1515,9 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 	if (p->numa_migrate_retry && time_after(jiffies, p->numa_migrate_retry))
 		numa_migrate_preferred(p);
 
+	if (migrated)
+		p->numa_pages_migrated += pages;
+
 	p->numa_faults_buffer[task_faults_idx(node, priv)] += pages;
 }
 
-- 
cgit v1.2.3


From dabe1d992414a6456e60e41f1d1ad8affc6d444d Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:34 +0100
Subject: sched/numa: Be more careful about joining numa groups

Due to the way the pid is truncated, and tasks are moved between
CPUs by the scheduler, it is possible for the current task_numa_fault
to group together tasks that do not actually share memory together.

This patch adds a few easy sanity checks to task_numa_fault, joining
tasks together if they share the same tsk->mm, or if the fault was on
a page with an elevated mapcount, in a shared VMA.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-57-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  1 +
 kernel/sched/fair.c   | 16 +++++++++++-----
 mm/memory.c           |  7 +++++++
 3 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1127a46ac3d2..59f953b2e413 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1454,6 +1454,7 @@ struct task_struct {
 
 #define TNF_MIGRATED	0x01
 #define TNF_NO_GROUP	0x02
+#define TNF_SHARED	0x04
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5166b9b1af70..222c2d0b6ae2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1381,7 +1381,7 @@ static void double_lock(spinlock_t *l1, spinlock_t *l2)
 	spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
 }
 
-static void task_numa_group(struct task_struct *p, int cpupid)
+static void task_numa_group(struct task_struct *p, int cpupid, int flags)
 {
 	struct numa_group *grp, *my_grp;
 	struct task_struct *tsk;
@@ -1439,10 +1439,16 @@ static void task_numa_group(struct task_struct *p, int cpupid)
 	if (my_grp->nr_tasks == grp->nr_tasks && my_grp > grp)
 		goto unlock;
 
-	if (!get_numa_group(grp))
-		goto unlock;
+	/* Always join threads in the same process. */
+	if (tsk->mm == current->mm)
+		join = true;
+
+	/* Simple filter to avoid false positives due to PID collisions */
+	if (flags & TNF_SHARED)
+		join = true;
 
-	join = true;
+	if (join && !get_numa_group(grp))
+		join = false;
 
 unlock:
 	rcu_read_unlock();
@@ -1539,7 +1545,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 	} else {
 		priv = cpupid_match_pid(p, last_cpupid);
 		if (!priv && !(flags & TNF_NO_GROUP))
-			task_numa_group(p, last_cpupid);
+			task_numa_group(p, last_cpupid, flags);
 	}
 
 	/*
diff --git a/mm/memory.c b/mm/memory.c
index 9898eeb9a21c..823720c43ea9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3584,6 +3584,13 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_write(pte))
 		flags |= TNF_NO_GROUP;
 
+	/*
+	 * Flag if the page is shared between multiple address spaces. This
+	 * is later used when determining whether to group tasks together
+	 */
+	if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
+		flags |= TNF_SHARED;
+
 	last_cpupid = page_cpupid_last(page);
 	page_nid = page_to_nid(page);
 	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
-- 
cgit v1.2.3


From 04bb2f9475054298f0c67a89ca92cade42d3fe5e Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:36 +0100
Subject: sched/numa: Adjust scan rate in task_numa_placement

Adjust numa_scan_period in task_numa_placement, depending on how much
useful work the numa code can do. The more local faults there are in a
given scan window the longer the period (and hence the slower the scan rate)
during the next window. If there are excessive shared faults then the scan
period will decrease with the amount of scaling depending on whether the
ratio of shared/private faults. If the preferred node changes then the
scan rate is reset to recheck if the task is properly placed.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-59-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |   9 ++++
 kernel/sched/fair.c   | 112 +++++++++++++++++++++++++++++++++++++++-----------
 mm/huge_memory.c      |   4 +-
 mm/memory.c           |   9 ++--
 4 files changed, 105 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 59f953b2e413..2292f6c1596f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1365,6 +1365,14 @@ struct task_struct {
 	 */
 	unsigned long *numa_faults_buffer;
 
+	/*
+	 * numa_faults_locality tracks if faults recorded during the last
+	 * scan window were remote/local. The task scan period is adapted
+	 * based on the locality of the faults with different weights
+	 * depending on whether they were shared or private faults
+	 */
+	unsigned long numa_faults_locality[2];
+
 	int numa_preferred_nid;
 	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
@@ -1455,6 +1463,7 @@ struct task_struct {
 #define TNF_MIGRATED	0x01
 #define TNF_NO_GROUP	0x02
 #define TNF_SHARED	0x04
+#define TNF_FAULT_LOCAL	0x08
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d26a16e45437..66237ff8b01e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1241,6 +1241,12 @@ static int task_numa_migrate(struct task_struct *p)
 
 	sched_setnuma(p, env.dst_nid);
 
+	/*
+	 * Reset the scan period if the task is being rescheduled on an
+	 * alternative node to recheck if the tasks is now properly placed.
+	 */
+	p->numa_scan_period = task_scan_min(p);
+
 	if (env.best_task == NULL) {
 		int ret = migrate_task_to(p, env.best_cpu);
 		return ret;
@@ -1276,10 +1282,86 @@ static void numa_migrate_preferred(struct task_struct *p)
 		p->numa_migrate_retry = jiffies + HZ*5;
 }
 
+/*
+ * When adapting the scan rate, the period is divided into NUMA_PERIOD_SLOTS
+ * increments. The more local the fault statistics are, the higher the scan
+ * period will be for the next scan window. If local/remote ratio is below
+ * NUMA_PERIOD_THRESHOLD (where range of ratio is 1..NUMA_PERIOD_SLOTS) the
+ * scan period will decrease
+ */
+#define NUMA_PERIOD_SLOTS 10
+#define NUMA_PERIOD_THRESHOLD 3
+
+/*
+ * Increase the scan period (slow down scanning) if the majority of
+ * our memory is already on our local node, or if the majority of
+ * the page accesses are shared with other processes.
+ * Otherwise, decrease the scan period.
+ */
+static void update_task_scan_period(struct task_struct *p,
+			unsigned long shared, unsigned long private)
+{
+	unsigned int period_slot;
+	int ratio;
+	int diff;
+
+	unsigned long remote = p->numa_faults_locality[0];
+	unsigned long local = p->numa_faults_locality[1];
+
+	/*
+	 * If there were no record hinting faults then either the task is
+	 * completely idle or all activity is areas that are not of interest
+	 * to automatic numa balancing. Scan slower
+	 */
+	if (local + shared == 0) {
+		p->numa_scan_period = min(p->numa_scan_period_max,
+			p->numa_scan_period << 1);
+
+		p->mm->numa_next_scan = jiffies +
+			msecs_to_jiffies(p->numa_scan_period);
+
+		return;
+	}
+
+	/*
+	 * Prepare to scale scan period relative to the current period.
+	 *	 == NUMA_PERIOD_THRESHOLD scan period stays the same
+	 *       <  NUMA_PERIOD_THRESHOLD scan period decreases (scan faster)
+	 *	 >= NUMA_PERIOD_THRESHOLD scan period increases (scan slower)
+	 */
+	period_slot = DIV_ROUND_UP(p->numa_scan_period, NUMA_PERIOD_SLOTS);
+	ratio = (local * NUMA_PERIOD_SLOTS) / (local + remote);
+	if (ratio >= NUMA_PERIOD_THRESHOLD) {
+		int slot = ratio - NUMA_PERIOD_THRESHOLD;
+		if (!slot)
+			slot = 1;
+		diff = slot * period_slot;
+	} else {
+		diff = -(NUMA_PERIOD_THRESHOLD - ratio) * period_slot;
+
+		/*
+		 * Scale scan rate increases based on sharing. There is an
+		 * inverse relationship between the degree of sharing and
+		 * the adjustment made to the scanning period. Broadly
+		 * speaking the intent is that there is little point
+		 * scanning faster if shared accesses dominate as it may
+		 * simply bounce migrations uselessly
+		 */
+		period_slot = DIV_ROUND_UP(diff, NUMA_PERIOD_SLOTS);
+		ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared));
+		diff = (diff * ratio) / NUMA_PERIOD_SLOTS;
+	}
+
+	p->numa_scan_period = clamp(p->numa_scan_period + diff,
+			task_scan_min(p), task_scan_max(p));
+	memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality));
+}
+
 static void task_numa_placement(struct task_struct *p)
 {
 	int seq, nid, max_nid = -1, max_group_nid = -1;
 	unsigned long max_faults = 0, max_group_faults = 0;
+	unsigned long fault_types[2] = { 0, 0 };
 	spinlock_t *group_lock = NULL;
 
 	seq = ACCESS_ONCE(p->mm->numa_scan_seq);
@@ -1309,6 +1391,7 @@ static void task_numa_placement(struct task_struct *p)
 			/* Decay existing window, copy faults since last scan */
 			p->numa_faults[i] >>= 1;
 			p->numa_faults[i] += p->numa_faults_buffer[i];
+			fault_types[priv] += p->numa_faults_buffer[i];
 			p->numa_faults_buffer[i] = 0;
 
 			faults += p->numa_faults[i];
@@ -1333,6 +1416,8 @@ static void task_numa_placement(struct task_struct *p)
 		}
 	}
 
+	update_task_scan_period(p, fault_types[0], fault_types[1]);
+
 	if (p->numa_group) {
 		/*
 		 * If the preferred task and group nids are different,
@@ -1538,6 +1623,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 		BUG_ON(p->numa_faults_buffer);
 		p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids);
 		p->total_numa_faults = 0;
+		memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality));
 	}
 
 	/*
@@ -1552,19 +1638,6 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 			task_numa_group(p, last_cpupid, flags, &priv);
 	}
 
-	/*
-	 * If pages are properly placed (did not migrate) then scan slower.
-	 * This is reset periodically in case of phase changes
-	 */
-	if (!migrated) {
-		/* Initialise if necessary */
-		if (!p->numa_scan_period_max)
-			p->numa_scan_period_max = task_scan_max(p);
-
-		p->numa_scan_period = min(p->numa_scan_period_max,
-			p->numa_scan_period + 10);
-	}
-
 	task_numa_placement(p);
 
 	/* Retry task to preferred node migration if it previously failed */
@@ -1575,6 +1648,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 		p->numa_pages_migrated += pages;
 
 	p->numa_faults_buffer[task_faults_idx(node, priv)] += pages;
+	p->numa_faults_locality[!!(flags & TNF_FAULT_LOCAL)] += pages;
 }
 
 static void reset_ptenuma_scan(struct task_struct *p)
@@ -1701,18 +1775,6 @@ void task_numa_work(struct callback_head *work)
 	}
 
 out:
-	/*
-	 * If the whole process was scanned without updates then no NUMA
-	 * hinting faults are being recorded and scan rate should be lower.
-	 */
-	if (mm->numa_scan_offset == 0 && !nr_pte_updates) {
-		p->numa_scan_period = min(p->numa_scan_period_max,
-			p->numa_scan_period << 1);
-
-		next_scan = now + msecs_to_jiffies(p->numa_scan_period);
-		mm->numa_next_scan = next_scan;
-	}
-
 	/*
 	 * It is possible to reach the end of the VMA list but the last few
 	 * VMAs are not guaranteed to the vma_migratable. If they are not, we
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7ab4e32afe12..1be2a1f95b61 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1296,8 +1296,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page_nid = page_to_nid(page);
 	last_cpupid = page_cpupid_last(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (page_nid == this_nid)
+	if (page_nid == this_nid) {
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+		flags |= TNF_FAULT_LOCAL;
+	}
 
 	/*
 	 * Avoid grouping on DSO/COW pages in specific and RO pages
diff --git a/mm/memory.c b/mm/memory.c
index 823720c43ea9..1c7501f7fb1a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3527,13 +3527,16 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-				unsigned long addr, int page_nid)
+				unsigned long addr, int page_nid,
+				int *flags)
 {
 	get_page(page);
 
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (page_nid == numa_node_id())
+	if (page_nid == numa_node_id()) {
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+		*flags |= TNF_FAULT_LOCAL;
+	}
 
 	return mpol_misplaced(page, vma, addr);
 }
@@ -3593,7 +3596,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	last_cpupid = page_cpupid_last(page);
 	page_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+	target_nid = numa_migrate_prep(page, vma, addr, page_nid, &flags);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
 		put_page(page);
-- 
cgit v1.2.3


From 930aa174fcc8b0efaad102fd80f677b92f35eaa2 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:37 +0100
Subject: sched/numa: Remove the numa_balancing_scan_period_reset sysctl

With scan rate adaptions based on whether the workload has properly
converged or not there should be no need for the scan period reset
hammer. Get rid of it.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-60-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/sysctl/kernel.txt | 11 +++--------
 include/linux/mm_types.h        |  3 ---
 include/linux/sched/sysctl.h    |  1 -
 kernel/sched/core.c             |  1 -
 kernel/sched/fair.c             | 18 +-----------------
 kernel/sysctl.c                 |  7 -------
 6 files changed, 4 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index d48bca45b6f2..84f17800f8b5 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -374,15 +374,13 @@ guarantee. If the target workload is already bound to NUMA nodes then this
 feature should be disabled. Otherwise, if the system overhead from the
 feature is too high then the rate the kernel samples for NUMA hinting
 faults may be controlled by the numa_balancing_scan_period_min_ms,
-numa_balancing_scan_delay_ms, numa_balancing_scan_period_reset,
-numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb and
-numa_balancing_settle_count sysctls.
+numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
+numa_balancing_scan_size_mb and numa_balancing_settle_count sysctls.
 
 ==============================================================
 
 numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms,
-numa_balancing_scan_period_max_ms, numa_balancing_scan_period_reset,
-numa_balancing_scan_size_mb
+numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
 
 Automatic NUMA balancing scans tasks address space and unmaps pages to
 detect if pages are properly placed or if the data should be migrated to a
@@ -418,9 +416,6 @@ rate for each task.
 numa_balancing_scan_size_mb is how many megabytes worth of pages are
 scanned for a given scan.
 
-numa_balancing_scan_period_reset is a blunt instrument that controls how
-often a tasks scan delay is reset to detect sudden changes in task behaviour.
-
 numa_balancing_settle_count is how many scan periods must complete before
 the schedule balancer stops pushing the task towards a preferred node. This
 gives the scheduler a chance to place the task on an alternative node if the
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a30f9ca66557..a3198e5aaf4e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -420,9 +420,6 @@ struct mm_struct {
 	 */
 	unsigned long numa_next_scan;
 
-	/* numa_next_reset is when the PTE scanner period will be reset */
-	unsigned long numa_next_reset;
-
 	/* Restart point for scanning and setting pte_numa */
 	unsigned long numa_scan_offset;
 
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index bf8086b2506e..10d16c4fbe89 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -47,7 +47,6 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
 extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_numa_balancing_settle_count;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8cfd51f62241..89c5ae836f66 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1721,7 +1721,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 #ifdef CONFIG_NUMA_BALANCING
 	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
 		p->mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
-		p->mm->numa_next_reset = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
 		p->mm->numa_scan_seq = 0;
 	}
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 66237ff8b01e..da6fa22be000 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -826,7 +826,6 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  */
 unsigned int sysctl_numa_balancing_scan_period_min = 1000;
 unsigned int sysctl_numa_balancing_scan_period_max = 60000;
-unsigned int sysctl_numa_balancing_scan_period_reset = 60000;
 
 /* Portion of address space to scan in MB */
 unsigned int sysctl_numa_balancing_scan_size = 256;
@@ -1685,24 +1684,9 @@ void task_numa_work(struct callback_head *work)
 	if (p->flags & PF_EXITING)
 		return;
 
-	if (!mm->numa_next_reset || !mm->numa_next_scan) {
+	if (!mm->numa_next_scan) {
 		mm->numa_next_scan = now +
 			msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
-		mm->numa_next_reset = now +
-			msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
-	}
-
-	/*
-	 * Reset the scan period if enough time has gone by. Objective is that
-	 * scanning will be reduced if pages are properly placed. As tasks
-	 * can enter different phases this needs to be re-examined. Lacking
-	 * proper tracking of reference behaviour, this blunt hammer is used.
-	 */
-	migrate = mm->numa_next_reset;
-	if (time_after(now, migrate)) {
-		p->numa_scan_period = task_scan_min(p);
-		next_scan = now + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
-		xchg(&mm->numa_next_reset, next_scan);
 	}
 
 	/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 42f616a74f40..e509b90a8002 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -370,13 +370,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "numa_balancing_scan_period_reset",
-		.data		= &sysctl_numa_balancing_scan_period_reset,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "numa_balancing_scan_period_max_ms",
 		.data		= &sysctl_numa_balancing_scan_period_max,
-- 
cgit v1.2.3


From 1e3646ffc64b232cb14a5ef01d7b98997c1b73f9 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:38 +0100
Subject: mm: numa: Revert temporarily disabling of NUMA migration

With the scan rate code working (at least for multi-instance specjbb),
the large hammer that is "sched: Do not migrate memory immediately after
switching node" can be replaced with something smarter. Revert temporarily
migration disabling and all traces of numa_migrate_seq.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-61-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  1 -
 kernel/sched/core.c   |  2 --
 kernel/sched/fair.c   | 25 +------------------------
 mm/mempolicy.c        | 12 ------------
 4 files changed, 1 insertion(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2292f6c1596f..d24f70ffddee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1340,7 +1340,6 @@ struct task_struct {
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 	int numa_scan_seq;
-	int numa_migrate_seq;
 	unsigned int numa_scan_period;
 	unsigned int numa_scan_period_max;
 	unsigned long numa_migrate_retry;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 89c5ae836f66..0c3feebcf112 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1731,7 +1731,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	p->node_stamp = 0ULL;
 	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
-	p->numa_migrate_seq = 1;
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_work.next = &p->numa_work;
 	p->numa_faults = NULL;
@@ -4488,7 +4487,6 @@ void sched_setnuma(struct task_struct *p, int nid)
 		p->sched_class->put_prev_task(rq, p);
 
 	p->numa_preferred_nid = nid;
-	p->numa_migrate_seq = 1;
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index da6fa22be000..8454c38b1b12 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1261,16 +1261,8 @@ static void numa_migrate_preferred(struct task_struct *p)
 {
 	/* Success if task is already running on preferred CPU */
 	p->numa_migrate_retry = 0;
-	if (cpu_to_node(task_cpu(p)) == p->numa_preferred_nid) {
-		/*
-		 * If migration is temporarily disabled due to a task migration
-		 * then re-enable it now as the task is running on its
-		 * preferred node and memory should migrate locally
-		 */
-		if (!p->numa_migrate_seq)
-			p->numa_migrate_seq++;
+	if (cpu_to_node(task_cpu(p)) == p->numa_preferred_nid)
 		return;
-	}
 
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1))
@@ -1367,7 +1359,6 @@ static void task_numa_placement(struct task_struct *p)
 	if (p->numa_scan_seq == seq)
 		return;
 	p->numa_scan_seq = seq;
-	p->numa_migrate_seq++;
 	p->numa_scan_period_max = task_scan_max(p);
 
 	/* If the task is part of a group prevent parallel updates to group stats */
@@ -4730,20 +4721,6 @@ static void move_task(struct task_struct *p, struct lb_env *env)
 	set_task_cpu(p, env->dst_cpu);
 	activate_task(env->dst_rq, p, 0);
 	check_preempt_curr(env->dst_rq, p, 0);
-#ifdef CONFIG_NUMA_BALANCING
-	if (p->numa_preferred_nid != -1) {
-		int src_nid = cpu_to_node(env->src_cpu);
-		int dst_nid = cpu_to_node(env->dst_cpu);
-
-		/*
-		 * If the load balancer has moved the task then limit
-		 * migrations from taking place in the short term in
-		 * case this is a short-lived migration.
-		 */
-		if (src_nid != dst_nid && dst_nid != p->numa_preferred_nid)
-			p->numa_migrate_seq = 0;
-	}
-#endif
 }
 
 /*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a5867ef24bda..2929c24c22b7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2404,18 +2404,6 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 		last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
 		if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid)
 			goto out;
-
-#ifdef CONFIG_NUMA_BALANCING
-		/*
-		 * If the scheduler has just moved us away from our
-		 * preferred node, do not bother migrating pages yet.
-		 * This way a short and temporary process migration will
-		 * not cause excessive memory migration.
-		 */
-		if (thisnid != current->numa_preferred_nid &&
-				!current->numa_migrate_seq)
-			goto out;
-#endif
 	}
 
 	if (curnid != polnid)
-- 
cgit v1.2.3


From de1c9ce6f07fec0381a39a9d0b379ea35aa1167f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:39 +0100
Subject: sched/numa: Skip some page migrations after a shared fault

Shared faults can lead to lots of unnecessary page migrations,
slowing down the system, and causing private faults to hit the
per-pgdat migration ratelimit.

This patch adds sysctl numa_balancing_migrate_deferred, which specifies
how many shared page migrations to skip unconditionally, after each page
migration that is skipped because it is a shared fault.

This reduces the number of page migrations back and forth in
shared fault situations. It also gives a strong preference to
the tasks that are already running where most of the memory is,
and to moving the other tasks to near the memory.

Testing this with a much higher scan rate than the default
still seems to result in fewer page migrations than before.

Memory seems to be somewhat better consolidated than previously,
with multi-instance specjbb runs on a 4 node system.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-62-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/sysctl/kernel.txt | 10 ++++++++-
 include/linux/sched.h           |  5 ++++-
 kernel/sched/fair.c             |  8 +++++++
 kernel/sysctl.c                 |  7 ++++++
 mm/mempolicy.c                  | 48 ++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 75 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 84f17800f8b5..4273b2d71a27 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -375,7 +375,8 @@ feature should be disabled. Otherwise, if the system overhead from the
 feature is too high then the rate the kernel samples for NUMA hinting
 faults may be controlled by the numa_balancing_scan_period_min_ms,
 numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
-numa_balancing_scan_size_mb and numa_balancing_settle_count sysctls.
+numa_balancing_scan_size_mb, numa_balancing_settle_count sysctls and
+numa_balancing_migrate_deferred.
 
 ==============================================================
 
@@ -421,6 +422,13 @@ the schedule balancer stops pushing the task towards a preferred node. This
 gives the scheduler a chance to place the task on an alternative node if the
 preferred node is overloaded.
 
+numa_balancing_migrate_deferred is how many page migrations get skipped
+unconditionally, after a page migration is skipped because a page is shared
+with other tasks. This reduces page migration overhead, and determines
+how much stronger the "move task near its memory" policy scheduler becomes,
+versus the "move memory near its task" memory management policy, for workloads
+with shared memory.
+
 ==============================================================
 
 osrelease, ostype & version:
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d24f70ffddee..833eed55cf43 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1342,6 +1342,8 @@ struct task_struct {
 	int numa_scan_seq;
 	unsigned int numa_scan_period;
 	unsigned int numa_scan_period_max;
+	int numa_preferred_nid;
+	int numa_migrate_deferred;
 	unsigned long numa_migrate_retry;
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
@@ -1372,7 +1374,6 @@ struct task_struct {
 	 */
 	unsigned long numa_faults_locality[2];
 
-	int numa_preferred_nid;
 	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
@@ -1469,6 +1470,8 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 extern void task_numa_free(struct task_struct *p);
+
+extern unsigned int sysctl_numa_balancing_migrate_deferred;
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
 				   int flags)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8454c38b1b12..e7884dc3416d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -833,6 +833,14 @@ unsigned int sysctl_numa_balancing_scan_size = 256;
 /* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */
 unsigned int sysctl_numa_balancing_scan_delay = 1000;
 
+/*
+ * After skipping a page migration on a shared page, skip N more numa page
+ * migrations unconditionally. This reduces the number of NUMA migrations
+ * in shared memory workloads, and has the effect of pulling tasks towards
+ * where their memory lives, over pulling the memory towards the task.
+ */
+unsigned int sysctl_numa_balancing_migrate_deferred = 16;
+
 static unsigned int task_nr_scan_windows(struct task_struct *p)
 {
 	unsigned long rss = 0;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e509b90a8002..a159e1fd2013 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -391,6 +391,13 @@ static struct ctl_table kern_table[] = {
 		.mode           = 0644,
 		.proc_handler   = proc_dointvec,
 	},
+	{
+		.procname       = "numa_balancing_migrate_deferred",
+		.data           = &sysctl_numa_balancing_migrate_deferred,
+		.maxlen         = sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec,
+	},
 #endif /* CONFIG_NUMA_BALANCING */
 #endif /* CONFIG_SCHED_DEBUG */
 	{
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 2929c24c22b7..71cb253368cb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2301,6 +2301,35 @@ static void sp_free(struct sp_node *n)
 	kmem_cache_free(sn_cache, n);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
+{
+	/* Never defer a private fault */
+	if (cpupid_match_pid(p, last_cpupid))
+		return false;
+
+	if (p->numa_migrate_deferred) {
+		p->numa_migrate_deferred--;
+		return true;
+	}
+	return false;
+}
+
+static inline void defer_numa_migrate(struct task_struct *p)
+{
+	p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred;
+}
+#else
+static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
+{
+	return false;
+}
+
+static inline void defer_numa_migrate(struct task_struct *p)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /**
  * mpol_misplaced - check whether current page node is valid in policy
  *
@@ -2402,7 +2431,24 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 		 * relation.
 		 */
 		last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
-		if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid)
+		if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) {
+
+			/* See sysctl_numa_balancing_migrate_deferred comment */
+			if (!cpupid_match_pid(current, last_cpupid))
+				defer_numa_migrate(current);
+
+			goto out;
+		}
+
+		/*
+		 * The quadratic filter above reduces extraneous migration
+		 * of shared pages somewhat. This code reduces it even more,
+		 * reducing the overhead of page migrations of shared pages.
+		 * This makes workloads with shared pages rely more on
+		 * "move task near its memory", and less on "move memory
+		 * towards its task", which is exactly what we want.
+		 */
+		if (numa_migrate_deferred(current, last_cpupid))
 			goto out;
 	}
 
-- 
cgit v1.2.3


From 915f441b6f31b1a8ee01e9263a4e2d44c434d832 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Wed, 9 Oct 2013 13:30:10 +0100
Subject: regmap: Provide asynchronous write and update bits operations

Make it easier for drivers to include single register writes in
asynchronous sequences by providing async versions of the write
and update bits operations. The update bits operations are only
likely to be effective when used with devices that have caches
but this is common enough to be useful.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/base/regmap/regmap.c | 103 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/regmap.h       |  31 +++++++++++++
 2 files changed, 134 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 268fb71891ee..0503d868ff8c 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1337,6 +1337,37 @@ int regmap_write(struct regmap *map, unsigned int reg, unsigned int val)
 }
 EXPORT_SYMBOL_GPL(regmap_write);
 
+/**
+ * regmap_write_async(): Write a value to a single register asynchronously
+ *
+ * @map: Register map to write to
+ * @reg: Register to write to
+ * @val: Value to be written
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_write_async(struct regmap *map, unsigned int reg, unsigned int val)
+{
+	int ret;
+
+	if (reg % map->reg_stride)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	map->async = true;
+
+	ret = _regmap_write(map, reg, val);
+
+	map->async = false;
+
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_write_async);
+
 /**
  * regmap_raw_write(): Write raw values to one or more registers
  *
@@ -1810,6 +1841,41 @@ int regmap_update_bits(struct regmap *map, unsigned int reg,
 }
 EXPORT_SYMBOL_GPL(regmap_update_bits);
 
+/**
+ * regmap_update_bits_async: Perform a read/modify/write cycle on the register
+ *                           map asynchronously
+ *
+ * @map: Register map to update
+ * @reg: Register to update
+ * @mask: Bitmask to change
+ * @val: New value for bitmask
+ *
+ * With most buses the read must be done synchronously so this is most
+ * useful for devices with a cache which do not need to interact with
+ * the hardware to determine the current register value.
+ *
+ * Returns zero for success, a negative number on error.
+ */
+int regmap_update_bits_async(struct regmap *map, unsigned int reg,
+			     unsigned int mask, unsigned int val)
+{
+	bool change;
+	int ret;
+
+	map->lock(map->lock_arg);
+
+	map->async = true;
+
+	ret = _regmap_update_bits(map, reg, mask, val, &change);
+
+	map->async = false;
+
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_update_bits_async);
+
 /**
  * regmap_update_bits_check: Perform a read/modify/write cycle on the
  *                           register map and report if updated
@@ -1835,6 +1901,43 @@ int regmap_update_bits_check(struct regmap *map, unsigned int reg,
 }
 EXPORT_SYMBOL_GPL(regmap_update_bits_check);
 
+/**
+ * regmap_update_bits_check_async: Perform a read/modify/write cycle on the
+ *                                 register map asynchronously and report if
+ *                                 updated
+ *
+ * @map: Register map to update
+ * @reg: Register to update
+ * @mask: Bitmask to change
+ * @val: New value for bitmask
+ * @change: Boolean indicating if a write was done
+ *
+ * With most buses the read must be done synchronously so this is most
+ * useful for devices with a cache which do not need to interact with
+ * the hardware to determine the current register value.
+ *
+ * Returns zero for success, a negative number on error.
+ */
+int regmap_update_bits_check_async(struct regmap *map, unsigned int reg,
+				   unsigned int mask, unsigned int val,
+				   bool *change)
+{
+	int ret;
+
+	map->lock(map->lock_arg);
+
+	map->async = true;
+
+	ret = _regmap_update_bits(map, reg, mask, val, change);
+
+	map->async = false;
+
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_update_bits_check_async);
+
 void regmap_async_complete_cb(struct regmap_async *async, int ret)
 {
 	struct regmap *map = async->map;
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..114565befbd2 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -374,6 +374,7 @@ int regmap_reinit_cache(struct regmap *map,
 			const struct regmap_config *config);
 struct regmap *dev_get_regmap(struct device *dev, const char *name);
 int regmap_write(struct regmap *map, unsigned int reg, unsigned int val);
+int regmap_write_async(struct regmap *map, unsigned int reg, unsigned int val);
 int regmap_raw_write(struct regmap *map, unsigned int reg,
 		     const void *val, size_t val_len);
 int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
@@ -387,9 +388,14 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 		     size_t val_count);
 int regmap_update_bits(struct regmap *map, unsigned int reg,
 		       unsigned int mask, unsigned int val);
+int regmap_update_bits_async(struct regmap *map, unsigned int reg,
+			     unsigned int mask, unsigned int val);
 int regmap_update_bits_check(struct regmap *map, unsigned int reg,
 			     unsigned int mask, unsigned int val,
 			     bool *change);
+int regmap_update_bits_check_async(struct regmap *map, unsigned int reg,
+				   unsigned int mask, unsigned int val,
+				   bool *change);
 int regmap_get_val_bytes(struct regmap *map);
 int regmap_async_complete(struct regmap *map);
 bool regmap_can_raw_write(struct regmap *map);
@@ -527,6 +533,13 @@ static inline int regmap_write(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_write_async(struct regmap *map, unsigned int reg,
+				     unsigned int val)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_raw_write(struct regmap *map, unsigned int reg,
 				   const void *val, size_t val_len)
 {
@@ -576,6 +589,14 @@ static inline int regmap_update_bits(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_update_bits_async(struct regmap *map,
+					   unsigned int reg,
+					   unsigned int mask, unsigned int val)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_update_bits_check(struct regmap *map,
 					   unsigned int reg,
 					   unsigned int mask, unsigned int val,
@@ -585,6 +606,16 @@ static inline int regmap_update_bits_check(struct regmap *map,
 	return -EINVAL;
 }
 
+static inline int regmap_update_bits_check_async(struct regmap *map,
+						 unsigned int reg,
+						 unsigned int mask,
+						 unsigned int val,
+						 bool *change)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_get_val_bytes(struct regmap *map)
 {
 	WARN_ONCE(1, "regmap API is disabled");
-- 
cgit v1.2.3


From a8bf7527a2e17ccf1366e67f6ac728327ca34c40 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 26 Aug 2013 11:22:45 -0500
Subject: of: create unflatten_and_copy_device_tree

Several architectures using DT support built-in dtb's in the init
section. These platforms need to copy the dtb from init since the
strings are referenced after unflattening. Every arch has their own
copying routine which do the same thing. Create a common function,
unflatten_and_copy_device_tree, to copy the dtb when unflattening the
dtb.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/fdt.c       | 24 ++++++++++++++++++++++++
 include/linux/of_fdt.h |  2 ++
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 229dd9d69e18..51776166d2b0 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -802,6 +802,30 @@ void __init unflatten_device_tree(void)
 	of_alias_scan(early_init_dt_alloc_memory_arch);
 }
 
+/**
+ * unflatten_and_copy_device_tree - copy and create tree of device_nodes from flat blob
+ *
+ * Copies and unflattens the device-tree passed by the firmware, creating the
+ * tree of struct device_node. It also fills the "name" and "type"
+ * pointers of the nodes so the normal device-tree walking functions
+ * can be used. This should only be used when the FDT memory has not been
+ * reserved such is the case when the FDT is built-in to the kernel init
+ * section. If the FDT memory is reserved already then unflatten_device_tree
+ * should be used instead.
+ */
+void __init unflatten_and_copy_device_tree(void)
+{
+	int size = __be32_to_cpu(initial_boot_params->totalsize);
+	void *dt = early_init_dt_alloc_memory_arch(size,
+		__alignof__(struct boot_param_header));
+
+	if (dt) {
+		memcpy(dt, initial_boot_params, size);
+		initial_boot_params = dt;
+	}
+	unflatten_device_tree();
+}
+
 #endif /* CONFIG_OF_EARLY_FLATTREE */
 
 /* Feed entire flattened device tree into the random pool */
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index a478c62a2aab..58c28a8cc257 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -118,9 +118,11 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 
 /* Other Prototypes */
 extern void unflatten_device_tree(void);
+extern void unflatten_and_copy_device_tree(void);
 extern void early_init_devtree(void *);
 #else /* CONFIG_OF_FLATTREE */
 static inline void unflatten_device_tree(void) {}
+static inline void unflatten_and_copy_device_tree(void) {}
 #endif /* CONFIG_OF_FLATTREE */
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3


From 0288ffcbfdf9b8656e7320c24caa1e4c1d498287 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 26 Aug 2013 09:47:40 -0500
Subject: of: Introduce common early_init_dt_scan

Most architectures scan the all the same items early in the FDT and none
are really architecture specific. Create a common early_init_dt_scan to
unify the early scan of root, memory, and chosen nodes in the flattened
DT.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/fdt.c       | 26 ++++++++++++++++++++++++++
 include/linux/of_fdt.h |  2 ++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 51776166d2b0..bfbfda543768 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -785,6 +785,32 @@ void * __init __weak early_init_dt_alloc_memory_arch(u64 size, u64 align)
 }
 #endif
 
+bool __init early_init_dt_scan(void *params)
+{
+	if (!params)
+		return false;
+
+	/* Setup flat device-tree pointer */
+	initial_boot_params = params;
+
+	/* check device tree validity */
+	if (be32_to_cpu(initial_boot_params->magic) != OF_DT_HEADER) {
+		initial_boot_params = NULL;
+		return false;
+	}
+
+	/* Retrieve various information from the /chosen node */
+	of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
+
+	/* Initialize {size,address}-cells info */
+	of_scan_flat_dt(early_init_dt_scan_root, NULL);
+
+	/* Setup memory, calling early_init_dt_add_memory_arch */
+	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
+
+	return true;
+}
+
 /**
  * unflatten_device_tree - create tree of device_nodes from flat blob
  *
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 58c28a8cc257..73e16511134e 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -116,6 +116,8 @@ extern void early_init_dt_setup_initrd_arch(u64 start, u64 end);
 extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 				   int depth, void *data);
 
+extern bool early_init_dt_scan(void *params);
+
 /* Other Prototypes */
 extern void unflatten_device_tree(void);
 extern void unflatten_and_copy_device_tree(void);
-- 
cgit v1.2.3


From 29eb45a9ab4839a1e9cef2bcf369b918c9c4fcad Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Fri, 30 Aug 2013 17:06:53 -0500
Subject: of: remove early_init_dt_setup_initrd_arch

All arches do essentially the same thing now for
early_init_dt_setup_initrd_arch, so it can now be removed.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Mark Salter <msalter@redhat.com>
Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Chris Zankel <chris@zankel.net>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 arch/arc/mm/init.c            |  7 -------
 arch/arm/mm/init.c            |  8 --------
 arch/c6x/kernel/devicetree.c  | 10 ----------
 arch/metag/mm/init.c          |  9 ---------
 arch/microblaze/kernel/prom.c |  9 ---------
 arch/mips/kernel/prom.c       | 10 ----------
 arch/openrisc/kernel/prom.c   |  9 ---------
 arch/powerpc/kernel/prom.c    |  9 ---------
 arch/x86/kernel/devicetree.c  |  9 ---------
 arch/xtensa/kernel/setup.c    | 15 ++++-----------
 drivers/of/fdt.c              |  9 ++++++---
 include/linux/of_fdt.h        | 10 ----------
 12 files changed, 10 insertions(+), 104 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 81279ec73a6a..55e0a85bea78 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -125,10 +125,3 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
-
-#ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	pr_err("%s(%llx, %llx)\n", __func__, start, end);
-}
-#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9eeb1cd1c401..9d0b91dccf37 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -77,14 +77,6 @@ static int __init parse_tag_initrd2(const struct tag *tag)
 
 __tagtable(ATAG_INITRD2, parse_tag_initrd2);
 
-#if defined(CONFIG_OF_FLATTREE) && defined(CONFIG_BLK_DEV_INITRD)
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	initrd_start = (unsigned long)__va(start);
-	initrd_end = (unsigned long)__va(end);
-}
-#endif /* CONFIG_OF_FLATTREE */
-
 /*
  * This keeps memory configuration data used by a couple memory
  * initialization functions, as well as show_mem() for the skipping
diff --git a/arch/c6x/kernel/devicetree.c b/arch/c6x/kernel/devicetree.c
index d28a92fc0c52..fa3e5741514e 100644
--- a/arch/c6x/kernel/devicetree.c
+++ b/arch/c6x/kernel/devicetree.c
@@ -10,18 +10,8 @@
  *
  */
 #include <linux/init.h>
-#include <linux/initrd.h>
 #include <linux/memblock.h>
 
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	initrd_start = (unsigned long)__va(start);
-	initrd_end = (unsigned long)__va(end);
-	initrd_below_start_ok = 1;
-}
-#endif
-
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 {
 	c6x_add_memory(base, size);
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
index 123919534b80..249fff66add3 100644
--- a/arch/metag/mm/init.c
+++ b/arch/metag/mm/init.c
@@ -12,7 +12,6 @@
 #include <linux/percpu.h>
 #include <linux/memblock.h>
 #include <linux/initrd.h>
-#include <linux/of_fdt.h>
 
 #include <asm/setup.h>
 #include <asm/page.h>
@@ -405,11 +404,3 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 			   "initrd");
 }
 #endif
-
-#ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	pr_err("%s(%llx, %llx)\n",
-	       __func__, start, end);
-}
-#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 951e4d61ca2d..cab6dc356119 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -114,15 +114,6 @@ void __init early_init_devtree(void *params)
 	pr_debug(" <- early_init_devtree()\n");
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	initrd_start = (unsigned long)__va(start);
-	initrd_end = (unsigned long)__va(end);
-	initrd_below_start_ok = 1;
-}
-#endif
-
 /*******
  *
  * New implementation of the OF "find" APIs, return a refcounted
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index 67a4c53b2b54..0b2485f9a6c1 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -13,7 +13,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/bootmem.h>
-#include <linux/initrd.h>
 #include <linux/debugfs.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
@@ -48,15 +47,6 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 	return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	initrd_start = (unsigned long)__va(start);
-	initrd_end = (unsigned long)__va(end);
-	initrd_below_start_ok = 1;
-}
-#endif
-
 int __init early_init_dt_scan_model(unsigned long node,	const char *uname,
 				    int depth, void *data)
 {
diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c
index 6dbcaa85421c..2aae474b44c3 100644
--- a/arch/openrisc/kernel/prom.c
+++ b/arch/openrisc/kernel/prom.c
@@ -52,12 +52,3 @@ void __init early_init_devtree(void *params)
 	early_init_dt_scan(params);
 	memblock_allow_resize();
 }
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	initrd_start = (unsigned long)__va(start);
-	initrd_end = (unsigned long)__va(end);
-	initrd_below_start_ok = 1;
-}
-#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b7634ce41dbc..a0894688daef 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -546,15 +546,6 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	memblock_add(base, size);
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	initrd_start = (unsigned long)__va(start);
-	initrd_end = (unsigned long)__va(end);
-	initrd_below_start_ok = 1;
-}
-#endif
-
 static void __init early_reserve_mem_dt(void)
 {
 	unsigned long i, len, dt_root;
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 0db805c4b9e8..0e1f95b06336 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -51,15 +51,6 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 	return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	initrd_start = (unsigned long)__va(start);
-	initrd_end = (unsigned long)__va(end);
-	initrd_below_start_ok = 1;
-}
-#endif
-
 void __init add_dtb(u64 data)
 {
 	initial_dtb = data + offsetof(struct setup_data, data);
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 65974a8f41a4..6e2b6638122d 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -61,8 +61,8 @@ extern struct rtc_ops no_rtc_ops;
 struct rtc_ops *rtc_ops;
 
 #ifdef CONFIG_BLK_DEV_INITRD
-extern void *initrd_start;
-extern void *initrd_end;
+extern unsigned long initrd_start;
+extern unsigned long initrd_end;
 int initrd_is_mapped = 0;
 extern int initrd_below_start_ok;
 #endif
@@ -149,8 +149,8 @@ static int __init parse_tag_initrd(const bp_tag_t* tag)
 {
 	meminfo_t* mi;
 	mi = (meminfo_t*)(tag->data);
-	initrd_start = __va(mi->start);
-	initrd_end = __va(mi->end);
+	initrd_start = (unsigned long)__va(mi->start);
+	initrd_end = (unsigned long)__va(mi->end);
 
 	return 0;
 }
@@ -167,13 +167,6 @@ static int __init parse_tag_fdt(const bp_tag_t *tag)
 
 __tagtable(BP_TAG_FDT, parse_tag_fdt);
 
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
-	initrd_start = (void *)__va(start);
-	initrd_end = (void *)__va(end);
-	initrd_below_start_ok = 1;
-}
-
 #endif /* CONFIG_OF */
 
 #endif /* CONFIG_BLK_DEV_INITRD */
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 5bc55b6e2b41..5f4cc88cd89e 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -624,7 +624,7 @@ int __init of_scan_flat_dt_by_path(const char *path,
  * early_init_dt_check_for_initrd - Decode initrd location from flat tree
  * @node: reference to node containing initrd location ('chosen')
  */
-void __init early_init_dt_check_for_initrd(unsigned long node)
+static void __init early_init_dt_check_for_initrd(unsigned long node)
 {
 	u64 start, end;
 	unsigned long len;
@@ -642,12 +642,15 @@ void __init early_init_dt_check_for_initrd(unsigned long node)
 		return;
 	end = of_read_number(prop, len/4);
 
-	early_init_dt_setup_initrd_arch(start, end);
+	initrd_start = (unsigned long)__va(start);
+	initrd_end = (unsigned long)__va(end);
+	initrd_below_start_ok = 1;
+
 	pr_debug("initrd_start=0x%llx  initrd_end=0x%llx\n",
 		 (unsigned long long)start, (unsigned long long)end);
 }
 #else
-inline void early_init_dt_check_for_initrd(unsigned long node)
+static inline void early_init_dt_check_for_initrd(unsigned long node)
 {
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 73e16511134e..b365f5ac7b54 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -96,22 +96,12 @@ extern int of_scan_flat_dt_by_path(const char *path,
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
-extern void early_init_dt_check_for_initrd(unsigned long node);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
 extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
 extern u64 dt_mem_next_cell(int s, __be32 **cellp);
 
-/*
- * If BLK_DEV_INITRD, the fdt early init code will call this function,
- * to be provided by the arch code. start and end are specified as
- * physical addresses.
- */
-#ifdef CONFIG_BLK_DEV_INITRD
-extern void early_init_dt_setup_initrd_arch(u64 start, u64 end);
-#endif
-
 /* Early flat tree scan hooks */
 extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 				   int depth, void *data);
-- 
cgit v1.2.3


From b4042ceaabbd913bc5b397ddd1e396eeb312d72f Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 18 Jul 2013 16:21:19 -0700
Subject: sched_clock: Remove sched_clock_func() hook

Nobody is using sched_clock_func() anymore now that sched_clock
supports up to 64 bits. Remove the hook so that new code only
uses sched_clock_register().

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/sched_clock.h | 2 --
 kernel/time/sched_clock.c   | 9 +--------
 2 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index eca7abeb86fc..cddf0c2940b6 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -18,6 +18,4 @@ extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
 extern void sched_clock_register(u64 (*read)(void), int bits,
 				 unsigned long rate);
 
-extern unsigned long long (*sched_clock_func)(void);
-
 #endif
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index f388baeaf2b6..68b799375981 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -63,7 +63,7 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 	return (cyc * mult) >> shift;
 }
 
-static unsigned long long notrace sched_clock_32(void)
+unsigned long long notrace sched_clock(void)
 {
 	u64 epoch_ns;
 	u64 epoch_cyc;
@@ -170,13 +170,6 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	sched_clock_register(read_sched_clock_32_wrapper, bits, rate);
 }
 
-unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32;
-
-unsigned long long notrace sched_clock(void)
-{
-	return sched_clock_func();
-}
-
 void __init sched_clock_postinit(void)
 {
 	/*
-- 
cgit v1.2.3


From 6a903a2551ef778d60ce4341722d611144251398 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Tue, 27 Aug 2013 21:41:56 -0500
Subject: of: introduce common FDT machine related functions

Introduce common of_flat_dt_match_machine and
of_flat_dt_get_machine_name functions to unify architectures' handling
of machine level model and compatible properties.

Several architectures match the root compatible string with an arch
specific list of machine descriptors duplicating the same search
algorithm. Create a common implementation with a simple architecture
specific hook to iterate over each machine's match table.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/fdt.c       | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_fdt.h |  5 +++++
 2 files changed, 65 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 5f4cc88cd89e..5c479104fc67 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -619,6 +619,66 @@ int __init of_scan_flat_dt_by_path(const char *path,
 		return ret;
 }
 
+const char * __init of_flat_dt_get_machine_name(void)
+{
+	const char *name;
+	unsigned long dt_root = of_get_flat_dt_root();
+
+	name = of_get_flat_dt_prop(dt_root, "model", NULL);
+	if (!name)
+		name = of_get_flat_dt_prop(dt_root, "compatible", NULL);
+	return name;
+}
+
+/**
+ * of_flat_dt_match_machine - Iterate match tables to find matching machine.
+ *
+ * @default_match: A machine specific ptr to return in case of no match.
+ * @get_next_compat: callback function to return next compatible match table.
+ *
+ * Iterate through machine match tables to find the best match for the machine
+ * compatible string in the FDT.
+ */
+const void * __init of_flat_dt_match_machine(const void *default_match,
+		const void * (*get_next_compat)(const char * const**))
+{
+	const void *data = NULL;
+	const void *best_data = default_match;
+	const char *const *compat;
+	unsigned long dt_root;
+	unsigned int best_score = ~1, score = 0;
+
+	dt_root = of_get_flat_dt_root();
+	while ((data = get_next_compat(&compat))) {
+		score = of_flat_dt_match(dt_root, compat);
+		if (score > 0 && score < best_score) {
+			best_data = data;
+			best_score = score;
+		}
+	}
+	if (!best_data) {
+		const char *prop;
+		long size;
+
+		pr_err("\n unrecognized device tree list:\n[ ");
+
+		prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
+		if (prop) {
+			while (size > 0) {
+				printk("'%s' ", prop);
+				size -= strlen(prop) + 1;
+				prop += strlen(prop) + 1;
+			}
+		}
+		printk("]\n\n");
+		return NULL;
+	}
+
+	pr_info("Machine model: %s\n", of_flat_dt_get_machine_name());
+
+	return best_data;
+}
+
 #ifdef CONFIG_BLK_DEV_INITRD
 /**
  * early_init_dt_check_for_initrd - Decode initrd location from flat tree
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index b365f5ac7b54..0beaee9dac1f 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -108,11 +108,16 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 
 extern bool early_init_dt_scan(void *params);
 
+extern const char *of_flat_dt_get_machine_name(void);
+extern const void *of_flat_dt_match_machine(const void *default_match,
+		const void * (*get_next_compat)(const char * const**));
+
 /* Other Prototypes */
 extern void unflatten_device_tree(void);
 extern void unflatten_and_copy_device_tree(void);
 extern void early_init_devtree(void *);
 #else /* CONFIG_OF_FLATTREE */
+static inline const char *of_flat_dt_get_machine_name(void) { return NULL; }
 static inline void unflatten_device_tree(void) {}
 static inline void unflatten_and_copy_device_tree(void) {}
 #endif /* CONFIG_OF_FLATTREE */
-- 
cgit v1.2.3


From 25ff79443cbfa924b8df1d4a8a0fbff83816938a Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Sat, 7 Sep 2013 14:07:11 -0500
Subject: of: implement pci_address_to_pio as weak function

Implement pci_address_to_pio as weak function to remove the dependency on
asm/prom.h. This is in preparation to make prom.h optional.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Grant Likely <grant.likely@linaro.org>
---
 arch/microblaze/include/asm/prom.h |  9 ---------
 arch/mips/include/asm/prom.h       | 11 -----------
 arch/powerpc/include/asm/prom.h    |  5 -----
 arch/x86/include/asm/prom.h        |  3 ---
 arch/x86/kernel/devicetree.c       | 10 ----------
 drivers/of/address.c               |  8 ++++++++
 drivers/of/of_pci.c                |  1 -
 include/linux/of_address.h         |  5 +----
 8 files changed, 9 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index f05bedce70d0..0ebd924902df 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -26,13 +26,4 @@ enum early_consoles {
 
 extern int of_early_console(void *version);
 
-/*
- * OF address retreival & translation
- */
-
-#ifdef CONFIG_PCI
-extern unsigned long pci_address_to_pio(phys_addr_t address);
-#define pci_address_to_pio pci_address_to_pio
-#endif	/* CONFIG_PCI */
-
 #endif /* _ASM_MICROBLAZE_PROM_H */
diff --git a/arch/mips/include/asm/prom.h b/arch/mips/include/asm/prom.h
index e3dbd0e0608e..ccd2b75f152c 100644
--- a/arch/mips/include/asm/prom.h
+++ b/arch/mips/include/asm/prom.h
@@ -19,17 +19,6 @@
 
 extern void device_tree_init(void);
 
-static inline unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	/*
-	 * The ioport address can be directly used by inX() / outX()
-	 */
-	BUG_ON(address > IO_SPACE_LIMIT);
-
-	return (unsigned long) address;
-}
-#define pci_address_to_pio pci_address_to_pio
-
 struct boot_param_header;
 
 extern void __dt_setup_arch(struct boot_param_header *bph);
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 7d0c7f3a7171..bd215f74df0f 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -30,11 +30,6 @@
 extern u64 of_translate_dma_address(struct device_node *dev,
 				    const __be32 *in_addr);
 
-#ifdef CONFIG_PCI
-extern unsigned long pci_address_to_pio(phys_addr_t address);
-#define pci_address_to_pio pci_address_to_pio
-#endif	/* CONFIG_PCI */
-
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index bade6ac3b14f..8ef2ec70858f 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -39,9 +39,6 @@ static inline void x86_dtb_init(void) { }
 
 extern char cmd_line[COMMAND_LINE_SIZE];
 
-#define pci_address_to_pio pci_address_to_pio
-unsigned long pci_address_to_pio(phys_addr_t addr);
-
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 679d676a30d7..cffd07368547 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -27,16 +27,6 @@ char __initdata cmd_line[COMMAND_LINE_SIZE];
 
 int __initdata of_ioapic;
 
-unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	/*
-	 * The ioport address can be directly used by inX / outX
-	 */
-	BUG_ON(address >= (1 << 16));
-	return (unsigned long)address;
-}
-EXPORT_SYMBOL_GPL(pci_address_to_pio);
-
 void __init early_init_dt_scan_chosen_arch(unsigned long node)
 {
 	BUG();
diff --git a/drivers/of/address.c b/drivers/of/address.c
index b55c21890760..556a7fb6ead3 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -626,6 +626,14 @@ const __be32 *of_get_address(struct device_node *dev, int index, u64 *size,
 }
 EXPORT_SYMBOL(of_get_address);
 
+unsigned long __weak pci_address_to_pio(phys_addr_t address)
+{
+	if (address > IO_SPACE_LIMIT)
+		return (unsigned long)-1;
+
+	return (unsigned long) address;
+}
+
 static int __of_address_to_resource(struct device_node *dev,
 		const __be32 *addrp, u64 size, unsigned int flags,
 		const char *name, struct resource *r)
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index e5ca00893c0c..848199633798 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -2,7 +2,6 @@
 #include <linux/export.h>
 #include <linux/of.h>
 #include <linux/of_pci.h>
-#include <asm/prom.h>
 
 static inline int __of_pci_pci_compare(struct device_node *node,
 				       unsigned int data)
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 4c2e6f26432c..f6fc6899ceae 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -52,10 +52,7 @@ extern void __iomem *of_iomap(struct device_node *device, int index);
 extern const __be32 *of_get_address(struct device_node *dev, int index,
 			   u64 *size, unsigned int *flags);
 
-#ifndef pci_address_to_pio
-static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
-#define pci_address_to_pio pci_address_to_pio
-#endif
+extern unsigned long pci_address_to_pio(phys_addr_t addr);
 
 extern int of_pci_range_parser_init(struct of_pci_range_parser *parser,
 			struct device_node *node);
-- 
cgit v1.2.3


From 0c3f061c195ceb891067b6de9e4ecc347c4dea31 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Tue, 17 Sep 2013 10:42:50 -0500
Subject: of: implement of_node_to_nid as a weak function

Implement of_node_to_nid as weak function to remove the dependency on
asm/prom.h. This is in preparation to make prom.h optional.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 arch/powerpc/include/asm/prom.h |  7 -------
 arch/sparc/include/asm/prom.h   |  4 ----
 drivers/of/base.c               |  7 +++++++
 include/linux/of.h              | 11 ++++-------
 4 files changed, 11 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index bd215f74df0f..6707c16d8fc5 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -42,13 +42,6 @@ extern void kdump_move_device_tree(void);
 /* cache lookup */
 struct device_node *of_find_next_cache_node(struct device_node *np);
 
-#ifdef CONFIG_NUMA
-extern int of_node_to_nid(struct device_node *device);
-#else
-static inline int of_node_to_nid(struct device_node *device) { return 0; }
-#endif
-#define of_node_to_nid of_node_to_nid
-
 extern void of_instantiate_rtc(void);
 
 extern int of_get_ibm_chip_id(struct device_node *np);
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index 67c62578d170..60c8d7bd4058 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -43,10 +43,6 @@ extern int of_getintprop_default(struct device_node *np,
 				 const char *name,
 				 int def);
 extern int of_find_in_proplist(const char *list, const char *match, int len);
-#ifdef CONFIG_NUMA
-extern int of_node_to_nid(struct device_node *dp);
-#define of_node_to_nid of_node_to_nid
-#endif
 
 extern void prom_build_devicetree(void);
 extern void of_populate_present_mask(void);
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 865d3f66c86b..ced4c06d79b3 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -74,6 +74,13 @@ int of_n_size_cells(struct device_node *np)
 }
 EXPORT_SYMBOL(of_n_size_cells);
 
+#ifdef CONFIG_NUMA
+int __weak of_node_to_nid(struct device_node *np)
+{
+	return numa_node_id();
+}
+#endif
+
 #if defined(CONFIG_OF_DYNAMIC)
 /**
  *	of_node_get - Increment refcount of a node
diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..4d294a0b8a57 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -534,13 +534,10 @@ static inline const char *of_prop_next_string(struct property *prop,
 #define of_match_node(_matches, _node)	NULL
 #endif /* CONFIG_OF */
 
-#ifndef of_node_to_nid
-static inline int of_node_to_nid(struct device_node *np)
-{
-	return numa_node_id();
-}
-
-#define of_node_to_nid of_node_to_nid
+#if defined(CONFIG_OF) && defined(CONFIG_NUMA)
+extern int of_node_to_nid(struct device_node *np);
+#else
+static inline int of_node_to_nid(struct device_node *device) { return 0; }
 #endif
 
 /**
-- 
cgit v1.2.3


From 4acf4b9cd4534aaa9102004937e1ba79da01d008 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 16 Sep 2013 21:03:24 -0500
Subject: of: move of_address_to_resource and of_iomap declarations from sparc

Move of_address_to_resource and of_iomap declarations to common code. These
only differ on sparc, but the declarations are the same and don't need to
be in arch header.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: sparclinux@vger.kernel.org
---
 arch/sparc/include/asm/prom.h |  8 --------
 include/linux/of_address.h    | 30 +++++++++++++++++-------------
 2 files changed, 17 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index 60c8d7bd4058..11ebd659e7b6 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -59,13 +59,5 @@ extern char *of_console_options;
 extern void irq_trans_init(struct device_node *dp);
 extern char *build_path_component(struct device_node *dp);
 
-/* SPARC has local implementations */
-extern int of_address_to_resource(struct device_node *dev, int index,
-				  struct resource *r);
-#define of_address_to_resource of_address_to_resource
-
-void __iomem *of_iomap(struct device_node *node, int index);
-#define of_iomap of_iomap
-
 #endif /* __KERNEL__ */
 #endif /* _SPARC_PROM_H */
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index f6fc6899ceae..e8a179773a1a 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -60,13 +60,6 @@ extern struct of_pci_range *of_pci_range_parser_one(
 					struct of_pci_range_parser *parser,
 					struct of_pci_range *range);
 #else /* CONFIG_OF_ADDRESS */
-#ifndef of_address_to_resource
-static inline int of_address_to_resource(struct device_node *dev, int index,
-					 struct resource *r)
-{
-	return -EINVAL;
-}
-#endif
 static inline struct device_node *of_find_matching_node_by_address(
 					struct device_node *from,
 					const struct of_device_id *matches,
@@ -74,12 +67,7 @@ static inline struct device_node *of_find_matching_node_by_address(
 {
 	return NULL;
 }
-#ifndef of_iomap
-static inline void __iomem *of_iomap(struct device_node *device, int index)
-{
-	return NULL;
-}
-#endif
+
 static inline const __be32 *of_get_address(struct device_node *dev, int index,
 					u64 *size, unsigned int *flags)
 {
@@ -100,6 +88,22 @@ static inline struct of_pci_range *of_pci_range_parser_one(
 }
 #endif /* CONFIG_OF_ADDRESS */
 
+#ifdef CONFIG_OF
+extern int of_address_to_resource(struct device_node *dev, int index,
+				  struct resource *r);
+void __iomem *of_iomap(struct device_node *node, int index);
+#else
+static inline int of_address_to_resource(struct device_node *dev, int index,
+					 struct resource *r)
+{
+	return -EINVAL;
+}
+
+static inline void __iomem *of_iomap(struct device_node *device, int index)
+{
+	return NULL;
+}
+#endif
 
 #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_PCI)
 extern const __be32 *of_get_pci_address(struct device_node *dev, int bar_no,
-- 
cgit v1.2.3


From d0dfa16a600190d142f7538e5909d13c35b60d98 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 16 Sep 2013 21:05:05 -0500
Subject: of: move of_translate_dma_address to of_address.h

of_translate_dma_address is implemented in common code, so move the
declaration there too.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/include/asm/prom.h | 4 ----
 include/linux/of_address.h      | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 43fe0023d722..b8774bdc69e0 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -24,10 +24,6 @@
  * OF address retreival & translation
  */
 
-/* Translate a DMA address from device space to CPU space */
-extern u64 of_translate_dma_address(struct device_node *dev,
-				    const __be32 *in_addr);
-
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index e8a179773a1a..5f6ed6b182b8 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -34,6 +34,10 @@ static inline void of_pci_range_to_resource(struct of_pci_range *range,
 	res->name = np->full_name;
 }
 
+/* Translate a DMA address from device space to CPU space */
+extern u64 of_translate_dma_address(struct device_node *dev,
+				    const __be32 *in_addr);
+
 #ifdef CONFIG_OF_ADDRESS
 extern u64 of_translate_address(struct device_node *np, const __be32 *addr);
 extern bool of_can_translate_address(struct device_node *dev);
-- 
cgit v1.2.3


From b5b4bb3f6a11f9c37b6d53138244f2ffe5bacd12 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Sat, 7 Sep 2013 14:08:20 -0500
Subject: of: only include prom.h on sparc

The dependency on prom.h by the core DT code is now removed and only
sparc needs to include prom.h for the core code.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 4d294a0b8a57..54017b83650b 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -136,7 +136,9 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 	return of_read_number(cell, size);
 }
 
+#if defined(CONFIG_SPARC)
 #include <asm/prom.h>
+#endif
 
 /* Default #address and #size cells.  Allow arch asm/prom.h to override */
 #if !defined(OF_ROOT_NODE_ADDR_CELLS_DEFAULT)
-- 
cgit v1.2.3


From 634fb979e8f3a70f04c1f2f519d0cd1142eb5c1a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Oct 2013 15:21:29 -0700
Subject: inet: includes a sock_common in request_sock

TCP listener refactoring, part 5 :

We want to be able to insert request sockets (SYN_RECV) into main
ehash table instead of the per listener hash table to allow RCU
lookups and remove listener lock contention.

This patch includes the needed struct sock_common in front
of struct request_sock

This means there is no more inet6_request_sock IPv6 specific
structure.

Following inet_request_sock fields were renamed as they became
macros to reference fields from struct sock_common.
Prefix ir_ was chosen to avoid name collisions.

loc_port   -> ir_loc_port
loc_addr   -> ir_loc_addr
rmt_addr   -> ir_rmt_addr
rmt_port   -> ir_rmt_port
iif        -> ir_iif

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h             | 26 ++---------------
 include/net/inet_sock.h          | 16 +++++-----
 include/net/request_sock.h       |  1 +
 include/net/tcp.h                |  4 +--
 net/dccp/ipv4.c                  | 18 ++++++------
 net/dccp/ipv6.c                  | 63 ++++++++++++++++++++--------------------
 net/dccp/ipv6.h                  |  1 -
 net/dccp/minisocks.c             |  4 +--
 net/dccp/output.c                |  4 +--
 net/ipv4/inet_connection_sock.c  | 23 ++++++++-------
 net/ipv4/inet_diag.c             | 22 +++++++-------
 net/ipv4/syncookies.c            | 12 ++++----
 net/ipv4/tcp_ipv4.c              | 38 ++++++++++++------------
 net/ipv4/tcp_metrics.c           |  8 +++--
 net/ipv4/tcp_output.c            |  4 +--
 net/ipv6/inet6_connection_sock.c | 26 ++++++++---------
 net/ipv6/syncookies.c            | 24 +++++++--------
 net/ipv6/tcp_ipv6.c              | 61 +++++++++++++++++++-------------------
 net/netlabel/netlabel_kapi.c     |  2 +-
 19 files changed, 169 insertions(+), 188 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 35f6c1b562c4..a80a63cfb70c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -115,16 +115,8 @@ static inline int inet6_iif(const struct sk_buff *skb)
 	return IP6CB(skb)->iif;
 }
 
-struct inet6_request_sock {
-	struct in6_addr		loc_addr;
-	struct in6_addr		rmt_addr;
-	struct sk_buff		*pktopts;
-	int			iif;
-};
-
 struct tcp6_request_sock {
 	struct tcp_request_sock	  tcp6rsk_tcp;
-	struct inet6_request_sock tcp6rsk_inet6;
 };
 
 struct ipv6_mc_socklist;
@@ -264,26 +256,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
 	return inet_sk(__sk)->pinet6;
 }
 
-static inline struct inet6_request_sock *
-			inet6_rsk(const struct request_sock *rsk)
-{
-	return (struct inet6_request_sock *)(((u8 *)rsk) +
-					     inet_rsk(rsk)->inet6_rsk_offset);
-}
-
-static inline u32 inet6_rsk_offset(struct request_sock *rsk)
-{
-	return rsk->rsk_ops->obj_size - sizeof(struct inet6_request_sock);
-}
-
 static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops)
 {
 	struct request_sock *req = reqsk_alloc(ops);
 
-	if (req != NULL) {
-		inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req);
-		inet6_rsk(req)->pktopts = NULL;
-	}
+	if (req)
+		inet_rsk(req)->pktopts = NULL;
 
 	return req;
 }
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 6d9a7e6eb5a4..f91204442efa 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -70,13 +70,14 @@ struct ip_options_data {
 
 struct inet_request_sock {
 	struct request_sock	req;
-#if IS_ENABLED(CONFIG_IPV6)
-	u16			inet6_rsk_offset;
-#endif
-	__be16			loc_port;
-	__be32			loc_addr;
-	__be32			rmt_addr;
-	__be16			rmt_port;
+#define ir_loc_addr		req.__req_common.skc_rcv_saddr
+#define ir_rmt_addr		req.__req_common.skc_daddr
+#define ir_loc_port		req.__req_common.skc_num
+#define ir_rmt_port		req.__req_common.skc_dport
+#define ir_v6_rmt_addr		req.__req_common.skc_v6_daddr
+#define ir_v6_loc_addr		req.__req_common.skc_v6_rcv_saddr
+#define ir_iif			req.__req_common.skc_bound_dev_if
+
 	kmemcheck_bitfield_begin(flags);
 	u16			snd_wscale : 4,
 				rcv_wscale : 4,
@@ -88,6 +89,7 @@ struct inet_request_sock {
 				no_srccheck: 1;
 	kmemcheck_bitfield_end(flags);
 	struct ip_options_rcu	*opt;
+	struct sk_buff		*pktopts;
 };
 
 static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 65c3e5164a5c..7f830ff67f08 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -48,6 +48,7 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req);
 /* struct request_sock - mini sock to represent a connection request
  */
 struct request_sock {
+	struct sock_common		__req_common;
 	struct request_sock		*dl_next;
 	u16				mss;
 	u8				num_retrans; /* number of retransmits */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 39bbfa1602b2..24a06161d174 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1109,8 +1109,8 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->wscale_ok = rx_opt->wscale_ok;
 	ireq->acked = 0;
 	ireq->ecn_ok = 0;
-	ireq->rmt_port = tcp_hdr(skb)->source;
-	ireq->loc_port = tcp_hdr(skb)->dest;
+	ireq->ir_rmt_port = tcp_hdr(skb)->source;
+	ireq->ir_loc_port = tcp_hdr(skb)->dest;
 }
 
 void tcp_enter_memory_pressure(struct sock *sk);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ebc54fef85a5..720c36225ed9 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -409,9 +409,9 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newinet		   = inet_sk(newsk);
 	ireq		   = inet_rsk(req);
-	newinet->inet_daddr	= ireq->rmt_addr;
-	newinet->inet_rcv_saddr = ireq->loc_addr;
-	newinet->inet_saddr	= ireq->loc_addr;
+	newinet->inet_daddr	= ireq->ir_rmt_addr;
+	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+	newinet->inet_saddr	= ireq->ir_loc_addr;
 	newinet->inet_opt	= ireq->opt;
 	ireq->opt	   = NULL;
 	newinet->mc_index  = inet_iif(skb);
@@ -516,10 +516,10 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
 		const struct inet_request_sock *ireq = inet_rsk(req);
 		struct dccp_hdr *dh = dccp_hdr(skb);
 
-		dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr,
-							      ireq->rmt_addr);
-		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
-					    ireq->rmt_addr,
+		dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr,
+							      ireq->ir_rmt_addr);
+		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+					    ireq->ir_rmt_addr,
 					    ireq->opt);
 		err = net_xmit_eval(err);
 	}
@@ -641,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq = inet_rsk(req);
-	ireq->loc_addr = ip_hdr(skb)->daddr;
-	ireq->rmt_addr = ip_hdr(skb)->saddr;
+	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
+	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
 
 	/*
 	 * Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7f075b83128a..5cc5b24a956e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -216,7 +216,7 @@ out:
 
 static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 {
-	struct inet6_request_sock *ireq6 = inet6_rsk(req);
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff *skb;
 	struct in6_addr *final_p, final;
@@ -226,12 +226,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_DCCP;
-	fl6.daddr = ireq6->rmt_addr;
-	fl6.saddr = ireq6->loc_addr;
+	fl6.daddr = ireq->ir_v6_rmt_addr;
+	fl6.saddr = ireq->ir_v6_loc_addr;
 	fl6.flowlabel = 0;
-	fl6.flowi6_oif = ireq6->iif;
-	fl6.fl6_dport = inet_rsk(req)->rmt_port;
-	fl6.fl6_sport = inet_rsk(req)->loc_port;
+	fl6.flowi6_oif = ireq->ir_iif;
+	fl6.fl6_dport = ireq->ir_rmt_port;
+	fl6.fl6_sport = ireq->ir_loc_port;
 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
 
@@ -249,9 +249,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 		struct dccp_hdr *dh = dccp_hdr(skb);
 
 		dh->dccph_checksum = dccp_v6_csum_finish(skb,
-							 &ireq6->loc_addr,
-							 &ireq6->rmt_addr);
-		fl6.daddr = ireq6->rmt_addr;
+							 &ireq->ir_v6_loc_addr,
+							 &ireq->ir_v6_rmt_addr);
+		fl6.daddr = ireq->ir_v6_rmt_addr;
 		err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 		err = net_xmit_eval(err);
 	}
@@ -264,8 +264,7 @@ done:
 static void dccp_v6_reqsk_destructor(struct request_sock *req)
 {
 	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
-	if (inet6_rsk(req)->pktopts != NULL)
-		kfree_skb(inet6_rsk(req)->pktopts);
+	kfree_skb(inet_rsk(req)->pktopts);
 }
 
 static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
@@ -359,7 +358,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct request_sock *req;
 	struct dccp_request_sock *dreq;
-	struct inet6_request_sock *ireq6;
+	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
 	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
@@ -398,22 +397,22 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
-	ireq6 = inet6_rsk(req);
-	ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
-	ireq6->loc_addr = ipv6_hdr(skb)->daddr;
+	ireq = inet_rsk(req);
+	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
 		atomic_inc(&skb->users);
-		ireq6->pktopts = skb;
+		ireq->pktopts = skb;
 	}
-	ireq6->iif = sk->sk_bound_dev_if;
+	ireq->ir_iif = sk->sk_bound_dev_if;
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		ireq6->iif = inet6_iif(skb);
+	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq->ir_iif = inet6_iif(skb);
 
 	/*
 	 * Step 3: Process LISTEN state
@@ -446,7 +445,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 					      struct request_sock *req,
 					      struct dst_entry *dst)
 {
-	struct inet6_request_sock *ireq6 = inet6_rsk(req);
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct inet_sock *newinet;
 	struct dccp6_sock *newdp6;
@@ -505,12 +504,12 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_DCCP;
-		fl6.daddr = ireq6->rmt_addr;
+		fl6.daddr = ireq->ir_v6_rmt_addr;
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
-		fl6.saddr = ireq6->loc_addr;
+		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
-		fl6.fl6_dport = inet_rsk(req)->rmt_port;
-		fl6.fl6_sport = inet_rsk(req)->loc_port;
+		fl6.fl6_dport = ireq->ir_rmt_port;
+		fl6.fl6_sport = ireq->ir_loc_port;
 		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
 		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
@@ -538,10 +537,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	newsk->sk_v6_daddr = ireq6->rmt_addr;
-	newnp->saddr = ireq6->loc_addr;
-	newsk->sk_v6_rcv_saddr = ireq6->loc_addr;
-	newsk->sk_bound_dev_if = ireq6->iif;
+	newsk->sk_v6_daddr	= ireq->ir_v6_rmt_addr;
+	newnp->saddr		= ireq->ir_v6_loc_addr;
+	newsk->sk_v6_rcv_saddr	= ireq->ir_v6_loc_addr;
+	newsk->sk_bound_dev_if	= ireq->ir_iif;
 
 	/* Now IPv6 options...
 
@@ -554,10 +553,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	/* Clone pktoptions received with SYN */
 	newnp->pktoptions = NULL;
-	if (ireq6->pktopts != NULL) {
-		newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC);
-		consume_skb(ireq6->pktopts);
-		ireq6->pktopts = NULL;
+	if (ireq->pktopts != NULL) {
+		newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+		consume_skb(ireq->pktopts);
+		ireq->pktopts = NULL;
 		if (newnp->pktoptions)
 			skb_set_owner_r(newnp->pktoptions, newsk);
 	}
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 6604fc3fe953..af259e15e7f0 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -25,7 +25,6 @@ struct dccp6_sock {
 
 struct dccp6_request_sock {
 	struct dccp_request_sock  dccp;
-	struct inet6_request_sock inet6;
 };
 
 struct dccp6_timewait_sock {
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 32e80d96d4c0..66afbcec2941 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -266,8 +266,8 @@ int dccp_reqsk_init(struct request_sock *req,
 {
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
-	inet_rsk(req)->rmt_port	  = dccp_hdr(skb)->dccph_sport;
-	inet_rsk(req)->loc_port	  = dccp_hdr(skb)->dccph_dport;
+	inet_rsk(req)->ir_rmt_port	  = dccp_hdr(skb)->dccph_sport;
+	inet_rsk(req)->ir_loc_port	  = dccp_hdr(skb)->dccph_dport;
 	inet_rsk(req)->acked	  = 0;
 	dreq->dreq_timestamp_echo = 0;
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d17fc90a74b6..9bf195d1b87a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -424,8 +424,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
 
-	dh->dccph_sport	= inet_rsk(req)->loc_port;
-	dh->dccph_dport	= inet_rsk(req)->rmt_port;
+	dh->dccph_sport	= inet_rsk(req)->ir_loc_port;
+	dh->dccph_dport	= inet_rsk(req)->ir_rmt_port;
 	dh->dccph_doff	= (dccp_header_size +
 			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
 	dh->dccph_type	= DCCP_PKT_RESPONSE;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 56e82a4027b4..2ffd931d652f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -412,8 +412,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol,
 			   flags,
-			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
-			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
+			   ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport);
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
@@ -448,8 +448,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
-			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
-			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
+			   ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport);
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
@@ -495,9 +495,9 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
 	     prev = &req->dl_next) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
 
-		if (ireq->rmt_port == rport &&
-		    ireq->rmt_addr == raddr &&
-		    ireq->loc_addr == laddr &&
+		if (ireq->ir_rmt_port == rport &&
+		    ireq->ir_rmt_addr == raddr &&
+		    ireq->ir_loc_addr == laddr &&
 		    AF_INET_FAMILY(req->rsk_ops->family)) {
 			WARN_ON(req->sk);
 			*prevp = prev;
@@ -514,7 +514,8 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
+	const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr,
+				     inet_rsk(req)->ir_rmt_port,
 				     lopt->hash_rnd, lopt->nr_table_entries);
 
 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
@@ -674,9 +675,9 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		newsk->sk_state = TCP_SYN_RECV;
 		newicsk->icsk_bind_hash = NULL;
 
-		inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
-		inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
-		inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
+		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
+		inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->ir_loc_port);
+		inet_sk(newsk)->inet_sport = inet_rsk(req)->ir_loc_port;
 		newsk->sk_write_space = sk_stream_write_space;
 
 		newicsk->icsk_retransmits = 0;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ecc179d676e4..41e1c3ea8b51 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -679,12 +679,12 @@ static inline void inet_diag_req_addrs(const struct sock *sk,
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == AF_INET6) {
 		if (req->rsk_ops->family == AF_INET6) {
-			entry->saddr = inet6_rsk(req)->loc_addr.s6_addr32;
-			entry->daddr = inet6_rsk(req)->rmt_addr.s6_addr32;
+			entry->saddr = ireq->ir_v6_loc_addr.s6_addr32;
+			entry->daddr = ireq->ir_v6_rmt_addr.s6_addr32;
 		} else if (req->rsk_ops->family == AF_INET) {
-			ipv6_addr_set_v4mapped(ireq->loc_addr,
+			ipv6_addr_set_v4mapped(ireq->ir_loc_addr,
 					       &entry->saddr_storage);
-			ipv6_addr_set_v4mapped(ireq->rmt_addr,
+			ipv6_addr_set_v4mapped(ireq->ir_rmt_addr,
 					       &entry->daddr_storage);
 			entry->saddr = entry->saddr_storage.s6_addr32;
 			entry->daddr = entry->daddr_storage.s6_addr32;
@@ -692,8 +692,8 @@ static inline void inet_diag_req_addrs(const struct sock *sk,
 	} else
 #endif
 	{
-		entry->saddr = &ireq->loc_addr;
-		entry->daddr = &ireq->rmt_addr;
+		entry->saddr = &ireq->ir_loc_addr;
+		entry->daddr = &ireq->ir_rmt_addr;
 	}
 }
 
@@ -728,9 +728,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 		tmo = 0;
 
 	r->id.idiag_sport = inet->inet_sport;
-	r->id.idiag_dport = ireq->rmt_port;
-	r->id.idiag_src[0] = ireq->loc_addr;
-	r->id.idiag_dst[0] = ireq->rmt_addr;
+	r->id.idiag_dport = ireq->ir_rmt_port;
+	r->id.idiag_src[0] = ireq->ir_loc_addr;
+	r->id.idiag_dst[0] = ireq->ir_rmt_addr;
 	r->idiag_expires = jiffies_to_msecs(tmo);
 	r->idiag_rqueue = 0;
 	r->idiag_wqueue = 0;
@@ -789,13 +789,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 			if (reqnum < s_reqnum)
 				continue;
-			if (r->id.idiag_dport != ireq->rmt_port &&
+			if (r->id.idiag_dport != ireq->ir_rmt_port &&
 			    r->id.idiag_dport)
 				continue;
 
 			if (bc) {
 				inet_diag_req_addrs(sk, req, &entry);
-				entry.dport = ntohs(ireq->rmt_port);
+				entry.dport = ntohs(ireq->ir_rmt_port);
 
 				if (!inet_diag_bc_run(bc, &entry))
 					continue;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 15e024105f91..984e21cf3c50 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -304,10 +304,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
 	req->mss		= mss;
-	ireq->loc_port		= th->dest;
-	ireq->rmt_port		= th->source;
-	ireq->loc_addr		= ip_hdr(skb)->daddr;
-	ireq->rmt_addr		= ip_hdr(skb)->saddr;
+	ireq->ir_loc_port		= th->dest;
+	ireq->ir_rmt_port		= th->source;
+	ireq->ir_loc_addr		= ip_hdr(skb)->daddr;
+	ireq->ir_rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
@@ -347,8 +347,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
 			   inet_sk_flowi_flags(sk),
-			   (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
-			   ireq->loc_addr, th->source, th->dest);
+			   (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
+			   ireq->ir_loc_addr, th->source, th->dest);
 	security_req_classify_flow(req, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(sock_net(sk), &fl4);
 	if (IS_ERR(rt)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e4695dde1af6..114d1b748cbb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -835,11 +835,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	skb = tcp_make_synack(sk, dst, req, NULL);
 
 	if (skb) {
-		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
+		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 
 		skb_set_queue_mapping(skb, queue_mapping);
-		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
-					    ireq->rmt_addr,
+		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+					    ireq->ir_rmt_addr,
 					    ireq->opt);
 		err = net_xmit_eval(err);
 		if (!tcp_rsk(req)->snt_synack && !err)
@@ -972,7 +972,7 @@ static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 {
 	union tcp_md5_addr *addr;
 
-	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
+	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
 	return tcp_md5_do_lookup(sk, addr, AF_INET);
 }
 
@@ -1149,8 +1149,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 		saddr = inet_sk(sk)->inet_saddr;
 		daddr = inet_sk(sk)->inet_daddr;
 	} else if (req) {
-		saddr = inet_rsk(req)->loc_addr;
-		daddr = inet_rsk(req)->rmt_addr;
+		saddr = inet_rsk(req)->ir_loc_addr;
+		daddr = inet_rsk(req)->ir_rmt_addr;
 	} else {
 		const struct iphdr *iph = ip_hdr(skb);
 		saddr = iph->saddr;
@@ -1366,8 +1366,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
 		kfree_skb(skb_synack);
 		return -1;
 	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
-				    ireq->rmt_addr, ireq->opt);
+	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
+				    ireq->ir_rmt_addr, ireq->opt);
 	err = net_xmit_eval(err);
 	if (!err)
 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
@@ -1502,8 +1502,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_openreq_init(req, &tmp_opt, skb);
 
 	ireq = inet_rsk(req);
-	ireq->loc_addr = daddr;
-	ireq->rmt_addr = saddr;
+	ireq->ir_loc_addr = daddr;
+	ireq->ir_rmt_addr = saddr;
 	ireq->no_srccheck = inet_sk(sk)->transparent;
 	ireq->opt = tcp_v4_save_options(skb);
 
@@ -1578,15 +1578,15 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
 
 	if (skb_synack) {
-		__tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
+		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
 	} else
 		goto drop_and_free;
 
 	if (likely(!do_fastopen)) {
 		int err;
-		err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
-		     ireq->rmt_addr, ireq->opt);
+		err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
+		     ireq->ir_rmt_addr, ireq->opt);
 		err = net_xmit_eval(err);
 		if (err || want_cookie)
 			goto drop_and_free;
@@ -1644,9 +1644,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newtp		      = tcp_sk(newsk);
 	newinet		      = inet_sk(newsk);
 	ireq		      = inet_rsk(req);
-	newinet->inet_daddr   = ireq->rmt_addr;
-	newinet->inet_rcv_saddr = ireq->loc_addr;
-	newinet->inet_saddr	      = ireq->loc_addr;
+	newinet->inet_daddr   = ireq->ir_rmt_addr;
+	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+	newinet->inet_saddr	      = ireq->ir_loc_addr;
 	inet_opt	      = ireq->opt;
 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
 	ireq->opt	      = NULL;
@@ -2548,10 +2548,10 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n",
 		i,
-		ireq->loc_addr,
+		ireq->ir_loc_addr,
 		ntohs(inet_sk(sk)->inet_sport),
-		ireq->rmt_addr,
-		ntohs(ireq->rmt_port),
+		ireq->ir_rmt_addr,
+		ntohs(ireq->ir_rmt_port),
 		TCP_SYN_RECV,
 		0, 0, /* could print option size, but that is af dependent. */
 		1,    /* timers active (only the expire timer) */
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 8fcc2cb9dba4..4a2a84110dfb 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -215,13 +215,15 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
 	addr.family = req->rsk_ops->family;
 	switch (addr.family) {
 	case AF_INET:
-		addr.addr.a4 = inet_rsk(req)->rmt_addr;
+		addr.addr.a4 = inet_rsk(req)->ir_rmt_addr;
 		hash = (__force unsigned int) addr.addr.a4;
 		break;
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		*(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr;
-		hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr);
+		*(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr;
+		hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr);
 		break;
+#endif
 	default:
 		return NULL;
 	}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c6f01f2cdb32..faec81353522 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2734,8 +2734,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->syn = 1;
 	th->ack = 1;
 	TCP_ECN_make_synack(req, th);
-	th->source = ireq->loc_port;
-	th->dest = ireq->rmt_port;
+	th->source = ireq->ir_loc_port;
+	th->dest = ireq->ir_rmt_port;
 	/* Setting of flags are superfluous here for callers (and ECE is
 	 * not even correctly set)
 	 */
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index b7400b480e74..1317c569b58f 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -70,20 +70,20 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 				      struct flowi6 *fl6,
 				      const struct request_sock *req)
 {
-	struct inet6_request_sock *treq = inet6_rsk(req);
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *final_p, final;
 	struct dst_entry *dst;
 
 	memset(fl6, 0, sizeof(*fl6));
 	fl6->flowi6_proto = IPPROTO_TCP;
-	fl6->daddr = treq->rmt_addr;
+	fl6->daddr = ireq->ir_v6_rmt_addr;
 	final_p = fl6_update_dst(fl6, np->opt, &final);
-	fl6->saddr = treq->loc_addr;
-	fl6->flowi6_oif = treq->iif;
+	fl6->saddr = ireq->ir_v6_loc_addr;
+	fl6->flowi6_oif = ireq->ir_iif;
 	fl6->flowi6_mark = sk->sk_mark;
-	fl6->fl6_dport = inet_rsk(req)->rmt_port;
-	fl6->fl6_sport = inet_rsk(req)->loc_port;
+	fl6->fl6_dport = ireq->ir_rmt_port;
+	fl6->fl6_sport = ireq->ir_loc_port;
 	security_req_classify_flow(req, flowi6_to_flowi(fl6));
 
 	dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
@@ -129,13 +129,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
 						     lopt->nr_table_entries)];
 	     (req = *prev) != NULL;
 	     prev = &req->dl_next) {
-		const struct inet6_request_sock *treq = inet6_rsk(req);
+		const struct inet_request_sock *ireq = inet_rsk(req);
 
-		if (inet_rsk(req)->rmt_port == rport &&
+		if (ireq->ir_rmt_port == rport &&
 		    req->rsk_ops->family == AF_INET6 &&
-		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
-		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
-		    (!treq->iif || treq->iif == iif)) {
+		    ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
+		    ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
+		    (!ireq->ir_iif || ireq->ir_iif == iif)) {
 			WARN_ON(req->sk != NULL);
 			*prevp = prev;
 			return req;
@@ -153,8 +153,8 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
-				      inet_rsk(req)->rmt_port,
+	const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
+				      inet_rsk(req)->ir_rmt_port,
 				      lopt->hash_rnd, lopt->nr_table_entries);
 
 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d703218a653b..bc5698f9e4cd 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -150,7 +150,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tcp_opt;
 	struct inet_request_sock *ireq;
-	struct inet6_request_sock *ireq6;
 	struct tcp_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -187,7 +186,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ireq = inet_rsk(req);
-	ireq6 = inet6_rsk(req);
 	treq = tcp_rsk(req);
 	treq->listener = NULL;
 
@@ -195,22 +193,22 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		goto out_free;
 
 	req->mss = mss;
-	ireq->rmt_port = th->source;
-	ireq->loc_port = th->dest;
-	ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
-	ireq6->loc_addr = ipv6_hdr(skb)->daddr;
+	ireq->ir_rmt_port = th->source;
+	ireq->ir_loc_port = th->dest;
+	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
 		atomic_inc(&skb->users);
-		ireq6->pktopts = skb;
+		ireq->pktopts = skb;
 	}
 
-	ireq6->iif = sk->sk_bound_dev_if;
+	ireq->ir_iif = sk->sk_bound_dev_if;
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		ireq6->iif = inet6_iif(skb);
+	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq->ir_iif = inet6_iif(skb);
 
 	req->expires = 0UL;
 	req->num_retrans = 0;
@@ -234,12 +232,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		struct flowi6 fl6;
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_TCP;
-		fl6.daddr = ireq6->rmt_addr;
+		fl6.daddr = ireq->ir_v6_rmt_addr;
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
-		fl6.saddr = ireq6->loc_addr;
+		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.flowi6_mark = sk->sk_mark;
-		fl6.fl6_dport = inet_rsk(req)->rmt_port;
+		fl6.fl6_dport = ireq->ir_rmt_port;
 		fl6.fl6_sport = inet_sk(sk)->inet_sport;
 		security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 541dfc40c7b3..db234d609b33 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -465,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
 			      u16 queue_mapping)
 {
-	struct inet6_request_sock *treq = inet6_rsk(req);
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	int err = -ENOMEM;
@@ -477,9 +477,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 	skb = tcp_make_synack(sk, dst, req, NULL);
 
 	if (skb) {
-		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
+		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
+				    &ireq->ir_v6_rmt_addr);
 
-		fl6->daddr = treq->rmt_addr;
+		fl6->daddr = ireq->ir_v6_rmt_addr;
 		skb_set_queue_mapping(skb, queue_mapping);
 		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
 		err = net_xmit_eval(err);
@@ -502,7 +503,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
 
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
-	kfree_skb(inet6_rsk(req)->pktopts);
+	kfree_skb(inet_rsk(req)->pktopts);
 }
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -521,7 +522,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
 						      struct request_sock *req)
 {
-	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
+	return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
 }
 
 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
@@ -623,8 +624,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 		saddr = &inet6_sk(sk)->saddr;
 		daddr = &sk->sk_v6_daddr;
 	} else if (req) {
-		saddr = &inet6_rsk(req)->loc_addr;
-		daddr = &inet6_rsk(req)->rmt_addr;
+		saddr = &inet_rsk(req)->ir_v6_loc_addr;
+		daddr = &inet_rsk(req)->ir_v6_rmt_addr;
 	} else {
 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 		saddr = &ip6h->saddr;
@@ -949,7 +950,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tmp_opt;
 	struct request_sock *req;
-	struct inet6_request_sock *treq;
+	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
@@ -994,25 +995,25 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	treq = inet6_rsk(req);
-	treq->rmt_addr = ipv6_hdr(skb)->saddr;
-	treq->loc_addr = ipv6_hdr(skb)->daddr;
+	ireq = inet_rsk(req);
+	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, skb, sock_net(sk));
 
-	treq->iif = sk->sk_bound_dev_if;
+	ireq->ir_iif = sk->sk_bound_dev_if;
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		treq->iif = inet6_iif(skb);
+	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq->ir_iif = inet6_iif(skb);
 
 	if (!isn) {
 		if (ipv6_opt_accepted(sk, skb) ||
 		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
 			atomic_inc(&skb->users);
-			treq->pktopts = skb;
+			ireq->pktopts = skb;
 		}
 
 		if (want_cookie) {
@@ -1051,7 +1052,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 			 * to the moment of synflood.
 			 */
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
-				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
+				       &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
 			goto drop_and_release;
 		}
 
@@ -1086,7 +1087,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 					  struct request_sock *req,
 					  struct dst_entry *dst)
 {
-	struct inet6_request_sock *treq;
+	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
@@ -1151,7 +1152,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		return newsk;
 	}
 
-	treq = inet6_rsk(req);
+	ireq = inet_rsk(req);
 
 	if (sk_acceptq_is_full(sk))
 		goto out_overflow;
@@ -1185,10 +1186,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	newsk->sk_v6_daddr = treq->rmt_addr;
-	newnp->saddr = treq->loc_addr;
-	newsk->sk_v6_rcv_saddr = treq->loc_addr;
-	newsk->sk_bound_dev_if = treq->iif;
+	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
+	newnp->saddr = ireq->ir_v6_loc_addr;
+	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
+	newsk->sk_bound_dev_if = ireq->ir_iif;
 
 	/* Now IPv6 options...
 
@@ -1203,11 +1204,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	/* Clone pktoptions received with SYN */
 	newnp->pktoptions = NULL;
-	if (treq->pktopts != NULL) {
-		newnp->pktoptions = skb_clone(treq->pktopts,
+	if (ireq->pktopts != NULL) {
+		newnp->pktoptions = skb_clone(ireq->pktopts,
 					      sk_gfp_atomic(sk, GFP_ATOMIC));
-		consume_skb(treq->pktopts);
-		treq->pktopts = NULL;
+		consume_skb(ireq->pktopts);
+		ireq->pktopts = NULL;
 		if (newnp->pktoptions)
 			skb_set_owner_r(newnp->pktoptions, newsk);
 	}
@@ -1722,8 +1723,8 @@ static void get_openreq6(struct seq_file *seq,
 			 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
 {
 	int ttd = req->expires - jiffies;
-	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
-	const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
+	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
+	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
 
 	if (ttd < 0)
 		ttd = 0;
@@ -1734,10 +1735,10 @@ static void get_openreq6(struct seq_file *seq,
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3],
-		   ntohs(inet_rsk(req)->loc_port),
+		   ntohs(inet_rsk(req)->ir_loc_port),
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
-		   ntohs(inet_rsk(req)->rmt_port),
+		   ntohs(inet_rsk(req)->ir_rmt_port),
 		   TCP_SYN_RECV,
 		   0,0, /* could print option size, but that is af dependent. */
 		   1,   /* timers active (only the expire timer) */
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 96a458e12f60..dce1bebf7aec 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -817,7 +817,7 @@ int netlbl_req_setattr(struct request_sock *req,
 	switch (req->rsk_ops->family) {
 	case AF_INET:
 		entry = netlbl_domhsh_getentry_af4(secattr->domain,
-						   inet_rsk(req)->rmt_addr);
+						   inet_rsk(req)->ir_rmt_addr);
 		if (entry == NULL) {
 			ret_val = -ENOENT;
 			goto req_setattr_return;
-- 
cgit v1.2.3


From c1868b822515313c72445e70b7d9e47d8815bc52 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 11 Sep 2013 16:35:25 +0300
Subject: mlx5: Remove checksum on command interface commands

Checksum calculations consume CPU resources and can be significant to
the rate of resource creation/destruction.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  | 26 ++++++++++++++------------
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 21 +++++----------------
 include/linux/mlx5/device.h                    |  2 +-
 include/linux/mlx5/driver.h                    |  4 +---
 4 files changed, 21 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 5472cbd34028..db3a6584939f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -180,28 +180,32 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block)
 	return 0;
 }
 
-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token)
+static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token,
+			   int csum)
 {
 	block->token = token;
-	block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2);
-	block->sig = ~xor8_buf(block, sizeof(*block) - 1);
+	if (csum) {
+		block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) -
+					    sizeof(block->data) - 2);
+		block->sig = ~xor8_buf(block, sizeof(*block) - 1);
+	}
 }
 
-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token)
+static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum)
 {
 	struct mlx5_cmd_mailbox *next = msg->next;
 
 	while (next) {
-		calc_block_sig(next->buf, token);
+		calc_block_sig(next->buf, token, csum);
 		next = next->next;
 	}
 }
 
-static void set_signature(struct mlx5_cmd_work_ent *ent)
+static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
 {
 	ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
-	calc_chain_sig(ent->in, ent->token);
-	calc_chain_sig(ent->out, ent->token);
+	calc_chain_sig(ent->in, ent->token, csum);
+	calc_chain_sig(ent->out, ent->token, csum);
 }
 
 static void poll_timeout(struct mlx5_cmd_work_ent *ent)
@@ -539,8 +543,7 @@ static void cmd_work_handler(struct work_struct *work)
 	lay->type = MLX5_PCI_CMD_XPORT;
 	lay->token = ent->token;
 	lay->status_own = CMD_OWNER_HW;
-	if (!cmd->checksum_disabled)
-		set_signature(ent);
+	set_signature(ent, !cmd->checksum_disabled);
 	dump_command(dev, ent, 1);
 	ktime_get_ts(&ent->ts1);
 
@@ -773,8 +776,6 @@ static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size)
 
 		copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
 		block = next->buf;
-		if (xor8_buf(block, sizeof(*block)) != 0xff)
-			return -EINVAL;
 
 		memcpy(to, block->data, copy);
 		to += copy;
@@ -1361,6 +1362,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 		goto err_map;
 	}
 
+	cmd->checksum_disabled = 1;
 	cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
 	cmd->bitmask = (1 << cmd->max_reg_cmds) - 1;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b47739b0b5f6..bc0f5fb66e24 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -165,9 +165,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL;
 	struct mlx5_cmd_query_hca_cap_mbox_in query_ctx;
 	struct mlx5_cmd_set_hca_cap_mbox_out set_out;
-	struct mlx5_profile *prof = dev->profile;
 	u64 flags;
-	int csum = 1;
 	int err;
 
 	memset(&query_ctx, 0, sizeof(query_ctx));
@@ -197,20 +195,14 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	memcpy(&set_ctx->hca_cap, &query_out->hca_cap,
 	       sizeof(set_ctx->hca_cap));
 
-	if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) {
-		csum = !!prof->cmdif_csum;
-		flags = be64_to_cpu(set_ctx->hca_cap.flags);
-		if (csum)
-			flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
-		else
-			flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
-
-		set_ctx->hca_cap.flags = cpu_to_be64(flags);
-	}
-
 	if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
 		set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
 
+	flags = be64_to_cpu(query_out->hca_cap.flags);
+	/* disable checksum */
+	flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
+
+	set_ctx->hca_cap.flags = cpu_to_be64(flags);
 	memset(&set_out, 0, sizeof(set_out));
 	set_ctx->hca_cap.log_uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12);
 	set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP);
@@ -225,9 +217,6 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	if (err)
 		goto query_ex;
 
-	if (!csum)
-		dev->cmd.checksum_disabled = 1;
-
 query_ex:
 	kfree(query_out);
 	kfree(set_ctx);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 68029b30c3dc..770e3b448b3b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -181,7 +181,7 @@ enum {
 	MLX5_DEV_CAP_FLAG_TLP_HINTS	= 1LL << 39,
 	MLX5_DEV_CAP_FLAG_SIG_HAND_OVER	= 1LL << 40,
 	MLX5_DEV_CAP_FLAG_DCT		= 1LL << 41,
-	MLX5_DEV_CAP_FLAG_CMDIF_CSUM	= 1LL << 46,
+	MLX5_DEV_CAP_FLAG_CMDIF_CSUM	= 3LL << 46,
 };
 
 enum {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8888381fc150..2cfc4309d45f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -747,8 +747,7 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
 
 enum {
 	MLX5_PROF_MASK_QP_SIZE		= (u64)1 << 0,
-	MLX5_PROF_MASK_CMDIF_CSUM	= (u64)1 << 1,
-	MLX5_PROF_MASK_MR_CACHE		= (u64)1 << 2,
+	MLX5_PROF_MASK_MR_CACHE		= (u64)1 << 1,
 };
 
 enum {
@@ -758,7 +757,6 @@ enum {
 struct mlx5_profile {
 	u64	mask;
 	u32	log_max_qp;
-	int	cmdif_csum;
 	struct {
 		int	size;
 		int	limit;
-- 
cgit v1.2.3


From 2f6daec14d02deb84e7896a93196d78fbe9956a2 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 11 Sep 2013 16:35:29 +0300
Subject: mlx5: Fix layout of struct mlx5_init_seg

The layout of struct health_buffer was not according to firmware
specification.  Fix it to comply.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 include/linux/mlx5/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 770e3b448b3b..5eb4e31af22b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -417,7 +417,7 @@ struct mlx5_init_seg {
 	struct health_buffer	health;
 	__be32			rsvd2[884];
 	__be32			health_counter;
-	__be32			rsvd3[1023];
+	__be32			rsvd3[1019];
 	__be64			ieee1588_clk;
 	__be32			ieee1588_clk_type;
 	__be32			clr_intx;
-- 
cgit v1.2.3


From ada9f5d007971a71d619e2abf66ebd3a9a399413 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 11 Sep 2013 16:35:34 +0300
Subject: IB/mlx5: Fix eq names to display nicely in /proc/interrupts

It's helpful for a driver to put the pci slot name in its interrupt
names, so /proc/interrupts will show the pci slot of the device.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx5/main.c            | 6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 +++-
 include/linux/mlx5/driver.h                  | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b267c65261c0..b1a6cb3a2809 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -164,6 +164,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
 static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+	char name[MLX5_MAX_EQ_NAME];
 	struct mlx5_eq *eq, *n;
 	int ncomp_vec;
 	int nent;
@@ -180,11 +181,10 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
 			goto clean;
 		}
 
-		snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
+		snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
 		err = mlx5_create_map_eq(&dev->mdev, eq,
 					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
-					 eq->name,
-					 &dev->mdev.priv.uuari.uars[0]);
+					 name, &dev->mdev.priv.uuari.uars[0]);
 		if (err) {
 			kfree(eq);
 			goto clean;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 443cc4d7b024..2231d93cc7ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -366,9 +366,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		goto err_in;
 	}
 
+	snprintf(eq->name, MLX5_MAX_EQ_NAME, "%s@pci:%s",
+		 name, pci_name(dev->pdev));
 	eq->eqn = out.eq_number;
 	err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0,
-			  name, eq);
+			  eq->name, eq);
 	if (err)
 		goto err_eq;
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2cfc4309d45f..6b8c496572c8 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -82,7 +82,7 @@ enum {
 };
 
 enum {
-	MLX5_MAX_EQ_NAME	= 20
+	MLX5_MAX_EQ_NAME	= 32
 };
 
 enum {
-- 
cgit v1.2.3


From 5578b266e9ae05391d53b446acf23818256ff13f Mon Sep 17 00:00:00 2001
From: Valentine Barshak <valentine.barshak@cogentembedded.com>
Date: Thu, 10 Oct 2013 20:35:17 +0400
Subject: usb: phy: Add RCAR Gen2 USB phy

This adds RCAR Gen2 USB phy support. The driver configures
USB channels 0/2 which are shared between PCI USB hosts and
USBHS/USBSS devices. It also controls internal USBHS phy.

Signed-off-by: Valentine Barshak <valentine.barshak@cogentembedded.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/Kconfig                         |  13 ++
 drivers/usb/phy/Makefile                        |   1 +
 drivers/usb/phy/phy-rcar-gen2-usb.c             | 248 ++++++++++++++++++++++++
 include/linux/platform_data/usb-rcar-gen2-phy.h |  22 +++
 4 files changed, 284 insertions(+)
 create mode 100644 drivers/usb/phy/phy-rcar-gen2-usb.c
 create mode 100644 include/linux/platform_data/usb-rcar-gen2-phy.h

(limited to 'include/linux')

diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index d5589f9c60a9..c0c8cd37648e 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -214,6 +214,19 @@ config USB_RCAR_PHY
 	  To compile this driver as a module, choose M here: the
 	  module will be called phy-rcar-usb.
 
+config USB_RCAR_GEN2_PHY
+	tristate "Renesas R-Car Gen2 USB PHY support"
+	depends on ARCH_R8A7790 || ARCH_R8A7791 || COMPILE_TEST
+	select USB_PHY
+	help
+	  Say Y here to add support for the Renesas R-Car Gen2 USB PHY driver.
+	  It is typically used to control internal USB PHY for USBHS,
+	  and to configure shared USB channels 0 and 2.
+	  This driver supports R8A7790 and R8A7791.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called phy-rcar-gen2-usb.
+
 config USB_ULPI
 	bool "Generic ULPI Transceiver Driver"
 	depends on ARM
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index 2135e85f46ed..8c5b14764b72 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -29,5 +29,6 @@ obj-$(CONFIG_USB_MSM_OTG)		+= phy-msm-usb.o
 obj-$(CONFIG_USB_MV_OTG)		+= phy-mv-usb.o
 obj-$(CONFIG_USB_MXS_PHY)		+= phy-mxs-usb.o
 obj-$(CONFIG_USB_RCAR_PHY)		+= phy-rcar-usb.o
+obj-$(CONFIG_USB_RCAR_GEN2_PHY)		+= phy-rcar-gen2-usb.o
 obj-$(CONFIG_USB_ULPI)			+= phy-ulpi.o
 obj-$(CONFIG_USB_ULPI_VIEWPORT)		+= phy-ulpi-viewport.o
diff --git a/drivers/usb/phy/phy-rcar-gen2-usb.c b/drivers/usb/phy/phy-rcar-gen2-usb.c
new file mode 100644
index 000000000000..a99a6953f11c
--- /dev/null
+++ b/drivers/usb/phy/phy-rcar-gen2-usb.c
@@ -0,0 +1,248 @@
+/*
+ * Renesas R-Car Gen2 USB phy driver
+ *
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ * Copyright (C) 2013 Cogent Embedded, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_data/usb-rcar-gen2-phy.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/usb/otg.h>
+
+struct rcar_gen2_usb_phy_priv {
+	struct usb_phy phy;
+	void __iomem *base;
+	struct clk *clk;
+	spinlock_t lock;
+	int usecount;
+	u32 ugctrl2;
+};
+
+#define usb_phy_to_priv(p) container_of(p, struct rcar_gen2_usb_phy_priv, phy)
+
+/* Low Power Status register */
+#define USBHS_LPSTS_REG			0x02
+#define USBHS_LPSTS_SUSPM		(1 << 14)
+
+/* USB General control register */
+#define USBHS_UGCTRL_REG		0x80
+#define USBHS_UGCTRL_CONNECT		(1 << 2)
+#define USBHS_UGCTRL_PLLRESET		(1 << 0)
+
+/* USB General control register 2 */
+#define USBHS_UGCTRL2_REG		0x84
+#define USBHS_UGCTRL2_USB0_PCI		(1 << 4)
+#define USBHS_UGCTRL2_USB0_HS		(3 << 4)
+#define USBHS_UGCTRL2_USB2_PCI		(0 << 31)
+#define USBHS_UGCTRL2_USB2_SS		(1 << 31)
+
+/* USB General status register */
+#define USBHS_UGSTS_REG			0x88
+#define USBHS_UGSTS_LOCK		(3 << 8)
+
+/* Enable USBHS internal phy */
+static int __rcar_gen2_usbhs_phy_enable(void __iomem *base)
+{
+	u32 val;
+	int i;
+
+	/* USBHS PHY power on */
+	val = ioread32(base + USBHS_UGCTRL_REG);
+	val &= ~USBHS_UGCTRL_PLLRESET;
+	iowrite32(val, base + USBHS_UGCTRL_REG);
+
+	val = ioread16(base + USBHS_LPSTS_REG);
+	val |= USBHS_LPSTS_SUSPM;
+	iowrite16(val, base + USBHS_LPSTS_REG);
+
+	for (i = 0; i < 20; i++) {
+		val = ioread32(base + USBHS_UGSTS_REG);
+		if ((val & USBHS_UGSTS_LOCK) == USBHS_UGSTS_LOCK) {
+			val = ioread32(base + USBHS_UGCTRL_REG);
+			val |= USBHS_UGCTRL_CONNECT;
+			iowrite32(val, base + USBHS_UGCTRL_REG);
+			return 0;
+		}
+		udelay(1);
+	}
+
+	/* Timed out waiting for the PLL lock */
+	return -ETIMEDOUT;
+}
+
+/* Disable USBHS internal phy */
+static int __rcar_gen2_usbhs_phy_disable(void __iomem *base)
+{
+	u32 val;
+
+	/* USBHS PHY power off */
+	val = ioread32(base + USBHS_UGCTRL_REG);
+	val &= ~USBHS_UGCTRL_CONNECT;
+	iowrite32(val, base + USBHS_UGCTRL_REG);
+
+	val = ioread16(base + USBHS_LPSTS_REG);
+	val &= ~USBHS_LPSTS_SUSPM;
+	iowrite16(val, base + USBHS_LPSTS_REG);
+
+	val = ioread32(base + USBHS_UGCTRL_REG);
+	val |= USBHS_UGCTRL_PLLRESET;
+	iowrite32(val, base + USBHS_UGCTRL_REG);
+	return 0;
+}
+
+/* Setup USB channels */
+static void __rcar_gen2_usb_phy_init(struct rcar_gen2_usb_phy_priv *priv)
+{
+	u32 val;
+
+	clk_prepare_enable(priv->clk);
+
+	/* Set USB channels in the USBHS UGCTRL2 register */
+	val = ioread32(priv->base);
+	val &= ~(USBHS_UGCTRL2_USB0_HS | USBHS_UGCTRL2_USB2_SS);
+	val |= priv->ugctrl2;
+	iowrite32(val, priv->base);
+}
+
+/* Shutdown USB channels */
+static void __rcar_gen2_usb_phy_shutdown(struct rcar_gen2_usb_phy_priv *priv)
+{
+	__rcar_gen2_usbhs_phy_disable(priv->base);
+	clk_disable_unprepare(priv->clk);
+}
+
+static int rcar_gen2_usb_phy_set_suspend(struct usb_phy *phy, int suspend)
+{
+	struct rcar_gen2_usb_phy_priv *priv = usb_phy_to_priv(phy);
+	unsigned long flags;
+	int retval;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	retval = suspend ? __rcar_gen2_usbhs_phy_disable(priv->base) :
+			   __rcar_gen2_usbhs_phy_enable(priv->base);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	return retval;
+}
+
+static int rcar_gen2_usb_phy_init(struct usb_phy *phy)
+{
+	struct rcar_gen2_usb_phy_priv *priv = usb_phy_to_priv(phy);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	/*
+	 * Enable the clock and setup USB channels
+	 * if it's the first user
+	 */
+	if (!priv->usecount++)
+		__rcar_gen2_usb_phy_init(priv);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	return 0;
+}
+
+static void rcar_gen2_usb_phy_shutdown(struct usb_phy *phy)
+{
+	struct rcar_gen2_usb_phy_priv *priv = usb_phy_to_priv(phy);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (!priv->usecount) {
+		dev_warn(phy->dev, "Trying to disable phy with 0 usecount\n");
+		goto out;
+	}
+
+	/* Disable everything if it's the last user */
+	if (!--priv->usecount)
+		__rcar_gen2_usb_phy_shutdown(priv);
+out:
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int rcar_gen2_usb_phy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rcar_gen2_phy_platform_data *pdata;
+	struct rcar_gen2_usb_phy_priv *priv;
+	struct resource *res;
+	void __iomem *base;
+	struct clk *clk;
+	int retval;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		dev_err(dev, "No platform data\n");
+		return -EINVAL;
+	}
+
+	clk = devm_clk_get(&pdev->dev, "usbhs");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "Can't get the clock\n");
+		return PTR_ERR(clk);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(dev, "Memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&priv->lock);
+	priv->clk = clk;
+	priv->base = base;
+	priv->ugctrl2 = pdata->chan0_pci ?
+			USBHS_UGCTRL2_USB0_PCI : USBHS_UGCTRL2_USB0_HS;
+	priv->ugctrl2 |= pdata->chan2_pci ?
+			USBHS_UGCTRL2_USB2_PCI : USBHS_UGCTRL2_USB2_SS;
+	priv->phy.dev = dev;
+	priv->phy.label = dev_name(dev);
+	priv->phy.init = rcar_gen2_usb_phy_init;
+	priv->phy.shutdown = rcar_gen2_usb_phy_shutdown;
+	priv->phy.set_suspend = rcar_gen2_usb_phy_set_suspend;
+
+	retval = usb_add_phy(&priv->phy, USB_PHY_TYPE_USB2);
+	if (retval < 0) {
+		dev_err(dev, "Failed to add USB phy\n");
+		return retval;
+	}
+
+	platform_set_drvdata(pdev, priv);
+
+	return retval;
+}
+
+static int rcar_gen2_usb_phy_remove(struct platform_device *pdev)
+{
+	struct rcar_gen2_usb_phy_priv *priv = platform_get_drvdata(pdev);
+
+	usb_remove_phy(&priv->phy);
+
+	return 0;
+}
+
+static struct platform_driver rcar_gen2_usb_phy_driver = {
+	.driver = {
+		.name = "usb_phy_rcar_gen2",
+	},
+	.probe = rcar_gen2_usb_phy_probe,
+	.remove = rcar_gen2_usb_phy_remove,
+};
+
+module_platform_driver(rcar_gen2_usb_phy_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Renesas R-Car Gen2 USB phy");
+MODULE_AUTHOR("Valentine Barshak <valentine.barshak@cogentembedded.com>");
diff --git a/include/linux/platform_data/usb-rcar-gen2-phy.h b/include/linux/platform_data/usb-rcar-gen2-phy.h
new file mode 100644
index 000000000000..dd3ba46c0d90
--- /dev/null
+++ b/include/linux/platform_data/usb-rcar-gen2-phy.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ * Copyright (C) 2013 Cogent Embedded, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __USB_RCAR_GEN2_PHY_H
+#define __USB_RCAR_GEN2_PHY_H
+
+#include <linux/types.h>
+
+struct rcar_gen2_phy_platform_data {
+	/* USB channel 0 configuration */
+	bool chan0_pci:1;	/* true: PCI USB host 0, false: USBHS */
+	/* USB channel 2 configuration */
+	bool chan2_pci:1;	/* true: PCI USB host 2, false: USBSS */
+};
+
+#endif
-- 
cgit v1.2.3


From 61875f30daf60305712e25b209ef41ced2635bad Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 21 Sep 2013 13:58:22 -0400
Subject: random: allow architectures to optionally define random_get_entropy()

Allow architectures which have a disabled get_cycles() function to
provide a random_get_entropy() function which provides a fine-grained,
rapidly changing counter that can be used by the /dev/random driver.

For example, an architecture might have a rapidly changing register
used to control random TLB cache eviction, or DRAM refresh that
doesn't meet the requirements of get_cycles(), but which is good
enough for the needs of the random driver.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 drivers/char/random.c |  8 ++++----
 include/linux/timex.h | 14 ++++++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 92e6c67e1ae6..2d5daf9b58e9 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -643,7 +643,7 @@ struct timer_rand_state {
  */
 void add_device_randomness(const void *buf, unsigned int size)
 {
-	unsigned long time = get_cycles() ^ jiffies;
+	unsigned long time = random_get_entropy() ^ jiffies;
 
 	mix_pool_bytes(&input_pool, buf, size, NULL);
 	mix_pool_bytes(&input_pool, &time, sizeof(time), NULL);
@@ -680,7 +680,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
 		goto out;
 
 	sample.jiffies = jiffies;
-	sample.cycles = get_cycles();
+	sample.cycles = random_get_entropy();
 	sample.num = num;
 	mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL);
 
@@ -747,7 +747,7 @@ void add_interrupt_randomness(int irq, int irq_flags)
 	struct fast_pool	*fast_pool = &__get_cpu_var(irq_randomness);
 	struct pt_regs		*regs = get_irq_regs();
 	unsigned long		now = jiffies;
-	__u32			input[4], cycles = get_cycles();
+	__u32			input[4], cycles = random_get_entropy();
 
 	input[0] = cycles ^ jiffies;
 	input[1] = irq;
@@ -1485,7 +1485,7 @@ unsigned int get_random_int(void)
 
 	hash = get_cpu_var(get_random_int_hash);
 
-	hash[0] += current->pid + jiffies + get_cycles();
+	hash[0] += current->pid + jiffies + random_get_entropy();
 	md5_transform(hash, random_int_secret);
 	ret = hash[0];
 	put_cpu_var(get_random_int_hash);
diff --git a/include/linux/timex.h b/include/linux/timex.h
index b3726e61368e..da4c32dbb2aa 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -64,6 +64,20 @@
 
 #include <asm/timex.h>
 
+#ifndef random_get_entropy
+/*
+ * The random_get_entropy() function is used by the /dev/random driver
+ * in order to extract entropy via the relative unpredictability of
+ * when an interrupt takes places versus a high speed, fine-grained
+ * timing source or cycle counter.  Since it will be occurred on every
+ * single interrupt, it must have a very low cost/overhead.
+ *
+ * By default we use get_cycles() for this purpose, but individual
+ * architectures may override this in their asm/timex.h header file.
+ */
+#define random_get_entropy()	get_cycles()
+#endif
+
 /*
  * SHIFT_PLL is used as a dampening factor to define how much we
  * adjust the frequency correction for a given offset in PLL mode.
-- 
cgit v1.2.3


From dc7743aa3c49fabbc6dc9edbcf7df74d776ac32e Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 2 Oct 2013 21:39:40 -0700
Subject: pinctrl: single: Add support for auxdata

For omaps, we still have dependencies to the legacy code
for handling the PRM (Power Reset Management) interrupts,
and also for reconfiguring the io wake-up chain after
changes.

Let's pass the PRM interrupt and the rearm functions via
auxdata. Then when at some point we have a proper PRM
driver, we can get the interrupt via device tree and
set up the rearm function as exported function in the
PRM driver.

By using auxdata we can remove a dependency to the
wake-up events for converting omap3 to be device
tree only.

Cc: Peter Ujfalusi <peter.ujfalusi@ti.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: Prakash Manjunathappa <prakash.pm@ti.com>
Cc: Roger Quadros <rogerq@ti.com>
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/pinctrl/pinctrl-single.c             | 23 +++++++++++++++++++++++
 include/linux/platform_data/pinctrl-single.h | 12 ++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 include/linux/platform_data/pinctrl-single.h

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index dad65df36f4b..c2aada71c915 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -28,6 +28,8 @@
 #include <linux/pinctrl/pinmux.h>
 #include <linux/pinctrl/pinconf-generic.h>
 
+#include <linux/platform_data/pinctrl-single.h>
+
 #include "core.h"
 #include "pinconf.h"
 
@@ -159,12 +161,14 @@ struct pcs_name {
  * @irq:	optional interrupt for the controller
  * @irq_enable_mask:	optional SoC specific interrupt enable mask
  * @irq_status_mask:	optional SoC specific interrupt status mask
+ * @rearm:	optional SoC specific wake-up rearm function
  */
 struct pcs_soc_data {
 	unsigned flags;
 	int irq;
 	unsigned irq_enable_mask;
 	unsigned irq_status_mask;
+	void (*rearm)(void);
 };
 
 /**
@@ -1622,6 +1626,8 @@ static void pcs_irq_unmask(struct irq_data *d)
 	struct pcs_soc_data *pcs_soc = irq_data_get_irq_chip_data(d);
 
 	pcs_irq_set(pcs_soc, d->irq, true);
+	if (pcs_soc->rearm)
+		pcs_soc->rearm();
 }
 
 /**
@@ -1672,6 +1678,11 @@ static int pcs_irq_handle(struct pcs_soc_data *pcs_soc)
 		}
 	}
 
+	/*
+	 * For debugging on omaps, you may want to call pcs_soc->rearm()
+	 * here to see wake-up interrupts during runtime also.
+	 */
+
 	return count;
 }
 
@@ -1835,6 +1846,7 @@ static int pcs_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	const struct of_device_id *match;
+	struct pcs_pdata *pdata;
 	struct resource *res;
 	struct pcs_device *pcs;
 	const struct pcs_soc_data *soc;
@@ -1949,6 +1961,17 @@ static int pcs_probe(struct platform_device *pdev)
 	if (pcs->socdata.irq)
 		pcs->flags |= PCS_FEAT_IRQ;
 
+	/* We still need auxdata for some omaps for PRM interrupts */
+	pdata = dev_get_platdata(&pdev->dev);
+	if (pdata) {
+		if (pdata->rearm)
+			pcs->socdata.rearm = pdata->rearm;
+		if (pdata->irq) {
+			pcs->socdata.irq = pdata->irq;
+			pcs->flags |= PCS_FEAT_IRQ;
+		}
+	}
+
 	if (PCS_HAS_IRQ) {
 		ret = pcs_irq_init_chained_handler(pcs, np);
 		if (ret < 0)
diff --git a/include/linux/platform_data/pinctrl-single.h b/include/linux/platform_data/pinctrl-single.h
new file mode 100644
index 000000000000..72eacda9b360
--- /dev/null
+++ b/include/linux/platform_data/pinctrl-single.h
@@ -0,0 +1,12 @@
+/**
+ * irq:		optional wake-up interrupt
+ * rearm:	optional soc specific rearm function
+ *
+ * Note that the irq and rearm setup should come from device
+ * tree except for omap where there are still some dependencies
+ * to the legacy PRM code.
+ */
+struct pcs_pdata {
+	int irq;
+	void (*rearm)(void);
+};
-- 
cgit v1.2.3


From ebff5fa9d545574324095d9c6a3cb80c9157abc5 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 11 Oct 2013 15:12:04 +1000
Subject: Revert "i915: Update VGA arbiter support for newer devices"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 81b5c7bc8de3e6f63419139c2fc91bf81dea8a7d.

Adding drm/i915 into the vga arbiter chain means that X (in a piece of
well-meant paranoia) will do a get/put on the vga decoding around
_every_ accel call down into the ddx. Which results in some nice
performance disasters [1]. This really breaks userspace, by disabling
DRI for everyone, and stops OpenGL from working, this isn't limited
to just the i915 but both the integrated and discrete GPUs on
multi-gpu systems, in other words this causes untold worlds of pain,

Ville tried to come up with a Great Hack to fiddle the required VGA
I/O ops behind everyone's back using stop_machine, but that didn't
really work out [2]. Given that we're fairly late in the -rc stage for
such games let's just revert this all.

One thing we might want to keep is to delay the disabling of the vga
decoding until the fbdev emulation and the fbcon screen is set up. If
we kill vga mem decoding beforehand fbcon can end up with a white
square in the top-left corner it tried to save from the vga memory for
a seamless transition. And we have bug reports on older platforms
which seem to match these symptoms.

But again that's something to play around with in -next.

References: [1] http://lists.x.org/archives/xorg-devel/2013-September/037763.html
References: [2] http://www.spinics.net/lists/intel-gfx/msg34062.html
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/i915/i915_dma.c      |  9 +++------
 drivers/gpu/drm/i915/intel_display.c | 25 -------------------------
 include/linux/vgaarb.h               |  7 -------
 3 files changed, 3 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 52f5ad8037cc..d5c784d48671 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1290,12 +1290,9 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	 * then we do not take part in VGA arbitration and the
 	 * vga_client_register() fails with -ENODEV.
 	 */
-	if (!HAS_PCH_SPLIT(dev)) {
-		ret = vga_client_register(dev->pdev, dev, NULL,
-					  i915_vga_set_decode);
-		if (ret && ret != -ENODEV)
-			goto out;
-	}
+	ret = vga_client_register(dev->pdev, dev, NULL, i915_vga_set_decode);
+	if (ret && ret != -ENODEV)
+		goto out;
 
 	intel_register_dsm_handler();
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index aaea3ec811ed..581fb4b2f766 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -10038,15 +10038,6 @@ static void i915_disable_vga(struct drm_device *dev)
 	outb(SR01, VGA_SR_INDEX);
 	sr1 = inb(VGA_SR_DATA);
 	outb(sr1 | 1<<5, VGA_SR_DATA);
-
-	/* Disable VGA memory on Intel HD */
-	if (HAS_PCH_SPLIT(dev)) {
-		outb(inb(VGA_MSR_READ) & ~VGA_MSR_MEM_EN, VGA_MSR_WRITE);
-		vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO |
-						   VGA_RSRC_NORMAL_IO |
-						   VGA_RSRC_NORMAL_MEM);
-	}
-
 	vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
 	udelay(300);
 
@@ -10054,20 +10045,6 @@ static void i915_disable_vga(struct drm_device *dev)
 	POSTING_READ(vga_reg);
 }
 
-static void i915_enable_vga(struct drm_device *dev)
-{
-	/* Enable VGA memory on Intel HD */
-	if (HAS_PCH_SPLIT(dev)) {
-		vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
-		outb(inb(VGA_MSR_READ) | VGA_MSR_MEM_EN, VGA_MSR_WRITE);
-		vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO |
-						   VGA_RSRC_LEGACY_MEM |
-						   VGA_RSRC_NORMAL_IO |
-						   VGA_RSRC_NORMAL_MEM);
-		vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
-	}
-}
-
 void intel_modeset_init_hw(struct drm_device *dev)
 {
 	intel_init_power_well(dev);
@@ -10559,8 +10536,6 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
 	intel_disable_fbc(dev);
 
-	i915_enable_vga(dev);
-
 	intel_disable_gt_powersave(dev);
 
 	ironlake_teardown_rc6(dev);
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index 80cf8173a65b..2c02f3a8d2ba 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -65,15 +65,8 @@ struct pci_dev;
  *     out of the arbitration process (and can be safe to take
  *     interrupts at any time.
  */
-#if defined(CONFIG_VGA_ARB)
 extern void vga_set_legacy_decoding(struct pci_dev *pdev,
 				    unsigned int decodes);
-#else
-static inline void vga_set_legacy_decoding(struct pci_dev *pdev,
-					   unsigned int decodes)
-{
-}
-#endif
 
 /**
  *     vga_get         - acquire & locks VGA resources
-- 
cgit v1.2.3


From 3f0116c3238a96bc18ad4b4acefe4e7be32fa861 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 10 Oct 2013 10:16:30 +0200
Subject: compiler/gcc4: Add quirk for 'asm goto' miscompilation bug

Fengguang Wu, Oleg Nesterov and Peter Zijlstra tracked down
a kernel crash to a GCC bug: GCC miscompiles certain 'asm goto'
constructs, as outlined here:

  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670

Implement a workaround suggested by Jakub Jelinek.

Reported-and-tested-by: Fengguang Wu <fengguang.wu@intel.com>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Suggested-by: Jakub Jelinek <jakub@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/jump_label.h     |  2 +-
 arch/mips/include/asm/jump_label.h    |  2 +-
 arch/powerpc/include/asm/jump_label.h |  2 +-
 arch/s390/include/asm/jump_label.h    |  2 +-
 arch/sparc/include/asm/jump_label.h   |  2 +-
 arch/x86/include/asm/cpufeature.h     |  6 +++---
 arch/x86/include/asm/jump_label.h     |  2 +-
 arch/x86/include/asm/mutex_64.h       |  4 ++--
 include/linux/compiler-gcc4.h         | 15 +++++++++++++++
 9 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index bfc198c75913..863c892b4aaa 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -16,7 +16,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:\n\t"
+	asm_volatile_goto("1:\n\t"
 		 JUMP_LABEL_NOP "\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 ".word 1b, %l[l_yes], %c0\n\t"
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 4d6d77ed9b9d..e194f957ca8c 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -22,7 +22,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:\tnop\n\t"
+	asm_volatile_goto("1:\tnop\n\t"
 		"nop\n\t"
 		".pushsection __jump_table,  \"aw\"\n\t"
 		WORD_INSN " 1b, %l[l_yes], %0\n\t"
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index ae098c438f00..f016bb699b5f 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -19,7 +19,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:\n\t"
+	asm_volatile_goto("1:\n\t"
 		 "nop\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 6c32190dc73e..346b1c85ffb4 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -15,7 +15,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("0:	brcl 0,0\n"
+	asm_volatile_goto("0:	brcl 0,0\n"
 		".pushsection __jump_table, \"aw\"\n"
 		ASM_ALIGN "\n"
 		ASM_PTR " 0b, %l[label], %0\n"
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 5080d16a832f..ec2e2e2aba7d 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -9,7 +9,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-		asm goto("1:\n\t"
+		asm_volatile_goto("1:\n\t"
 			 "nop\n\t"
 			 "nop\n\t"
 			 ".pushsection __jump_table,  \"aw\"\n\t"
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d3f5c63078d8..89270b4318db 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 		 * Catch too early usage of this before alternatives
 		 * have run.
 		 */
-		asm goto("1: jmp %l[t_warn]\n"
+		asm_volatile_goto("1: jmp %l[t_warn]\n"
 			 "2:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"
@@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 
 #endif
 
-		asm goto("1: jmp %l[t_no]\n"
+		asm_volatile_goto("1: jmp %l[t_no]\n"
 			 "2:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"
@@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
  * have. Thus, we force the jump to the widest, 4-byte, signed relative
  * offset even though the last would often fit in less bytes.
  */
-		asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+		asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
 			 "2:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"		/* src offset */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 64507f35800c..6a2cefb4395a 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -18,7 +18,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:"
+	asm_volatile_goto("1:"
 		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
 		".pushsection __jump_table,  \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index e7e6751648ed..07537a44216e 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -20,7 +20,7 @@
 static inline void __mutex_fastpath_lock(atomic_t *v,
 					 void (*fail_fn)(atomic_t *))
 {
-	asm volatile goto(LOCK_PREFIX "   decl %0\n"
+	asm_volatile_goto(LOCK_PREFIX "   decl %0\n"
 			  "   jns %l[exit]\n"
 			  : : "m" (v->counter)
 			  : "memory", "cc"
@@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
 static inline void __mutex_fastpath_unlock(atomic_t *v,
 					   void (*fail_fn)(atomic_t *))
 {
-	asm volatile goto(LOCK_PREFIX "   incl %0\n"
+	asm_volatile_goto(LOCK_PREFIX "   incl %0\n"
 			  "   jg %l[exit]\n"
 			  : : "m" (v->counter)
 			  : "memory", "cc"
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 842de225055f..ded429966c1f 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -65,6 +65,21 @@
 #define __visible __attribute__((externally_visible))
 #endif
 
+/*
+ * GCC 'asm goto' miscompiles certain code sequences:
+ *
+ *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
+ * Fixed in GCC 4.8.2 and later versions.
+ *
+ * (asm goto is automatically volatile - the naming reflects this.)
+ */
+#if GCC_VERSION <= 40801
+# define asm_volatile_goto(x...)	do { asm goto(x); asm (""); } while (0)
+#else
+# define asm_volatile_goto(x...)	do { asm goto(x); } while (0)
+#endif
 
 #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
 #if GCC_VERSION >= 40400
-- 
cgit v1.2.3


From 81fcfb813fe99c30f77dd3ed9a4e541d14a9ed01 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 28 Aug 2013 18:37:39 +1000
Subject: hashtable: add hash_for_each_possible_rcu_notrace()

This adds hash_for_each_possible_rcu_notrace() which is basically
a notrace clone of hash_for_each_possible_rcu() which cannot be
used in real mode due to its tracing/debugging capability.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/hashtable.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
index a9df51f5d54c..519b6e2d769e 100644
--- a/include/linux/hashtable.h
+++ b/include/linux/hashtable.h
@@ -173,6 +173,21 @@ static inline void hash_del_rcu(struct hlist_node *node)
 	hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\
 		member)
 
+/**
+ * hash_for_each_possible_rcu_notrace - iterate over all possible objects hashing
+ * to the same bucket in an rcu enabled hashtable in a rcu enabled hashtable
+ * @name: hashtable to iterate
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ * @key: the key of the objects to iterate over
+ *
+ * This is the same as hash_for_each_possible_rcu() except that it does
+ * not do any RCU debugging or tracing.
+ */
+#define hash_for_each_possible_rcu_notrace(name, obj, member, key) \
+	hlist_for_each_entry_rcu_notrace(obj, \
+		&name[hash_min(key, HASH_BITS(name))], member)
+
 /**
  * hash_for_each_possible_safe - iterate over all possible objects hashing to the
  * same bucket safe against removals
-- 
cgit v1.2.3


From 8e0861fa3c4edfc2f30dd4cf4d58d3929f7c1b23 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 28 Aug 2013 18:37:42 +1000
Subject: powerpc: Prepare to support kernel handling of IOMMU map/unmap

The current VFIO-on-POWER implementation supports only user mode
driven mapping, i.e. QEMU is sending requests to map/unmap pages.
However this approach is really slow, so we want to move that to KVM.
Since H_PUT_TCE can be extremely performance sensitive (especially with
network adapters where each packet needs to be mapped/unmapped) we chose
to implement that as a "fast" hypercall directly in "real
mode" (processor still in the guest context but MMU off).

To be able to do that, we need to provide some facilities to
access the struct page count within that real mode environment as things
like the sparsemem vmemmap mappings aren't accessible.

This adds an API function realmode_pfn_to_page() to get page struct when
MMU is off.

This adds to MM a new function put_page_unless_one() which drops a page
if counter is bigger than 1. It is going to be used when MMU is off
(for example, real mode on PPC64) and we want to make sure that page
release will not happen in real mode as it may crash the kernel in
a horrible way.

CONFIG_SPARSEMEM_VMEMMAP and CONFIG_FLATMEM are supported.

Cc: linux-mm@kvack.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/pgtable-ppc64.h |  2 ++
 arch/powerpc/mm/init_64.c                | 51 +++++++++++++++++++++++++++++++-
 include/linux/mm.h                       | 14 +++++++++
 include/linux/page-flags.h               |  4 ++-
 4 files changed, 69 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 46db09414a10..4a191c472867 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -394,6 +394,8 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
 	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
 }
 
+struct page *realmode_pfn_to_page(unsigned long pfn);
+
 static inline char *get_hpte_slot_array(pmd_t *pmdp)
 {
 	/*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 8ed035d2edb5..e3734edffa69 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -304,5 +304,54 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
 }
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
+/*
+ * We do not have access to the sparsemem vmemmap, so we fallback to
+ * walking the list of sparsemem blocks which we already maintain for
+ * the sake of crashdump. In the long run, we might want to maintain
+ * a tree if performance of that linear walk becomes a problem.
+ *
+ * realmode_pfn_to_page functions can fail due to:
+ * 1) As real sparsemem blocks do not lay in RAM continously (they
+ * are in virtual address space which is not available in the real mode),
+ * the requested page struct can be split between blocks so get_page/put_page
+ * may fail.
+ * 2) When huge pages are used, the get_page/put_page API will fail
+ * in real mode as the linked addresses in the page struct are virtual
+ * too.
+ */
+struct page *realmode_pfn_to_page(unsigned long pfn)
+{
+	struct vmemmap_backing *vmem_back;
+	struct page *page;
+	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
+	unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
+
+	for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
+		if (pg_va < vmem_back->virt_addr)
+			continue;
+
+		/* Check that page struct is not split between real pages */
+		if ((pg_va + sizeof(struct page)) >
+				(vmem_back->virt_addr + page_size))
+			return NULL;
+
+		page = (struct page *) (vmem_back->phys + pg_va -
+				vmem_back->virt_addr);
+		return page;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
+
+#elif defined(CONFIG_FLATMEM)
+
+struct page *realmode_pfn_to_page(unsigned long pfn)
+{
+	struct page *page = pfn_to_page(pfn);
+	return page;
+}
+EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
+
+#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55ee8855..1a0668e5a4ee 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -297,12 +297,26 @@ static inline int put_page_testzero(struct page *page)
 /*
  * Try to grab a ref unless the page has a refcount of zero, return false if
  * that is the case.
+ * This can be called when MMU is off so it must not access
+ * any of the virtual mappings.
  */
 static inline int get_page_unless_zero(struct page *page)
 {
 	return atomic_inc_not_zero(&page->_count);
 }
 
+/*
+ * Try to drop a ref unless the page has a refcount of one, return false if
+ * that is the case.
+ * This is to make sure that the refcount won't become zero after this drop.
+ * This can be called when MMU is off so it must not access
+ * any of the virtual mappings.
+ */
+static inline int put_page_unless_one(struct page *page)
+{
+	return atomic_add_unless(&page->_count, -1, 1);
+}
+
 extern int page_is_ram(unsigned long pfn);
 
 /* Support for virtually mapped pages */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6d53675c2b54..98ada58f9942 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -329,7 +329,9 @@ static inline void set_page_writeback(struct page *page)
  * System with lots of page flags available. This allows separate
  * flags for PageHead() and PageTail() checks of compound pages so that bit
  * tests can be used in performance sensitive paths. PageCompound is
- * generally not used in hot code paths.
+ * generally not used in hot code paths except arch/powerpc/mm/init_64.c
+ * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages
+ * and avoid handling those in real mode.
  */
 __PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
 __PAGEFLAG(Tail, tail)
-- 
cgit v1.2.3


From e277e656804c85a0729d4fd8cdd3c8ab3e6b3b86 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 11 Oct 2013 09:30:24 +0800
Subject: regulator: Remove max_uV from struct regulator_linear_range

linear ranges means each range has linear voltage settings.
So we can calculate max_uV for each linear range in regulator core rather than
set the max_uV field in drivers.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/88pm800.c            | 14 ++++++--------
 drivers/regulator/as3711-regulator.c   | 25 +++++++++++--------------
 drivers/regulator/as3722-regulator.c   |  1 -
 drivers/regulator/da903x.c             |  6 ++----
 drivers/regulator/helpers.c            |  6 +++++-
 drivers/regulator/tps65217-regulator.c | 24 ++++++++----------------
 drivers/regulator/tps65912-regulator.c |  9 +++------
 drivers/regulator/wm831x-ldo.c         | 12 ++++--------
 drivers/regulator/wm8350-regulator.c   |  6 ++----
 drivers/regulator/wm8400-regulator.c   |  6 ++----
 include/linux/regulator/driver.h       |  2 --
 11 files changed, 43 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/88pm800.c b/drivers/regulator/88pm800.c
index 3459f60dcfd1..22ba4c4280ee 100644
--- a/drivers/regulator/88pm800.c
+++ b/drivers/regulator/88pm800.c
@@ -141,18 +141,16 @@ struct pm800_regulators {
 
 /* Ranges are sorted in ascending order. */
 static const struct regulator_linear_range buck1_volt_range[] = {
-	{ .min_uV = 600000, .max_uV = 1587500, .min_sel = 0, .max_sel = 0x4f,
-	  .uV_step = 12500 },
-	{ .min_uV = 1600000, .max_uV = 1800000, .min_sel = 0x50,
-	  .max_sel = 0x54, .uV_step = 50000 },
+	{ .min_uV = 600000, .min_sel = 0, .max_sel = 0x4f, .uV_step = 12500 },
+	{ .min_uV = 1600000, .min_sel = 0x50, .max_sel = 0x54,
+	  .uV_step = 50000 },
 };
 
 /* BUCK 2~5 have same ranges. */
 static const struct regulator_linear_range buck2_5_volt_range[] = {
-	{ .min_uV = 600000, .max_uV = 1587500,	.min_sel = 0, .max_sel = 0x4f,
-	  .uV_step = 12500 },
-	{ .min_uV = 1600000, .max_uV = 3300000, .min_sel = 0x50,
-	  .max_sel = 0x72, .uV_step = 50000 },
+	{ .min_uV = 600000, .min_sel = 0, .max_sel = 0x4f, .uV_step = 12500 },
+	{ .min_uV = 1600000, .min_sel = 0x50, .max_sel = 0x72,
+	  .uV_step = 50000 },
 };
 
 static const unsigned int ldo1_volt_table[] = {
diff --git a/drivers/regulator/as3711-regulator.c b/drivers/regulator/as3711-regulator.c
index 8406cd745da2..d0a97e5ea431 100644
--- a/drivers/regulator/as3711-regulator.c
+++ b/drivers/regulator/as3711-regulator.c
@@ -117,26 +117,23 @@ static struct regulator_ops as3711_dldo_ops = {
 };
 
 static const struct regulator_linear_range as3711_sd_ranges[] = {
-	{ .min_uV = 612500, .max_uV = 1400000,
-	  .min_sel = 0x1, .max_sel = 0x40, .uV_step = 12500 },
-	{ .min_uV = 1425000, .max_uV = 2600000,
-	  .min_sel = 0x41, .max_sel = 0x70, .uV_step = 25000 },
-	{ .min_uV = 2650000, .max_uV = 3350000,
-	  .min_sel = 0x71, .max_sel = 0x7f, .uV_step = 50000 },
+	{ .min_uV = 612500, .min_sel = 0x1, .max_sel = 0x40, .uV_step = 12500 },
+	{ .min_uV = 1425000, .min_sel = 0x41, .max_sel = 0x70,
+	  .uV_step = 25000 },
+	{ .min_uV = 2650000, .min_sel = 0x71, .max_sel = 0x7f,
+	  .uV_step = 50000 },
 };
 
 static const struct regulator_linear_range as3711_aldo_ranges[] = {
-	{ .min_uV = 1200000, .max_uV = 1950000,
-	  .min_sel = 0, .max_sel = 0xf, .uV_step = 50000 },
-	{ .min_uV = 1800000, .max_uV = 3300000,
-	  .min_sel = 0x10, .max_sel = 0x1f, .uV_step = 100000 },
+	{ .min_uV = 1200000, .min_sel = 0, .max_sel = 0xf, .uV_step = 50000 },
+	{ .min_uV = 1800000, .min_sel = 0x10, .max_sel = 0x1f,
+	  .uV_step = 100000 },
 };
 
 static const struct regulator_linear_range as3711_dldo_ranges[] = {
-	{ .min_uV = 900000, .max_uV = 1700000,
-	  .min_sel = 0, .max_sel = 0x10, .uV_step = 50000 },
-	{ .min_uV = 1750000, .max_uV = 3300000,
-	  .min_sel = 0x20, .max_sel = 0x3f, .uV_step = 50000 },
+	{ .min_uV = 900000, .min_sel = 0, .max_sel = 0x10, .uV_step = 50000 },
+	{ .min_uV = 1750000, .min_sel = 0x20, .max_sel = 0x3f,
+	  .uV_step = 50000 },
 };
 
 #define AS3711_REG(_id, _en_reg, _en_bit, _vmask, _vshift, _min_uV, _max_uV, _sfx)	\
diff --git a/drivers/regulator/as3722-regulator.c b/drivers/regulator/as3722-regulator.c
index d7b71a9c41f1..240ae6d2ee2a 100644
--- a/drivers/regulator/as3722-regulator.c
+++ b/drivers/regulator/as3722-regulator.c
@@ -441,7 +441,6 @@ static struct regulator_ops as3722_ldo3_extcntrl_ops = {
 		.max_sel = _max_sel,					\
 		.uV_step = _step_uV,					\
 		.min_uV = _min_uV,					\
-		.max_uV = _min_uV + (_max_sel - _min_sel) * _step_uV,	\
 	}
 
 static const struct regulator_linear_range as3722_ldo_ranges[] = {
diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index f06854cf8cf5..90d55e066447 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -253,10 +253,8 @@ static int da9034_set_dvc_voltage_sel(struct regulator_dev *rdev,
 }
 
 static const struct regulator_linear_range da9034_ldo12_ranges[] = {
-	{ .min_uV = 1700000, .max_uV = 2050000, .min_sel =  0, .max_sel = 7,
-	  .uV_step =  50000 },
-	{ .min_uV = 2700000, .max_uV = 3050000, .min_sel =  8, .max_sel = 15,
-	  .uV_step =  50000 },
+	{ .min_uV = 1700000, .min_sel =  0, .max_sel = 7, .uV_step =  50000 },
+	{ .min_uV = 2700000, .min_sel =  8, .max_sel = 15, .uV_step =  50000 },
 };
 
 static struct regulator_ops da903x_regulator_ldo_ops = {
diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index 6e30df14714b..e221a271ba56 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -284,9 +284,13 @@ int regulator_map_voltage_linear_range(struct regulator_dev *rdev,
 	}
 
 	for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
+		int linear_max_uV;
+
 		range = &rdev->desc->linear_ranges[i];
+		linear_max_uV = range->min_uV +
+			(range->max_sel - range->min_sel) * range->uV_step;
 
-		if (!(min_uV <= range->max_uV && max_uV >= range->min_uV))
+		if (!(min_uV <= linear_max_uV && max_uV >= range->min_uV))
 			continue;
 
 		if (min_uV <= range->min_uV)
diff --git a/drivers/regulator/tps65217-regulator.c b/drivers/regulator/tps65217-regulator.c
index 90861d68a0b0..484866f43681 100644
--- a/drivers/regulator/tps65217-regulator.c
+++ b/drivers/regulator/tps65217-regulator.c
@@ -52,25 +52,17 @@ static const unsigned int LDO1_VSEL_table[] = {
 };
 
 static const struct regulator_linear_range tps65217_uv1_ranges[] = {
-	{ .min_uV = 900000, .max_uV = 1500000, .min_sel =  0, .max_sel = 24,
-	  .uV_step = 25000 },
-	{ .min_uV = 1550000, .max_uV = 1800000, .min_sel = 25, .max_sel = 30,
-	  .uV_step = 50000 },
-	{ .min_uV = 1850000, .max_uV = 2900000, .min_sel = 31, .max_sel = 52,
-	  .uV_step = 50000 },
-	{ .min_uV = 3000000, .max_uV = 3200000, .min_sel = 53, .max_sel = 55,
-	  .uV_step = 100000 },
-	{ .min_uV = 3300000, .max_uV = 3300000, .min_sel = 56, .max_sel = 62,
-	  .uV_step = 0 },
+	{ .min_uV = 900000, .min_sel =  0, .max_sel = 24, .uV_step = 25000 },
+	{ .min_uV = 1550000, .min_sel = 25, .max_sel = 30, .uV_step = 50000 },
+	{ .min_uV = 1850000, .min_sel = 31, .max_sel = 52, .uV_step = 50000 },
+	{ .min_uV = 3000000, .min_sel = 53, .max_sel = 55, .uV_step = 100000 },
+	{ .min_uV = 3300000, .min_sel = 56, .max_sel = 62, .uV_step = 0 },
 };
 
 static const struct regulator_linear_range tps65217_uv2_ranges[] = {
-	{ .min_uV = 1500000, .max_uV = 1900000, .min_sel =  0, .max_sel = 8,
-	  .uV_step = 50000 },
-	{ .min_uV = 2000000, .max_uV = 2400000, .min_sel = 9, .max_sel = 13,
-	  .uV_step = 100000 },
-	{ .min_uV = 2450000, .max_uV = 3300000, .min_sel = 14, .max_sel = 31,
-	  .uV_step = 50000 },
+	{ .min_uV = 1500000, .min_sel =  0, .max_sel = 8, .uV_step = 50000 },
+	{ .min_uV = 2000000, .min_sel = 9, .max_sel = 13, .uV_step = 100000 },
+	{ .min_uV = 2450000, .min_sel = 14, .max_sel = 31, .uV_step = 50000 },
 };
 
 static int tps65217_pmic_enable(struct regulator_dev *dev)
diff --git a/drivers/regulator/tps65912-regulator.c b/drivers/regulator/tps65912-regulator.c
index 281e52ac64ba..9fc87d8c9ce5 100644
--- a/drivers/regulator/tps65912-regulator.c
+++ b/drivers/regulator/tps65912-regulator.c
@@ -119,12 +119,9 @@ struct tps65912_reg {
 };
 
 static const struct regulator_linear_range tps65912_ldo_ranges[] = {
-	{ .min_uV = 800000, .max_uV = 1600000, .min_sel =  0, .max_sel = 32,
-	  .uV_step = 25000 },
-	{ .min_uV = 1650000, .max_uV = 3000000, .min_sel = 33, .max_sel = 60,
-	  .uV_step = 50000 },
-	{ .min_uV = 3100000, .max_uV = 3300000, .min_sel = 61, .max_sel = 63,
-	  .uV_step = 100000 },
+	{ .min_uV = 800000, .min_sel =  0, .max_sel = 32, .uV_step = 25000 },
+	{ .min_uV = 1650000, .min_sel = 33, .max_sel = 60, .uV_step = 50000 },
+	{ .min_uV = 3100000, .min_sel = 61, .max_sel = 63, .uV_step = 100000 },
 };
 
 static int tps65912_get_range(struct tps65912_reg *pmic, int id)
diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c
index 2205fbc2c37b..a95814027b24 100644
--- a/drivers/regulator/wm831x-ldo.c
+++ b/drivers/regulator/wm831x-ldo.c
@@ -63,10 +63,8 @@ static irqreturn_t wm831x_ldo_uv_irq(int irq, void *data)
  */
 
 static const struct regulator_linear_range wm831x_gp_ldo_ranges[] = {
-	{ .min_uV =  900000, .max_uV = 1600000, .min_sel =  0, .max_sel = 14,
-	  .uV_step =  50000 },
-	{ .min_uV = 1700000, .max_uV = 3300000, .min_sel = 15, .max_sel = 31,
-	  .uV_step = 100000 },
+	{ .min_uV =  900000, .min_sel =  0, .max_sel = 14, .uV_step =  50000 },
+	{ .min_uV = 1700000, .min_sel = 15, .max_sel = 31, .uV_step = 100000 },
 };
 
 static int wm831x_gp_ldo_set_suspend_voltage(struct regulator_dev *rdev,
@@ -332,10 +330,8 @@ static struct platform_driver wm831x_gp_ldo_driver = {
  */
 
 static const struct regulator_linear_range wm831x_aldo_ranges[] = {
-	{ .min_uV = 1000000, .max_uV = 1600000, .min_sel =  0, .max_sel = 12,
-	  .uV_step =  50000 },
-	{ .min_uV = 1700000, .max_uV = 3500000, .min_sel = 13, .max_sel = 31,
-	  .uV_step = 100000 },
+	{ .min_uV = 1000000, .min_sel =  0, .max_sel = 12, .uV_step =  50000 },
+	{ .min_uV = 1700000, .min_sel = 13, .max_sel = 31, .uV_step = 100000 },
 };
 
 static int wm831x_aldo_set_suspend_voltage(struct regulator_dev *rdev,
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 61ca9292a429..de9de26d0bfc 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -543,10 +543,8 @@ static int wm8350_dcdc_set_suspend_mode(struct regulator_dev *rdev,
 }
 
 static const struct regulator_linear_range wm8350_ldo_ranges[] = {
-	{ .min_uV =  900000, .max_uV = 1650000, .min_sel =  0, .max_sel = 15,
-	  .uV_step =  50000 },
-	{ .min_uV = 1800000, .max_uV = 3300000, .min_sel = 16, .max_sel = 31,
-	  .uV_step = 100000 },
+	{ .min_uV =  900000, .min_sel =  0, .max_sel = 15, .uV_step =  50000 },
+	{ .min_uV = 1800000, .min_sel = 16, .max_sel = 31, .uV_step = 100000 },
 };
 
 static int wm8350_ldo_set_suspend_voltage(struct regulator_dev *rdev, int uV)
diff --git a/drivers/regulator/wm8400-regulator.c b/drivers/regulator/wm8400-regulator.c
index 58f51bec13f2..3352b2090ed5 100644
--- a/drivers/regulator/wm8400-regulator.c
+++ b/drivers/regulator/wm8400-regulator.c
@@ -20,10 +20,8 @@
 #include <linux/mfd/wm8400-private.h>
 
 static const struct regulator_linear_range wm8400_ldo_ranges[] = {
-	{ .min_uV =  900000, .max_uV = 1600000, .min_sel = 0, .max_sel = 14,
-	  .uV_step =  50000 },
-	{ .min_uV = 1700000, .max_uV = 3300000, .min_sel = 15, .max_sel = 31,
-	  .uV_step = 100000 },
+	{ .min_uV =  900000, .min_sel = 0, .max_sel = 14, .uV_step =  50000 },
+	{ .min_uV = 1700000, .min_sel = 15, .max_sel = 31, .uV_step = 100000 },
 };
 
 static struct regulator_ops wm8400_ldo_ops = {
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 9bdad43ad228..997ff5c4d880 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -46,14 +46,12 @@ enum regulator_status {
  * regulator_list_linear_range().
  *
  * @min_uV:  Lowest voltage in range
- * @max_uV:  Highest voltage in range
  * @min_sel: Lowest selector for range
  * @max_sel: Highest selector for range
  * @uV_step: Step size
  */
 struct regulator_linear_range {
 	unsigned int min_uV;
-	unsigned int max_uV;
 	unsigned int min_sel;
 	unsigned int max_sel;
 	unsigned int uV_step;
-- 
cgit v1.2.3


From 8828bae464b129abed95b748263f1ab53bdc5755 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 11 Oct 2013 09:32:18 +0800
Subject: regulator: Add REGULATOR_LINEAR_RANGE macro

Add REGULATOR_LINEAR_RANGE macro and convert regulator drivers to use it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/88pm800.c            | 10 ++++------
 drivers/regulator/as3711-regulator.c   | 18 +++++++-----------
 drivers/regulator/as3722-regulator.c   | 18 +++++-------------
 drivers/regulator/da903x.c             |  4 ++--
 drivers/regulator/tps65217-regulator.c | 16 ++++++++--------
 drivers/regulator/tps65912-regulator.c |  6 +++---
 drivers/regulator/wm831x-ldo.c         |  8 ++++----
 drivers/regulator/wm8350-regulator.c   |  4 ++--
 drivers/regulator/wm8400-regulator.c   |  4 ++--
 include/linux/regulator/driver.h       |  9 +++++++++
 10 files changed, 46 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/88pm800.c b/drivers/regulator/88pm800.c
index 22ba4c4280ee..d333f7eac106 100644
--- a/drivers/regulator/88pm800.c
+++ b/drivers/regulator/88pm800.c
@@ -141,16 +141,14 @@ struct pm800_regulators {
 
 /* Ranges are sorted in ascending order. */
 static const struct regulator_linear_range buck1_volt_range[] = {
-	{ .min_uV = 600000, .min_sel = 0, .max_sel = 0x4f, .uV_step = 12500 },
-	{ .min_uV = 1600000, .min_sel = 0x50, .max_sel = 0x54,
-	  .uV_step = 50000 },
+	REGULATOR_LINEAR_RANGE(600000, 0, 0x4f, 12500),
+	REGULATOR_LINEAR_RANGE(1600000, 0x50, 0x54, 50000),
 };
 
 /* BUCK 2~5 have same ranges. */
 static const struct regulator_linear_range buck2_5_volt_range[] = {
-	{ .min_uV = 600000, .min_sel = 0, .max_sel = 0x4f, .uV_step = 12500 },
-	{ .min_uV = 1600000, .min_sel = 0x50, .max_sel = 0x72,
-	  .uV_step = 50000 },
+	REGULATOR_LINEAR_RANGE(600000, 0, 0x4f, 12500),
+	REGULATOR_LINEAR_RANGE(1600000, 0x50, 0x72, 50000),
 };
 
 static const unsigned int ldo1_volt_table[] = {
diff --git a/drivers/regulator/as3711-regulator.c b/drivers/regulator/as3711-regulator.c
index d0a97e5ea431..f8524f988bdc 100644
--- a/drivers/regulator/as3711-regulator.c
+++ b/drivers/regulator/as3711-regulator.c
@@ -117,23 +117,19 @@ static struct regulator_ops as3711_dldo_ops = {
 };
 
 static const struct regulator_linear_range as3711_sd_ranges[] = {
-	{ .min_uV = 612500, .min_sel = 0x1, .max_sel = 0x40, .uV_step = 12500 },
-	{ .min_uV = 1425000, .min_sel = 0x41, .max_sel = 0x70,
-	  .uV_step = 25000 },
-	{ .min_uV = 2650000, .min_sel = 0x71, .max_sel = 0x7f,
-	  .uV_step = 50000 },
+	REGULATOR_LINEAR_RANGE(612500, 0x1, 0x40, 12500),
+	REGULATOR_LINEAR_RANGE(1425000, 0x41, 0x70, 25000),
+	REGULATOR_LINEAR_RANGE(2650000, 0x71, 0x7f, 50000),
 };
 
 static const struct regulator_linear_range as3711_aldo_ranges[] = {
-	{ .min_uV = 1200000, .min_sel = 0, .max_sel = 0xf, .uV_step = 50000 },
-	{ .min_uV = 1800000, .min_sel = 0x10, .max_sel = 0x1f,
-	  .uV_step = 100000 },
+	REGULATOR_LINEAR_RANGE(1200000, 0, 0xf, 50000),
+	REGULATOR_LINEAR_RANGE(1800000, 0x10, 0x1f, 100000),
 };
 
 static const struct regulator_linear_range as3711_dldo_ranges[] = {
-	{ .min_uV = 900000, .min_sel = 0, .max_sel = 0x10, .uV_step = 50000 },
-	{ .min_uV = 1750000, .min_sel = 0x20, .max_sel = 0x3f,
-	  .uV_step = 50000 },
+	REGULATOR_LINEAR_RANGE(900000, 0, 0x10, 50000),
+	REGULATOR_LINEAR_RANGE(1750000, 0x20, 0x3f, 50000),
 };
 
 #define AS3711_REG(_id, _en_reg, _en_bit, _vmask, _vshift, _min_uV, _max_uV, _sfx)	\
diff --git a/drivers/regulator/as3722-regulator.c b/drivers/regulator/as3722-regulator.c
index 240ae6d2ee2a..5917fe3dc983 100644
--- a/drivers/regulator/as3722-regulator.c
+++ b/drivers/regulator/as3722-regulator.c
@@ -435,17 +435,9 @@ static struct regulator_ops as3722_ldo3_extcntrl_ops = {
 	.get_current_limit = as3722_ldo3_get_current_limit,
 };
 
-#define regulator_lin_range(_min_sel, _max_sel, _min_uV, _step_uV)	\
-	{								\
-		.min_sel = _min_sel,					\
-		.max_sel = _max_sel,					\
-		.uV_step = _step_uV,					\
-		.min_uV = _min_uV,					\
-	}
-
 static const struct regulator_linear_range as3722_ldo_ranges[] = {
-	regulator_lin_range(0x01, 0x24,  825000, 25000),
-	regulator_lin_range(0x40, 0x7F, 1725000, 25000),
+	REGULATOR_LINEAR_RANGE(825000, 0x01, 0x24, 25000),
+	REGULATOR_LINEAR_RANGE(1725000, 0x40, 0x7F, 25000),
 };
 
 static struct regulator_ops as3722_ldo_ops = {
@@ -604,9 +596,9 @@ static int as3722_sd016_set_current_limit(struct regulator_dev *rdev,
 }
 
 static const struct regulator_linear_range as3722_sd2345_ranges[] = {
-	regulator_lin_range(0x01, 0x40,  612500, 12500),
-	regulator_lin_range(0x41, 0x70, 1425000, 25000),
-	regulator_lin_range(0x71, 0x7F, 2650000, 50000),
+	REGULATOR_LINEAR_RANGE(612500, 0x01, 0x40, 12500),
+	REGULATOR_LINEAR_RANGE(1425000, 0x41, 0x70, 25000),
+	REGULATOR_LINEAR_RANGE(2650000, 0x71, 0x7F, 50000),
 };
 
 static struct regulator_ops as3722_sd016_ops = {
diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index 90d55e066447..6d1b799ad86b 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -253,8 +253,8 @@ static int da9034_set_dvc_voltage_sel(struct regulator_dev *rdev,
 }
 
 static const struct regulator_linear_range da9034_ldo12_ranges[] = {
-	{ .min_uV = 1700000, .min_sel =  0, .max_sel = 7, .uV_step =  50000 },
-	{ .min_uV = 2700000, .min_sel =  8, .max_sel = 15, .uV_step =  50000 },
+	REGULATOR_LINEAR_RANGE(1700000, 0, 7, 50000),
+	REGULATOR_LINEAR_RANGE(2700000, 8, 15, 50000),
 };
 
 static struct regulator_ops da903x_regulator_ldo_ops = {
diff --git a/drivers/regulator/tps65217-regulator.c b/drivers/regulator/tps65217-regulator.c
index 484866f43681..bbc7277fa097 100644
--- a/drivers/regulator/tps65217-regulator.c
+++ b/drivers/regulator/tps65217-regulator.c
@@ -52,17 +52,17 @@ static const unsigned int LDO1_VSEL_table[] = {
 };
 
 static const struct regulator_linear_range tps65217_uv1_ranges[] = {
-	{ .min_uV = 900000, .min_sel =  0, .max_sel = 24, .uV_step = 25000 },
-	{ .min_uV = 1550000, .min_sel = 25, .max_sel = 30, .uV_step = 50000 },
-	{ .min_uV = 1850000, .min_sel = 31, .max_sel = 52, .uV_step = 50000 },
-	{ .min_uV = 3000000, .min_sel = 53, .max_sel = 55, .uV_step = 100000 },
-	{ .min_uV = 3300000, .min_sel = 56, .max_sel = 62, .uV_step = 0 },
+	REGULATOR_LINEAR_RANGE(900000, 0, 24, 25000),
+	REGULATOR_LINEAR_RANGE(1550000, 25, 30, 50000),
+	REGULATOR_LINEAR_RANGE(1850000, 31, 52, 50000),
+	REGULATOR_LINEAR_RANGE(3000000, 53, 55, 100000),
+	REGULATOR_LINEAR_RANGE(3300000, 56, 62, 0),
 };
 
 static const struct regulator_linear_range tps65217_uv2_ranges[] = {
-	{ .min_uV = 1500000, .min_sel =  0, .max_sel = 8, .uV_step = 50000 },
-	{ .min_uV = 2000000, .min_sel = 9, .max_sel = 13, .uV_step = 100000 },
-	{ .min_uV = 2450000, .min_sel = 14, .max_sel = 31, .uV_step = 50000 },
+	REGULATOR_LINEAR_RANGE(1500000, 0, 8, 50000),
+	REGULATOR_LINEAR_RANGE(2000000, 9, 13, 100000),
+	REGULATOR_LINEAR_RANGE(2450000, 14, 31, 50000),
 };
 
 static int tps65217_pmic_enable(struct regulator_dev *dev)
diff --git a/drivers/regulator/tps65912-regulator.c b/drivers/regulator/tps65912-regulator.c
index 9fc87d8c9ce5..697eab8e74d8 100644
--- a/drivers/regulator/tps65912-regulator.c
+++ b/drivers/regulator/tps65912-regulator.c
@@ -119,9 +119,9 @@ struct tps65912_reg {
 };
 
 static const struct regulator_linear_range tps65912_ldo_ranges[] = {
-	{ .min_uV = 800000, .min_sel =  0, .max_sel = 32, .uV_step = 25000 },
-	{ .min_uV = 1650000, .min_sel = 33, .max_sel = 60, .uV_step = 50000 },
-	{ .min_uV = 3100000, .min_sel = 61, .max_sel = 63, .uV_step = 100000 },
+	REGULATOR_LINEAR_RANGE(800000, 0, 32, 25000),
+	REGULATOR_LINEAR_RANGE(1650000, 33, 60, 50000),
+	REGULATOR_LINEAR_RANGE(3100000, 61, 63, 100000),
 };
 
 static int tps65912_get_range(struct tps65912_reg *pmic, int id)
diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c
index a95814027b24..9111b651c353 100644
--- a/drivers/regulator/wm831x-ldo.c
+++ b/drivers/regulator/wm831x-ldo.c
@@ -63,8 +63,8 @@ static irqreturn_t wm831x_ldo_uv_irq(int irq, void *data)
  */
 
 static const struct regulator_linear_range wm831x_gp_ldo_ranges[] = {
-	{ .min_uV =  900000, .min_sel =  0, .max_sel = 14, .uV_step =  50000 },
-	{ .min_uV = 1700000, .min_sel = 15, .max_sel = 31, .uV_step = 100000 },
+	REGULATOR_LINEAR_RANGE(900000, 0, 14, 50000),
+	REGULATOR_LINEAR_RANGE(1700000, 15, 31, 100000),
 };
 
 static int wm831x_gp_ldo_set_suspend_voltage(struct regulator_dev *rdev,
@@ -330,8 +330,8 @@ static struct platform_driver wm831x_gp_ldo_driver = {
  */
 
 static const struct regulator_linear_range wm831x_aldo_ranges[] = {
-	{ .min_uV = 1000000, .min_sel =  0, .max_sel = 12, .uV_step =  50000 },
-	{ .min_uV = 1700000, .min_sel = 13, .max_sel = 31, .uV_step = 100000 },
+	REGULATOR_LINEAR_RANGE(1000000, 0, 12, 50000),
+	REGULATOR_LINEAR_RANGE(1700000, 13, 31, 100000),
 };
 
 static int wm831x_aldo_set_suspend_voltage(struct regulator_dev *rdev,
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index de9de26d0bfc..3827539f3ad7 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -543,8 +543,8 @@ static int wm8350_dcdc_set_suspend_mode(struct regulator_dev *rdev,
 }
 
 static const struct regulator_linear_range wm8350_ldo_ranges[] = {
-	{ .min_uV =  900000, .min_sel =  0, .max_sel = 15, .uV_step =  50000 },
-	{ .min_uV = 1800000, .min_sel = 16, .max_sel = 31, .uV_step = 100000 },
+	REGULATOR_LINEAR_RANGE(900000, 0, 15, 50000),
+	REGULATOR_LINEAR_RANGE(1800000, 16, 31, 100000),
 };
 
 static int wm8350_ldo_set_suspend_voltage(struct regulator_dev *rdev, int uV)
diff --git a/drivers/regulator/wm8400-regulator.c b/drivers/regulator/wm8400-regulator.c
index 3352b2090ed5..8eedba2953f9 100644
--- a/drivers/regulator/wm8400-regulator.c
+++ b/drivers/regulator/wm8400-regulator.c
@@ -20,8 +20,8 @@
 #include <linux/mfd/wm8400-private.h>
 
 static const struct regulator_linear_range wm8400_ldo_ranges[] = {
-	{ .min_uV =  900000, .min_sel = 0, .max_sel = 14, .uV_step =  50000 },
-	{ .min_uV = 1700000, .min_sel = 15, .max_sel = 31, .uV_step = 100000 },
+	REGULATOR_LINEAR_RANGE(900000, 0, 14, 50000),
+	REGULATOR_LINEAR_RANGE(1700000, 15, 31, 100000),
 };
 
 static struct regulator_ops wm8400_ldo_ops = {
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 997ff5c4d880..edb11b716dd3 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -57,6 +57,15 @@ struct regulator_linear_range {
 	unsigned int uV_step;
 };
 
+/* Initialize struct regulator_linear_range */
+#define REGULATOR_LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV)	\
+{									\
+	.min_uV		= _min_uV,					\
+	.min_sel	= _min_sel,					\
+	.max_sel	= _max_sel,					\
+	.uV_step	= _step_uV,					\
+}
+
 /**
  * struct regulator_ops - regulator operations.
  *
-- 
cgit v1.2.3


From 2841a5fc375e9c573d10b82db30fa8a4cc25301c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Sat, 5 Oct 2013 00:23:12 +0100
Subject: spi: Provide per-message prepare and unprepare operations

Many SPI drivers perform setup and tear down on every message, usually
doing things like DMA mapping the message. Provide hooks for them to use
to provide such operations.

This is of limited value for drivers that implement transfer_one_message()
but will be of much greater utility with future factoring out of standard
implementations of that function.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/spi/spi.c       | 22 ++++++++++++++++++++++
 include/linux/spi/spi.h | 11 +++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8bef0c9a7233..8a30c6b66a64 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -606,6 +606,18 @@ static void spi_pump_messages(struct kthread_work *work)
 
 	trace_spi_message_start(master->cur_msg);
 
+	if (master->prepare_message) {
+		ret = master->prepare_message(master, master->cur_msg);
+		if (ret) {
+			dev_err(&master->dev,
+				"failed to prepare message: %d\n", ret);
+			master->cur_msg->status = ret;
+			spi_finalize_current_message(master);
+			return;
+		}
+		master->cur_msg_prepared = true;
+	}
+
 	ret = master->transfer_one_message(master, master->cur_msg);
 	if (ret) {
 		dev_err(&master->dev,
@@ -687,6 +699,7 @@ void spi_finalize_current_message(struct spi_master *master)
 {
 	struct spi_message *mesg;
 	unsigned long flags;
+	int ret;
 
 	spin_lock_irqsave(&master->queue_lock, flags);
 	mesg = master->cur_msg;
@@ -695,6 +708,15 @@ void spi_finalize_current_message(struct spi_master *master)
 	queue_kthread_work(&master->kworker, &master->pump_messages);
 	spin_unlock_irqrestore(&master->queue_lock, flags);
 
+	if (master->cur_msg_prepared && master->unprepare_message) {
+		ret = master->unprepare_message(master, mesg);
+		if (ret) {
+			dev_err(&master->dev,
+				"failed to unprepare message: %d\n", ret);
+		}
+	}
+	master->cur_msg_prepared = false;
+
 	mesg->state = NULL;
 	if (mesg->complete)
 		mesg->complete(mesg->context);
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 887116dbce2c..000b50bee6c0 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -257,6 +257,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @queue_lock: spinlock to syncronise access to message queue
  * @queue: message queue
  * @cur_msg: the currently in-flight message
+ * @cur_msg_prepared: spi_prepare_message was called for the currently
+ *                    in-flight message
  * @busy: message pump is busy
  * @running: message pump is running
  * @rt: whether this queue is set to run as a realtime task
@@ -274,6 +276,10 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @unprepare_transfer_hardware: there are currently no more messages on the
  *	queue so the subsystem notifies the driver that it may relax the
  *	hardware by issuing this call
+ * @prepare_message: set up the controller to transfer a single message,
+ *                   for example doing DMA mapping.  Called from threaded
+ *                   context.
+ * @unprepare_message: undo any work done by prepare_message().
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *	number. Any individual value may be -ENOENT for CS lines that
  *	are not GPIOs (driven by the SPI controller itself).
@@ -388,11 +394,16 @@ struct spi_master {
 	bool				running;
 	bool				rt;
 	bool				auto_runtime_pm;
+	bool                            cur_msg_prepared;
 
 	int (*prepare_transfer_hardware)(struct spi_master *master);
 	int (*transfer_one_message)(struct spi_master *master,
 				    struct spi_message *mesg);
 	int (*unprepare_transfer_hardware)(struct spi_master *master);
+	int (*prepare_message)(struct spi_master *master,
+			       struct spi_message *message);
+	int (*unprepare_message)(struct spi_master *master,
+				 struct spi_message *message);
 
 	/* gpio chip select */
 	int			*cs_gpios;
-- 
cgit v1.2.3


From b158935f70b9c156903338053216dd0adf7ce31c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Sat, 5 Oct 2013 11:50:40 +0100
Subject: spi: Provide common spi_message processing loop

The loops which SPI controller drivers use to process the list of transfers
in a spi_message are typically very similar and have some error prone areas
such as the handling of /CS. Help simplify drivers by factoring this code
out into the core - if drivers provide a transfer_one() function instead
of a transfer_one_message() function the core will handle processing at the
message level.

/CS can be controlled by either setting cs_gpio or providing a set_cs
function. If this is not possible for hardware reasons then both can be
omitted and the driver should continue to implement manual /CS handling.

This is a first step in refactoring and it is expected that there will be
further enhancements, for example factoring out of the mapping of transfers
for DMA and the initiation and completion of interrupt driven transfers.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/spi/spi.c          | 92 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/spi.h    | 21 ++++++++++-
 include/trace/events/spi.h | 42 +++++++++++++++++++++
 3 files changed, 153 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8a30c6b66a64..85c18d8a86b3 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -526,6 +526,95 @@ int spi_register_board_info(struct spi_board_info const *info, unsigned n)
 
 /*-------------------------------------------------------------------------*/
 
+static void spi_set_cs(struct spi_device *spi, bool enable)
+{
+	if (spi->mode & SPI_CS_HIGH)
+		enable = !enable;
+
+	if (spi->cs_gpio >= 0)
+		gpio_set_value(spi->cs_gpio, !enable);
+	else if (spi->master->set_cs)
+		spi->master->set_cs(spi, !enable);
+}
+
+/*
+ * spi_transfer_one_message - Default implementation of transfer_one_message()
+ *
+ * This is a standard implementation of transfer_one_message() for
+ * drivers which impelment a transfer_one() operation.  It provides
+ * standard handling of delays and chip select management.
+ */
+static int spi_transfer_one_message(struct spi_master *master,
+				    struct spi_message *msg)
+{
+	struct spi_transfer *xfer;
+	bool cur_cs = true;
+	bool keep_cs = false;
+	int ret = 0;
+
+	spi_set_cs(msg->spi, true);
+
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		trace_spi_transfer_start(msg, xfer);
+
+		INIT_COMPLETION(master->xfer_completion);
+
+		ret = master->transfer_one(master, msg->spi, xfer);
+		if (ret < 0) {
+			dev_err(&msg->spi->dev,
+				"SPI transfer failed: %d\n", ret);
+			goto out;
+		}
+
+		if (ret > 0)
+			wait_for_completion(&master->xfer_completion);
+
+		trace_spi_transfer_stop(msg, xfer);
+
+		if (msg->status != -EINPROGRESS)
+			goto out;
+
+		if (xfer->delay_usecs)
+			udelay(xfer->delay_usecs);
+
+		if (xfer->cs_change) {
+			if (list_is_last(&xfer->transfer_list,
+					 &msg->transfers)) {
+				keep_cs = true;
+			} else {
+				cur_cs = !cur_cs;
+				spi_set_cs(msg->spi, cur_cs);
+			}
+		}
+
+		msg->actual_length += xfer->len;
+	}
+
+out:
+	if (ret != 0 || !keep_cs)
+		spi_set_cs(msg->spi, false);
+
+	if (msg->status == -EINPROGRESS)
+		msg->status = ret;
+
+	spi_finalize_current_message(master);
+
+	return ret;
+}
+
+/**
+ * spi_finalize_current_transfer - report completion of a transfer
+ *
+ * Called by SPI drivers using the core transfer_one_message()
+ * implementation to notify it that the current interrupt driven
+ * transfer has finised and the next one may be scheduled.
+ */
+void spi_finalize_current_transfer(struct spi_master *master)
+{
+	complete(&master->xfer_completion);
+}
+EXPORT_SYMBOL_GPL(spi_finalize_current_transfer);
+
 /**
  * spi_pump_messages - kthread work function which processes spi message queue
  * @work: pointer to kthread work struct contained in the master struct
@@ -836,6 +925,8 @@ static int spi_master_initialize_queue(struct spi_master *master)
 
 	master->queued = true;
 	master->transfer = spi_queued_transfer;
+	if (!master->transfer_one_message)
+		master->transfer_one_message = spi_transfer_one_message;
 
 	/* Initialize and start queue */
 	ret = spi_init_queue(master);
@@ -1242,6 +1333,7 @@ int spi_register_master(struct spi_master *master)
 	spin_lock_init(&master->bus_lock_spinlock);
 	mutex_init(&master->bus_lock_mutex);
 	master->bus_lock_flag = 0;
+	init_completion(&master->xfer_completion);
 
 	/* register the device, then userspace will see it.
 	 * registration fails if the bus ID is in use.
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 000b50bee6c0..da371ab5ebeb 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -23,6 +23,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
+#include <linux/completion.h>
 
 /*
  * INTERFACES between SPI master-side drivers and SPI infrastructure.
@@ -150,8 +151,7 @@ static inline void *spi_get_drvdata(struct spi_device *spi)
 }
 
 struct spi_message;
-
-
+struct spi_transfer;
 
 /**
  * struct spi_driver - Host side "protocol" driver
@@ -259,6 +259,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @cur_msg: the currently in-flight message
  * @cur_msg_prepared: spi_prepare_message was called for the currently
  *                    in-flight message
+ * @xfer_completion: used by core tranfer_one_message()
  * @busy: message pump is busy
  * @running: message pump is running
  * @rt: whether this queue is set to run as a realtime task
@@ -276,9 +277,15 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @unprepare_transfer_hardware: there are currently no more messages on the
  *	queue so the subsystem notifies the driver that it may relax the
  *	hardware by issuing this call
+ * @set_cs: assert or deassert chip select, true to assert.  May be called
+ *          from interrupt context.
  * @prepare_message: set up the controller to transfer a single message,
  *                   for example doing DMA mapping.  Called from threaded
  *                   context.
+ * @transfer_one: transfer a single spi_transfer. When the
+ *	          driver is finished with this transfer it must call
+ *	          spi_finalize_current_transfer() so the subsystem can issue
+ *                the next transfer
  * @unprepare_message: undo any work done by prepare_message().
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *	number. Any individual value may be -ENOENT for CS lines that
@@ -395,6 +402,7 @@ struct spi_master {
 	bool				rt;
 	bool				auto_runtime_pm;
 	bool                            cur_msg_prepared;
+	struct completion               xfer_completion;
 
 	int (*prepare_transfer_hardware)(struct spi_master *master);
 	int (*transfer_one_message)(struct spi_master *master,
@@ -405,6 +413,14 @@ struct spi_master {
 	int (*unprepare_message)(struct spi_master *master,
 				 struct spi_message *message);
 
+	/*
+	 * These hooks are for drivers that use a generic implementation
+	 * of transfer_one_message() provied by the core.
+	 */
+	void (*set_cs)(struct spi_device *spi, bool enable);
+	int (*transfer_one)(struct spi_master *master, struct spi_device *spi,
+			    struct spi_transfer *transfer);
+
 	/* gpio chip select */
 	int			*cs_gpios;
 };
@@ -439,6 +455,7 @@ extern int spi_master_resume(struct spi_master *master);
 /* Calls the driver make to interact with the message queue */
 extern struct spi_message *spi_get_next_queued_message(struct spi_master *master);
 extern void spi_finalize_current_message(struct spi_master *master);
+extern void spi_finalize_current_transfer(struct spi_master *master);
 
 /* the spi driver core manages memory for the spi_master classdev */
 extern struct spi_master *
diff --git a/include/trace/events/spi.h b/include/trace/events/spi.h
index 5e77e21f885a..7e02c983bbe2 100644
--- a/include/trace/events/spi.h
+++ b/include/trace/events/spi.h
@@ -108,6 +108,48 @@ TRACE_EVENT(spi_message_done,
                   (unsigned)__entry->actual, (unsigned)__entry->frame)
 );
 
+DECLARE_EVENT_CLASS(spi_transfer,
+
+	TP_PROTO(struct spi_message *msg, struct spi_transfer *xfer),
+
+	TP_ARGS(msg, xfer),
+
+	TP_STRUCT__entry(
+		__field(        int,            bus_num         )
+		__field(        int,            chip_select     )
+		__field(        struct spi_transfer *,   xfer   )
+		__field(        int,            len             )
+	),
+
+	TP_fast_assign(
+		__entry->bus_num = msg->spi->master->bus_num;
+		__entry->chip_select = msg->spi->chip_select;
+		__entry->xfer = xfer;
+		__entry->len = xfer->len;
+	),
+
+        TP_printk("spi%d.%d %p len=%d", (int)__entry->bus_num,
+		  (int)__entry->chip_select,
+		  (struct spi_message *)__entry->xfer,
+		  (int)__entry->len)
+);
+
+DEFINE_EVENT(spi_transfer, spi_transfer_start,
+
+	TP_PROTO(struct spi_message *msg, struct spi_transfer *xfer),
+
+	TP_ARGS(msg, xfer)
+
+);
+
+DEFINE_EVENT(spi_transfer, spi_transfer_stop,
+
+	TP_PROTO(struct spi_message *msg, struct spi_transfer *xfer),
+
+	TP_ARGS(msg, xfer)
+
+);
+
 #endif /* _TRACE_POWER_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 9ef73dbdd0fc292d183e93cd1d4b21d1a66040d7 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Oct 2013 17:01:40 +0200
Subject: usb-anchor: Ensure poisened gets initialized to 0

And do so in a way which ensures that any fields added in the future will
also get properly zero-ed.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index f726c39097e0..fa8bedf06c6e 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1214,6 +1214,7 @@ struct usb_anchor {
 
 static inline void init_usb_anchor(struct usb_anchor *anchor)
 {
+	memset(anchor, 0, sizeof(*anchor));
 	INIT_LIST_HEAD(&anchor->urb_list);
 	init_waitqueue_head(&anchor->wait);
 	spin_lock_init(&anchor->lock);
-- 
cgit v1.2.3


From 6ec4147e7bdbde168f5bce30de5984aa4f971b22 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Oct 2013 17:01:41 +0200
Subject: usb-anchor: Delay usb_wait_anchor_empty_timeout wake up till
 completion is done

usb_wait_anchor_empty_timeout() should wait till the completion handler
has run. Both the zd1211rw driver and the uas driver (in its task mgmt) depend
on the completion handler having completed when usb_wait_anchor_empty_timeout()
returns, as they read state set by the completion handler after an
usb_wait_anchor_empty_timeout() call.

But __usb_hcd_giveback_urb() calls usb_unanchor_urb before calling the
completion handler. This is necessary as the completion handler may
re-submit and re-anchor the urb. But this introduces a race where the state
these drivers want to read has not been set yet by the completion handler
(this race is easily triggered with the uas task mgmt code).

I've considered adding an anchor_count to struct urb, which would be
incremented on anchor and decremented on unanchor, and then only actually
do the anchor / unanchor on 0 -> 1 and 1 -> 0 transtions, combined with
moving the unanchor call in hcd_giveback_urb to after calling the completion
handler. But this will only work if urb's are only re-anchored to the same
anchor as they were anchored to before the completion handler ran.

And at least one driver re-anchors to another anchor from the completion
handler (rtlwifi).

So I have come up with this patch instead, which adds the ability to
suspend wakeups of usb_wait_anchor_empty_timeout() waiters to the usb_anchor
functionality, and uses this in __usb_hcd_giveback_urb() to delay wake-ups
until the completion handler has run.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c |  3 +++
 drivers/usb/core/urb.c | 44 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/usb.h    |  3 +++
 3 files changed, 48 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 460bb59cb655..149cdf129293 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1652,6 +1652,7 @@ int usb_hcd_unlink_urb (struct urb *urb, int status)
 static void __usb_hcd_giveback_urb(struct urb *urb)
 {
 	struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus);
+	struct usb_anchor *anchor = urb->anchor;
 	int status = urb->unlinked;
 	unsigned long flags;
 
@@ -1663,6 +1664,7 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
 
 	unmap_urb_for_dma(hcd, urb);
 	usbmon_urb_complete(&hcd->self, urb, status);
+	usb_anchor_suspend_wakeups(anchor);
 	usb_unanchor_urb(urb);
 
 	/* pass ownership to the completion handler */
@@ -1682,6 +1684,7 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
 	urb->complete(urb);
 	local_irq_restore(flags);
 
+	usb_anchor_resume_wakeups(anchor);
 	atomic_dec(&urb->use_count);
 	if (unlikely(atomic_read(&urb->reject)))
 		wake_up(&usb_kill_urb_queue);
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index c12bc790a6a7..e62208356c89 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -138,13 +138,19 @@ void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor)
 }
 EXPORT_SYMBOL_GPL(usb_anchor_urb);
 
+static int usb_anchor_check_wakeup(struct usb_anchor *anchor)
+{
+	return atomic_read(&anchor->suspend_wakeups) == 0 &&
+		list_empty(&anchor->urb_list);
+}
+
 /* Callers must hold anchor->lock */
 static void __usb_unanchor_urb(struct urb *urb, struct usb_anchor *anchor)
 {
 	urb->anchor = NULL;
 	list_del(&urb->anchor_list);
 	usb_put_urb(urb);
-	if (list_empty(&anchor->urb_list))
+	if (usb_anchor_check_wakeup(anchor))
 		wake_up(&anchor->wait);
 }
 
@@ -845,6 +851,39 @@ void usb_unlink_anchored_urbs(struct usb_anchor *anchor)
 }
 EXPORT_SYMBOL_GPL(usb_unlink_anchored_urbs);
 
+/**
+ * usb_anchor_suspend_wakeups
+ * @anchor: the anchor you want to suspend wakeups on
+ *
+ * Call this to stop the last urb being unanchored from waking up any
+ * usb_wait_anchor_empty_timeout waiters. This is used in the hcd urb give-
+ * back path to delay waking up until after the completion handler has run.
+ */
+void usb_anchor_suspend_wakeups(struct usb_anchor *anchor)
+{
+	if (anchor)
+		atomic_inc(&anchor->suspend_wakeups);
+}
+EXPORT_SYMBOL_GPL(usb_anchor_suspend_wakeups);
+
+/**
+ * usb_anchor_resume_wakeups
+ * @anchor: the anchor you want to resume wakeups on
+ *
+ * Allow usb_wait_anchor_empty_timeout waiters to be woken up again, and
+ * wake up any current waiters if the anchor is empty.
+ */
+void usb_anchor_resume_wakeups(struct usb_anchor *anchor)
+{
+	if (!anchor)
+		return;
+
+	atomic_dec(&anchor->suspend_wakeups);
+	if (usb_anchor_check_wakeup(anchor))
+		wake_up(&anchor->wait);
+}
+EXPORT_SYMBOL_GPL(usb_anchor_resume_wakeups);
+
 /**
  * usb_wait_anchor_empty_timeout - wait for an anchor to be unused
  * @anchor: the anchor you want to become unused
@@ -858,7 +897,8 @@ EXPORT_SYMBOL_GPL(usb_unlink_anchored_urbs);
 int usb_wait_anchor_empty_timeout(struct usb_anchor *anchor,
 				  unsigned int timeout)
 {
-	return wait_event_timeout(anchor->wait, list_empty(&anchor->urb_list),
+	return wait_event_timeout(anchor->wait,
+				  usb_anchor_check_wakeup(anchor),
 				  msecs_to_jiffies(timeout));
 }
 EXPORT_SYMBOL_GPL(usb_wait_anchor_empty_timeout);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index fa8bedf06c6e..055ba74bee80 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1209,6 +1209,7 @@ struct usb_anchor {
 	struct list_head urb_list;
 	wait_queue_head_t wait;
 	spinlock_t lock;
+	atomic_t suspend_wakeups;
 	unsigned int poisoned:1;
 };
 
@@ -1575,6 +1576,8 @@ extern void usb_kill_anchored_urbs(struct usb_anchor *anchor);
 extern void usb_poison_anchored_urbs(struct usb_anchor *anchor);
 extern void usb_unpoison_anchored_urbs(struct usb_anchor *anchor);
 extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor);
+extern void usb_anchor_suspend_wakeups(struct usb_anchor *anchor);
+extern void usb_anchor_resume_wakeups(struct usb_anchor *anchor);
 extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor);
 extern void usb_unanchor_urb(struct urb *urb);
 extern int usb_wait_anchor_empty_timeout(struct usb_anchor *anchor,
-- 
cgit v1.2.3


From 63fb3a280061c5a1d9190015e5a074213f9d23c0 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 11 Oct 2013 11:28:02 -0400
Subject: USB: NS_TO_US should round up

Host controller drivers use the NS_TO_US macro to convert transaction
times, which are computed in nanoseconds, to microseconds for
scheduling.  Periodic scheduling requires worst-case estimates, but
the macro does its conversion using round-to-nearest.  This patch
changes it to use round-up, giving a correct worst-case value.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/hcd.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index fc64b6825f5e..dbe3cd19ffd8 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -564,9 +564,8 @@ extern void usb_ep0_reinit(struct usb_device *);
 		 * of (7/6 * 8 * bytecount) = 9.33 * bytecount */
 		/* bytecount = data payload byte count */
 
-#define NS_TO_US(ns)	((ns + 500L) / 1000L)
-			/* convert & round nanoseconds to microseconds */
-
+#define NS_TO_US(ns)	DIV_ROUND_UP(ns, 1000L)
+			/* convert nanoseconds to microseconds, rounding up */
 
 /*
  * Full/low speed bandwidth allocation constants/support.
-- 
cgit v1.2.3


From 7c4bb942986fc2aa7ca4fccfed665d24525a0e21 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 11 Oct 2013 11:29:22 -0400
Subject: USB: add a private-data pointer to struct usb_tt

For improved scheduling of transfers through a Transaction Translator,
ehci-hcd will need to store a bunch of information associated with the
FS/LS bus on the downstream side of the TT.  This patch adds a pointer
for such HCD-private data to the usb_tt structure.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/hcd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index dbe3cd19ffd8..b8aba196f7f1 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -506,6 +506,7 @@ struct usb_tt {
 	struct usb_device	*hub;	/* upstream highspeed hub */
 	int			multi;	/* true means one TT per port */
 	unsigned		think_time;	/* think time in ns */
+	void			*hcpriv;	/* HCD private data */
 
 	/* for control/bulk error recovery (CLEAR_TT_BUFFER) */
 	spinlock_t		lock;
-- 
cgit v1.2.3


From 57ae1605c0f511bca212787d28d79a3f82c0a7f9 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Thu, 10 Oct 2013 16:41:19 +0900
Subject: USB: ehci-s5p: Remove non-DT support

The non-DT for EXYNOS SoCs is not supported from v3.11.
Thus, there is no need to support non-DT for Exynos EHCI driver.

The 'include/linux/platform_data/usb-ehci-s5p.h' file has been
used for non-DT support. Thus, the 'usb-ehci-s5p.h' file can
be removed.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-s5p.c                | 32 ++++--------------------------
 include/linux/platform_data/usb-ehci-s5p.h | 21 --------------------
 2 files changed, 4 insertions(+), 49 deletions(-)
 delete mode 100644 include/linux/platform_data/usb-ehci-s5p.h

(limited to 'include/linux')

diff --git a/drivers/usb/host/ehci-s5p.c b/drivers/usb/host/ehci-s5p.c
index 7c3de95c7054..45e1ad3f56b2 100644
--- a/drivers/usb/host/ehci-s5p.c
+++ b/drivers/usb/host/ehci-s5p.c
@@ -20,7 +20,6 @@
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/usb-ehci-s5p.h>
 #include <linux/usb/phy.h>
 #include <linux/usb/samsung_usb_phy.h>
 #include <linux/usb.h>
@@ -47,11 +46,8 @@ struct s5p_ehci_hcd {
 	struct clk *clk;
 	struct usb_phy *phy;
 	struct usb_otg *otg;
-	struct s5p_ehci_platdata *pdata;
 };
 
-static struct s5p_ehci_platdata empty_platdata;
-
 #define to_s5p_ehci(hcd)      (struct s5p_ehci_hcd *)(hcd_to_ehci(hcd)->priv)
 
 static void s5p_setup_vbus_gpio(struct platform_device *pdev)
@@ -75,7 +71,6 @@ static void s5p_setup_vbus_gpio(struct platform_device *pdev)
 
 static int s5p_ehci_probe(struct platform_device *pdev)
 {
-	struct s5p_ehci_platdata *pdata = dev_get_platdata(&pdev->dev);
 	struct s5p_ehci_hcd *s5p_ehci;
 	struct usb_hcd *hcd;
 	struct ehci_hcd *ehci;
@@ -105,21 +100,14 @@ static int s5p_ehci_probe(struct platform_device *pdev)
 	s5p_ehci = to_s5p_ehci(hcd);
 
 	if (of_device_is_compatible(pdev->dev.of_node,
-					"samsung,exynos5440-ehci")) {
-		s5p_ehci->pdata = &empty_platdata;
+					"samsung,exynos5440-ehci"))
 		goto skip_phy;
-	}
 
 	phy = devm_usb_get_phy(&pdev->dev, USB_PHY_TYPE_USB2);
 	if (IS_ERR(phy)) {
-		/* Fallback to pdata */
-		if (!pdata) {
-			usb_put_hcd(hcd);
-			dev_warn(&pdev->dev, "no platform data or transceiver defined\n");
-			return -EPROBE_DEFER;
-		} else {
-			s5p_ehci->pdata = pdata;
-		}
+		usb_put_hcd(hcd);
+		dev_warn(&pdev->dev, "no platform data or transceiver defined\n");
+		return -EPROBE_DEFER;
 	} else {
 		s5p_ehci->phy = phy;
 		s5p_ehci->otg = phy->otg;
@@ -167,8 +155,6 @@ skip_phy:
 
 	if (s5p_ehci->phy)
 		usb_phy_init(s5p_ehci->phy);
-	else if (s5p_ehci->pdata->phy_init)
-		s5p_ehci->pdata->phy_init(pdev, USB_PHY_TYPE_HOST);
 
 	ehci = hcd_to_ehci(hcd);
 	ehci->caps = hcd->regs;
@@ -189,8 +175,6 @@ skip_phy:
 fail_add_hcd:
 	if (s5p_ehci->phy)
 		usb_phy_shutdown(s5p_ehci->phy);
-	else if (s5p_ehci->pdata->phy_exit)
-		s5p_ehci->pdata->phy_exit(pdev, USB_PHY_TYPE_HOST);
 fail_io:
 	clk_disable_unprepare(s5p_ehci->clk);
 fail_clk:
@@ -210,8 +194,6 @@ static int s5p_ehci_remove(struct platform_device *pdev)
 
 	if (s5p_ehci->phy)
 		usb_phy_shutdown(s5p_ehci->phy);
-	else if (s5p_ehci->pdata->phy_exit)
-		s5p_ehci->pdata->phy_exit(pdev, USB_PHY_TYPE_HOST);
 
 	clk_disable_unprepare(s5p_ehci->clk);
 
@@ -225,7 +207,6 @@ static int s5p_ehci_suspend(struct device *dev)
 {
 	struct usb_hcd *hcd = dev_get_drvdata(dev);
 	struct s5p_ehci_hcd *s5p_ehci = to_s5p_ehci(hcd);
-	struct platform_device *pdev = to_platform_device(dev);
 
 	bool do_wakeup = device_may_wakeup(dev);
 	int rc;
@@ -237,8 +218,6 @@ static int s5p_ehci_suspend(struct device *dev)
 
 	if (s5p_ehci->phy)
 		usb_phy_shutdown(s5p_ehci->phy);
-	else if (s5p_ehci->pdata->phy_exit)
-		s5p_ehci->pdata->phy_exit(pdev, USB_PHY_TYPE_HOST);
 
 	clk_disable_unprepare(s5p_ehci->clk);
 
@@ -249,7 +228,6 @@ static int s5p_ehci_resume(struct device *dev)
 {
 	struct usb_hcd *hcd = dev_get_drvdata(dev);
 	struct  s5p_ehci_hcd *s5p_ehci = to_s5p_ehci(hcd);
-	struct platform_device *pdev = to_platform_device(dev);
 
 	clk_prepare_enable(s5p_ehci->clk);
 
@@ -258,8 +236,6 @@ static int s5p_ehci_resume(struct device *dev)
 
 	if (s5p_ehci->phy)
 		usb_phy_init(s5p_ehci->phy);
-	else if (s5p_ehci->pdata->phy_init)
-		s5p_ehci->pdata->phy_init(pdev, USB_PHY_TYPE_HOST);
 
 	/* DMA burst Enable */
 	writel(EHCI_INSNREG00_ENABLE_DMA_BURST, EHCI_INSNREG00(hcd->regs));
diff --git a/include/linux/platform_data/usb-ehci-s5p.h b/include/linux/platform_data/usb-ehci-s5p.h
deleted file mode 100644
index 5f28cae18582..000000000000
--- a/include/linux/platform_data/usb-ehci-s5p.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2011 Samsung Electronics Co.Ltd
- * Author: Joonyoung Shim <jy0922.shim@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef __PLAT_SAMSUNG_EHCI_H
-#define __PLAT_SAMSUNG_EHCI_H __FILE__
-
-struct s5p_ehci_platdata {
-	int (*phy_init)(struct platform_device *pdev, int type);
-	int (*phy_exit)(struct platform_device *pdev, int type);
-};
-
-extern void s5p_ehci_set_platdata(struct s5p_ehci_platdata *pd);
-
-#endif /* __PLAT_SAMSUNG_EHCI_H */
-- 
cgit v1.2.3


From 706cd17e8559c96dc883ba692c931f1ef31fbc5c Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Wed, 9 Oct 2013 17:01:12 +0200
Subject: USB: serial: export usb_serial_generic_write_start

Export usb_serial_generic_write_start which is needed when implementing
a custom resume function while still relying on the generic write
implementation.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/generic.c | 3 ++-
 include/linux/usb/serial.h   | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 31f78290c0e4..2b01ec8651c2 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -129,7 +129,7 @@ int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port,
  *
  * Return: Zero on success or if busy, otherwise a negative errno value.
  */
-static int usb_serial_generic_write_start(struct usb_serial_port *port,
+int usb_serial_generic_write_start(struct usb_serial_port *port,
 							gfp_t mem_flags)
 {
 	struct urb *urb;
@@ -184,6 +184,7 @@ retry:
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(usb_serial_generic_write_start);
 
 /**
  * usb_serial_generic_write - generic write function
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index d528b8045150..704a1ab8240c 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -320,6 +320,8 @@ extern struct usb_serial_port *usb_serial_port_get_by_minor(unsigned int minor);
 extern void usb_serial_put(struct usb_serial *serial);
 extern int usb_serial_generic_open(struct tty_struct *tty,
 	struct usb_serial_port *port);
+extern int usb_serial_generic_write_start(struct usb_serial_port *port,
+							gfp_t mem_flags);
 extern int usb_serial_generic_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
 extern void usb_serial_generic_close(struct usb_serial_port *port);
-- 
cgit v1.2.3


From 9e69c935fad9fd5f0550c51e3bd251cd30033136 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 4 Oct 2013 12:06:00 +0100
Subject: iio: Add reference counting for buffers

Since the buffer is accessed by userspace we can not just free the buffers
memory once we are done with it in kernel space. There might still be open file
descriptors and userspace still might be accessing the buffer. This patch adds
support for reference counting to the IIO buffers. When a buffer is created and
initialized its initial reference count is set to 1. Instead of freeing the
memory of the buffer the buffer's _free() function will drop that reference
again. But only after the last reference to the buffer has been dropped the
buffer the buffer's memory will be freed. The IIO device will take a reference
to its primary buffer. The patch adds a small helper function for this called
iio_device_attach_buffer() which will get a reference to the buffer and assign
the buffer to the IIO device. This function must be used instead of assigning
the buffer to the device by hand. The reference is only dropped once the IIO
device is freed and we can be sure that there are no more open file handles. A
reference to a buffer will also be taken whenever the buffer is active to avoid
the buffer being freed while data is still being send to it.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/buffer_cb.c                         | 21 +++++---
 drivers/iio/industrialio-buffer.c               | 66 +++++++++++++++++++++++--
 drivers/iio/industrialio-core.c                 |  3 ++
 drivers/iio/industrialio-triggered-buffer.c     |  7 ++-
 drivers/iio/kfifo_buf.c                         |  8 ++-
 drivers/staging/iio/accel/lis3l02dq_ring.c      |  2 +-
 drivers/staging/iio/accel/sca3000_ring.c        | 13 +++--
 drivers/staging/iio/iio_simple_dummy_buffer.c   |  2 +-
 drivers/staging/iio/impedance-analyzer/ad5933.c |  8 ++-
 drivers/staging/iio/meter/ade7758_ring.c        |  7 ++-
 include/linux/iio/buffer.h                      | 28 +++++++++++
 11 files changed, 141 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/buffer_cb.c b/drivers/iio/buffer_cb.c
index 841fec1e78b2..2d9c6f8c06db 100644
--- a/drivers/iio/buffer_cb.c
+++ b/drivers/iio/buffer_cb.c
@@ -12,17 +12,27 @@ struct iio_cb_buffer {
 	struct iio_channel *channels;
 };
 
-static int iio_buffer_cb_store_to(struct iio_buffer *buffer, const void *data)
+static struct iio_cb_buffer *buffer_to_cb_buffer(struct iio_buffer *buffer)
 {
-	struct iio_cb_buffer *cb_buff = container_of(buffer,
-						     struct iio_cb_buffer,
-						     buffer);
+	return container_of(buffer, struct iio_cb_buffer, buffer);
+}
 
+static int iio_buffer_cb_store_to(struct iio_buffer *buffer, const void *data)
+{
+	struct iio_cb_buffer *cb_buff = buffer_to_cb_buffer(buffer);
 	return cb_buff->cb(data, cb_buff->private);
 }
 
+static void iio_buffer_cb_release(struct iio_buffer *buffer)
+{
+	struct iio_cb_buffer *cb_buff = buffer_to_cb_buffer(buffer);
+	kfree(cb_buff->buffer.scan_mask);
+	kfree(cb_buff);
+}
+
 static const struct iio_buffer_access_funcs iio_cb_access = {
 	.store_to = &iio_buffer_cb_store_to,
+	.release = &iio_buffer_cb_release,
 };
 
 struct iio_cb_buffer *iio_channel_get_all_cb(struct device *dev,
@@ -104,9 +114,8 @@ EXPORT_SYMBOL_GPL(iio_channel_stop_all_cb);
 
 void iio_channel_release_all_cb(struct iio_cb_buffer *cb_buff)
 {
-	kfree(cb_buff->buffer.scan_mask);
 	iio_channel_release_all(cb_buff->channels);
-	kfree(cb_buff);
+	iio_buffer_put(&cb_buff->buffer);
 }
 EXPORT_SYMBOL_GPL(iio_channel_release_all_cb);
 
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index e9f389b9da69..36c39dcad850 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -74,6 +74,7 @@ void iio_buffer_init(struct iio_buffer *buffer)
 	INIT_LIST_HEAD(&buffer->demux_list);
 	INIT_LIST_HEAD(&buffer->buffer_list);
 	init_waitqueue_head(&buffer->pollq);
+	kref_init(&buffer->ref);
 }
 EXPORT_SYMBOL(iio_buffer_init);
 
@@ -454,6 +455,19 @@ static int iio_compute_scan_bytes(struct iio_dev *indio_dev,
 	return bytes;
 }
 
+static void iio_buffer_activate(struct iio_dev *indio_dev,
+	struct iio_buffer *buffer)
+{
+	iio_buffer_get(buffer);
+	list_add(&buffer->buffer_list, &indio_dev->buffer_list);
+}
+
+static void iio_buffer_deactivate(struct iio_buffer *buffer)
+{
+	list_del_init(&buffer->buffer_list);
+	iio_buffer_put(buffer);
+}
+
 void iio_disable_all_buffers(struct iio_dev *indio_dev)
 {
 	struct iio_buffer *buffer, *_buffer;
@@ -466,7 +480,7 @@ void iio_disable_all_buffers(struct iio_dev *indio_dev)
 
 	list_for_each_entry_safe(buffer, _buffer,
 			&indio_dev->buffer_list, buffer_list)
-		list_del_init(&buffer->buffer_list);
+		iio_buffer_deactivate(buffer);
 
 	indio_dev->currentmode = INDIO_DIRECT_MODE;
 	if (indio_dev->setup_ops->postdisable)
@@ -503,9 +517,9 @@ int iio_update_buffers(struct iio_dev *indio_dev,
 		indio_dev->active_scan_mask = NULL;
 
 	if (remove_buffer)
-		list_del_init(&remove_buffer->buffer_list);
+		iio_buffer_deactivate(remove_buffer);
 	if (insert_buffer)
-		list_add(&insert_buffer->buffer_list, &indio_dev->buffer_list);
+		iio_buffer_activate(indio_dev, insert_buffer);
 
 	/* If no buffers in list, we are done */
 	if (list_empty(&indio_dev->buffer_list)) {
@@ -540,7 +554,7 @@ int iio_update_buffers(struct iio_dev *indio_dev,
 			 * Roll back.
 			 * Note can only occur when adding a buffer.
 			 */
-			list_del_init(&insert_buffer->buffer_list);
+			iio_buffer_deactivate(insert_buffer);
 			if (old_mask) {
 				indio_dev->active_scan_mask = old_mask;
 				success = -EINVAL;
@@ -631,7 +645,7 @@ error_run_postdisable:
 error_remove_inserted:
 
 	if (insert_buffer)
-		list_del_init(&insert_buffer->buffer_list);
+		iio_buffer_deactivate(insert_buffer);
 	indio_dev->active_scan_mask = old_mask;
 	kfree(compound_mask);
 error_ret:
@@ -952,3 +966,45 @@ error_clear_mux_table:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iio_update_demux);
+
+/**
+ * iio_buffer_release() - Free a buffer's resources
+ * @ref: Pointer to the kref embedded in the iio_buffer struct
+ *
+ * This function is called when the last reference to the buffer has been
+ * dropped. It will typically free all resources allocated by the buffer. Do not
+ * call this function manually, always use iio_buffer_put() when done using a
+ * buffer.
+ */
+static void iio_buffer_release(struct kref *ref)
+{
+	struct iio_buffer *buffer = container_of(ref, struct iio_buffer, ref);
+
+	buffer->access->release(buffer);
+}
+
+/**
+ * iio_buffer_get() - Grab a reference to the buffer
+ * @buffer: The buffer to grab a reference for, may be NULL
+ *
+ * Returns the pointer to the buffer that was passed into the function.
+ */
+struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer)
+{
+	if (buffer)
+		kref_get(&buffer->ref);
+
+	return buffer;
+}
+EXPORT_SYMBOL_GPL(iio_buffer_get);
+
+/**
+ * iio_buffer_put() - Release the reference to the buffer
+ * @buffer: The buffer to release the reference for, may be NULL
+ */
+void iio_buffer_put(struct iio_buffer *buffer)
+{
+	if (buffer)
+		kref_put(&buffer->ref, iio_buffer_release);
+}
+EXPORT_SYMBOL_GPL(iio_buffer_put);
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index ecf58963ac24..f939bad31ca1 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -28,6 +28,7 @@
 #include "iio_core_trigger.h"
 #include <linux/iio/sysfs.h>
 #include <linux/iio/events.h>
+#include <linux/iio/buffer.h>
 
 /* IDA to assign each registered device a unique id */
 static DEFINE_IDA(iio_ida);
@@ -895,6 +896,8 @@ static void iio_dev_release(struct device *device)
 	iio_device_unregister_eventset(indio_dev);
 	iio_device_unregister_sysfs(indio_dev);
 
+	iio_buffer_put(indio_dev->buffer);
+
 	ida_simple_remove(&iio_ida, indio_dev->id);
 	kfree(indio_dev);
 }
diff --git a/drivers/iio/industrialio-triggered-buffer.c b/drivers/iio/industrialio-triggered-buffer.c
index 46c619b0d8c5..c1cb1f94fe2e 100644
--- a/drivers/iio/industrialio-triggered-buffer.c
+++ b/drivers/iio/industrialio-triggered-buffer.c
@@ -47,14 +47,17 @@ int iio_triggered_buffer_setup(struct iio_dev *indio_dev,
 	irqreturn_t (*pollfunc_th)(int irq, void *p),
 	const struct iio_buffer_setup_ops *setup_ops)
 {
+	struct iio_buffer *buffer;
 	int ret;
 
-	indio_dev->buffer = iio_kfifo_allocate(indio_dev);
-	if (!indio_dev->buffer) {
+	buffer = iio_kfifo_allocate(indio_dev);
+	if (!buffer) {
 		ret = -ENOMEM;
 		goto error_ret;
 	}
 
+	iio_device_attach_buffer(indio_dev, buffer);
+
 	indio_dev->pollfunc = iio_alloc_pollfunc(pollfunc_bh,
 						 pollfunc_th,
 						 IRQF_ONESHOT,
diff --git a/drivers/iio/kfifo_buf.c b/drivers/iio/kfifo_buf.c
index 538d4616e48f..b4ac55a29fc4 100644
--- a/drivers/iio/kfifo_buf.c
+++ b/drivers/iio/kfifo_buf.c
@@ -130,6 +130,11 @@ static int iio_read_first_n_kfifo(struct iio_buffer *r,
 	return copied;
 }
 
+static void iio_kfifo_buffer_release(struct iio_buffer *buffer)
+{
+	kfree(iio_to_kfifo(buffer));
+}
+
 static const struct iio_buffer_access_funcs kfifo_access_funcs = {
 	.store_to = &iio_store_to_kfifo,
 	.read_first_n = &iio_read_first_n_kfifo,
@@ -138,6 +143,7 @@ static const struct iio_buffer_access_funcs kfifo_access_funcs = {
 	.set_bytes_per_datum = &iio_set_bytes_per_datum_kfifo,
 	.get_length = &iio_get_length_kfifo,
 	.set_length = &iio_set_length_kfifo,
+	.release = &iio_kfifo_buffer_release,
 };
 
 struct iio_buffer *iio_kfifo_allocate(struct iio_dev *indio_dev)
@@ -158,7 +164,7 @@ EXPORT_SYMBOL(iio_kfifo_allocate);
 
 void iio_kfifo_free(struct iio_buffer *r)
 {
-	kfree(iio_to_kfifo(r));
+	iio_buffer_put(r);
 }
 EXPORT_SYMBOL(iio_kfifo_free);
 
diff --git a/drivers/staging/iio/accel/lis3l02dq_ring.c b/drivers/staging/iio/accel/lis3l02dq_ring.c
index ed7de471e3f3..2d559ba353d2 100644
--- a/drivers/staging/iio/accel/lis3l02dq_ring.c
+++ b/drivers/staging/iio/accel/lis3l02dq_ring.c
@@ -396,7 +396,7 @@ int lis3l02dq_configure_buffer(struct iio_dev *indio_dev)
 	if (!buffer)
 		return -ENOMEM;
 
-	indio_dev->buffer = buffer;
+	iio_device_attach_buffer(indio_dev, buffer);
 
 	buffer->scan_timestamp = true;
 	indio_dev->setup_ops = &lis3l02dq_buffer_setup_ops;
diff --git a/drivers/staging/iio/accel/sca3000_ring.c b/drivers/staging/iio/accel/sca3000_ring.c
index 0f2ee3373d9a..4a27beb1451a 100644
--- a/drivers/staging/iio/accel/sca3000_ring.c
+++ b/drivers/staging/iio/accel/sca3000_ring.c
@@ -265,7 +265,7 @@ static struct iio_buffer *sca3000_rb_allocate(struct iio_dev *indio_dev)
 	return buf;
 }
 
-static inline void sca3000_rb_free(struct iio_buffer *r)
+static void sca3000_ring_release(struct iio_buffer *r)
 {
 	kfree(iio_to_hw_buf(r));
 }
@@ -274,23 +274,28 @@ static const struct iio_buffer_access_funcs sca3000_ring_access_funcs = {
 	.read_first_n = &sca3000_read_first_n_hw_rb,
 	.get_length = &sca3000_ring_get_length,
 	.get_bytes_per_datum = &sca3000_ring_get_bytes_per_datum,
+	.release = sca3000_ring_release,
 };
 
 int sca3000_configure_ring(struct iio_dev *indio_dev)
 {
-	indio_dev->buffer = sca3000_rb_allocate(indio_dev);
-	if (indio_dev->buffer == NULL)
+	struct iio_buffer *buffer;
+
+	buffer = sca3000_rb_allocate(indio_dev);
+	if (buffer == NULL)
 		return -ENOMEM;
 	indio_dev->modes |= INDIO_BUFFER_HARDWARE;
 
 	indio_dev->buffer->access = &sca3000_ring_access_funcs;
 
+	iio_device_attach_buffer(indio_dev, buffer);
+
 	return 0;
 }
 
 void sca3000_unconfigure_ring(struct iio_dev *indio_dev)
 {
-	sca3000_rb_free(indio_dev->buffer);
+	iio_buffer_put(indio_dev->buffer);
 }
 
 static inline
diff --git a/drivers/staging/iio/iio_simple_dummy_buffer.c b/drivers/staging/iio/iio_simple_dummy_buffer.c
index 09c93ac7351a..2612e87fbcb4 100644
--- a/drivers/staging/iio/iio_simple_dummy_buffer.c
+++ b/drivers/staging/iio/iio_simple_dummy_buffer.c
@@ -135,7 +135,7 @@ int iio_simple_dummy_configure_buffer(struct iio_dev *indio_dev,
 		goto error_ret;
 	}
 
-	indio_dev->buffer = buffer;
+	iio_device_attach_buffer(indio_dev, buffer);
 
 	/* Enable timestamps by default */
 	buffer->scan_timestamp = true;
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 712f3c22ce64..f5701159bf36 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -630,10 +630,14 @@ static const struct iio_buffer_setup_ops ad5933_ring_setup_ops = {
 
 static int ad5933_register_ring_funcs_and_init(struct iio_dev *indio_dev)
 {
-	indio_dev->buffer = iio_kfifo_allocate(indio_dev);
-	if (!indio_dev->buffer)
+	struct iio_buffer *buffer;
+
+	buffer = iio_kfifo_allocate(indio_dev);
+	if (buffer)
 		return -ENOMEM;
 
+	iio_device_attach_buffer(indio_dev, buffer);
+
 	/* Ring buffer functions - here trigger setup related */
 	indio_dev->setup_ops = &ad5933_ring_setup_ops;
 
diff --git a/drivers/staging/iio/meter/ade7758_ring.c b/drivers/staging/iio/meter/ade7758_ring.c
index 4080995c99b6..7a448ffaeb14 100644
--- a/drivers/staging/iio/meter/ade7758_ring.c
+++ b/drivers/staging/iio/meter/ade7758_ring.c
@@ -121,14 +121,17 @@ void ade7758_unconfigure_ring(struct iio_dev *indio_dev)
 int ade7758_configure_ring(struct iio_dev *indio_dev)
 {
 	struct ade7758_state *st = iio_priv(indio_dev);
+	struct iio_buffer *buffer;
 	int ret = 0;
 
-	indio_dev->buffer = iio_kfifo_allocate(indio_dev);
-	if (!indio_dev->buffer) {
+	buffer = iio_kfifo_allocate(indio_dev);
+	if (!buffer) {
 		ret = -ENOMEM;
 		return ret;
 	}
 
+	iio_device_attach_buffer(indio_dev, buffer);
+
 	indio_dev->setup_ops = &ade7758_ring_setup_ops;
 
 	indio_dev->pollfunc = iio_alloc_pollfunc(&iio_pollfunc_store_time,
diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index a1124bdc4cac..6e428d96d570 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -11,6 +11,7 @@
 #define _IIO_BUFFER_GENERIC_H_
 #include <linux/sysfs.h>
 #include <linux/iio/iio.h>
+#include <linux/kref.h>
 
 #ifdef CONFIG_IIO_BUFFER
 
@@ -26,6 +27,8 @@ struct iio_buffer;
  * @set_bytes_per_datum:set number of bytes per datum
  * @get_length:		get number of datums in buffer
  * @set_length:		set number of datums in buffer
+ * @release:		called when the last reference to the buffer is dropped,
+ *			should free all resources allocated by the buffer.
  *
  * The purpose of this structure is to make the buffer element
  * modular as event for a given driver, different usecases may require
@@ -47,6 +50,8 @@ struct iio_buffer_access_funcs {
 	int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd);
 	int (*get_length)(struct iio_buffer *buffer);
 	int (*set_length)(struct iio_buffer *buffer, int length);
+
+	void (*release)(struct iio_buffer *buffer);
 };
 
 /**
@@ -67,6 +72,7 @@ struct iio_buffer_access_funcs {
  * @demux_list:		[INTERN] list of operations required to demux the scan.
  * @demux_bounce:	[INTERN] buffer for doing gather from incoming scan.
  * @buffer_list:	[INTERN] entry in the devices list of current buffers.
+ * @ref:		[INTERN] reference count of the buffer.
  */
 struct iio_buffer {
 	int					length;
@@ -83,6 +89,7 @@ struct iio_buffer {
 	struct list_head			demux_list;
 	void					*demux_bounce;
 	struct list_head			buffer_list;
+	struct kref				ref;
 };
 
 /**
@@ -204,6 +211,24 @@ int iio_sw_buffer_preenable(struct iio_dev *indio_dev);
 bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev,
 	const unsigned long *mask);
 
+struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer);
+void iio_buffer_put(struct iio_buffer *buffer);
+
+/**
+ * iio_device_attach_buffer - Attach a buffer to a IIO device
+ * @indio_dev: The device the buffer should be attached to
+ * @buffer: The buffer to attach to the device
+ *
+ * This function attaches a buffer to a IIO device. The buffer stays attached to
+ * the device until the device is freed. The function should only be called at
+ * most once per device.
+ */
+static inline void iio_device_attach_buffer(struct iio_dev *indio_dev,
+	struct iio_buffer *buffer)
+{
+	indio_dev->buffer = iio_buffer_get(buffer);
+}
+
 #else /* CONFIG_IIO_BUFFER */
 
 static inline int iio_buffer_register(struct iio_dev *indio_dev,
@@ -216,6 +241,9 @@ static inline int iio_buffer_register(struct iio_dev *indio_dev,
 static inline void iio_buffer_unregister(struct iio_dev *indio_dev)
 {}
 
+static inline void iio_buffer_get(struct iio_buffer *buffer) {}
+static inline void iio_buffer_put(struct iio_buffer *buffer) {}
+
 #endif /* CONFIG_IIO_BUFFER */
 
 #endif /* _IIO_BUFFER_GENERIC_H_ */
-- 
cgit v1.2.3


From b4e3ac0a204ff1775c69924510f49922a56910a7 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 7 Oct 2013 15:11:00 +0100
Subject: iio: Extend the event config interface

The event configuration interface of the IIO framework has not been getting the
same attention as other parts. As a result it has not seen the same improvements
as e.g. the channel interface has seen with the introduction of the channel spec
struct. Currently all the event config callbacks take a u64 (the so called event
code) to pass all the different information about for which event the callback
is invoked. The callback function then has to extract the information it is
interested in using some macros with rather long names. Most information encoded
in the event code comes straight from the iio_chan_spec struct the event was
registered for. Since we always have a handle to the channel spec when we call
the event callbacks the first step is to add the channel spec as a parameter to
the event callbacks. The two remaining things encoded in the event code are the
type and direction of the event. Instead of passing them in one parameter, add
one parameter for each of them and remove the eventcode from the event
callbacks. The patch also adds a new iio_event_info parameter to the
{read,write}_event_value callbacks. This makes it possible, similar to the
iio_chan_info_enum for channels, to specify additional properties other than
just the value for an event. Furthermore the new interface will allow to
register shared events. This is e.g. useful if a device allows configuring a
threshold event, but the threshold setting is the same for all channels.

To implement this the patch adds a new iio_event_spec struct which is similar to
the iio_chan_spec struct. It as two field to specify the type and the direction
of the event. Furthermore it has a mask field for each one of the different
iio_shared_by types. These mask fields holds which kind of attributes should be
registered for the event. Creation of the attributes follows the same rules as
the for the channel attributes. E.g. for the separate_mask there will be a
attribute for each channel with this event, for the shared_by_type there will
only be one attribute per channel type. The iio_chan_spec struct gets two new
fields, 'event_spec' and 'num_event_specs', which is used to specify which the
events for this channel. These two fields are going to replace the channel's
event_mask field.

For now both the old and the new event config interface coexist, but over the
few patches all drivers will be converted from the old to the new interface.
Once that is done all code for supporting the old interface will be removed.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-event.c | 193 ++++++++++++++++++++++++++++++++++-----
 include/linux/iio/events.h       |  14 ---
 include/linux/iio/iio.h          |  58 ++++++++++++
 include/linux/iio/types.h        |  19 ++++
 4 files changed, 248 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index 4a3fd5acda94..b7a5d7cbed42 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -201,6 +201,26 @@ static const char * const iio_ev_dir_text[] = {
 	[IIO_EV_DIR_FALLING] = "falling"
 };
 
+static const char * const iio_ev_info_text[] = {
+	[IIO_EV_INFO_ENABLE] = "en",
+	[IIO_EV_INFO_VALUE] = "value",
+};
+
+static enum iio_event_direction iio_ev_attr_dir(struct iio_dev_attr *attr)
+{
+	return attr->c->event_spec[attr->address & 0xffff].dir;
+}
+
+static enum iio_event_type iio_ev_attr_type(struct iio_dev_attr *attr)
+{
+	return attr->c->event_spec[attr->address & 0xffff].type;
+}
+
+static enum iio_event_info iio_ev_attr_info(struct iio_dev_attr *attr)
+{
+	return (attr->address >> 16) & 0xffff;
+}
+
 static ssize_t iio_ev_state_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf,
@@ -215,9 +235,14 @@ static ssize_t iio_ev_state_store(struct device *dev,
 	if (ret < 0)
 		return ret;
 
-	ret = indio_dev->info->write_event_config(indio_dev,
-						  this_attr->address,
-						  val);
+	if (indio_dev->info->write_event_config)
+		ret = indio_dev->info->write_event_config(indio_dev,
+			this_attr->address, val);
+	else
+		ret = indio_dev->info->write_event_config_new(indio_dev,
+			this_attr->c, iio_ev_attr_type(this_attr),
+			iio_ev_attr_dir(this_attr), val);
+
 	return (ret < 0) ? ret : len;
 }
 
@@ -227,9 +252,15 @@ static ssize_t iio_ev_state_show(struct device *dev,
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
 	struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
-	int val = indio_dev->info->read_event_config(indio_dev,
-						     this_attr->address);
+	int val;
 
+	if (indio_dev->info->read_event_config)
+		val = indio_dev->info->read_event_config(indio_dev,
+			this_attr->address);
+	else
+		val = indio_dev->info->read_event_config_new(indio_dev,
+			this_attr->c, iio_ev_attr_type(this_attr),
+			iio_ev_attr_dir(this_attr));
 	if (val < 0)
 		return val;
 	else
@@ -242,14 +273,24 @@ static ssize_t iio_ev_value_show(struct device *dev,
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
 	struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
-	int val, ret;
-
-	ret = indio_dev->info->read_event_value(indio_dev,
-						this_attr->address, &val);
-	if (ret < 0)
-		return ret;
+	int val, val2;
+	int ret;
 
-	return sprintf(buf, "%d\n", val);
+	if (indio_dev->info->read_event_value) {
+		ret = indio_dev->info->read_event_value(indio_dev,
+			this_attr->address, &val);
+		if (ret < 0)
+			return ret;
+		return sprintf(buf, "%d\n", val);
+	} else {
+		ret = indio_dev->info->read_event_value_new(indio_dev,
+			this_attr->c, iio_ev_attr_type(this_attr),
+			iio_ev_attr_dir(this_attr), iio_ev_attr_info(this_attr),
+			&val, &val2);
+		if (ret < 0)
+			return ret;
+		return iio_format_value(buf, ret, val, val2);
+	}
 }
 
 static ssize_t iio_ev_value_store(struct device *dev,
@@ -259,25 +300,120 @@ static ssize_t iio_ev_value_store(struct device *dev,
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
 	struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
-	int val;
+	int val, val2;
 	int ret;
 
-	if (!indio_dev->info->write_event_value)
+	if (!indio_dev->info->write_event_value &&
+		!indio_dev->info->write_event_value_new)
 		return -EINVAL;
 
-	ret = kstrtoint(buf, 10, &val);
-	if (ret)
-		return ret;
-
-	ret = indio_dev->info->write_event_value(indio_dev, this_attr->address,
-						 val);
+	if (indio_dev->info->write_event_value) {
+		ret = kstrtoint(buf, 10, &val);
+		if (ret)
+			return ret;
+		ret = indio_dev->info->write_event_value(indio_dev,
+			this_attr->address, val);
+	} else {
+		ret = iio_str_to_fixpoint(buf, 100000, &val, &val2);
+		if (ret)
+			return ret;
+		ret = indio_dev->info->write_event_value_new(indio_dev,
+			this_attr->c, iio_ev_attr_type(this_attr),
+			iio_ev_attr_dir(this_attr), iio_ev_attr_info(this_attr),
+			val, val2);
+	}
 	if (ret < 0)
 		return ret;
 
 	return len;
 }
 
-static int iio_device_add_event_sysfs(struct iio_dev *indio_dev,
+static int iio_device_add_event(struct iio_dev *indio_dev,
+	const struct iio_chan_spec *chan, unsigned int spec_index,
+	enum iio_event_type type, enum iio_event_direction dir,
+	enum iio_shared_by shared_by, const unsigned long *mask)
+{
+	ssize_t (*show)(struct device *, struct device_attribute *, char *);
+	ssize_t (*store)(struct device *, struct device_attribute *,
+		const char *, size_t);
+	unsigned int attrcount = 0;
+	unsigned int i;
+	char *postfix;
+	int ret;
+
+	for_each_set_bit(i, mask, sizeof(*mask)) {
+		postfix = kasprintf(GFP_KERNEL, "%s_%s_%s",
+				iio_ev_type_text[type], iio_ev_dir_text[dir],
+				iio_ev_info_text[i]);
+		if (postfix == NULL)
+			return -ENOMEM;
+
+		if (i == IIO_EV_INFO_ENABLE) {
+			show = iio_ev_state_show;
+			store = iio_ev_state_store;
+		} else {
+			show = iio_ev_value_show;
+			store = iio_ev_value_store;
+		}
+
+		ret = __iio_add_chan_devattr(postfix, chan, show, store,
+			 (i << 16) | spec_index, shared_by, &indio_dev->dev,
+			&indio_dev->event_interface->dev_attr_list);
+		kfree(postfix);
+
+		if (ret)
+			return ret;
+
+		attrcount++;
+	}
+
+	return attrcount;
+}
+
+static int iio_device_add_event_sysfs_new(struct iio_dev *indio_dev,
+	struct iio_chan_spec const *chan)
+{
+	int ret = 0, i, attrcount = 0;
+	enum iio_event_direction dir;
+	enum iio_event_type type;
+
+	for (i = 0; i < chan->num_event_specs; i++) {
+		type = chan->event_spec[i].type;
+		dir = chan->event_spec[i].dir;
+
+		ret = iio_device_add_event(indio_dev, chan, i, type, dir,
+			IIO_SEPARATE, &chan->event_spec[i].mask_separate);
+		if (ret < 0)
+			goto error_ret;
+		attrcount += ret;
+
+		ret = iio_device_add_event(indio_dev, chan, i, type, dir,
+			IIO_SHARED_BY_TYPE,
+			&chan->event_spec[i].mask_shared_by_type);
+		if (ret < 0)
+			goto error_ret;
+		attrcount += ret;
+
+		ret = iio_device_add_event(indio_dev, chan, i, type, dir,
+			IIO_SHARED_BY_DIR,
+			&chan->event_spec[i].mask_shared_by_dir);
+		if (ret < 0)
+			goto error_ret;
+		attrcount += ret;
+
+		ret = iio_device_add_event(indio_dev, chan, i, type, dir,
+			IIO_SHARED_BY_ALL,
+			&chan->event_spec[i].mask_shared_by_all);
+		if (ret < 0)
+			goto error_ret;
+		attrcount += ret;
+	}
+	ret = attrcount;
+error_ret:
+	return ret;
+}
+
+static int iio_device_add_event_sysfs_old(struct iio_dev *indio_dev,
 				      struct iio_chan_spec const *chan)
 {
 	int ret = 0, i, attrcount = 0;
@@ -350,6 +486,16 @@ error_ret:
 	return ret;
 }
 
+
+static int iio_device_add_event_sysfs(struct iio_dev *indio_dev,
+				      struct iio_chan_spec const *chan)
+{
+	if (chan->event_mask)
+		return iio_device_add_event_sysfs_old(indio_dev, chan);
+	else
+		return iio_device_add_event_sysfs_new(indio_dev, chan);
+}
+
 static inline int __iio_add_event_config_attrs(struct iio_dev *indio_dev)
 {
 	int j, ret, attrcount = 0;
@@ -369,9 +515,12 @@ static bool iio_check_for_dynamic_events(struct iio_dev *indio_dev)
 {
 	int j;
 
-	for (j = 0; j < indio_dev->num_channels; j++)
+	for (j = 0; j < indio_dev->num_channels; j++) {
 		if (indio_dev->channels[j].event_mask != 0)
 			return true;
+		if (indio_dev->channels[j].num_event_specs != 0)
+			return true;
+	}
 	return false;
 }
 
diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h
index 13ce220c7003..5dab2c41031f 100644
--- a/include/linux/iio/events.h
+++ b/include/linux/iio/events.h
@@ -26,20 +26,6 @@ struct iio_event_data {
 
 #define IIO_GET_EVENT_FD_IOCTL _IOR('i', 0x90, int)
 
-enum iio_event_type {
-	IIO_EV_TYPE_THRESH,
-	IIO_EV_TYPE_MAG,
-	IIO_EV_TYPE_ROC,
-	IIO_EV_TYPE_THRESH_ADAPTIVE,
-	IIO_EV_TYPE_MAG_ADAPTIVE,
-};
-
-enum iio_event_direction {
-	IIO_EV_DIR_EITHER,
-	IIO_EV_DIR_RISING,
-	IIO_EV_DIR_FALLING,
-};
-
 /**
  * IIO_EVENT_CODE() - create event identifier
  * @chan_type:	Type of the channel. Should be one of enum iio_chan_type.
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index ac1cb8f1858c..256a90a1bea6 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -138,6 +138,29 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev,
 	.private = (uintptr_t)(_e), \
 }
 
+/**
+ * struct iio_event_spec - specification for a channel event
+ * @type:		    Type of the event
+ * @dir:		    Direction of the event
+ * @mask_separate:	    Bit mask of enum iio_event_info values. Attributes
+ *			    set in this mask will be registered per channel.
+ * @mask_shared_by_type:    Bit mask of enum iio_event_info values. Attributes
+ *			    set in this mask will be shared by channel type.
+ * @mask_shared_by_dir:	    Bit mask of enum iio_event_info values. Attributes
+ *			    set in this mask will be shared by channel type and
+ *			    direction.
+ * @mask_shared_by_all:	    Bit mask of enum iio_event_info values. Attributes
+ *			    set in this mask will be shared by all channels.
+ */
+struct iio_event_spec {
+	enum iio_event_type type;
+	enum iio_event_direction dir;
+	unsigned long mask_separate;
+	unsigned long mask_shared_by_type;
+	unsigned long mask_shared_by_dir;
+	unsigned long mask_shared_by_all;
+};
+
 /**
  * struct iio_chan_spec - specification of a single channel
  * @type:		What type of measurement is the channel making.
@@ -163,6 +186,9 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev,
  * @info_mask_shared_by_all: What information is to be exported that is shared
  *			by all channels.
  * @event_mask:		What events can this channel produce.
+ * @event_spec:		Array of events which should be registered for this
+ *			channel.
+ * @num_event_specs:	Size of the event_spec array.
  * @ext_info:		Array of extended info attributes for this channel.
  *			The array is NULL terminated, the last element should
  *			have its name field set to NULL.
@@ -201,6 +227,8 @@ struct iio_chan_spec {
 	long			info_mask_shared_by_dir;
 	long			info_mask_shared_by_all;
 	long			event_mask;
+	const struct iio_event_spec *event_spec;
+	unsigned int		num_event_specs;
 	const struct iio_chan_spec_ext_info *ext_info;
 	const char		*extend_name;
 	const char		*datasheet_name;
@@ -283,6 +311,12 @@ struct iio_dev;
  *			is event dependant. event_code specifies which event.
  * @write_event_value:	write the value associated with the event.
  *			Meaning is event dependent.
+ * @read_event_config_new: find out if the event is enabled. New style interface.
+ * @write_event_config_new: set if the event is enabled. New style interface.
+ * @read_event_value_new: read a configuration value associated with the event.
+ *                         New style interface.
+ * @write_event_value_new: write a configuration value for the event. New style
+ *			   interface.
  * @validate_trigger:	function to validate the trigger when the
  *			current trigger gets changed.
  * @update_scan_mode:	function to configure device and scan buffer when
@@ -323,6 +357,30 @@ struct iio_info {
 	int (*write_event_value)(struct iio_dev *indio_dev,
 				 u64 event_code,
 				 int val);
+
+	int (*read_event_config_new)(struct iio_dev *indio_dev,
+				 const struct iio_chan_spec *chan,
+				 enum iio_event_type type,
+				 enum iio_event_direction dir);
+
+	int (*write_event_config_new)(struct iio_dev *indio_dev,
+				  const struct iio_chan_spec *chan,
+				  enum iio_event_type type,
+				  enum iio_event_direction dir,
+				  int state);
+
+	int (*read_event_value_new)(struct iio_dev *indio_dev,
+				const struct iio_chan_spec *chan,
+				enum iio_event_type type,
+				enum iio_event_direction dir,
+				enum iio_event_info info, int *val, int *val2);
+
+	int (*write_event_value_new)(struct iio_dev *indio_dev,
+				 const struct iio_chan_spec *chan,
+				 enum iio_event_type type,
+				 enum iio_event_direction dir,
+				 enum iio_event_info info, int val, int val2);
+
 	int (*validate_trigger)(struct iio_dev *indio_dev,
 				struct iio_trigger *trig);
 	int (*update_scan_mode)(struct iio_dev *indio_dev,
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 88bf0f0d27b4..18339ef4ff5d 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -54,6 +54,25 @@ enum iio_modifier {
 	IIO_MOD_LIGHT_BLUE,
 };
 
+enum iio_event_type {
+	IIO_EV_TYPE_THRESH,
+	IIO_EV_TYPE_MAG,
+	IIO_EV_TYPE_ROC,
+	IIO_EV_TYPE_THRESH_ADAPTIVE,
+	IIO_EV_TYPE_MAG_ADAPTIVE,
+};
+
+enum iio_event_info {
+	IIO_EV_INFO_ENABLE,
+	IIO_EV_INFO_VALUE,
+};
+
+enum iio_event_direction {
+	IIO_EV_DIR_EITHER,
+	IIO_EV_DIR_RISING,
+	IIO_EV_DIR_FALLING,
+};
+
 #define IIO_VAL_INT 1
 #define IIO_VAL_INT_PLUS_MICRO 2
 #define IIO_VAL_INT_PLUS_NANO 3
-- 
cgit v1.2.3


From ec6670ae53c13d767bdb7b3e37755ad661395380 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 7 Oct 2013 15:11:00 +0100
Subject: iio: Add a hysteresis event info attribute

For some devices it is possible to configure a hysteresis for threshold (or
similar) events. This patch adds a new hysteresis event info type which allows
for easy creation and read/write handling of the sysfs attribute.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 Documentation/ABI/testing/sysfs-bus-iio | 56 +++++++++++++++++++++++++++++++++
 drivers/iio/industrialio-event.c        |  1 +
 include/linux/iio/types.h               |  1 +
 3 files changed, 58 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 2d73620874b2..b20e829d350f 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -537,6 +537,62 @@ Description:
 		value is in raw device units or in processed units (as _raw
 		and _input do on sysfs direct channel read attributes).
 
+What:		/sys/.../events/in_accel_x_thresh_rising_hysteresis
+What:		/sys/.../events/in_accel_x_thresh_falling_hysteresis
+What:		/sys/.../events/in_accel_x_thresh_either_hysteresis
+What:		/sys/.../events/in_accel_y_thresh_rising_hysteresis
+What:		/sys/.../events/in_accel_y_thresh_falling_hysteresis
+What:		/sys/.../events/in_accel_y_thresh_either_hysteresis
+What:		/sys/.../events/in_accel_z_thresh_rising_hysteresis
+What:		/sys/.../events/in_accel_z_thresh_falling_hysteresis
+What:		/sys/.../events/in_accel_z_thresh_either_hysteresis
+What:		/sys/.../events/in_anglvel_x_thresh_rising_hysteresis
+What:		/sys/.../events/in_anglvel_x_thresh_falling_hysteresis
+What:		/sys/.../events/in_anglvel_x_thresh_either_hysteresis
+What:		/sys/.../events/in_anglvel_y_thresh_rising_hysteresis
+What:		/sys/.../events/in_anglvel_y_thresh_falling_hysteresis
+What:		/sys/.../events/in_anglvel_y_thresh_either_hysteresis
+What:		/sys/.../events/in_anglvel_z_thresh_rising_hysteresis
+What:		/sys/.../events/in_anglvel_z_thresh_falling_hysteresis
+What:		/sys/.../events/in_anglvel_z_thresh_either_hysteresis
+What:		/sys/.../events/in_magn_x_thresh_rising_hysteresis
+What:		/sys/.../events/in_magn_x_thresh_falling_hysteresis
+What:		/sys/.../events/in_magn_x_thresh_either_hysteresis
+What:		/sys/.../events/in_magn_y_thresh_rising_hysteresis
+What:		/sys/.../events/in_magn_y_thresh_falling_hysteresis
+What:		/sys/.../events/in_magn_y_thresh_either_hysteresis
+What:		/sys/.../events/in_magn_z_thresh_rising_hysteresis
+What:		/sys/.../events/in_magn_z_thresh_falling_hysteresis
+What:		/sys/.../events/in_magn_z_thresh_either_hysteresis
+What:		/sys/.../events/in_voltageY_thresh_rising_hysteresis
+What:		/sys/.../events/in_voltageY_thresh_falling_hysteresis
+What:		/sys/.../events/in_voltageY_thresh_either_hysteresis
+What:		/sys/.../events/in_tempY_thresh_rising_hysteresis
+What:		/sys/.../events/in_tempY_thresh_falling_hysteresis
+What:		/sys/.../events/in_tempY_thresh_either_hysteresis
+What:		/sys/.../events/in_illuminance0_thresh_falling_hysteresis
+what:		/sys/.../events/in_illuminance0_thresh_rising_hysteresis
+what:		/sys/.../events/in_illuminance0_thresh_either_hysteresis
+what:		/sys/.../events/in_proximity0_thresh_falling_hysteresis
+what:		/sys/.../events/in_proximity0_thresh_rising_hysteresis
+what:		/sys/.../events/in_proximity0_thresh_either_hysteresis
+KernelVersion:	3.13
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Specifies the hysteresis of threshold that the device is comparing
+		against for the events enabled by
+		<type>Y[_name]_thresh[_(rising|falling)]_hysteresis.
+		If separate attributes exist for the two directions, but
+		direction is not specified for this attribute, then a single
+		hysteresis value applies to both directions.
+		For falling events the hysteresis is added to the _value attribute for
+		this event to get the upper threshold for when the event goes back to
+		normal, for rising events the hysteresis is subtracted from the _value
+		attribute. E.g. if in_voltage0_raw_thresh_rising_value is set to 1200
+		and in_voltage0_raw_thresh_rising_hysteresis is set to 50. The event
+		will get activated once in_voltage0_raw goes above 1200 and will become
+		deactived again once the value falls below 1150.
+
 What:		/sys/.../events/in_accel_x_raw_roc_rising_value
 What:		/sys/.../events/in_accel_x_raw_roc_falling_value
 What:		/sys/.../events/in_accel_y_raw_roc_rising_value
diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index b7a5d7cbed42..dac15b9f9df8 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -204,6 +204,7 @@ static const char * const iio_ev_dir_text[] = {
 static const char * const iio_ev_info_text[] = {
 	[IIO_EV_INFO_ENABLE] = "en",
 	[IIO_EV_INFO_VALUE] = "value",
+	[IIO_EV_INFO_HYSTERESIS] = "hysteresis",
 };
 
 static enum iio_event_direction iio_ev_attr_dir(struct iio_dev_attr *attr)
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 18339ef4ff5d..4ac928ee31c5 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -65,6 +65,7 @@ enum iio_event_type {
 enum iio_event_info {
 	IIO_EV_INFO_ENABLE,
 	IIO_EV_INFO_VALUE,
+	IIO_EV_INFO_HYSTERESIS,
 };
 
 enum iio_event_direction {
-- 
cgit v1.2.3


From 674d0ed8588c11ec9f70c8427ac83a73e0d156d5 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 13 Sep 2013 10:59:27 -0700
Subject: hwmon: (atxp1) Set and use error code from vid_to_reg()

vid_to_reg() returns -1 if it encounters an error. Return -EINVAL instead.
Its only caller, atxp1_storevcore(), doesn't use the return code but
returns -1 instead, which is wrong anyway as it means -EPERM.
Use the return value from vid_to_reg() instead to report the error.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Jean Delvare <khali@linux-fr.org>
---
 drivers/hwmon/atxp1.c     | 3 +--
 include/linux/hwmon-vid.h | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c
index aecb9ea7beb5..ddff02e3e66f 100644
--- a/drivers/hwmon/atxp1.c
+++ b/drivers/hwmon/atxp1.c
@@ -147,10 +147,9 @@ static ssize_t atxp1_storevcore(struct device *dev,
 
 	/* Calculate VID */
 	vid = vid_to_reg(vcore, data->vrm);
-
 	if (vid < 0) {
 		dev_err(dev, "VID calculation failed.\n");
-		return -1;
+		return vid;
 	}
 
 	/*
diff --git a/include/linux/hwmon-vid.h b/include/linux/hwmon-vid.h
index f346e4d5381c..da0a680e2f6d 100644
--- a/include/linux/hwmon-vid.h
+++ b/include/linux/hwmon-vid.h
@@ -38,7 +38,7 @@ static inline int vid_to_reg(int val, u8 vrm)
 		return ((val >= 1100) && (val <= 1850) ?
 			((18499 - val * 10) / 25 + 5) / 10 : -1);
 	default:
-		return -1;
+		return -EINVAL;
 	}
 }
 
-- 
cgit v1.2.3


From bab2243ce1897865e31ea6d59b0478391f51812b Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 6 Jul 2013 13:57:23 -0700
Subject: hwmon: Introduce hwmon_device_register_with_groups

hwmon_device_register_with_groups() lets callers register a hwmon device
together with all sysfs attributes in a single call.

When using hwmon_device_register_with_groups(), hwmon attributes are attached
to the hwmon device directly and no longer with its parent device.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 122 +++++++++++++++++++++++++++++++++++++++++++-------
 include/linux/hwmon.h |   5 +++
 2 files changed, 111 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 646314f7c839..a982528293ea 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/slab.h>
 #include <linux/kdev_t.h>
 #include <linux/idr.h>
 #include <linux/hwmon.h>
@@ -25,35 +26,122 @@
 #define HWMON_ID_PREFIX "hwmon"
 #define HWMON_ID_FORMAT HWMON_ID_PREFIX "%d"
 
-static struct class *hwmon_class;
+struct hwmon_device {
+	const char *name;
+	struct device dev;
+};
+#define to_hwmon_device(d) container_of(d, struct hwmon_device, dev)
+
+static ssize_t
+show_name(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", to_hwmon_device(dev)->name);
+}
+static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
+
+static struct attribute *hwmon_dev_attrs[] = {
+	&dev_attr_name.attr,
+	NULL
+};
+
+static umode_t hwmon_dev_name_is_visible(struct kobject *kobj,
+					 struct attribute *attr, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+
+	if (to_hwmon_device(dev)->name == NULL)
+		return 0;
+
+	return attr->mode;
+}
+
+static struct attribute_group hwmon_dev_attr_group = {
+	.attrs		= hwmon_dev_attrs,
+	.is_visible	= hwmon_dev_name_is_visible,
+};
+
+static const struct attribute_group *hwmon_dev_attr_groups[] = {
+	&hwmon_dev_attr_group,
+	NULL
+};
+
+static void hwmon_dev_release(struct device *dev)
+{
+	kfree(to_hwmon_device(dev));
+}
+
+static struct class hwmon_class = {
+	.name = "hwmon",
+	.owner = THIS_MODULE,
+	.dev_groups = hwmon_dev_attr_groups,
+	.dev_release = hwmon_dev_release,
+};
 
 static DEFINE_IDA(hwmon_ida);
 
 /**
- * hwmon_device_register - register w/ hwmon
- * @dev: the device to register
+ * hwmon_device_register_with_groups - register w/ hwmon
+ * @dev: the parent device
+ * @name: hwmon name attribute
+ * @drvdata: driver data to attach to created device
+ * @groups: List of attribute groups to create
  *
  * hwmon_device_unregister() must be called when the device is no
  * longer needed.
  *
  * Returns the pointer to the new device.
  */
-struct device *hwmon_device_register(struct device *dev)
+struct device *
+hwmon_device_register_with_groups(struct device *dev, const char *name,
+				  void *drvdata,
+				  const struct attribute_group **groups)
 {
-	struct device *hwdev;
-	int id;
+	struct hwmon_device *hwdev;
+	int err, id;
 
 	id = ida_simple_get(&hwmon_ida, 0, 0, GFP_KERNEL);
 	if (id < 0)
 		return ERR_PTR(id);
 
-	hwdev = device_create(hwmon_class, dev, MKDEV(0, 0), NULL,
-			      HWMON_ID_FORMAT, id);
+	hwdev = kzalloc(sizeof(*hwdev), GFP_KERNEL);
+	if (hwdev == NULL) {
+		err = -ENOMEM;
+		goto ida_remove;
+	}
 
-	if (IS_ERR(hwdev))
-		ida_simple_remove(&hwmon_ida, id);
+	hwdev->name = name;
+	hwdev->dev.class = &hwmon_class;
+	hwdev->dev.parent = dev;
+	hwdev->dev.groups = groups;
+	hwdev->dev.of_node = dev ? dev->of_node : NULL;
+	dev_set_drvdata(&hwdev->dev, drvdata);
+	dev_set_name(&hwdev->dev, HWMON_ID_FORMAT, id);
+	err = device_register(&hwdev->dev);
+	if (err)
+		goto free;
+
+	return &hwdev->dev;
+
+free:
+	kfree(hwdev);
+ida_remove:
+	ida_simple_remove(&hwmon_ida, id);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(hwmon_device_register_with_groups);
 
-	return hwdev;
+/**
+ * hwmon_device_register - register w/ hwmon
+ * @dev: the device to register
+ *
+ * hwmon_device_unregister() must be called when the device is no
+ * longer needed.
+ *
+ * Returns the pointer to the new device.
+ */
+struct device *hwmon_device_register(struct device *dev)
+{
+	return hwmon_device_register_with_groups(dev, NULL, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(hwmon_device_register);
 
@@ -105,19 +193,21 @@ static void __init hwmon_pci_quirks(void)
 
 static int __init hwmon_init(void)
 {
+	int err;
+
 	hwmon_pci_quirks();
 
-	hwmon_class = class_create(THIS_MODULE, "hwmon");
-	if (IS_ERR(hwmon_class)) {
-		pr_err("couldn't create sysfs class\n");
-		return PTR_ERR(hwmon_class);
+	err = class_register(&hwmon_class);
+	if (err) {
+		pr_err("couldn't register hwmon sysfs class\n");
+		return err;
 	}
 	return 0;
 }
 
 static void __exit hwmon_exit(void)
 {
-	class_destroy(hwmon_class);
+	class_unregister(&hwmon_class);
 }
 
 subsys_initcall(hwmon_init);
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index b2514f70d591..6d02ff77ae1a 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -15,8 +15,13 @@
 #define _HWMON_H_
 
 struct device;
+struct attribute_group;
 
 struct device *hwmon_device_register(struct device *dev);
+struct device *
+hwmon_device_register_with_groups(struct device *dev, const char *name,
+				  void *drvdata,
+				  const struct attribute_group **groups);
 
 void hwmon_device_unregister(struct device *dev);
 
-- 
cgit v1.2.3


From 74188cba088192e14cd7fd5433876e8c947bcdd8 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 11 Jul 2013 20:00:12 -0700
Subject: hwmon: Provide managed hwmon registration

Drivers using the new hwmon_device_register_with_groups API often have a
remove function which consists solely of a call hwmon_device_unregister().

Provide support for devm_hwmon_device_register_with_groups and
devm_hwmon_device_unregister to allow this repeated code to be removed
and help eliminate error handling code.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hwmon.h |  5 ++++
 2 files changed, 68 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index a982528293ea..e176a43af63d 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -163,6 +163,69 @@ void hwmon_device_unregister(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(hwmon_device_unregister);
 
+static void devm_hwmon_release(struct device *dev, void *res)
+{
+	struct device *hwdev = *(struct device **)res;
+
+	hwmon_device_unregister(hwdev);
+}
+
+/**
+ * devm_hwmon_device_register_with_groups - register w/ hwmon
+ * @dev: the parent device
+ * @name: hwmon name attribute
+ * @drvdata: driver data to attach to created device
+ * @groups: List of attribute groups to create
+ *
+ * Returns the pointer to the new device. The new device is automatically
+ * unregistered with the parent device.
+ */
+struct device *
+devm_hwmon_device_register_with_groups(struct device *dev, const char *name,
+				       void *drvdata,
+				       const struct attribute_group **groups)
+{
+	struct device **ptr, *hwdev;
+
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	ptr = devres_alloc(devm_hwmon_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	hwdev = hwmon_device_register_with_groups(dev, name, drvdata, groups);
+	if (IS_ERR(hwdev))
+		goto error;
+
+	*ptr = hwdev;
+	devres_add(dev, ptr);
+	return hwdev;
+
+error:
+	devres_free(ptr);
+	return hwdev;
+}
+EXPORT_SYMBOL_GPL(devm_hwmon_device_register_with_groups);
+
+static int devm_hwmon_match(struct device *dev, void *res, void *data)
+{
+	struct device **hwdev = res;
+
+	return *hwdev == data;
+}
+
+/**
+ * devm_hwmon_device_unregister - removes a previously registered hwmon device
+ *
+ * @dev: the parent device of the device to unregister
+ */
+void devm_hwmon_device_unregister(struct device *dev)
+{
+	WARN_ON(devres_release(dev, devm_hwmon_release, devm_hwmon_match, dev));
+}
+EXPORT_SYMBOL_GPL(devm_hwmon_device_unregister);
+
 static void __init hwmon_pci_quirks(void)
 {
 #if defined CONFIG_X86 && defined CONFIG_PCI
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 6d02ff77ae1a..09354f6c1d63 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -22,7 +22,12 @@ struct device *
 hwmon_device_register_with_groups(struct device *dev, const char *name,
 				  void *drvdata,
 				  const struct attribute_group **groups);
+struct device *
+devm_hwmon_device_register_with_groups(struct device *dev, const char *name,
+				       void *drvdata,
+				       const struct attribute_group **groups);
 
 void hwmon_device_unregister(struct device *dev);
+void devm_hwmon_device_unregister(struct device *dev);
 
 #endif
-- 
cgit v1.2.3


From 6d9d41e57440e32a3400f37aa05ef7a1a09ced64 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 14:22:28 -0700
Subject: KVM: Move gfn_to_index to x86 specific code

The gfn_to_index function relies on huge page defines which either may
not make sense on systems that don't support huge pages or are defined
in an unconvenient way for other architectures.  Since this is
x86-specific, move the function to arch/x86/include/asm/kvm_host.h.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 7 +++++++
 include/linux/kvm_host.h        | 7 -------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8dd143a65d60..5cbf3166257c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
 #define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7c961e1e9270..f6dccde755f6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -841,13 +841,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
 	return gfn_to_memslot(kvm, gfn)->id;
 }
 
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
-	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
-	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
-		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
 static inline gfn_t
 hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
 {
-- 
cgit v1.2.3


From 795aa6ef6a1aba99050735eadd0c2341b789b53b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 10 Oct 2013 09:21:55 +0200
Subject: netfilter: pass hook ops to hookfn

Pass the hook ops to the hookfn to allow for generic hook
functions. This change is required by nf_tables.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h                      |  3 +-
 net/bridge/br_netfilter.c                      | 22 +++++++++-----
 net/bridge/netfilter/ebtable_filter.c          | 16 ++++++----
 net/bridge/netfilter/ebtable_nat.c             | 16 ++++++----
 net/decnet/netfilter/dn_rtmsg.c                |  2 +-
 net/ipv4/netfilter/arptable_filter.c           |  5 +--
 net/ipv4/netfilter/ipt_CLUSTERIP.c             |  2 +-
 net/ipv4/netfilter/ipt_SYNPROXY.c              |  2 +-
 net/ipv4/netfilter/iptable_filter.c            |  7 +++--
 net/ipv4/netfilter/iptable_mangle.c            | 10 +++---
 net/ipv4/netfilter/iptable_nat.c               | 26 ++++++++--------
 net/ipv4/netfilter/iptable_raw.c               |  6 ++--
 net/ipv4/netfilter/iptable_security.c          |  7 +++--
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 12 ++++----
 net/ipv4/netfilter/nf_defrag_ipv4.c            |  6 ++--
 net/ipv6/netfilter/ip6t_SYNPROXY.c             |  2 +-
 net/ipv6/netfilter/ip6table_filter.c           |  5 +--
 net/ipv6/netfilter/ip6table_mangle.c           | 10 +++---
 net/ipv6/netfilter/ip6table_nat.c              | 27 +++++++++--------
 net/ipv6/netfilter/ip6table_raw.c              |  5 +--
 net/ipv6/netfilter/ip6table_security.c         |  5 +--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 14 +++++----
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c      |  6 ++--
 net/netfilter/core.c                           |  2 +-
 net/netfilter/ipvs/ip_vs_core.c                | 42 +++++++++++++-------------
 security/selinux/hooks.c                       | 10 +++---
 26 files changed, 148 insertions(+), 122 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 61223c52414f..fef7e67f7101 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -42,7 +42,8 @@ int netfilter_init(void);
 
 struct sk_buff;
 
-typedef unsigned int nf_hookfn(unsigned int hooknum,
+struct nf_hook_ops;
+typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
 			       struct sk_buff *skb,
 			       const struct net_device *in,
 			       const struct net_device *out,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f87736270eaa..878f008afefa 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -619,7 +619,7 @@ bad:
 
 /* Replicate the checks that IPv6 does on packet reception and pass the packet
  * to ip6tables, which doesn't support NAT, so things are fairly simple. */
-static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
+static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
 					   const struct net_device *in,
 					   const struct net_device *out,
@@ -669,7 +669,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
  * receiving device) to make netfilter happy, the REDIRECT
  * target in particular.  Save the original destination IP
  * address to be able to detect DNAT afterwards. */
-static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
@@ -691,7 +692,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 			return NF_ACCEPT;
 
 		nf_bridge_pull_encap_header_rcsum(skb);
-		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
+		return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn);
 	}
 
 	if (!brnf_call_iptables && !br->nf_call_iptables)
@@ -727,7 +728,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
  * took place when the packet entered the bridge), but we
  * register an IPv4 PRE_ROUTING 'sabotage' hook that will
  * prevent this from happening. */
-static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
+				   struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
@@ -765,7 +767,8 @@ static int br_nf_forward_finish(struct sk_buff *skb)
  * but we are still able to filter on the 'real' indev/outdev
  * because of the physdev module. For ARP, indev and outdev are the
  * bridge ports. */
-static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
+				     struct sk_buff *skb,
 				     const struct net_device *in,
 				     const struct net_device *out,
 				     int (*okfn)(struct sk_buff *))
@@ -818,7 +821,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
@@ -878,7 +882,8 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 #endif
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
-static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
+				       struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
 				       int (*okfn)(struct sk_buff *))
@@ -923,7 +928,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 /* IP/SABOTAGE *****************************************************/
 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
  * for the second time. */
-static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
+static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
+				   struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 94b2b700cff8..bb2da7b706e7 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -60,17 +60,21 @@ static const struct ebt_table frame_filter =
 };
 
 static unsigned int
-ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
-   const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+	    const struct net_device *in, const struct net_device *out,
+	    int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter);
+	return ebt_do_table(ops->hooknum, skb, in, out,
+			    dev_net(in)->xt.frame_filter);
 }
 
 static unsigned int
-ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
-   const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+	     const struct net_device *in, const struct net_device *out,
+	     int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter);
+	return ebt_do_table(ops->hooknum, skb, in, out,
+			    dev_net(out)->xt.frame_filter);
 }
 
 static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 322555acdd40..bd238f1f105b 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -60,17 +60,21 @@ static struct ebt_table frame_nat =
 };
 
 static unsigned int
-ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in
-   , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+	   const struct net_device *in, const struct net_device *out,
+	   int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat);
+	return ebt_do_table(ops->hooknum, skb, in, out,
+			    dev_net(in)->xt.frame_nat);
 }
 
 static unsigned int
-ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in
-   , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+	    const struct net_device *in, const struct net_device *out,
+	    int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat);
+	return ebt_do_table(ops->hooknum, skb, in, out,
+			    dev_net(out)->xt.frame_nat);
 }
 
 static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 2a7efe388344..e83015cecfa7 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb)
 }
 
 
-static unsigned int dnrmg_hook(unsigned int hook,
+static unsigned int dnrmg_hook(const struct nf_hook_ops *ops,
 			struct sk_buff *skb,
 			const struct net_device *in,
 			const struct net_device *out,
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index a865f6f94013..802ddecb30b8 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -27,13 +27,14 @@ static const struct xt_table packet_filter = {
 
 /* The work comes in here from netfilter.c */
 static unsigned int
-arptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
 	const struct net *net = dev_net((in != NULL) ? in : out);
 
-	return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
+	return arpt_do_table(skb, ops->hooknum, in, out,
+			     net->ipv4.arptable_filter);
 }
 
 static struct nf_hook_ops *arpfilter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0b732efd32e2..a2e2b61cd7da 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -483,7 +483,7 @@ static void arp_print(struct arp_payload *payload)
 #endif
 
 static unsigned int
-arp_mangle(unsigned int hook,
+arp_mangle(const struct nf_hook_ops *ops,
 	   struct sk_buff *skb,
 	   const struct net_device *in,
 	   const struct net_device *out,
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index b6346bf2fde3..01cffeaa0085 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -297,7 +297,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
+static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
 				       struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 50af5b45c050..e08a74a243a8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -33,20 +33,21 @@ static const struct xt_table packet_filter = {
 };
 
 static unsigned int
-iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		    const struct net_device *in, const struct net_device *out,
 		    int (*okfn)(struct sk_buff *))
 {
 	const struct net *net;
 
-	if (hook == NF_INET_LOCAL_OUT &&
+	if (ops->hooknum == NF_INET_LOCAL_OUT &&
 	    (skb->len < sizeof(struct iphdr) ||
 	     ip_hdrlen(skb) < sizeof(struct iphdr)))
 		/* root is playing with raw sockets. */
 		return NF_ACCEPT;
 
 	net = dev_net((in != NULL) ? in : out);
-	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
+	return ipt_do_table(skb, ops->hooknum, in, out,
+			    net->ipv4.iptable_filter);
 }
 
 static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 0d8cd82e0fad..6a5079c34bb3 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -79,19 +79,19 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-iptable_mangle_hook(unsigned int hook,
+iptable_mangle_hook(const struct nf_hook_ops *ops,
 		     struct sk_buff *skb,
 		     const struct net_device *in,
 		     const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	if (hook == NF_INET_LOCAL_OUT)
+	if (ops->hooknum == NF_INET_LOCAL_OUT)
 		return ipt_mangle_out(skb, out);
-	if (hook == NF_INET_POST_ROUTING)
-		return ipt_do_table(skb, hook, in, out,
+	if (ops->hooknum == NF_INET_POST_ROUTING)
+		return ipt_do_table(skb, ops->hooknum, in, out,
 				    dev_net(out)->ipv4.iptable_mangle);
 	/* PREROUTING/INPUT/FORWARD: */
-	return ipt_do_table(skb, hook, in, out,
+	return ipt_do_table(skb, ops->hooknum, in, out,
 			    dev_net(in)->ipv4.iptable_mangle);
 }
 
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 683bfaffed65..ee2886126e3d 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -61,7 +61,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv4_fn(unsigned int hooknum,
+nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
 	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
@@ -71,7 +71,7 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn_nat *nat;
 	/* maniptype == SRC for postrouting. */
-	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
 
 	/* We never see fragments: conntrack defrags on pre-routing
 	 * and local-out, and nf_nat_out protects post-routing.
@@ -108,7 +108,7 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 	case IP_CT_RELATED_REPLY:
 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
-							   hooknum))
+							   ops->hooknum))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
@@ -121,14 +121,14 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+			ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 		} else {
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
-			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 				goto oif_changed;
 		}
 		break;
@@ -137,11 +137,11 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
-		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 			goto oif_changed;
 	}
 
-	return nf_nat_packet(ct, ctinfo, hooknum, skb);
+	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
 
 oif_changed:
 	nf_ct_kill_acct(ct, ctinfo, skb);
@@ -149,7 +149,7 @@ oif_changed:
 }
 
 static unsigned int
-nf_nat_ipv4_in(unsigned int hooknum,
+nf_nat_ipv4_in(const struct nf_hook_ops *ops,
 	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
@@ -158,7 +158,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
 	unsigned int ret;
 	__be32 daddr = ip_hdr(skb)->daddr;
 
-	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    daddr != ip_hdr(skb)->daddr)
 		skb_dst_drop(skb);
@@ -167,7 +167,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv4_out(unsigned int hooknum,
+nf_nat_ipv4_out(const struct nf_hook_ops *ops,
 		struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
@@ -185,7 +185,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
 #ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -207,7 +207,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv4_local_fn(unsigned int hooknum,
+nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
 		     struct sk_buff *skb,
 		     const struct net_device *in,
 		     const struct net_device *out,
@@ -223,7 +223,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 1f82aea11df6..b2f7e8f98316 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -20,20 +20,20 @@ static const struct xt_table packet_raw = {
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-iptable_raw_hook(unsigned int hook, struct sk_buff *skb,
+iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		 const struct net_device *in, const struct net_device *out,
 		 int (*okfn)(struct sk_buff *))
 {
 	const struct net *net;
 
-	if (hook == NF_INET_LOCAL_OUT && 
+	if (ops->hooknum == NF_INET_LOCAL_OUT &&
 	    (skb->len < sizeof(struct iphdr) ||
 	     ip_hdrlen(skb) < sizeof(struct iphdr)))
 		/* root is playing with raw sockets. */
 		return NF_ACCEPT;
 
 	net = dev_net((in != NULL) ? in : out);
-	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw);
+	return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw);
 }
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index f867a8d38bf7..c86647ed2078 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -37,21 +37,22 @@ static const struct xt_table security_table = {
 };
 
 static unsigned int
-iptable_security_hook(unsigned int hook, struct sk_buff *skb,
+iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in,
 		      const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
 	const struct net *net;
 
-	if (hook == NF_INET_LOCAL_OUT &&
+	if (ops->hooknum == NF_INET_LOCAL_OUT &&
 	    (skb->len < sizeof(struct iphdr) ||
 	     ip_hdrlen(skb) < sizeof(struct iphdr)))
 		/* Somebody is playing with raw sockets. */
 		return NF_ACCEPT;
 
 	net = dev_net((in != NULL) ? in : out);
-	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security);
+	return ipt_do_table(skb, ops->hooknum, in, out,
+			    net->ipv4.iptable_security);
 }
 
 static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 86f5b34a4ed1..ecd8bec411c9 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 	return NF_ACCEPT;
 }
 
-static unsigned int ipv4_helper(unsigned int hooknum,
+static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
 				struct sk_buff *skb,
 				const struct net_device *in,
 				const struct net_device *out,
@@ -121,7 +121,7 @@ static unsigned int ipv4_helper(unsigned int hooknum,
 			    ct, ctinfo);
 }
 
-static unsigned int ipv4_confirm(unsigned int hooknum,
+static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
 				 struct sk_buff *skb,
 				 const struct net_device *in,
 				 const struct net_device *out,
@@ -147,16 +147,16 @@ out:
 	return nf_conntrack_confirm(skb);
 }
 
-static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
+	return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb);
 }
 
-static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -166,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 	if (skb->len < sizeof(struct iphdr) ||
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
-	return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
+	return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);
 }
 
 /* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 742815518b0f..12e13bd82b5b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -60,7 +60,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
 		return IP_DEFRAG_CONNTRACK_OUT + zone;
 }
 
-static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
 					  const struct net_device *in,
 					  const struct net_device *out,
@@ -83,7 +83,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 #endif
 	/* Gather fragments. */
 	if (ip_is_fragment(ip_hdr(skb))) {
-		enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb);
+		enum ip_defrag_users user =
+			nf_ct_defrag_user(ops->hooknum, skb);
+
 		if (nf_ct_ipv4_gather_frags(skb, user))
 			return NF_STOLEN;
 	}
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 2748b042da72..bf9f612c1bc2 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -312,7 +312,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
+static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
 				       struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 29b44b14c5ea..ca7f6c128086 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -32,13 +32,14 @@ static const struct xt_table packet_filter = {
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
 	const struct net *net = dev_net((in != NULL) ? in : out);
 
-	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter);
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     net->ipv6.ip6table_filter);
 }
 
 static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index c705907ae6ab..307bbb782d14 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	if (hook == NF_INET_LOCAL_OUT)
+	if (ops->hooknum == NF_INET_LOCAL_OUT)
 		return ip6t_mangle_out(skb, out);
-	if (hook == NF_INET_POST_ROUTING)
-		return ip6t_do_table(skb, hook, in, out,
+	if (ops->hooknum == NF_INET_POST_ROUTING)
+		return ip6t_do_table(skb, ops->hooknum, in, out,
 				     dev_net(out)->ipv6.ip6table_mangle);
 	/* INPUT/FORWARD */
-	return ip6t_do_table(skb, hook, in, out,
+	return ip6t_do_table(skb, ops->hooknum, in, out,
 			     dev_net(in)->ipv6.ip6table_mangle);
 }
 
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 9b076d2d3a7b..84c7f33d0cf8 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv6_fn(unsigned int hooknum,
+nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
 	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
@@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn_nat *nat;
-	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
 	__be16 frag_off;
 	int hdrlen;
 	u8 nexthdr;
@@ -111,7 +111,8 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 
 		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
 			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
-							     hooknum, hdrlen))
+							     ops->hooknum,
+							     hdrlen))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
@@ -124,14 +125,14 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+			ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 		} else {
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
-			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 				goto oif_changed;
 		}
 		break;
@@ -140,11 +141,11 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
-		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 			goto oif_changed;
 	}
 
-	return nf_nat_packet(ct, ctinfo, hooknum, skb);
+	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
 
 oif_changed:
 	nf_ct_kill_acct(ct, ctinfo, skb);
@@ -152,7 +153,7 @@ oif_changed:
 }
 
 static unsigned int
-nf_nat_ipv6_in(unsigned int hooknum,
+nf_nat_ipv6_in(const struct nf_hook_ops *ops,
 	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
@@ -161,7 +162,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
 	unsigned int ret;
 	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
 
-	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
 		skb_dst_drop(skb);
@@ -170,7 +171,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv6_out(unsigned int hooknum,
+nf_nat_ipv6_out(const struct nf_hook_ops *ops,
 		struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
@@ -187,7 +188,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
 	if (skb->len < sizeof(struct ipv6hdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
 #ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -209,7 +210,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv6_local_fn(unsigned int hooknum,
+nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
 		     struct sk_buff *skb,
 		     const struct net_device *in,
 		     const struct net_device *out,
@@ -224,7 +225,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
 	if (skb->len < sizeof(struct ipv6hdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 9a626d86720f..5274740acecc 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -19,13 +19,14 @@ static const struct xt_table packet_raw = {
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6table_raw_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		  const struct net_device *in, const struct net_device *out,
 		  int (*okfn)(struct sk_buff *))
 {
 	const struct net *net = dev_net((in != NULL) ? in : out);
 
-	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw);
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     net->ipv6.ip6table_raw);
 }
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index ce88d1d7e525..ab3b0219ecfa 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -36,14 +36,15 @@ static const struct xt_table security_table = {
 };
 
 static unsigned int
-ip6table_security_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		       const struct net_device *in,
 		       const struct net_device *out,
 		       int (*okfn)(struct sk_buff *))
 {
 	const struct net *net = dev_net((in != NULL) ? in : out);
 
-	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security);
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     net->ipv6.ip6table_security);
 }
 
 static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 54b75ead5a69..486545eb42ce 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 	return NF_ACCEPT;
 }
 
-static unsigned int ipv6_helper(unsigned int hooknum,
+static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
 				struct sk_buff *skb,
 				const struct net_device *in,
 				const struct net_device *out,
@@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum,
 	return helper->help(skb, protoff, ct, ctinfo);
 }
 
-static unsigned int ipv6_confirm(unsigned int hooknum,
+static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
 				 struct sk_buff *skb,
 				 const struct net_device *in,
 				 const struct net_device *out,
@@ -219,16 +219,17 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
 	return nf_conntrack_in(net, PF_INET6, hooknum, skb);
 }
 
-static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
+	return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
+				   okfn);
 }
 
-static unsigned int ipv6_conntrack_local(unsigned int hooknum,
+static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -239,7 +240,8 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
 		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
-	return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
+	return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
+				   okfn);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index aacd121fe8c5..ec483aa3f60f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
 
 }
 
-static unsigned int ipv6_defrag(unsigned int hooknum,
+static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
 				struct sk_buff *skb,
 				const struct net_device *in,
 				const struct net_device *out,
@@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 		return NF_ACCEPT;
 #endif
 
-	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
+	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
 	/* queued */
 	if (reasm == NULL)
 		return NF_STOLEN;
@@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 	if (reasm == skb)
 		return NF_ACCEPT;
 
-	nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
+	nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in,
 			   (struct net_device *)out, okfn);
 
 	return NF_STOLEN;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 593b16ea45e0..1fbab0cdd302 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head,
 		/* Optimization: we don't need to hold module
 		   reference here, since function can't sleep. --RR */
 repeat:
-		verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn);
+		verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn);
 		if (verdict != NF_ACCEPT) {
 #ifdef CONFIG_NETFILTER_DEBUG
 			if (unlikely((verdict & NF_VERDICT_MASK)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 74fd00c27210..34fda62f40f6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1239,11 +1239,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	     const struct net_device *in, const struct net_device *out,
 	     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET);
+	return ip_vs_out(ops->hooknum, skb, AF_INET);
 }
 
 /*
@@ -1251,11 +1251,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET);
+	return ip_vs_out(ops->hooknum, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1266,11 +1266,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	     const struct net_device *in, const struct net_device *out,
 	     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET6);
+	return ip_vs_out(ops->hooknum, skb, AF_INET6);
 }
 
 /*
@@ -1278,11 +1278,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET6);
+	return ip_vs_out(ops->hooknum, skb, AF_INET6);
 }
 
 #endif
@@ -1733,12 +1733,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
  *	Schedule and forward packets from remote clients
  */
 static unsigned int
-ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in,
 		      const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET);
+	return ip_vs_in(ops->hooknum, skb, AF_INET);
 }
 
 /*
@@ -1746,11 +1746,11 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
  *	Schedule and forward packets from local clients
  */
 static unsigned int
-ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET);
+	return ip_vs_in(ops->hooknum, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1760,7 +1760,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
  * Copy info from first fragment, to the rest of them.
  */
 static unsigned int
-ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in,
 		     const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
@@ -1792,12 +1792,12 @@ ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
  *	Schedule and forward packets from remote clients
  */
 static unsigned int
-ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in,
 		      const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET6);
+	return ip_vs_in(ops->hooknum, skb, AF_INET6);
 }
 
 /*
@@ -1805,11 +1805,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
  *	Schedule and forward packets from local clients
  */
 static unsigned int
-ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET6);
+	return ip_vs_in(ops->hooknum, skb, AF_INET6);
 }
 
 #endif
@@ -1825,7 +1825,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
  *      and send them to ip_vs_in_icmp.
  */
 static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
@@ -1842,12 +1842,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
-	return ip_vs_in_icmp(skb, &r, hooknum);
+	return ip_vs_in_icmp(skb, &r, ops->hooknum);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
 static unsigned int
-ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in, const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
@@ -1866,7 +1866,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
 	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
-	return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
+	return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr);
 }
 #endif
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 568c7699abf1..3f224d7795f5 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4668,7 +4668,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 	return NF_ACCEPT;
 }
 
-static unsigned int selinux_ipv4_forward(unsigned int hooknum,
+static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -4678,7 +4678,7 @@ static unsigned int selinux_ipv4_forward(unsigned int hooknum,
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static unsigned int selinux_ipv6_forward(unsigned int hooknum,
+static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -4710,7 +4710,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-static unsigned int selinux_ipv4_output(unsigned int hooknum,
+static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
 					const struct net_device *in,
 					const struct net_device *out,
@@ -4837,7 +4837,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
 	return NF_ACCEPT;
 }
 
-static unsigned int selinux_ipv4_postroute(unsigned int hooknum,
+static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
 					   const struct net_device *in,
 					   const struct net_device *out,
@@ -4847,7 +4847,7 @@ static unsigned int selinux_ipv4_postroute(unsigned int hooknum,
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static unsigned int selinux_ipv6_postroute(unsigned int hooknum,
+static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
 					   const struct net_device *in,
 					   const struct net_device *out,
-- 
cgit v1.2.3


From 0585123ebbd07a4fb49558ed6dea5fced967841a Mon Sep 17 00:00:00 2001
From: LABBE Corentin <clabbe.montjoie@gmail.com>
Date: Thu, 26 Sep 2013 16:50:21 +0200
Subject: Correct some typos for word frequency

Signed-off-by: LABBE Corentin <clabbe.montjoie@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/cpufreq/exynos-cpufreq.c | 2 +-
 include/linux/devfreq.h          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c
index 0fac34439e31..b4afd078fe05 100644
--- a/drivers/cpufreq/exynos-cpufreq.c
+++ b/drivers/cpufreq/exynos-cpufreq.c
@@ -77,7 +77,7 @@ static int exynos_cpufreq_scale(unsigned int target_freq)
 	/*
 	 * The policy max have been changed so that we cannot get proper
 	 * old_index with cpufreq_frequency_table_target(). Thus, ignore
-	 * policy and get the index from the raw freqeuncy table.
+	 * policy and get the index from the raw frequency table.
 	 */
 	old_index = exynos_cpufreq_get_index(freqs.old);
 	if (old_index < 0) {
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 5f1ab92107e6..32acc0090ce6 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -168,7 +168,7 @@ struct devfreq {
 	unsigned long max_freq;
 	bool stop_polling;
 
-	/* information for device freqeuncy transition */
+	/* information for device frequency transition */
 	unsigned int total_trans;
 	unsigned int *trans_table;
 	unsigned long *time_in_state;
-- 
cgit v1.2.3


From 3f79410c7c9c8ef33ccff60c61e1f1166f5ed64a Mon Sep 17 00:00:00 2001
From: Maxime Jayat <maxime@artisandeveloppeur.fr>
Date: Sat, 12 Oct 2013 01:29:46 +0200
Subject: treewide: Fix common typo in "identify"

Correct common misspelling of "identify" as "indentify" throughout
the kernel

Signed-off-by: Maxime Jayat <maxime@artisandeveloppeur.fr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/mips/include/asm/octeon/cvmx-pip.h   |  4 ++--
 arch/x86/kernel/cpu/intel_cacheinfo.c     |  2 +-
 arch/x86/kernel/cpu/scattered.c           |  2 +-
 drivers/media/rc/keymaps/rc-dib0700-nec.c |  2 +-
 drivers/media/rc/keymaps/rc-dib0700-rc5.c |  2 +-
 drivers/net/irda/ali-ircc.c               |  2 +-
 drivers/net/irda/nsc-ircc.c               |  2 +-
 drivers/s390/scsi/zfcp_dbf.c              |  6 +++---
 drivers/scsi/bnx2i/bnx2i_hwi.c            | 16 ++++++++--------
 drivers/scsi/bnx2i/bnx2i_iscsi.c          | 14 +++++++-------
 drivers/scsi/ncr53c8xx.c                  |  2 +-
 drivers/scsi/sym53c8xx_2/sym_glue.h       |  2 +-
 drivers/staging/iio/adc/ad7606.h          |  2 +-
 include/linux/amba/serial.h               |  2 +-
 include/linux/mfd/si476x-core.h           |  2 +-
 include/linux/netdevice.h                 |  2 +-
 include/net/bluetooth/l2cap.h             |  2 +-
 net/can/af_can.c                          |  2 +-
 net/netfilter/xt_set.c                    |  4 ++--
 19 files changed, 36 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/include/asm/octeon/cvmx-pip.h b/arch/mips/include/asm/octeon/cvmx-pip.h
index a76fe5a57a9f..df69bfd2b006 100644
--- a/arch/mips/include/asm/octeon/cvmx-pip.h
+++ b/arch/mips/include/asm/octeon/cvmx-pip.h
@@ -192,13 +192,13 @@ typedef struct {
 	/* Number of packets processed by PIP */
 	uint32_t packets;
 	/*
-	 * Number of indentified L2 multicast packets.	Does not
+	 * Number of identified L2 multicast packets.	Does not
 	 * include broadcast packets.  Only includes packets whose
 	 * parse mode is SKIP_TO_L2
 	 */
 	uint32_t multicast_packets;
 	/*
-	 * Number of indentified L2 broadcast packets.	Does not
+	 * Number of identified L2 broadcast packets.	Does not
 	 * include multicast packets.  Only includes packets whose
 	 * parse mode is SKIP_TO_L2
 	 */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1414c90feaba..0641113e2965 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,5 +1,5 @@
 /*
- *	Routines to indentify caches on Intel CPU.
+ *	Routines to identify caches on Intel CPU.
  *
  *	Changes:
  *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index f2cc63e9cf08..b6f794aa1693 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -1,5 +1,5 @@
 /*
- *	Routines to indentify additional cpu features that are scattered in
+ *	Routines to identify additional cpu features that are scattered in
  *	cpuid space.
  */
 #include <linux/cpu.h>
diff --git a/drivers/media/rc/keymaps/rc-dib0700-nec.c b/drivers/media/rc/keymaps/rc-dib0700-nec.c
index 4d13a7f2e5c3..492a05ade7e1 100644
--- a/drivers/media/rc/keymaps/rc-dib0700-nec.c
+++ b/drivers/media/rc/keymaps/rc-dib0700-nec.c
@@ -5,7 +5,7 @@
  * TODO: This table is a real mess, as it merges RC codes from several
  * devices into a big table. It also has both RC-5 and NEC codes inside.
  * It should be broken into small tables, and the protocols should properly
- * be indentificated.
+ * be identificated.
  *
  * The table were imported from dib0700_devices.c.
  *
diff --git a/drivers/media/rc/keymaps/rc-dib0700-rc5.c b/drivers/media/rc/keymaps/rc-dib0700-rc5.c
index ba81d9697cfc..454ea596a7ee 100644
--- a/drivers/media/rc/keymaps/rc-dib0700-rc5.c
+++ b/drivers/media/rc/keymaps/rc-dib0700-rc5.c
@@ -5,7 +5,7 @@
  * TODO: This table is a real mess, as it merges RC codes from several
  * devices into a big table. It also has both RC-5 and NEC codes inside.
  * It should be broken into small tables, and the protocols should properly
- * be indentificated.
+ * be identificated.
  *
  * The table were imported from dib0700_devices.c.
  *
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index 7bbd318bc93e..befa45f809c3 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -627,7 +627,7 @@ static int ali_ircc_setup(chipio_t *info)
 /*
  * Function ali_ircc_read_dongle_id (int index, info)
  *
- * Try to read dongle indentification. This procedure needs to be executed
+ * Try to read dongle identification. This procedure needs to be executed
  * once after power-on/reset. It also needs to be used whenever you suspect
  * that the user may have plugged/unplugged the IrDA Dongle.
  */
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index ceeb53737f86..66bc03bdb138 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -1035,7 +1035,7 @@ static int nsc_ircc_setup(chipio_t *info)
 /*
  * Function nsc_ircc_read_dongle_id (void)
  *
- * Try to read dongle indentification. This procedure needs to be executed
+ * Try to read dongle identification. This procedure needs to be executed
  * once after power-on/reset. It also needs to be used whenever you suspect
  * that the user may have plugged/unplugged the IrDA Dongle.
  */
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index 132a905b6bdb..0ca64484cfa3 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -344,7 +344,7 @@ void zfcp_dbf_san(char *tag, struct zfcp_dbf *dbf, void *data, u8 id, u16 len,
 
 /**
  * zfcp_dbf_san_req - trace event for issued SAN request
- * @tag: indentifier for event
+ * @tag: identifier for event
  * @fsf_req: request containing issued CT data
  * d_id: destination ID
  */
@@ -361,7 +361,7 @@ void zfcp_dbf_san_req(char *tag, struct zfcp_fsf_req *fsf, u32 d_id)
 
 /**
  * zfcp_dbf_san_res - trace event for received SAN request
- * @tag: indentifier for event
+ * @tag: identifier for event
  * @fsf_req: request containing issued CT data
  */
 void zfcp_dbf_san_res(char *tag, struct zfcp_fsf_req *fsf)
@@ -377,7 +377,7 @@ void zfcp_dbf_san_res(char *tag, struct zfcp_fsf_req *fsf)
 
 /**
  * zfcp_dbf_san_in_els - trace event for incoming ELS
- * @tag: indentifier for event
+ * @tag: identifier for event
  * @fsf_req: request containing issued CT data
  */
 void zfcp_dbf_san_in_els(char *tag, struct zfcp_fsf_req *fsf)
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index af3e675d4d48..886e2f9eb0ea 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -126,7 +126,7 @@ static void bnx2i_iscsi_license_error(struct bnx2i_hba *hba, u32 error_code)
 
 /**
  * bnx2i_arm_cq_event_coalescing - arms CQ to enable EQ notification
- * @ep:		endpoint (transport indentifier) structure
+ * @ep:		endpoint (transport identifier) structure
  * @action:	action, ARM or DISARM. For now only ARM_CQE is used
  *
  * Arm'ing CQ will enable chip to generate global EQ events inorder to interrupt
@@ -756,7 +756,7 @@ void bnx2i_send_cmd_cleanup_req(struct bnx2i_hba *hba, struct bnx2i_cmd *cmd)
 /**
  * bnx2i_send_conn_destroy - initiates iscsi connection teardown process
  * @hba:	adapter structure pointer
- * @ep:		endpoint (transport indentifier) structure
+ * @ep:		endpoint (transport identifier) structure
  *
  * this routine prepares and posts CONN_OFLD_REQ1/2 KWQE to initiate
  * 	iscsi connection context clean-up process
@@ -791,7 +791,7 @@ int bnx2i_send_conn_destroy(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
 /**
  * bnx2i_570x_send_conn_ofld_req - initiates iscsi conn context setup process
  * @hba: 		adapter structure pointer
- * @ep: 		endpoint (transport indentifier) structure
+ * @ep: 		endpoint (transport identifier) structure
  *
  * 5706/5708/5709 specific - prepares and posts CONN_OFLD_REQ1/2 KWQE
  */
@@ -851,7 +851,7 @@ static int bnx2i_570x_send_conn_ofld_req(struct bnx2i_hba *hba,
 /**
  * bnx2i_5771x_send_conn_ofld_req - initiates iscsi connection context creation
  * @hba: 		adapter structure pointer
- * @ep: 		endpoint (transport indentifier) structure
+ * @ep: 		endpoint (transport identifier) structure
  *
  * 57710 specific - prepares and posts CONN_OFLD_REQ1/2 KWQE
  */
@@ -920,7 +920,7 @@ static int bnx2i_5771x_send_conn_ofld_req(struct bnx2i_hba *hba,
  * bnx2i_send_conn_ofld_req - initiates iscsi connection context setup process
  *
  * @hba: 		adapter structure pointer
- * @ep: 		endpoint (transport indentifier) structure
+ * @ep: 		endpoint (transport identifier) structure
  *
  * this routine prepares and posts CONN_OFLD_REQ1/2 KWQE
  */
@@ -939,7 +939,7 @@ int bnx2i_send_conn_ofld_req(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
 
 /**
  * setup_qp_page_tables - iscsi QP page table setup function
- * @ep:		endpoint (transport indentifier) structure
+ * @ep:		endpoint (transport identifier) structure
  *
  * Sets up page tables for SQ/RQ/CQ, 1G/sec (5706/5708/5709) devices requires
  * 	64-bit address in big endian format. Whereas 10G/sec (57710) requires
@@ -1046,7 +1046,7 @@ static void setup_qp_page_tables(struct bnx2i_endpoint *ep)
 /**
  * bnx2i_alloc_qp_resc - allocates required resources for QP.
  * @hba:	adapter structure pointer
- * @ep:		endpoint (transport indentifier) structure
+ * @ep:		endpoint (transport identifier) structure
  *
  * Allocate QP (transport layer for iSCSI connection) resources, DMA'able
  *	memory for SQ/RQ/CQ and page tables. EP structure elements such
@@ -1191,7 +1191,7 @@ mem_alloc_err:
 /**
  * bnx2i_free_qp_resc - free memory resources held by QP
  * @hba:	adapter structure pointer
- * @ep:	endpoint (transport indentifier) structure
+ * @ep:	endpoint (transport identifier) structure
  *
  * Free QP resources - SQ/RQ/CQ memory and page tables.
  */
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
index fabeb88602ac..854dad7d5b03 100644
--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -596,7 +596,7 @@ void bnx2i_drop_session(struct iscsi_cls_session *cls_session)
 /**
  * bnx2i_ep_destroy_list_add - add an entry to EP destroy list
  * @hba:	pointer to adapter instance
- * @ep:		pointer to endpoint (transport indentifier) structure
+ * @ep:		pointer to endpoint (transport identifier) structure
  *
  * EP destroy queue manager
  */
@@ -613,7 +613,7 @@ static int bnx2i_ep_destroy_list_add(struct bnx2i_hba *hba,
  * bnx2i_ep_destroy_list_del - add an entry to EP destroy list
  *
  * @hba: 		pointer to adapter instance
- * @ep: 		pointer to endpoint (transport indentifier) structure
+ * @ep: 		pointer to endpoint (transport identifier) structure
  *
  * EP destroy queue manager
  */
@@ -630,7 +630,7 @@ static int bnx2i_ep_destroy_list_del(struct bnx2i_hba *hba,
 /**
  * bnx2i_ep_ofld_list_add - add an entry to ep offload pending list
  * @hba:	pointer to adapter instance
- * @ep:		pointer to endpoint (transport indentifier) structure
+ * @ep:		pointer to endpoint (transport identifier) structure
  *
  * pending conn offload completion queue manager
  */
@@ -646,7 +646,7 @@ static int bnx2i_ep_ofld_list_add(struct bnx2i_hba *hba,
 /**
  * bnx2i_ep_ofld_list_del - add an entry to ep offload pending list
  * @hba: 		pointer to adapter instance
- * @ep: 		pointer to endpoint (transport indentifier) structure
+ * @ep: 		pointer to endpoint (transport identifier) structure
  *
  * pending conn offload completion queue manager
  */
@@ -721,7 +721,7 @@ bnx2i_find_ep_in_destroy_list(struct bnx2i_hba *hba, u32 iscsi_cid)
 /**
  * bnx2i_ep_active_list_add - add an entry to ep active list
  * @hba:	pointer to adapter instance
- * @ep:		pointer to endpoint (transport indentifier) structure
+ * @ep:		pointer to endpoint (transport identifier) structure
  *
  * current active conn queue manager
  */
@@ -737,7 +737,7 @@ static void bnx2i_ep_active_list_add(struct bnx2i_hba *hba,
 /**
  * bnx2i_ep_active_list_del - deletes an entry to ep active list
  * @hba:	pointer to adapter instance
- * @ep:		pointer to endpoint (transport indentifier) structure
+ * @ep:		pointer to endpoint (transport identifier) structure
  *
  * current active conn queue manager
  */
@@ -1695,7 +1695,7 @@ no_nx2_route:
 /**
  * bnx2i_tear_down_conn - tear down iscsi/tcp connection and free resources
  * @hba:	pointer to adapter instance
- * @ep:		endpoint (transport indentifier) structure
+ * @ep:		endpoint (transport identifier) structure
  *
  * destroys cm_sock structure and on chip iscsi context
  */
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index 5982a587babc..7d014b11df62 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -1615,7 +1615,7 @@ struct ncb {
 	spinlock_t	smp_lock;	/* Lock for SMP threading       */
 
 	/*----------------------------------------------------------------
-	**	Chip and controller indentification.
+	**	Chip and controller identification.
 	**----------------------------------------------------------------
 	*/
 	int		unit;		/* Unit number			*/
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.h b/drivers/scsi/sym53c8xx_2/sym_glue.h
index b80bf709f104..805369521df8 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.h
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.h
@@ -174,7 +174,7 @@ struct sym_slcb {
  */
 struct sym_shcb {
 	/*
-	 *  Chip and controller indentification.
+	 *  Chip and controller identification.
 	 */
 	int		unit;
 	char		inst_name[16];
diff --git a/drivers/staging/iio/adc/ad7606.h b/drivers/staging/iio/adc/ad7606.h
index 9221a74efd18..93c7299e8353 100644
--- a/drivers/staging/iio/adc/ad7606.h
+++ b/drivers/staging/iio/adc/ad7606.h
@@ -42,7 +42,7 @@ struct ad7606_platform_data {
 
 /**
  * struct ad7606_chip_info - chip specifc information
- * @name:		indentification string for chip
+ * @name:		identification string for chip
  * @int_vref_mv:	the internal reference voltage
  * @channels:		channel specification
  * @num_channels:	number of channels
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 62d9303c2837..0ddb5c02ad8b 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -40,7 +40,7 @@
 #define UART010_LCRL		0x10	/* Line control register, low byte. */
 #define UART010_CR		0x14	/* Control register. */
 #define UART01x_FR		0x18	/* Flag register (Read only). */
-#define UART010_IIR		0x1C	/* Interrupt indentification register (Read). */
+#define UART010_IIR		0x1C	/* Interrupt identification register (Read). */
 #define UART010_ICR		0x1C	/* Interrupt clear register (Write). */
 #define ST_UART011_LCRH_RX	0x1C    /* Rx line control register. */
 #define UART01x_ILPR		0x20	/* IrDA low power counter register. */
diff --git a/include/linux/mfd/si476x-core.h b/include/linux/mfd/si476x-core.h
index ba89b94e4a56..674b45d5a757 100644
--- a/include/linux/mfd/si476x-core.h
+++ b/include/linux/mfd/si476x-core.h
@@ -316,7 +316,7 @@ enum si476x_smoothmetrics {
  * response to 'FM_RD_STATUS' command
  * @rdstpptyint: Traffic program flag(TP) and/or program type(PTY)
  * code has changed.
- * @rdspiint: Program indentifiaction(PI) code has changed.
+ * @rdspiint: Program identification(PI) code has changed.
  * @rdssyncint: RDS synchronization has changed.
  * @rdsfifoint: RDS was received and the RDS FIFO has at least
  * 'FM_RDS_INTERRUPT_FIFO_COUNT' elements in it.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8ed4ae943053..9ff50c9a0009 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1546,7 +1546,7 @@ static inline void *netdev_priv(const struct net_device *dev)
 #define SET_NETDEV_DEV(net, pdev)	((net)->dev.parent = (pdev))
 
 /* Set the sysfs device type for the network logical device to allow
- * fin grained indentification of different network device types. For
+ * fine-grained identification of different network device types. For
  * example Ethernet, Wirelss LAN, Bluetooth, WiMAX etc.
  */
 #define SET_NETDEV_DEVTYPE(net, devtype)	((net)->dev.type = (devtype))
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 1a966afbbfa8..7170b4b434e1 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -238,7 +238,7 @@ struct l2cap_conn_rsp {
 #define L2CAP_PSM_SDP		0x0001
 #define L2CAP_PSM_RFCOMM	0x0003
 
-/* channel indentifier */
+/* channel identifier */
 #define L2CAP_CID_SIGNALING	0x0001
 #define L2CAP_CID_CONN_LESS	0x0002
 #define L2CAP_CID_A2MP		0x0003
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 3ab8dd2e1282..d249874a366d 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -420,7 +420,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
  * @mask: CAN mask (see description)
  * @func: callback function on filter match
  * @data: returned parameter for callback function
- * @ident: string for calling module indentification
+ * @ident: string for calling module identification
  *
  * Description:
  *  Invokes the callback function with the received sk_buff and the given
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 31790e789e22..4b9d6b4f1eb0 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -84,7 +84,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
 	index = ip_set_nfnl_get_byindex(info->match_set.index);
 
 	if (index == IPSET_INVALID_ID) {
-		pr_warning("Cannot find set indentified by id %u to match\n",
+		pr_warning("Cannot find set identified by id %u to match\n",
 			   info->match_set.index);
 		return -ENOENT;
 	}
@@ -205,7 +205,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par)
 	index = ip_set_nfnl_get_byindex(info->match_set.index);
 
 	if (index == IPSET_INVALID_ID) {
-		pr_warning("Cannot find set indentified by id %u to match\n",
+		pr_warning("Cannot find set identified by id %u to match\n",
 			   info->match_set.index);
 		return -ENOENT;
 	}
-- 
cgit v1.2.3


From e33fabd365596178e72f62bb4b89f0aaad0509ad Mon Sep 17 00:00:00 2001
From: Anthony Olech <anthony.olech.opensource@diasemi.com>
Date: Fri, 11 Oct 2013 15:31:11 +0100
Subject: regmap: new API regmap_multi_reg_write() definition

New API regmap_multi_reg_write() is defined that allows a set of reg,val
pairs to be written to a I2C client device as one block transfer from the
point of view of a single I2C master system.

A simple demonstration implementation is included that just splits the
block write request into a sequence of single register writes.

The implementation will be modified later to support those I2C clients
that implement the alternative non-standard MULTIWRITE block write mode
so to achieve a single I2C transfer that will be atomic even in multiple
I2C master systems.

Signed-off-by: Anthony Olech <anthony.olech.opensource@diasemi.com>
Signed-off-by: David Dajun Chen <david.chen@diasemi.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/base/regmap/regmap.c | 41 +++++++++++++++++++++++++++++++++++++++++
 include/linux/regmap.h       |  2 ++
 2 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 7d689a15c500..c42ad69e527f 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1439,6 +1439,47 @@ out:
 }
 EXPORT_SYMBOL_GPL(regmap_bulk_write);
 
+/*
+ * regmap_multi_reg_write(): Write multiple registers to the device
+ *
+ * where the set of register are supplied in any order
+ *
+ * @map: Register map to write to
+ * @regs: Array of structures containing register,value to be written
+ * @num_regs: Number of registers to write
+ *
+ * This function is intended to be used for writing a large block of data
+ * atomically to the device in single transfer for those I2C client devices
+ * that implement this alternative block write mode.
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_multi_reg_write(struct regmap *map, struct reg_default *regs,
+				int num_regs)
+{
+	int ret = 0, i;
+
+	for (i = 0; i < num_regs; i++) {
+		int reg = regs[i].reg;
+		if (reg % map->reg_stride)
+			return -EINVAL;
+	}
+
+	map->lock(map->lock_arg);
+
+	for (i = 0; i < num_regs; i++) {
+		ret = _regmap_write(map, regs[i].reg, regs[i].def);
+		if (ret != 0)
+			goto out;
+	}
+out:
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_multi_reg_write);
+
 /**
  * regmap_raw_write_async(): Write raw values to one or more registers
  *                           asynchronously
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..4b933a31f84f 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -378,6 +378,8 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 		     const void *val, size_t val_len);
 int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 			size_t val_count);
+int regmap_multi_reg_write(struct regmap *map, struct reg_default *regs,
+			int num_regs);
 int regmap_raw_write_async(struct regmap *map, unsigned int reg,
 			   const void *val, size_t val_len);
 int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val);
-- 
cgit v1.2.3


From 96518518cc417bb0a8c80b9fb736202e28acdf96 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Oct 2013 11:00:02 +0200
Subject: netfilter: add nftables

This patch adds nftables which is the intended successor of iptables.
This packet filtering framework reuses the existing netfilter hooks,
the connection tracking system, the NAT subsystem, the transparent
proxying engine, the logging infrastructure and the userspace packet
queueing facilities.

In a nutshell, nftables provides a pseudo-state machine with 4 general
purpose registers of 128 bits and 1 specific purpose register to store
verdicts. This pseudo-machine comes with an extensible instruction set,
a.k.a. "expressions" in the nftables jargon. The expressions included
in this patch provide the basic functionality, they are:

* bitwise: to perform bitwise operations.
* byteorder: to change from host/network endianess.
* cmp: to compare data with the content of the registers.
* counter: to enable counters on rules.
* ct: to store conntrack keys into register.
* exthdr: to match IPv6 extension headers.
* immediate: to load data into registers.
* limit: to limit matching based on packet rate.
* log: to log packets.
* meta: to match metainformation that usually comes with the skbuff.
* nat: to perform Network Address Translation.
* payload: to fetch data from the packet payload and store it into
  registers.
* reject (IPv4 only): to explicitly close connection, eg. TCP RST.

Using this instruction-set, the userspace utility 'nft' can transform
the rules expressed in human-readable text representation (using a
new syntax, inspired by tcpdump) to nftables bytecode.

nftables also inherits the table, chain and rule objects from
iptables, but in a more configurable way, and it also includes the
original datatype-agnostic set infrastructure with mapping support.
This set infrastructure is enhanced in the follow up patch (netfilter:
nf_tables: add netlink set API).

This patch includes the following components:

* the netlink API: net/netfilter/nf_tables_api.c and
  include/uapi/netfilter/nf_tables.h
* the packet filter core: net/netfilter/nf_tables_core.c
* the expressions (described above): net/netfilter/nft_*.c
* the filter tables: arp, IPv4, IPv6 and bridge:
  net/ipv4/netfilter/nf_tables_ipv4.c
  net/ipv6/netfilter/nf_tables_ipv6.c
  net/ipv4/netfilter/nf_tables_arp.c
  net/bridge/netfilter/nf_tables_bridge.c
* the NAT table (IPv4 only):
  net/ipv4/netfilter/nf_table_nat_ipv4.c
* the route table (similar to mangle):
  net/ipv4/netfilter/nf_table_route_ipv4.c
  net/ipv6/netfilter/nf_table_route_ipv6.c
* internal definitions under:
  include/net/netfilter/nf_tables.h
  include/net/netfilter/nf_tables_core.h
* It also includes an skeleton expression:
  net/netfilter/nft_expr_template.c
  and the preliminary implementation of the meta target
  net/netfilter/nft_meta_target.c

It also includes a change in struct nf_hook_ops to add a new
pointer to store private data to the hook, that is used to store
the rule list per chain.

This patch is based on the patch from Patrick McHardy, plus merged
accumulated cleanups, fixes and small enhancements to the nftables
code that has been done since 2009, which are:

From Patrick McHardy:
* nf_tables: adjust netlink handler function signatures
* nf_tables: only retry table lookup after successful table module load
* nf_tables: fix event notification echo and avoid unnecessary messages
* nft_ct: add l3proto support
* nf_tables: pass expression context to nft_validate_data_load()
* nf_tables: remove redundant definition
* nft_ct: fix maxattr initialization
* nf_tables: fix invalid event type in nf_tables_getrule()
* nf_tables: simplify nft_data_init() usage
* nf_tables: build in more core modules
* nf_tables: fix double lookup expression unregistation
* nf_tables: move expression initialization to nf_tables_core.c
* nf_tables: build in payload module
* nf_tables: use NFPROTO constants
* nf_tables: rename pid variables to portid
* nf_tables: save 48 bits per rule
* nf_tables: introduce chain rename
* nf_tables: check for duplicate names on chain rename
* nf_tables: remove ability to specify handles for new rules
* nf_tables: return error for rule change request
* nf_tables: return error for NLM_F_REPLACE without rule handle
* nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification
* nf_tables: fix NLM_F_MULTI usage in netlink notifications
* nf_tables: include NLM_F_APPEND in rule dumps

From Pablo Neira Ayuso:
* nf_tables: fix stack overflow in nf_tables_newrule
* nf_tables: nft_ct: fix compilation warning
* nf_tables: nft_ct: fix crash with invalid packets
* nft_log: group and qthreshold are 2^16
* nf_tables: nft_meta: fix socket uid,gid handling
* nft_counter: allow to restore counters
* nf_tables: fix module autoload
* nf_tables: allow to remove all rules placed in one chain
* nf_tables: use 64-bits rule handle instead of 16-bits
* nf_tables: fix chain after rule deletion
* nf_tables: improve deletion performance
* nf_tables: add missing code in route chain type
* nf_tables: rise maximum number of expressions from 12 to 128
* nf_tables: don't delete table if in use
* nf_tables: fix basechain release

From Tomasz Bursztyka:
* nf_tables: Add support for changing users chain's name
* nf_tables: Change chain's name to be fixed sized
* nf_tables: Add support for replacing a rule by another one
* nf_tables: Update uapi nftables netlink header documentation

From Florian Westphal:
* nft_log: group is u16, snaplen u32

From Phil Oester:
* nf_tables: operational limit match

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h                          |   11 +-
 include/net/netfilter/nf_tables.h                  |  301 ++++
 include/net/netfilter/nf_tables_core.h             |   25 +
 include/uapi/linux/netfilter/Kbuild                |    1 +
 include/uapi/linux/netfilter/nf_conntrack_common.h |    4 +
 include/uapi/linux/netfilter/nf_tables.h           |  582 +++++++
 include/uapi/linux/netfilter/nfnetlink.h           |    5 +-
 net/bridge/netfilter/Kconfig                       |    3 +
 net/bridge/netfilter/Makefile                      |    2 +
 net/bridge/netfilter/nf_tables_bridge.c            |   37 +
 net/ipv4/netfilter/Kconfig                         |   16 +
 net/ipv4/netfilter/Makefile                        |    5 +
 net/ipv4/netfilter/nf_table_nat_ipv4.c             |  409 +++++
 net/ipv4/netfilter/nf_table_route_ipv4.c           |   97 ++
 net/ipv4/netfilter/nf_tables_ipv4.c                |   59 +
 net/ipv4/netfilter/nft_reject_ipv4.c               |  117 ++
 net/ipv6/netfilter/Kconfig                         |    8 +
 net/ipv6/netfilter/Makefile                        |    4 +
 net/ipv6/netfilter/nf_table_route_ipv6.c           |   93 ++
 net/ipv6/netfilter/nf_tables_ipv6.c                |   57 +
 net/netfilter/Kconfig                              |   37 +
 net/netfilter/Makefile                             |   16 +
 net/netfilter/nf_tables_api.c                      | 1760 ++++++++++++++++++++
 net/netfilter/nf_tables_core.c                     |  152 ++
 net/netfilter/nft_bitwise.c                        |  140 ++
 net/netfilter/nft_byteorder.c                      |  167 ++
 net/netfilter/nft_cmp.c                            |  146 ++
 net/netfilter/nft_counter.c                        |  107 ++
 net/netfilter/nft_ct.c                             |  252 +++
 net/netfilter/nft_expr_template.c                  |   88 +
 net/netfilter/nft_exthdr.c                         |  127 ++
 net/netfilter/nft_hash.c                           |  348 ++++
 net/netfilter/nft_immediate.c                      |  113 ++
 net/netfilter/nft_limit.c                          |  113 ++
 net/netfilter/nft_log.c                            |  140 ++
 net/netfilter/nft_meta.c                           |  222 +++
 net/netfilter/nft_meta_target.c                    |  117 ++
 net/netfilter/nft_payload.c                        |  137 ++
 net/netfilter/nft_set.c                            |  381 +++++
 39 files changed, 6393 insertions(+), 6 deletions(-)
 create mode 100644 include/net/netfilter/nf_tables.h
 create mode 100644 include/net/netfilter/nf_tables_core.h
 create mode 100644 include/uapi/linux/netfilter/nf_tables.h
 create mode 100644 net/bridge/netfilter/nf_tables_bridge.c
 create mode 100644 net/ipv4/netfilter/nf_table_nat_ipv4.c
 create mode 100644 net/ipv4/netfilter/nf_table_route_ipv4.c
 create mode 100644 net/ipv4/netfilter/nf_tables_ipv4.c
 create mode 100644 net/ipv4/netfilter/nft_reject_ipv4.c
 create mode 100644 net/ipv6/netfilter/nf_table_route_ipv6.c
 create mode 100644 net/ipv6/netfilter/nf_tables_ipv6.c
 create mode 100644 net/netfilter/nf_tables_api.c
 create mode 100644 net/netfilter/nf_tables_core.c
 create mode 100644 net/netfilter/nft_bitwise.c
 create mode 100644 net/netfilter/nft_byteorder.c
 create mode 100644 net/netfilter/nft_cmp.c
 create mode 100644 net/netfilter/nft_counter.c
 create mode 100644 net/netfilter/nft_ct.c
 create mode 100644 net/netfilter/nft_expr_template.c
 create mode 100644 net/netfilter/nft_exthdr.c
 create mode 100644 net/netfilter/nft_hash.c
 create mode 100644 net/netfilter/nft_immediate.c
 create mode 100644 net/netfilter/nft_limit.c
 create mode 100644 net/netfilter/nft_log.c
 create mode 100644 net/netfilter/nft_meta.c
 create mode 100644 net/netfilter/nft_meta_target.c
 create mode 100644 net/netfilter/nft_payload.c
 create mode 100644 net/netfilter/nft_set.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index fef7e67f7101..2077489f9887 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -53,12 +53,13 @@ struct nf_hook_ops {
 	struct list_head list;
 
 	/* User fills in from here down. */
-	nf_hookfn *hook;
-	struct module *owner;
-	u_int8_t pf;
-	unsigned int hooknum;
+	nf_hookfn	*hook;
+	struct module	*owner;
+	void		*priv;
+	u_int8_t	pf;
+	unsigned int	hooknum;
 	/* Hooks are ordered in ascending priority. */
-	int priority;
+	int		priority;
 };
 
 struct nf_sockopt_ops {
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
new file mode 100644
index 000000000000..d26dfa345f49
--- /dev/null
+++ b/include/net/netfilter/nf_tables.h
@@ -0,0 +1,301 @@
+#ifndef _NET_NF_TABLES_H
+#define _NET_NF_TABLES_H
+
+#include <linux/list.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netlink.h>
+
+struct nft_pktinfo {
+	struct sk_buff			*skb;
+	const struct net_device		*in;
+	const struct net_device		*out;
+	u8				hooknum;
+	u8				nhoff;
+	u8				thoff;
+};
+
+struct nft_data {
+	union {
+		u32				data[4];
+		struct {
+			u32			verdict;
+			struct nft_chain	*chain;
+		};
+	};
+} __attribute__((aligned(__alignof__(u64))));
+
+static inline int nft_data_cmp(const struct nft_data *d1,
+			       const struct nft_data *d2,
+			       unsigned int len)
+{
+	return memcmp(d1->data, d2->data, len);
+}
+
+static inline void nft_data_copy(struct nft_data *dst,
+				 const struct nft_data *src)
+{
+	BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64));
+	*(u64 *)&dst->data[0] = *(u64 *)&src->data[0];
+	*(u64 *)&dst->data[2] = *(u64 *)&src->data[2];
+}
+
+static inline void nft_data_debug(const struct nft_data *data)
+{
+	pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n",
+		 data->data[0], data->data[1],
+		 data->data[2], data->data[3]);
+}
+
+/**
+ *	struct nft_ctx - nf_tables rule context
+ *
+ * 	@afi: address family info
+ * 	@table: the table the chain is contained in
+ * 	@chain: the chain the rule is contained in
+ */
+struct nft_ctx {
+	const struct nft_af_info	*afi;
+	const struct nft_table		*table;
+	const struct nft_chain		*chain;
+};
+
+enum nft_data_types {
+	NFT_DATA_VALUE,
+	NFT_DATA_VERDICT,
+};
+
+struct nft_data_desc {
+	enum nft_data_types		type;
+	unsigned int			len;
+};
+
+extern int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+			 struct nft_data_desc *desc, const struct nlattr *nla);
+extern void nft_data_uninit(const struct nft_data *data,
+			    enum nft_data_types type);
+extern int nft_data_dump(struct sk_buff *skb, int attr,
+			 const struct nft_data *data,
+			 enum nft_data_types type, unsigned int len);
+
+static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg)
+{
+	return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE;
+}
+
+extern int nft_validate_input_register(enum nft_registers reg);
+extern int nft_validate_output_register(enum nft_registers reg);
+extern int nft_validate_data_load(const struct nft_ctx *ctx,
+				  enum nft_registers reg,
+				  const struct nft_data *data,
+				  enum nft_data_types type);
+
+/**
+ *	struct nft_expr_ops - nf_tables expression operations
+ *
+ *	@eval: Expression evaluation function
+ *	@init: initialization function
+ *	@destroy: destruction function
+ *	@dump: function to dump parameters
+ *	@list: used internally
+ *	@name: Identifier
+ *	@owner: module reference
+ *	@policy: netlink attribute policy
+ *	@maxattr: highest netlink attribute number
+ *	@size: full expression size, including private data size
+ */
+struct nft_expr;
+struct nft_expr_ops {
+	void				(*eval)(const struct nft_expr *expr,
+						struct nft_data data[NFT_REG_MAX + 1],
+						const struct nft_pktinfo *pkt);
+	int				(*init)(const struct nft_ctx *ctx,
+						const struct nft_expr *expr,
+						const struct nlattr * const tb[]);
+	void				(*destroy)(const struct nft_expr *expr);
+	int				(*dump)(struct sk_buff *skb,
+						const struct nft_expr *expr);
+
+	struct list_head		list;
+	const char			*name;
+	struct module			*owner;
+	const struct nla_policy		*policy;
+	unsigned int			maxattr;
+	unsigned int			size;
+};
+
+#define NFT_EXPR_SIZE(size)		(sizeof(struct nft_expr) + \
+					 ALIGN(size, __alignof__(struct nft_expr)))
+
+/**
+ *	struct nft_expr - nf_tables expression
+ *
+ *	@ops: expression ops
+ *	@data: expression private data
+ */
+struct nft_expr {
+	const struct nft_expr_ops	*ops;
+	unsigned char			data[];
+};
+
+static inline void *nft_expr_priv(const struct nft_expr *expr)
+{
+	return (void *)expr->data;
+}
+
+/**
+ *	struct nft_rule - nf_tables rule
+ *
+ *	@list: used internally
+ *	@rcu_head: used internally for rcu
+ *	@handle: rule handle
+ *	@dlen: length of expression data
+ *	@data: expression data
+ */
+struct nft_rule {
+	struct list_head		list;
+	struct rcu_head			rcu_head;
+	u64				handle:48,
+					dlen:16;
+	unsigned char			data[]
+		__attribute__((aligned(__alignof__(struct nft_expr))));
+};
+
+static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
+{
+	return (struct nft_expr *)&rule->data[0];
+}
+
+static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr)
+{
+	return ((void *)expr) + expr->ops->size;
+}
+
+static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
+{
+	return (struct nft_expr *)&rule->data[rule->dlen];
+}
+
+/*
+ * The last pointer isn't really necessary, but the compiler isn't able to
+ * determine that the result of nft_expr_last() is always the same since it
+ * can't assume that the dlen value wasn't changed within calls in the loop.
+ */
+#define nft_rule_for_each_expr(expr, last, rule) \
+	for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \
+	     (expr) != (last); \
+	     (expr) = nft_expr_next(expr))
+
+enum nft_chain_flags {
+	NFT_BASE_CHAIN			= 0x1,
+	NFT_CHAIN_BUILTIN		= 0x2,
+};
+
+/**
+ *	struct nft_chain - nf_tables chain
+ *
+ *	@rules: list of rules in the chain
+ *	@list: used internally
+ *	@rcu_head: used internally
+ *	@handle: chain handle
+ *	@flags: bitmask of enum nft_chain_flags
+ *	@use: number of jump references to this chain
+ *	@level: length of longest path to this chain
+ *	@name: name of the chain
+ */
+struct nft_chain {
+	struct list_head		rules;
+	struct list_head		list;
+	struct rcu_head			rcu_head;
+	u64				handle;
+	u8				flags;
+	u16				use;
+	u16				level;
+	char				name[NFT_CHAIN_MAXNAMELEN];
+};
+
+/**
+ *	struct nft_base_chain - nf_tables base chain
+ *
+ *	@ops: netfilter hook ops
+ *	@chain: the chain
+ */
+struct nft_base_chain {
+	struct nf_hook_ops		ops;
+	struct nft_chain		chain;
+};
+
+static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
+{
+	return container_of(chain, struct nft_base_chain, chain);
+}
+
+extern unsigned int nft_do_chain(const struct nf_hook_ops *ops,
+				 struct sk_buff *skb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 int (*okfn)(struct sk_buff *));
+
+enum nft_table_flags {
+	NFT_TABLE_BUILTIN		= 0x1,
+};
+
+/**
+ *	struct nft_table - nf_tables table
+ *
+ *	@list: used internally
+ *	@chains: chains in the table
+ *	@sets: sets in the table
+ *	@hgenerator: handle generator state
+ *	@use: number of chain references to this table
+ *	@flags: table flag (see enum nft_table_flags)
+ *	@name: name of the table
+ */
+struct nft_table {
+	struct list_head		list;
+	struct list_head		chains;
+	struct list_head		sets;
+	u64				hgenerator;
+	u32				use;
+	u16				flags;
+	char				name[];
+};
+
+/**
+ *	struct nft_af_info - nf_tables address family info
+ *
+ *	@list: used internally
+ *	@family: address family
+ *	@nhooks: number of hooks in this family
+ *	@owner: module owner
+ *	@tables: used internally
+ *	@hooks: hookfn overrides for packet validation
+ */
+struct nft_af_info {
+	struct list_head		list;
+	int				family;
+	unsigned int			nhooks;
+	struct module			*owner;
+	struct list_head		tables;
+	nf_hookfn			*hooks[NF_MAX_HOOKS];
+};
+
+extern int nft_register_afinfo(struct nft_af_info *);
+extern void nft_unregister_afinfo(struct nft_af_info *);
+
+extern int nft_register_table(struct nft_table *, int family);
+extern void nft_unregister_table(struct nft_table *, int family);
+
+extern int nft_register_expr(struct nft_expr_ops *);
+extern void nft_unregister_expr(struct nft_expr_ops *);
+
+#define MODULE_ALIAS_NFT_FAMILY(family)	\
+	MODULE_ALIAS("nft-afinfo-" __stringify(family))
+
+#define MODULE_ALIAS_NFT_TABLE(family, name) \
+	MODULE_ALIAS("nft-table-" __stringify(family) "-" name)
+
+#define MODULE_ALIAS_NFT_EXPR(name) \
+	MODULE_ALIAS("nft-expr-" name)
+
+#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
new file mode 100644
index 000000000000..283396c916e0
--- /dev/null
+++ b/include/net/netfilter/nf_tables_core.h
@@ -0,0 +1,25 @@
+#ifndef _NET_NF_TABLES_CORE_H
+#define _NET_NF_TABLES_CORE_H
+
+extern int nf_tables_core_module_init(void);
+extern void nf_tables_core_module_exit(void);
+
+extern int nft_immediate_module_init(void);
+extern void nft_immediate_module_exit(void);
+
+extern int nft_cmp_module_init(void);
+extern void nft_cmp_module_exit(void);
+
+extern int nft_lookup_module_init(void);
+extern void nft_lookup_module_exit(void);
+
+extern int nft_bitwise_module_init(void);
+extern void nft_bitwise_module_exit(void);
+
+extern int nft_byteorder_module_init(void);
+extern void nft_byteorder_module_exit(void);
+
+extern int nft_payload_module_init(void);
+extern void nft_payload_module_exit(void);
+
+#endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
index 174915420d3f..6ce0b7f566a7 100644
--- a/include/uapi/linux/netfilter/Kbuild
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h
 header-y += nf_conntrack_sctp.h
 header-y += nf_conntrack_tcp.h
 header-y += nf_conntrack_tuple_common.h
+header-y += nf_tables.h
 header-y += nf_nat.h
 header-y += nfnetlink.h
 header-y += nfnetlink_acct.h
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 8dd803818ebe..319f47128db8 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -25,6 +25,10 @@ enum ip_conntrack_info {
 	IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
 };
 
+#define NF_CT_STATE_INVALID_BIT			(1 << 0)
+#define NF_CT_STATE_BIT(ctinfo)			(1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
+#define NF_CT_STATE_UNTRACKED_BIT		(1 << (IP_CT_NUMBER + 1))
+
 /* Bitset representing status of connection. */
 enum ip_conntrack_status {
 	/* It's an expected connection: bit 0 set.  This bit never changed */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
new file mode 100644
index 000000000000..ec6d84a8ed1e
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -0,0 +1,582 @@
+#ifndef _LINUX_NF_TABLES_H
+#define _LINUX_NF_TABLES_H
+
+#define NFT_CHAIN_MAXNAMELEN 32
+
+enum nft_registers {
+	NFT_REG_VERDICT,
+	NFT_REG_1,
+	NFT_REG_2,
+	NFT_REG_3,
+	NFT_REG_4,
+	__NFT_REG_MAX
+};
+#define NFT_REG_MAX	(__NFT_REG_MAX - 1)
+
+/**
+ * enum nft_verdicts - nf_tables internal verdicts
+ *
+ * @NFT_CONTINUE: continue evaluation of the current rule
+ * @NFT_BREAK: terminate evaluation of the current rule
+ * @NFT_JUMP: push the current chain on the jump stack and jump to a chain
+ * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack
+ * @NFT_RETURN: return to the topmost chain on the jump stack
+ *
+ * The nf_tables verdicts share their numeric space with the netfilter verdicts.
+ */
+enum nft_verdicts {
+	NFT_CONTINUE	= -1,
+	NFT_BREAK	= -2,
+	NFT_JUMP	= -3,
+	NFT_GOTO	= -4,
+	NFT_RETURN	= -5,
+};
+
+/**
+ * enum nf_tables_msg_types - nf_tables netlink message types
+ *
+ * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes)
+ * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes)
+ * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes)
+ * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes)
+ * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes)
+ * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes)
+ * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes)
+ * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes)
+ * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes)
+ */
+enum nf_tables_msg_types {
+	NFT_MSG_NEWTABLE,
+	NFT_MSG_GETTABLE,
+	NFT_MSG_DELTABLE,
+	NFT_MSG_NEWCHAIN,
+	NFT_MSG_GETCHAIN,
+	NFT_MSG_DELCHAIN,
+	NFT_MSG_NEWRULE,
+	NFT_MSG_GETRULE,
+	NFT_MSG_DELRULE,
+	NFT_MSG_MAX,
+};
+
+enum nft_list_attributes {
+	NFTA_LIST_UNPEC,
+	NFTA_LIST_ELEM,
+	__NFTA_LIST_MAX
+};
+#define NFTA_LIST_MAX		(__NFTA_LIST_MAX - 1)
+
+/**
+ * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes
+ *
+ * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
+ * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
+ */
+enum nft_hook_attributes {
+	NFTA_HOOK_UNSPEC,
+	NFTA_HOOK_HOOKNUM,
+	NFTA_HOOK_PRIORITY,
+	__NFTA_HOOK_MAX
+};
+#define NFTA_HOOK_MAX		(__NFTA_HOOK_MAX - 1)
+
+/**
+ * enum nft_table_attributes - nf_tables table netlink attributes
+ *
+ * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
+ */
+enum nft_table_attributes {
+	NFTA_TABLE_UNSPEC,
+	NFTA_TABLE_NAME,
+	__NFTA_TABLE_MAX
+};
+#define NFTA_TABLE_MAX		(__NFTA_TABLE_MAX - 1)
+
+/**
+ * enum nft_chain_attributes - nf_tables chain netlink attributes
+ *
+ * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING)
+ * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64)
+ * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING)
+ * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes)
+ */
+enum nft_chain_attributes {
+	NFTA_CHAIN_UNSPEC,
+	NFTA_CHAIN_TABLE,
+	NFTA_CHAIN_HANDLE,
+	NFTA_CHAIN_NAME,
+	NFTA_CHAIN_HOOK,
+	__NFTA_CHAIN_MAX
+};
+#define NFTA_CHAIN_MAX		(__NFTA_CHAIN_MAX - 1)
+
+/**
+ * enum nft_rule_attributes - nf_tables rule netlink attributes
+ *
+ * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING)
+ * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING)
+ * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64)
+ * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes)
+ */
+enum nft_rule_attributes {
+	NFTA_RULE_UNSPEC,
+	NFTA_RULE_TABLE,
+	NFTA_RULE_CHAIN,
+	NFTA_RULE_HANDLE,
+	NFTA_RULE_EXPRESSIONS,
+	__NFTA_RULE_MAX
+};
+#define NFTA_RULE_MAX		(__NFTA_RULE_MAX - 1)
+
+enum nft_data_attributes {
+	NFTA_DATA_UNSPEC,
+	NFTA_DATA_VALUE,
+	NFTA_DATA_VERDICT,
+	__NFTA_DATA_MAX
+};
+#define NFTA_DATA_MAX		(__NFTA_DATA_MAX - 1)
+
+/**
+ * enum nft_verdict_attributes - nf_tables verdict netlink attributes
+ *
+ * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts)
+ * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING)
+ */
+enum nft_verdict_attributes {
+	NFTA_VERDICT_UNSPEC,
+	NFTA_VERDICT_CODE,
+	NFTA_VERDICT_CHAIN,
+	__NFTA_VERDICT_MAX
+};
+#define NFTA_VERDICT_MAX	(__NFTA_VERDICT_MAX - 1)
+
+/**
+ * enum nft_expr_attributes - nf_tables expression netlink attributes
+ *
+ * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING)
+ * @NFTA_EXPR_DATA: type specific data (NLA_NESTED)
+ */
+enum nft_expr_attributes {
+	NFTA_EXPR_UNSPEC,
+	NFTA_EXPR_NAME,
+	NFTA_EXPR_DATA,
+	__NFTA_EXPR_MAX
+};
+#define NFTA_EXPR_MAX		(__NFTA_EXPR_MAX - 1)
+
+/**
+ * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes
+ *
+ * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32)
+ * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_immediate_attributes {
+	NFTA_IMMEDIATE_UNSPEC,
+	NFTA_IMMEDIATE_DREG,
+	NFTA_IMMEDIATE_DATA,
+	__NFTA_IMMEDIATE_MAX
+};
+#define NFTA_IMMEDIATE_MAX	(__NFTA_IMMEDIATE_MAX - 1)
+
+/**
+ * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes
+ *
+ * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers)
+ * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_BITWISE_LEN: length of operands (NLA_U32)
+ * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes)
+ * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes)
+ *
+ * The bitwise expression performs the following operation:
+ *
+ * dreg = (sreg & mask) ^ xor
+ *
+ * which allow to express all bitwise operations:
+ *
+ * 		mask	xor
+ * NOT:		1	1
+ * OR:		0	x
+ * XOR:		1	x
+ * AND:		x	0
+ */
+enum nft_bitwise_attributes {
+	NFTA_BITWISE_UNSPEC,
+	NFTA_BITWISE_SREG,
+	NFTA_BITWISE_DREG,
+	NFTA_BITWISE_LEN,
+	NFTA_BITWISE_MASK,
+	NFTA_BITWISE_XOR,
+	__NFTA_BITWISE_MAX
+};
+#define NFTA_BITWISE_MAX	(__NFTA_BITWISE_MAX - 1)
+
+/**
+ * enum nft_byteorder_ops - nf_tables byteorder operators
+ *
+ * @NFT_BYTEORDER_NTOH: network to host operator
+ * @NFT_BYTEORDER_HTON: host to network opertaor
+ */
+enum nft_byteorder_ops {
+	NFT_BYTEORDER_NTOH,
+	NFT_BYTEORDER_HTON,
+};
+
+/**
+ * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes
+ *
+ * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers)
+ * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops)
+ * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32)
+ * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4)
+ */
+enum nft_byteorder_attributes {
+	NFTA_BYTEORDER_UNSPEC,
+	NFTA_BYTEORDER_SREG,
+	NFTA_BYTEORDER_DREG,
+	NFTA_BYTEORDER_OP,
+	NFTA_BYTEORDER_LEN,
+	NFTA_BYTEORDER_SIZE,
+	__NFTA_BYTEORDER_MAX
+};
+#define NFTA_BYTEORDER_MAX	(__NFTA_BYTEORDER_MAX - 1)
+
+/**
+ * enum nft_cmp_ops - nf_tables relational operator
+ *
+ * @NFT_CMP_EQ: equal
+ * @NFT_CMP_NEQ: not equal
+ * @NFT_CMP_LT: less than
+ * @NFT_CMP_LTE: less than or equal to
+ * @NFT_CMP_GT: greater than
+ * @NFT_CMP_GTE: greater than or equal to
+ */
+enum nft_cmp_ops {
+	NFT_CMP_EQ,
+	NFT_CMP_NEQ,
+	NFT_CMP_LT,
+	NFT_CMP_LTE,
+	NFT_CMP_GT,
+	NFT_CMP_GTE,
+};
+
+/**
+ * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes
+ *
+ * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers)
+ * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops)
+ * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_cmp_attributes {
+	NFTA_CMP_UNSPEC,
+	NFTA_CMP_SREG,
+	NFTA_CMP_OP,
+	NFTA_CMP_DATA,
+	__NFTA_CMP_MAX
+};
+#define NFTA_CMP_MAX		(__NFTA_CMP_MAX - 1)
+
+enum nft_set_elem_flags {
+	NFT_SE_INTERVAL_END	= 0x1,
+};
+
+enum nft_set_elem_attributes {
+	NFTA_SE_UNSPEC,
+	NFTA_SE_KEY,
+	NFTA_SE_DATA,
+	NFTA_SE_FLAGS,
+	__NFTA_SE_MAX
+};
+#define NFTA_SE_MAX		(__NFTA_SE_MAX - 1)
+
+enum nft_set_flags {
+	NFT_SET_INTERVAL	= 0x1,
+	NFT_SET_MAP		= 0x2,
+};
+
+enum nft_set_attributes {
+	NFTA_SET_UNSPEC,
+	NFTA_SET_FLAGS,
+	NFTA_SET_SREG,
+	NFTA_SET_DREG,
+	NFTA_SET_KLEN,
+	NFTA_SET_DLEN,
+	NFTA_SET_ELEMENTS,
+	__NFTA_SET_MAX
+};
+#define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
+
+enum nft_hash_flags {
+	NFT_HASH_MAP		= 0x1,
+};
+
+enum nft_hash_elem_attributes {
+	NFTA_HE_UNSPEC,
+	NFTA_HE_KEY,
+	NFTA_HE_DATA,
+	__NFTA_HE_MAX
+};
+#define NFTA_HE_MAX		(__NFTA_HE_MAX - 1)
+
+enum nft_hash_attributes {
+	NFTA_HASH_UNSPEC,
+	NFTA_HASH_FLAGS,
+	NFTA_HASH_SREG,
+	NFTA_HASH_DREG,
+	NFTA_HASH_KLEN,
+	NFTA_HASH_ELEMENTS,
+	__NFTA_HASH_MAX
+};
+#define NFTA_HASH_MAX		(__NFTA_HASH_MAX - 1)
+
+/**
+ * enum nft_payload_bases - nf_tables payload expression offset bases
+ *
+ * @NFT_PAYLOAD_LL_HEADER: link layer header
+ * @NFT_PAYLOAD_NETWORK_HEADER: network header
+ * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header
+ */
+enum nft_payload_bases {
+	NFT_PAYLOAD_LL_HEADER,
+	NFT_PAYLOAD_NETWORK_HEADER,
+	NFT_PAYLOAD_TRANSPORT_HEADER,
+};
+
+/**
+ * enum nft_payload_attributes - nf_tables payload expression netlink attributes
+ *
+ * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers)
+ * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases)
+ * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32)
+ * @NFTA_PAYLOAD_LEN: payload length (NLA_U32)
+ */
+enum nft_payload_attributes {
+	NFTA_PAYLOAD_UNSPEC,
+	NFTA_PAYLOAD_DREG,
+	NFTA_PAYLOAD_BASE,
+	NFTA_PAYLOAD_OFFSET,
+	NFTA_PAYLOAD_LEN,
+	__NFTA_PAYLOAD_MAX
+};
+#define NFTA_PAYLOAD_MAX	(__NFTA_PAYLOAD_MAX - 1)
+
+/**
+ * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes
+ *
+ * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8)
+ * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
+ * @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
+ */
+enum nft_exthdr_attributes {
+	NFTA_EXTHDR_UNSPEC,
+	NFTA_EXTHDR_DREG,
+	NFTA_EXTHDR_TYPE,
+	NFTA_EXTHDR_OFFSET,
+	NFTA_EXTHDR_LEN,
+	__NFTA_EXTHDR_MAX
+};
+#define NFTA_EXTHDR_MAX		(__NFTA_EXTHDR_MAX - 1)
+
+/**
+ * enum nft_meta_keys - nf_tables meta expression keys
+ *
+ * @NFT_META_LEN: packet length (skb->len)
+ * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT
+ * @NFT_META_PRIORITY: packet priority (skb->priority)
+ * @NFT_META_MARK: packet mark (skb->mark)
+ * @NFT_META_IIF: packet input interface index (dev->ifindex)
+ * @NFT_META_OIF: packet output interface index (dev->ifindex)
+ * @NFT_META_IIFNAME: packet input interface name (dev->name)
+ * @NFT_META_OIFNAME: packet output interface name (dev->name)
+ * @NFT_META_IIFTYPE: packet input interface type (dev->type)
+ * @NFT_META_OIFTYPE: packet output interface type (dev->type)
+ * @NFT_META_SKUID: originating socket UID (fsuid)
+ * @NFT_META_SKGID: originating socket GID (fsgid)
+ * @NFT_META_NFTRACE: packet nftrace bit
+ * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid)
+ * @NFT_META_SECMARK: packet secmark (skb->secmark)
+ */
+enum nft_meta_keys {
+	NFT_META_LEN,
+	NFT_META_PROTOCOL,
+	NFT_META_PRIORITY,
+	NFT_META_MARK,
+	NFT_META_IIF,
+	NFT_META_OIF,
+	NFT_META_IIFNAME,
+	NFT_META_OIFNAME,
+	NFT_META_IIFTYPE,
+	NFT_META_OIFTYPE,
+	NFT_META_SKUID,
+	NFT_META_SKGID,
+	NFT_META_NFTRACE,
+	NFT_META_RTCLASSID,
+	NFT_META_SECMARK,
+};
+
+/**
+ * enum nft_meta_attributes - nf_tables meta expression netlink attributes
+ *
+ * @NFTA_META_DREG: destination register (NLA_U32)
+ * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
+ */
+enum nft_meta_attributes {
+	NFTA_META_UNSPEC,
+	NFTA_META_DREG,
+	NFTA_META_KEY,
+	__NFTA_META_MAX
+};
+#define NFTA_META_MAX		(__NFTA_META_MAX - 1)
+
+/**
+ * enum nft_ct_keys - nf_tables ct expression keys
+ *
+ * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info)
+ * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir)
+ * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status)
+ * @NFT_CT_MARK: conntrack mark value
+ * @NFT_CT_SECMARK: conntrack secmark value
+ * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms
+ * @NFT_CT_HELPER: connection tracking helper assigned to conntrack
+ * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol
+ * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address)
+ * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address)
+ * @NFT_CT_PROTOCOL: conntrack layer 4 protocol
+ * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source
+ * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination
+ */
+enum nft_ct_keys {
+	NFT_CT_STATE,
+	NFT_CT_DIRECTION,
+	NFT_CT_STATUS,
+	NFT_CT_MARK,
+	NFT_CT_SECMARK,
+	NFT_CT_EXPIRATION,
+	NFT_CT_HELPER,
+	NFT_CT_L3PROTOCOL,
+	NFT_CT_SRC,
+	NFT_CT_DST,
+	NFT_CT_PROTOCOL,
+	NFT_CT_PROTO_SRC,
+	NFT_CT_PROTO_DST,
+};
+
+/**
+ * enum nft_ct_attributes - nf_tables ct expression netlink attributes
+ *
+ * @NFTA_CT_DREG: destination register (NLA_U32)
+ * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys)
+ * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8)
+ */
+enum nft_ct_attributes {
+	NFTA_CT_UNSPEC,
+	NFTA_CT_DREG,
+	NFTA_CT_KEY,
+	NFTA_CT_DIRECTION,
+	__NFTA_CT_MAX
+};
+#define NFTA_CT_MAX		(__NFTA_CT_MAX - 1)
+
+/**
+ * enum nft_limit_attributes - nf_tables limit expression netlink attributes
+ *
+ * @NFTA_LIMIT_RATE: refill rate (NLA_U64)
+ * @NFTA_LIMIT_UNIT: refill unit (NLA_U64)
+ */
+enum nft_limit_attributes {
+	NFTA_LIMIT_UNSPEC,
+	NFTA_LIMIT_RATE,
+	NFTA_LIMIT_UNIT,
+	__NFTA_LIMIT_MAX
+};
+#define NFTA_LIMIT_MAX		(__NFTA_LIMIT_MAX - 1)
+
+/**
+ * enum nft_counter_attributes - nf_tables counter expression netlink attributes
+ *
+ * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64)
+ * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64)
+ */
+enum nft_counter_attributes {
+	NFTA_COUNTER_UNSPEC,
+	NFTA_COUNTER_BYTES,
+	NFTA_COUNTER_PACKETS,
+	__NFTA_COUNTER_MAX
+};
+#define NFTA_COUNTER_MAX	(__NFTA_COUNTER_MAX - 1)
+
+/**
+ * enum nft_log_attributes - nf_tables log expression netlink attributes
+ *
+ * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32)
+ * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING)
+ * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32)
+ * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32)
+ */
+enum nft_log_attributes {
+	NFTA_LOG_UNSPEC,
+	NFTA_LOG_GROUP,
+	NFTA_LOG_PREFIX,
+	NFTA_LOG_SNAPLEN,
+	NFTA_LOG_QTHRESHOLD,
+	__NFTA_LOG_MAX
+};
+#define NFTA_LOG_MAX		(__NFTA_LOG_MAX - 1)
+
+/**
+ * enum nft_reject_types - nf_tables reject expression reject types
+ *
+ * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable
+ * @NFT_REJECT_TCP_RST: reject using TCP RST
+ */
+enum nft_reject_types {
+	NFT_REJECT_ICMP_UNREACH,
+	NFT_REJECT_TCP_RST,
+};
+
+/**
+ * enum nft_reject_attributes - nf_tables reject expression netlink attributes
+ *
+ * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types)
+ * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8)
+ */
+enum nft_reject_attributes {
+	NFTA_REJECT_UNSPEC,
+	NFTA_REJECT_TYPE,
+	NFTA_REJECT_ICMP_CODE,
+	__NFTA_REJECT_MAX
+};
+#define NFTA_REJECT_MAX		(__NFTA_REJECT_MAX - 1)
+
+/**
+ * enum nft_nat_types - nf_tables nat expression NAT types
+ *
+ * @NFT_NAT_SNAT: source NAT
+ * @NFT_NAT_DNAT: destination NAT
+ */
+enum nft_nat_types {
+	NFT_NAT_SNAT,
+	NFT_NAT_DNAT,
+};
+
+/**
+ * enum nft_nat_attributes - nf_tables nat expression netlink attributes
+ *
+ * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types)
+ * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers)
+ * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers)
+ * @NFTA_NAT_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers)
+ * @NFTA_NAT_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers)
+ */
+enum nft_nat_attributes {
+	NFTA_NAT_UNSPEC,
+	NFTA_NAT_TYPE,
+	NFTA_NAT_ADDR_MIN,
+	NFTA_NAT_ADDR_MAX,
+	NFTA_NAT_PROTO_MIN,
+	NFTA_NAT_PROTO_MAX,
+	__NFTA_NAT_MAX
+};
+#define NFTA_NAT_MAX		(__NFTA_NAT_MAX - 1)
+
+#endif /* _LINUX_NF_TABLES_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 4a4efafad5f4..d276c3bd55b8 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -18,6 +18,8 @@ enum nfnetlink_groups {
 #define NFNLGRP_CONNTRACK_EXP_UPDATE	NFNLGRP_CONNTRACK_EXP_UPDATE
 	NFNLGRP_CONNTRACK_EXP_DESTROY,
 #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
+	NFNLGRP_NFTABLES,
+#define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
 	__NFNLGRP_MAX,
 };
 #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
@@ -51,6 +53,7 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_ACCT		7
 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT	8
 #define NFNL_SUBSYS_CTHELPER		9
-#define NFNL_SUBSYS_COUNT		10
+#define NFNL_SUBSYS_NFTABLES		10
+#define NFNL_SUBSYS_COUNT		11
 
 #endif /* _UAPI_NFNETLINK_H */
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index a9aff9c7d027..68f8128147be 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -1,6 +1,9 @@
 #
 # Bridge netfilter configuration
 #
+#
+config NF_TABLES_BRIDGE
+	tristate "Ethernet Bridge nf_tables support"
 
 menuconfig BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 0718699540b0..ea7629f58b3d 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -2,6 +2,8 @@
 # Makefile for the netfilter modules for Link Layer filtering on a bridge.
 #
 
+obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
 
 # tables
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
new file mode 100644
index 000000000000..bc5c21c911c0
--- /dev/null
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter_bridge.h>
+#include <net/netfilter/nf_tables.h>
+
+static struct nft_af_info nft_af_bridge __read_mostly = {
+	.family		= NFPROTO_BRIDGE,
+	.nhooks		= NF_BR_NUMHOOKS,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nf_tables_bridge_init(void)
+{
+	return nft_register_afinfo(&nft_af_bridge);
+}
+
+static void __exit nf_tables_bridge_exit(void)
+{
+	nft_unregister_afinfo(&nft_af_bridge);
+}
+
+module_init(nf_tables_bridge_init);
+module_exit(nf_tables_bridge_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1657e39b291f..eb1d56ece361 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,6 +36,22 @@ config NF_CONNTRACK_PROC_COMPAT
 
 	  If unsure, say Y.
 
+config NF_TABLES_IPV4
+	depends on NF_TABLES
+	tristate "IPv4 nf_tables support"
+
+config NFT_REJECT_IPV4
+	depends on NF_TABLES_IPV4
+	tristate "nf_tables IPv4 reject support"
+
+config NF_TABLE_ROUTE_IPV4
+	depends on NF_TABLES_IPV4
+	tristate "IPv4 nf_tables route table support"
+
+config NF_TABLE_NAT_IPV4
+	depends on NF_TABLES_IPV4
+	tristate "IPv4 nf_tables nat table support"
+
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3622b248b6dd..b2f01cd2cd65 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -27,6 +27,11 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 # NAT protocols (nf_nat)
 obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 
+obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NF_TABLE_ROUTE_IPV4) += nf_table_route_ipv4.o
+obj-$(CONFIG_NF_TABLE_NAT_IPV4) += nf_table_nat_ipv4.o
+
 # generic IP tables 
 obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 
diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nf_table_nat_ipv4.c
new file mode 100644
index 000000000000..2a6f184c10bd
--- /dev/null
+++ b/net/ipv4/netfilter/nf_table_nat_ipv4.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+struct nft_nat {
+	enum nft_registers	sreg_addr_min:8;
+	enum nft_registers	sreg_addr_max:8;
+	enum nft_registers	sreg_proto_min:8;
+	enum nft_registers	sreg_proto_max:8;
+	enum nf_nat_manip_type	type;
+};
+
+static void nft_nat_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_nat *priv = nft_expr_priv(expr);
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
+	struct nf_nat_range range;
+
+	memset(&range, 0, sizeof(range));
+	if (priv->sreg_addr_min) {
+		range.min_addr.ip = data[priv->sreg_addr_min].data[0];
+		range.max_addr.ip = data[priv->sreg_addr_max].data[0];
+		range.flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = data[priv->sreg_proto_min].data[0];
+		range.max_proto.all = data[priv->sreg_proto_max].data[0];
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+
+	data[NFT_REG_VERDICT].verdict =
+		nf_nat_setup_info(ct, &range, priv->type);
+}
+
+static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
+	[NFTA_NAT_ADDR_MIN]	= { .type = NLA_U32 },
+	[NFTA_NAT_ADDR_MAX]	= { .type = NLA_U32 },
+	[NFTA_NAT_PROTO_MIN]	= { .type = NLA_U32 },
+	[NFTA_NAT_PROTO_MAX]	= { .type = NLA_U32 },
+	[NFTA_NAT_TYPE]		= { .type = NLA_U32 },
+};
+
+static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_nat *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_NAT_TYPE] == NULL)
+		return -EINVAL;
+
+	switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
+	case NFT_NAT_SNAT:
+		priv->type = NF_NAT_MANIP_SRC;
+		break;
+	case NFT_NAT_DNAT:
+		priv->type = NF_NAT_MANIP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (tb[NFTA_NAT_ADDR_MIN]) {
+		priv->sreg_addr_min = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MIN]));
+		err = nft_validate_input_register(priv->sreg_addr_min);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[NFTA_NAT_ADDR_MAX]) {
+		priv->sreg_addr_max = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MAX]));
+		err = nft_validate_input_register(priv->sreg_addr_max);
+		if (err < 0)
+			return err;
+	} else
+		priv->sreg_addr_max = priv->sreg_addr_min;
+
+	if (tb[NFTA_NAT_PROTO_MIN]) {
+		priv->sreg_proto_min = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MIN]));
+		err = nft_validate_input_register(priv->sreg_proto_min);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[NFTA_NAT_PROTO_MAX]) {
+		priv->sreg_proto_max = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MAX]));
+		err = nft_validate_input_register(priv->sreg_proto_max);
+		if (err < 0)
+			return err;
+	} else
+		priv->sreg_proto_max = priv->sreg_proto_min;
+
+	return 0;
+}
+
+static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_nat *priv = nft_expr_priv(expr);
+
+	switch (priv->type) {
+	case NF_NAT_MANIP_SRC:
+		if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT)))
+			goto nla_put_failure;
+		break;
+	case NF_NAT_MANIP_DST:
+		if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT)))
+			goto nla_put_failure;
+		break;
+	}
+
+	if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN, htonl(priv->sreg_addr_min)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NAT_ADDR_MAX, htonl(priv->sreg_addr_max)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN, htonl(priv->sreg_proto_min)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NAT_PROTO_MAX, htonl(priv->sreg_proto_max)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_nat_ops __read_mostly = {
+	.name		= "nat",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_nat_eval,
+	.init		= nft_nat_init,
+	.dump		= nft_nat_dump,
+	.policy		= nft_nat_policy,
+	.maxattr	= NFTA_NAT_MAX,
+};
+
+/*
+ * NAT table
+ */
+
+static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
+			      struct sk_buff *skb,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_nat *nat;
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+	unsigned int ret;
+
+	if (ct == NULL || nf_ct_is_untracked(ct))
+		return NF_ACCEPT;
+
+	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
+
+	nat = nfct_nat(ct);
+	if (nat == NULL) {
+		/* Conntrack module was loaded late, can't add extension. */
+		if (nf_ct_is_confirmed(ct))
+			return NF_ACCEPT;
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+		if (nat == NULL)
+			return NF_ACCEPT;
+	}
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED + IP_CT_IS_REPLY:
+		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+							   ops->hooknum))
+				return NF_DROP;
+			else
+				return NF_ACCEPT;
+		}
+		/* Fall through */
+	case IP_CT_NEW:
+		if (nf_nat_initialized(ct, maniptype))
+			break;
+
+		ret = nft_do_chain(ops, skb, in, out, okfn);
+		if (ret != NF_ACCEPT)
+			return ret;
+		if (!nf_nat_initialized(ct, maniptype)) {
+			ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+			if (ret != NF_ACCEPT)
+				return ret;
+		}
+	default:
+		break;
+	}
+
+	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	__be32 daddr = ip_hdr(skb)->daddr;
+	unsigned int ret;
+
+	ret = nf_nat_fn(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    ip_hdr(skb)->daddr != daddr) {
+		skb_dst_drop(skb);
+	}
+	return ret;
+}
+
+static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
+				       struct sk_buff *skb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo __maybe_unused;
+	const struct nf_conn *ct __maybe_unused;
+	unsigned int ret;
+
+	ret = nf_nat_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.src.u3.ip !=
+		    ct->tuplehash[!dir].tuple.dst.u3.ip ||
+		    ct->tuplehash[dir].tuple.src.u.all !=
+		    ct->tuplehash[!dir].tuple.dst.u.all)
+			return nf_xfrm_me_harder(skb, AF_INET) == 0 ?
+								ret : NF_DROP;
+	}
+#endif
+	return ret;
+}
+
+static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
+				  struct sk_buff *skb,
+				  const struct net_device *in,
+				  const struct net_device *out,
+				  int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	unsigned int ret;
+
+	ret = nf_nat_fn(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+		    ct->tuplehash[!dir].tuple.src.u3.ip) {
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
+				ret = NF_DROP;
+		}
+#ifdef CONFIG_XFRM
+		else if (ct->tuplehash[dir].tuple.dst.u.all !=
+			 ct->tuplehash[!dir].tuple.src.u.all)
+			if (nf_xfrm_me_harder(skb, AF_INET))
+				ret = NF_DROP;
+#endif
+	}
+	return ret;
+}
+
+static struct nft_base_chain nf_chain_nat_prerouting __read_mostly = {
+	.chain	= {
+		.name		= "PREROUTING",
+		.rules		= LIST_HEAD_INIT(nf_chain_nat_prerouting.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_nat_prerouting,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP_PRI_NAT_DST,
+		.priv		= &nf_chain_nat_prerouting.chain,
+	},
+};
+
+static struct nft_base_chain nf_chain_nat_postrouting __read_mostly = {
+	.chain	= {
+		.name		= "POSTROUTING",
+		.rules		= LIST_HEAD_INIT(nf_chain_nat_postrouting.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_nat_postrouting,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_NAT_SRC,
+		.priv		= &nf_chain_nat_postrouting.chain,
+	},
+};
+
+static struct nft_base_chain nf_chain_nat_output __read_mostly = {
+	.chain	= {
+		.name		= "OUTPUT",
+		.rules		= LIST_HEAD_INIT(nf_chain_nat_output.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_nat_output,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_NAT_DST,
+		.priv		= &nf_chain_nat_output.chain,
+	},
+};
+
+static struct nft_base_chain nf_chain_nat_input __read_mostly = {
+	.chain	= {
+		.name		= "INPUT",
+		.rules		= LIST_HEAD_INIT(nf_chain_nat_input.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_nat_fn,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_NAT_SRC,
+		.priv		= &nf_chain_nat_input.chain,
+	},
+};
+
+
+static struct nft_table nf_table_nat_ipv4 __read_mostly = {
+	.name	= "nat",
+	.chains	= LIST_HEAD_INIT(nf_table_nat_ipv4.chains),
+};
+
+static int __init nf_table_nat_init(void)
+{
+	int err;
+
+	list_add_tail(&nf_chain_nat_prerouting.chain.list,
+		      &nf_table_nat_ipv4.chains);
+	list_add_tail(&nf_chain_nat_postrouting.chain.list,
+		      &nf_table_nat_ipv4.chains);
+	list_add_tail(&nf_chain_nat_output.chain.list,
+		      &nf_table_nat_ipv4.chains);
+	list_add_tail(&nf_chain_nat_input.chain.list,
+		      &nf_table_nat_ipv4.chains);
+
+	err = nft_register_table(&nf_table_nat_ipv4, NFPROTO_IPV4);
+	if (err < 0)
+		goto err1;
+
+	err = nft_register_expr(&nft_nat_ops);
+	if (err < 0)
+		goto err2;
+
+	return 0;
+
+err2:
+	nft_unregister_table(&nf_table_nat_ipv4, NFPROTO_IPV4);
+err1:
+	return err;
+}
+
+static void __exit nf_table_nat_exit(void)
+{
+	nft_unregister_expr(&nft_nat_ops);
+	nft_unregister_table(&nf_table_nat_ipv4, AF_INET);
+}
+
+module_init(nf_table_nat_init);
+module_exit(nf_table_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_TABLE(AF_INET, "nat");
+MODULE_ALIAS_NFT_EXPR("nat");
diff --git a/net/ipv4/netfilter/nf_table_route_ipv4.c b/net/ipv4/netfilter/nf_table_route_ipv4.c
new file mode 100644
index 000000000000..4f257a1ed661
--- /dev/null
+++ b/net/ipv4/netfilter/nf_table_route_ipv4.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	u32 mark;
+	__be32 saddr, daddr;
+	u_int8_t tos;
+	const struct iphdr *iph;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
+		return NF_ACCEPT;
+
+	mark = skb->mark;
+	iph = ip_hdr(skb);
+	saddr = iph->saddr;
+	daddr = iph->daddr;
+	tos = iph->tos;
+
+	ret = nft_do_chain(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_QUEUE) {
+		iph = ip_hdr(skb);
+
+		if (iph->saddr != saddr ||
+		    iph->daddr != daddr ||
+		    skb->mark != mark ||
+		    iph->tos != tos)
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
+				ret = NF_DROP;
+	}
+	return ret;
+}
+
+static struct nft_base_chain nf_chain_route_output __read_mostly = {
+	.chain	= {
+		.name		= "OUTPUT",
+		.rules		= LIST_HEAD_INIT(nf_chain_route_output.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_route_table_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_MANGLE,
+		.priv		= &nf_chain_route_output.chain,
+	},
+};
+
+static struct nft_table nf_table_route_ipv4 __read_mostly = {
+	.name	= "route",
+	.chains	= LIST_HEAD_INIT(nf_table_route_ipv4.chains),
+};
+
+static int __init nf_table_route_init(void)
+{
+	list_add_tail(&nf_chain_route_output.chain.list,
+		      &nf_table_route_ipv4.chains);
+	return nft_register_table(&nf_table_route_ipv4, NFPROTO_IPV4);
+}
+
+static void __exit nf_table_route_exit(void)
+{
+	nft_unregister_table(&nf_table_route_ipv4, NFPROTO_IPV4);
+}
+
+module_init(nf_table_route_init);
+module_exit(nf_table_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_TABLE(AF_INET, "route");
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644
index 000000000000..63d0a3bf53d3
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/ip.h>
+
+static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+				    struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	if (unlikely(skb->len < sizeof(struct iphdr) ||
+		     ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
+		if (net_ratelimit())
+			pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
+				"packet\n");
+		return NF_ACCEPT;
+	}
+
+	return nft_do_chain(ops, skb, in, out, okfn);
+}
+
+static struct nft_af_info nft_af_ipv4 __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.nhooks		= NF_INET_NUMHOOKS,
+	.owner		= THIS_MODULE,
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nft_ipv4_output,
+	},
+};
+
+static int __init nf_tables_ipv4_init(void)
+{
+	return nft_register_afinfo(&nft_af_ipv4);
+}
+
+static void __exit nf_tables_ipv4_exit(void)
+{
+	nft_unregister_afinfo(&nft_af_ipv4);
+}
+
+module_init(nf_tables_ipv4_init);
+module_exit(nf_tables_ipv4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET);
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..b4ee8d3bb1e4
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+
+struct nft_reject {
+	enum nft_reject_types	type:8;
+	u8			icmp_code;
+};
+
+static void nft_reject_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1],
+			      const struct nft_pktinfo *pkt)
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0);
+		break;
+	case NFT_REJECT_TCP_RST:
+		break;
+	}
+
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+	[NFTA_REJECT_TYPE]		= { .type = NLA_U32 },
+	[NFTA_REJECT_ICMP_CODE]		= { .type = NLA_U8 },
+};
+
+static int nft_reject_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_REJECT_TYPE] == NULL)
+		return -EINVAL;
+
+	priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+			return -EINVAL;
+		priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+	case NFT_REJECT_TCP_RST:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_reject *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type))
+		goto nla_put_failure;
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+			goto nla_put_failure;
+		break;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops reject_ops __read_mostly = {
+	.name		= "reject",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_reject_eval,
+	.init		= nft_reject_init,
+	.dump		= nft_reject_dump,
+	.policy		= nft_reject_policy,
+	.maxattr	= NFTA_REJECT_MAX,
+};
+
+static int __init nft_reject_module_init(void)
+{
+	return nft_register_expr(&reject_ops);
+}
+
+static void __exit nft_reject_module_exit(void)
+{
+	nft_unregister_expr(&reject_ops);
+}
+
+module_init(nft_reject_module_init);
+module_exit(nft_reject_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("reject");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a7f842b29b67..5677e38eeca3 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,14 @@ config NF_CONNTRACK_IPV6
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_TABLES_IPV6
+	depends on NF_TABLES
+	tristate "IPv6 nf_tables support"
+
+config NF_TABLE_ROUTE_IPV6
+	depends on NF_TABLES_IPV6
+	tristate "IPv6 nf_tables route table support"
+
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b53738f798c..956af4492d10 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -23,6 +23,10 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
 nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
 
+# nf_tables
+obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
+obj-$(CONFIG_NF_TABLE_ROUTE_IPV6) += nf_table_route_ipv6.o
+
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
 obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/nf_table_route_ipv6.c b/net/ipv6/netfilter/nf_table_route_ipv6.c
new file mode 100644
index 000000000000..48ac65c7b398
--- /dev/null
+++ b/net/ipv6/netfilter/nf_table_route_ipv6.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/route.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	struct in6_addr saddr, daddr;
+	u_int8_t hop_limit;
+	u32 mark, flowlabel;
+
+	/* save source/dest address, mark, hoplimit, flowlabel, priority */
+	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+	mark = skb->mark;
+	hop_limit = ipv6_hdr(skb)->hop_limit;
+
+	/* flowlabel and prio (includes version, which shouldn't change either */
+	flowlabel = *((u32 *)ipv6_hdr(skb));
+
+	ret = nft_do_chain(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_QUEUE &&
+	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+	     skb->mark != mark ||
+	     ipv6_hdr(skb)->hop_limit != hop_limit ||
+	     flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
+		return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+
+	return ret;
+}
+
+static struct nft_base_chain nf_chain_route_output __read_mostly = {
+	.chain	= {
+		.name		= "OUTPUT",
+		.rules		= LIST_HEAD_INIT(nf_chain_route_output.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_route_table_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_MANGLE,
+		.priv		= &nf_chain_route_output.chain,
+	},
+};
+
+static struct nft_table nf_table_route_ipv6 __read_mostly = {
+	.name	= "route",
+	.chains	= LIST_HEAD_INIT(nf_table_route_ipv6.chains),
+};
+
+static int __init nf_table_route_init(void)
+{
+	list_add_tail(&nf_chain_route_output.chain.list,
+		      &nf_table_route_ipv6.chains);
+	return nft_register_table(&nf_table_route_ipv6, NFPROTO_IPV6);
+}
+
+static void __exit nf_table_route_exit(void)
+{
+	nft_unregister_table(&nf_table_route_ipv6, NFPROTO_IPV6);
+}
+
+module_init(nf_table_route_init);
+module_exit(nf_table_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_TABLE(AF_INET6, "route");
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644
index 000000000000..e0717cea4913
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+				    struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+		if (net_ratelimit())
+			pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
+				"packet\n");
+		return NF_ACCEPT;
+	}
+
+	return nft_do_chain(ops, skb, in, out, okfn);
+}
+
+static struct nft_af_info nft_af_ipv6 __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.nhooks		= NF_INET_NUMHOOKS,
+	.owner		= THIS_MODULE,
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nft_ipv6_output,
+	},
+};
+
+static int __init nf_tables_ipv6_init(void)
+{
+	return nft_register_afinfo(&nft_af_ipv6);
+}
+
+static void __exit nf_tables_ipv6_exit(void)
+{
+	nft_unregister_afinfo(&nft_af_ipv6);
+}
+
+module_init(nf_tables_ipv6_init);
+module_exit(nf_tables_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET6);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6e839b6dff2b..c271e1af93b5 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -413,6 +413,43 @@ config NETFILTER_SYNPROXY
 
 endif # NF_CONNTRACK
 
+config NF_TABLES
+	depends on NETFILTER_NETLINK
+	tristate "Netfilter nf_tables support"
+
+config NFT_EXTHDR
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables IPv6 exthdr module"
+
+config NFT_META
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables meta module"
+
+config NFT_CT
+	depends on NF_TABLES
+	depends on NF_CONNTRACK
+	tristate "Netfilter nf_tables conntrack module"
+
+config NFT_SET
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables set module"
+
+config NFT_HASH
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables hash module"
+
+config NFT_COUNTER
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables counter module"
+
+config NFT_LOG
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables log module"
+
+config NFT_LIMIT
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables limit module"
+
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c3a0a12907f6..1ca3f3932826 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -64,6 +64,22 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
 # SYNPROXY
 obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
 
+# nf_tables
+nf_tables-objs += nf_tables_core.o nf_tables_api.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+
+obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
+obj-$(CONFIG_NFT_EXTHDR)	+= nft_exthdr.o
+obj-$(CONFIG_NFT_META)		+= nft_meta.o
+obj-$(CONFIG_NFT_CT)		+= nft_ct.o
+obj-$(CONFIG_NFT_LIMIT)		+= nft_limit.o
+#nf_tables-objs			+= nft_meta_target.o
+obj-$(CONFIG_NFT_SET)		+= nft_set.o
+obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
+obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
+obj-$(CONFIG_NFT_LOG)		+= nft_log.o
+
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
new file mode 100644
index 000000000000..7d59c89c6c75
--- /dev/null
+++ b/net/netfilter/nf_tables_api.c
@@ -0,0 +1,1760 @@
+/*
+ * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/sock.h>
+
+static LIST_HEAD(nf_tables_afinfo);
+static LIST_HEAD(nf_tables_expressions);
+
+/**
+ *	nft_register_afinfo - register nf_tables address family info
+ *
+ *	@afi: address family info to register
+ *
+ *	Register the address family for use with nf_tables. Returns zero on
+ *	success or a negative errno code otherwise.
+ */
+int nft_register_afinfo(struct nft_af_info *afi)
+{
+	INIT_LIST_HEAD(&afi->tables);
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&afi->list, &nf_tables_afinfo);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_afinfo);
+
+/**
+ *	nft_unregister_afinfo - unregister nf_tables address family info
+ *
+ *	@afi: address family info to unregister
+ *
+ *	Unregister the address family for use with nf_tables.
+ */
+void nft_unregister_afinfo(struct nft_af_info *afi)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&afi->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
+
+static struct nft_af_info *nft_afinfo_lookup(int family)
+{
+	struct nft_af_info *afi;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (afi->family == family)
+			return afi;
+	}
+	return NULL;
+}
+
+static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload)
+{
+	struct nft_af_info *afi;
+
+	afi = nft_afinfo_lookup(family);
+	if (afi != NULL)
+		return afi;
+#ifdef CONFIG_MODULES
+	if (autoload) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-afinfo-%u", family);
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		afi = nft_afinfo_lookup(family);
+		if (afi != NULL)
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-EAFNOSUPPORT);
+}
+
+/*
+ * Tables
+ */
+
+static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+					  const struct nlattr *nla)
+{
+	struct nft_table *table;
+
+	list_for_each_entry(table, &afi->tables, list) {
+		if (!nla_strcmp(nla, table->name))
+			return table;
+	}
+	return NULL;
+}
+
+static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+						const struct nlattr *nla,
+						bool autoload)
+{
+	struct nft_table *table;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	table = nft_table_lookup(afi, nla);
+	if (table != NULL)
+		return table;
+
+#ifdef CONFIG_MODULES
+	if (autoload) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-table-%u-%*.s", afi->family,
+			       nla_len(nla)-1, (const char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (nft_table_lookup(afi, nla))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-ENOENT);
+}
+
+static inline u64 nf_tables_alloc_handle(struct nft_table *table)
+{
+	return ++table->hgenerator;
+}
+
+static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
+	[NFTA_TABLE_NAME]	= { .type = NLA_STRING },
+};
+
+static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
+				     int event, u32 flags, int family,
+				     const struct nft_table *table)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name))
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_table_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  int event, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	bool report;
+	int err;
+
+	report = nlh ? nlmsg_report(nlh) : false;
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
+					family, table);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static int nf_tables_dump_tables(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			if (idx < s_idx)
+				goto cont;
+			if (idx > s_idx)
+				memset(&cb->args[1], 0,
+				       sizeof(cb->args) - sizeof(cb->args[0]));
+			if (nf_tables_fill_table_info(skb,
+						      NETLINK_CB(cb->skb).portid,
+						      cb->nlh->nlmsg_seq,
+						      NFT_MSG_NEWTABLE,
+						      NLM_F_MULTI,
+						      afi->family, table) < 0)
+				goto done;
+cont:
+			idx++;
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	struct sk_buff *skb2;
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_tables,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+					nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
+					family, table);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr *name;
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	name = nla[NFTA_TABLE_NAME];
+	table = nf_tables_table_lookup(afi, name, false);
+	if (IS_ERR(table)) {
+		if (PTR_ERR(table) != -ENOENT)
+			return PTR_ERR(table);
+		table = NULL;
+	}
+
+	if (table != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+		return 0;
+	}
+
+	table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+	if (table == NULL)
+		return -ENOMEM;
+
+	nla_strlcpy(table->name, name, nla_len(name));
+	INIT_LIST_HEAD(&table->chains);
+
+	list_add_tail(&table->list, &afi->tables);
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+	return 0;
+}
+
+static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if (table->flags & NFT_TABLE_BUILTIN)
+		return -EOPNOTSUPP;
+
+	if (table->use)
+		return -EBUSY;
+
+	list_del(&table->list);
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
+	kfree(table);
+	return 0;
+}
+
+static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi,
+						  const char *name)
+{
+	struct nft_table *table;
+
+	list_for_each_entry(table, &afi->tables, list) {
+		if (!strcmp(name, table->name))
+			return table;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  const struct nft_chain *chain,
+				  int event, int family);
+
+/**
+ *	nft_register_table - register a built-in table
+ *
+ *	@table: the table to register
+ *	@family: protocol family to register table with
+ *
+ *	Register a built-in table for use with nf_tables. Returns zero on
+ *	success or a negative errno code otherwise.
+ */
+int nft_register_table(struct nft_table *table, int family)
+{
+	struct nft_af_info *afi;
+	struct nft_table *t;
+	struct nft_chain *chain;
+	int err;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+again:
+	afi = nf_tables_afinfo_lookup(family, true);
+	if (IS_ERR(afi)) {
+		err = PTR_ERR(afi);
+		if (err == -EAGAIN)
+			goto again;
+		goto err;
+	}
+
+	t = __nf_tables_table_lookup(afi, table->name);
+	if (IS_ERR(t)) {
+		err = PTR_ERR(t);
+		if (err != -ENOENT)
+			goto err;
+		t = NULL;
+	}
+
+	if (t != NULL) {
+		err = -EEXIST;
+		goto err;
+	}
+
+	table->flags |= NFT_TABLE_BUILTIN;
+	list_add_tail(&table->list, &afi->tables);
+	nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family);
+	list_for_each_entry(chain, &table->chains, list)
+		nf_tables_chain_notify(NULL, NULL, table, chain,
+				       NFT_MSG_NEWCHAIN, family);
+	err = 0;
+err:
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nft_register_table);
+
+/**
+ *	nft_unregister_table - unregister a built-in table
+ *
+ *	@table: the table to unregister
+ *	@family: protocol family to unregister table with
+ *
+ *	Unregister a built-in table for use with nf_tables.
+ */
+void nft_unregister_table(struct nft_table *table, int family)
+{
+	struct nft_chain *chain;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&table->list);
+	list_for_each_entry(chain, &table->chains, list)
+		nf_tables_chain_notify(NULL, NULL, table, chain,
+				       NFT_MSG_DELCHAIN, family);
+	nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_table);
+
+/*
+ * Chains
+ */
+
+static struct nft_chain *
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+{
+	struct nft_chain *chain;
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (chain->handle == handle)
+			return chain;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
+						const struct nlattr *nla)
+{
+	struct nft_chain *chain;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (!nla_strcmp(nla, chain->name))
+			return chain;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
+	[NFTA_CHAIN_TABLE]	= { .type = NLA_STRING },
+	[NFTA_CHAIN_HANDLE]	= { .type = NLA_U64 },
+	[NFTA_CHAIN_NAME]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+	[NFTA_CHAIN_HOOK]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
+	[NFTA_HOOK_HOOKNUM]	= { .type = NLA_U32 },
+	[NFTA_HOOK_PRIORITY]	= { .type = NLA_U32 },
+};
+
+static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
+				     int event, u32 flags, int family,
+				     const struct nft_table *table,
+				     const struct nft_chain *chain)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle)))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
+		goto nla_put_failure;
+
+	if (chain->flags & NFT_BASE_CHAIN) {
+		const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops;
+		struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
+		if (nest == NULL)
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+			goto nla_put_failure;
+		nla_nest_end(skb, nest);
+	}
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  const struct nft_chain *chain,
+				  int event, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	bool report;
+	int err;
+
+	report = nlh ? nlmsg_report(nlh) : false;
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
+					table, chain);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static int nf_tables_dump_chains(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			list_for_each_entry(chain, &table->chains, list) {
+				if (idx < s_idx)
+					goto cont;
+				if (idx > s_idx)
+					memset(&cb->args[1], 0,
+					       sizeof(cb->args) - sizeof(cb->args[0]));
+				if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+							      cb->nlh->nlmsg_seq,
+							      NFT_MSG_NEWCHAIN,
+							      NLM_F_MULTI,
+							      afi->family, table, chain) < 0)
+					goto done;
+cont:
+				idx++;
+			}
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+
+static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	struct sk_buff *skb2;
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_chains,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+					nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
+					family, table, chain);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr * uninitialized_var(name);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_base_chain *basechain;
+	struct nlattr *ha[NFTA_HOOK_MAX + 1];
+	int family = nfmsg->nfgen_family;
+	u64 handle = 0;
+	int err;
+	bool create;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if (table->use == UINT_MAX)
+		return -EOVERFLOW;
+
+	chain = NULL;
+	name = nla[NFTA_CHAIN_NAME];
+
+	if (nla[NFTA_CHAIN_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+		chain = nf_tables_chain_lookup_byhandle(table, handle);
+		if (IS_ERR(chain))
+			return PTR_ERR(chain);
+	} else {
+		chain = nf_tables_chain_lookup(table, name);
+		if (IS_ERR(chain)) {
+			if (PTR_ERR(chain) != -ENOENT)
+				return PTR_ERR(chain);
+			chain = NULL;
+		}
+	}
+
+	if (chain != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+
+		if (nla[NFTA_CHAIN_HANDLE] && name &&
+		    !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
+			return -EEXIST;
+
+		if (nla[NFTA_CHAIN_HANDLE] && name)
+			nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+		goto notify;
+	}
+
+	if (nla[NFTA_CHAIN_HOOK]) {
+		struct nf_hook_ops *ops;
+
+		err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+				       nft_hook_policy);
+		if (err < 0)
+			return err;
+		if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+		    ha[NFTA_HOOK_PRIORITY] == NULL)
+			return -EINVAL;
+		if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks)
+			return -EINVAL;
+
+		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+		if (basechain == NULL)
+			return -ENOMEM;
+		chain = &basechain->chain;
+
+		ops = &basechain->ops;
+		ops->pf		= family;
+		ops->owner	= afi->owner;
+		ops->hooknum	= ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+		ops->priority	= ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+		ops->priv	= chain;
+		ops->hook	= nft_do_chain;
+		if (afi->hooks[ops->hooknum])
+			ops->hook = afi->hooks[ops->hooknum];
+
+		chain->flags |= NFT_BASE_CHAIN;
+	} else {
+		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+		if (chain == NULL)
+			return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&chain->rules);
+	chain->handle = nf_tables_alloc_handle(table);
+	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+	list_add_tail(&chain->list, &table->chains);
+	table->use++;
+notify:
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
+			       family);
+	return 0;
+}
+
+static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+{
+	struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
+
+	BUG_ON(chain->use > 0);
+
+	if (chain->flags & NFT_BASE_CHAIN)
+		kfree(nft_base_chain(chain));
+	else
+		kfree(chain);
+}
+
+static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	if (chain->flags & NFT_CHAIN_BUILTIN)
+		return -EOPNOTSUPP;
+
+	if (!list_empty(&chain->rules))
+		return -EBUSY;
+
+	list_del(&chain->list);
+	table->use--;
+
+	if (chain->flags & NFT_BASE_CHAIN)
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
+			       family);
+
+	/* Make sure all rule references are gone before this is released */
+	call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+	return 0;
+}
+
+static void nft_ctx_init(struct nft_ctx *ctx,
+			 const struct nft_af_info *afi,
+			 const struct nft_table *table,
+			 const struct nft_chain *chain)
+{
+	ctx->afi   = afi;
+	ctx->table = table;
+	ctx->chain = chain;
+}
+
+/*
+ * Expressions
+ */
+
+/**
+ *	nft_register_expr - register nf_tables expr operations
+ *	@ops: expr operations
+ *
+ *	Registers the expr operations for use with nf_tables. Returns zero on
+ *	success or a negative errno code otherwise.
+ */
+int nft_register_expr(struct nft_expr_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&ops->list, &nf_tables_expressions);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_expr);
+
+/**
+ *	nft_unregister_expr - unregister nf_tables expr operations
+ *	@ops: expr operations
+ *
+ * 	Unregisters the expr operations for use with nf_tables.
+ */
+void nft_unregister_expr(struct nft_expr_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&ops->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_expr);
+
+static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla)
+{
+	const struct nft_expr_ops *ops;
+
+	list_for_each_entry(ops, &nf_tables_expressions, list) {
+		if (!nla_strcmp(nla, ops->name))
+			return ops;
+	}
+	return NULL;
+}
+
+static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla)
+{
+	const struct nft_expr_ops *ops;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	ops = __nft_expr_ops_get(nla);
+	if (ops != NULL && try_module_get(ops->owner))
+		return ops;
+
+#ifdef CONFIG_MODULES
+	if (ops == NULL) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-expr-%.*s",
+			       nla_len(nla), (char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (__nft_expr_ops_get(nla))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
+	[NFTA_EXPR_NAME]	= { .type = NLA_STRING },
+	[NFTA_EXPR_DATA]	= { .type = NLA_NESTED },
+};
+
+static int nf_tables_fill_expr_info(struct sk_buff *skb,
+				    const struct nft_expr *expr)
+{
+	if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name))
+		goto nla_put_failure;
+
+	if (expr->ops->dump) {
+		struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
+		if (data == NULL)
+			goto nla_put_failure;
+		if (expr->ops->dump(skb, expr) < 0)
+			goto nla_put_failure;
+		nla_nest_end(skb, data);
+	}
+
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+};
+
+struct nft_expr_info {
+	const struct nft_expr_ops	*ops;
+	struct nlattr			*tb[NFTA_EXPR_MAX + 1];
+};
+
+static int nf_tables_expr_parse(const struct nlattr *nla,
+				struct nft_expr_info *info)
+{
+	const struct nft_expr_ops *ops;
+	int err;
+
+	err = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
+	if (err < 0)
+		return err;
+
+	ops = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	info->ops = ops;
+	return 0;
+}
+
+static int nf_tables_newexpr(const struct nft_ctx *ctx,
+			     struct nft_expr_info *info,
+			     struct nft_expr *expr)
+{
+	const struct nft_expr_ops *ops = info->ops;
+	int err;
+
+	expr->ops = ops;
+	if (ops->init) {
+		struct nlattr *ma[ops->maxattr + 1];
+
+		if (info->tb[NFTA_EXPR_DATA]) {
+			err = nla_parse_nested(ma, ops->maxattr,
+					       info->tb[NFTA_EXPR_DATA],
+					       ops->policy);
+			if (err < 0)
+				goto err1;
+		} else
+			memset(ma, 0, sizeof(ma[0]) * (ops->maxattr + 1));
+
+		err = ops->init(ctx, expr, (const struct nlattr **)ma);
+		if (err < 0)
+			goto err1;
+	}
+
+	info->ops = NULL;
+	return 0;
+
+err1:
+	expr->ops = NULL;
+	return err;
+}
+
+static void nf_tables_expr_destroy(struct nft_expr *expr)
+{
+	if (expr->ops->destroy)
+		expr->ops->destroy(expr);
+	module_put(expr->ops->owner);
+}
+
+/*
+ * Rules
+ */
+
+static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
+						u64 handle)
+{
+	struct nft_rule *rule;
+
+	// FIXME: this sucks
+	list_for_each_entry(rule, &chain->rules, list) {
+		if (handle == rule->handle)
+			return rule;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
+					      const struct nlattr *nla)
+{
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+}
+
+static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
+	[NFTA_RULE_TABLE]	= { .type = NLA_STRING },
+	[NFTA_RULE_CHAIN]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+	[NFTA_RULE_HANDLE]	= { .type = NLA_U64 },
+	[NFTA_RULE_EXPRESSIONS]	= { .type = NLA_NESTED },
+};
+
+static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
+				    int event, u32 flags, int family,
+				    const struct nft_table *table,
+				    const struct nft_chain *chain,
+				    const struct nft_rule *rule)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	const struct nft_expr *expr, *next;
+	struct nlattr *list;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+			flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle)))
+		goto nla_put_failure;
+
+	list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS);
+	if (list == NULL)
+		goto nla_put_failure;
+	nft_rule_for_each_expr(expr, next, rule) {
+		struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM);
+		if (elem == NULL)
+			goto nla_put_failure;
+		if (nf_tables_fill_expr_info(skb, expr) < 0)
+			goto nla_put_failure;
+		nla_nest_end(skb, elem);
+	}
+	nla_nest_end(skb, list);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_rule_notify(const struct sk_buff *oskb,
+				 const struct nlmsghdr *nlh,
+				 const struct nft_table *table,
+				 const struct nft_chain *chain,
+				 const struct nft_rule *rule,
+				 int event, u32 flags, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = NETLINK_CB(oskb).portid;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	u32 seq = nlh->nlmsg_seq;
+	bool report;
+	int err;
+
+	report = nlmsg_report(nlh);
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
+				       family, table, chain, rule);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static int nf_tables_dump_rules(struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	const struct nft_rule *rule;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			list_for_each_entry(chain, &table->chains, list) {
+				list_for_each_entry(rule, &chain->rules, list) {
+					if (idx < s_idx)
+						goto cont;
+					if (idx > s_idx)
+						memset(&cb->args[1], 0,
+						       sizeof(cb->args) - sizeof(cb->args[0]));
+					if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+								      cb->nlh->nlmsg_seq,
+								      NFT_MSG_NEWRULE,
+								      NLM_F_MULTI | NLM_F_APPEND,
+								      afi->family, table, chain, rule) < 0)
+						goto done;
+cont:
+					idx++;
+				}
+			}
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	const struct nft_rule *rule;
+	struct sk_buff *skb2;
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_rules,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+	if (IS_ERR(rule))
+		return PTR_ERR(rule);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+				       nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
+				       family, table, chain, rule);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
+{
+	struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
+	struct nft_expr *expr;
+
+	/*
+	 * Careful: some expressions might not be initialized in case this
+	 * is called on error from nf_tables_newrule().
+	 */
+	expr = nft_expr_first(rule);
+	while (expr->ops && expr != nft_expr_last(rule)) {
+		nf_tables_expr_destroy(expr);
+		expr = nft_expr_next(expr);
+	}
+	kfree(rule);
+}
+
+static void nf_tables_rule_destroy(struct nft_rule *rule)
+{
+	call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy);
+}
+
+#define NFT_RULE_MAXEXPRS	128
+
+static struct nft_expr_info *info;
+
+static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_rule *rule, *old_rule = NULL;
+	struct nft_expr *expr;
+	struct nft_ctx ctx;
+	struct nlattr *tmp;
+	unsigned int size, i, n;
+	int err, rem;
+	bool create;
+	u64 handle;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	if (nla[NFTA_RULE_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
+		rule = __nf_tables_rule_lookup(chain, handle);
+		if (IS_ERR(rule))
+			return PTR_ERR(rule);
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			old_rule = rule;
+		else
+			return -EOPNOTSUPP;
+	} else {
+		if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EINVAL;
+		handle = nf_tables_alloc_handle(table);
+	}
+
+	n = 0;
+	size = 0;
+	if (nla[NFTA_RULE_EXPRESSIONS]) {
+		nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
+			err = -EINVAL;
+			if (nla_type(tmp) != NFTA_LIST_ELEM)
+				goto err1;
+			if (n == NFT_RULE_MAXEXPRS)
+				goto err1;
+			err = nf_tables_expr_parse(tmp, &info[n]);
+			if (err < 0)
+				goto err1;
+			size += info[n].ops->size;
+			n++;
+		}
+	}
+
+	err = -ENOMEM;
+	rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+	if (rule == NULL)
+		goto err1;
+
+	rule->handle = handle;
+	rule->dlen   = size;
+
+	nft_ctx_init(&ctx, afi, table, chain);
+	expr = nft_expr_first(rule);
+	for (i = 0; i < n; i++) {
+		err = nf_tables_newexpr(&ctx, &info[i], expr);
+		if (err < 0)
+			goto err2;
+		expr = nft_expr_next(expr);
+	}
+
+	/* Register hook when first rule is inserted into a base chain */
+	if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) {
+		err = nf_register_hook(&nft_base_chain(chain)->ops);
+		if (err < 0)
+			goto err2;
+	}
+
+	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+		list_replace_rcu(&old_rule->list, &rule->list);
+		nf_tables_rule_destroy(old_rule);
+	} else if (nlh->nlmsg_flags & NLM_F_APPEND)
+		list_add_tail_rcu(&rule->list, &chain->rules);
+	else
+		list_add_rcu(&rule->list, &chain->rules);
+
+	nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE,
+			      nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE),
+			      nfmsg->nfgen_family);
+	return 0;
+
+err2:
+	nf_tables_rule_destroy(rule);
+err1:
+	for (i = 0; i < n; i++) {
+		if (info[i].ops != NULL)
+			module_put(info[i].ops->owner);
+	}
+	return err;
+}
+
+static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_rule *rule, *tmp;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	if (nla[NFTA_RULE_HANDLE]) {
+		rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+		if (IS_ERR(rule))
+			return PTR_ERR(rule);
+
+		/* List removal must be visible before destroying expressions */
+		list_del_rcu(&rule->list);
+
+		nf_tables_rule_notify(skb, nlh, table, chain, rule,
+				      NFT_MSG_DELRULE, 0, family);
+		nf_tables_rule_destroy(rule);
+	} else {
+		/* Remove all rules in this chain */
+		list_for_each_entry_safe(rule, tmp, &chain->rules, list) {
+			list_del_rcu(&rule->list);
+
+			nf_tables_rule_notify(skb, nlh, table, chain, rule,
+					      NFT_MSG_DELRULE, 0, family);
+			nf_tables_rule_destroy(rule);
+		}
+	}
+
+	/* Unregister hook when last rule from base chain is deleted */
+	if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN)
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+	return 0;
+}
+
+static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
+	[NFT_MSG_NEWTABLE] = {
+		.call		= nf_tables_newtable,
+		.attr_count	= NFTA_TABLE_MAX,
+		.policy		= nft_table_policy,
+	},
+	[NFT_MSG_GETTABLE] = {
+		.call		= nf_tables_gettable,
+		.attr_count	= NFTA_TABLE_MAX,
+		.policy		= nft_table_policy,
+	},
+	[NFT_MSG_DELTABLE] = {
+		.call		= nf_tables_deltable,
+		.attr_count	= NFTA_TABLE_MAX,
+		.policy		= nft_table_policy,
+	},
+	[NFT_MSG_NEWCHAIN] = {
+		.call		= nf_tables_newchain,
+		.attr_count	= NFTA_CHAIN_MAX,
+		.policy		= nft_chain_policy,
+	},
+	[NFT_MSG_GETCHAIN] = {
+		.call		= nf_tables_getchain,
+		.attr_count	= NFTA_CHAIN_MAX,
+		.policy		= nft_chain_policy,
+	},
+	[NFT_MSG_DELCHAIN] = {
+		.call		= nf_tables_delchain,
+		.attr_count	= NFTA_CHAIN_MAX,
+		.policy		= nft_chain_policy,
+	},
+	[NFT_MSG_NEWRULE] = {
+		.call		= nf_tables_newrule,
+		.attr_count	= NFTA_RULE_MAX,
+		.policy		= nft_rule_policy,
+	},
+	[NFT_MSG_GETRULE] = {
+		.call		= nf_tables_getrule,
+		.attr_count	= NFTA_RULE_MAX,
+		.policy		= nft_rule_policy,
+	},
+	[NFT_MSG_DELRULE] = {
+		.call		= nf_tables_delrule,
+		.attr_count	= NFTA_RULE_MAX,
+		.policy		= nft_rule_policy,
+	},
+};
+
+static const struct nfnetlink_subsystem nf_tables_subsys = {
+	.name		= "nf_tables",
+	.subsys_id	= NFNL_SUBSYS_NFTABLES,
+	.cb_count	= NFT_MSG_MAX,
+	.cb		= nf_tables_cb,
+};
+
+/**
+ *	nft_validate_input_register - validate an expressions' input register
+ *
+ *	@reg: the register number
+ *
+ * 	Validate that the input register is one of the general purpose
+ * 	registers.
+ */
+int nft_validate_input_register(enum nft_registers reg)
+{
+	if (reg <= NFT_REG_VERDICT)
+		return -EINVAL;
+	if (reg > NFT_REG_MAX)
+		return -ERANGE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_input_register);
+
+/**
+ *	nft_validate_output_register - validate an expressions' output register
+ *
+ *	@reg: the register number
+ *
+ * 	Validate that the output register is one of the general purpose
+ * 	registers or the verdict register.
+ */
+int nft_validate_output_register(enum nft_registers reg)
+{
+	if (reg < NFT_REG_VERDICT)
+		return -EINVAL;
+	if (reg > NFT_REG_MAX)
+		return -ERANGE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_output_register);
+
+/**
+ *	nft_validate_data_load - validate an expressions' data load
+ *
+ *	@ctx: context of the expression performing the load
+ * 	@reg: the destination register number
+ * 	@data: the data to load
+ * 	@type: the data type
+ *
+ * 	Validate that a data load uses the appropriate data type for
+ * 	the destination register. A value of NULL for the data means
+ * 	that its runtime gathered data, which is always of type
+ * 	NFT_DATA_VALUE.
+ */
+int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
+			   const struct nft_data *data,
+			   enum nft_data_types type)
+{
+	switch (reg) {
+	case NFT_REG_VERDICT:
+		if (data == NULL || type != NFT_DATA_VERDICT)
+			return -EINVAL;
+		// FIXME: do loop detection
+		return 0;
+	default:
+		if (data != NULL && type != NFT_DATA_VALUE)
+			return -EINVAL;
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(nft_validate_data_load);
+
+static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
+	[NFTA_VERDICT_CODE]	= { .type = NLA_U32 },
+	[NFTA_VERDICT_CHAIN]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+};
+
+static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+			    struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	struct nlattr *tb[NFTA_VERDICT_MAX + 1];
+	struct nft_chain *chain;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFTA_VERDICT_CODE])
+		return -EINVAL;
+	data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+	switch (data->verdict) {
+	case NF_ACCEPT:
+	case NF_DROP:
+	case NF_QUEUE:
+	case NFT_CONTINUE:
+	case NFT_BREAK:
+	case NFT_RETURN:
+		desc->len = sizeof(data->verdict);
+		break;
+	case NFT_JUMP:
+	case NFT_GOTO:
+		if (!tb[NFTA_VERDICT_CHAIN])
+			return -EINVAL;
+		chain = nf_tables_chain_lookup(ctx->table,
+					       tb[NFTA_VERDICT_CHAIN]);
+		if (IS_ERR(chain))
+			return PTR_ERR(chain);
+		if (chain->flags & NFT_BASE_CHAIN)
+			return -EOPNOTSUPP;
+
+		if (ctx->chain->level + 1 > chain->level) {
+			if (ctx->chain->level + 1 == 16)
+				return -EMLINK;
+			chain->level = ctx->chain->level + 1;
+		}
+		chain->use++;
+		data->chain = chain;
+		desc->len = sizeof(data);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	desc->type = NFT_DATA_VERDICT;
+	return 0;
+}
+
+static void nft_verdict_uninit(const struct nft_data *data)
+{
+	switch (data->verdict) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		data->chain->use--;
+		break;
+	}
+}
+
+static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
+		goto nla_put_failure;
+
+	switch (data->verdict) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
+			goto nla_put_failure;
+	}
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
+			  struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	unsigned int len;
+
+	len = nla_len(nla);
+	if (len == 0)
+		return -EINVAL;
+	if (len > sizeof(data->data))
+		return -EOVERFLOW;
+
+	nla_memcpy(data->data, nla, sizeof(data->data));
+	desc->type = NFT_DATA_VALUE;
+	desc->len  = len;
+	return 0;
+}
+
+static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
+			  unsigned int len)
+{
+	return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
+}
+
+static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+	[NFTA_DATA_VALUE]	= { .type = NLA_BINARY,
+				    .len  = FIELD_SIZEOF(struct nft_data, data) },
+	[NFTA_DATA_VERDICT]	= { .type = NLA_NESTED },
+};
+
+/**
+ *	nft_data_init - parse nf_tables data netlink attributes
+ *
+ *	@ctx: context of the expression using the data
+ *	@data: destination struct nft_data
+ *	@desc: data description
+ *	@nla: netlink attribute containing data
+ *
+ *	Parse the netlink data attributes and initialize a struct nft_data.
+ *	The type and length of data are returned in the data description.
+ *
+ *	The caller can indicate that it only wants to accept data of type
+ *	NFT_DATA_VALUE by passing NULL for the ctx argument.
+ */
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+		  struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	struct nlattr *tb[NFTA_DATA_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_DATA_VALUE])
+		return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+	if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
+		return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(nft_data_init);
+
+/**
+ *	nft_data_uninit - release a nft_data item
+ *
+ *	@data: struct nft_data to release
+ *	@type: type of data
+ *
+ *	Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ *	all others need to be released by calling this function.
+ */
+void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+{
+	switch (type) {
+	case NFT_DATA_VALUE:
+		return;
+	case NFT_DATA_VERDICT:
+		return nft_verdict_uninit(data);
+	default:
+		WARN_ON(1);
+	}
+}
+EXPORT_SYMBOL_GPL(nft_data_uninit);
+
+int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
+		  enum nft_data_types type, unsigned int len)
+{
+	struct nlattr *nest;
+	int err;
+
+	nest = nla_nest_start(skb, attr);
+	if (nest == NULL)
+		return -1;
+
+	switch (type) {
+	case NFT_DATA_VALUE:
+		err = nft_value_dump(skb, data, len);
+		break;
+	case NFT_DATA_VERDICT:
+		err = nft_verdict_dump(skb, data);
+		break;
+	default:
+		err = -EINVAL;
+		WARN_ON(1);
+	}
+
+	nla_nest_end(skb, nest);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nft_data_dump);
+
+static int __init nf_tables_module_init(void)
+{
+	int err;
+
+	info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
+		       GFP_KERNEL);
+	if (info == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	err = nf_tables_core_module_init();
+	if (err < 0)
+		goto err2;
+
+	err = nfnetlink_subsys_register(&nf_tables_subsys);
+	if (err < 0)
+		goto err3;
+
+	pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
+	return 0;
+err3:
+	nf_tables_core_module_exit();
+err2:
+	kfree(info);
+err1:
+	return err;
+}
+
+static void __exit nf_tables_module_exit(void)
+{
+	nfnetlink_subsys_unregister(&nf_tables_subsys);
+	nf_tables_core_module_exit();
+	kfree(info);
+}
+
+module_init(nf_tables_module_init);
+module_exit(nf_tables_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
new file mode 100644
index 000000000000..bc7fb85d4002
--- /dev/null
+++ b/net/netfilter/nf_tables_core.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#define NFT_JUMP_STACK_SIZE	16
+
+unsigned int nft_do_chain(const struct nf_hook_ops *ops,
+			  struct sk_buff *skb,
+			  const struct net_device *in,
+			  const struct net_device *out,
+			  int (*okfn)(struct sk_buff *))
+{
+	const struct nft_chain *chain = ops->priv;
+	const struct nft_rule *rule;
+	const struct nft_expr *expr, *last;
+	struct nft_data data[NFT_REG_MAX + 1];
+	const struct nft_pktinfo pkt = {
+		.skb		= skb,
+		.in		= in,
+		.out		= out,
+		.hooknum	= ops->hooknum,
+	};
+	unsigned int stackptr = 0;
+	struct {
+		const struct nft_chain	*chain;
+		const struct nft_rule	*rule;
+	} jumpstack[NFT_JUMP_STACK_SIZE];
+
+do_chain:
+	rule = list_entry(&chain->rules, struct nft_rule, list);
+next_rule:
+	data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+	list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
+		nft_rule_for_each_expr(expr, last, rule) {
+			expr->ops->eval(expr, data, &pkt);
+			if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
+				break;
+		}
+
+		switch (data[NFT_REG_VERDICT].verdict) {
+		case NFT_BREAK:
+			data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+			/* fall through */
+		case NFT_CONTINUE:
+			continue;
+		}
+		break;
+	}
+
+	switch (data[NFT_REG_VERDICT].verdict) {
+	case NF_ACCEPT:
+	case NF_DROP:
+	case NF_QUEUE:
+		return data[NFT_REG_VERDICT].verdict;
+	case NFT_JUMP:
+		BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
+		jumpstack[stackptr].chain = chain;
+		jumpstack[stackptr].rule  = rule;
+		stackptr++;
+		/* fall through */
+	case NFT_GOTO:
+		chain = data[NFT_REG_VERDICT].chain;
+		goto do_chain;
+	case NFT_RETURN:
+	case NFT_CONTINUE:
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	if (stackptr > 0) {
+		stackptr--;
+		chain = jumpstack[stackptr].chain;
+		rule  = jumpstack[stackptr].rule;
+		goto next_rule;
+	}
+
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nft_do_chain);
+
+int __init nf_tables_core_module_init(void)
+{
+	int err;
+
+	err = nft_immediate_module_init();
+	if (err < 0)
+		goto err1;
+
+	err = nft_cmp_module_init();
+	if (err < 0)
+		goto err2;
+
+	err = nft_lookup_module_init();
+	if (err < 0)
+		goto err3;
+
+	err = nft_bitwise_module_init();
+	if (err < 0)
+		goto err4;
+
+	err = nft_byteorder_module_init();
+	if (err < 0)
+		goto err5;
+
+	err = nft_payload_module_init();
+	if (err < 0)
+		goto err6;
+
+	return 0;
+
+err6:
+	nft_byteorder_module_exit();
+err5:
+	nft_bitwise_module_exit();
+err4:
+	nft_lookup_module_exit();
+err3:
+	nft_cmp_module_exit();
+err2:
+	nft_immediate_module_exit();
+err1:
+	return err;
+}
+
+void nf_tables_core_module_exit(void)
+{
+	nft_payload_module_exit();
+	nft_byteorder_module_exit();
+	nft_bitwise_module_exit();
+	nft_lookup_module_exit();
+	nft_cmp_module_exit();
+	nft_immediate_module_exit();
+}
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
new file mode 100644
index 000000000000..0f7501506367
--- /dev/null
+++ b/net/netfilter/nft_bitwise.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_bitwise {
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	u8			len;
+	struct nft_data		mask;
+	struct nft_data		xor;
+};
+
+static void nft_bitwise_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	const struct nft_bitwise *priv = nft_expr_priv(expr);
+	const struct nft_data *src = &data[priv->sreg];
+	struct nft_data *dst = &data[priv->dreg];
+	unsigned int i;
+
+	for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) {
+		dst->data[i] = (src->data[i] & priv->mask.data[i]) ^
+			       priv->xor.data[i];
+	}
+}
+
+static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
+	[NFTA_BITWISE_SREG]	= { .type = NLA_U32 },
+	[NFTA_BITWISE_DREG]	= { .type = NLA_U32 },
+	[NFTA_BITWISE_LEN]	= { .type = NLA_U32 },
+	[NFTA_BITWISE_MASK]	= { .type = NLA_NESTED },
+	[NFTA_BITWISE_XOR]	= { .type = NLA_NESTED },
+};
+
+static int nft_bitwise_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_bitwise *priv = nft_expr_priv(expr);
+	struct nft_data_desc d1, d2;
+	int err;
+
+	if (tb[NFTA_BITWISE_SREG] == NULL ||
+	    tb[NFTA_BITWISE_DREG] == NULL ||
+	    tb[NFTA_BITWISE_LEN] == NULL ||
+	    tb[NFTA_BITWISE_MASK] == NULL ||
+	    tb[NFTA_BITWISE_XOR] == NULL)
+		return -EINVAL;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+
+	err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]);
+	if (err < 0)
+		return err;
+	if (d1.len != priv->len)
+		return -EINVAL;
+
+	err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]);
+	if (err < 0)
+		return err;
+	if (d2.len != priv->len)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_bitwise *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_bitwise_ops __read_mostly = {
+	.name		= "bitwise",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_bitwise_eval,
+	.init		= nft_bitwise_init,
+	.dump		= nft_bitwise_dump,
+	.policy		= nft_bitwise_policy,
+	.maxattr	= NFTA_BITWISE_MAX,
+};
+
+int __init nft_bitwise_module_init(void)
+{
+	return nft_register_expr(&nft_bitwise_ops);
+}
+
+void nft_bitwise_module_exit(void)
+{
+	nft_unregister_expr(&nft_bitwise_ops);
+}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
new file mode 100644
index 000000000000..8b0657a4d17b
--- /dev/null
+++ b/net/netfilter/nft_byteorder.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_byteorder {
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	enum nft_byteorder_ops	op:8;
+	u8			len;
+	u8			size;
+};
+
+static void nft_byteorder_eval(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
+{
+	const struct nft_byteorder *priv = nft_expr_priv(expr);
+	struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
+	union { u32 u32; u16 u16; } *s, *d;
+	unsigned int i;
+
+	s = (void *)src->data;
+	d = (void *)dst->data;
+
+	switch (priv->size) {
+	case 4:
+		switch (priv->op) {
+		case NFT_BYTEORDER_NTOH:
+			for (i = 0; i < priv->len / 4; i++)
+				d[i].u32 = ntohl((__force __be32)s[i].u32);
+			break;
+		case NFT_BYTEORDER_HTON:
+			for (i = 0; i < priv->len / 4; i++)
+				d[i].u32 = (__force __u32)htonl(s[i].u32);
+			break;
+		}
+		break;
+	case 2:
+		switch (priv->op) {
+		case NFT_BYTEORDER_NTOH:
+			for (i = 0; i < priv->len / 2; i++)
+				d[i].u16 = ntohs((__force __be16)s[i].u16);
+			break;
+		case NFT_BYTEORDER_HTON:
+			for (i = 0; i < priv->len / 2; i++)
+				d[i].u16 = (__force __u16)htons(s[i].u16);
+			break;
+		}
+		break;
+	}
+}
+
+static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = {
+	[NFTA_BYTEORDER_SREG]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_DREG]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_OP]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_LEN]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_SIZE]	= { .type = NLA_U32 },
+};
+
+static int nft_byteorder_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_byteorder *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_BYTEORDER_SREG] == NULL ||
+	    tb[NFTA_BYTEORDER_DREG] == NULL ||
+	    tb[NFTA_BYTEORDER_LEN] == NULL ||
+	    tb[NFTA_BYTEORDER_SIZE] == NULL ||
+	    tb[NFTA_BYTEORDER_OP] == NULL)
+		return -EINVAL;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
+	switch (priv->op) {
+	case NFT_BYTEORDER_NTOH:
+	case NFT_BYTEORDER_HTON:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+	if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
+	switch (priv->size) {
+	case 2:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_byteorder *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_byteorder_ops __read_mostly = {
+	.name		= "byteorder",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_byteorder_eval,
+	.init		= nft_byteorder_init,
+	.dump		= nft_byteorder_dump,
+	.policy		= nft_byteorder_policy,
+	.maxattr	= NFTA_BYTEORDER_MAX,
+};
+
+int __init nft_byteorder_module_init(void)
+{
+	return nft_register_expr(&nft_byteorder_ops);
+}
+
+void nft_byteorder_module_exit(void)
+{
+	nft_unregister_expr(&nft_byteorder_ops);
+}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
new file mode 100644
index 000000000000..e734d670120a
--- /dev/null
+++ b/net/netfilter/nft_cmp.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_cmp_expr {
+	struct nft_data		data;
+	enum nft_registers	sreg:8;
+	u8			len;
+	enum nft_cmp_ops	op:8;
+};
+
+static void nft_cmp_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+	int d;
+
+	d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len);
+	switch (priv->op) {
+	case NFT_CMP_EQ:
+		if (d != 0)
+			goto mismatch;
+		break;
+	case NFT_CMP_NEQ:
+		if (d == 0)
+			goto mismatch;
+		break;
+	case NFT_CMP_LT:
+		if (d == 0)
+			goto mismatch;
+	case NFT_CMP_LTE:
+		if (d > 0)
+			goto mismatch;
+		break;
+	case NFT_CMP_GT:
+		if (d == 0)
+			goto mismatch;
+	case NFT_CMP_GTE:
+		if (d < 0)
+			goto mismatch;
+		break;
+	}
+	return;
+
+mismatch:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
+	[NFTA_CMP_SREG]		= { .type = NLA_U32 },
+	[NFTA_CMP_OP]		= { .type = NLA_U32 },
+	[NFTA_CMP_DATA]		= { .type = NLA_NESTED },
+};
+
+static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_cmp_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc;
+	int err;
+
+	if (tb[NFTA_CMP_SREG] == NULL ||
+	    tb[NFTA_CMP_OP] == NULL ||
+	    tb[NFTA_CMP_DATA] == NULL)
+		return -EINVAL;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+	switch (priv->op) {
+	case NFT_CMP_EQ:
+	case NFT_CMP_NEQ:
+	case NFT_CMP_LT:
+	case NFT_CMP_LTE:
+	case NFT_CMP_GT:
+	case NFT_CMP_GTE:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+	if (err < 0)
+		return err;
+
+	priv->len = desc.len;
+	return 0;
+}
+
+static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op)))
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_cmp_ops __read_mostly = {
+	.name		= "cmp",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_cmp_eval,
+	.init		= nft_cmp_init,
+	.dump		= nft_cmp_dump,
+	.policy		= nft_cmp_policy,
+	.maxattr	= NFTA_CMP_MAX,
+};
+
+int __init nft_cmp_module_init(void)
+{
+	return nft_register_expr(&nft_cmp_ops);
+}
+
+void nft_cmp_module_exit(void)
+{
+	nft_unregister_expr(&nft_cmp_ops);
+}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
new file mode 100644
index 000000000000..33c5d36819bb
--- /dev/null
+++ b/net/netfilter/nft_counter.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/seqlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_counter {
+	seqlock_t	lock;
+	u64		bytes;
+	u64		packets;
+};
+
+static void nft_counter_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	struct nft_counter *priv = nft_expr_priv(expr);
+
+	write_seqlock_bh(&priv->lock);
+	priv->bytes += pkt->skb->len;
+	priv->packets++;
+	write_sequnlock_bh(&priv->lock);
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_counter *priv = nft_expr_priv(expr);
+	unsigned int seq;
+	u64 bytes;
+	u64 packets;
+
+	do {
+		seq = read_seqbegin(&priv->lock);
+		bytes	= priv->bytes;
+		packets	= priv->packets;
+	} while (read_seqretry(&priv->lock, seq));
+
+	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes)))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+	[NFTA_COUNTER_PACKETS]	= { .type = NLA_U64 },
+	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
+};
+
+static int nft_counter_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_counter *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_COUNTER_PACKETS])
+	        priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+	if (tb[NFTA_COUNTER_BYTES])
+		priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+
+	seqlock_init(&priv->lock);
+	return 0;
+}
+
+static struct nft_expr_ops nft_counter_ops __read_mostly = {
+	.name		= "counter",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_counter)),
+	.policy		= nft_counter_policy,
+	.maxattr	= NFTA_COUNTER_MAX,
+	.owner		= THIS_MODULE,
+	.eval		= nft_counter_eval,
+	.init		= nft_counter_init,
+	.dump		= nft_counter_dump,
+};
+
+static int __init nft_counter_module_init(void)
+{
+	return nft_register_expr(&nft_counter_ops);
+}
+
+static void __exit nft_counter_module_exit(void)
+{
+	nft_unregister_expr(&nft_counter_ops);
+}
+
+module_init(nft_counter_module_init);
+module_exit(nft_counter_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("counter");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
new file mode 100644
index 000000000000..a1756d678226
--- /dev/null
+++ b/net/netfilter/nft_ct.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+
+struct nft_ct {
+	enum nft_ct_keys	key:8;
+	enum ip_conntrack_dir	dir:8;
+	enum nft_registers	dreg:8;
+	uint8_t			family;
+};
+
+static void nft_ct_eval(const struct nft_expr *expr,
+			struct nft_data data[NFT_REG_MAX + 1],
+			const struct nft_pktinfo *pkt)
+{
+	const struct nft_ct *priv = nft_expr_priv(expr);
+	struct nft_data *dest = &data[priv->dreg];
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	const struct nf_conn_help *help;
+	const struct nf_conntrack_tuple *tuple;
+	const struct nf_conntrack_helper *helper;
+	long diff;
+	unsigned int state;
+
+	ct = nf_ct_get(pkt->skb, &ctinfo);
+
+	switch (priv->key) {
+	case NFT_CT_STATE:
+		if (ct == NULL)
+			state = NF_CT_STATE_INVALID_BIT;
+		else if (nf_ct_is_untracked(ct))
+			state = NF_CT_STATE_UNTRACKED_BIT;
+		else
+			state = NF_CT_STATE_BIT(ctinfo);
+		dest->data[0] = state;
+		return;
+	}
+
+	if (ct == NULL)
+		goto err;
+
+	switch (priv->key) {
+	case NFT_CT_DIRECTION:
+		dest->data[0] = CTINFO2DIR(ctinfo);
+		return;
+	case NFT_CT_STATUS:
+		dest->data[0] = ct->status;
+		return;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	case NFT_CT_MARK:
+		dest->data[0] = ct->mark;
+		return;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+		dest->data[0] = ct->secmark;
+		return;
+#endif
+	case NFT_CT_EXPIRATION:
+		diff = (long)jiffies - (long)ct->timeout.expires;
+		if (diff < 0)
+			diff = 0;
+		dest->data[0] = jiffies_to_msecs(diff);
+		return;
+	case NFT_CT_HELPER:
+		if (ct->master == NULL)
+			goto err;
+		help = nfct_help(ct->master);
+		if (help == NULL)
+			goto err;
+		helper = rcu_dereference(help->helper);
+		if (helper == NULL)
+			goto err;
+		if (strlen(helper->name) >= sizeof(dest->data))
+			goto err;
+		strncpy((char *)dest->data, helper->name, sizeof(dest->data));
+		return;
+	}
+
+	tuple = &ct->tuplehash[priv->dir].tuple;
+	switch (priv->key) {
+	case NFT_CT_L3PROTOCOL:
+		dest->data[0] = nf_ct_l3num(ct);
+		return;
+	case NFT_CT_SRC:
+		memcpy(dest->data, tuple->src.u3.all,
+		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+		return;
+	case NFT_CT_DST:
+		memcpy(dest->data, tuple->dst.u3.all,
+		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+		return;
+	case NFT_CT_PROTOCOL:
+		dest->data[0] = nf_ct_protonum(ct);
+		return;
+	case NFT_CT_PROTO_SRC:
+		dest->data[0] = (__force __u16)tuple->src.u.all;
+		return;
+	case NFT_CT_PROTO_DST:
+		dest->data[0] = (__force __u16)tuple->dst.u.all;
+		return;
+	}
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
+	[NFTA_CT_DREG]		= { .type = NLA_U32 },
+	[NFTA_CT_KEY]		= { .type = NLA_U32 },
+	[NFTA_CT_DIRECTION]	= { .type = NLA_U8 },
+};
+
+static int nft_ct_init(const struct nft_ctx *ctx,
+		       const struct nft_expr *expr,
+		       const struct nlattr * const tb[])
+{
+	struct nft_ct *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_CT_DREG] == NULL ||
+	    tb[NFTA_CT_KEY] == NULL)
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+	if (tb[NFTA_CT_DIRECTION] != NULL) {
+		priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+		switch (priv->dir) {
+		case IP_CT_DIR_ORIGINAL:
+		case IP_CT_DIR_REPLY:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	switch (priv->key) {
+	case NFT_CT_STATE:
+	case NFT_CT_DIRECTION:
+	case NFT_CT_STATUS:
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	case NFT_CT_MARK:
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+#endif
+	case NFT_CT_EXPIRATION:
+	case NFT_CT_HELPER:
+		if (tb[NFTA_CT_DIRECTION] != NULL)
+			return -EINVAL;
+		break;
+	case NFT_CT_PROTOCOL:
+	case NFT_CT_SRC:
+	case NFT_CT_DST:
+	case NFT_CT_PROTO_SRC:
+	case NFT_CT_PROTO_DST:
+		if (tb[NFTA_CT_DIRECTION] == NULL)
+			return -EINVAL;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = nf_ct_l3proto_try_module_get(ctx->afi->family);
+	if (err < 0)
+		return err;
+	priv->family = ctx->afi->family;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		goto err1;
+
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		goto err1;
+	return 0;
+
+err1:
+	nf_ct_l3proto_module_put(ctx->afi->family);
+	return err;
+}
+
+static void nft_ct_destroy(const struct nft_expr *expr)
+{
+	struct nft_ct *priv = nft_expr_priv(expr);
+
+	nf_ct_l3proto_module_put(priv->family);
+}
+
+static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_ct *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_ct_ops __read_mostly = {
+	.name		= "ct",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_ct_eval,
+	.init		= nft_ct_init,
+	.destroy	= nft_ct_destroy,
+	.dump		= nft_ct_dump,
+	.policy		= nft_ct_policy,
+	.maxattr	= NFTA_CT_MAX,
+};
+
+static int __init nft_ct_module_init(void)
+{
+	return nft_register_expr(&nft_ct_ops);
+}
+
+static void __exit nft_ct_module_exit(void)
+{
+	nft_unregister_expr(&nft_ct_ops);
+}
+
+module_init(nft_ct_module_init);
+module_exit(nft_ct_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("ct");
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
new file mode 100644
index 000000000000..9fc8eb308193
--- /dev/null
+++ b/net/netfilter/nft_expr_template.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_template {
+
+};
+
+static void nft_template_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1],
+			      const struct nft_pktinfo *pkt)
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
+	[NFTA_TEMPLATE_ATTR]		= { .type = NLA_U32 },
+};
+
+static int nft_template_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr *tb[])
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+	return 0;
+}
+
+static void nft_template_destroy(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr)
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_template *priv = nft_expr_priv(expr);
+
+	NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops template_ops __read_mostly = {
+	.name		= "template",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_template)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_template_eval,
+	.init		= nft_template_init,
+	.destroy	= nft_template_destroy,
+	.dump		= nft_template_dump,
+	.policy		= nft_template_policy,
+	.maxattr	= NFTA_TEMPLATE_MAX,
+};
+
+static int __init nft_template_module_init(void)
+{
+	return nft_register_expr(&template_ops);
+}
+
+static void __exit nft_template_module_exit(void)
+{
+	nft_unregister_expr(&template_ops);
+}
+
+module_init(nft_template_module_init);
+module_exit(nft_template_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
new file mode 100644
index 000000000000..21c6a6b7b662
--- /dev/null
+++ b/net/netfilter/nft_exthdr.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+// FIXME:
+#include <net/ipv6.h>
+
+struct nft_exthdr {
+	u8			type;
+	u8			offset;
+	u8			len;
+	enum nft_registers	dreg:8;
+};
+
+static void nft_exthdr_eval(const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1],
+			    const struct nft_pktinfo *pkt)
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	struct nft_data *dest = &data[priv->dreg];
+	unsigned int offset;
+	int err;
+
+	err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
+	if (err < 0)
+		goto err;
+	offset += priv->offset;
+
+	if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0)
+		goto err;
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
+	[NFTA_EXTHDR_DREG]		= { .type = NLA_U32 },
+	[NFTA_EXTHDR_TYPE]		= { .type = NLA_U8 },
+	[NFTA_EXTHDR_OFFSET]		= { .type = NLA_U32 },
+	[NFTA_EXTHDR_LEN]		= { .type = NLA_U32 },
+};
+
+static int nft_exthdr_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_EXTHDR_DREG] == NULL ||
+	    tb[NFTA_EXTHDR_TYPE] == NULL ||
+	    tb[NFTA_EXTHDR_OFFSET] == NULL ||
+	    tb[NFTA_EXTHDR_LEN] == NULL)
+		return -EINVAL;
+
+	priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+	priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+	priv->len    = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+	if (priv->len == 0 ||
+	    priv->len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops exthdr_ops __read_mostly = {
+	.name		= "exthdr",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_exthdr_eval,
+	.init		= nft_exthdr_init,
+	.dump		= nft_exthdr_dump,
+	.policy		= nft_exthdr_policy,
+	.maxattr	= NFTA_EXTHDR_MAX,
+};
+
+static int __init nft_exthdr_module_init(void)
+{
+	return nft_register_expr(&exthdr_ops);
+}
+
+static void __exit nft_exthdr_module_exit(void)
+{
+	nft_unregister_expr(&exthdr_ops);
+}
+
+module_init(nft_exthdr_module_init);
+module_exit(nft_exthdr_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("exthdr");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644
index 000000000000..67cc502881f1
--- /dev/null
+++ b/net/netfilter/nft_hash.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_hash {
+	struct hlist_head	*hash;
+	unsigned int		hsize;
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	u8			klen;
+	u8			dlen;
+	u16			flags;
+};
+
+struct nft_hash_elem {
+	struct hlist_node	hnode;
+	struct nft_data		key;
+	struct nft_data		data[];
+};
+
+static u32 nft_hash_rnd __read_mostly;
+static bool nft_hash_rnd_initted __read_mostly;
+
+static unsigned int nft_hash_data(const struct nft_data *data,
+				  unsigned int hsize, unsigned int len)
+{
+	unsigned int h;
+
+	// FIXME: can we reasonably guarantee the upper bits are fixed?
+	h = jhash2(data->data, len >> 2, nft_hash_rnd);
+	return ((u64)h * hsize) >> 32;
+}
+
+static void nft_hash_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const struct nft_hash_elem *elem;
+	const struct nft_data *key = &data[priv->sreg];
+	unsigned int h;
+
+	h = nft_hash_data(key, priv->hsize, priv->klen);
+	hlist_for_each_entry(elem, &priv->hash[h], hnode) {
+		if (nft_data_cmp(&elem->key, key, priv->klen))
+			continue;
+		if (priv->flags & NFT_HASH_MAP)
+			nft_data_copy(&data[priv->dreg], elem->data);
+		return;
+	}
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static void nft_hash_elem_destroy(const struct nft_expr *expr,
+				  struct nft_hash_elem *elem)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+
+	nft_data_uninit(&elem->key, NFT_DATA_VALUE);
+	if (priv->flags & NFT_HASH_MAP)
+		nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg));
+	kfree(elem);
+}
+
+static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = {
+	[NFTA_HE_KEY]		= { .type = NLA_NESTED },
+	[NFTA_HE_DATA]		= { .type = NLA_NESTED },
+};
+
+static int nft_hash_elem_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr *nla,
+			      struct nft_hash_elem **new)
+{
+	struct nft_hash *priv = nft_expr_priv(expr);
+	struct nlattr *tb[NFTA_HE_MAX + 1];
+	struct nft_hash_elem *elem;
+	struct nft_data_desc d1, d2;
+	unsigned int size;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_HE_KEY] == NULL)
+		return -EINVAL;
+	size = sizeof(*elem);
+
+	if (priv->flags & NFT_HASH_MAP) {
+		if (tb[NFTA_HE_DATA] == NULL)
+			return -EINVAL;
+		size += sizeof(elem->data[0]);
+	} else {
+		if (tb[NFTA_HE_DATA] != NULL)
+			return -EINVAL;
+	}
+
+	elem = kzalloc(size, GFP_KERNEL);
+	if (elem == NULL)
+		return -ENOMEM;
+
+	err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]);
+	if (err < 0)
+		goto err1;
+	err = -EINVAL;
+	if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen)
+		goto err2;
+
+	if (tb[NFTA_HE_DATA] != NULL) {
+		err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]);
+		if (err < 0)
+			goto err2;
+		err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type);
+		if (err < 0)
+			goto err3;
+	}
+
+	*new = elem;
+	return 0;
+
+err3:
+	nft_data_uninit(elem->data, d2.type);
+err2:
+	nft_data_uninit(&elem->key, d1.type);
+err1:
+	kfree(elem);
+	return err;
+}
+
+static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr,
+			      const struct nft_hash_elem *elem)
+
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key,
+			  NFT_DATA_VALUE, priv->klen) < 0)
+		goto nla_put_failure;
+
+	if (priv->flags & NFT_HASH_MAP) {
+		if (nft_data_dump(skb, NFTA_HE_DATA, elem->data,
+				  NFT_DATA_VALUE, priv->dlen) < 0)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const struct hlist_node *next;
+	struct nft_hash_elem *elem;
+	unsigned int i;
+
+	for (i = 0; i < priv->hsize; i++) {
+		hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) {
+			hlist_del(&elem->hnode);
+			nft_hash_elem_destroy(expr, elem);
+		}
+	}
+	kfree(priv->hash);
+}
+
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+	[NFTA_HASH_FLAGS]	= { .type = NLA_U32 },
+	[NFTA_HASH_SREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_DREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_KLEN]	= { .type = NLA_U32 },
+	[NFTA_HASH_ELEMENTS]	= { .type = NLA_NESTED },
+};
+
+static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_expr_priv(expr);
+	struct nft_hash_elem *elem, *uninitialized_var(new);
+	const struct nlattr *nla;
+	unsigned int cnt, i;
+	unsigned int h;
+	int err, rem;
+
+	if (unlikely(!nft_hash_rnd_initted)) {
+		get_random_bytes(&nft_hash_rnd, 4);
+		nft_hash_rnd_initted = true;
+	}
+
+	if (tb[NFTA_HASH_SREG] == NULL ||
+	    tb[NFTA_HASH_KLEN] == NULL ||
+	    tb[NFTA_HASH_ELEMENTS] == NULL)
+		return -EINVAL;
+
+	if (tb[NFTA_HASH_FLAGS] != NULL) {
+		priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS]));
+		if (priv->flags & ~NFT_HASH_MAP)
+			return -EINVAL;
+	}
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_HASH_DREG] != NULL) {
+		if (!(priv->flags & NFT_HASH_MAP))
+			return -EINVAL;
+		priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG]));
+		err = nft_validate_output_register(priv->dreg);
+		if (err < 0)
+			return err;
+	}
+
+	priv->klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN]));
+	if (priv->klen == 0)
+		return -EINVAL;
+
+	cnt = 0;
+	nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) {
+		if (nla_type(nla) != NFTA_LIST_ELEM)
+			return -EINVAL;
+		cnt++;
+	}
+
+	/* Aim for a load factor of 0.75 */
+	cnt = cnt * 4 / 3;
+
+	priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
+	if (priv->hash == NULL)
+		return -ENOMEM;
+	priv->hsize = cnt;
+
+	for (i = 0; i < cnt; i++)
+		INIT_HLIST_HEAD(&priv->hash[i]);
+
+	err = -ENOMEM;
+	nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) {
+		err = nft_hash_elem_init(ctx, expr, nla, &new);
+		if (err < 0)
+			goto err1;
+
+		h = nft_hash_data(&new->key, priv->hsize, priv->klen);
+		hlist_for_each_entry(elem, &priv->hash[h], hnode) {
+			if (nft_data_cmp(&elem->key, &new->key, priv->klen))
+				continue;
+			nft_hash_elem_destroy(expr, new);
+			err = -EEXIST;
+			goto err1;
+		}
+		hlist_add_head(&new->hnode, &priv->hash[h]);
+	}
+	return 0;
+
+err1:
+	nft_hash_destroy(ctx, expr);
+	return err;
+}
+
+static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const struct nft_hash_elem *elem;
+	struct nlattr *list;
+	unsigned int i;
+
+	if (priv->flags)
+		if (nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags)))
+			goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (priv->flags & NFT_HASH_MAP)
+		if (nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg)))
+			goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen)))
+		goto nla_put_failure;
+
+	list = nla_nest_start(skb, NFTA_HASH_ELEMENTS);
+	if (list == NULL)
+		goto nla_put_failure;
+
+	for (i = 0; i < priv->hsize; i++) {
+		hlist_for_each_entry(elem, &priv->hash[i], hnode) {
+			if (nft_hash_elem_dump(skb, expr, elem) < 0)
+				goto nla_put_failure;
+		}
+	}
+
+	nla_nest_end(skb, list);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_hash_ops __read_mostly = {
+	.name		= "hash",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_hash_eval,
+	.init		= nft_hash_init,
+	.destroy	= nft_hash_destroy,
+	.dump		= nft_hash_dump,
+	.policy		= nft_hash_policy,
+	.maxattr	= NFTA_HASH_MAX,
+};
+
+static int __init nft_hash_module_init(void)
+{
+	return nft_register_expr(&nft_hash_ops);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+	nft_unregister_expr(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("hash");
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
new file mode 100644
index 000000000000..3bf42c3cc49a
--- /dev/null
+++ b/net/netfilter/nft_immediate.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_immediate_expr {
+	struct nft_data		data;
+	enum nft_registers	dreg:8;
+	u8			dlen;
+};
+
+static void nft_immediate_eval(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	nft_data_copy(&data[priv->dreg], &priv->data);
+}
+
+static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
+	[NFTA_IMMEDIATE_DREG]	= { .type = NLA_U32 },
+	[NFTA_IMMEDIATE_DATA]	= { .type = NLA_NESTED },
+};
+
+static int nft_immediate_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_immediate_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc;
+	int err;
+
+	if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
+	    tb[NFTA_IMMEDIATE_DATA] == NULL)
+		return -EINVAL;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+
+	err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+	if (err < 0)
+		return err;
+	priv->dlen = desc.len;
+
+	err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
+	if (err < 0)
+		goto err1;
+
+	return 0;
+
+err1:
+	nft_data_uninit(&priv->data, desc.type);
+	return err;
+}
+
+static void nft_immediate_destroy(const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+	return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+
+	return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
+			     nft_dreg_to_type(priv->dreg), priv->dlen);
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_imm_ops __read_mostly = {
+	.name		= "immediate",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_immediate_eval,
+	.init		= nft_immediate_init,
+	.destroy	= nft_immediate_destroy,
+	.dump		= nft_immediate_dump,
+	.policy		= nft_immediate_policy,
+	.maxattr	= NFTA_IMMEDIATE_MAX,
+};
+
+int __init nft_immediate_module_init(void)
+{
+	return nft_register_expr(&nft_imm_ops);
+}
+
+void nft_immediate_module_exit(void)
+{
+	nft_unregister_expr(&nft_imm_ops);
+}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
new file mode 100644
index 000000000000..e0e3fc8aebc3
--- /dev/null
+++ b/net/netfilter/nft_limit.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+static DEFINE_SPINLOCK(limit_lock);
+
+struct nft_limit {
+	u64		tokens;
+	u64		rate;
+	u64		unit;
+	unsigned long	stamp;
+};
+
+static void nft_limit_eval(const struct nft_expr *expr,
+			   struct nft_data data[NFT_REG_MAX + 1],
+			   const struct nft_pktinfo *pkt)
+{
+	struct nft_limit *priv = nft_expr_priv(expr);
+
+	spin_lock_bh(&limit_lock);
+	if (time_after_eq(jiffies, priv->stamp)) {
+		priv->tokens = priv->rate;
+		priv->stamp = jiffies + priv->unit * HZ;
+	}
+
+	if (priv->tokens >= 1) {
+		priv->tokens--;
+		spin_unlock_bh(&limit_lock);
+		return;
+	}
+	spin_unlock_bh(&limit_lock);
+
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+	[NFTA_LIMIT_RATE]	= { .type = NLA_U64 },
+	[NFTA_LIMIT_UNIT]	= { .type = NLA_U64 },
+};
+
+static int nft_limit_init(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
+{
+	struct nft_limit *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_LIMIT_RATE] == NULL ||
+	    tb[NFTA_LIMIT_UNIT] == NULL)
+		return -EINVAL;
+
+	priv->rate   = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+	priv->unit   = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
+	priv->stamp  = jiffies + priv->unit * HZ;
+	priv->tokens = priv->rate;
+	return 0;
+}
+
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_limit *priv = nft_expr_priv(expr);
+
+	if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_limit_ops __read_mostly = {
+	.name		= "limit",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_limit_eval,
+	.init		= nft_limit_init,
+	.dump		= nft_limit_dump,
+	.policy		= nft_limit_policy,
+	.maxattr	= NFTA_LIMIT_MAX,
+};
+
+static int __init nft_limit_module_init(void)
+{
+	return nft_register_expr(&nft_limit_ops);
+}
+
+static void __exit nft_limit_module_exit(void)
+{
+	nft_unregister_expr(&nft_limit_ops);
+}
+
+module_init(nft_limit_module_init);
+module_exit(nft_limit_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("limit");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
new file mode 100644
index 000000000000..da495c3b1e7e
--- /dev/null
+++ b/net/netfilter/nft_log.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netdevice.h>
+
+static const char *nft_log_null_prefix = "";
+
+struct nft_log {
+	struct nf_loginfo	loginfo;
+	char			*prefix;
+	int			family;
+};
+
+static void nft_log_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_log *priv = nft_expr_priv(expr);
+	struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+	nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in,
+		      pkt->out, &priv->loginfo, "%s", priv->prefix);
+}
+
+static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
+	[NFTA_LOG_GROUP]	= { .type = NLA_U16 },
+	[NFTA_LOG_PREFIX]	= { .type = NLA_STRING },
+	[NFTA_LOG_SNAPLEN]	= { .type = NLA_U32 },
+	[NFTA_LOG_QTHRESHOLD]	= { .type = NLA_U16 },
+};
+
+static int nft_log_init(const struct nft_ctx *ctx,
+			const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_log *priv = nft_expr_priv(expr);
+	struct nf_loginfo *li = &priv->loginfo;
+	const struct nlattr *nla;
+
+	priv->family = ctx->afi->family;
+
+	nla = tb[NFTA_LOG_PREFIX];
+	if (nla != NULL) {
+		priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
+		if (priv->prefix == NULL)
+			return -ENOMEM;
+		nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1);
+	} else
+		priv->prefix = (char *)nft_log_null_prefix;
+
+	li->type = NF_LOG_TYPE_ULOG;
+	if (tb[NFTA_LOG_GROUP] != NULL)
+		li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
+
+	if (tb[NFTA_LOG_SNAPLEN] != NULL)
+		li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
+	if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
+		li->u.ulog.qthreshold =
+			ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+	}
+
+	return 0;
+}
+
+static void nft_log_destroy(const struct nft_expr *expr)
+{
+	struct nft_log *priv = nft_expr_priv(expr);
+
+	if (priv->prefix != nft_log_null_prefix)
+		kfree(priv->prefix);
+}
+
+static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_log *priv = nft_expr_priv(expr);
+	const struct nf_loginfo *li = &priv->loginfo;
+
+	if (priv->prefix != nft_log_null_prefix)
+		if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
+			goto nla_put_failure;
+	if (li->u.ulog.group)
+		if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
+			goto nla_put_failure;
+	if (li->u.ulog.copy_len)
+		if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
+				 htonl(li->u.ulog.copy_len)))
+			goto nla_put_failure;
+	if (li->u.ulog.qthreshold)
+		if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+				 htons(li->u.ulog.qthreshold)))
+			goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_log_ops __read_mostly = {
+	.name		= "log",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_log)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_log_eval,
+	.init		= nft_log_init,
+	.destroy	= nft_log_destroy,
+	.dump		= nft_log_dump,
+	.policy		= nft_log_policy,
+	.maxattr	= NFTA_LOG_MAX,
+};
+
+static int __init nft_log_module_init(void)
+{
+	return nft_register_expr(&nft_log_ops);
+}
+
+static void __exit nft_log_module_exit(void)
+{
+	nft_unregister_expr(&nft_log_ops);
+}
+
+module_init(nft_log_module_init);
+module_exit(nft_log_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("log");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
new file mode 100644
index 000000000000..96735aa2f039
--- /dev/null
+++ b/net/netfilter/nft_meta.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
+#include <net/netfilter/nf_tables.h>
+
+struct nft_meta {
+	enum nft_meta_keys	key:8;
+	enum nft_registers	dreg:8;
+};
+
+static void nft_meta_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	const struct net_device *in = pkt->in, *out = pkt->out;
+	struct nft_data *dest = &data[priv->dreg];
+
+	switch (priv->key) {
+	case NFT_META_LEN:
+		dest->data[0] = skb->len;
+		break;
+	case NFT_META_PROTOCOL:
+		*(__be16 *)dest->data = skb->protocol;
+		break;
+	case NFT_META_PRIORITY:
+		dest->data[0] = skb->priority;
+		break;
+	case NFT_META_MARK:
+		dest->data[0] = skb->mark;
+		break;
+	case NFT_META_IIF:
+		if (in == NULL)
+			goto err;
+		dest->data[0] = in->ifindex;
+		break;
+	case NFT_META_OIF:
+		if (out == NULL)
+			goto err;
+		dest->data[0] = out->ifindex;
+		break;
+	case NFT_META_IIFNAME:
+		if (in == NULL)
+			goto err;
+		strncpy((char *)dest->data, in->name, sizeof(dest->data));
+		break;
+	case NFT_META_OIFNAME:
+		if (out == NULL)
+			goto err;
+		strncpy((char *)dest->data, out->name, sizeof(dest->data));
+		break;
+	case NFT_META_IIFTYPE:
+		if (in == NULL)
+			goto err;
+		*(u16 *)dest->data = in->type;
+		break;
+	case NFT_META_OIFTYPE:
+		if (out == NULL)
+			goto err;
+		*(u16 *)dest->data = out->type;
+		break;
+	case NFT_META_SKUID:
+		if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+			goto err;
+
+		read_lock_bh(&skb->sk->sk_callback_lock);
+		if (skb->sk->sk_socket == NULL ||
+		    skb->sk->sk_socket->file == NULL) {
+			read_unlock_bh(&skb->sk->sk_callback_lock);
+			goto err;
+		}
+
+		dest->data[0] =
+			from_kuid_munged(&init_user_ns,
+				skb->sk->sk_socket->file->f_cred->fsuid);
+		read_unlock_bh(&skb->sk->sk_callback_lock);
+		break;
+	case NFT_META_SKGID:
+		if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+			goto err;
+
+		read_lock_bh(&skb->sk->sk_callback_lock);
+		if (skb->sk->sk_socket == NULL ||
+		    skb->sk->sk_socket->file == NULL) {
+			read_unlock_bh(&skb->sk->sk_callback_lock);
+			goto err;
+		}
+		dest->data[0] =
+			from_kgid_munged(&init_user_ns,
+				 skb->sk->sk_socket->file->f_cred->fsgid);
+		read_unlock_bh(&skb->sk->sk_callback_lock);
+		break;
+#ifdef CONFIG_NET_CLS_ROUTE
+	case NFT_META_RTCLASSID: {
+		const struct dst_entry *dst = skb_dst(skb);
+
+		if (dst == NULL)
+			goto err;
+		dest->data[0] = dst->tclassid;
+		break;
+	}
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+		dest->data[0] = skb->secmark;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		goto err;
+	}
+	return;
+
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+	[NFTA_META_DREG]	= { .type = NLA_U32 },
+	[NFTA_META_KEY]		= { .type = NLA_U32 },
+};
+
+static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_meta *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_META_DREG] == NULL ||
+	    tb[NFTA_META_KEY] == NULL)
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (priv->key) {
+	case NFT_META_LEN:
+	case NFT_META_PROTOCOL:
+	case NFT_META_PRIORITY:
+	case NFT_META_MARK:
+	case NFT_META_IIF:
+	case NFT_META_OIF:
+	case NFT_META_IIFNAME:
+	case NFT_META_OIFNAME:
+	case NFT_META_IIFTYPE:
+	case NFT_META_OIFTYPE:
+	case NFT_META_SKUID:
+	case NFT_META_SKGID:
+#ifdef CONFIG_NET_CLS_ROUTE
+	case NFT_META_RTCLASSID:
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+#endif
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_meta_ops __read_mostly = {
+	.name		= "meta",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_meta_eval,
+	.init		= nft_meta_init,
+	.dump		= nft_meta_dump,
+	.policy		= nft_meta_policy,
+	.maxattr	= NFTA_META_MAX,
+};
+
+static int __init nft_meta_module_init(void)
+{
+	return nft_register_expr(&nft_meta_ops);
+}
+
+static void __exit nft_meta_module_exit(void)
+{
+	nft_unregister_expr(&nft_meta_ops);
+}
+
+module_init(nft_meta_module_init);
+module_exit(nft_meta_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c
new file mode 100644
index 000000000000..71177df75ffb
--- /dev/null
+++ b/net/netfilter/nft_meta_target.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_meta {
+	enum nft_meta_keys	key;
+};
+
+static void nft_meta_eval(const struct nft_expr *expr,
+			  struct nft_data *nfres,
+			  struct nft_data *data,
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *meta = nft_expr_priv(expr);
+	struct sk_buff *skb = pkt->skb;
+	u32 val = data->data[0];
+
+	switch (meta->key) {
+	case NFT_META_MARK:
+		skb->mark = val;
+		break;
+	case NFT_META_PRIORITY:
+		skb->priority = val;
+		break;
+	case NFT_META_NFTRACE:
+		skb->nf_trace = val;
+		break;
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+		skb->secmark = val;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+	}
+}
+
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+	[NFTA_META_KEY]		= { .type = NLA_U32 },
+};
+
+static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[])
+{
+	struct nft_meta *meta = nft_expr_priv(expr);
+
+	if (tb[NFTA_META_KEY] == NULL)
+		return -EINVAL;
+
+	meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (meta->key) {
+	case NFT_META_MARK:
+	case NFT_META_PRIORITY:
+	case NFT_META_NFTRACE:
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+#endif
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_meta *meta = nft_expr_priv(expr);
+
+	NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key));
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops meta_target __read_mostly = {
+	.name		= "meta",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_meta_eval,
+	.init		= nft_meta_init,
+	.dump		= nft_meta_dump,
+	.policy		= nft_meta_policy,
+	.maxattr	= NFTA_META_MAX,
+};
+
+static int __init nft_meta_target_init(void)
+{
+	return nft_register_expr(&meta_target);
+}
+
+static void __exit nft_meta_target_exit(void)
+{
+	nft_unregister_expr(&meta_target);
+}
+
+module_init(nft_meta_target_init);
+module_exit(nft_meta_target_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
new file mode 100644
index 000000000000..329f134b3f89
--- /dev/null
+++ b/net/netfilter/nft_payload.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_payload {
+	enum nft_payload_bases	base:8;
+	u8			offset;
+	u8			len;
+	enum nft_registers	dreg:8;
+};
+
+static void nft_payload_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	struct nft_data *dest = &data[priv->dreg];
+	int offset;
+
+	switch (priv->base) {
+	case NFT_PAYLOAD_LL_HEADER:
+		if (!skb_mac_header_was_set(skb))
+			goto err;
+		offset = skb_mac_header(skb) - skb->data;
+		break;
+	case NFT_PAYLOAD_NETWORK_HEADER:
+		offset = skb_network_offset(skb);
+		break;
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		offset = skb_transport_offset(skb);
+		break;
+	default:
+		BUG();
+	}
+	offset += priv->offset;
+
+	if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0)
+		goto err;
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
+	[NFTA_PAYLOAD_DREG]	= { .type = NLA_U32 },
+	[NFTA_PAYLOAD_BASE]	= { .type = NLA_U32 },
+	[NFTA_PAYLOAD_OFFSET]	= { .type = NLA_U32 },
+	[NFTA_PAYLOAD_LEN]	= { .type = NLA_U32 },
+};
+
+static int nft_payload_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_payload *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_PAYLOAD_DREG] == NULL ||
+	    tb[NFTA_PAYLOAD_BASE] == NULL ||
+	    tb[NFTA_PAYLOAD_OFFSET] == NULL ||
+	    tb[NFTA_PAYLOAD_LEN] == NULL)
+		return -EINVAL;
+
+	priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+	switch (priv->base) {
+	case NFT_PAYLOAD_LL_HEADER:
+	case NFT_PAYLOAD_NETWORK_HEADER:
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+	priv->len    = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+	if (priv->len == 0 ||
+	    priv->len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) ||
+	    nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
+	    nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
+	    nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_payload_ops __read_mostly = {
+	.name		= "payload",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_payload_eval,
+	.init		= nft_payload_init,
+	.dump		= nft_payload_dump,
+	.policy		= nft_payload_policy,
+	.maxattr	= NFTA_PAYLOAD_MAX,
+};
+
+int __init nft_payload_module_init(void)
+{
+	return nft_register_expr(&nft_payload_ops);
+}
+
+void nft_payload_module_exit(void)
+{
+	nft_unregister_expr(&nft_payload_ops);
+}
diff --git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c
new file mode 100644
index 000000000000..7b7c8354c327
--- /dev/null
+++ b/net/netfilter/nft_set.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_set {
+	struct rb_root		root;
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	u8			klen;
+	u8			dlen;
+	u16			flags;
+};
+
+struct nft_set_elem {
+	struct rb_node		node;
+	enum nft_set_elem_flags	flags;
+	struct nft_data		key;
+	struct nft_data		data[];
+};
+
+static void nft_set_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_set *priv = nft_expr_priv(expr);
+	const struct rb_node *parent = priv->root.rb_node;
+	const struct nft_set_elem *elem, *interval = NULL;
+	const struct nft_data *key = &data[priv->sreg];
+	int d;
+
+	while (parent != NULL) {
+		elem = rb_entry(parent, struct nft_set_elem, node);
+
+		d = nft_data_cmp(&elem->key, key, priv->klen);
+		if (d < 0) {
+			parent = parent->rb_left;
+			interval = elem;
+		} else if (d > 0)
+			parent = parent->rb_right;
+		else {
+found:
+			if (elem->flags & NFT_SE_INTERVAL_END)
+				goto out;
+			if (priv->flags & NFT_SET_MAP)
+				nft_data_copy(&data[priv->dreg], elem->data);
+			return;
+		}
+	}
+
+	if (priv->flags & NFT_SET_INTERVAL && interval != NULL) {
+		elem = interval;
+		goto found;
+	}
+out:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static void nft_set_elem_destroy(const struct nft_expr *expr,
+				 struct nft_set_elem *elem)
+{
+	const struct nft_set *priv = nft_expr_priv(expr);
+
+	nft_data_uninit(&elem->key, NFT_DATA_VALUE);
+	if (priv->flags & NFT_SET_MAP)
+		nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg));
+	kfree(elem);
+}
+
+static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = {
+	[NFTA_SE_KEY]		= { .type = NLA_NESTED },
+	[NFTA_SE_DATA]		= { .type = NLA_NESTED },
+	[NFTA_SE_FLAGS]		= { .type = NLA_U32 },
+};
+
+static int nft_set_elem_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr *nla,
+			     struct nft_set_elem **new)
+{
+	struct nft_set *priv = nft_expr_priv(expr);
+	struct nlattr *tb[NFTA_SE_MAX + 1];
+	struct nft_set_elem *elem;
+	struct nft_data_desc d1, d2;
+	enum nft_set_elem_flags flags = 0;
+	unsigned int size;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_SE_KEY] == NULL)
+		return -EINVAL;
+
+	if (tb[NFTA_SE_FLAGS] != NULL) {
+		flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS]));
+		if (flags & ~NFT_SE_INTERVAL_END)
+			return -EINVAL;
+	}
+
+	size = sizeof(*elem);
+	if (priv->flags & NFT_SET_MAP) {
+		if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END))
+			return -EINVAL;
+		size += sizeof(elem->data[0]);
+	} else {
+		if (tb[NFTA_SE_DATA] != NULL)
+			return -EINVAL;
+	}
+
+	elem = kzalloc(size, GFP_KERNEL);
+	if (elem == NULL)
+		return -ENOMEM;
+	elem->flags = flags;
+
+	err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]);
+	if (err < 0)
+		goto err1;
+	err = -EINVAL;
+	if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen)
+		goto err2;
+
+	if (tb[NFTA_SE_DATA] != NULL) {
+		err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]);
+		if (err < 0)
+			goto err2;
+		err = -EINVAL;
+		if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen)
+			goto err2;
+		err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type);
+		if (err < 0)
+			goto err3;
+	}
+
+	*new = elem;
+	return 0;
+
+err3:
+	nft_data_uninit(elem->data, d2.type);
+err2:
+	nft_data_uninit(&elem->key, d1.type);
+err1:
+	kfree(elem);
+	return err;
+}
+
+static int nft_set_elem_dump(struct sk_buff *skb, const struct nft_expr *expr,
+			     const struct nft_set_elem *elem)
+
+{
+	const struct nft_set *priv = nft_expr_priv(expr);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key,
+			  NFT_DATA_VALUE, priv->klen) < 0)
+		goto nla_put_failure;
+
+	if (priv->flags & NFT_SET_MAP && !(elem->flags & NFT_SE_INTERVAL_END)) {
+		if (nft_data_dump(skb, NFTA_SE_DATA, elem->data,
+				  nft_dreg_to_type(priv->dreg), priv->dlen) < 0)
+			goto nla_put_failure;
+	}
+
+	if (elem->flags){
+		if (nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags)))
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static void nft_set_destroy(const struct nft_expr *expr)
+{
+	struct nft_set *priv = nft_expr_priv(expr);
+	struct nft_set_elem *elem;
+	struct rb_node *node;
+
+	while ((node = priv->root.rb_node) != NULL) {
+		rb_erase(node, &priv->root);
+		elem = rb_entry(node, struct nft_set_elem, node);
+		nft_set_elem_destroy(expr, elem);
+	}
+}
+
+static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
+	[NFTA_SET_FLAGS]	= { .type = NLA_U32 },
+	[NFTA_SET_SREG]		= { .type = NLA_U32 },
+	[NFTA_SET_DREG]		= { .type = NLA_U32 },
+	[NFTA_SET_KLEN]		= { .type = NLA_U32 },
+	[NFTA_SET_DLEN]		= { .type = NLA_U32 },
+	[NFTA_SET_ELEMENTS]	= { .type = NLA_NESTED },
+};
+
+static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_set *priv = nft_expr_priv(expr);
+	struct nft_set_elem *elem, *uninitialized_var(new);
+	struct rb_node *parent, **p;
+	const struct nlattr *nla;
+	int err, rem, d;
+
+	if (tb[NFTA_SET_SREG] == NULL ||
+	    tb[NFTA_SET_KLEN] == NULL ||
+	    tb[NFTA_SET_ELEMENTS] == NULL)
+		return -EINVAL;
+
+	priv->root = RB_ROOT;
+
+	if (tb[NFTA_SET_FLAGS] != NULL) {
+		priv->flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS]));
+		if (priv->flags & ~(NFT_SET_INTERVAL | NFT_SET_MAP))
+			return -EINVAL;
+	}
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_SET_DREG] != NULL) {
+		if (!(priv->flags & NFT_SET_MAP))
+			return -EINVAL;
+		if (tb[NFTA_SET_DLEN] == NULL)
+			return -EINVAL;
+
+		priv->dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG]));
+		err = nft_validate_output_register(priv->dreg);
+		if (err < 0)
+			return err;
+
+		if (priv->dreg == NFT_REG_VERDICT)
+			priv->dlen = FIELD_SIZEOF(struct nft_data, data);
+		else {
+			priv->dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN]));
+			if (priv->dlen == 0 ||
+			    priv->dlen > FIELD_SIZEOF(struct nft_data, data))
+				return -EINVAL;
+		}
+	} else {
+		if (priv->flags & NFT_SET_MAP)
+			return -EINVAL;
+		if (tb[NFTA_SET_DLEN] != NULL)
+			return -EINVAL;
+	}
+
+	priv->klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN]));
+	if (priv->klen == 0 ||
+	    priv->klen > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) {
+		err = -EINVAL;
+		if (nla_type(nla) != NFTA_LIST_ELEM)
+			goto err1;
+
+		err = nft_set_elem_init(ctx, expr, nla, &new);
+		if (err < 0)
+			goto err1;
+
+		parent = NULL;
+		p = &priv->root.rb_node;
+		while (*p != NULL) {
+			parent = *p;
+			elem = rb_entry(parent, struct nft_set_elem, node);
+			d = nft_data_cmp(&elem->key, &new->key, priv->klen);
+			if (d < 0)
+				p = &parent->rb_left;
+			else if (d > 0)
+				p = &parent->rb_right;
+			else {
+				err = -EEXIST;
+				goto err2;
+			}
+		}
+		rb_link_node(&new->node, parent, p);
+		rb_insert_color(&new->node, &priv->root);
+	}
+
+	return 0;
+
+err2:
+	nft_set_elem_destroy(expr, new);
+err1:
+	nft_set_destroy(expr);
+	return err;
+}
+
+static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_set *priv = nft_expr_priv(expr);
+	const struct nft_set_elem *elem;
+	struct rb_node *node;
+	struct nlattr *list;
+
+	if (priv->flags) {
+		if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(priv->flags)))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_be32(skb, NFTA_SET_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_SET_KLEN, htonl(priv->klen)))
+		goto nla_put_failure;
+
+	if (priv->flags & NFT_SET_MAP) {
+		if (nla_put_be32(skb, NFTA_SET_DREG, htonl(priv->dreg)))
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_SET_DLEN, htonl(priv->dlen)))
+			goto nla_put_failure;
+	}
+
+	list = nla_nest_start(skb, NFTA_SET_ELEMENTS);
+	if (list == NULL)
+		goto nla_put_failure;
+
+	for (node = rb_first(&priv->root); node; node = rb_next(node)) {
+		elem = rb_entry(node, struct nft_set_elem, node);
+		if (nft_set_elem_dump(skb, expr, elem) < 0)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, list);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_set_ops __read_mostly = {
+	.name		= "set",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_set)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_set_eval,
+	.init		= nft_set_init,
+	.destroy	= nft_set_destroy,
+	.dump		= nft_set_dump,
+	.policy		= nft_set_policy,
+	.maxattr	= NFTA_SET_MAX,
+};
+
+static int __init nft_set_module_init(void)
+{
+	return nft_register_expr(&nft_set_ops);
+}
+
+static void __exit nft_set_module_exit(void)
+{
+	nft_unregister_expr(&nft_set_ops);
+}
+
+module_init(nft_set_module_init);
+module_exit(nft_set_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("set");
-- 
cgit v1.2.3


From 0628b123c96d126e617beb3b4fd63b874d0e4f17 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 14 Oct 2013 11:05:33 +0200
Subject: netfilter: nfnetlink: add batch support and use it from nf_tables

This patch adds a batch support to nfnetlink. Basically, it adds
two new control messages:

* NFNL_MSG_BATCH_BEGIN, that indicates the beginning of a batch,
  the nfgenmsg->res_id indicates the nfnetlink subsystem ID.

* NFNL_MSG_BATCH_END, that results in the invocation of the
  ss->commit callback function. If not specified or an error
  ocurred in the batch, the ss->abort function is invoked
  instead.

The end message represents the commit operation in nftables, the
lack of end message results in an abort. This patch also adds the
.call_batch function that is only called from the batch receival
path.

This patch adds atomic rule updates and dumps based on
bitmask generations. This allows to atomically commit a set of
rule-set updates incrementally without altering the internal
state of existing nf_tables expressions/matches/targets.

The idea consists of using a generation cursor of 1 bit and
a bitmask of 2 bits per rule. Assuming the gencursor is 0,
then the genmask (expressed as a bitmask) can be interpreted
as:

00 active in the present, will be active in the next generation.
01 inactive in the present, will be active in the next generation.
10 active in the present, will be deleted in the next generation.
 ^
 gencursor

Once you invoke the transition to the next generation, the global
gencursor is updated:

00 active in the present, will be active in the next generation.
01 active in the present, needs to zero its future, it becomes 00.
10 inactive in the present, delete now.
^
gencursor

If a dump is in progress and nf_tables enters a new generation,
the dump will stop and return -EBUSY to let userspace know that
it has to retry again. In order to invalidate dumps, a global
genctr counter is increased everytime nf_tables enters a new
generation.

This new operation can be used from the user-space utility
that controls the firewall, eg.

nft -f restore

The rule updates contained in `file' will be applied atomically.

cat file
-----
add filter INPUT ip saddr 1.1.1.1 counter accept #1
del filter INPUT ip daddr 2.2.2.2 counter drop   #2
-EOF-

Note that the rule 1 will be inactive until the transition to the
next generation, the rule 2 will be evicted in the next generation.

There is a penalty during the rule update due to the branch
misprediction in the packet matching framework. But that should be
quickly resolved once the iteration over the commit list that
contain rules that require updates is finished.

Event notification happens once the rule-set update has been
committed. So we skip notifications is case the rule-set update
is aborted, which can happen in case that the rule-set is tested
to apply correctly.

This patch squashed the following patches from Pablo:

* nf_tables: atomic rule updates and dumps
* nf_tables: get rid of per rule list_head for commits
* nf_tables: use per netns commit list
* nfnetlink: add batch support and use it from nf_tables
* nf_tables: all rule updates are transactional
* nf_tables: attach replacement rule after stale one
* nf_tables: do not allow deletion/replacement of stale rules
* nf_tables: remove unused NFTA_RULE_FLAGS

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h      |   5 +
 include/net/netfilter/nf_tables.h        |  25 +++-
 include/net/netns/nftables.h             |   3 +
 include/uapi/linux/netfilter/nfnetlink.h |   4 +
 net/netfilter/nf_tables_api.c            | 202 ++++++++++++++++++++++++++++---
 net/netfilter/nf_tables_core.c           |  10 ++
 net/netfilter/nfnetlink.c                | 175 +++++++++++++++++++++++++-
 7 files changed, 401 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 4f68cd7141d2..28c74367e900 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -14,6 +14,9 @@ struct nfnl_callback {
 	int (*call_rcu)(struct sock *nl, struct sk_buff *skb, 
 		    const struct nlmsghdr *nlh,
 		    const struct nlattr * const cda[]);
+	int (*call_batch)(struct sock *nl, struct sk_buff *skb,
+			  const struct nlmsghdr *nlh,
+			  const struct nlattr * const cda[]);
 	const struct nla_policy *policy;	/* netlink attribute policy */
 	const u_int16_t attr_count;		/* number of nlattr's */
 };
@@ -23,6 +26,8 @@ struct nfnetlink_subsystem {
 	__u8 subsys_id;			/* nfnetlink subsystem ID */
 	__u8 cb_count;			/* number of callbacks */
 	const struct nfnl_callback *cb;	/* callback for individual types */
+	int (*commit)(struct sk_buff *skb);
+	int (*abort)(struct sk_buff *skb);
 };
 
 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index d3272e943aac..975ad3c573c7 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -323,18 +323,39 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
  *	@list: used internally
  *	@rcu_head: used internally for rcu
  *	@handle: rule handle
+ *	@genmask: generation mask
  *	@dlen: length of expression data
  *	@data: expression data
  */
 struct nft_rule {
 	struct list_head		list;
 	struct rcu_head			rcu_head;
-	u64				handle:48,
+	u64				handle:46,
+					genmask:2,
 					dlen:16;
 	unsigned char			data[]
 		__attribute__((aligned(__alignof__(struct nft_expr))));
 };
 
+/**
+ *	struct nft_rule_trans - nf_tables rule update in transaction
+ *
+ *	@list: used internally
+ *	@rule: rule that needs to be updated
+ *	@chain: chain that this rule belongs to
+ *	@table: table for which this chain applies
+ *	@nlh: netlink header of the message that contain this update
+ *	@family: family expressesed as AF_*
+ */
+struct nft_rule_trans {
+	struct list_head		list;
+	struct nft_rule			*rule;
+	const struct nft_chain		*chain;
+	const struct nft_table		*table;
+	const struct nlmsghdr		*nlh;
+	u8				family;
+};
+
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
@@ -370,6 +391,7 @@ enum nft_chain_flags {
  *	@rules: list of rules in the chain
  *	@list: used internally
  *	@rcu_head: used internally
+ *	@net: net namespace that this chain belongs to
  *	@handle: chain handle
  *	@flags: bitmask of enum nft_chain_flags
  *	@use: number of jump references to this chain
@@ -380,6 +402,7 @@ struct nft_chain {
 	struct list_head		rules;
 	struct list_head		list;
 	struct rcu_head			rcu_head;
+	struct net			*net;
 	u64				handle;
 	u8				flags;
 	u16				use;
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index a98b1c5d9913..08a4248a12b5 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,9 +7,12 @@ struct nft_af_info;
 
 struct netns_nftables {
 	struct list_head	af_info;
+	struct list_head	commit_list;
 	struct nft_af_info	*ipv4;
 	struct nft_af_info	*ipv6;
 	struct nft_af_info	*bridge;
+	u8			gencursor;
+	u8			genctr;
 };
 
 #endif
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 288959404d54..596ddd45253c 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -57,4 +57,8 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_NFT_COMPAT		11
 #define NFNL_SUBSYS_COUNT		12
 
+/* Reserved control nfnetlink messages */
+#define NFNL_MSG_BATCH_BEGIN		NLMSG_MIN_TYPE
+#define NFNL_MSG_BATCH_END		NLMSG_MIN_TYPE+1
+
 #endif /* _UAPI_NFNETLINK_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0f140663ec71..79e1418a6043 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -978,6 +978,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 
 	INIT_LIST_HEAD(&chain->rules);
 	chain->handle = nf_tables_alloc_handle(table);
+	chain->net = net;
 	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
 
 	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
@@ -1371,6 +1372,41 @@ err:
 	return err;
 }
 
+static inline bool
+nft_rule_is_active(struct net *net, const struct nft_rule *rule)
+{
+	return (rule->genmask & (1 << net->nft.gencursor)) == 0;
+}
+
+static inline int gencursor_next(struct net *net)
+{
+	return net->nft.gencursor+1 == 1 ? 1 : 0;
+}
+
+static inline int
+nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
+{
+	return (rule->genmask & (1 << gencursor_next(net))) == 0;
+}
+
+static inline void
+nft_rule_activate_next(struct net *net, struct nft_rule *rule)
+{
+	/* Now inactive, will be active in the future */
+	rule->genmask = (1 << net->nft.gencursor);
+}
+
+static inline void
+nft_rule_disactivate_next(struct net *net, struct nft_rule *rule)
+{
+	rule->genmask = (1 << gencursor_next(net));
+}
+
+static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
+{
+	rule->genmask = 0;
+}
+
 static int nf_tables_dump_rules(struct sk_buff *skb,
 				struct netlink_callback *cb)
 {
@@ -1382,6 +1418,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 	unsigned int idx = 0, s_idx = cb->args[0];
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
+	u8 genctr = ACCESS_ONCE(net->nft.genctr);
+	u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
 
 	list_for_each_entry(afi, &net->nft.af_info, list) {
 		if (family != NFPROTO_UNSPEC && family != afi->family)
@@ -1390,6 +1428,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 		list_for_each_entry(table, &afi->tables, list) {
 			list_for_each_entry(chain, &table->chains, list) {
 				list_for_each_entry(rule, &chain->rules, list) {
+					if (!nft_rule_is_active(net, rule))
+						goto cont;
 					if (idx < s_idx)
 						goto cont;
 					if (idx > s_idx)
@@ -1408,6 +1448,10 @@ cont:
 		}
 	}
 done:
+	/* Invalidate this dump, a transition to the new generation happened */
+	if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
+		return -EBUSY;
+
 	cb->args[0] = idx;
 	return skb->len;
 }
@@ -1492,6 +1536,25 @@ static void nf_tables_rule_destroy(struct nft_rule *rule)
 
 static struct nft_expr_info *info;
 
+static struct nft_rule_trans *
+nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
+{
+	struct nft_rule_trans *rupd;
+
+	rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
+	if (rupd == NULL)
+	       return NULL;
+
+	rupd->chain = ctx->chain;
+	rupd->table = ctx->table;
+	rupd->rule = rule;
+	rupd->family = ctx->afi->family;
+	rupd->nlh = ctx->nlh;
+	list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
+
+	return rupd;
+}
+
 static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			     const struct nlmsghdr *nlh,
 			     const struct nlattr * const nla[])
@@ -1502,6 +1565,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *old_rule = NULL;
+	struct nft_rule_trans *repl = NULL;
 	struct nft_expr *expr;
 	struct nft_ctx ctx;
 	struct nlattr *tmp;
@@ -1576,6 +1640,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	if (rule == NULL)
 		goto err1;
 
+	nft_rule_activate_next(net, rule);
+
 	rule->handle = handle;
 	rule->dlen   = size;
 
@@ -1589,8 +1655,18 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	}
 
 	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-		list_replace_rcu(&old_rule->list, &rule->list);
-		nf_tables_rule_destroy(old_rule);
+		if (nft_rule_is_active_next(net, old_rule)) {
+			repl = nf_tables_trans_add(old_rule, &ctx);
+			if (repl == NULL) {
+				err = -ENOMEM;
+				goto err2;
+			}
+			nft_rule_disactivate_next(net, old_rule);
+			list_add_tail(&rule->list, &old_rule->list);
+		} else {
+			err = -ENOENT;
+			goto err2;
+		}
 	} else if (nlh->nlmsg_flags & NLM_F_APPEND)
 		if (old_rule)
 			list_add_rcu(&rule->list, &old_rule->list);
@@ -1603,11 +1679,20 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			list_add_rcu(&rule->list, &chain->rules);
 	}
 
-	nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE,
-			      nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE),
-			      nfmsg->nfgen_family);
+	if (nf_tables_trans_add(rule, &ctx) == NULL) {
+		err = -ENOMEM;
+		goto err3;
+	}
 	return 0;
 
+err3:
+	list_del_rcu(&rule->list);
+	if (repl) {
+		list_del_rcu(&repl->rule->list);
+		list_del(&repl->list);
+		nft_rule_clear(net, repl->rule);
+		kfree(repl);
+	}
 err2:
 	nf_tables_rule_destroy(rule);
 err1:
@@ -1618,6 +1703,19 @@ err1:
 	return err;
 }
 
+static int
+nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+	/* You cannot delete the same rule twice */
+	if (nft_rule_is_active_next(ctx->net, rule)) {
+		if (nf_tables_trans_add(rule, ctx) == NULL)
+			return -ENOMEM;
+		nft_rule_disactivate_next(ctx->net, rule);
+		return 0;
+	}
+	return -ENOENT;
+}
+
 static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 			     const struct nlmsghdr *nlh,
 			     const struct nlattr * const nla[])
@@ -1628,7 +1726,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 	const struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *tmp;
-	int family = nfmsg->nfgen_family;
+	int family = nfmsg->nfgen_family, err = 0;
+	struct nft_ctx ctx;
 
 	afi = nf_tables_afinfo_lookup(net, family, false);
 	if (IS_ERR(afi))
@@ -1642,31 +1741,95 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
 
+	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+
 	if (nla[NFTA_RULE_HANDLE]) {
 		rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
 		if (IS_ERR(rule))
 			return PTR_ERR(rule);
 
-		/* List removal must be visible before destroying expressions */
-		list_del_rcu(&rule->list);
-
-		nf_tables_rule_notify(skb, nlh, table, chain, rule,
-				      NFT_MSG_DELRULE, 0, family);
-		nf_tables_rule_destroy(rule);
+		err = nf_tables_delrule_one(&ctx, rule);
 	} else {
 		/* Remove all rules in this chain */
 		list_for_each_entry_safe(rule, tmp, &chain->rules, list) {
-			list_del_rcu(&rule->list);
+			err = nf_tables_delrule_one(&ctx, rule);
+			if (err < 0)
+				break;
+		}
+	}
+
+	return err;
+}
+
+static int nf_tables_commit(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_rule_trans *rupd, *tmp;
 
-			nf_tables_rule_notify(skb, nlh, table, chain, rule,
-					      NFT_MSG_DELRULE, 0, family);
-			nf_tables_rule_destroy(rule);
+	/* Bump generation counter, invalidate any dump in progress */
+	net->nft.genctr++;
+
+	/* A new generation has just started */
+	net->nft.gencursor = gencursor_next(net);
+
+	/* Make sure all packets have left the previous generation before
+	 * purging old rules.
+	 */
+	synchronize_rcu();
+
+	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+		/* Delete this rule from the dirty list */
+		list_del(&rupd->list);
+
+		/* This rule was inactive in the past and just became active.
+		 * Clear the next bit of the genmask since its meaning has
+		 * changed, now it is the future.
+		 */
+		if (nft_rule_is_active(net, rupd->rule)) {
+			nft_rule_clear(net, rupd->rule);
+			nf_tables_rule_notify(skb, rupd->nlh, rupd->table,
+					      rupd->chain, rupd->rule,
+					      NFT_MSG_NEWRULE, 0,
+					      rupd->family);
+			kfree(rupd);
+			continue;
 		}
+
+		/* This rule is in the past, get rid of it */
+		list_del_rcu(&rupd->rule->list);
+		nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
+				      rupd->rule, NFT_MSG_DELRULE, 0,
+				      rupd->family);
+		nf_tables_rule_destroy(rupd->rule);
+		kfree(rupd);
 	}
 
 	return 0;
 }
 
+static int nf_tables_abort(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_rule_trans *rupd, *tmp;
+
+	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+		/* Delete all rules from the dirty list */
+		list_del(&rupd->list);
+
+		if (!nft_rule_is_active_next(net, rupd->rule)) {
+			nft_rule_clear(net, rupd->rule);
+			kfree(rupd);
+			continue;
+		}
+
+		/* This rule is inactive, get rid of it */
+		list_del_rcu(&rupd->rule->list);
+		nf_tables_rule_destroy(rupd->rule);
+		kfree(rupd);
+	}
+	return 0;
+}
+
 /*
  * Sets
  */
@@ -2634,7 +2797,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_chain_policy,
 	},
 	[NFT_MSG_NEWRULE] = {
-		.call		= nf_tables_newrule,
+		.call_batch	= nf_tables_newrule,
 		.attr_count	= NFTA_RULE_MAX,
 		.policy		= nft_rule_policy,
 	},
@@ -2644,7 +2807,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_rule_policy,
 	},
 	[NFT_MSG_DELRULE] = {
-		.call		= nf_tables_delrule,
+		.call_batch	= nf_tables_delrule,
 		.attr_count	= NFTA_RULE_MAX,
 		.policy		= nft_rule_policy,
 	},
@@ -2685,6 +2848,8 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
 	.subsys_id	= NFNL_SUBSYS_NFTABLES,
 	.cb_count	= NFT_MSG_MAX,
 	.cb		= nf_tables_cb,
+	.commit		= nf_tables_commit,
+	.abort		= nf_tables_abort,
 };
 
 /*
@@ -3056,6 +3221,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump);
 static int nf_tables_init_net(struct net *net)
 {
 	INIT_LIST_HEAD(&net->nft.af_info);
+	INIT_LIST_HEAD(&net->nft.commit_list);
 	return 0;
 }
 
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 3c13007d80df..d581ef660248 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -88,12 +88,22 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
 	struct nft_data data[NFT_REG_MAX + 1];
 	unsigned int stackptr = 0;
 	struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
+	/*
+	 * Cache cursor to avoid problems in case that the cursor is updated
+	 * while traversing the ruleset.
+	 */
+	unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
 
 do_chain:
 	rule = list_entry(&chain->rules, struct nft_rule, list);
 next_rule:
 	data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
 	list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
+
+		/* This rule is not active, skip. */
+		if (unlikely(rule->genmask & (1 << gencursor)))
+			continue;
+
 		nft_rule_for_each_expr(expr, last, rule) {
 			if (expr->ops == &nft_cmp_fast_ops)
 				nft_cmp_fast_eval(expr, data);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 572d87dc116f..027f16af51a0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	const struct nfnetlink_subsystem *ss;
 	int type, err;
 
-	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-		return -EPERM;
-
 	/* All the messages must at least contain nfgenmsg */
 	if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
 		return 0;
@@ -217,9 +214,179 @@ replay:
 	}
 }
 
+static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
+				u_int16_t subsys_id)
+{
+	struct sk_buff *nskb, *oskb = skb;
+	struct net *net = sock_net(skb->sk);
+	const struct nfnetlink_subsystem *ss;
+	const struct nfnl_callback *nc;
+	bool success = true, done = false;
+	int err;
+
+	if (subsys_id >= NFNL_SUBSYS_COUNT)
+		return netlink_ack(skb, nlh, -EINVAL);
+replay:
+	nskb = netlink_skb_clone(oskb, GFP_KERNEL);
+	if (!nskb)
+		return netlink_ack(oskb, nlh, -ENOMEM);
+
+	nskb->sk = oskb->sk;
+	skb = nskb;
+
+	nfnl_lock(subsys_id);
+	ss = rcu_dereference_protected(table[subsys_id].subsys,
+				       lockdep_is_held(&table[subsys_id].mutex));
+	if (!ss) {
+#ifdef CONFIG_MODULES
+		nfnl_unlock(subsys_id);
+		request_module("nfnetlink-subsys-%d", subsys_id);
+		nfnl_lock(subsys_id);
+		ss = rcu_dereference_protected(table[subsys_id].subsys,
+					       lockdep_is_held(&table[subsys_id].mutex));
+		if (!ss)
+#endif
+		{
+			nfnl_unlock(subsys_id);
+			kfree_skb(nskb);
+			return netlink_ack(skb, nlh, -EOPNOTSUPP);
+		}
+	}
+
+	if (!ss->commit || !ss->abort) {
+		nfnl_unlock(subsys_id);
+		kfree_skb(nskb);
+		return netlink_ack(skb, nlh, -EOPNOTSUPP);
+	}
+
+	while (skb->len >= nlmsg_total_size(0)) {
+		int msglen, type;
+
+		nlh = nlmsg_hdr(skb);
+		err = 0;
+
+		if (nlh->nlmsg_len < NLMSG_HDRLEN) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		/* Only requests are handled by the kernel */
+		if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		type = nlh->nlmsg_type;
+		if (type == NFNL_MSG_BATCH_BEGIN) {
+			/* Malformed: Batch begin twice */
+			success = false;
+			goto done;
+		} else if (type == NFNL_MSG_BATCH_END) {
+			done = true;
+			goto done;
+		} else if (type < NLMSG_MIN_TYPE) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		/* We only accept a batch with messages for the same
+		 * subsystem.
+		 */
+		if (NFNL_SUBSYS_ID(type) != subsys_id) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		nc = nfnetlink_find_client(type, ss);
+		if (!nc) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		{
+			int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+			u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+			struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+			struct nlattr *attr = (void *)nlh + min_len;
+			int attrlen = nlh->nlmsg_len - min_len;
+
+			err = nla_parse(cda, ss->cb[cb_id].attr_count,
+					attr, attrlen, ss->cb[cb_id].policy);
+			if (err < 0)
+				goto ack;
+
+			if (nc->call_batch) {
+				err = nc->call_batch(net->nfnl, skb, nlh,
+						     (const struct nlattr **)cda);
+			}
+
+			/* The lock was released to autoload some module, we
+			 * have to abort and start from scratch using the
+			 * original skb.
+			 */
+			if (err == -EAGAIN) {
+				ss->abort(skb);
+				nfnl_unlock(subsys_id);
+				kfree_skb(nskb);
+				goto replay;
+			}
+		}
+ack:
+		if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+			/* We don't stop processing the batch on errors, thus,
+			 * userspace gets all the errors that the batch
+			 * triggers.
+			 */
+			netlink_ack(skb, nlh, err);
+			if (err)
+				success = false;
+		}
+
+		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (msglen > skb->len)
+			msglen = skb->len;
+		skb_pull(skb, msglen);
+	}
+done:
+	if (success && done)
+		ss->commit(skb);
+	else
+		ss->abort(skb);
+
+	nfnl_unlock(subsys_id);
+	kfree_skb(nskb);
+}
+
 static void nfnetlink_rcv(struct sk_buff *skb)
 {
-	netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
+	struct net *net = sock_net(skb->sk);
+	int msglen;
+
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+		return netlink_ack(skb, nlh, -EPERM);
+
+	if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+	    skb->len < nlh->nlmsg_len)
+		return;
+
+	if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) {
+		struct nfgenmsg *nfgenmsg;
+
+		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (msglen > skb->len)
+			msglen = skb->len;
+
+		if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+		    skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
+			return;
+
+		nfgenmsg = nlmsg_data(nlh);
+		skb_pull(skb, msglen);
+		nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
+	} else {
+		netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+	}
 }
 
 #ifdef CONFIG_MODULES
-- 
cgit v1.2.3


From c00809d330cfe42469fcd1cfd63f0690b47ea9bb Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Mon, 14 Oct 2013 09:18:54 +0900
Subject: USB: ohci-exynos: Remove non-DT support

The non-DT for EXYNOS SoCs is not supported from v3.11.
Thus, there is no need to support non-DT for Exynos OHCI driver.

The 'include/linux/platform_data/usb-ohci-exynos.h' file has been
used for non-DT support. Thus, the 'usb-ohci-exynos.h' file can
be removed.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ohci-exynos.c                | 18 +++---------------
 include/linux/platform_data/usb-ohci-exynos.h | 21 ---------------------
 2 files changed, 3 insertions(+), 36 deletions(-)
 delete mode 100644 include/linux/platform_data/usb-ohci-exynos.h

(limited to 'include/linux')

diff --git a/drivers/usb/host/ohci-exynos.c b/drivers/usb/host/ohci-exynos.c
index 17f46fbf047d..a87baedc0aa7 100644
--- a/drivers/usb/host/ohci-exynos.c
+++ b/drivers/usb/host/ohci-exynos.c
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/usb-ohci-exynos.h>
 #include <linux/usb/phy.h>
 #include <linux/usb/samsung_usb_phy.h>
 #include <linux/usb.h>
@@ -38,7 +37,6 @@ struct exynos_ohci_hcd {
 	struct clk *clk;
 	struct usb_phy *phy;
 	struct usb_otg *otg;
-	struct exynos4_ohci_platdata *pdata;
 };
 
 static void exynos_ohci_phy_enable(struct platform_device *pdev)
@@ -48,8 +46,6 @@ static void exynos_ohci_phy_enable(struct platform_device *pdev)
 
 	if (exynos_ohci->phy)
 		usb_phy_init(exynos_ohci->phy);
-	else if (exynos_ohci->pdata && exynos_ohci->pdata->phy_init)
-		exynos_ohci->pdata->phy_init(pdev, USB_PHY_TYPE_HOST);
 }
 
 static void exynos_ohci_phy_disable(struct platform_device *pdev)
@@ -59,13 +55,10 @@ static void exynos_ohci_phy_disable(struct platform_device *pdev)
 
 	if (exynos_ohci->phy)
 		usb_phy_shutdown(exynos_ohci->phy);
-	else if (exynos_ohci->pdata && exynos_ohci->pdata->phy_exit)
-		exynos_ohci->pdata->phy_exit(pdev, USB_PHY_TYPE_HOST);
 }
 
 static int exynos_ohci_probe(struct platform_device *pdev)
 {
-	struct exynos4_ohci_platdata *pdata = dev_get_platdata(&pdev->dev);
 	struct exynos_ohci_hcd *exynos_ohci;
 	struct usb_hcd *hcd;
 	struct resource *res;
@@ -98,14 +91,9 @@ static int exynos_ohci_probe(struct platform_device *pdev)
 
 	phy = devm_usb_get_phy(&pdev->dev, USB_PHY_TYPE_USB2);
 	if (IS_ERR(phy)) {
-		/* Fallback to pdata */
-		if (!pdata) {
-			usb_put_hcd(hcd);
-			dev_warn(&pdev->dev, "no platform data or transceiver defined\n");
-			return -EPROBE_DEFER;
-		} else {
-			exynos_ohci->pdata = pdata;
-		}
+		usb_put_hcd(hcd);
+		dev_warn(&pdev->dev, "no platform data or transceiver defined\n");
+		return -EPROBE_DEFER;
 	} else {
 		exynos_ohci->phy = phy;
 		exynos_ohci->otg = phy->otg;
diff --git a/include/linux/platform_data/usb-ohci-exynos.h b/include/linux/platform_data/usb-ohci-exynos.h
deleted file mode 100644
index c256c595be5e..000000000000
--- a/include/linux/platform_data/usb-ohci-exynos.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2011 Samsung Electronics Co.Ltd
- *		http://www.samsung.com/
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef __MACH_EXYNOS_OHCI_H
-#define __MACH_EXYNOS_OHCI_H
-
-struct exynos4_ohci_platdata {
-	int (*phy_init)(struct platform_device *pdev, int type);
-	int (*phy_exit)(struct platform_device *pdev, int type);
-};
-
-extern void exynos4_ohci_set_platdata(struct exynos4_ohci_platdata *pd);
-
-#endif /* __MACH_EXYNOS_OHCI_H */
-- 
cgit v1.2.3


From 1931ee143b0ab72924944bc06e363d837ba05063 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 11 Oct 2013 09:27:28 +0200
Subject: Revert "drivers: of: add initialization code for dma reserved memory"

This reverts commit 9d8eab7af79cb4ce2de5de39f82c455b1f796963. There is
still no consensus on the bindings for the reserved memory and various
drawbacks of the proposed solution has been shown, so the best now is to
revert it completely and start again from scratch later.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 Documentation/devicetree/bindings/memory.txt | 168 --------------------------
 drivers/of/Kconfig                           |   6 -
 drivers/of/Makefile                          |   1 -
 drivers/of/of_reserved_mem.c                 | 173 ---------------------------
 drivers/of/platform.c                        |   4 -
 include/linux/of_reserved_mem.h              |  14 ---
 6 files changed, 366 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/memory.txt
 delete mode 100644 drivers/of/of_reserved_mem.c
 delete mode 100644 include/linux/of_reserved_mem.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/memory.txt b/Documentation/devicetree/bindings/memory.txt
deleted file mode 100644
index eb2469365593..000000000000
--- a/Documentation/devicetree/bindings/memory.txt
+++ /dev/null
@@ -1,168 +0,0 @@
-*** Memory binding ***
-
-The /memory node provides basic information about the address and size
-of the physical memory. This node is usually filled or updated by the
-bootloader, depending on the actual memory configuration of the given
-hardware.
-
-The memory layout is described by the following node:
-
-/ {
-	#address-cells = <(n)>;
-	#size-cells = <(m)>;
-	memory {
-		device_type = "memory";
-		reg =  <(baseaddr1) (size1)
-			(baseaddr2) (size2)
-			...
-			(baseaddrN) (sizeN)>;
-	};
-	...
-};
-
-A memory node follows the typical device tree rules for "reg" property:
-n:		number of cells used to store base address value
-m:		number of cells used to store size value
-baseaddrX:	defines a base address of the defined memory bank
-sizeX:		the size of the defined memory bank
-
-
-More than one memory bank can be defined.
-
-
-*** Reserved memory regions ***
-
-In /memory/reserved-memory node one can create child nodes describing
-particular reserved (excluded from normal use) memory regions. Such
-memory regions are usually designed for the special usage by various
-device drivers. A good example are contiguous memory allocations or
-memory sharing with other operating system on the same hardware board.
-Those special memory regions might depend on the board configuration and
-devices used on the target system.
-
-Parameters for each memory region can be encoded into the device tree
-with the following convention:
-
-[(label):] (name) {
-	compatible = "linux,contiguous-memory-region", "reserved-memory-region";
-	reg = <(address) (size)>;
-	(linux,default-contiguous-region);
-};
-
-compatible:	one or more of:
-	- "linux,contiguous-memory-region" - enables binding of this
-	  region to Contiguous Memory Allocator (special region for
-	  contiguous memory allocations, shared with movable system
-	  memory, Linux kernel-specific).
-	- "reserved-memory-region" - compatibility is defined, given
-	  region is assigned for exclusive usage for by the respective
-	  devices.
-
-reg:	standard property defining the base address and size of
-	the memory region
-
-linux,default-contiguous-region: property indicating that the region
-	is the default region for all contiguous memory
-	allocations, Linux specific (optional)
-
-It is optional to specify the base address, so if one wants to use
-autoconfiguration of the base address, '0' can be specified as a base
-address in the 'reg' property.
-
-The /memory/reserved-memory node must contain the same #address-cells
-and #size-cells value as the root node.
-
-
-*** Device node's properties ***
-
-Once regions in the /memory/reserved-memory node have been defined, they
-may be referenced by other device nodes. Bindings that wish to reference
-memory regions should explicitly document their use of the following
-property:
-
-memory-region = <&phandle_to_defined_region>;
-
-This property indicates that the device driver should use the memory
-region pointed by the given phandle.
-
-
-*** Example ***
-
-This example defines a memory consisting of 4 memory banks. 3 contiguous
-regions are defined for Linux kernel, one default of all device drivers
-(named contig_mem, placed at 0x72000000, 64MiB), one dedicated to the
-framebuffer device (labelled display_mem, placed at 0x78000000, 8MiB)
-and one for multimedia processing (labelled multimedia_mem, placed at
-0x77000000, 64MiB). 'display_mem' region is then assigned to fb@12300000
-device for DMA memory allocations (Linux kernel drivers will use CMA is
-available or dma-exclusive usage otherwise). 'multimedia_mem' is
-assigned to scaler@12500000 and codec@12600000 devices for contiguous
-memory allocations when CMA driver is enabled.
-
-The reason for creating a separate region for framebuffer device is to
-match the framebuffer base address to the one configured by bootloader,
-so once Linux kernel drivers starts no glitches on the displayed boot
-logo appears. Scaller and codec drivers should share the memory
-allocations.
-
-/ {
-	#address-cells = <1>;
-	#size-cells = <1>;
-
-	/* ... */
-
-	memory {
-		reg =  <0x40000000 0x10000000
-			0x50000000 0x10000000
-			0x60000000 0x10000000
-			0x70000000 0x10000000>;
-
-		reserved-memory {
-			#address-cells = <1>;
-			#size-cells = <1>;
-
-			/*
-			 * global autoconfigured region for contiguous allocations
-			 * (used only with Contiguous Memory Allocator)
-			 */
-			contig_region@0 {
-				compatible = "linux,contiguous-memory-region";
-				reg = <0x0 0x4000000>;
-				linux,default-contiguous-region;
-			};
-
-			/*
-			 * special region for framebuffer
-			 */
-			display_region: region@78000000 {
-				compatible = "linux,contiguous-memory-region", "reserved-memory-region";
-				reg = <0x78000000 0x800000>;
-			};
-
-			/*
-			 * special region for multimedia processing devices
-			 */
-			multimedia_region: region@77000000 {
-				compatible = "linux,contiguous-memory-region";
-				reg = <0x77000000 0x4000000>;
-			};
-		};
-	};
-
-	/* ... */
-
-	fb0: fb@12300000 {
-		status = "okay";
-		memory-region = <&display_region>;
-	};
-
-	scaler: scaler@12500000 {
-		status = "okay";
-		memory-region = <&multimedia_region>;
-	};
-
-	codec: codec@12600000 {
-		status = "okay";
-		memory-region = <&multimedia_region>;
-	};
-};
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 9d2009a9004d..78cc76053328 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -74,10 +74,4 @@ config OF_MTD
 	depends on MTD
 	def_bool y
 
-config OF_RESERVED_MEM
-	depends on OF_FLATTREE && (DMA_CMA || (HAVE_GENERIC_DMA_COHERENT && HAVE_MEMBLOCK))
-	def_bool y
-	help
-	  Initialization code for DMA reserved memory
-
 endmenu # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index ed9660adad77..efd05102c405 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -9,4 +9,3 @@ obj-$(CONFIG_OF_MDIO)	+= of_mdio.o
 obj-$(CONFIG_OF_PCI)	+= of_pci.o
 obj-$(CONFIG_OF_PCI_IRQ)  += of_pci_irq.o
 obj-$(CONFIG_OF_MTD)	+= of_mtd.o
-obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
deleted file mode 100644
index 0fe40c7d6904..000000000000
--- a/drivers/of/of_reserved_mem.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Device tree based initialization code for reserved memory.
- *
- * Copyright (c) 2013 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com
- * Author: Marek Szyprowski <m.szyprowski@samsung.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- */
-
-#include <linux/memblock.h>
-#include <linux/err.h>
-#include <linux/of.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
-#include <linux/mm.h>
-#include <linux/sizes.h>
-#include <linux/mm_types.h>
-#include <linux/dma-contiguous.h>
-#include <linux/dma-mapping.h>
-#include <linux/of_reserved_mem.h>
-
-#define MAX_RESERVED_REGIONS	16
-struct reserved_mem {
-	phys_addr_t		base;
-	unsigned long		size;
-	struct cma		*cma;
-	char			name[32];
-};
-static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
-static int reserved_mem_count;
-
-static int __init fdt_scan_reserved_mem(unsigned long node, const char *uname,
-					int depth, void *data)
-{
-	struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
-	phys_addr_t base, size;
-	int is_cma, is_reserved;
-	unsigned long len;
-	const char *status;
-	__be32 *prop;
-
-	is_cma = IS_ENABLED(CONFIG_DMA_CMA) &&
-	       of_flat_dt_is_compatible(node, "linux,contiguous-memory-region");
-	is_reserved = of_flat_dt_is_compatible(node, "reserved-memory-region");
-
-	if (!is_reserved && !is_cma) {
-		/* ignore node and scan next one */
-		return 0;
-	}
-
-	status = of_get_flat_dt_prop(node, "status", &len);
-	if (status && strcmp(status, "okay") != 0) {
-		/* ignore disabled node nad scan next one */
-		return 0;
-	}
-
-	prop = of_get_flat_dt_prop(node, "reg", &len);
-	if (!prop || (len < (dt_root_size_cells + dt_root_addr_cells) *
-			     sizeof(__be32))) {
-		pr_err("Reserved mem: node %s, incorrect \"reg\" property\n",
-		       uname);
-		/* ignore node and scan next one */
-		return 0;
-	}
-	base = dt_mem_next_cell(dt_root_addr_cells, &prop);
-	size = dt_mem_next_cell(dt_root_size_cells, &prop);
-
-	if (!size) {
-		/* ignore node and scan next one */
-		return 0;
-	}
-
-	pr_info("Reserved mem: found %s, memory base %lx, size %ld MiB\n",
-		uname, (unsigned long)base, (unsigned long)size / SZ_1M);
-
-	if (reserved_mem_count == ARRAY_SIZE(reserved_mem))
-		return -ENOSPC;
-
-	rmem->base = base;
-	rmem->size = size;
-	strlcpy(rmem->name, uname, sizeof(rmem->name));
-
-	if (is_cma) {
-		struct cma *cma;
-		if (dma_contiguous_reserve_area(size, base, 0, &cma) == 0) {
-			rmem->cma = cma;
-			reserved_mem_count++;
-			if (of_get_flat_dt_prop(node,
-						"linux,default-contiguous-region",
-						NULL))
-				dma_contiguous_set_default(cma);
-		}
-	} else if (is_reserved) {
-		if (memblock_remove(base, size) == 0)
-			reserved_mem_count++;
-		else
-			pr_err("Failed to reserve memory for %s\n", uname);
-	}
-
-	return 0;
-}
-
-static struct reserved_mem *get_dma_memory_region(struct device *dev)
-{
-	struct device_node *node;
-	const char *name;
-	int i;
-
-	node = of_parse_phandle(dev->of_node, "memory-region", 0);
-	if (!node)
-		return NULL;
-
-	name = kbasename(node->full_name);
-	for (i = 0; i < reserved_mem_count; i++)
-		if (strcmp(name, reserved_mem[i].name) == 0)
-			return &reserved_mem[i];
-	return NULL;
-}
-
-/**
- * of_reserved_mem_device_init() - assign reserved memory region to given device
- *
- * This function assign memory region pointed by "memory-region" device tree
- * property to the given device.
- */
-void of_reserved_mem_device_init(struct device *dev)
-{
-	struct reserved_mem *region = get_dma_memory_region(dev);
-	if (!region)
-		return;
-
-	if (region->cma) {
-		dev_set_cma_area(dev, region->cma);
-		pr_info("Assigned CMA %s to %s device\n", region->name,
-			dev_name(dev));
-	} else {
-		if (dma_declare_coherent_memory(dev, region->base, region->base,
-		    region->size, DMA_MEMORY_MAP | DMA_MEMORY_EXCLUSIVE) != 0)
-			pr_info("Declared reserved memory %s to %s device\n",
-				region->name, dev_name(dev));
-	}
-}
-
-/**
- * of_reserved_mem_device_release() - release reserved memory device structures
- *
- * This function releases structures allocated for memory region handling for
- * the given device.
- */
-void of_reserved_mem_device_release(struct device *dev)
-{
-	struct reserved_mem *region = get_dma_memory_region(dev);
-	if (!region && !region->cma)
-		dma_release_declared_memory(dev);
-}
-
-/**
- * early_init_dt_scan_reserved_mem() - create reserved memory regions
- *
- * This function grabs memory from early allocator for device exclusive use
- * defined in device tree structures. It should be called by arch specific code
- * once the early allocator (memblock) has been activated and all other
- * subsystems have already allocated/reserved memory.
- */
-void __init early_init_dt_scan_reserved_mem(void)
-{
-	of_scan_flat_dt_by_path("/memory/reserved-memory",
-				fdt_scan_reserved_mem, NULL);
-}
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 9b439ac63d8e..f6dcde220821 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -21,7 +21,6 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
-#include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
 
 const struct of_device_id of_default_bus_match_table[] = {
@@ -219,8 +218,6 @@ static struct platform_device *of_platform_device_create_pdata(
 	dev->dev.bus = &platform_bus_type;
 	dev->dev.platform_data = platform_data;
 
-	of_reserved_mem_device_init(&dev->dev);
-
 	/* We do not fill the DMA ops for platform devices by default.
 	 * This is currently the responsibility of the platform code
 	 * to do such, possibly using a device notifier
@@ -228,7 +225,6 @@ static struct platform_device *of_platform_device_create_pdata(
 
 	if (of_device_add(dev) != 0) {
 		platform_device_put(dev);
-		of_reserved_mem_device_release(&dev->dev);
 		return NULL;
 	}
 
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
deleted file mode 100644
index c84128255814..000000000000
--- a/include/linux/of_reserved_mem.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __OF_RESERVED_MEM_H
-#define __OF_RESERVED_MEM_H
-
-#ifdef CONFIG_OF_RESERVED_MEM
-void of_reserved_mem_device_init(struct device *dev);
-void of_reserved_mem_device_release(struct device *dev);
-void early_init_dt_scan_reserved_mem(void);
-#else
-static inline void of_reserved_mem_device_init(struct device *dev) { }
-static inline void of_reserved_mem_device_release(struct device *dev) { }
-static inline void early_init_dt_scan_reserved_mem(void) { }
-#endif
-
-#endif /* __OF_RESERVED_MEM_H */
-- 
cgit v1.2.3


From f2e106692d5189303997ad7b96de8d8123aa5613 Mon Sep 17 00:00:00 2001
From: chai wen <chaiw.fnst@cn.fujitsu.com>
Date: Mon, 14 Oct 2013 22:22:33 +0800
Subject: KVM: Drop FOLL_GET in GUP when doing async page fault

Page pinning is not mandatory in kvm async page fault processing since
after async page fault event is delivered to a guest it accesses page once
again and does its own GUP.  Drop the FOLL_GET flag in GUP in async_pf
code, and do some simplifying in check/clear processing.

Suggested-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Gu zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: chai wen <chaiw.fnst@cn.fujitsu.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/x86.c         |  4 ++--
 include/linux/kvm_host.h   |  2 +-
 include/trace/events/kvm.h | 10 ++++------
 virt/kvm/async_pf.c        | 17 +++++------------
 4 files changed, 12 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c951c71dc80b..edf2a07df3a3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7298,7 +7298,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	int r;
 
 	if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
-	      is_error_page(work->page))
+	      work->wakeup_all)
 		return;
 
 	r = kvm_mmu_reload(vcpu);
@@ -7408,7 +7408,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 	struct x86_exception fault;
 
 	trace_kvm_async_pf_ready(work->arch.token, work->gva);
-	if (is_error_page(work->page))
+	if (work->wakeup_all)
 		work->arch.token = ~0; /* broadcast wakeup */
 	else
 		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f6dccde755f6..c9d4236ab442 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -189,7 +189,7 @@ struct kvm_async_pf {
 	gva_t gva;
 	unsigned long addr;
 	struct kvm_arch_async_pf arch;
-	struct page *page;
+	bool   wakeup_all;
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 7005d1109ec9..131a0bda7aec 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
 
 TRACE_EVENT(
 	kvm_async_pf_completed,
-	TP_PROTO(unsigned long address, struct page *page, u64 gva),
-	TP_ARGS(address, page, gva),
+	TP_PROTO(unsigned long address, u64 gva),
+	TP_ARGS(address, gva),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, address)
-		__field(pfn_t, pfn)
 		__field(u64, gva)
 		),
 
 	TP_fast_assign(
 		__entry->address = address;
-		__entry->pfn = page ? page_to_pfn(page) : 0;
 		__entry->gva = gva;
 		),
 
-	TP_printk("gva %#llx address %#lx pfn %#llx",  __entry->gva,
-		  __entry->address, __entry->pfn)
+	TP_printk("gva %#llx address %#lx",  __entry->gva,
+		  __entry->address)
 );
 
 #endif
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index b197950ac4d5..8631d9c14320 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void async_pf_execute(struct work_struct *work)
 {
-	struct page *page = NULL;
 	struct kvm_async_pf *apf =
 		container_of(work, struct kvm_async_pf, work);
 	struct mm_struct *mm = apf->mm;
@@ -68,13 +67,12 @@ static void async_pf_execute(struct work_struct *work)
 
 	use_mm(mm);
 	down_read(&mm->mmap_sem);
-	get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
 	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);
-	apf->page = page;
 	spin_unlock(&vcpu->async_pf.lock);
 
 	/*
@@ -82,7 +80,7 @@ static void async_pf_execute(struct work_struct *work)
 	 * this point
 	 */
 
-	trace_kvm_async_pf_completed(addr, page, gva);
+	trace_kvm_async_pf_completed(addr, gva);
 
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
@@ -112,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_entry(vcpu->async_pf.done.next,
 				   typeof(*work), link);
 		list_del(&work->link);
-		if (!is_error_page(work->page))
-			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
 	spin_unlock(&vcpu->async_pf.lock);
@@ -133,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		list_del(&work->link);
 		spin_unlock(&vcpu->async_pf.lock);
 
-		if (work->page)
-			kvm_arch_async_page_ready(vcpu, work);
+		kvm_arch_async_page_ready(vcpu, work);
 		kvm_arch_async_page_present(vcpu, work);
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;
-		if (!is_error_page(work->page))
-			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
 }
@@ -163,7 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 	if (!work)
 		return 0;
 
-	work->page = NULL;
+	work->wakeup_all = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
 	work->addr = gfn_to_hva(vcpu->kvm, gfn);
@@ -203,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
 	if (!work)
 		return -ENOMEM;
 
-	work->page = KVM_ERR_PTR_BAD_PAGE;
+	work->wakeup_all = true;
 	INIT_LIST_HEAD(&work->queue); /* for list_del to work */
 
 	spin_lock(&vcpu->async_pf.lock);
-- 
cgit v1.2.3


From e66cf161098a634dc96e32d0089c5767cf25668a Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 15 Oct 2013 15:18:08 +0100
Subject: GFS2: Use lockref for glocks

Currently glocks have an atomic reference count and also a spinlock
which covers various internal fields, such as the state. This intent of
this patch is to replace the spinlock and the atomic reference count
with a lockref structure. This contains a spinlock which we can continue
to use as before, and a reference counter which is used in conjuction
with the spinlock to replace the previous atomic counter.

As a result of this there are some new rules for reference counting on
glocks. We need to distinguish between reference count changes under
gl_spin (which are now just increment or decrement of the new counter,
provided the count cannot hit zero) and those which are outside of
gl_spin, but which now take gl_spin internally.

The conversion is relatively straight forward. There is probably some
further clean up which can be done, but the priority at this stage is to
make the change in as simple a manner as possible.

A consequence of this change is that the reference count is being
decoupled from the lru list processing. This should allow future
adoption of the lru_list code with glocks in due course.

The reason for using the "dead" state and not just relying on 0 being
the "invalid state" is so that in due course 0 ref counts can be
allowable. The intent is to eventually be able to remove the ref count
changes which are currently hidden away in state_change().

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c         | 83 ++++++++++++++++++++++++-------------------------
 fs/gfs2/glock.h         |  2 --
 fs/gfs2/glops.c         |  4 +--
 fs/gfs2/incore.h        |  5 +--
 include/linux/lockref.h |  6 ++++
 lib/lockref.c           |  1 +
 6 files changed, 52 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c2f41b4d00b9..e66a8009aff1 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -31,6 +31,7 @@
 #include <linux/bit_spinlock.h>
 #include <linux/percpu.h>
 #include <linux/list_sort.h>
+#include <linux/lockref.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -129,10 +130,10 @@ void gfs2_glock_free(struct gfs2_glock *gl)
  *
  */
 
-void gfs2_glock_hold(struct gfs2_glock *gl)
+static void gfs2_glock_hold(struct gfs2_glock *gl)
 {
-	GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0);
-	atomic_inc(&gl->gl_ref);
+	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
+	lockref_get(&gl->gl_lockref);
 }
 
 /**
@@ -186,20 +187,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
 	spin_unlock(&lru_lock);
 }
 
-/**
- * gfs2_glock_put_nolock() - Decrement reference count on glock
- * @gl: The glock to put
- *
- * This function should only be used if the caller has its own reference
- * to the glock, in addition to the one it is dropping.
- */
-
-void gfs2_glock_put_nolock(struct gfs2_glock *gl)
-{
-	if (atomic_dec_and_test(&gl->gl_ref))
-		GLOCK_BUG_ON(gl, 1);
-}
-
 /**
  * gfs2_glock_put() - Decrement reference count on glock
  * @gl: The glock to put
@@ -211,17 +198,22 @@ void gfs2_glock_put(struct gfs2_glock *gl)
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct address_space *mapping = gfs2_glock2aspace(gl);
 
-	if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
-		__gfs2_glock_remove_from_lru(gl);
-		spin_unlock(&lru_lock);
-		spin_lock_bucket(gl->gl_hash);
-		hlist_bl_del_rcu(&gl->gl_list);
-		spin_unlock_bucket(gl->gl_hash);
-		GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
-		GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
-		trace_gfs2_glock_put(gl);
-		sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
-	}
+	if (lockref_put_or_lock(&gl->gl_lockref))
+		return;
+
+	lockref_mark_dead(&gl->gl_lockref);
+
+	spin_lock(&lru_lock);
+	__gfs2_glock_remove_from_lru(gl);
+	spin_unlock(&lru_lock);
+	spin_unlock(&gl->gl_lockref.lock);
+	spin_lock_bucket(gl->gl_hash);
+	hlist_bl_del_rcu(&gl->gl_list);
+	spin_unlock_bucket(gl->gl_hash);
+	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
+	GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
+	trace_gfs2_glock_put(gl);
+	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
 }
 
 /**
@@ -244,7 +236,7 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
 			continue;
 		if (gl->gl_sbd != sdp)
 			continue;
-		if (atomic_inc_not_zero(&gl->gl_ref))
+		if (lockref_get_not_dead(&gl->gl_lockref))
 			return gl;
 	}
 
@@ -396,10 +388,11 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
 	held2 = (new_state != LM_ST_UNLOCKED);
 
 	if (held1 != held2) {
+		GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
 		if (held2)
-			gfs2_glock_hold(gl);
+			gl->gl_lockref.count++;
 		else
-			gfs2_glock_put_nolock(gl);
+			gl->gl_lockref.count--;
 	}
 	if (held1 && held2 && list_empty(&gl->gl_holders))
 		clear_bit(GLF_QUEUED, &gl->gl_flags);
@@ -626,9 +619,9 @@ out:
 out_sched:
 	clear_bit(GLF_LOCK, &gl->gl_flags);
 	smp_mb__after_clear_bit();
-	gfs2_glock_hold(gl);
+	gl->gl_lockref.count++;
 	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
-		gfs2_glock_put_nolock(gl);
+		gl->gl_lockref.count--;
 	return;
 
 out_unlock:
@@ -754,7 +747,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	gl->gl_sbd = sdp;
 	gl->gl_flags = 0;
 	gl->gl_name = name;
-	atomic_set(&gl->gl_ref, 1);
+	gl->gl_lockref.count = 1;
 	gl->gl_state = LM_ST_UNLOCKED;
 	gl->gl_target = LM_ST_UNLOCKED;
 	gl->gl_demote_state = LM_ST_EXCLUSIVE;
@@ -1356,10 +1349,10 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 		}
 	}
 
-	spin_unlock(&gl->gl_spin);
+	gl->gl_lockref.count++;
 	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
-	smp_wmb();
-	gfs2_glock_hold(gl);
+	spin_unlock(&gl->gl_spin);
+
 	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
 		gfs2_glock_put(gl);
 }
@@ -1404,15 +1397,19 @@ __acquires(&lru_lock)
 	while(!list_empty(list)) {
 		gl = list_entry(list->next, struct gfs2_glock, gl_lru);
 		list_del_init(&gl->gl_lru);
+		if (!spin_trylock(&gl->gl_spin)) {
+			list_add(&gl->gl_lru, &lru_list);
+			atomic_inc(&lru_count);
+			continue;
+		}
 		clear_bit(GLF_LRU, &gl->gl_flags);
-		gfs2_glock_hold(gl);
 		spin_unlock(&lru_lock);
-		spin_lock(&gl->gl_spin);
+		gl->gl_lockref.count++;
 		if (demote_ok(gl))
 			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
 		WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
 		if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
-			gfs2_glock_put_nolock(gl);
+			gl->gl_lockref.count--;
 		spin_unlock(&gl->gl_spin);
 		spin_lock(&lru_lock);
 	}
@@ -1493,7 +1490,7 @@ static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
 
 	rcu_read_lock();
 	hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
-		if ((gl->gl_sbd == sdp) && atomic_inc_not_zero(&gl->gl_ref))
+		if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref))
 			examiner(gl);
 	}
 	rcu_read_unlock();
@@ -1746,7 +1743,7 @@ int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
 		  state2str(gl->gl_demote_state), dtime,
 		  atomic_read(&gl->gl_ail_count),
 		  atomic_read(&gl->gl_revokes),
-		  atomic_read(&gl->gl_ref), gl->gl_hold_time);
+		  (int)gl->gl_lockref.count, gl->gl_hold_time);
 
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
 		error = dump_holder(seq, gh);
@@ -1902,7 +1899,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
 			gi->nhash = 0;
 		}
 	/* Skip entries for other sb and dead entries */
-	} while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
+	} while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref));
 
 	return 0;
 }
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 69f66e3d22bf..6647d77366ba 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -181,8 +181,6 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
 extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 			  const struct gfs2_glock_operations *glops,
 			  int create, struct gfs2_glock **glp);
-extern void gfs2_glock_hold(struct gfs2_glock *gl);
-extern void gfs2_glock_put_nolock(struct gfs2_glock *gl);
 extern void gfs2_glock_put(struct gfs2_glock *gl);
 extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
 			     unsigned flags, struct gfs2_holder *gh);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index e2e0a90396e7..db908f697139 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -525,9 +525,9 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
 
 	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
 	    gl->gl_state == LM_ST_SHARED && ip) {
-		gfs2_glock_hold(gl);
+		gl->gl_lockref.count++;
 		if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
-			gfs2_glock_put_nolock(gl);
+			gl->gl_lockref.count--;
 	}
 }
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 2ab4f8d8f4c4..bb88e417231f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -21,6 +21,7 @@
 #include <linux/rbtree.h>
 #include <linux/ktime.h>
 #include <linux/percpu.h>
+#include <linux/lockref.h>
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
@@ -321,9 +322,9 @@ struct gfs2_glock {
 	struct gfs2_sbd *gl_sbd;
 	unsigned long gl_flags;		/* GLF_... */
 	struct lm_lockname gl_name;
-	atomic_t gl_ref;
 
-	spinlock_t gl_spin;
+	struct lockref gl_lockref;
+#define gl_spin gl_lockref.lock
 
 	/* State fields protected by gl_spin */
 	unsigned int gl_state:2,	/* Current state */
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index f279ed9a9163..13dfd36a3294 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -36,4 +36,10 @@ extern int lockref_put_or_lock(struct lockref *);
 extern void lockref_mark_dead(struct lockref *);
 extern int lockref_get_not_dead(struct lockref *);
 
+/* Must be called under spinlock for reliable results */
+static inline int __lockref_is_dead(const struct lockref *l)
+{
+	return ((int)l->count < 0);
+}
+
 #endif /* __LINUX_LOCKREF_H */
diff --git a/lib/lockref.c b/lib/lockref.c
index e2cd2c0a8821..8ff162fe3413 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -136,6 +136,7 @@ void lockref_mark_dead(struct lockref *lockref)
 	assert_spin_locked(&lockref->lock);
 	lockref->count = -128;
 }
+EXPORT_SYMBOL(lockref_mark_dead);
 
 /**
  * lockref_get_not_dead - Increments count unless the ref is dead
-- 
cgit v1.2.3


From d1cb9d1af0bc11b7450a6032f43935c746609418 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Thu, 3 Oct 2013 17:24:51 -0400
Subject: of: Make cpu node handling more portable.

Use for_each_node_by_type() to iterate all cpu nodes in the
system.

Provide and overridable function arch_find_n_match_cpu_physical_id,
which sees if the given device node matches 'cpu' and if so sets
'*thread' when non-NULL to the cpu thread number within the core.

The default implementation behaves the same as the existing code.

Add a sparc64 implementation.

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Sudeep KarkadaNagesha <Sudeep.KarkadaNagesha@arm.com>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 arch/sparc/kernel/prom_64.c | 53 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/of/base.c           | 45 +++++++++++++++++++++++---------------
 include/linux/cpu.h         |  3 +++
 3 files changed, 84 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c
index d397d7fc5c28..6b39125eb927 100644
--- a/arch/sparc/kernel/prom_64.c
+++ b/arch/sparc/kernel/prom_64.c
@@ -373,6 +373,59 @@ static const char *get_mid_prop(void)
 	return (tlb_type == spitfire ? "upa-portid" : "portid");
 }
 
+bool arch_find_n_match_cpu_physical_id(struct device_node *cpun,
+				       int cpu, unsigned int *thread)
+{
+	const char *mid_prop = get_mid_prop();
+	int this_cpu_id;
+
+	/* On hypervisor based platforms we interrogate the 'reg'
+	 * property.  On everything else we look for a 'upa-portis',
+	 * 'portid', or 'cpuid' property.
+	 */
+
+	if (tlb_type == hypervisor) {
+		struct property *prop = of_find_property(cpun, "reg", NULL);
+		u32 *regs;
+
+		if (!prop) {
+			pr_warn("CPU node missing reg property\n");
+			return false;
+		}
+		regs = prop->value;
+		this_cpu_id = regs[0] & 0x0fffffff;
+	} else {
+		this_cpu_id = of_getintprop_default(cpun, mid_prop, -1);
+
+		if (this_cpu_id < 0) {
+			mid_prop = "cpuid";
+			this_cpu_id = of_getintprop_default(cpun, mid_prop, -1);
+		}
+		if (this_cpu_id < 0) {
+			pr_warn("CPU node missing cpu ID property\n");
+			return false;
+		}
+	}
+	if (this_cpu_id == cpu) {
+		if (thread) {
+			int proc_id = cpu_data(cpu).proc_id;
+
+			/* On sparc64, the cpu thread information is obtained
+			 * either from OBP or the machine description.  We've
+			 * actually probed this information already long before
+			 * this interface gets called so instead of interrogating
+			 * both the OF node and the MDESC again, just use what
+			 * we discovered already.
+			 */
+			if (proc_id < 0)
+				proc_id = 0;
+			*thread = proc_id;
+		}
+		return true;
+	}
+	return false;
+}
+
 static void *of_iterate_over_cpus(void *(*func)(struct device_node *, int, int), int arg)
 {
 	struct device_node *dp;
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 7d4c70f859e3..e4c99453adf1 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -280,6 +280,31 @@ static bool __of_find_n_match_cpu_property(struct device_node *cpun,
 	return false;
 }
 
+/*
+ * arch_find_n_match_cpu_physical_id - See if the given device node is
+ * for the cpu corresponding to logical cpu 'cpu'.  Return true if so,
+ * else false.  If 'thread' is non-NULL, the local thread number within the
+ * core is returned in it.
+ */
+bool __weak arch_find_n_match_cpu_physical_id(struct device_node *cpun,
+					      int cpu, unsigned int *thread)
+{
+	/* Check for non-standard "ibm,ppc-interrupt-server#s" property
+	 * for thread ids on PowerPC. If it doesn't exist fallback to
+	 * standard "reg" property.
+	 */
+	if (IS_ENABLED(CONFIG_PPC) &&
+	    __of_find_n_match_cpu_property(cpun,
+					   "ibm,ppc-interrupt-server#s",
+					   cpu, thread))
+		return true;
+
+	if (__of_find_n_match_cpu_property(cpun, "reg", cpu, thread))
+		return true;
+
+	return false;
+}
+
 /**
  * of_get_cpu_node - Get device node associated with the given logical CPU
  *
@@ -300,24 +325,10 @@ static bool __of_find_n_match_cpu_property(struct device_node *cpun,
  */
 struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
 {
-	struct device_node *cpun, *cpus;
-
-	cpus = of_find_node_by_path("/cpus");
-	if (!cpus)
-		return NULL;
+	struct device_node *cpun;
 
-	for_each_child_of_node(cpus, cpun) {
-		if (of_node_cmp(cpun->type, "cpu"))
-			continue;
-		/* Check for non-standard "ibm,ppc-interrupt-server#s" property
-		 * for thread ids on PowerPC. If it doesn't exist fallback to
-		 * standard "reg" property.
-		 */
-		if (IS_ENABLED(CONFIG_PPC) &&
-			__of_find_n_match_cpu_property(cpun,
-				"ibm,ppc-interrupt-server#s", cpu, thread))
-			return cpun;
-		if (__of_find_n_match_cpu_property(cpun, "reg", cpu, thread))
+	for_each_node_by_type(cpun, "cpu") {
+		if (arch_find_n_match_cpu_physical_id(cpun, cpu, thread))
 			return cpun;
 	}
 	return NULL;
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 801ff9e73679..fbd25c3c2923 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -18,6 +18,7 @@
 #include <linux/cpumask.h>
 
 struct device;
+struct device_node;
 
 struct cpu {
 	int node_id;		/* The node which contains the CPU */
@@ -29,6 +30,8 @@ extern int register_cpu(struct cpu *cpu, int num);
 extern struct device *get_cpu_device(unsigned cpu);
 extern bool cpu_is_hotpluggable(unsigned cpu);
 extern bool arch_match_cpu_phys_id(int cpu, u64 phys_id);
+extern bool arch_find_n_match_cpu_physical_id(struct device_node *cpun,
+					      int cpu, unsigned int *thread);
 
 extern int cpu_add_dev_attr(struct device_attribute *attr);
 extern void cpu_remove_dev_attr(struct device_attribute *attr);
-- 
cgit v1.2.3


From 6461f018e7ad08863b55c386bf694683e4e2e86e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 2 Oct 2013 14:13:17 +0530
Subject: cpufreq: rewrite cpufreq_driver->flags using shift operator

Currently cpufreq_driver's flags are defined directly using 0x1, 0x2, 0x4, 0x8,
etc.. As the list grows it becomes less readable..

Use bitwise shift operator << to generate these numbers for respective
positions.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 6b199ed7aa58..00c80a5372db 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -211,13 +211,14 @@ struct cpufreq_driver {
 };
 
 /* flags */
-#define CPUFREQ_STICKY		0x01	/* the driver isn't removed even if
-					 * all ->init() calls failed */
-#define CPUFREQ_CONST_LOOPS	0x02	/* loops_per_jiffy or other kernel
-					 * "constants" aren't affected by
-					 * frequency transitions */
-#define CPUFREQ_PM_NO_WARN	0x04	/* don't warn on suspend/resume speed
-					 * mismatches */
+#define CPUFREQ_STICKY		(1 << 0)	/* driver isn't removed even if
+						   all ->init() calls failed */
+#define CPUFREQ_CONST_LOOPS	(1 << 1)	/* loops_per_jiffy or other
+						   kernel "constants" aren't
+						   affected by frequency
+						   transitions */
+#define CPUFREQ_PM_NO_WARN	(1 << 2)	/* don't warn on suspend/resume
+						   speed mismatches */
 
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
-- 
cgit v1.2.3


From 0b981e70748861a3e10ea2e2a689bdcee3e15085 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 2 Oct 2013 14:13:18 +0530
Subject: cpufreq: use cpufreq_driver->flags to mark
 CPUFREQ_HAVE_GOVERNOR_PER_POLICY

Use cpufreq_driver->flags to mark CPUFREQ_HAVE_GOVERNOR_PER_POLICY instead
of a separate field within cpufreq_driver. This will save some bytes of
memory.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/arm_big_little.c   |  4 ++--
 drivers/cpufreq/cpufreq.c          |  2 +-
 drivers/cpufreq/cpufreq_governor.h |  5 ++++-
 include/linux/cpufreq.h            | 15 ++++++++-------
 4 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 9acfb82c1cfb..2c7c13494825 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -213,13 +213,13 @@ static struct freq_attr *bL_cpufreq_attr[] = {
 
 static struct cpufreq_driver bL_cpufreq_driver = {
 	.name			= "arm-big-little",
-	.flags			= CPUFREQ_STICKY,
+	.flags			= CPUFREQ_STICKY |
+					CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
 	.verify			= bL_cpufreq_verify_policy,
 	.target			= bL_cpufreq_set_target,
 	.get			= bL_cpufreq_get,
 	.init			= bL_cpufreq_init,
 	.exit			= bL_cpufreq_exit,
-	.have_governor_per_policy = true,
 	.attr			= bL_cpufreq_attr,
 };
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 3a811df43e14..fc9110c9f10d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -133,7 +133,7 @@ static DEFINE_MUTEX(cpufreq_governor_mutex);
 
 bool have_governor_per_policy(void)
 {
-	return cpufreq_driver->have_governor_per_policy;
+	return !!(cpufreq_driver->flags & CPUFREQ_HAVE_GOVERNOR_PER_POLICY);
 }
 EXPORT_SYMBOL_GPL(have_governor_per_policy);
 
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 88cd39f7b0e9..b5f2b8618949 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -191,7 +191,10 @@ struct common_dbs_data {
 	struct attribute_group *attr_group_gov_sys; /* one governor - system */
 	struct attribute_group *attr_group_gov_pol; /* one governor - policy */
 
-	/* Common data for platforms that don't set have_governor_per_policy */
+	/*
+	 * Common data for platforms that don't set
+	 * CPUFREQ_HAVE_GOVERNOR_PER_POLICY
+	 */
 	struct dbs_data *gdbs_data;
 
 	struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 00c80a5372db..24b84f7e7f8d 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -180,13 +180,6 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 struct cpufreq_driver {
 	char			name[CPUFREQ_NAME_LEN];
 	u8			flags;
-	/*
-	 * This should be set by platforms having multiple clock-domains, i.e.
-	 * supporting multiple policies. With this sysfs directories of governor
-	 * would be created in cpu/cpu<num>/cpufreq/ directory and so they can
-	 * use the same governor with different tunables for different clusters.
-	 */
-	bool			have_governor_per_policy;
 
 	/* needed by all drivers */
 	int	(*init)		(struct cpufreq_policy *policy);
@@ -220,6 +213,14 @@ struct cpufreq_driver {
 #define CPUFREQ_PM_NO_WARN	(1 << 2)	/* don't warn on suspend/resume
 						   speed mismatches */
 
+/*
+ * This should be set by platforms having multiple clock-domains, i.e.
+ * supporting multiple policies. With this sysfs directories of governor would
+ * be created in cpu/cpu<num>/cpufreq/ directory and so they can use the same
+ * governor with different tunables for different clusters.
+ */
+#define CPUFREQ_HAVE_GOVERNOR_PER_POLICY (1 << 3)
+
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
-- 
cgit v1.2.3


From be49e3465f222b4b796be8a21d14afbfd8f5d20f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 2 Oct 2013 14:13:19 +0530
Subject: cpufreq: add new routine cpufreq_verify_within_cpu_limits()

Most of the users of cpufreq_verify_within_limits() calls it for
limiting with min/max from policy->cpuinfo. We can make that code
simple by introducing another routine which will do this for them
automatically.

This patch adds another routine cpufreq_verify_within_cpu_limits()
and updates others to use it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-nforce2.c    | 4 +---
 drivers/cpufreq/davinci-cpufreq.c    | 4 +---
 drivers/cpufreq/freq_table.c         | 6 ++----
 drivers/cpufreq/integrator-cpufreq.c | 9 ++-------
 drivers/cpufreq/intel_pstate.c       | 4 +---
 drivers/cpufreq/longrun.c            | 4 +---
 drivers/cpufreq/pcc-cpufreq.c        | 3 +--
 drivers/cpufreq/sh-cpufreq.c         | 7 ++-----
 drivers/cpufreq/unicore2-cpufreq.c   | 4 +---
 include/linux/cpufreq.h              | 7 +++++++
 10 files changed, 19 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index b83d45f68574..56c964c16a66 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -303,9 +303,7 @@ static int nforce2_verify(struct cpufreq_policy *policy)
 	if (policy->min < (fsb_pol_max * fid * 100))
 		policy->max = (fsb_pol_max + 1) * fid * 100;
 
-	cpufreq_verify_within_limits(policy,
-				     policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
+	cpufreq_verify_within_cpu_limits(policy);
 	return 0;
 }
 
diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c
index f67196e21fc9..ba03e6f91d47 100644
--- a/drivers/cpufreq/davinci-cpufreq.c
+++ b/drivers/cpufreq/davinci-cpufreq.c
@@ -50,9 +50,7 @@ static int davinci_verify_speed(struct cpufreq_policy *policy)
 	if (policy->cpu)
 		return -EINVAL;
 
-	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
-
+	cpufreq_verify_within_cpu_limits(policy);
 	policy->min = clk_round_rate(armclk, policy->min * 1000) / 1000;
 	policy->max = clk_round_rate(armclk, policy->max * 1000) / 1000;
 	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 10f3cfb6b513..b9336edb9bd6 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -60,8 +60,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 	pr_debug("request for verification of policy (%u - %u kHz) for cpu %u\n",
 					policy->min, policy->max, policy->cpu);
 
-	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
+	cpufreq_verify_within_cpu_limits(policy);
 
 	for (; freq = table[i].frequency, freq != CPUFREQ_TABLE_END; i++) {
 		if (freq == CPUFREQ_ENTRY_INVALID)
@@ -77,8 +76,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 
 	if (!found) {
 		policy->max = next_larger;
-		cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-				policy->cpuinfo.max_freq);
+		cpufreq_verify_within_cpu_limits(policy);
 	}
 
 	pr_debug("verification lead to (%u - %u kHz) for cpu %u\n",
diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c
index f7c99df0880b..8152a9bb7e2c 100644
--- a/drivers/cpufreq/integrator-cpufreq.c
+++ b/drivers/cpufreq/integrator-cpufreq.c
@@ -59,9 +59,7 @@ static int integrator_verify_policy(struct cpufreq_policy *policy)
 {
 	struct icst_vco vco;
 
-	cpufreq_verify_within_limits(policy, 
-				     policy->cpuinfo.min_freq, 
-				     policy->cpuinfo.max_freq);
+	cpufreq_verify_within_cpu_limits(policy);
 
 	vco = icst_hz_to_vco(&cclk_params, policy->max * 1000);
 	policy->max = icst_hz(&cclk_params, vco) / 1000;
@@ -69,10 +67,7 @@ static int integrator_verify_policy(struct cpufreq_policy *policy)
 	vco = icst_hz_to_vco(&cclk_params, policy->min * 1000);
 	policy->min = icst_hz(&cclk_params, vco) / 1000;
 
-	cpufreq_verify_within_limits(policy, 
-				     policy->cpuinfo.min_freq, 
-				     policy->cpuinfo.max_freq);
-
+	cpufreq_verify_within_cpu_limits(policy);
 	return 0;
 }
 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 9733f29ed148..a02bd77cbfe8 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -611,9 +611,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 {
-	cpufreq_verify_within_limits(policy,
-				policy->cpuinfo.min_freq,
-				policy->cpuinfo.max_freq);
+	cpufreq_verify_within_cpu_limits(policy);
 
 	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
 		(policy->policy != CPUFREQ_POLICY_PERFORMANCE))
diff --git a/drivers/cpufreq/longrun.c b/drivers/cpufreq/longrun.c
index 5aa031612d53..074971b12635 100644
--- a/drivers/cpufreq/longrun.c
+++ b/drivers/cpufreq/longrun.c
@@ -129,9 +129,7 @@ static int longrun_verify_policy(struct cpufreq_policy *policy)
 		return -EINVAL;
 
 	policy->cpu = 0;
-	cpufreq_verify_within_limits(policy,
-		policy->cpuinfo.min_freq,
-		policy->cpuinfo.max_freq);
+	cpufreq_verify_within_cpu_limits(policy);
 
 	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
 	    (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index d81c4e5ea0ad..78787e965e0b 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -111,8 +111,7 @@ static struct pcc_cpu __percpu *pcc_cpu_info;
 
 static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
 {
-	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
+	cpufreq_verify_within_cpu_limits(policy);
 	return 0;
 }
 
diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c
index 1362e8894ee1..f1fb944d714c 100644
--- a/drivers/cpufreq/sh-cpufreq.c
+++ b/drivers/cpufreq/sh-cpufreq.c
@@ -87,15 +87,12 @@ static int sh_cpufreq_verify(struct cpufreq_policy *policy)
 	if (freq_table)
 		return cpufreq_frequency_table_verify(policy, freq_table);
 
-	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
+	cpufreq_verify_within_cpu_limits(policy);
 
 	policy->min = (clk_round_rate(cpuclk, 1) + 500) / 1000;
 	policy->max = (clk_round_rate(cpuclk, ~0UL) + 500) / 1000;
 
-	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
-
+	cpufreq_verify_within_cpu_limits(policy);
 	return 0;
 }
 
diff --git a/drivers/cpufreq/unicore2-cpufreq.c b/drivers/cpufreq/unicore2-cpufreq.c
index b225f04d8ae5..14e6d3106355 100644
--- a/drivers/cpufreq/unicore2-cpufreq.c
+++ b/drivers/cpufreq/unicore2-cpufreq.c
@@ -29,9 +29,7 @@ static int ucv2_verify_speed(struct cpufreq_policy *policy)
 	if (policy->cpu)
 		return -EINVAL;
 
-	cpufreq_verify_within_limits(policy,
-			policy->cpuinfo.min_freq, policy->cpuinfo.max_freq);
-
+	cpufreq_verify_within_cpu_limits(policy);
 	return 0;
 }
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 24b84f7e7f8d..c7495915932d 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -242,6 +242,13 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
 	return;
 }
 
+static inline void
+cpufreq_verify_within_cpu_limits(struct cpufreq_policy *policy)
+{
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+			policy->cpuinfo.max_freq);
+}
+
 /*********************************************************************
  *                     CPUFREQ NOTIFIER INTERFACE                    *
  *********************************************************************/
-- 
cgit v1.2.3


From 184345129c53e76069c209f9912ed7c457eceb31 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 3 Oct 2013 20:27:55 +0530
Subject: cpufreq: define generic .attr, .exit() and .verify() routines

Most of the CPUFreq drivers do similar things in .exit() and .verify() routines
and .attr. So its better if we have generic routines for them which can be used
by cpufreq drivers then.

This patch introduces generic .attr, .exit() and .verify() cpufreq drivers.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/freq_table.c | 20 ++++++++++++++++++++
 include/linux/cpufreq.h      |  8 ++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index b9336edb9bd6..3458d27f63b4 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -86,6 +86,20 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 }
 EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify);
 
+/*
+ * Generic routine to verify policy & frequency table, requires driver to call
+ * cpufreq_frequency_table_get_attr() prior to it.
+ */
+int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *table =
+		cpufreq_frequency_get_table(policy->cpu);
+	if (!table)
+		return -ENODEV;
+
+	return cpufreq_frequency_table_verify(policy, table);
+}
+EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify);
 
 int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 				   struct cpufreq_frequency_table *table,
@@ -199,6 +213,12 @@ struct freq_attr cpufreq_freq_attr_scaling_available_freqs = {
 };
 EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs);
 
+struct freq_attr *cpufreq_generic_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+EXPORT_SYMBOL_GPL(cpufreq_generic_attr);
+
 /*
  * if you use these, you must assure that the frequency table is valid
  * all the time between get_attr and put_attr!
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index c7495915932d..36ccd0bf1304 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -401,6 +401,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 
 int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 				   struct cpufreq_frequency_table *table);
+int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy);
 
 int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 				   struct cpufreq_frequency_table *table,
@@ -416,10 +417,17 @@ struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu);
 
 /* the following are really really optional */
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
+extern struct freq_attr *cpufreq_generic_attr[];
 void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 				      unsigned int cpu);
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
 int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
 				      struct cpufreq_frequency_table *table);
 
+static inline int cpufreq_generic_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3


From 70e9e778337973d5bf57004092b360bd3f3c412f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 3 Oct 2013 20:29:07 +0530
Subject: cpufreq: create cpufreq_generic_init() routine

Many CPUFreq drivers for SMP system (where all cores share same clock lines), do
similar stuff in their ->init() part.

This patch creates a generic routine in cpufreq core which can be used by these
so that we can remove some redundant code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/Kconfig   |  1 +
 drivers/cpufreq/cpufreq.c | 31 +++++++++++++++++++++++++++++++
 include/linux/cpufreq.h   |  3 +++
 3 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 534fcb825153..2d06754f4a04 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -2,6 +2,7 @@ menu "CPU Frequency scaling"
 
 config CPU_FREQ
 	bool "CPU Frequency scaling"
+	select CPU_FREQ_TABLE
 	help
 	  CPU Frequency scaling allows you to change the clock speed of 
 	  CPUs on the fly. This is a nice method to save power, because 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index f033adf06eaf..985f325adc4f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -181,6 +181,37 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
 }
 EXPORT_SYMBOL_GPL(get_cpu_idle_time);
 
+/*
+ * This is a generic cpufreq init() routine which can be used by cpufreq
+ * drivers of SMP systems. It will do following:
+ * - validate & show freq table passed
+ * - set policies transition latency
+ * - policy->cpus with all possible CPUs
+ */
+int cpufreq_generic_init(struct cpufreq_policy *policy,
+		struct cpufreq_frequency_table *table,
+		unsigned int transition_latency)
+{
+	int ret;
+
+	ret = cpufreq_table_validate_and_show(policy, table);
+	if (ret) {
+		pr_err("%s: invalid frequency table: %d\n", __func__, ret);
+		return ret;
+	}
+
+	policy->cpuinfo.transition_latency = transition_latency;
+
+	/*
+	 * The driver only supports the SMP configuartion where all processors
+	 * share the clock and voltage and clock.
+	 */
+	cpumask_setall(policy->cpus);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cpufreq_generic_init);
+
 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
 {
 	struct cpufreq_policy *policy = NULL;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 36ccd0bf1304..0aba2a6cadaf 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -424,6 +424,9 @@ void cpufreq_frequency_table_put_attr(unsigned int cpu);
 int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
 				      struct cpufreq_frequency_table *table);
 
+int cpufreq_generic_init(struct cpufreq_policy *policy,
+		struct cpufreq_frequency_table *table,
+		unsigned int transition_latency);
 static inline int cpufreq_generic_exit(struct cpufreq_policy *policy)
 {
 	cpufreq_frequency_table_put_attr(policy->cpu);
-- 
cgit v1.2.3


From 5915a3db0c3983f1cd5e046bf70086c7d0c686d2 Mon Sep 17 00:00:00 2001
From: Aaron Lu <aaron.lu@intel.com>
Date: Fri, 11 Oct 2013 21:27:43 +0800
Subject: backlight: introduce backlight_device_registered

Introduce a new API for modules to query if a specific type of backlight
device has been registered. This is useful for some backlight device
provider module (e.g. ACPI video) to know if a native control
interface(e.g. the interface created by i915) is available and then do
things accordingly (e.g. avoid registering its own on Win8 systems).

Signed-off-by: Aaron Lu <aaron.lu@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/video/backlight/backlight.c | 31 +++++++++++++++++++++++++++++++
 include/linux/backlight.h           |  4 ++++
 2 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 94a403a9717a..5d05555fe841 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -21,6 +21,9 @@
 #include <asm/backlight.h>
 #endif
 
+static struct list_head backlight_dev_list;
+static struct mutex backlight_dev_list_mutex;
+
 static const char *const backlight_types[] = {
 	[BACKLIGHT_RAW] = "raw",
 	[BACKLIGHT_PLATFORM] = "platform",
@@ -349,10 +352,32 @@ struct backlight_device *backlight_device_register(const char *name,
 	mutex_unlock(&pmac_backlight_mutex);
 #endif
 
+	mutex_lock(&backlight_dev_list_mutex);
+	list_add(&new_bd->entry, &backlight_dev_list);
+	mutex_unlock(&backlight_dev_list_mutex);
+
 	return new_bd;
 }
 EXPORT_SYMBOL(backlight_device_register);
 
+bool backlight_device_registered(enum backlight_type type)
+{
+	bool found = false;
+	struct backlight_device *bd;
+
+	mutex_lock(&backlight_dev_list_mutex);
+	list_for_each_entry(bd, &backlight_dev_list, entry) {
+		if (bd->props.type == type) {
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&backlight_dev_list_mutex);
+
+	return found;
+}
+EXPORT_SYMBOL(backlight_device_registered);
+
 /**
  * backlight_device_unregister - unregisters a backlight device object.
  * @bd: the backlight device object to be unregistered and freed.
@@ -364,6 +389,10 @@ void backlight_device_unregister(struct backlight_device *bd)
 	if (!bd)
 		return;
 
+	mutex_lock(&backlight_dev_list_mutex);
+	list_del(&bd->entry);
+	mutex_unlock(&backlight_dev_list_mutex);
+
 #ifdef CONFIG_PMAC_BACKLIGHT
 	mutex_lock(&pmac_backlight_mutex);
 	if (pmac_backlight == bd)
@@ -499,6 +528,8 @@ static int __init backlight_class_init(void)
 
 	backlight_class->dev_groups = bl_device_groups;
 	backlight_class->pm = &backlight_class_dev_pm_ops;
+	INIT_LIST_HEAD(&backlight_dev_list);
+	mutex_init(&backlight_dev_list_mutex);
 	return 0;
 }
 
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 53b77949c79d..5f9cd963213d 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -100,6 +100,9 @@ struct backlight_device {
 	/* The framebuffer notifier block */
 	struct notifier_block fb_notif;
 
+	/* list entry of all registered backlight devices */
+	struct list_head entry;
+
 	struct device dev;
 };
 
@@ -123,6 +126,7 @@ extern void devm_backlight_device_unregister(struct device *dev,
 					struct backlight_device *bd);
 extern void backlight_force_update(struct backlight_device *bd,
 				   enum backlight_update_reason reason);
+extern bool backlight_device_registered(enum backlight_type type);
 
 #define to_backlight_device(obj) container_of(obj, struct backlight_device, dev)
 
-- 
cgit v1.2.3


From 2b9b1620349e325f184c68cddf3b484499c163c0 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 30 Aug 2013 11:51:22 +0200
Subject: pwm-backlight: Add optional enable GPIO

To support a wider variety of backlight setups, introduce an optional
enable GPIO. Legacy users of the platform data already have a means of
supporting GPIOs by using the .init(), .exit() and .notify() hooks. DT
users however cannot use those, so an alternative method is required.

In order to ease the introduction of the optional enable GPIO, make it
available in the platform data first, so that existing users can be
converted. Once that has happened a second patch will add code to make
use of it in the driver.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/linux/pwm_backlight.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h
index 56f4a866539a..2de2e275b2cb 100644
--- a/include/linux/pwm_backlight.h
+++ b/include/linux/pwm_backlight.h
@@ -6,6 +6,9 @@
 
 #include <linux/backlight.h>
 
+/* TODO: convert to gpiod_*() API once it has been merged */
+#define PWM_BACKLIGHT_GPIO_ACTIVE_LOW	(1 << 0)
+
 struct platform_pwm_backlight_data {
 	int pwm_id;
 	unsigned int max_brightness;
@@ -13,6 +16,8 @@ struct platform_pwm_backlight_data {
 	unsigned int lth_brightness;
 	unsigned int pwm_period_ns;
 	unsigned int *levels;
+	int enable_gpio;
+	unsigned long enable_gpio_flags;
 	int (*init)(struct device *dev);
 	int (*notify)(struct device *dev, int brightness);
 	void (*notify_after)(struct device *dev, int brightness);
-- 
cgit v1.2.3


From d468bf9ecaabd3bf3a6134e5a369ced82b1d1ca1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 24 Sep 2013 11:54:38 +0200
Subject: gpio: add API to be strict about GPIO IRQ usage

It is currently often possible in many GPIO drivers to request
a GPIO line to be used as IRQ after calling gpio_to_irq() and,
as the gpiolib is not aware of this, set the same line to
output and start driving it, with undesired side effects.

As it is a bogus usage scenario to request a line flagged as
output to used as IRQ, we introduce APIs to let gpiolib track
the use of a line as IRQ, and also set this flag from the
userspace ABI.

The API is symmetric so that lines can also be flagged from
.irq_enable() and unflagged from IRQ by .irq_disable().
The debugfs file is altered so that we see if a line is
reserved for IRQ.

Cc: Enric Balletbo i Serra <eballetbo@gmail.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Reviewed-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c     | 88 ++++++++++++++++++++++++++++++++++++++++++++--
 include/asm-generic/gpio.h |  3 ++
 include/linux/gpio.h       | 12 +++++++
 3 files changed, 101 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 4fc28603a742..1014cb5e10b0 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -60,6 +60,7 @@ struct gpio_desc {
 #define FLAG_ACTIVE_LOW	6	/* sysfs value has active low */
 #define FLAG_OPEN_DRAIN	7	/* Gpio is open drain type */
 #define FLAG_OPEN_SOURCE 8	/* Gpio is open source type */
+#define FLAG_USED_AS_IRQ 9	/* GPIO is connected to an IRQ */
 
 #define ID_SHIFT	16	/* add new flags before this one */
 
@@ -96,6 +97,8 @@ static int gpiod_get_value(const struct gpio_desc *desc);
 static void gpiod_set_value(struct gpio_desc *desc, int value);
 static int gpiod_cansleep(const struct gpio_desc *desc);
 static int gpiod_to_irq(const struct gpio_desc *desc);
+static int gpiod_lock_as_irq(struct gpio_desc *desc);
+static void gpiod_unlock_as_irq(struct gpio_desc *desc);
 static int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
 static int gpiod_export_link(struct device *dev, const char *name,
 			     struct gpio_desc *desc);
@@ -162,6 +165,17 @@ static struct gpio_desc *gpio_to_desc(unsigned gpio)
 		return &gpio_desc[gpio];
 }
 
+/**
+ * Convert an offset on a certain chip to a corresponding descriptor
+ */
+static struct gpio_desc *gpiochip_offset_to_desc(struct gpio_chip *chip,
+						 unsigned int offset)
+{
+	unsigned int gpio = chip->base + offset;
+
+	return gpio_to_desc(gpio);
+}
+
 /**
  * Convert a GPIO descriptor to the integer namespace.
  * This should disappear in the future but is needed since we still
@@ -428,6 +442,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
 	desc->flags &= ~GPIO_TRIGGER_MASK;
 
 	if (!gpio_flags) {
+		gpiod_unlock_as_irq(desc);
 		ret = 0;
 		goto free_id;
 	}
@@ -466,6 +481,12 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
 	if (ret < 0)
 		goto free_id;
 
+	ret = gpiod_lock_as_irq(desc);
+	if (ret < 0) {
+		gpiod_warn(desc, "failed to flag the GPIO for IRQ\n");
+		goto free_id;
+	}
+
 	desc->flags |= gpio_flags;
 	return 0;
 
@@ -1730,6 +1751,14 @@ static int gpiod_direction_output(struct gpio_desc *desc, int value)
 		return -EINVAL;
 	}
 
+	/* GPIOs used for IRQs shall not be set as output */
+	if (test_bit(FLAG_USED_AS_IRQ, &desc->flags)) {
+		gpiod_err(desc,
+			  "%s: tried to set a GPIO tied to an IRQ as output\n",
+			  __func__);
+		return -EIO;
+	}
+
 	/* Open drain pin should not be driven to 1 */
 	if (value && test_bit(FLAG_OPEN_DRAIN,  &desc->flags))
 		return gpiod_direction_input(desc);
@@ -2050,6 +2079,58 @@ int __gpio_to_irq(unsigned gpio)
 }
 EXPORT_SYMBOL_GPL(__gpio_to_irq);
 
+/**
+ * gpiod_lock_as_irq() - lock a GPIO to be used as IRQ
+ * @gpio: the GPIO line to lock as used for IRQ
+ *
+ * This is used directly by GPIO drivers that want to lock down
+ * a certain GPIO line to be used as IRQs, for example in the
+ * .to_irq() callback of their gpio_chip, or in the .irq_enable()
+ * of its irq_chip implementation if the GPIO is known from that
+ * code.
+ */
+static int gpiod_lock_as_irq(struct gpio_desc *desc)
+{
+	if (!desc)
+		return -EINVAL;
+
+	if (test_bit(FLAG_IS_OUT, &desc->flags)) {
+		gpiod_err(desc,
+			  "%s: tried to flag a GPIO set as output for IRQ\n",
+			  __func__);
+		return -EIO;
+	}
+
+	set_bit(FLAG_USED_AS_IRQ, &desc->flags);
+	return 0;
+}
+
+int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
+{
+	return gpiod_lock_as_irq(gpiochip_offset_to_desc(chip, offset));
+}
+EXPORT_SYMBOL_GPL(gpio_lock_as_irq);
+
+/**
+ * gpiod_unlock_as_irq() - unlock a GPIO used as IRQ
+ * @gpio: the GPIO line to unlock from IRQ usage
+ *
+ * This is used directly by GPIO drivers that want to indicate
+ * that a certain GPIO is no longer used exclusively for IRQ.
+ */
+static void gpiod_unlock_as_irq(struct gpio_desc *desc)
+{
+	if (!desc)
+		return;
+
+	clear_bit(FLAG_USED_AS_IRQ, &desc->flags);
+}
+
+void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset)
+{
+	return gpiod_unlock_as_irq(gpiochip_offset_to_desc(chip, offset));
+}
+EXPORT_SYMBOL_GPL(gpio_unlock_as_irq);
 
 /* There's no value in making it easy to inline GPIO calls that may sleep.
  * Common examples include ones connected to I2C or SPI chips.
@@ -2091,6 +2172,7 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 	unsigned		gpio = chip->base;
 	struct gpio_desc	*gdesc = &chip->desc[0];
 	int			is_out;
+	int			is_irq;
 
 	for (i = 0; i < chip->ngpio; i++, gpio++, gdesc++) {
 		if (!test_bit(FLAG_REQUESTED, &gdesc->flags))
@@ -2098,12 +2180,14 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 
 		gpiod_get_direction(gdesc);
 		is_out = test_bit(FLAG_IS_OUT, &gdesc->flags);
-		seq_printf(s, " gpio-%-3d (%-20.20s) %s %s",
+		is_irq = test_bit(FLAG_USED_AS_IRQ, &gdesc->flags);
+		seq_printf(s, " gpio-%-3d (%-20.20s) %s %s %s",
 			gpio, gdesc->label,
 			is_out ? "out" : "in ",
 			chip->get
 				? (chip->get(chip, i) ? "hi" : "lo")
-				: "?  ");
+				: "?  ",
+			is_irq ? "IRQ" : "   ");
 		seq_printf(s, "\n");
 	}
 }
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index bde646995d10..b309a5c0019e 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -192,6 +192,9 @@ extern int __gpio_cansleep(unsigned gpio);
 
 extern int __gpio_to_irq(unsigned gpio);
 
+extern int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset);
+extern void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
+
 extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
 extern int gpio_request_array(const struct gpio *array, size_t num);
 extern void gpio_free_array(const struct gpio *array, size_t num);
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 552e3f46e4a3..a06ec3e85ba3 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -204,6 +204,18 @@ static inline int gpio_to_irq(unsigned gpio)
 	return -EINVAL;
 }
 
+static inline int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+static inline void gpio_unlock_as_irq(struct gpio_chip *chip,
+				      unsigned int offset)
+{
+	WARN_ON(1);
+}
+
 static inline int irq_to_gpio(unsigned irq)
 {
 	/* irq can never have been returned from gpio_to_irq() */
-- 
cgit v1.2.3


From 8922915b38cd8b72f8e5af614b95be71d1d299d4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 7 Oct 2013 20:31:06 +0200
Subject: sched/wait: Add ___wait_cond_timeout() to wait_event*_timeout() too

Commit 4c663cfc ("wait: fix false timeouts when using
wait_event_timeout()") introduced the additional condition checks
after a timeout but only in the "slow" __wait*() paths.

wait_event_timeout(wq, CONDITION, 0) still returns 0 if CONDITION
is already true and we do not call __wait*().

Now that we have ___wait_cond_timeout() we can use it instead to
ensure that __ret will be properly updated.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131007183106.GA10973@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a2726c7dd244..04c0260bda8f 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -270,7 +270,7 @@ do {									\
 #define wait_event_timeout(wq, condition, timeout)			\
 ({									\
 	long __ret = timeout;						\
-	if (!(condition))						\
+	if (!___wait_cond_timeout(condition))				\
 		__ret = __wait_event_timeout(wq, condition, timeout);	\
 	__ret;								\
 })
@@ -328,7 +328,7 @@ do {									\
 #define wait_event_interruptible_timeout(wq, condition, timeout)	\
 ({									\
 	long __ret = timeout;						\
-	if (!(condition))						\
+	if (!___wait_cond_timeout(condition))				\
 		__ret = __wait_event_interruptible_timeout(wq,		\
 						condition, timeout);	\
 	__ret;								\
@@ -769,7 +769,7 @@ do {									\
 						  timeout)		\
 ({									\
 	long __ret = timeout;						\
-	if (!(condition))						\
+	if (!___wait_cond_timeout(condition))				\
 		__ret = __wait_event_interruptible_lock_irq_timeout(	\
 					wq, condition, lock, timeout);	\
 	__ret;								\
-- 
cgit v1.2.3


From c2d816443ef305aba8eaf0bf368f4d3d87494f06 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 7 Oct 2013 18:18:24 +0200
Subject: sched/wait: Introduce prepare_to_wait_event()

Add the new helper, prepare_to_wait_event() which should only be used
by ___wait_event().

prepare_to_wait_event() returns -ERESTARTSYS if signal_pending_state()
is true, otherwise it does prepare_to_wait/exclusive.  This allows to
uninline the signal-pending checks in wait_event*() macros.

Also, it can initialize wait->private/func. We do not care if they were
already initialized, the values are the same. This also shaves a couple
of insns from the inlined code.

This obviously makes prepare_*() path a little bit slower, but we are
likely going to sleep anyway, so I think it makes sense to shrink .text:

               text    data      bss      dec     hex  filename
            ===================================================
   before:  5126092 2959248 10117120 18202460 115bf5c   vmlinux
    after:  5124618 2955152 10117120 18196890 115a99a   vmlinux

on my build.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131007161824.GA29757@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 24 ++++++++++++++----------
 kernel/wait.c        | 24 ++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 04c0260bda8f..ec099b03e11b 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -187,27 +187,30 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 	__cond || !__ret;						\
 })
 
-#define ___wait_signal_pending(state)					\
-	((state == TASK_INTERRUPTIBLE && signal_pending(current)) ||	\
-	 (state == TASK_KILLABLE && fatal_signal_pending(current)))
+#define ___wait_is_interruptible(state)					\
+	(!__builtin_constant_p(state) ||				\
+		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)	\
 
 #define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
 ({									\
 	__label__ __out;						\
-	DEFINE_WAIT(__wait);						\
+	wait_queue_t __wait;						\
 	long __ret = ret;						\
 									\
+	INIT_LIST_HEAD(&__wait.task_list);				\
+	if (exclusive)							\
+		__wait.flags = WQ_FLAG_EXCLUSIVE;			\
+	else								\
+		__wait.flags = 0;					\
+									\
 	for (;;) {							\
-		if (exclusive)						\
-			prepare_to_wait_exclusive(&wq, &__wait, state); \
-		else							\
-			prepare_to_wait(&wq, &__wait, state);		\
+		long __int = prepare_to_wait_event(&wq, &__wait, state);\
 									\
 		if (condition)						\
 			break;						\
 									\
-		if (___wait_signal_pending(state)) {			\
-			__ret = -ERESTARTSYS;				\
+		if (___wait_is_interruptible(state) && __int) {		\
+			__ret = __int;					\
 			if (exclusive) {				\
 				abort_exclusive_wait(&wq, &__wait,	\
 						     state, NULL);	\
@@ -791,6 +794,7 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long tim
  */
 void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
+long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
 void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
diff --git a/kernel/wait.c b/kernel/wait.c
index d550920e040c..de21c6305a44 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -92,6 +92,30 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
+long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+	unsigned long flags;
+
+	if (signal_pending_state(state, current))
+		return -ERESTARTSYS;
+
+	wait->private = current;
+	wait->func = autoremove_wake_function;
+
+	spin_lock_irqsave(&q->lock, flags);
+	if (list_empty(&wait->task_list)) {
+		if (wait->flags & WQ_FLAG_EXCLUSIVE)
+			__add_wait_queue_tail(q, wait);
+		else
+			__add_wait_queue(q, wait);
+	}
+	set_current_state(state);
+	spin_unlock_irqrestore(&q->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(prepare_to_wait_event);
+
 /**
  * finish_wait - clean up after waiting in a queue
  * @q: waitqueue waited on
-- 
cgit v1.2.3


From 586a87e6edc936d6d3c3585af504b33b9c3f0a06 Mon Sep 17 00:00:00 2001
From: Christian Ruppert <christian.ruppert@abilis.com>
Date: Tue, 15 Oct 2013 15:37:54 +0200
Subject: pinctrl/gpio: non-linear GPIO ranges accesible from gpiolib

This patch adds the infrastructure required to register non-linear gpio
ranges through gpiolib and the standard GPIO device tree bindings.

Signed-off-by: Christian Ruppert <christian.ruppert@abilis.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/gpio/gpio.txt | 40 +++++++++++++++-
 drivers/gpio/gpiolib-of.c                       | 63 +++++++++++++++++++++----
 drivers/gpio/gpiolib.c                          | 47 ++++++++++++++++++
 drivers/pinctrl/core.c                          | 14 ++++++
 include/asm-generic/gpio.h                      | 10 ++++
 include/linux/gpio.h                            | 10 ++++
 include/linux/pinctrl/pinctrl.h                 |  3 ++
 7 files changed, 177 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt
index 6cec6ff20d2e..0c85bb6e3a80 100644
--- a/Documentation/devicetree/bindings/gpio/gpio.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio.txt
@@ -87,8 +87,10 @@ controllers. The gpio-ranges property described below represents this, and
 contains information structures as follows:
 
 	gpio-range-list ::= <single-gpio-range> [gpio-range-list]
-	single-gpio-range ::=
+	single-gpio-range ::= <numeric-gpio-range> | <named-gpio-range>
+	numeric-gpio-range ::=
 			<pinctrl-phandle> <gpio-base> <pinctrl-base> <count>
+	named-gpio-range ::= <pinctrl-phandle> <gpio-base> '<0 0>'
 	gpio-phandle : phandle to pin controller node.
 	gpio-base : Base GPIO ID in the GPIO controller
 	pinctrl-base : Base pinctrl pin ID in the pin controller
@@ -97,6 +99,19 @@ contains information structures as follows:
 The "pin controller node" mentioned above must conform to the bindings
 described in ../pinctrl/pinctrl-bindings.txt.
 
+In case named gpio ranges are used (ranges with both <pinctrl-base> and
+<count> set to 0), the property gpio-ranges-group-names contains one string
+for every single-gpio-range in gpio-ranges:
+	gpiorange-names-list ::= <gpiorange-name> [gpiorange-names-list]
+	gpiorange-name : Name of the pingroup associated to the GPIO range in
+			the respective pin controller.
+
+Elements of gpiorange-names-list corresponding to numeric ranges contain
+the empty string. Elements of gpiorange-names-list corresponding to named
+ranges contain the name of a pin group defined in the respective pin
+controller. The number of pins/GPIOs in the range is the number of pins in
+that pin group.
+
 Previous versions of this binding required all pin controller nodes that
 were referenced by any gpio-ranges property to contain a property named
 #gpio-range-cells with value <3>. This requirement is now deprecated.
@@ -104,7 +119,7 @@ However, that property may still exist in older device trees for
 compatibility reasons, and would still be required even in new device
 trees that need to be compatible with older software.
 
-Example:
+Example 1:
 
 	qe_pio_e: gpio-controller@1460 {
 		#gpio-cells = <2>;
@@ -117,3 +132,24 @@ Example:
 Here, a single GPIO controller has GPIOs 0..9 routed to pin controller
 pinctrl1's pins 20..29, and GPIOs 10..19 routed to pin controller pinctrl2's
 pins 50..59.
+
+Example 2:
+
+	gpio_pio_i: gpio-controller@14B0 {
+		#gpio-cells = <2>;
+		compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank";
+		reg = <0x1480 0x18>;
+		gpio-controller;
+		gpio-ranges =			<&pinctrl1 0 20 10>,
+						<&pinctrl2 10 0 0>,
+						<&pinctrl1 15 0 10>,
+						<&pinctrl2 25 0 0>;
+		gpio-ranges-group-names =	"",
+						"foo",
+						"",
+						"bar";
+	};
+
+Here, three GPIO ranges are defined wrt. two pin controllers. pinctrl1 GPIO
+ranges are defined using pin numbers whereas the GPIO ranges wrt. pinctrl2
+are named "foo" and "bar".
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 0dfaf20e4dad..e78760921bd7 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -190,10 +190,15 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip)
 	struct of_phandle_args pinspec;
 	struct pinctrl_dev *pctldev;
 	int index = 0, ret;
+	const char *name;
+	static const char group_names_propname[] = "gpio-ranges-group-names";
+	struct property *group_names;
 
 	if (!np)
 		return;
 
+	group_names = of_find_property(np, group_names_propname, NULL);
+
 	for (;; index++) {
 		ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3,
 				index, &pinspec);
@@ -204,14 +209,56 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip)
 		if (!pctldev)
 			break;
 
-		ret = gpiochip_add_pin_range(chip,
-					     pinctrl_dev_get_devname(pctldev),
-					     pinspec.args[0],
-					     pinspec.args[1],
-					     pinspec.args[2]);
-
-		if (ret)
-			break;
+		if (pinspec.args[2]) {
+			if (group_names) {
+				ret = of_property_read_string_index(np,
+						group_names_propname,
+						index, &name);
+				if (strlen(name)) {
+					pr_err("%s: Group name of numeric GPIO ranges must be the empty string.\n",
+						np->full_name);
+					break;
+				}
+			}
+			/* npins != 0: linear range */
+			ret = gpiochip_add_pin_range(chip,
+					pinctrl_dev_get_devname(pctldev),
+					pinspec.args[0],
+					pinspec.args[1],
+					pinspec.args[2]);
+			if (ret)
+				break;
+		} else {
+			/* npins == 0: special range */
+			if (pinspec.args[1]) {
+				pr_err("%s: Illegal gpio-range format.\n",
+					np->full_name);
+				break;
+			}
+
+			if (!group_names) {
+				pr_err("%s: GPIO group range requested but no %s property.\n",
+					np->full_name, group_names_propname);
+				break;
+			}
+
+			ret = of_property_read_string_index(np,
+						group_names_propname,
+						index, &name);
+			if (ret)
+				break;
+
+			if (!strlen(name)) {
+				pr_err("%s: Group name of GPIO group range cannot be the empty string.\n",
+				np->full_name);
+				break;
+			}
+
+			ret = gpiochip_add_pingroup_range(chip, pctldev,
+						pinspec.args[0], name);
+			if (ret)
+				break;
+		}
 	}
 }
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 86ef3461ec06..b83b7e491f76 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1319,6 +1319,53 @@ EXPORT_SYMBOL_GPL(gpiochip_find);
 
 #ifdef CONFIG_PINCTRL
 
+/**
+ * gpiochip_add_pingroup_range() - add a range for GPIO <-> pin mapping
+ * @chip: the gpiochip to add the range for
+ * @pinctrl: the dev_name() of the pin controller to map to
+ * @gpio_offset: the start offset in the current gpio_chip number space
+ * @pin_group: name of the pin group inside the pin controller
+ */
+int gpiochip_add_pingroup_range(struct gpio_chip *chip,
+			struct pinctrl_dev *pctldev,
+			unsigned int gpio_offset, const char *pin_group)
+{
+	struct gpio_pin_range *pin_range;
+	int ret;
+
+	pin_range = kzalloc(sizeof(*pin_range), GFP_KERNEL);
+	if (!pin_range) {
+		pr_err("%s: GPIO chip: failed to allocate pin ranges\n",
+				chip->label);
+		return -ENOMEM;
+	}
+
+	/* Use local offset as range ID */
+	pin_range->range.id = gpio_offset;
+	pin_range->range.gc = chip;
+	pin_range->range.name = chip->label;
+	pin_range->range.base = chip->base + gpio_offset;
+	pin_range->pctldev = pctldev;
+
+	ret = pinctrl_get_group_pins(pctldev, pin_group,
+					&pin_range->range.pins,
+					&pin_range->range.npins);
+	if (ret < 0)
+		return ret;
+
+	pinctrl_add_gpio_range(pctldev, &pin_range->range);
+
+	pr_debug("GPIO chip %s: created GPIO range %d->%d ==> %s PINGRP %s\n",
+		 chip->label, gpio_offset,
+		 gpio_offset + pin_range->range.npins - 1,
+		 pinctrl_dev_get_devname(pctldev), pin_group);
+
+	list_add_tail(&pin_range->node, &chip->pin_ranges);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gpiochip_add_pingroup_range);
+
 /**
  * gpiochip_add_pin_range() - add a range for GPIO <-> pin mapping
  * @chip: the gpiochip to add the range for
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 92f86ab30a13..5ee61a470016 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -462,6 +462,20 @@ struct pinctrl_dev *pinctrl_find_and_add_gpio_range(const char *devname,
 }
 EXPORT_SYMBOL_GPL(pinctrl_find_and_add_gpio_range);
 
+int pinctrl_get_group_pins(struct pinctrl_dev *pctldev, const char *pin_group,
+				const unsigned **pins, unsigned *num_pins)
+{
+	const struct pinctrl_ops *pctlops = pctldev->desc->pctlops;
+	int gs;
+
+	gs = pinctrl_get_group_selector(pctldev, pin_group);
+	if (gs < 0)
+		return gs;
+
+	return pctlops->get_group_pins(pctldev, gs, pins, num_pins);
+}
+EXPORT_SYMBOL_GPL(pinctrl_get_group_pins);
+
 /**
  * pinctrl_find_gpio_range_from_pin() - locate the GPIO range for a pin
  * @pctldev: the pin controller device to look in
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index bde646995d10..523f40525535 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -228,6 +228,9 @@ struct gpio_pin_range {
 int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
 			   unsigned int gpio_offset, unsigned int pin_offset,
 			   unsigned int npins);
+int gpiochip_add_pingroup_range(struct gpio_chip *chip,
+			struct pinctrl_dev *pctldev,
+			unsigned int gpio_offset, const char *pin_group);
 void gpiochip_remove_pin_ranges(struct gpio_chip *chip);
 
 #else
@@ -239,6 +242,13 @@ gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
 {
 	return 0;
 }
+static inline int
+gpiochip_add_pingroup_range(struct gpio_chip *chip,
+			struct pinctrl_dev *pctldev,
+			unsigned int gpio_offset, const char *pin_group)
+{
+	return 0;
+}
 
 static inline void
 gpiochip_remove_pin_ranges(struct gpio_chip *chip)
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 552e3f46e4a3..b8d0e53a802f 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -80,6 +80,7 @@ static inline int irq_to_gpio(unsigned int irq)
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/bug.h>
+#include <linux/pinctrl/pinctrl.h>
 
 struct device;
 struct gpio_chip;
@@ -220,6 +221,15 @@ gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
 	return -EINVAL;
 }
 
+static inline int
+gpiochip_add_pingroup_range(struct gpio_chip *chip,
+			struct pinctrl_dev *pctldev,
+			unsigned int gpio_offset, const char *pin_group)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
+
 static inline void
 gpiochip_remove_pin_ranges(struct gpio_chip *chip)
 {
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 5979147d2bda..fefb88663975 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -144,6 +144,9 @@ extern struct pinctrl_dev *pinctrl_find_and_add_gpio_range(const char *devname,
 extern struct pinctrl_gpio_range *
 pinctrl_find_gpio_range_from_pin(struct pinctrl_dev *pctldev,
 				 unsigned int pin);
+extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev,
+				const char *pin_group, const unsigned **pins,
+				unsigned *num_pins);
 
 #ifdef CONFIG_OF
 extern struct pinctrl_dev *of_pinctrl_get(struct device_node *np);
-- 
cgit v1.2.3


From bf741c08af194ed0448799ff2690c8dd3da709e5 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 14 Oct 2013 17:49:00 +0100
Subject: iio: Remove unused iio_sw_buffer_preenable()

The functionality implemented by iio_sw_buffer_preenable() is now done directly
in the IIO core and previous users of iio_sw_buffer_preenable() have all been
updated to not use it anymore. It is unused now and can be remove.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-buffer.c | 18 ------------------
 include/linux/iio/buffer.h        |  2 --
 2 files changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 186f501e6415..d7ab258e3a42 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -749,24 +749,6 @@ done:
 }
 EXPORT_SYMBOL(iio_buffer_store_enable);
 
-int iio_sw_buffer_preenable(struct iio_dev *indio_dev)
-{
-	struct iio_buffer *buffer;
-	unsigned bytes;
-	dev_dbg(&indio_dev->dev, "%s\n", __func__);
-
-	list_for_each_entry(buffer, &indio_dev->buffer_list, buffer_list)
-		if (buffer->access->set_bytes_per_datum) {
-			bytes = iio_compute_scan_bytes(indio_dev,
-						       buffer->scan_mask,
-						       buffer->scan_timestamp);
-
-			buffer->access->set_bytes_per_datum(buffer, bytes);
-		}
-	return 0;
-}
-EXPORT_SYMBOL(iio_sw_buffer_preenable);
-
 /**
  * iio_validate_scan_mask_onehot() - Validates that exactly one channel is selected
  * @indio_dev: the iio device
diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 6e428d96d570..15607b45221a 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -206,8 +206,6 @@ ssize_t iio_buffer_show_enable(struct device *dev,
 					   iio_buffer_show_enable,	\
 					   iio_buffer_store_enable)
 
-int iio_sw_buffer_preenable(struct iio_dev *indio_dev);
-
 bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev,
 	const unsigned long *mask);
 
-- 
cgit v1.2.3


From de68bab4fa96014cfaa6fcbcdb9750e32969fb86 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Mon, 30 Sep 2013 17:26:28 +0300
Subject: usb: Don't enable USB 2.0 Link PM by default.

How it's supposed to work:
--------------------------

USB 2.0 Link PM is a lower power state that some newer USB 2.0 devices
support.  USB 3.0 devices certified by the USB-IF are required to
support it if they are plugged into a USB 2.0 only port, or a USB 2.0
cable is used.  USB 2.0 Link PM requires both a USB device and a host
controller that supports USB 2.0 hardware-enabled LPM.

USB 2.0 Link PM is designed to be enabled once by software, and the host
hardware handles transitions to the L1 state automatically.  The premise
of USB 2.0 Link PM is to be able to put the device into a lower power
link state when the bus is idle or the device NAKs USB IN transfers for
a specified amount of time.

...but hardware is broken:
--------------------------

It turns out many USB 3.0 devices claim to support USB 2.0 Link PM (by
setting the LPM bit in their USB 2.0 BOS descriptor), but they don't
actually implement it correctly.  This manifests as the USB device
refusing to respond to transfers when it is plugged into a USB 2.0 only
port under the Haswell-ULT/Lynx Point LP xHCI host.

These devices pass the xHCI driver's simple test to enable USB 2.0 Link
PM, wait for the port to enter L1, and then bring it back into L0.  They
only start to break when L1 entry is interleaved with transfers.

Some devices then fail to respond to the next control transfer (usually
a Set Configuration).  This results in devices never enumerating.

Other mass storage devices (such as a later model Western Digital My
Passport USB 3.0 hard drive) respond fine to going into L1 between
control transfers.  They ACK the entry, come out of L1 when the host
needs to send a control transfer, and respond properly to those control
transfers.  However, when the first READ10 SCSI command is sent, the
device NAKs the data phase while it's reading from the spinning disk.
Eventually, the host requests to put the link into L1, and the device
ACKs that request.  Then it never responds to the data phase of the
READ10 command.  This results in not being able to read from the drive.

Some mass storage devices (like the Corsair Survivor USB 3.0 flash
drive) are well behaved.  They ACK the entry into L1 during control
transfers, and when SCSI commands start coming in, they NAK the requests
to go into L1, because they need to be at full power.

Not all USB 3.0 devices advertise USB 2.0 link PM support.  My Point
Grey USB 3.0 webcam advertises itself as a USB 2.1 device, but doesn't
have a USB 2.0 BOS descriptor, so we don't enable USB 2.0 Link PM.  I
suspect that means the device isn't certified.

What do we do about it?
-----------------------

There's really no good way for the kernel to test these devices.
Therefore, the kernel needs to disable USB 2.0 Link PM by default, and
distros will have to enable it by writing 1 to the sysfs file
/sys/bus/usb/devices/../power/usb2_hardware_lpm.  Rip out the xHCI Link
PM test, since it's not sufficient to detect these buggy devices, and
don't automatically enable LPM after the device is addressed.

This patch should be backported to kernels as old as 3.11, that
contain the commit a558ccdcc71c7770c5e80c926a31cfe8a3892a09 "usb: xhci:
add USB2 Link power management BESL support".  Without this fix, some
USB 3.0 devices will not enumerate or work properly under USB 2.0 ports
on Haswell-ULT systems.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable@vger.kernel.org
---
 drivers/usb/core/driver.c   |   3 +
 drivers/usb/core/hub.c      |   1 +
 drivers/usb/core/sysfs.c    |   6 +-
 drivers/usb/host/xhci-mem.c |  10 ---
 drivers/usb/host/xhci.c     | 161 +++++---------------------------------------
 include/linux/usb.h         |   4 +-
 6 files changed, 29 insertions(+), 156 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index f7841d44feda..689433cdef25 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1790,6 +1790,9 @@ int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable)
 	struct usb_hcd *hcd = bus_to_hcd(udev->bus);
 	int ret = -EPERM;
 
+	if (enable && !udev->usb2_hw_lpm_allowed)
+		return 0;
+
 	if (hcd->driver->set_usb2_hw_lpm) {
 		ret = hcd->driver->set_usb2_hw_lpm(hcd, udev, enable);
 		if (!ret)
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d3a1d79d663d..9be1a2d5fba6 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5203,6 +5203,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 
 done:
 	/* Now that the alt settings are re-installed, enable LTM and LPM. */
+	usb_set_usb2_hardware_lpm(udev, 1);
 	usb_unlocked_enable_lpm(udev);
 	usb_enable_ltm(udev);
 	usb_release_bos_descriptor(udev);
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 5cf431b0424c..52a97adf02a0 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -458,7 +458,7 @@ static ssize_t usb2_hardware_lpm_show(struct device *dev,
 	struct usb_device *udev = to_usb_device(dev);
 	const char *p;
 
-	if (udev->usb2_hw_lpm_enabled == 1)
+	if (udev->usb2_hw_lpm_allowed == 1)
 		p = "enabled";
 	else
 		p = "disabled";
@@ -478,8 +478,10 @@ static ssize_t usb2_hardware_lpm_store(struct device *dev,
 
 	ret = strtobool(buf, &value);
 
-	if (!ret)
+	if (!ret) {
+		udev->usb2_hw_lpm_allowed = value;
 		ret = usb_set_usb2_hardware_lpm(udev, value);
+	}
 
 	usb_unlock_device(udev);
 
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 83bcd13622c3..49b8bd063fab 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1693,9 +1693,7 @@ void xhci_free_command(struct xhci_hcd *xhci,
 void xhci_mem_cleanup(struct xhci_hcd *xhci)
 {
 	struct pci_dev	*pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller);
-	struct dev_info	*dev_info, *next;
 	struct xhci_cd  *cur_cd, *next_cd;
-	unsigned long	flags;
 	int size;
 	int i, j, num_ports;
 
@@ -1756,13 +1754,6 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
 
 	scratchpad_free(xhci);
 
-	spin_lock_irqsave(&xhci->lock, flags);
-	list_for_each_entry_safe(dev_info, next, &xhci->lpm_failed_devs, list) {
-		list_del(&dev_info->list);
-		kfree(dev_info);
-	}
-	spin_unlock_irqrestore(&xhci->lock, flags);
-
 	if (!xhci->rh_bw)
 		goto no_bw;
 
@@ -2231,7 +2222,6 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
 	u32 page_size, temp;
 	int i;
 
-	INIT_LIST_HEAD(&xhci->lpm_failed_devs);
 	INIT_LIST_HEAD(&xhci->cancel_cmd_list);
 
 	page_size = xhci_readl(xhci, &xhci->op_regs->page_size);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2983e5dd98bf..0ea253ec813d 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4013,133 +4013,6 @@ static int xhci_calculate_usb2_hw_lpm_params(struct usb_device *udev)
 	return PORT_BESLD(besld) | PORT_L1_TIMEOUT(l1) | PORT_HIRDM(hirdm);
 }
 
-static int xhci_usb2_software_lpm_test(struct usb_hcd *hcd,
-					struct usb_device *udev)
-{
-	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
-	struct dev_info	*dev_info;
-	__le32 __iomem	**port_array;
-	__le32 __iomem	*addr, *pm_addr;
-	u32		temp, dev_id;
-	unsigned int	port_num;
-	unsigned long	flags;
-	int		hird;
-	int		ret;
-
-	if (hcd->speed == HCD_USB3 || !xhci->sw_lpm_support ||
-			!udev->lpm_capable)
-		return -EINVAL;
-
-	/* we only support lpm for non-hub device connected to root hub yet */
-	if (!udev->parent || udev->parent->parent ||
-			udev->descriptor.bDeviceClass == USB_CLASS_HUB)
-		return -EINVAL;
-
-	spin_lock_irqsave(&xhci->lock, flags);
-
-	/* Look for devices in lpm_failed_devs list */
-	dev_id = le16_to_cpu(udev->descriptor.idVendor) << 16 |
-			le16_to_cpu(udev->descriptor.idProduct);
-	list_for_each_entry(dev_info, &xhci->lpm_failed_devs, list) {
-		if (dev_info->dev_id == dev_id) {
-			ret = -EINVAL;
-			goto finish;
-		}
-	}
-
-	port_array = xhci->usb2_ports;
-	port_num = udev->portnum - 1;
-
-	if (port_num > HCS_MAX_PORTS(xhci->hcs_params1)) {
-		xhci_dbg(xhci, "invalid port number %d\n", udev->portnum);
-		ret = -EINVAL;
-		goto finish;
-	}
-
-	/*
-	 * Test USB 2.0 software LPM.
-	 * FIXME: some xHCI 1.0 hosts may implement a new register to set up
-	 * hardware-controlled USB 2.0 LPM. See section 5.4.11 and 4.23.5.1.1.1
-	 * in the June 2011 errata release.
-	 */
-	xhci_dbg(xhci, "test port %d software LPM\n", port_num);
-	/*
-	 * Set L1 Device Slot and HIRD/BESL.
-	 * Check device's USB 2.0 extension descriptor to determine whether
-	 * HIRD or BESL shoule be used. See USB2.0 LPM errata.
-	 */
-	pm_addr = port_array[port_num] + PORTPMSC;
-	hird = xhci_calculate_hird_besl(xhci, udev);
-	temp = PORT_L1DS(udev->slot_id) | PORT_HIRD(hird);
-	xhci_writel(xhci, temp, pm_addr);
-
-	/* Set port link state to U2(L1) */
-	addr = port_array[port_num];
-	xhci_set_link_state(xhci, port_array, port_num, XDEV_U2);
-
-	/* wait for ACK */
-	spin_unlock_irqrestore(&xhci->lock, flags);
-	msleep(10);
-	spin_lock_irqsave(&xhci->lock, flags);
-
-	/* Check L1 Status */
-	ret = xhci_handshake(xhci, pm_addr,
-			PORT_L1S_MASK, PORT_L1S_SUCCESS, 125);
-	if (ret != -ETIMEDOUT) {
-		/* enter L1 successfully */
-		temp = xhci_readl(xhci, addr);
-		xhci_dbg(xhci, "port %d entered L1 state, port status 0x%x\n",
-				port_num, temp);
-		ret = 0;
-	} else {
-		temp = xhci_readl(xhci, pm_addr);
-		xhci_dbg(xhci, "port %d software lpm failed, L1 status %d\n",
-				port_num, temp & PORT_L1S_MASK);
-		ret = -EINVAL;
-	}
-
-	/* Resume the port */
-	xhci_set_link_state(xhci, port_array, port_num, XDEV_U0);
-
-	spin_unlock_irqrestore(&xhci->lock, flags);
-	msleep(10);
-	spin_lock_irqsave(&xhci->lock, flags);
-
-	/* Clear PLC */
-	xhci_test_and_clear_bit(xhci, port_array, port_num, PORT_PLC);
-
-	/* Check PORTSC to make sure the device is in the right state */
-	if (!ret) {
-		temp = xhci_readl(xhci, addr);
-		xhci_dbg(xhci, "resumed port %d status 0x%x\n",	port_num, temp);
-		if (!(temp & PORT_CONNECT) || !(temp & PORT_PE) ||
-				(temp & PORT_PLS_MASK) != XDEV_U0) {
-			xhci_dbg(xhci, "port L1 resume fail\n");
-			ret = -EINVAL;
-		}
-	}
-
-	if (ret) {
-		/* Insert dev to lpm_failed_devs list */
-		xhci_warn(xhci, "device LPM test failed, may disconnect and "
-				"re-enumerate\n");
-		dev_info = kzalloc(sizeof(struct dev_info), GFP_ATOMIC);
-		if (!dev_info) {
-			ret = -ENOMEM;
-			goto finish;
-		}
-		dev_info->dev_id = dev_id;
-		INIT_LIST_HEAD(&dev_info->list);
-		list_add(&dev_info->list, &xhci->lpm_failed_devs);
-	} else {
-		xhci_ring_device(xhci, udev->slot_id);
-	}
-
-finish:
-	spin_unlock_irqrestore(&xhci->lock, flags);
-	return ret;
-}
-
 int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd,
 			struct usb_device *udev, int enable)
 {
@@ -4267,24 +4140,26 @@ static int xhci_check_usb2_port_capability(struct xhci_hcd *xhci, int port,
 int xhci_update_device(struct usb_hcd *hcd, struct usb_device *udev)
 {
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
-	int		ret;
 	int		portnum = udev->portnum - 1;
 
-	ret = xhci_usb2_software_lpm_test(hcd, udev);
-	if (!ret) {
-		xhci_dbg(xhci, "software LPM test succeed\n");
-		if (xhci->hw_lpm_support == 1 &&
-		    xhci_check_usb2_port_capability(xhci, portnum, XHCI_HLC)) {
-			udev->usb2_hw_lpm_capable = 1;
-			udev->l1_params.timeout = XHCI_L1_TIMEOUT;
-			udev->l1_params.besl = XHCI_DEFAULT_BESL;
-			if (xhci_check_usb2_port_capability(xhci, portnum,
-							    XHCI_BLC))
-				udev->usb2_hw_lpm_besl_capable = 1;
-			ret = xhci_set_usb2_hardware_lpm(hcd, udev, 1);
-			if (!ret)
-				udev->usb2_hw_lpm_enabled = 1;
-		}
+	if (hcd->speed == HCD_USB3 || !xhci->sw_lpm_support ||
+			!udev->lpm_capable)
+		return 0;
+
+	/* we only support lpm for non-hub device connected to root hub yet */
+	if (!udev->parent || udev->parent->parent ||
+			udev->descriptor.bDeviceClass == USB_CLASS_HUB)
+		return 0;
+
+	if (xhci->hw_lpm_support == 1 &&
+			xhci_check_usb2_port_capability(
+				xhci, portnum, XHCI_HLC)) {
+		udev->usb2_hw_lpm_capable = 1;
+		udev->l1_params.timeout = XHCI_L1_TIMEOUT;
+		udev->l1_params.besl = XHCI_DEFAULT_BESL;
+		if (xhci_check_usb2_port_capability(xhci, portnum,
+					XHCI_BLC))
+			udev->usb2_hw_lpm_besl_capable = 1;
 	}
 
 	return 0;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 055ba74bee80..7454865ad148 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -475,7 +475,8 @@ struct usb3_lpm_parameters {
  * @lpm_capable: device supports LPM
  * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM
  * @usb2_hw_lpm_besl_capable: device can perform USB2 hardware BESL LPM
- * @usb2_hw_lpm_enabled: USB2 hardware LPM enabled
+ * @usb2_hw_lpm_enabled: USB2 hardware LPM is enabled
+ * @usb2_hw_lpm_allowed: Userspace allows USB 2.0 LPM to be enabled
  * @usb3_lpm_enabled: USB3 hardware LPM enabled
  * @string_langid: language ID for strings
  * @product: iProduct string, if present (static)
@@ -548,6 +549,7 @@ struct usb_device {
 	unsigned usb2_hw_lpm_capable:1;
 	unsigned usb2_hw_lpm_besl_capable:1;
 	unsigned usb2_hw_lpm_enabled:1;
+	unsigned usb2_hw_lpm_allowed:1;
 	unsigned usb3_lpm_enabled:1;
 	int string_langid;
 
-- 
cgit v1.2.3


From 8eaede49dfdc1ff1d727f9c913665b8009945191 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 7 Oct 2013 01:05:46 +0100
Subject: sysrq: Allow magic SysRq key functions to be disabled through Kconfig

Turn the initial value of sysctl kernel.sysrq (SYSRQ_DEFAULT_ENABLE)
into a Kconfig variable.

Original version by Bastian Blank <waldi@debian.org>.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/sysrq.txt | 13 ++++++-------
 drivers/tty/sysrq.c     |  2 +-
 include/linux/sysrq.h   |  3 ---
 kernel/sysctl.c         |  2 +-
 lib/Kconfig.debug       |  9 +++++++++
 5 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 1c0471dc70fe..0e307c94809a 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -11,11 +11,9 @@ regardless of whatever else it is doing, unless it is completely locked up.
 You need to say "yes" to 'Magic SysRq key (CONFIG_MAGIC_SYSRQ)' when
 configuring the kernel. When running a kernel with SysRq compiled in,
 /proc/sys/kernel/sysrq controls the functions allowed to be invoked via
-the SysRq key. By default the file contains 1 which means that every
-possible SysRq request is allowed (in older versions SysRq was disabled
-by default, and you were required to specifically enable it at run-time
-but this is not the case any more). Here is the list of possible values
-in /proc/sys/kernel/sysrq:
+the SysRq key. The default value in this file is set by the
+CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE config symbol, which itself defaults
+to 1. Here is the list of possible values in /proc/sys/kernel/sysrq:
    0 - disable sysrq completely
    1 - enable all functions of sysrq
   >1 - bitmask of allowed sysrq functions (see below for detailed function
@@ -32,8 +30,9 @@ in /proc/sys/kernel/sysrq:
 You can set the value in the file by the following command:
     echo "number" >/proc/sys/kernel/sysrq
 
-The number may be written either as decimal or as hexadecimal with the
-0x prefix.
+The number may be written here either as decimal or as hexadecimal
+with the 0x prefix. CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE must always be
+written in hexadecimal.
 
 Note that the value of /proc/sys/kernel/sysrq influences only the invocation
 via a keyboard. Invocation of any operation via /proc/sysrq-trigger is always
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 40a9fe9d3b10..ce396ecdf412 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -51,7 +51,7 @@
 #include <asm/irq_regs.h>
 
 /* Whether we react on sysrq keys or just ignore them */
-static int __read_mostly sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
+static int __read_mostly sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
 static bool __read_mostly sysrq_always_enabled;
 
 unsigned short platform_sysrq_reset_seq[] __weak = { KEY_RESERVED };
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 7faf933cced7..387fa7d05c98 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -17,9 +17,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 
-/* Enable/disable SYSRQ support by default (0==no, 1==yes). */
-#define SYSRQ_DEFAULT_ENABLE	1
-
 /* Possible values of bitmask for enabling sysrq functions */
 /* 0x0001 is reserved for enable everything */
 #define SYSRQ_ENABLE_LOG	0x0002
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b2f06f3c6a3f..8b80f1bae21a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -190,7 +190,7 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
 
 #ifdef CONFIG_MAGIC_SYSRQ
 /* Note: sysrq code uses it's own private copy */
-static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
+static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
 
 static int sysrq_sysctl_handler(ctl_table *table, int write,
 				void __user *buffer, size_t *lenp,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 06344d986eb9..29329374ff0d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -312,6 +312,15 @@ config MAGIC_SYSRQ
 	  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
 	  unless you really know what this hack does.
 
+config MAGIC_SYSRQ_DEFAULT_ENABLE
+	hex "Enable magic SysRq key functions by default"
+	depends on MAGIC_SYSRQ
+	default 0x1
+	help
+	  Specifies which SysRq key functions are enabled by default.
+	  This may be set to 1 or 0 to enable or disable them all, or
+	  to a bitmask as described in Documentation/sysrq.txt.
+
 config DEBUG_KERNEL
 	bool "Kernel debugging"
 	help
-- 
cgit v1.2.3


From fa2b5ea09e48186041f68649ab8192447b31bffc Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 15 Oct 2013 09:20:52 +0200
Subject: serial: core: delete .set_wake() callback

This deletes the .set_wake() callback in the struct uart_ops.
Apparently this has been unused since pre-git times. In the
old-2.6-bkcvs it is deleted as part of a changeset removing
the PM_SET_WAKEUP from pm_request_t which is since also deleted
from the kernel.

The apropriate way to set wakeups in the kernel is to have a
code snippet like this in .suspend() or .runtime_suspend()
callbacks:

static int foo_suspend(struct device *dev)
{
	if (device_may_wakeup(dev)) {
		/* Enable wakeups, set internal states */
	}
}

This specific callback is not coming back.

Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Len Brown <len.brown@intel.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Kevin Hilman <khilman@deeprootsystems.com>
Cc: Dmitry Artamonow <mad_soft@inbox.ru>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/serial/driver | 4 ----
 include/linux/serial_core.h | 1 -
 2 files changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/serial/driver b/Documentation/serial/driver
index 067c47d46917..c3a7689a90e6 100644
--- a/Documentation/serial/driver
+++ b/Documentation/serial/driver
@@ -264,10 +264,6 @@ hardware.
 	Locking: none.
 	Interrupts: caller dependent.
 
-  set_wake(port,state)
-	Enable/disable power management wakeup on serial activity.  Not
-	currently implemented.
-
   type(port)
 	Return a pointer to a string constant describing the specified
 	port, or return NULL, in which case the string 'unknown' is
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index b98291ac7f14..f729be981da0 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -66,7 +66,6 @@ struct uart_ops {
 	void		(*set_ldisc)(struct uart_port *, int new);
 	void		(*pm)(struct uart_port *, unsigned int state,
 			      unsigned int oldstate);
-	int		(*set_wake)(struct uart_port *, unsigned int state);
 
 	/*
 	 * Return a string describing the type of the port
-- 
cgit v1.2.3


From 32c37fc30c52508711ea6a108cfd5855b8a07176 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.de>
Date: Mon, 14 Oct 2013 15:24:55 +0200
Subject: usb-storage: add quirk for mandatory READ_CAPACITY_16

Some USB drive enclosures do not correctly report an
overflow condition if they hold a drive with a capacity
over 2TB and are confronted with a READ_CAPACITY_10.
They answer with their capacity modulo 2TB.
The generic layer cannot cope with that. It must be told
to use READ_CAPACITY_16 from the beginning.

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/scsiglue.c     | 5 ++++-
 drivers/usb/storage/unusual_devs.h | 7 +++++++
 include/linux/usb_usual.h          | 4 +++-
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 94d75edef77f..18509e6c21ab 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -211,8 +211,11 @@ static int slave_configure(struct scsi_device *sdev)
 		/*
 		 * Many devices do not respond properly to READ_CAPACITY_16.
 		 * Tell the SCSI layer to try READ_CAPACITY_10 first.
+		 * However some USB 3.0 drive enclosures return capacity
+		 * modulo 2TB. Those must use READ_CAPACITY_16
 		 */
-		sdev->try_rc_10_first = 1;
+		if (!(us->fflags & US_FL_NEEDS_CAP16))
+			sdev->try_rc_10_first = 1;
 
 		/* assume SPC3 or latter devices support sense size > 18 */
 		if (sdev->scsi_level > SCSI_SPC_2)
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index c015f2c16729..de32cfa5bfa6 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1925,6 +1925,13 @@ UNUSUAL_DEV(  0x1652, 0x6600, 0x0201, 0x0201,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_IGNORE_RESIDUE ),
 
+/* Reported by Oliver Neukum <oneukum@suse.com> */
+UNUSUAL_DEV(  0x174c, 0x55aa, 0x0100, 0x0100,
+		"ASMedia",
+		"AS2105",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_NEEDS_CAP16),
+
 /* Reported by Jesse Feddema <jdfeddema@gmail.com> */
 UNUSUAL_DEV(  0x177f, 0x0400, 0x0000, 0x0000,
 		"Yarvik",
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index bf99cd01be20..630356866030 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -66,7 +66,9 @@
 	US_FLAG(INITIAL_READ10,	0x00100000)			\
 		/* Initial READ(10) (and others) must be retried */	\
 	US_FLAG(WRITE_CACHE,	0x00200000)			\
-		/* Write Cache status is not available */
+		/* Write Cache status is not available */	\
+	US_FLAG(NEEDS_CAP16,	0x00400000)
+		/* cannot handle READ_CAPACITY_10 */
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
-- 
cgit v1.2.3


From f1468a2077e8c00fddb6cecec41b356637195ca3 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <sylvester.nawrocki@gmail.com>
Date: Wed, 16 Oct 2013 21:58:11 +0530
Subject: exynos4-is: Use the generic MIPI CSIS PHY driver

Use the generic PHY API instead of the platform callback
to control the MIPI CSIS DPHY.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Felipe Balbi <balbi@ti.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/exynos4-is/Kconfig     |  2 +-
 drivers/media/platform/exynos4-is/mipi-csis.c | 13 ++++++++++---
 include/linux/platform_data/mipi-csis.h       |  9 ---------
 3 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/platform/exynos4-is/Kconfig b/drivers/media/platform/exynos4-is/Kconfig
index 53ad0f080179..d2d3b4b61435 100644
--- a/drivers/media/platform/exynos4-is/Kconfig
+++ b/drivers/media/platform/exynos4-is/Kconfig
@@ -29,7 +29,7 @@ config VIDEO_S5P_FIMC
 config VIDEO_S5P_MIPI_CSIS
 	tristate "S5P/EXYNOS MIPI-CSI2 receiver (MIPI-CSIS) driver"
 	depends on REGULATOR
-	select S5P_SETUP_MIPIPHY
+	select GENERIC_PHY
 	help
 	  This is a V4L2 driver for Samsung S5P and EXYNOS4 SoC MIPI-CSI2
 	  receiver (MIPI-CSIS) devices.
diff --git a/drivers/media/platform/exynos4-is/mipi-csis.c b/drivers/media/platform/exynos4-is/mipi-csis.c
index 0914230b42de..9fc2af6a0446 100644
--- a/drivers/media/platform/exynos4-is/mipi-csis.c
+++ b/drivers/media/platform/exynos4-is/mipi-csis.c
@@ -20,6 +20,7 @@
 #include <linux/memory.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/phy/phy.h>
 #include <linux/platform_data/mipi-csis.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -180,6 +181,7 @@ struct csis_drvdata {
  * @sd: v4l2_subdev associated with CSIS device instance
  * @index: the hardware instance index
  * @pdev: CSIS platform device
+ * @phy: pointer to the CSIS generic PHY
  * @regs: mmaped I/O registers memory
  * @supplies: CSIS regulator supplies
  * @clock: CSIS clocks
@@ -203,6 +205,7 @@ struct csis_state {
 	struct v4l2_subdev sd;
 	u8 index;
 	struct platform_device *pdev;
+	struct phy *phy;
 	void __iomem *regs;
 	struct regulator_bulk_data supplies[CSIS_NUM_SUPPLIES];
 	struct clk *clock[NUM_CSIS_CLOCKS];
@@ -779,8 +782,8 @@ static int s5pcsis_parse_dt(struct platform_device *pdev,
 					"samsung,csis-wclk");
 
 	state->num_lanes = endpoint.bus.mipi_csi2.num_data_lanes;
-
 	of_node_put(node);
+
 	return 0;
 }
 #else
@@ -829,6 +832,10 @@ static int s5pcsis_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	state->phy = devm_phy_get(dev, "csis");
+	if (IS_ERR(state->phy))
+		return PTR_ERR(state->phy);
+
 	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	state->regs = devm_ioremap_resource(dev, mem_res);
 	if (IS_ERR(state->regs))
@@ -922,7 +929,7 @@ static int s5pcsis_pm_suspend(struct device *dev, bool runtime)
 	mutex_lock(&state->lock);
 	if (state->flags & ST_POWERED) {
 		s5pcsis_stop_stream(state);
-		ret = s5p_csis_phy_enable(state->index, false);
+		ret = phy_power_off(state->phy);
 		if (ret)
 			goto unlock;
 		ret = regulator_bulk_disable(CSIS_NUM_SUPPLIES,
@@ -958,7 +965,7 @@ static int s5pcsis_pm_resume(struct device *dev, bool runtime)
 					    state->supplies);
 		if (ret)
 			goto unlock;
-		ret = s5p_csis_phy_enable(state->index, true);
+		ret = phy_power_on(state->phy);
 		if (!ret) {
 			state->flags |= ST_POWERED;
 		} else {
diff --git a/include/linux/platform_data/mipi-csis.h b/include/linux/platform_data/mipi-csis.h
index bf34e17cee7f..c2fd9024717c 100644
--- a/include/linux/platform_data/mipi-csis.h
+++ b/include/linux/platform_data/mipi-csis.h
@@ -25,13 +25,4 @@ struct s5p_platform_mipi_csis {
 	u8 hs_settle;
 };
 
-/**
- * s5p_csis_phy_enable - global MIPI-CSI receiver D-PHY control
- * @id:     MIPI-CSIS harware instance index (0...1)
- * @on:     true to enable D-PHY and deassert its reset
- *          false to disable D-PHY
- * @return: 0 on success, or negative error code on failure
- */
-int s5p_csis_phy_enable(int id, bool on);
-
 #endif /* __PLAT_SAMSUNG_MIPI_CSIS_H_ */
-- 
cgit v1.2.3


From 75d2364ea0cab3a95be3f8d1f8dabd20ac4b1b2a Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 11 Oct 2013 16:54:56 -0700
Subject: PowerCap: Add class driver

The power capping framework providing a consistent interface between the
kernel and user space that allows power capping drivers to expose their
settings to user space in a uniform way.
The overall design of the framework is described in the documentation
added by the previous patch in this series.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/Kconfig        |  19 ++
 drivers/powercap/Makefile       |   1 +
 drivers/powercap/powercap_sys.c | 683 ++++++++++++++++++++++++++++++++++++++++
 include/linux/powercap.h        | 325 +++++++++++++++++++
 4 files changed, 1028 insertions(+)
 create mode 100644 drivers/powercap/Kconfig
 create mode 100644 drivers/powercap/Makefile
 create mode 100644 drivers/powercap/powercap_sys.c
 create mode 100644 include/linux/powercap.h

(limited to 'include/linux')

diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
new file mode 100644
index 000000000000..a37055eb5ebc
--- /dev/null
+++ b/drivers/powercap/Kconfig
@@ -0,0 +1,19 @@
+#
+# Generic power capping sysfs interface configuration
+#
+
+menuconfig POWERCAP
+	bool "Generic powercap sysfs driver"
+	help
+	  The power capping sysfs interface allows kernel subsystems to expose power
+	  capping settings to user space in a consistent way.  Usually, it consists
+	  of multiple control types that determine which settings may be exposed and
+	  power zones representing parts of the system that can be subject to power
+	  capping.
+
+	  If you want this code to be compiled in, say Y here.
+
+if POWERCAP
+# Client driver configurations go here.
+
+endif
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
new file mode 100644
index 000000000000..6defbc8dc4bf
--- /dev/null
+++ b/drivers/powercap/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_POWERCAP)	+= powercap_sys.o
diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
new file mode 100644
index 000000000000..c22fa4c78eaa
--- /dev/null
+++ b/drivers/powercap/powercap_sys.c
@@ -0,0 +1,683 @@
+/*
+ * Power capping class
+ * Copyright (c) 2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/powercap.h>
+
+#define to_powercap_zone(n) container_of(n, struct powercap_zone, dev)
+#define to_powercap_control_type(n) \
+			container_of(n, struct powercap_control_type, dev)
+
+/* Power zone show function */
+#define define_power_zone_show(_attr)		\
+static ssize_t _attr##_show(struct device *dev, \
+					struct device_attribute *dev_attr,\
+					char *buf) \
+{ \
+	u64 value; \
+	ssize_t len = -EINVAL; \
+	struct powercap_zone *power_zone = to_powercap_zone(dev); \
+	\
+	if (power_zone->ops->get_##_attr) { \
+		if (!power_zone->ops->get_##_attr(power_zone, &value)) \
+			len = sprintf(buf, "%lld\n", value); \
+	} \
+	\
+	return len; \
+}
+
+/* The only meaningful input is 0 (reset), others are silently ignored */
+#define define_power_zone_store(_attr)		\
+static ssize_t _attr##_store(struct device *dev,\
+				struct device_attribute *dev_attr, \
+				const char *buf, size_t count) \
+{ \
+	int err; \
+	struct powercap_zone *power_zone = to_powercap_zone(dev); \
+	u64 value; \
+	\
+	err = kstrtoull(buf, 10, &value); \
+	if (err) \
+		return -EINVAL; \
+	if (value) \
+		return count; \
+	if (power_zone->ops->reset_##_attr) { \
+		if (!power_zone->ops->reset_##_attr(power_zone)) \
+			return count; \
+	} \
+	\
+	return -EINVAL; \
+}
+
+/* Power zone constraint show function */
+#define define_power_zone_constraint_show(_attr) \
+static ssize_t show_constraint_##_attr(struct device *dev, \
+				struct device_attribute *dev_attr,\
+				char *buf) \
+{ \
+	u64 value; \
+	ssize_t len = -ENODATA; \
+	struct powercap_zone *power_zone = to_powercap_zone(dev); \
+	int id; \
+	struct powercap_zone_constraint *pconst;\
+	\
+	if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) \
+		return -EINVAL; \
+	if (id >= power_zone->const_id_cnt)	\
+		return -EINVAL; \
+	pconst = &power_zone->constraints[id]; \
+	if (pconst && pconst->ops && pconst->ops->get_##_attr) { \
+		if (!pconst->ops->get_##_attr(power_zone, id, &value)) \
+			len = sprintf(buf, "%lld\n", value); \
+	} \
+	\
+	return len; \
+}
+
+/* Power zone constraint store function */
+#define define_power_zone_constraint_store(_attr) \
+static ssize_t store_constraint_##_attr(struct device *dev,\
+				struct device_attribute *dev_attr, \
+				const char *buf, size_t count) \
+{ \
+	int err; \
+	u64 value; \
+	struct powercap_zone *power_zone = to_powercap_zone(dev); \
+	int id; \
+	struct powercap_zone_constraint *pconst;\
+	\
+	if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) \
+		return -EINVAL; \
+	if (id >= power_zone->const_id_cnt)	\
+		return -EINVAL; \
+	pconst = &power_zone->constraints[id]; \
+	err = kstrtoull(buf, 10, &value); \
+	if (err) \
+		return -EINVAL; \
+	if (pconst && pconst->ops && pconst->ops->set_##_attr) { \
+		if (!pconst->ops->set_##_attr(power_zone, id, value)) \
+			return count; \
+	} \
+	\
+	return -ENODATA; \
+}
+
+/* Power zone information callbacks */
+define_power_zone_show(power_uw);
+define_power_zone_show(max_power_range_uw);
+define_power_zone_show(energy_uj);
+define_power_zone_store(energy_uj);
+define_power_zone_show(max_energy_range_uj);
+
+/* Power zone attributes */
+static DEVICE_ATTR_RO(max_power_range_uw);
+static DEVICE_ATTR_RO(power_uw);
+static DEVICE_ATTR_RO(max_energy_range_uj);
+static DEVICE_ATTR_RW(energy_uj);
+
+/* Power zone constraint attributes callbacks */
+define_power_zone_constraint_show(power_limit_uw);
+define_power_zone_constraint_store(power_limit_uw);
+define_power_zone_constraint_show(time_window_us);
+define_power_zone_constraint_store(time_window_us);
+define_power_zone_constraint_show(max_power_uw);
+define_power_zone_constraint_show(min_power_uw);
+define_power_zone_constraint_show(max_time_window_us);
+define_power_zone_constraint_show(min_time_window_us);
+
+/* For one time seeding of constraint device attributes */
+struct powercap_constraint_attr {
+	struct device_attribute power_limit_attr;
+	struct device_attribute time_window_attr;
+	struct device_attribute max_power_attr;
+	struct device_attribute min_power_attr;
+	struct device_attribute max_time_window_attr;
+	struct device_attribute min_time_window_attr;
+	struct device_attribute name_attr;
+};
+
+static struct powercap_constraint_attr
+				constraint_attrs[MAX_CONSTRAINTS_PER_ZONE];
+
+/* A list of powercap control_types */
+static LIST_HEAD(powercap_cntrl_list);
+/* Mutex to protect list of powercap control_types */
+static DEFINE_MUTEX(powercap_cntrl_list_lock);
+
+#define POWERCAP_CONSTRAINT_NAME_LEN	30 /* Some limit to avoid overflow */
+static ssize_t show_constraint_name(struct device *dev,
+				struct device_attribute *dev_attr,
+				char *buf)
+{
+	const char *name;
+	struct powercap_zone *power_zone = to_powercap_zone(dev);
+	int id;
+	ssize_t len = -ENODATA;
+	struct powercap_zone_constraint *pconst;
+
+	if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id))
+		return -EINVAL;
+	if (id >= power_zone->const_id_cnt)
+		return -EINVAL;
+	pconst = &power_zone->constraints[id];
+
+	if (pconst && pconst->ops && pconst->ops->get_name) {
+		name = pconst->ops->get_name(power_zone, id);
+		if (name) {
+			snprintf(buf, POWERCAP_CONSTRAINT_NAME_LEN,
+								"%s\n", name);
+			buf[POWERCAP_CONSTRAINT_NAME_LEN] = '\0';
+			len = strlen(buf);
+		}
+	}
+
+	return len;
+}
+
+static int create_constraint_attribute(int id, const char *name,
+				int mode,
+				struct device_attribute *dev_attr,
+				ssize_t (*show)(struct device *,
+					struct device_attribute *, char *),
+				ssize_t (*store)(struct device *,
+					struct device_attribute *,
+				const char *, size_t)
+				)
+{
+
+	dev_attr->attr.name = kasprintf(GFP_KERNEL, "constraint_%d_%s",
+								id, name);
+	if (!dev_attr->attr.name)
+		return -ENOMEM;
+	dev_attr->attr.mode = mode;
+	dev_attr->show = show;
+	dev_attr->store = store;
+
+	return 0;
+}
+
+static void free_constraint_attributes(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_CONSTRAINTS_PER_ZONE; ++i) {
+		kfree(constraint_attrs[i].power_limit_attr.attr.name);
+		kfree(constraint_attrs[i].time_window_attr.attr.name);
+		kfree(constraint_attrs[i].name_attr.attr.name);
+		kfree(constraint_attrs[i].max_power_attr.attr.name);
+		kfree(constraint_attrs[i].min_power_attr.attr.name);
+		kfree(constraint_attrs[i].max_time_window_attr.attr.name);
+		kfree(constraint_attrs[i].min_time_window_attr.attr.name);
+	}
+}
+
+static int seed_constraint_attributes(void)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < MAX_CONSTRAINTS_PER_ZONE; ++i) {
+		ret = create_constraint_attribute(i, "power_limit_uw",
+					S_IWUSR | S_IRUGO,
+					&constraint_attrs[i].power_limit_attr,
+					show_constraint_power_limit_uw,
+					store_constraint_power_limit_uw);
+		if (ret)
+			goto err_alloc;
+		ret = create_constraint_attribute(i, "time_window_us",
+					S_IWUSR | S_IRUGO,
+					&constraint_attrs[i].time_window_attr,
+					show_constraint_time_window_us,
+					store_constraint_time_window_us);
+		if (ret)
+			goto err_alloc;
+		ret = create_constraint_attribute(i, "name", S_IRUGO,
+				&constraint_attrs[i].name_attr,
+				show_constraint_name,
+				NULL);
+		if (ret)
+			goto err_alloc;
+		ret = create_constraint_attribute(i, "max_power_uw", S_IRUGO,
+				&constraint_attrs[i].max_power_attr,
+				show_constraint_max_power_uw,
+				NULL);
+		if (ret)
+			goto err_alloc;
+		ret = create_constraint_attribute(i, "min_power_uw", S_IRUGO,
+				&constraint_attrs[i].min_power_attr,
+				show_constraint_min_power_uw,
+				NULL);
+		if (ret)
+			goto err_alloc;
+		ret = create_constraint_attribute(i, "max_time_window_us",
+				S_IRUGO,
+				&constraint_attrs[i].max_time_window_attr,
+				show_constraint_max_time_window_us,
+				NULL);
+		if (ret)
+			goto err_alloc;
+		ret = create_constraint_attribute(i, "min_time_window_us",
+				S_IRUGO,
+				&constraint_attrs[i].min_time_window_attr,
+				show_constraint_min_time_window_us,
+				NULL);
+		if (ret)
+			goto err_alloc;
+
+	}
+
+	return 0;
+
+err_alloc:
+	free_constraint_attributes();
+
+	return ret;
+}
+
+static int create_constraints(struct powercap_zone *power_zone,
+				int nr_constraints,
+				struct powercap_zone_constraint_ops *const_ops)
+{
+	int i;
+	int ret = 0;
+	int count;
+	struct powercap_zone_constraint *pconst;
+
+	if (!power_zone || !const_ops || !const_ops->get_power_limit_uw ||
+					!const_ops->set_power_limit_uw ||
+					!const_ops->get_time_window_us ||
+					!const_ops->set_time_window_us)
+		return -EINVAL;
+
+	count = power_zone->zone_attr_count;
+	for (i = 0; i < nr_constraints; ++i) {
+		pconst = &power_zone->constraints[i];
+		pconst->ops = const_ops;
+		pconst->id = power_zone->const_id_cnt;
+		power_zone->const_id_cnt++;
+		power_zone->zone_dev_attrs[count++] =
+				&constraint_attrs[i].power_limit_attr.attr;
+		power_zone->zone_dev_attrs[count++] =
+				&constraint_attrs[i].time_window_attr.attr;
+		if (pconst->ops->get_name)
+			power_zone->zone_dev_attrs[count++] =
+				&constraint_attrs[i].name_attr.attr;
+		if (pconst->ops->get_max_power_uw)
+			power_zone->zone_dev_attrs[count++] =
+				&constraint_attrs[i].max_power_attr.attr;
+		if (pconst->ops->get_min_power_uw)
+			power_zone->zone_dev_attrs[count++] =
+				&constraint_attrs[i].min_power_attr.attr;
+		if (pconst->ops->get_max_time_window_us)
+			power_zone->zone_dev_attrs[count++] =
+				&constraint_attrs[i].max_time_window_attr.attr;
+		if (pconst->ops->get_min_time_window_us)
+			power_zone->zone_dev_attrs[count++] =
+				&constraint_attrs[i].min_time_window_attr.attr;
+	}
+	power_zone->zone_attr_count = count;
+
+	return ret;
+}
+
+static bool control_type_valid(void *control_type)
+{
+	struct powercap_control_type *pos = NULL;
+	bool found = false;
+
+	mutex_lock(&powercap_cntrl_list_lock);
+
+	list_for_each_entry(pos, &powercap_cntrl_list, node) {
+		if (pos == control_type) {
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&powercap_cntrl_list_lock);
+
+	return found;
+}
+
+static ssize_t name_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct powercap_zone *power_zone = to_powercap_zone(dev);
+
+	return sprintf(buf, "%s\n", power_zone->name);
+}
+
+static DEVICE_ATTR_RO(name);
+
+/* Create zone and attributes in sysfs */
+static void create_power_zone_common_attributes(
+					struct powercap_zone *power_zone)
+{
+	int count = 0;
+
+	power_zone->zone_dev_attrs[count++] = &dev_attr_name.attr;
+	if (power_zone->ops->get_max_energy_range_uj)
+		power_zone->zone_dev_attrs[count++] =
+					&dev_attr_max_energy_range_uj.attr;
+	if (power_zone->ops->get_energy_uj)
+		power_zone->zone_dev_attrs[count++] =
+					&dev_attr_energy_uj.attr;
+	if (power_zone->ops->get_power_uw)
+		power_zone->zone_dev_attrs[count++] =
+					&dev_attr_power_uw.attr;
+	if (power_zone->ops->get_max_power_range_uw)
+		power_zone->zone_dev_attrs[count++] =
+					&dev_attr_max_power_range_uw.attr;
+	power_zone->zone_dev_attrs[count] = NULL;
+	power_zone->zone_attr_count = count;
+}
+
+static void powercap_release(struct device *dev)
+{
+	bool allocated;
+
+	if (dev->parent) {
+		struct powercap_zone *power_zone = to_powercap_zone(dev);
+
+		/* Store flag as the release() may free memory */
+		allocated = power_zone->allocated;
+		/* Remove id from parent idr struct */
+		idr_remove(power_zone->parent_idr, power_zone->id);
+		/* Destroy idrs allocated for this zone */
+		idr_destroy(&power_zone->idr);
+		kfree(power_zone->name);
+		kfree(power_zone->zone_dev_attrs);
+		kfree(power_zone->constraints);
+		if (power_zone->ops->release)
+			power_zone->ops->release(power_zone);
+		if (allocated)
+			kfree(power_zone);
+	} else {
+		struct powercap_control_type *control_type =
+						to_powercap_control_type(dev);
+
+		/* Store flag as the release() may free memory */
+		allocated = control_type->allocated;
+		idr_destroy(&control_type->idr);
+		mutex_destroy(&control_type->lock);
+		if (control_type->ops && control_type->ops->release)
+			control_type->ops->release(control_type);
+		if (allocated)
+			kfree(control_type);
+	}
+}
+
+static ssize_t enabled_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	bool mode = true;
+
+	/* Default is enabled */
+	if (dev->parent) {
+		struct powercap_zone *power_zone = to_powercap_zone(dev);
+		if (power_zone->ops->get_enable)
+			if (power_zone->ops->get_enable(power_zone, &mode))
+				mode = false;
+	} else {
+		struct powercap_control_type *control_type =
+						to_powercap_control_type(dev);
+		if (control_type->ops && control_type->ops->get_enable)
+			if (control_type->ops->get_enable(control_type, &mode))
+				mode = false;
+	}
+
+	return sprintf(buf, "%d\n", mode);
+}
+
+static ssize_t enabled_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf,  size_t len)
+{
+	bool mode;
+
+	if (strtobool(buf, &mode))
+		return -EINVAL;
+	if (dev->parent) {
+		struct powercap_zone *power_zone = to_powercap_zone(dev);
+		if (power_zone->ops->set_enable)
+			if (!power_zone->ops->set_enable(power_zone, mode))
+				return len;
+	} else {
+		struct powercap_control_type *control_type =
+						to_powercap_control_type(dev);
+		if (control_type->ops && control_type->ops->set_enable)
+			if (!control_type->ops->set_enable(control_type, mode))
+				return len;
+	}
+
+	return -ENOSYS;
+}
+
+static struct device_attribute powercap_def_attrs[] = {
+		__ATTR(enabled, S_IWUSR | S_IRUGO, enabled_show,
+							enabled_store),
+		__ATTR_NULL
+};
+
+static struct class powercap_class = {
+	.name = "powercap",
+	.dev_release = powercap_release,
+	.dev_attrs = powercap_def_attrs,
+};
+
+struct powercap_zone *powercap_register_zone(
+				struct powercap_zone *power_zone,
+				struct powercap_control_type *control_type,
+				const char *name,
+				struct powercap_zone *parent,
+				const struct powercap_zone_ops *ops,
+				int nr_constraints,
+				struct powercap_zone_constraint_ops *const_ops)
+{
+	int result;
+	int nr_attrs;
+
+	if (!name || !control_type || !ops ||
+			nr_constraints > MAX_CONSTRAINTS_PER_ZONE ||
+			(!ops->get_energy_uj && !ops->get_power_uw) ||
+			!control_type_valid(control_type))
+		return ERR_PTR(-EINVAL);
+
+	if (power_zone) {
+		if (!ops->release)
+			return ERR_PTR(-EINVAL);
+		memset(power_zone, 0, sizeof(*power_zone));
+	} else {
+		power_zone = kzalloc(sizeof(*power_zone), GFP_KERNEL);
+		if (!power_zone)
+			return ERR_PTR(-ENOMEM);
+		power_zone->allocated = true;
+	}
+	power_zone->ops = ops;
+	power_zone->control_type_inst = control_type;
+	if (!parent) {
+		power_zone->dev.parent = &control_type->dev;
+		power_zone->parent_idr = &control_type->idr;
+	} else {
+		power_zone->dev.parent = &parent->dev;
+		power_zone->parent_idr = &parent->idr;
+	}
+	power_zone->dev.class = &powercap_class;
+
+	mutex_lock(&control_type->lock);
+	/* Using idr to get the unique id */
+	result = idr_alloc(power_zone->parent_idr, NULL, 0, 0, GFP_KERNEL);
+	if (result < 0)
+		goto err_idr_alloc;
+
+	power_zone->id = result;
+	idr_init(&power_zone->idr);
+	power_zone->name = kstrdup(name, GFP_KERNEL);
+	if (!power_zone->name)
+		goto err_name_alloc;
+	dev_set_name(&power_zone->dev, "%s:%x",
+					dev_name(power_zone->dev.parent),
+					power_zone->id);
+	power_zone->constraints = kzalloc(sizeof(*power_zone->constraints) *
+					 nr_constraints, GFP_KERNEL);
+	if (!power_zone->constraints)
+		goto err_const_alloc;
+
+	nr_attrs = nr_constraints * POWERCAP_CONSTRAINTS_ATTRS +
+						POWERCAP_ZONE_MAX_ATTRS + 1;
+	power_zone->zone_dev_attrs = kzalloc(sizeof(void *) *
+						nr_attrs, GFP_KERNEL);
+	if (!power_zone->zone_dev_attrs)
+		goto err_attr_alloc;
+	create_power_zone_common_attributes(power_zone);
+	result = create_constraints(power_zone, nr_constraints, const_ops);
+	if (result)
+		goto err_dev_ret;
+
+	power_zone->zone_dev_attrs[power_zone->zone_attr_count] = NULL;
+	power_zone->dev_zone_attr_group.attrs = power_zone->zone_dev_attrs;
+	power_zone->dev_attr_groups[0] = &power_zone->dev_zone_attr_group;
+	power_zone->dev_attr_groups[1] = NULL;
+	power_zone->dev.groups = power_zone->dev_attr_groups;
+	result = device_register(&power_zone->dev);
+	if (result)
+		goto err_dev_ret;
+
+	control_type->nr_zones++;
+	mutex_unlock(&control_type->lock);
+
+	return power_zone;
+
+err_dev_ret:
+	kfree(power_zone->zone_dev_attrs);
+err_attr_alloc:
+	kfree(power_zone->constraints);
+err_const_alloc:
+	kfree(power_zone->name);
+err_name_alloc:
+	idr_remove(power_zone->parent_idr, power_zone->id);
+err_idr_alloc:
+	if (power_zone->allocated)
+		kfree(power_zone);
+	mutex_unlock(&control_type->lock);
+
+	return ERR_PTR(result);
+}
+EXPORT_SYMBOL_GPL(powercap_register_zone);
+
+int powercap_unregister_zone(struct powercap_control_type *control_type,
+				struct powercap_zone *power_zone)
+{
+	if (!power_zone || !control_type)
+		return -EINVAL;
+
+	mutex_lock(&control_type->lock);
+	control_type->nr_zones--;
+	mutex_unlock(&control_type->lock);
+
+	device_unregister(&power_zone->dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(powercap_unregister_zone);
+
+struct powercap_control_type *powercap_register_control_type(
+				struct powercap_control_type *control_type,
+				const char *name,
+				const struct powercap_control_type_ops *ops)
+{
+	int result;
+
+	if (!name)
+		return ERR_PTR(-EINVAL);
+	if (control_type) {
+		if (!ops || !ops->release)
+			return ERR_PTR(-EINVAL);
+		memset(control_type, 0, sizeof(*control_type));
+	} else {
+		control_type = kzalloc(sizeof(*control_type), GFP_KERNEL);
+		if (!control_type)
+			return ERR_PTR(-ENOMEM);
+		control_type->allocated = true;
+	}
+	mutex_init(&control_type->lock);
+	control_type->ops = ops;
+	INIT_LIST_HEAD(&control_type->node);
+	control_type->dev.class = &powercap_class;
+	dev_set_name(&control_type->dev, name);
+	result = device_register(&control_type->dev);
+	if (result) {
+		if (control_type->allocated)
+			kfree(control_type);
+		return ERR_PTR(result);
+	}
+	idr_init(&control_type->idr);
+
+	mutex_lock(&powercap_cntrl_list_lock);
+	list_add_tail(&control_type->node, &powercap_cntrl_list);
+	mutex_unlock(&powercap_cntrl_list_lock);
+
+	return control_type;
+}
+EXPORT_SYMBOL_GPL(powercap_register_control_type);
+
+int powercap_unregister_control_type(struct powercap_control_type *control_type)
+{
+	struct powercap_control_type *pos = NULL;
+
+	if (control_type->nr_zones) {
+		dev_err(&control_type->dev, "Zones of this type still not freed\n");
+		return -EINVAL;
+	}
+	mutex_lock(&powercap_cntrl_list_lock);
+	list_for_each_entry(pos, &powercap_cntrl_list, node) {
+		if (pos == control_type) {
+			list_del(&control_type->node);
+			mutex_unlock(&powercap_cntrl_list_lock);
+			device_unregister(&control_type->dev);
+			return 0;
+		}
+	}
+	mutex_unlock(&powercap_cntrl_list_lock);
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(powercap_unregister_control_type);
+
+static int __init powercap_init(void)
+{
+	int result = 0;
+
+	result = seed_constraint_attributes();
+	if (result)
+		return result;
+
+	result = class_register(&powercap_class);
+
+	return result;
+}
+
+device_initcall(powercap_init);
+
+MODULE_DESCRIPTION("PowerCap sysfs Driver");
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/powercap.h b/include/linux/powercap.h
new file mode 100644
index 000000000000..4e250417ee30
--- /dev/null
+++ b/include/linux/powercap.h
@@ -0,0 +1,325 @@
+/*
+ * powercap.h: Data types and headers for sysfs power capping interface
+ * Copyright (c) 2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.
+ *
+ */
+
+#ifndef __POWERCAP_H__
+#define __POWERCAP_H__
+
+#include <linux/device.h>
+#include <linux/idr.h>
+
+/*
+ * A power cap class device can contain multiple powercap control_types.
+ * Each control_type can have multiple power zones, which can be independently
+ * controlled. Each power zone can have one or more constraints.
+ */
+
+struct powercap_control_type;
+struct powercap_zone;
+struct powercap_zone_constraint;
+
+/**
+ * struct powercap_control_type_ops - Define control type callbacks
+ * @set_enable:		Enable/Disable whole control type.
+ *			Default is enabled. But this callback allows all zones
+ *			to be in disable state and remove any applied power
+ *			limits. If disabled power zone can only be monitored
+ *			not controlled.
+ * @get_enable:		get Enable/Disable status.
+ * @release:		Callback to inform that last reference to this
+ *			control type is closed. So it is safe to free data
+ *			structure associated with this control type.
+ *			This callback is mandatory if the client own memory
+ *			for the control type.
+ *
+ * This structure defines control type callbacks to be implemented by client
+ * drivers
+ */
+struct powercap_control_type_ops {
+	int (*set_enable) (struct powercap_control_type *, bool mode);
+	int (*get_enable) (struct powercap_control_type *, bool *mode);
+	int (*release) (struct powercap_control_type *);
+};
+
+/**
+ * struct powercap_control_type- Defines a powercap control_type
+ * @name:		name of control_type
+ * @dev:		device for this control_type
+ * @idr:		idr to have unique id for its child
+ * @root_node:		Root holding power zones for this control_type
+ * @ops:		Pointer to callback struct
+ * @node_lock:		mutex for control type
+ * @allocated:		This is possible that client owns the memory
+ *			used by this structure. In this case
+ *			this flag is set to false by framework to
+ *			prevent deallocation during release process.
+ *			Otherwise this flag is set to true.
+ * @ctrl_inst:		link to the control_type list
+ *
+ * Defines powercap control_type. This acts as a container for power
+ * zones, which use same method to control power. E.g. RAPL, RAPL-PCI etc.
+ * All fields are private and should not be used by client drivers.
+ */
+struct powercap_control_type {
+	struct device dev;
+	struct idr idr;
+	int nr_zones;
+	const struct powercap_control_type_ops *ops;
+	struct mutex lock;
+	bool allocated;
+	struct list_head node;
+};
+
+/**
+ * struct powercap_zone_ops - Define power zone callbacks
+ * @get_max_energy_range_uj:	Get maximum range of energy counter in
+ *				micro-joules.
+ * @get_energy_uj:		Get current energy counter in micro-joules.
+ * @reset_energy_uj:		Reset micro-joules energy counter.
+ * @get_max_power_range_uw:	Get maximum range of power counter in
+ *				micro-watts.
+ * @get_power_uw:		Get current power counter in micro-watts.
+ * @set_enable:			Enable/Disable power zone controls.
+ *				Default is enabled.
+ * @get_enable:			get Enable/Disable status.
+ * @release:			Callback to inform that last reference to this
+ *				control type is closed. So it is safe to free
+ *				data structure associated with this
+ *				control type. Mandatory, if client driver owns
+ *				the power_zone memory.
+ *
+ * This structure defines zone callbacks to be implemented by client drivers.
+ * Client drives can define both energy and power related callbacks. But at
+ * the least one type (either power or energy) is mandatory. Client drivers
+ * should handle mutual exclusion, if required in callbacks.
+ */
+struct powercap_zone_ops {
+	int (*get_max_energy_range_uj) (struct powercap_zone *, u64 *);
+	int (*get_energy_uj) (struct powercap_zone *, u64 *);
+	int (*reset_energy_uj) (struct powercap_zone *);
+	int (*get_max_power_range_uw) (struct powercap_zone *, u64 *);
+	int (*get_power_uw) (struct powercap_zone *, u64 *);
+	int (*set_enable) (struct powercap_zone *, bool mode);
+	int (*get_enable) (struct powercap_zone *, bool *mode);
+	int (*release) (struct powercap_zone *);
+};
+
+#define	POWERCAP_ZONE_MAX_ATTRS		6
+#define	POWERCAP_CONSTRAINTS_ATTRS	8
+#define MAX_CONSTRAINTS_PER_ZONE	10
+/**
+ * struct powercap_zone- Defines instance of a power cap zone
+ * @id:			Unique id
+ * @name:		Power zone name.
+ * @control_type_inst:	Control type instance for this zone.
+ * @ops:		Pointer to the zone operation structure.
+ * @dev:		Instance of a device.
+ * @const_id_cnt:	Number of constraint defined.
+ * @idr:		Instance to an idr entry for children zones.
+ * @parent_idr:		To remove reference from the parent idr.
+ * @private_data:	Private data pointer if any for this zone.
+ * @zone_dev_attrs:	Attributes associated with this device.
+ * @zone_attr_count:	Attribute count.
+ * @dev_zone_attr_group: Attribute group for attributes.
+ * @dev_attr_groups:	Attribute group store to register with device.
+ * @allocated:		This is possible that client owns the memory
+ *			used by this structure. In this case
+ *			this flag is set to false by framework to
+ *			prevent deallocation during release process.
+ *			Otherwise this flag is set to true.
+ * @constraint_ptr:	List of constraints for this zone.
+ *
+ * This defines a power zone instance. The fields of this structure are
+ * private, and should not be used by client drivers.
+ */
+struct powercap_zone {
+	int id;
+	char *name;
+	void *control_type_inst;
+	const struct powercap_zone_ops *ops;
+	struct device dev;
+	int const_id_cnt;
+	struct idr idr;
+	struct idr *parent_idr;
+	void *private_data;
+	struct attribute **zone_dev_attrs;
+	int zone_attr_count;
+	struct attribute_group dev_zone_attr_group;
+	const struct attribute_group *dev_attr_groups[2]; /* 1 group + NULL */
+	bool allocated;
+	struct powercap_zone_constraint *constraints;
+};
+
+/**
+ * struct powercap_zone_constraint_ops - Define constraint callbacks
+ * @set_power_limit_uw:		Set power limit in micro-watts.
+ * @get_power_limit_uw:		Get power limit in micro-watts.
+ * @set_time_window_us:		Set time window in micro-seconds.
+ * @get_time_window_us:		Get time window in micro-seconds.
+ * @get_max_power_uw:		Get max power allowed in micro-watts.
+ * @get_min_power_uw:		Get min power allowed in micro-watts.
+ * @get_max_time_window_us:	Get max time window allowed in micro-seconds.
+ * @get_min_time_window_us:	Get min time window allowed in micro-seconds.
+ * @get_name:			Get the name of constraint
+ *
+ * This structure is used to define the constraint callbacks for the client
+ * drivers. The following callbacks are mandatory and can't be NULL:
+ *  set_power_limit_uw
+ *  get_power_limit_uw
+ *  set_time_window_us
+ *  get_time_window_us
+ *  get_name
+ *  Client drivers should handle mutual exclusion, if required in callbacks.
+ */
+struct powercap_zone_constraint_ops {
+	int (*set_power_limit_uw) (struct powercap_zone *, int, u64);
+	int (*get_power_limit_uw) (struct powercap_zone *, int, u64 *);
+	int (*set_time_window_us) (struct powercap_zone *, int, u64);
+	int (*get_time_window_us) (struct powercap_zone *, int, u64 *);
+	int (*get_max_power_uw) (struct powercap_zone *, int, u64 *);
+	int (*get_min_power_uw) (struct powercap_zone *, int, u64 *);
+	int (*get_max_time_window_us) (struct powercap_zone *, int, u64 *);
+	int (*get_min_time_window_us) (struct powercap_zone *, int, u64 *);
+	const char *(*get_name) (struct powercap_zone *, int);
+};
+
+/**
+ * struct powercap_zone_constraint- Defines instance of a constraint
+ * @id:			Instance Id of this constraint.
+ * @power_zone:		Pointer to the power zone for this constraint.
+ * @ops:		Pointer to the constraint callbacks.
+ *
+ * This defines a constraint instance.
+ */
+struct powercap_zone_constraint {
+	int id;
+	struct powercap_zone *power_zone;
+	struct powercap_zone_constraint_ops *ops;
+};
+
+
+/* For clients to get their device pointer, may be used for dev_dbgs */
+#define POWERCAP_GET_DEV(power_zone)	(&power_zone->dev)
+
+/**
+* powercap_set_zone_data() - Set private data for a zone
+* @power_zone:	A pointer to the valid zone instance.
+* @pdata:	A pointer to the user private data.
+*
+* Allows client drivers to associate some private data to zone instance.
+*/
+static inline void powercap_set_zone_data(struct powercap_zone *power_zone,
+						void *pdata)
+{
+	if (power_zone)
+		power_zone->private_data = pdata;
+}
+
+/**
+* powercap_get_zone_data() - Get private data for a zone
+* @power_zone:	A pointer to the valid zone instance.
+*
+* Allows client drivers to get private data associate with a zone,
+* using call to powercap_set_zone_data.
+*/
+static inline void *powercap_get_zone_data(struct powercap_zone *power_zone)
+{
+	if (power_zone)
+		return power_zone->private_data;
+	return NULL;
+}
+
+/**
+* powercap_register_control_type() - Register a control_type with framework
+* @control_type:	Pointer to client allocated memory for the control type
+*			structure storage. If this is NULL, powercap framework
+*			will allocate memory and own it.
+*			Advantage of this parameter is that client can embed
+*			this data in its data structures and allocate in a
+*			single call, preventing multiple allocations.
+* @control_type_name:	The Name of this control_type, which will be shown
+*			in the sysfs Interface.
+* @ops:			Callbacks for control type. This parameter is optional.
+*
+* Used to create a control_type with the power capping class. Here control_type
+* can represent a type of technology, which can control a range of power zones.
+* For example a control_type can be RAPL (Running Average Power Limit)
+* Intel® 64 and IA-32 Processor Architectures. The name can be any string
+* which must be unique, otherwise this function returns NULL.
+* A pointer to the control_type instance is returned on success.
+*/
+struct powercap_control_type *powercap_register_control_type(
+				struct powercap_control_type *control_type,
+				const char *name,
+				const struct powercap_control_type_ops *ops);
+
+/**
+* powercap_unregister_control_type() - Unregister a control_type from framework
+* @instance:	A pointer to the valid control_type instance.
+*
+* Used to unregister a control_type with the power capping class.
+* All power zones registered under this control type have to be unregistered
+* before calling this function, or it will fail with an error code.
+*/
+int powercap_unregister_control_type(struct powercap_control_type *instance);
+
+/* Zone register/unregister API */
+
+/**
+* powercap_register_zone() - Register a power zone
+* @power_zone:	Pointer to client allocated memory for the power zone structure
+*		storage. If this is NULL, powercap framework will allocate
+*		memory and own it. Advantage of this parameter is that client
+*		can embed this data in its data structures and allocate in a
+*		single call, preventing multiple allocations.
+* @control_type: A control_type instance under which this zone operates.
+* @name:	A name for this zone.
+* @parent:	A pointer to the parent power zone instance if any or NULL
+* @ops:		Pointer to zone operation callback structure.
+* @no_constraints: Number of constraints for this zone
+* @const_ops:	Pointer to constraint callback structure
+*
+* Register a power zone under a given control type. A power zone must register
+* a pointer to a structure representing zone callbacks.
+* A power zone can be located under a parent power zone, in which case @parent
+* should point to it.  Otherwise, if @parent is NULL, the new power zone will
+* be located directly under the given control type
+* For each power zone there may be a number of constraints that appear in the
+* sysfs under that zone as attributes with unique numeric IDs.
+* Returns pointer to the power_zone on success.
+*/
+struct powercap_zone *powercap_register_zone(
+			struct powercap_zone *power_zone,
+			struct powercap_control_type *control_type,
+			const char *name,
+			struct powercap_zone *parent,
+			const struct powercap_zone_ops *ops,
+			int nr_constraints,
+			struct powercap_zone_constraint_ops *const_ops);
+
+/**
+* powercap_unregister_zone() - Unregister a zone device
+* @control_type:	A pointer to the valid instance of a control_type.
+* @power_zone:	A pointer to the valid zone instance for a control_type
+*
+* Used to unregister a zone device for a control_type.  Caller should
+* make sure that children for this zone are unregistered first.
+*/
+int powercap_unregister_zone(struct powercap_control_type *control_type,
+				struct powercap_zone *power_zone);
+
+#endif
-- 
cgit v1.2.3


From bfd1ff6375c82930bfb3b401eee2c96720fa8e84 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 11 Oct 2013 16:54:59 -0700
Subject: bitops: Introduce BIT_ULL

Adding BIT(x) equivalent for unsigned long long type, BIT_ULL(x). Also
added BIT_ULL_MASK and BIT_ULL_WORD.

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/bitops.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3b6b82108b9..5a1c8b71ccd8 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -4,8 +4,11 @@
 
 #ifdef	__KERNEL__
 #define BIT(nr)			(1UL << (nr))
+#define BIT_ULL(nr)		(1ULL << (nr))
 #define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
 #define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BIT_ULL_MASK(nr)	(1ULL << ((nr) % BITS_PER_LONG_LONG))
+#define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
 #define BITS_PER_BYTE		8
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
 #endif
-- 
cgit v1.2.3


From 0b90d0622ad290b3717a13489b396af52aea9d2d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 14 Oct 2013 18:11:51 +1030
Subject: virtio_config: introduce size-based accessors.

This lets the us do endian conversion if necessary, and insulates the
drivers from that change.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_config.h | 134 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 29b9104232b4..490a4bbd59a3 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -162,5 +162,139 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
 	return 0;
 }
 
+/* Config space accessors. */
+#define virtio_cread(vdev, structname, member, ptr)			\
+	do {								\
+		/* Must match the member's type, and be integer */	\
+		if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \
+			(*ptr) = 1;					\
+									\
+		switch (sizeof(*ptr)) {					\
+		case 1:							\
+			*(ptr) = virtio_cread8(vdev,			\
+					       offsetof(structname, member)); \
+			break;						\
+		case 2:							\
+			*(ptr) = virtio_cread16(vdev,			\
+						offsetof(structname, member)); \
+			break;						\
+		case 4:							\
+			*(ptr) = virtio_cread32(vdev,			\
+						offsetof(structname, member)); \
+			break;						\
+		case 8:							\
+			*(ptr) = virtio_cread64(vdev,			\
+						offsetof(structname, member)); \
+			break;						\
+		default:						\
+			BUG();						\
+		}							\
+	} while(0)
+
+/* Config space accessors. */
+#define virtio_cwrite(vdev, structname, member, ptr)			\
+	do {								\
+		/* Must match the member's type, and be integer */	\
+		if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \
+			BUG_ON((*ptr) == 1);				\
+									\
+		switch (sizeof(*ptr)) {					\
+		case 1:							\
+			virtio_cwrite8(vdev,				\
+				       offsetof(structname, member),	\
+				       *(ptr));				\
+			break;						\
+		case 2:							\
+			virtio_cwrite16(vdev,				\
+					offsetof(structname, member),	\
+					*(ptr));			\
+			break;						\
+		case 4:							\
+			virtio_cwrite32(vdev,				\
+					offsetof(structname, member),	\
+					*(ptr));			\
+			break;						\
+		case 8:							\
+			virtio_cwrite64(vdev,				\
+					offsetof(structname, member),	\
+					*(ptr));			\
+			break;						\
+		default:						\
+			BUG();						\
+		}							\
+	} while(0)
+
+static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset)
+{
+	u8 ret;
+	vdev->config->get(vdev, offset, &ret, sizeof(ret));
+	return ret;
+}
+
+static inline void virtio_cread_bytes(struct virtio_device *vdev,
+				      unsigned int offset,
+				      void *buf, size_t len)
+{
+	vdev->config->get(vdev, offset, buf, len);
+}
+
+static inline void virtio_cwrite8(struct virtio_device *vdev,
+				  unsigned int offset, u8 val)
+{
+	vdev->config->set(vdev, offset, &val, sizeof(val));
+}
+
+static inline u16 virtio_cread16(struct virtio_device *vdev,
+				 unsigned int offset)
+{
+	u16 ret;
+	vdev->config->get(vdev, offset, &ret, sizeof(ret));
+	return ret;
+}
+
+static inline void virtio_cwrite16(struct virtio_device *vdev,
+				   unsigned int offset, u16 val)
+{
+	vdev->config->set(vdev, offset, &val, sizeof(val));
+}
+
+static inline u32 virtio_cread32(struct virtio_device *vdev,
+				 unsigned int offset)
+{
+	u32 ret;
+	vdev->config->get(vdev, offset, &ret, sizeof(ret));
+	return ret;
+}
+
+static inline void virtio_cwrite32(struct virtio_device *vdev,
+				   unsigned int offset, u32 val)
+{
+	vdev->config->set(vdev, offset, &val, sizeof(val));
+}
+
+static inline u64 virtio_cread64(struct virtio_device *vdev,
+				 unsigned int offset)
+{
+	u64 ret;
+	vdev->config->get(vdev, offset, &ret, sizeof(ret));
+	return ret;
+}
+
+static inline void virtio_cwrite64(struct virtio_device *vdev,
+				   unsigned int offset, u64 val)
+{
+	vdev->config->set(vdev, offset, &val, sizeof(val));
+}
+
+/* Conditional config space accessors. */
+#define virtio_cread_feature(vdev, fbit, structname, member, ptr)	\
+	({								\
+		int _r = 0;						\
+		if (!virtio_has_feature(vdev, fbit))			\
+			_r = -ENOENT;					\
+		else							\
+			virtio_cread((vdev), structname, member, ptr);	\
+		_r;							\
+	})
 
 #endif /* _LINUX_VIRTIO_CONFIG_H */
-- 
cgit v1.2.3


From 630b54d33493d1f67e79b148b5e361c5bbd3f29d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 14 Oct 2013 18:11:51 +1030
Subject: virtio_config: remove virtio_config_val

The virtio_cread() functions should now be used.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_config.h | 27 ---------------------------
 1 file changed, 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 490a4bbd59a3..e8f8f71e843c 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -96,33 +96,6 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
 	return test_bit(fbit, vdev->features);
 }
 
-/**
- * virtio_config_val - look for a feature and get a virtio config entry.
- * @vdev: the virtio device
- * @fbit: the feature bit
- * @offset: the type to search for.
- * @v: a pointer to the value to fill in.
- *
- * The return value is -ENOENT if the feature doesn't exist.  Otherwise
- * the config value is copied into whatever is pointed to by v. */
-#define virtio_config_val(vdev, fbit, offset, v) \
-	virtio_config_buf((vdev), (fbit), (offset), (v), sizeof(*v))
-
-#define virtio_config_val_len(vdev, fbit, offset, v, len) \
-	virtio_config_buf((vdev), (fbit), (offset), (v), (len))
-
-static inline int virtio_config_buf(struct virtio_device *vdev,
-				    unsigned int fbit,
-				    unsigned int offset,
-				    void *buf, unsigned len)
-{
-	if (!virtio_has_feature(vdev, fbit))
-		return -ENOENT;
-
-	vdev->config->get(vdev, offset, buf, len);
-	return 0;
-}
-
 static inline
 struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 					vq_callback_t *c, const char *n)
-- 
cgit v1.2.3


From 64c862a839a8db2c02bbaa88b923d13e1208919d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 11 Oct 2013 13:11:38 -0700
Subject: devres: add kernel standard devm_k.alloc functions

Currently, devm_ managed memory only supports kzalloc.

Convert the devm_kzalloc implementation to devm_kmalloc and remove the
complete memset to 0 but still set the initial struct devres header and
whatever padding before data to 0.

Add the other normal alloc variants as static inlines with __GFP_ZERO
added to the gfp flag where appropriate:

	devm_kzalloc
	devm_kcalloc
	devm_kmalloc_array

Add gfp.h to device.h for the newly added static inlines.

akpm: the current API forces us to replace kmalloc() with kzalloc() when
performing devm_ conversions.  This adds a relatively minor overhead.
More significantly, it will defeat kmemcheck used-uninitialized checking,
and for a particular driver, losing used-uninitialised checking for their
core controlling data structures will significantly degrade kmemcheck
usefulness.

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Sangjung Woo <sangjung.woo@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/devres.c  | 27 ++++++++++++++++-----------
 include/linux/device.h | 21 +++++++++++++++++++--
 2 files changed, 35 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 507379e7b763..37e67a26e388 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -91,7 +91,8 @@ static __always_inline struct devres * alloc_dr(dr_release_t release,
 	if (unlikely(!dr))
 		return NULL;
 
-	memset(dr, 0, tot_size);
+	memset(dr, 0, offsetof(struct devres, data));
+
 	INIT_LIST_HEAD(&dr->node.entry);
 	dr->node.release = release;
 	return dr;
@@ -745,58 +746,62 @@ void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
 EXPORT_SYMBOL_GPL(devm_remove_action);
 
 /*
- * Managed kzalloc/kfree
+ * Managed kmalloc/kfree
  */
-static void devm_kzalloc_release(struct device *dev, void *res)
+static void devm_kmalloc_release(struct device *dev, void *res)
 {
 	/* noop */
 }
 
-static int devm_kzalloc_match(struct device *dev, void *res, void *data)
+static int devm_kmalloc_match(struct device *dev, void *res, void *data)
 {
 	return res == data;
 }
 
 /**
- * devm_kzalloc - Resource-managed kzalloc
+ * devm_kmalloc - Resource-managed kmalloc
  * @dev: Device to allocate memory for
  * @size: Allocation size
  * @gfp: Allocation gfp flags
  *
- * Managed kzalloc.  Memory allocated with this function is
+ * Managed kmalloc.  Memory allocated with this function is
  * automatically freed on driver detach.  Like all other devres
  * resources, guaranteed alignment is unsigned long long.
  *
  * RETURNS:
  * Pointer to allocated memory on success, NULL on failure.
  */
-void * devm_kzalloc(struct device *dev, size_t size, gfp_t gfp)
+void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
 {
 	struct devres *dr;
 
 	/* use raw alloc_dr for kmalloc caller tracing */
-	dr = alloc_dr(devm_kzalloc_release, size, gfp);
+	dr = alloc_dr(devm_kmalloc_release, size, gfp);
 	if (unlikely(!dr))
 		return NULL;
 
+	/*
+	 * This is named devm_kzalloc_release for historical reasons
+	 * The initial implementation did not support kmalloc, only kzalloc
+	 */
 	set_node_dbginfo(&dr->node, "devm_kzalloc_release", size);
 	devres_add(dev, dr->data);
 	return dr->data;
 }
-EXPORT_SYMBOL_GPL(devm_kzalloc);
+EXPORT_SYMBOL_GPL(devm_kmalloc);
 
 /**
  * devm_kfree - Resource-managed kfree
  * @dev: Device this memory belongs to
  * @p: Memory to free
  *
- * Free memory allocated with devm_kzalloc().
+ * Free memory allocated with devm_kmalloc().
  */
 void devm_kfree(struct device *dev, void *p)
 {
 	int rc;
 
-	rc = devres_destroy(dev, devm_kzalloc_release, devm_kzalloc_match, p);
+	rc = devres_destroy(dev, devm_kmalloc_release, devm_kmalloc_match, p);
 	WARN_ON(rc);
 }
 EXPORT_SYMBOL_GPL(devm_kfree);
diff --git a/include/linux/device.h b/include/linux/device.h
index 94638efa0bf8..5e44cff5bced 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -26,6 +26,7 @@
 #include <linux/atomic.h>
 #include <linux/ratelimit.h>
 #include <linux/uidgid.h>
+#include <linux/gfp.h>
 #include <asm/device.h>
 
 struct device;
@@ -606,8 +607,24 @@ extern void devres_close_group(struct device *dev, void *id);
 extern void devres_remove_group(struct device *dev, void *id);
 extern int devres_release_group(struct device *dev, void *id);
 
-/* managed kzalloc/kfree for device drivers, no kmalloc, always use kzalloc */
-extern void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp);
+/* managed devm_k.alloc/kfree for device drivers */
+extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp);
+static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp)
+{
+	return devm_kmalloc(dev, size, gfp | __GFP_ZERO);
+}
+static inline void *devm_kmalloc_array(struct device *dev,
+				       size_t n, size_t size, gfp_t flags)
+{
+	if (size != 0 && n > SIZE_MAX / size)
+		return NULL;
+	return devm_kmalloc(dev, n * size, flags);
+}
+static inline void *devm_kcalloc(struct device *dev,
+				 size_t n, size_t size, gfp_t flags)
+{
+	return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO);
+}
 extern void devm_kfree(struct device *dev, void *p);
 
 void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
-- 
cgit v1.2.3


From fb3fed7926545e44ce36574e1b1c5cdeb018db5c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 7 Oct 2013 18:27:35 -0700
Subject: ide: convert bus code to use dev_groups

The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the ide bus code to use the
correct field.

Acked-by: David S. Miller <davem@davemloft.net>
Cc: <linux-ide@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ide/ide-sysfs.c | 35 ++++++++++++++++++++++++++---------
 drivers/ide/ide.c       |  2 +-
 include/linux/ide.h     |  2 +-
 3 files changed, 28 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-sysfs.c b/drivers/ide/ide-sysfs.c
index 883ffacaf45a..84a6a9e08d64 100644
--- a/drivers/ide/ide-sysfs.c
+++ b/drivers/ide/ide-sysfs.c
@@ -25,6 +25,7 @@ static ssize_t media_show(struct device *dev, struct device_attribute *attr,
 	ide_drive_t *drive = to_ide_device(dev);
 	return sprintf(buf, "%s\n", ide_media_string(drive));
 }
+static DEVICE_ATTR_RO(media);
 
 static ssize_t drivename_show(struct device *dev, struct device_attribute *attr,
 			      char *buf)
@@ -32,6 +33,7 @@ static ssize_t drivename_show(struct device *dev, struct device_attribute *attr,
 	ide_drive_t *drive = to_ide_device(dev);
 	return sprintf(buf, "%s\n", drive->name);
 }
+static DEVICE_ATTR_RO(drivename);
 
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
@@ -39,6 +41,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 	ide_drive_t *drive = to_ide_device(dev);
 	return sprintf(buf, "ide:m-%s\n", ide_media_string(drive));
 }
+static DEVICE_ATTR_RO(modalias);
 
 static ssize_t model_show(struct device *dev, struct device_attribute *attr,
 			  char *buf)
@@ -46,6 +49,7 @@ static ssize_t model_show(struct device *dev, struct device_attribute *attr,
 	ide_drive_t *drive = to_ide_device(dev);
 	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]);
 }
+static DEVICE_ATTR_RO(model);
 
 static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
@@ -53,6 +57,7 @@ static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
 	ide_drive_t *drive = to_ide_device(dev);
 	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]);
 }
+static DEVICE_ATTR_RO(firmware);
 
 static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
 			   char *buf)
@@ -60,16 +65,28 @@ static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
 	ide_drive_t *drive = to_ide_device(dev);
 	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]);
 }
+static DEVICE_ATTR(serial, 0400, serial_show, NULL);
+
+static DEVICE_ATTR(unload_heads, 0644, ide_park_show, ide_park_store);
+
+static struct attribute *ide_attrs[] = {
+	&dev_attr_media.attr,
+	&dev_attr_drivename.attr,
+	&dev_attr_modalias.attr,
+	&dev_attr_model.attr,
+	&dev_attr_firmware.attr,
+	&dev_attr_serial.attr,
+	&dev_attr_unload_heads.attr,
+	NULL,
+};
+
+static const struct attribute_group ide_attr_group = {
+	.attrs = ide_attrs,
+};
 
-struct device_attribute ide_dev_attrs[] = {
-	__ATTR_RO(media),
-	__ATTR_RO(drivename),
-	__ATTR_RO(modalias),
-	__ATTR_RO(model),
-	__ATTR_RO(firmware),
-	__ATTR(serial, 0400, serial_show, NULL),
-	__ATTR(unload_heads, 0644, ide_park_show, ide_park_store),
-	__ATTR_NULL
+const struct attribute_group *ide_dev_groups[] = {
+	&ide_attr_group,
+	NULL,
 };
 
 static ssize_t store_delete_devices(struct device *portdev,
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index fa896210ed7b..2ce6268a2734 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -158,7 +158,7 @@ struct bus_type ide_bus_type = {
 	.probe		= generic_ide_probe,
 	.remove		= generic_ide_remove,
 	.shutdown	= generic_ide_shutdown,
-	.dev_attrs	= ide_dev_attrs,
+	.dev_groups	= ide_dev_groups,
 	.suspend	= generic_ide_suspend,
 	.resume		= generic_ide_resume,
 };
diff --git a/include/linux/ide.h b/include/linux/ide.h
index b17974917dbf..46a14229a162 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1514,7 +1514,7 @@ static inline void ide_set_max_pio(ide_drive_t *drive)
 
 char *ide_media_string(ide_drive_t *);
 
-extern struct device_attribute ide_dev_attrs[];
+extern const struct attribute_group *ide_dev_groups[];
 extern struct bus_type ide_bus_type;
 extern struct class *ide_port_class;
 
-- 
cgit v1.2.3


From 4942642080ea82d99ab5b653abb9a12b7ba31f4a Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 16 Oct 2013 13:46:59 -0700
Subject: mm: memcg: handle non-error OOM situations more gracefully

Commit 3812c8c8f395 ("mm: memcg: do not trap chargers with full
callstack on OOM") assumed that only a few places that can trigger a
memcg OOM situation do not return VM_FAULT_OOM, like optional page cache
readahead.  But there are many more and it's impractical to annotate
them all.

First of all, we don't want to invoke the OOM killer when the failed
allocation is gracefully handled, so defer the actual kill to the end of
the fault handling as well.  This simplifies the code quite a bit for
added bonus.

Second, since a failed allocation might not be the abrupt end of the
fault, the memcg OOM handler needs to be re-entrant until the fault
finishes for subsequent allocation attempts.  If an allocation is
attempted after the task already OOMed, allow it to bypass the limit so
that it can quickly finish the fault and invoke the OOM killer.

Reported-by: azurIt <azurit@pobox.sk>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  50 ++++------------
 include/linux/sched.h      |   7 +--
 mm/filemap.c               |  11 +---
 mm/memcontrol.c            | 139 +++++++++++++++++----------------------------
 mm/memory.c                |  18 ++++--
 mm/oom_kill.c              |   2 +-
 6 files changed, 79 insertions(+), 148 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ecc82b37c4cc..b3e7a667e03c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -137,47 +137,24 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
-/**
- * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
- * @new: true to enable, false to disable
- *
- * Toggle whether a failed memcg charge should invoke the OOM killer
- * or just return -ENOMEM.  Returns the previous toggle state.
- *
- * NOTE: Any path that enables the OOM killer before charging must
- *       call mem_cgroup_oom_synchronize() afterward to finalize the
- *       OOM handling and clean up.
- */
-static inline bool mem_cgroup_toggle_oom(bool new)
+static inline void mem_cgroup_oom_enable(void)
 {
-	bool old;
-
-	old = current->memcg_oom.may_oom;
-	current->memcg_oom.may_oom = new;
-
-	return old;
+	WARN_ON(current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 1;
 }
 
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
-	bool old = mem_cgroup_toggle_oom(true);
-
-	WARN_ON(old == true);
-}
-
-static inline void mem_cgroup_disable_oom(void)
-{
-	bool old = mem_cgroup_toggle_oom(false);
-
-	WARN_ON(old == false);
+	WARN_ON(!current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 0;
 }
 
 static inline bool task_in_memcg_oom(struct task_struct *p)
 {
-	return p->memcg_oom.in_memcg_oom;
+	return p->memcg_oom.memcg;
 }
 
-bool mem_cgroup_oom_synchronize(void);
+bool mem_cgroup_oom_synchronize(bool wait);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
@@ -402,16 +379,11 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
 {
 }
 
-static inline bool mem_cgroup_toggle_oom(bool new)
-{
-	return false;
-}
-
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_enable(void)
 {
 }
 
-static inline void mem_cgroup_disable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
 }
 
@@ -420,7 +392,7 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 	return false;
 }
 
-static inline bool mem_cgroup_oom_synchronize(void)
+static inline bool mem_cgroup_oom_synchronize(bool wait)
 {
 	return false;
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6682da36b293..e27baeeda3f4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1394,11 +1394,10 @@ struct task_struct {
 	} memcg_batch;
 	unsigned int memcg_kmem_skip_account;
 	struct memcg_oom_info {
+		struct mem_cgroup *memcg;
+		gfp_t gfp_mask;
+		int order;
 		unsigned int may_oom:1;
-		unsigned int in_memcg_oom:1;
-		unsigned int oom_locked:1;
-		int wakeups;
-		struct mem_cgroup *wait_on_memcg;
 	} memcg_oom;
 #endif
 #ifdef CONFIG_UPROBES
diff --git a/mm/filemap.c b/mm/filemap.c
index 1e6aec4a2d2e..ae4846ff4849 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1616,7 +1616,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct inode *inode = mapping->host;
 	pgoff_t offset = vmf->pgoff;
 	struct page *page;
-	bool memcg_oom;
 	pgoff_t size;
 	int ret = 0;
 
@@ -1625,11 +1624,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	/*
-	 * Do we have something in the page cache already?  Either
-	 * way, try readahead, but disable the memcg OOM killer for it
-	 * as readahead is optional and no errors are propagated up
-	 * the fault stack.  The OOM killer is enabled while trying to
-	 * instantiate the faulting page individually below.
+	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
 	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1637,14 +1632,10 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_async_mmap_readahead(vma, ra, file, page, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 	} else if (!page) {
 		/* No page in the page cache at all */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_sync_mmap_readahead(vma, ra, file, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5335b2b6be77..65fc6a449841 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2161,110 +2161,59 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 		memcg_wakeup_oom(memcg);
 }
 
-/*
- * try to call OOM killer
- */
 static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	bool locked;
-	int wakeups;
-
 	if (!current->memcg_oom.may_oom)
 		return;
-
-	current->memcg_oom.in_memcg_oom = 1;
-
 	/*
-	 * As with any blocking lock, a contender needs to start
-	 * listening for wakeups before attempting the trylock,
-	 * otherwise it can miss the wakeup from the unlock and sleep
-	 * indefinitely.  This is just open-coded because our locking
-	 * is so particular to memcg hierarchies.
+	 * We are in the middle of the charge context here, so we
+	 * don't want to block when potentially sitting on a callstack
+	 * that holds all kinds of filesystem and mm locks.
+	 *
+	 * Also, the caller may handle a failed allocation gracefully
+	 * (like optional page cache readahead) and so an OOM killer
+	 * invocation might not even be necessary.
+	 *
+	 * That's why we don't do anything here except remember the
+	 * OOM context and then deal with it at the end of the page
+	 * fault when the stack is unwound, the locks are released,
+	 * and when we know whether the fault was overall successful.
 	 */
-	wakeups = atomic_read(&memcg->oom_wakeups);
-	mem_cgroup_mark_under_oom(memcg);
-
-	locked = mem_cgroup_oom_trylock(memcg);
-
-	if (locked)
-		mem_cgroup_oom_notify(memcg);
-
-	if (locked && !memcg->oom_kill_disable) {
-		mem_cgroup_unmark_under_oom(memcg);
-		mem_cgroup_out_of_memory(memcg, mask, order);
-		mem_cgroup_oom_unlock(memcg);
-		/*
-		 * There is no guarantee that an OOM-lock contender
-		 * sees the wakeups triggered by the OOM kill
-		 * uncharges.  Wake any sleepers explicitely.
-		 */
-		memcg_oom_recover(memcg);
-	} else {
-		/*
-		 * A system call can just return -ENOMEM, but if this
-		 * is a page fault and somebody else is handling the
-		 * OOM already, we need to sleep on the OOM waitqueue
-		 * for this memcg until the situation is resolved.
-		 * Which can take some time because it might be
-		 * handled by a userspace task.
-		 *
-		 * However, this is the charge context, which means
-		 * that we may sit on a large call stack and hold
-		 * various filesystem locks, the mmap_sem etc. and we
-		 * don't want the OOM handler to deadlock on them
-		 * while we sit here and wait.  Store the current OOM
-		 * context in the task_struct, then return -ENOMEM.
-		 * At the end of the page fault handler, with the
-		 * stack unwound, pagefault_out_of_memory() will check
-		 * back with us by calling
-		 * mem_cgroup_oom_synchronize(), possibly putting the
-		 * task to sleep.
-		 */
-		current->memcg_oom.oom_locked = locked;
-		current->memcg_oom.wakeups = wakeups;
-		css_get(&memcg->css);
-		current->memcg_oom.wait_on_memcg = memcg;
-	}
+	css_get(&memcg->css);
+	current->memcg_oom.memcg = memcg;
+	current->memcg_oom.gfp_mask = mask;
+	current->memcg_oom.order = order;
 }
 
 /**
  * mem_cgroup_oom_synchronize - complete memcg OOM handling
+ * @handle: actually kill/wait or just clean up the OOM state
  *
- * This has to be called at the end of a page fault if the the memcg
- * OOM handler was enabled and the fault is returning %VM_FAULT_OOM.
+ * This has to be called at the end of a page fault if the memcg OOM
+ * handler was enabled.
  *
- * Memcg supports userspace OOM handling, so failed allocations must
+ * Memcg supports userspace OOM handling where failed allocations must
  * sleep on a waitqueue until the userspace task resolves the
  * situation.  Sleeping directly in the charge context with all kinds
  * of locks held is not a good idea, instead we remember an OOM state
  * in the task and mem_cgroup_oom_synchronize() has to be called at
- * the end of the page fault to put the task to sleep and clean up the
- * OOM state.
+ * the end of the page fault to complete the OOM handling.
  *
  * Returns %true if an ongoing memcg OOM situation was detected and
- * finalized, %false otherwise.
+ * completed, %false otherwise.
  */
-bool mem_cgroup_oom_synchronize(void)
+bool mem_cgroup_oom_synchronize(bool handle)
 {
+	struct mem_cgroup *memcg = current->memcg_oom.memcg;
 	struct oom_wait_info owait;
-	struct mem_cgroup *memcg;
+	bool locked;
 
 	/* OOM is global, do not handle */
-	if (!current->memcg_oom.in_memcg_oom)
-		return false;
-
-	/*
-	 * We invoked the OOM killer but there is a chance that a kill
-	 * did not free up any charges.  Everybody else might already
-	 * be sleeping, so restart the fault and keep the rampage
-	 * going until some charges are released.
-	 */
-	memcg = current->memcg_oom.wait_on_memcg;
 	if (!memcg)
-		goto out;
+		return false;
 
-	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
-		goto out_memcg;
+	if (!handle)
+		goto cleanup;
 
 	owait.memcg = memcg;
 	owait.wait.flags = 0;
@@ -2273,13 +2222,25 @@ bool mem_cgroup_oom_synchronize(void)
 	INIT_LIST_HEAD(&owait.wait.task_list);
 
 	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-	/* Only sleep if we didn't miss any wakeups since OOM */
-	if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups)
+	mem_cgroup_mark_under_oom(memcg);
+
+	locked = mem_cgroup_oom_trylock(memcg);
+
+	if (locked)
+		mem_cgroup_oom_notify(memcg);
+
+	if (locked && !memcg->oom_kill_disable) {
+		mem_cgroup_unmark_under_oom(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+		mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask,
+					 current->memcg_oom.order);
+	} else {
 		schedule();
-	finish_wait(&memcg_oom_waitq, &owait.wait);
-out_memcg:
-	mem_cgroup_unmark_under_oom(memcg);
-	if (current->memcg_oom.oom_locked) {
+		mem_cgroup_unmark_under_oom(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+	}
+
+	if (locked) {
 		mem_cgroup_oom_unlock(memcg);
 		/*
 		 * There is no guarantee that an OOM-lock contender
@@ -2288,10 +2249,9 @@ out_memcg:
 		 */
 		memcg_oom_recover(memcg);
 	}
+cleanup:
+	current->memcg_oom.memcg = NULL;
 	css_put(&memcg->css);
-	current->memcg_oom.wait_on_memcg = NULL;
-out:
-	current->memcg_oom.in_memcg_oom = 0;
 	return true;
 }
 
@@ -2705,6 +2665,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		     || fatal_signal_pending(current)))
 		goto bypass;
 
+	if (unlikely(task_in_memcg_oom(current)))
+		goto bypass;
+
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
diff --git a/mm/memory.c b/mm/memory.c
index f7b7692c05ed..1311f26497e6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3865,15 +3865,21 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * space.  Kernel faults are handled more gracefully.
 	 */
 	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_enable_oom();
+		mem_cgroup_oom_enable();
 
 	ret = __handle_mm_fault(mm, vma, address, flags);
 
-	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_disable_oom();
-
-	if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
-		mem_cgroup_oom_synchronize();
+	if (flags & FAULT_FLAG_USER) {
+		mem_cgroup_oom_disable();
+                /*
+                 * The task may have entered a memcg OOM situation but
+                 * if the allocation error was handled gracefully (no
+                 * VM_FAULT_OOM), there is no need to kill anything.
+                 * Just clean up the OOM state peacefully.
+                 */
+                if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+                        mem_cgroup_oom_synchronize(false);
+	}
 
 	return ret;
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 314e9d274381..6738c47f1f72 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -680,7 +680,7 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
-	if (mem_cgroup_oom_synchronize())
+	if (mem_cgroup_oom_synchronize(true))
 		return;
 
 	zonelist = node_zonelist(first_online_node, GFP_KERNEL);
-- 
cgit v1.2.3


From 420f9739a62cdb027f5580d25c813501ff93aa6f Mon Sep 17 00:00:00 2001
From: David Henningsson <david.henningsson@canonical.com>
Date: Wed, 16 Oct 2013 23:10:31 +0200
Subject: thinkpad-acpi: Add mute and mic-mute LED functionality

The LEDs are currently not visible to userspace, for security
reasons. They are exported through thinkpad_acpi.h for use by the
snd-hda-intel driver.

Thanks to Alex Hung <alex.hung@canonical.com> and Takashi Iwai
<tiwai@suse.de> for writing parts of this patch.

Signed-off-by: David Henningsson <david.henningsson@canonical.com>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 Documentation/laptops/thinkpad-acpi.txt |  7 ++-
 drivers/platform/x86/thinkpad_acpi.c    | 92 ++++++++++++++++++++++++++++++++-
 include/linux/thinkpad_acpi.h           | 15 ++++++
 3 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/thinkpad_acpi.h

(limited to 'include/linux')

diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 86c52360ffe7..fc04c14de4bb 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -1,7 +1,7 @@
 		     ThinkPad ACPI Extras Driver
 
-                            Version 0.24
-                        December 11th,  2009
+                            Version 0.25
+                        October 16th,  2013
 
                Borislav Deianov <borislav@users.sf.net>
              Henrique de Moraes Holschuh <hmh@hmh.eng.br>
@@ -741,6 +741,9 @@ compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled.
 Distributions must never enable this option.  Individual users that
 are aware of the consequences are welcome to enabling it.
 
+Audio mute and microphone mute LEDs are supported, but currently not
+visible to userspace. They are used by the snd-hda-intel audio driver.
+
 procfs notes:
 
 The available commands are:
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 03ca6c139f1a..0b7efb269cf1 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -23,7 +23,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#define TPACPI_VERSION "0.24"
+#define TPACPI_VERSION "0.25"
 #define TPACPI_SYSFS_VERSION 0x020700
 
 /*
@@ -88,6 +88,7 @@
 
 #include <linux/pci_ids.h>
 
+#include <linux/thinkpad_acpi.h>
 
 /* ThinkPad CMOS commands */
 #define TP_CMOS_VOLUME_DOWN	0
@@ -8350,6 +8351,91 @@ static struct ibm_struct fan_driver_data = {
 	.resume = fan_resume,
 };
 
+/*************************************************************************
+ * Mute LED subdriver
+ */
+
+
+struct tp_led_table {
+	acpi_string name;
+	int on_value;
+	int off_value;
+	int state;
+};
+
+static struct tp_led_table led_tables[] = {
+	[TPACPI_LED_MUTE] = {
+		.name = "SSMS",
+		.on_value = 1,
+		.off_value = 0,
+	},
+	[TPACPI_LED_MICMUTE] = {
+		.name = "MMTS",
+		.on_value = 2,
+		.off_value = 0,
+	},
+};
+
+static int mute_led_on_off(struct tp_led_table *t, bool state)
+{
+	acpi_handle temp;
+	int output;
+
+	if (!ACPI_SUCCESS(acpi_get_handle(hkey_handle, t->name, &temp))) {
+		pr_warn("Thinkpad ACPI has no %s interface.\n", t->name);
+		return -EIO;
+	}
+
+	if (!acpi_evalf(hkey_handle, &output, t->name, "dd",
+			state ? t->on_value : t->off_value))
+		return -EIO;
+
+	t->state = state;
+	return state;
+}
+
+int tpacpi_led_set(int whichled, bool on)
+{
+	struct tp_led_table *t;
+
+	if (whichled < 0 || whichled >= TPACPI_LED_MAX)
+		return -EINVAL;
+
+	t = &led_tables[whichled];
+	if (t->state < 0 || t->state == on)
+		return t->state;
+	return mute_led_on_off(t, on);
+}
+EXPORT_SYMBOL_GPL(tpacpi_led_set);
+
+static int mute_led_init(struct ibm_init_struct *iibm)
+{
+	acpi_handle temp;
+	int i;
+
+	for (i = 0; i < TPACPI_LED_MAX; i++) {
+		struct tp_led_table *t = &led_tables[i];
+		if (ACPI_SUCCESS(acpi_get_handle(hkey_handle, t->name, &temp)))
+			mute_led_on_off(t, false);
+		else
+			t->state = -ENODEV;
+	}
+	return 0;
+}
+
+static void mute_led_exit(void)
+{
+	int i;
+
+	for (i = 0; i < TPACPI_LED_MAX; i++)
+		tpacpi_led_set(i, false);
+}
+
+static struct ibm_struct mute_led_driver_data = {
+	.name = "mute_led",
+	.exit = mute_led_exit,
+};
+
 /****************************************************************************
  ****************************************************************************
  *
@@ -8768,6 +8854,10 @@ static struct ibm_init_struct ibms_init[] __initdata = {
 		.init = fan_init,
 		.data = &fan_driver_data,
 	},
+	{
+		.init = mute_led_init,
+		.data = &mute_led_driver_data,
+	},
 };
 
 static int __init set_ibm_param(const char *val, struct kernel_param *kp)
diff --git a/include/linux/thinkpad_acpi.h b/include/linux/thinkpad_acpi.h
new file mode 100644
index 000000000000..361de59a2285
--- /dev/null
+++ b/include/linux/thinkpad_acpi.h
@@ -0,0 +1,15 @@
+#ifndef __THINKPAD_ACPI_H__
+#define __THINKPAD_ACPI_H__
+
+/* These two functions return 0 if success, or negative error code
+   (e g -ENODEV if no led present) */
+
+enum {
+	TPACPI_LED_MUTE,
+	TPACPI_LED_MICMUTE,
+	TPACPI_LED_MAX,
+};
+
+int tpacpi_led_set(int whichled, bool on);
+
+#endif
-- 
cgit v1.2.3


From 5587027ce9d59a57aecaa190be1c8e560aaff45d Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:18:00 +0530
Subject: kvm: Add struct kvm arg to memslot APIs

We will use that in the later patch to find the kvm ops handler

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/arm/kvm/arm.c                 |  5 +++--
 arch/ia64/kvm/kvm-ia64.c           |  5 +++--
 arch/mips/kvm/kvm_mips.c           |  5 +++--
 arch/powerpc/include/asm/kvm_ppc.h |  6 ++++--
 arch/powerpc/kvm/book3s.c          |  4 ++--
 arch/powerpc/kvm/booke.c           |  4 ++--
 arch/powerpc/kvm/powerpc.c         |  9 +++++----
 arch/s390/kvm/kvm-s390.c           |  5 +++--
 arch/x86/kvm/x86.c                 |  5 +++--
 include/linux/kvm_host.h           |  5 +++--
 virt/kvm/kvm_main.c                | 12 ++++++------
 11 files changed, 37 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index cc5adb9349ef..e312e4a53f8d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bdfd8789b376..985bf80c622e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index a7b044536de4..73b34827826c 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 	return -ENOIOCTLCMD;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c13f15db476c..20f461637090 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -134,9 +134,11 @@ extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
-extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+extern void kvmppc_core_free_memslot(struct kvm *kvm,
+				     struct kvm_memory_slot *free,
 				     struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+extern int kvmppc_core_create_memslot(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
 				      unsigned long npages);
 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 39d2994f9d27..130fe1d75bac 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -761,13 +761,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return kvmppc_ops->get_dirty_log(kvm, log);
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
 	kvmppc_ops->free_memslot(free, dont);
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
 	return kvmppc_ops->create_memslot(slot, npages);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1769354d0bf8..cb2d986a3382 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1662,12 +1662,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return -ENOTSUPP;
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
 	return 0;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9aaa07efa4b6..b103d747934a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -417,15 +417,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
-	kvmppc_core_free_memslot(free, dont);
+	kvmppc_core_free_memslot(kvm, free, dont);
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
-	return kvmppc_core_create_memslot(slot, npages);
+	return kvmppc_core_create_memslot(kvm, slot, npages);
 }
 
 void kvm_arch_memslots_updated(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1e4e7b97337a..bedda67cc222 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1089,12 +1089,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index edf2a07df3a3..666526a55c46 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7080,7 +7080,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 	int i;
@@ -7101,7 +7101,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 	}
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	int i;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c9d4236ab442..8b0107dc2067 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -507,9 +507,10 @@ int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem);
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem);
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages);
 void kvm_arch_memslots_updated(struct kvm *kvm);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 94c6e3f6f244..0932c3b64155 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -542,13 +542,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Free any memory in @free but not in @dont.
  */
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
-	kvm_arch_free_memslot(free, dont);
+	kvm_arch_free_memslot(kvm, free, dont);
 
 	free->npages = 0;
 }
@@ -559,7 +559,7 @@ void kvm_free_physmem(struct kvm *kvm)
 	struct kvm_memory_slot *memslot;
 
 	kvm_for_each_memslot(memslot, slots)
-		kvm_free_physmem_slot(memslot, NULL);
+		kvm_free_physmem_slot(kvm, memslot, NULL);
 
 	kfree(kvm->memslots);
 }
@@ -823,7 +823,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (change == KVM_MR_CREATE) {
 		new.userspace_addr = mem->userspace_addr;
 
-		if (kvm_arch_create_memslot(&new, npages))
+		if (kvm_arch_create_memslot(kvm, &new, npages))
 			goto out_free;
 	}
 
@@ -899,7 +899,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
-	kvm_free_physmem_slot(&old, &new);
+	kvm_free_physmem_slot(kvm, &old, &new);
 	kfree(old_memslots);
 
 	return 0;
@@ -907,7 +907,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 out_slots:
 	kfree(slots);
 out_free:
-	kvm_free_physmem_slot(&new, &old);
+	kvm_free_physmem_slot(kvm, &new, &old);
 out:
 	return r;
 }
-- 
cgit v1.2.3


From 94468783cd960aa14b22503dd59afd14efb785aa Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 16 Oct 2013 19:18:41 -0700
Subject: usb: usb_phy_gen: refine conditional declaration of
 usb_nop_xceiv_register

Commit 3fa4d734 (usb: phy: rename nop_usb_xceiv => usb_phy_gen_xceiv)
changed the conditional around the declaration of usb_nop_xceiv_register
from
	#if defined(CONFIG_NOP_USB_XCEIV) ||
		(defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE))
to
	#if IS_ENABLED(CONFIG_NOP_USB_XCEIV)

While that looks the same, it is semantically different. The first expression
is true if CONFIG_NOP_USB_XCEIV is built as module and if the including
code is built as module. The second expression is true if code depending on
CONFIG_NOP_USB_XCEIV if built as module or into the kernel.

As a result, the arm:allmodconfig build fails with

arch/arm/mach-omap2/built-in.o: In function `omap3_evm_init':
arch/arm/mach-omap2/board-omap3evm.c:703: undefined reference to
	`usb_nop_xceiv_register'

Fix the problem by reverting to the old conditional.

Cc: Josh Boyer <jwboyer@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/usb_phy_gen_xceiv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/usb_phy_gen_xceiv.h b/include/linux/usb/usb_phy_gen_xceiv.h
index f9a7e7bc925b..11d85b9c1b08 100644
--- a/include/linux/usb/usb_phy_gen_xceiv.h
+++ b/include/linux/usb/usb_phy_gen_xceiv.h
@@ -12,7 +12,7 @@ struct usb_phy_gen_xceiv_platform_data {
 	unsigned int needs_reset:1;
 };
 
-#if IS_ENABLED(CONFIG_NOP_USB_XCEIV)
+#if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE))
 /* sometimes transceivers are accessed only through e.g. ULPI */
 extern void usb_nop_xceiv_register(void);
 extern void usb_nop_xceiv_unregister(void);
-- 
cgit v1.2.3


From 5930e8d0ab3689f1e239566443ca8f53e45e01cc Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 15 Oct 2013 16:55:22 +0200
Subject: net/mlx4: Fix typo, move similar defs to same location

Small code cleanup:

1. change MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN to MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN

2. put MLX4_SET_PORT_PRIO2TC and MLX4_SET_PORT_SCHEDULER in the same union with the
   other MLX4_SET_PORT_yyy

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
 drivers/net/ethernet/mellanox/mlx4/fw.c        | 2 +-
 include/linux/mlx4/cmd.h                       | 6 ++----
 include/linux/mlx4/device.h                    | 2 +-
 4 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index fa37b7a61213..85d91665d400 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1733,7 +1733,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 
 	/* Unregister Mac address for the port */
 	mlx4_en_put_qp(priv);
-	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN))
+	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN))
 		mdev->mac_removed[priv->port] = 1;
 
 	/* Free RX Rings */
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 0d63daa2f422..a377484cf544 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -652,7 +652,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
 	if (field & 1<<6)
-		dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN;
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
 	MLX4_GET(dev_cap->max_icm_sz, outbox,
 		 QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
 	if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index cd1fdf75103b..8df61bc5da00 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -154,10 +154,6 @@ enum {
 	MLX4_CMD_QUERY_IF_STAT	 = 0X54,
 	MLX4_CMD_SET_IF_STAT	 = 0X55,
 
-	/* set port opcode modifiers */
-	MLX4_SET_PORT_PRIO2TC = 0x8,
-	MLX4_SET_PORT_SCHEDULER  = 0x9,
-
 	/* register/delete flow steering network rules */
 	MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
 	MLX4_QP_FLOW_STEERING_DETACH = 0x66,
@@ -182,6 +178,8 @@ enum {
 	MLX4_SET_PORT_VLAN_TABLE = 0x3,
 	MLX4_SET_PORT_PRIO_MAP  = 0x4,
 	MLX4_SET_PORT_GID_TABLE = 0x5,
+	MLX4_SET_PORT_PRIO2TC	= 0x8,
+	MLX4_SET_PORT_SCHEDULER = 0x9,
 };
 
 enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 24ce6bdd540e..9ad0c18495ad 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -155,7 +155,7 @@ enum {
 	MLX4_DEV_CAP_FLAG2_RSS_TOP		= 1LL <<  1,
 	MLX4_DEV_CAP_FLAG2_RSS_XOR		= 1LL <<  2,
 	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3,
-	MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN	= 1LL <<  4,
+	MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN	= 1LL <<  4,
 	MLX4_DEV_CAP_FLAG2_TS			= 1LL <<  5,
 	MLX4_DEV_CAP_FLAG2_VLAN_CONTROL		= 1LL <<  6,
 	MLX4_DEV_CAP_FLAG2_FSM			= 1LL <<  7,
-- 
cgit v1.2.3


From 9e5f1721907fcfbd4b575bcafa0314188f7330a5 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 14 Oct 2013 15:28:38 +0300
Subject: yam: integer underflow in yam_ioctl()

We cap bitrate at YAM_MAXBITRATE in yam_ioctl(), but it could also be
negative.  I don't know the impact of using a negative bitrate but let's
prevent it.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/yam.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/yam.h b/include/linux/yam.h
index 7fe28228b274..512cdc2fb80f 100644
--- a/include/linux/yam.h
+++ b/include/linux/yam.h
@@ -77,6 +77,6 @@ struct yamdrv_ioctl_cfg {
 
 struct yamdrv_ioctl_mcs {
 	int cmd;
-	int bitrate;
+	unsigned int bitrate;
 	unsigned char bits[YAM_FPGA_SIZE];
 };
-- 
cgit v1.2.3


From 731d9cae0204a8948c7db2f551edcd5d5822ed95 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Thu, 17 Oct 2013 17:37:12 +0200
Subject: tty/serial: at91: add a fallback option to determine uart/usart
 property

On older SoC, the "name" field is not filled in the register map.
Fix the way to figure out if the serial port is an uart or an usart for these
older products (with corresponding properties).

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 19 ++++++++++++++++++-
 include/linux/atmel_serial.h      |  1 +
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 6b0f75eac8a2..c7d99af46a96 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -99,6 +99,7 @@ static void atmel_stop_rx(struct uart_port *port);
 #define UART_PUT_RTOR(port,v)	__raw_writel(v, (port)->membase + ATMEL_US_RTOR)
 #define UART_PUT_TTGR(port, v)	__raw_writel(v, (port)->membase + ATMEL_US_TTGR)
 #define UART_GET_IP_NAME(port)	__raw_readl((port)->membase + ATMEL_US_NAME)
+#define UART_GET_IP_VERSION(port) __raw_readl((port)->membase + ATMEL_US_VERSION)
 
  /* PDC registers */
 #define UART_PUT_PTCR(port,v)	__raw_writel(v, (port)->membase + ATMEL_PDC_PTCR)
@@ -1503,6 +1504,7 @@ static void atmel_get_ip_name(struct uart_port *port)
 {
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
 	int name = UART_GET_IP_NAME(port);
+	u32 version;
 	int usart, uart;
 	/* usart and uart ascii */
 	usart = 0x55534152;
@@ -1517,7 +1519,22 @@ static void atmel_get_ip_name(struct uart_port *port)
 		dev_dbg(port->dev, "This is uart\n");
 		atmel_port->is_usart = false;
 	} else {
-		dev_err(port->dev, "Not supported ip name, set to uart\n");
+		/* fallback for older SoCs: use version field */
+		version = UART_GET_IP_VERSION(port);
+		switch (version) {
+		case 0x302:
+		case 0x10213:
+			dev_dbg(port->dev, "This version is usart\n");
+			atmel_port->is_usart = true;
+			break;
+		case 0x203:
+		case 0x10202:
+			dev_dbg(port->dev, "This version is uart\n");
+			atmel_port->is_usart = false;
+			break;
+		default:
+			dev_err(port->dev, "Not supported ip name nor version, set to uart\n");
+		}
 	}
 }
 
diff --git a/include/linux/atmel_serial.h b/include/linux/atmel_serial.h
index be201ca2990c..00beddf6be20 100644
--- a/include/linux/atmel_serial.h
+++ b/include/linux/atmel_serial.h
@@ -125,5 +125,6 @@
 #define ATMEL_US_IF		0x4c			/* IrDA Filter Register */
 
 #define ATMEL_US_NAME		0xf0			/* Ip Name */
+#define ATMEL_US_VERSION	0xfc			/* Ip Version */
 
 #endif
-- 
cgit v1.2.3


From 40a96d54ee2232045783e657eb9224cd723dcb40 Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Thu, 17 Oct 2013 15:35:36 -0700
Subject: intel_mid: Move platform device setups to their own
 platform_<device>.* files

As Intel rolling out more SoC's after Moorestown, we need to
re-structure the code in a way that is backward compatible and easy to
expand. This patch implements a flexible way to support multiple boards
and devices.

This patch does not add any new functional support. It just refactors
the existing code to increase the modularity and decrease the code
duplication for supporting multiple soc's and boards.

Currently intel-mid.c has both board and soc related code in one file.
This patch moves the board related code to new files and let linker
script to create SFI devite table following this:

1. Move the SFI device specific code to
   arch/x86/platform/intel-mid/device-libs/platform_<device>.*
   A new device file is added for every supported device. This code will
   get conditionally compiled by using corresponding device driver
   CONFIG option.

2. Move the device_ids location to .x86_intel_mid_dev.init section by
   using new sfi_device() macro.

This patch was based on previous code from Sathyanarayanan Kuppuswamy.

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Link: http://lkml.kernel.org/r/1382049336-21316-13-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/intel-mid.h                   |  16 +
 arch/x86/platform/intel-mid/Makefile               |   8 +-
 arch/x86/platform/intel-mid/device_libs/Makefile   |  22 ++
 .../intel-mid/device_libs/platform_bma023.c        |  20 +
 .../intel-mid/device_libs/platform_emc1403.c       |  41 ++
 .../intel-mid/device_libs/platform_gpio_keys.c     |  83 ++++
 .../platform/intel-mid/device_libs/platform_ipc.c  |  68 ++++
 .../platform/intel-mid/device_libs/platform_ipc.h  |  17 +
 .../intel-mid/device_libs/platform_lis331.c        |  39 ++
 .../intel-mid/device_libs/platform_max3111.c       |  35 ++
 .../intel-mid/device_libs/platform_max7315.c       |  79 ++++
 .../intel-mid/device_libs/platform_mpu3050.c       |  36 ++
 .../platform/intel-mid/device_libs/platform_msic.c |  87 +++++
 .../platform/intel-mid/device_libs/platform_msic.h |  19 +
 .../intel-mid/device_libs/platform_msic_audio.c    |  47 +++
 .../intel-mid/device_libs/platform_msic_battery.c  |  37 ++
 .../intel-mid/device_libs/platform_msic_gpio.c     |  48 +++
 .../intel-mid/device_libs/platform_msic_ocd.c      |  49 +++
 .../device_libs/platform_msic_power_btn.c          |  36 ++
 .../intel-mid/device_libs/platform_msic_thermal.c  |  37 ++
 .../intel-mid/device_libs/platform_pmic_gpio.c     |  54 +++
 .../intel-mid/device_libs/platform_tc35876x.c      |  36 ++
 .../intel-mid/device_libs/platform_tca6416.c       |  57 +++
 arch/x86/platform/intel-mid/intel-mid.c            | 419 ---------------------
 arch/x86/platform/intel-mid/sfi.c                  |  14 +-
 include/linux/sfi.h                                |   3 +
 26 files changed, 978 insertions(+), 429 deletions(-)
 create mode 100644 arch/x86/platform/intel-mid/device_libs/Makefile
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_bma023.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_ipc.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_ipc.h
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_lis331.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_max3111.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_max7315.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_msic.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_msic.h
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_tca6416.c

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 3b0e7a771cb5..459769d39263 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -12,8 +12,11 @@
 #define _ASM_X86_INTEL_MID_H
 
 #include <linux/sfi.h>
+#include <linux/platform_device.h>
 
 extern int intel_mid_pci_init(void);
+extern int get_gpio_by_name(const char *name);
+extern void intel_scu_device_register(struct platform_device *pdev);
 extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
 extern int __init sfi_parse_mtmr(struct sfi_table_header *table);
 extern int sfi_mrtc_num;
@@ -34,6 +37,10 @@ struct devs_id {
 				struct devs_id *dev);
 };
 
+#define sfi_device(i)   \
+	static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \
+	__attribute__((__section__(".x86_intel_mid_dev.init"))) = &i
+
 /*
  * Medfield is the follow-up of Moorestown, it combines two chip solution into
  * one. Other than that it also added always-on and constant tsc and lapic
@@ -55,9 +62,15 @@ static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
 	return __intel_mid_cpu_chip;
 }
 
+static inline bool intel_mid_has_msic(void)
+{
+	return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL);
+}
+
 #else /* !CONFIG_X86_INTEL_MID */
 
 #define intel_mid_identify_cpu()    (0)
+#define intel_mid_has_msic()    (0)
 
 #endif /* !CONFIG_X86_INTEL_MID */
 
@@ -94,4 +107,7 @@ extern void intel_scu_devices_destroy(void);
 
 extern void intel_mid_rtc_init(void);
 
+/* the offset for the mapping of global gpio pin to irq */
+#define INTEL_MID_IRQ_OFFSET 0x100
+
 #endif /* _ASM_X86_INTEL_MID_H */
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
index b11e5b2de336..01cc29ea5ff7 100644
--- a/arch/x86/platform/intel-mid/Makefile
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -1,5 +1,7 @@
 obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o
-obj-$(CONFIG_X86_INTEL_MID)	+= intel_mid_vrtc.o
-obj-$(CONFIG_EARLY_PRINTK_INTEL_MID)	+= early_printk_intel_mid.o
+obj-$(CONFIG_X86_INTEL_MID) += intel_mid_vrtc.o
+obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o
 # SFI specific code
-obj-$(CONFIG_SFI) += sfi.o
+ifdef CONFIG_X86_INTEL_MID
+obj-$(CONFIG_SFI) += sfi.o device_libs/
+endif
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
new file mode 100644
index 000000000000..097e7a7940d8
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -0,0 +1,22 @@
+# IPC Devices
+obj-y += platform_ipc.o
+obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o
+obj-$(subst m,y,$(CONFIG_SND_MFLD_MACHINE)) += platform_msic_audio.o
+obj-$(subst m,y,$(CONFIG_GPIO_MSIC)) += platform_msic_gpio.o
+obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_ocd.o
+obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_battery.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o
+obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o
+obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o
+# I2C Devices
+obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
+obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
+obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_max7315.o
+obj-$(subst m,y,$(CONFIG_INPUT_MPU3050)) += platform_mpu3050.o
+obj-$(subst m,y,$(CONFIG_INPUT_BMA150)) += platform_bma023.o
+obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
+obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o
+# SPI Devices
+obj-$(subst m,y,$(CONFIG_SERIAL_MRST_MAX3110)) += platform_max3111.o
+# MISC Devices
+obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bma023.c b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
new file mode 100644
index 000000000000..0ae7f2ae2296
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
@@ -0,0 +1,20 @@
+/*
+ * platform_bma023.c: bma023 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <asm/intel-mid.h>
+
+static const struct devs_id bma023_dev_id __initconst = {
+	.name = "bma023",
+	.type = SFI_DEV_TYPE_I2C,
+	.delay = 1,
+};
+
+sfi_device(bma023_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
new file mode 100644
index 000000000000..0d942c1d26d5
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
@@ -0,0 +1,41 @@
+/*
+ * platform_emc1403.c: emc1403 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <asm/intel-mid.h>
+
+static void __init *emc1403_platform_data(void *info)
+{
+	static short intr2nd_pdata;
+	struct i2c_board_info *i2c_info = info;
+	int intr = get_gpio_by_name("thermal_int");
+	int intr2nd = get_gpio_by_name("thermal_alert");
+
+	if (intr == -1 || intr2nd == -1)
+		return NULL;
+
+	i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+	intr2nd_pdata = intr2nd + INTEL_MID_IRQ_OFFSET;
+
+	return &intr2nd_pdata;
+}
+
+static const struct devs_id emc1403_dev_id __initconst = {
+	.name = "emc1403",
+	.type = SFI_DEV_TYPE_I2C,
+	.delay = 1,
+	.get_platform_data = &emc1403_platform_data,
+};
+
+sfi_device(emc1403_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
new file mode 100644
index 000000000000..a013a4834bbe
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
@@ -0,0 +1,83 @@
+/*
+ * platform_gpio_keys.c: gpio_keys platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/input.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_keys.h>
+#include <linux/platform_device.h>
+#include <asm/intel-mid.h>
+
+#define DEVICE_NAME "gpio-keys"
+
+/*
+ * we will search these buttons in SFI GPIO table (by name)
+ * and register them dynamically. Please add all possible
+ * buttons here, we will shrink them if no GPIO found.
+ */
+static struct gpio_keys_button gpio_button[] = {
+	{KEY_POWER,		-1, 1, "power_btn",	EV_KEY, 0, 3000},
+	{KEY_PROG1,		-1, 1, "prog_btn1",	EV_KEY, 0, 20},
+	{KEY_PROG2,		-1, 1, "prog_btn2",	EV_KEY, 0, 20},
+	{SW_LID,		-1, 1, "lid_switch",	EV_SW,  0, 20},
+	{KEY_VOLUMEUP,		-1, 1, "vol_up",	EV_KEY, 0, 20},
+	{KEY_VOLUMEDOWN,	-1, 1, "vol_down",	EV_KEY, 0, 20},
+	{KEY_CAMERA,		-1, 1, "camera_full",	EV_KEY, 0, 20},
+	{KEY_CAMERA_FOCUS,	-1, 1, "camera_half",	EV_KEY, 0, 20},
+	{SW_KEYPAD_SLIDE,	-1, 1, "MagSw1",	EV_SW,  0, 20},
+	{SW_KEYPAD_SLIDE,	-1, 1, "MagSw2",	EV_SW,  0, 20},
+};
+
+static struct gpio_keys_platform_data gpio_keys = {
+	.buttons	= gpio_button,
+	.rep		= 1,
+	.nbuttons	= -1, /* will fill it after search */
+};
+
+static struct platform_device pb_device = {
+	.name		= DEVICE_NAME,
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &gpio_keys,
+	},
+};
+
+/*
+ * Shrink the non-existent buttons, register the gpio button
+ * device if there is some
+ */
+static int __init pb_keys_init(void)
+{
+	struct gpio_keys_button *gb = gpio_button;
+	int i, num, good = 0;
+
+	num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
+	for (i = 0; i < num; i++) {
+		gb[i].gpio = get_gpio_by_name(gb[i].desc);
+		pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc,
+					gb[i].gpio);
+		if (gb[i].gpio == -1)
+			continue;
+
+		if (i != good)
+			gb[good] = gb[i];
+		good++;
+	}
+
+	if (good) {
+		gpio_keys.nbuttons = good;
+		return platform_device_register(&pb_device);
+	}
+	return 0;
+}
+late_initcall(pb_keys_init);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c
new file mode 100644
index 000000000000..a84b73d6c4a0
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c
@@ -0,0 +1,68 @@
+/*
+ * platform_ipc.c: IPC platform library file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/sfi.h>
+#include <linux/gpio.h>
+#include <asm/intel-mid.h>
+#include "platform_ipc.h"
+
+void __init ipc_device_handler(struct sfi_device_table_entry *pentry,
+				struct devs_id *dev)
+{
+	struct platform_device *pdev;
+	void *pdata = NULL;
+	static struct resource res __initdata = {
+		.name = "IRQ",
+		.flags = IORESOURCE_IRQ,
+	};
+
+	pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n",
+		pentry->name, pentry->irq);
+
+	/*
+	 * We need to call platform init of IPC devices to fill misc_pdata
+	 * structure. It will be used in msic_init for initialization.
+	 */
+	if (dev != NULL)
+		pdata = dev->get_platform_data(pentry);
+
+	/*
+	 * On Medfield the platform device creation is handled by the MSIC
+	 * MFD driver so we don't need to do it here.
+	 */
+	if (intel_mid_has_msic())
+		return;
+
+	pdev = platform_device_alloc(pentry->name, 0);
+	if (pdev == NULL) {
+		pr_err("out of memory for SFI platform device '%s'.\n",
+			pentry->name);
+		return;
+	}
+	res.start = pentry->irq;
+	platform_device_add_resources(pdev, &res, 1);
+
+	pdev->dev.platform_data = pdata;
+	intel_scu_device_register(pdev);
+}
+
+static const struct devs_id pmic_audio_dev_id __initconst = {
+	.name = "pmic_audio",
+	.type = SFI_DEV_TYPE_IPC,
+	.delay = 1,
+	.device_handler = &ipc_device_handler,
+};
+
+sfi_device(pmic_audio_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h
new file mode 100644
index 000000000000..8f568dd79605
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h
@@ -0,0 +1,17 @@
+/*
+ * platform_ipc.h: IPC platform library header file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _PLATFORM_IPC_H_
+#define _PLATFORM_IPC_H_
+
+extern void __init ipc_device_handler(struct sfi_device_table_entry *pentry,
+			struct devs_id *dev) __attribute__((weak));
+#endif
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
new file mode 100644
index 000000000000..15278c11f714
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
@@ -0,0 +1,39 @@
+/*
+ * platform_lis331.c:  lis331 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <asm/intel-mid.h>
+
+static void __init *lis331dl_platform_data(void *info)
+{
+	static short intr2nd_pdata;
+	struct i2c_board_info *i2c_info = info;
+	int intr = get_gpio_by_name("accel_int");
+	int intr2nd = get_gpio_by_name("accel_2");
+
+	if (intr == -1 || intr2nd == -1)
+		return NULL;
+
+	i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+	intr2nd_pdata = intr2nd + INTEL_MID_IRQ_OFFSET;
+
+	return &intr2nd_pdata;
+}
+
+static const struct devs_id lis331dl_dev_id __initconst = {
+	.name = "i2c_accel",
+	.type = SFI_DEV_TYPE_I2C,
+	.get_platform_data = &lis331dl_platform_data,
+};
+
+sfi_device(lis331dl_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max3111.c b/arch/x86/platform/intel-mid/device_libs/platform_max3111.c
new file mode 100644
index 000000000000..afd1df94e0e5
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max3111.c
@@ -0,0 +1,35 @@
+/*
+ * platform_max3111.c: max3111 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <asm/intel-mid.h>
+
+static void __init *max3111_platform_data(void *info)
+{
+	struct spi_board_info *spi_info = info;
+	int intr = get_gpio_by_name("max3111_int");
+
+	spi_info->mode = SPI_MODE_0;
+	if (intr == -1)
+		return NULL;
+	spi_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+	return NULL;
+}
+
+static const struct devs_id max3111_dev_id __initconst = {
+	.name = "spi_max3111",
+	.type = SFI_DEV_TYPE_SPI,
+	.get_platform_data = &max3111_platform_data,
+};
+
+sfi_device(max3111_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
new file mode 100644
index 000000000000..94ade10024ae
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
@@ -0,0 +1,79 @@
+/*
+ * platform_max7315.c: max7315 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/platform_data/pca953x.h>
+#include <asm/intel-mid.h>
+
+#define MAX7315_NUM 2
+
+static void __init *max7315_platform_data(void *info)
+{
+	static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
+	static int nr;
+	struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
+	struct i2c_board_info *i2c_info = info;
+	int gpio_base, intr;
+	char base_pin_name[SFI_NAME_LEN + 1];
+	char intr_pin_name[SFI_NAME_LEN + 1];
+
+	if (nr == MAX7315_NUM) {
+		pr_err("too many max7315s, we only support %d\n",
+				MAX7315_NUM);
+		return NULL;
+	}
+	/* we have several max7315 on the board, we only need load several
+	 * instances of the same pca953x driver to cover them
+	 */
+	strcpy(i2c_info->type, "max7315");
+	if (nr++) {
+		sprintf(base_pin_name, "max7315_%d_base", nr);
+		sprintf(intr_pin_name, "max7315_%d_int", nr);
+	} else {
+		strcpy(base_pin_name, "max7315_base");
+		strcpy(intr_pin_name, "max7315_int");
+	}
+
+	gpio_base = get_gpio_by_name(base_pin_name);
+	intr = get_gpio_by_name(intr_pin_name);
+
+	if (gpio_base == -1)
+		return NULL;
+	max7315->gpio_base = gpio_base;
+	if (intr != -1) {
+		i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+		max7315->irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
+	} else {
+		i2c_info->irq = -1;
+		max7315->irq_base = -1;
+	}
+	return max7315;
+}
+
+static const struct devs_id max7315_dev_id __initconst = {
+	.name = "i2c_max7315",
+	.type = SFI_DEV_TYPE_I2C,
+	.delay = 1,
+	.get_platform_data = &max7315_platform_data,
+};
+
+static const struct devs_id max7315_2_dev_id __initconst = {
+	.name = "i2c_max7315_2",
+	.type = SFI_DEV_TYPE_I2C,
+	.delay = 1,
+	.get_platform_data = &max7315_platform_data,
+};
+
+sfi_device(max7315_dev_id);
+sfi_device(max7315_2_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
new file mode 100644
index 000000000000..dd28d63c84fb
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
@@ -0,0 +1,36 @@
+/*
+ * platform_mpu3050.c: mpu3050 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <asm/intel-mid.h>
+
+static void *mpu3050_platform_data(void *info)
+{
+	struct i2c_board_info *i2c_info = info;
+	int intr = get_gpio_by_name("mpu3050_int");
+
+	if (intr == -1)
+		return NULL;
+
+	i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+	return NULL;
+}
+
+static const struct devs_id mpu3050_dev_id __initconst = {
+	.name = "mpu3050",
+	.type = SFI_DEV_TYPE_I2C,
+	.delay = 1,
+	.get_platform_data = &mpu3050_platform_data,
+};
+
+sfi_device(mpu3050_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.c b/arch/x86/platform/intel-mid/device_libs/platform_msic.c
new file mode 100644
index 000000000000..9f4a775a69d6
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.c
@@ -0,0 +1,87 @@
+/*
+ * platform_msic.c: MSIC platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel_scu_ipc.h>
+#include <asm/intel-mid.h>
+#include "platform_msic.h"
+
+struct intel_msic_platform_data msic_pdata;
+
+static struct resource msic_resources[] = {
+	{
+		.start	= INTEL_MSIC_IRQ_PHYS_BASE,
+		.end	= INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device msic_device = {
+	.name		= "intel_msic",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &msic_pdata,
+	},
+	.num_resources	= ARRAY_SIZE(msic_resources),
+	.resource	= msic_resources,
+};
+
+static int msic_scu_status_change(struct notifier_block *nb,
+				  unsigned long code, void *data)
+{
+	if (code == SCU_DOWN) {
+		platform_device_unregister(&msic_device);
+		return 0;
+	}
+
+	return platform_device_register(&msic_device);
+}
+
+static int __init msic_init(void)
+{
+	static struct notifier_block msic_scu_notifier = {
+		.notifier_call	= msic_scu_status_change,
+	};
+
+	/*
+	 * We need to be sure that the SCU IPC is ready before MSIC device
+	 * can be registered.
+	 */
+	if (intel_mid_has_msic())
+		intel_scu_notifier_add(&msic_scu_notifier);
+
+	return 0;
+}
+arch_initcall(msic_init);
+
+/*
+ * msic_generic_platform_data - sets generic platform data for the block
+ * @info: pointer to the SFI device table entry for this block
+ * @block: MSIC block
+ *
+ * Function sets IRQ number from the SFI table entry for given device to
+ * the MSIC platform data.
+ */
+void *msic_generic_platform_data(void *info, enum intel_msic_block block)
+{
+	struct sfi_device_table_entry *entry = info;
+
+	BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST);
+	msic_pdata.irq[block] = entry->irq;
+
+	return NULL;
+}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.h b/arch/x86/platform/intel-mid/device_libs/platform_msic.h
new file mode 100644
index 000000000000..917eb56d77da
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.h
@@ -0,0 +1,19 @@
+/*
+ * platform_msic.h: MSIC platform data header file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _PLATFORM_MSIC_H_
+#define _PLATFORM_MSIC_H_
+
+extern struct intel_msic_platform_data msic_pdata;
+
+extern void *msic_generic_platform_data(void *info,
+			enum intel_msic_block block) __attribute__((weak));
+#endif
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
new file mode 100644
index 000000000000..29629397d2b3
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
@@ -0,0 +1,47 @@
+/*
+ * platform_msic_audio.c: MSIC audio platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void *msic_audio_platform_data(void *info)
+{
+	struct platform_device *pdev;
+
+	pdev = platform_device_register_simple("sst-platform", -1, NULL, 0);
+
+	if (IS_ERR(pdev)) {
+		pr_err("failed to create audio platform device\n");
+		return NULL;
+	}
+
+	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO);
+}
+
+static const struct devs_id msic_audio_dev_id __initconst = {
+	.name = "msic_audio",
+	.type = SFI_DEV_TYPE_IPC,
+	.delay = 1,
+	.get_platform_data = &msic_audio_platform_data,
+	.device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_audio_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
new file mode 100644
index 000000000000..f446c33df1a8
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
@@ -0,0 +1,37 @@
+/*
+ * platform_msic_battery.c: MSIC battery platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_battery_platform_data(void *info)
+{
+	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY);
+}
+
+static const struct devs_id msic_battery_dev_id __initconst = {
+	.name = "msic_battery",
+	.type = SFI_DEV_TYPE_IPC,
+	.delay = 1,
+	.get_platform_data = &msic_battery_platform_data,
+	.device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_battery_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
new file mode 100644
index 000000000000..2a4f7b1dd917
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
@@ -0,0 +1,48 @@
+/*
+ * platform_msic_gpio.c: MSIC GPIO platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/sfi.h>
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_gpio_platform_data(void *info)
+{
+	static struct intel_msic_gpio_pdata msic_gpio_pdata;
+
+	int gpio = get_gpio_by_name("msic_gpio_base");
+
+	if (gpio < 0)
+		return NULL;
+
+	msic_gpio_pdata.gpio_base = gpio;
+	msic_pdata.gpio = &msic_gpio_pdata;
+
+	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO);
+}
+
+static const struct devs_id msic_gpio_dev_id __initconst = {
+	.name = "msic_gpio",
+	.type = SFI_DEV_TYPE_IPC,
+	.delay = 1,
+	.get_platform_data = &msic_gpio_platform_data,
+	.device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_gpio_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
new file mode 100644
index 000000000000..6497111ddb54
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
@@ -0,0 +1,49 @@
+/*
+ * platform_msic_ocd.c: MSIC OCD platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/sfi.h>
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_ocd_platform_data(void *info)
+{
+	static struct intel_msic_ocd_pdata msic_ocd_pdata;
+	int gpio;
+
+	gpio = get_gpio_by_name("ocd_gpio");
+
+	if (gpio < 0)
+		return NULL;
+
+	msic_ocd_pdata.gpio = gpio;
+	msic_pdata.ocd = &msic_ocd_pdata;
+
+	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
+}
+
+static const struct devs_id msic_ocd_dev_id __initconst = {
+	.name = "msic_ocd",
+	.type = SFI_DEV_TYPE_IPC,
+	.delay = 1,
+	.get_platform_data = &msic_ocd_platform_data,
+	.device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_ocd_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
new file mode 100644
index 000000000000..83a3459bc337
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
@@ -0,0 +1,36 @@
+/*
+ * platform_msic_power_btn.c: MSIC power btn platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/sfi.h>
+#include <linux/init.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_power_btn_platform_data(void *info)
+{
+	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN);
+}
+
+static const struct devs_id msic_power_btn_dev_id __initconst = {
+	.name = "msic_power_btn",
+	.type = SFI_DEV_TYPE_IPC,
+	.delay = 1,
+	.get_platform_data = &msic_power_btn_platform_data,
+	.device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_power_btn_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
new file mode 100644
index 000000000000..a351878b96bc
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
@@ -0,0 +1,37 @@
+/*
+ * platform_msic_thermal.c: msic_thermal platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/input.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_thermal_platform_data(void *info)
+{
+	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
+}
+
+static const struct devs_id msic_thermal_dev_id __initconst = {
+	.name = "msic_thermal",
+	.type = SFI_DEV_TYPE_IPC,
+	.delay = 1,
+	.get_platform_data = &msic_thermal_platform_data,
+	.device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_thermal_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
new file mode 100644
index 000000000000..d87182a09263
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
@@ -0,0 +1,54 @@
+/*
+ * platform_pmic_gpio.c: PMIC GPIO platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/intel_pmic_gpio.h>
+#include <asm/intel-mid.h>
+
+#include "platform_ipc.h"
+
+static void __init *pmic_gpio_platform_data(void *info)
+{
+	static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
+	int gpio_base = get_gpio_by_name("pmic_gpio_base");
+
+	if (gpio_base == -1)
+		gpio_base = 64;
+	pmic_gpio_pdata.gpio_base = gpio_base;
+	pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
+	pmic_gpio_pdata.gpiointr = 0xffffeff8;
+
+	return &pmic_gpio_pdata;
+}
+
+static const struct devs_id pmic_gpio_spi_dev_id __initconst = {
+	.name = "pmic_gpio",
+	.type = SFI_DEV_TYPE_SPI,
+	.delay = 1,
+	.get_platform_data = &pmic_gpio_platform_data,
+};
+
+static const struct devs_id pmic_gpio_ipc_dev_id __initconst = {
+	.name = "pmic_gpio",
+	.type = SFI_DEV_TYPE_IPC,
+	.delay = 1,
+	.get_platform_data = &pmic_gpio_platform_data,
+	.device_handler = &ipc_device_handler
+};
+
+sfi_device(pmic_gpio_spi_dev_id);
+sfi_device(pmic_gpio_ipc_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
new file mode 100644
index 000000000000..740fc757050c
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
@@ -0,0 +1,36 @@
+/*
+ * platform_tc35876x.c: tc35876x platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/gpio.h>
+#include <linux/i2c/tc35876x.h>
+#include <asm/intel-mid.h>
+
+/*tc35876x DSI_LVDS bridge chip and panel platform data*/
+static void *tc35876x_platform_data(void *data)
+{
+	static struct tc35876x_platform_data pdata;
+
+	/* gpio pins set to -1 will not be used by the driver */
+	pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
+	pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
+	pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
+
+	return &pdata;
+}
+
+static const struct devs_id tc35876x_dev_id __initconst = {
+	.name = "i2c_disp_brig",
+	.type = SFI_DEV_TYPE_I2C,
+	.get_platform_data = &tc35876x_platform_data,
+};
+
+sfi_device(tc35876x_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
new file mode 100644
index 000000000000..22881c9a6737
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
@@ -0,0 +1,57 @@
+/*
+ * platform_tca6416.c: tca6416 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/platform_data/pca953x.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <asm/intel-mid.h>
+
+#define TCA6416_NAME	"tca6416"
+#define TCA6416_BASE	"tca6416_base"
+#define TCA6416_INTR	"tca6416_int"
+
+static void *tca6416_platform_data(void *info)
+{
+	static struct pca953x_platform_data tca6416;
+	struct i2c_board_info *i2c_info = info;
+	int gpio_base, intr;
+	char base_pin_name[SFI_NAME_LEN + 1];
+	char intr_pin_name[SFI_NAME_LEN + 1];
+
+	strcpy(i2c_info->type, TCA6416_NAME);
+	strcpy(base_pin_name, TCA6416_BASE);
+	strcpy(intr_pin_name, TCA6416_INTR);
+
+	gpio_base = get_gpio_by_name(base_pin_name);
+	intr = get_gpio_by_name(intr_pin_name);
+
+	if (gpio_base == -1)
+		return NULL;
+	tca6416.gpio_base = gpio_base;
+	if (intr != -1) {
+		i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+		tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
+	} else {
+		i2c_info->irq = -1;
+		tca6416.irq_base = -1;
+	}
+	return &tca6416;
+}
+
+static const struct devs_id tca6416_dev_id __initconst = {
+	.name = "tca6416",
+	.type = SFI_DEV_TYPE_I2C,
+	.delay = 1,
+	.get_platform_data = &tca6416_platform_data,
+};
+
+sfi_device(tca6416_dev_id);
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 40915698b9b7..523a1c8f7f07 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -61,8 +61,6 @@ enum intel_mid_timer_options intel_mid_timer_options;
 enum intel_mid_cpu_type __intel_mid_cpu_chip;
 EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip);
 
-static void __init ipc_device_handler(struct sfi_device_table_entry *pentry,
-			struct devs_id *dev);
 static void intel_mid_power_off(void)
 {
 }
@@ -213,420 +211,3 @@ static inline int __init setup_x86_intel_mid_timer(char *arg)
 }
 __setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
 
-/* the offset for the mapping of global gpio pin to irq */
-#define INTEL_MID_IRQ_OFFSET 0x100
-
-static void __init *pmic_gpio_platform_data(void *info)
-{
-	static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
-	int gpio_base = get_gpio_by_name("pmic_gpio_base");
-
-	if (gpio_base == -1)
-		gpio_base = 64;
-	pmic_gpio_pdata.gpio_base = gpio_base;
-	pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
-	pmic_gpio_pdata.gpiointr = 0xffffeff8;
-
-	return &pmic_gpio_pdata;
-}
-
-static void __init *max3111_platform_data(void *info)
-{
-	struct spi_board_info *spi_info = info;
-	int intr = get_gpio_by_name("max3111_int");
-
-	spi_info->mode = SPI_MODE_0;
-	if (intr == -1)
-		return NULL;
-	spi_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-	return NULL;
-}
-
-/* we have multiple max7315 on the board ... */
-#define MAX7315_NUM 2
-static void __init *max7315_platform_data(void *info)
-{
-	static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
-	static int nr;
-	struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
-	struct i2c_board_info *i2c_info = info;
-	int gpio_base, intr;
-	char base_pin_name[SFI_NAME_LEN + 1];
-	char intr_pin_name[SFI_NAME_LEN + 1];
-
-	if (nr == MAX7315_NUM) {
-		pr_err("too many max7315s, we only support %d\n",
-				MAX7315_NUM);
-		return NULL;
-	}
-	/* we have several max7315 on the board, we only need load several
-	 * instances of the same pca953x driver to cover them
-	 */
-	strcpy(i2c_info->type, "max7315");
-	if (nr++) {
-		sprintf(base_pin_name, "max7315_%d_base", nr);
-		sprintf(intr_pin_name, "max7315_%d_int", nr);
-	} else {
-		strcpy(base_pin_name, "max7315_base");
-		strcpy(intr_pin_name, "max7315_int");
-	}
-
-	gpio_base = get_gpio_by_name(base_pin_name);
-	intr = get_gpio_by_name(intr_pin_name);
-
-	if (gpio_base == -1)
-		return NULL;
-	max7315->gpio_base = gpio_base;
-	if (intr != -1) {
-		i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-		max7315->irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
-	} else {
-		i2c_info->irq = -1;
-		max7315->irq_base = -1;
-	}
-	return max7315;
-}
-
-static void *tca6416_platform_data(void *info)
-{
-	static struct pca953x_platform_data tca6416;
-	struct i2c_board_info *i2c_info = info;
-	int gpio_base, intr;
-	char base_pin_name[SFI_NAME_LEN + 1];
-	char intr_pin_name[SFI_NAME_LEN + 1];
-
-	strcpy(i2c_info->type, "tca6416");
-	strcpy(base_pin_name, "tca6416_base");
-	strcpy(intr_pin_name, "tca6416_int");
-
-	gpio_base = get_gpio_by_name(base_pin_name);
-	intr = get_gpio_by_name(intr_pin_name);
-
-	if (gpio_base == -1)
-		return NULL;
-	tca6416.gpio_base = gpio_base;
-	if (intr != -1) {
-		i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-		tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
-	} else {
-		i2c_info->irq = -1;
-		tca6416.irq_base = -1;
-	}
-	return &tca6416;
-}
-
-static void *mpu3050_platform_data(void *info)
-{
-	struct i2c_board_info *i2c_info = info;
-	int intr = get_gpio_by_name("mpu3050_int");
-
-	if (intr == -1)
-		return NULL;
-
-	i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-	return NULL;
-}
-
-static void __init *emc1403_platform_data(void *info)
-{
-	static short intr2nd_pdata;
-	struct i2c_board_info *i2c_info = info;
-	int intr = get_gpio_by_name("thermal_int");
-	int intr2nd = get_gpio_by_name("thermal_alert");
-
-	if (intr == -1 || intr2nd == -1)
-		return NULL;
-
-	i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-	intr2nd_pdata = intr2nd + INTEL_MID_IRQ_OFFSET;
-
-	return &intr2nd_pdata;
-}
-
-static void __init *lis331dl_platform_data(void *info)
-{
-	static short intr2nd_pdata;
-	struct i2c_board_info *i2c_info = info;
-	int intr = get_gpio_by_name("accel_int");
-	int intr2nd = get_gpio_by_name("accel_2");
-
-	if (intr == -1 || intr2nd == -1)
-		return NULL;
-
-	i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-	intr2nd_pdata = intr2nd + INTEL_MID_IRQ_OFFSET;
-
-	return &intr2nd_pdata;
-}
-
-static void __init *no_platform_data(void *info)
-{
-	return NULL;
-}
-
-static struct resource msic_resources[] = {
-	{
-		.start	= INTEL_MSIC_IRQ_PHYS_BASE,
-		.end	= INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1,
-		.flags	= IORESOURCE_MEM,
-	},
-};
-
-static struct intel_msic_platform_data msic_pdata;
-
-static struct platform_device msic_device = {
-	.name		= "intel_msic",
-	.id		= -1,
-	.dev		= {
-		.platform_data	= &msic_pdata,
-	},
-	.num_resources	= ARRAY_SIZE(msic_resources),
-	.resource	= msic_resources,
-};
-
-static inline bool intel_mid_has_msic(void)
-{
-	return intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL;
-}
-
-static int msic_scu_status_change(struct notifier_block *nb,
-				  unsigned long code, void *data)
-{
-	if (code == SCU_DOWN) {
-		platform_device_unregister(&msic_device);
-		return 0;
-	}
-
-	return platform_device_register(&msic_device);
-}
-
-static int __init msic_init(void)
-{
-	static struct notifier_block msic_scu_notifier = {
-		.notifier_call	= msic_scu_status_change,
-	};
-
-	/*
-	 * We need to be sure that the SCU IPC is ready before MSIC device
-	 * can be registered.
-	 */
-	if (intel_mid_has_msic())
-		intel_scu_notifier_add(&msic_scu_notifier);
-
-	return 0;
-}
-arch_initcall(msic_init);
-
-/*
- * msic_generic_platform_data - sets generic platform data for the block
- * @info: pointer to the SFI device table entry for this block
- * @block: MSIC block
- *
- * Function sets IRQ number from the SFI table entry for given device to
- * the MSIC platform data.
- */
-static void *msic_generic_platform_data(void *info, enum intel_msic_block block)
-{
-	struct sfi_device_table_entry *entry = info;
-
-	BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST);
-	msic_pdata.irq[block] = entry->irq;
-
-	return no_platform_data(info);
-}
-
-static void *msic_battery_platform_data(void *info)
-{
-	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY);
-}
-
-static void *msic_gpio_platform_data(void *info)
-{
-	static struct intel_msic_gpio_pdata pdata;
-	int gpio = get_gpio_by_name("msic_gpio_base");
-
-	if (gpio < 0)
-		return NULL;
-
-	pdata.gpio_base = gpio;
-	msic_pdata.gpio = &pdata;
-
-	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO);
-}
-
-static void *msic_audio_platform_data(void *info)
-{
-	struct platform_device *pdev;
-
-	pdev = platform_device_register_simple("sst-platform", -1, NULL, 0);
-	if (IS_ERR(pdev)) {
-		pr_err("failed to create audio platform device\n");
-		return NULL;
-	}
-
-	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO);
-}
-
-static void *msic_power_btn_platform_data(void *info)
-{
-	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN);
-}
-
-static void *msic_ocd_platform_data(void *info)
-{
-	static struct intel_msic_ocd_pdata pdata;
-	int gpio = get_gpio_by_name("ocd_gpio");
-
-	if (gpio < 0)
-		return NULL;
-
-	pdata.gpio = gpio;
-	msic_pdata.ocd = &pdata;
-
-	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
-}
-
-static void *msic_thermal_platform_data(void *info)
-{
-	return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
-}
-
-/* tc35876x DSI-LVDS bridge chip and panel platform data */
-static void *tc35876x_platform_data(void *data)
-{
-	static struct tc35876x_platform_data pdata;
-
-	/* gpio pins set to -1 will not be used by the driver */
-	pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
-	pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
-	pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
-
-	return &pdata;
-}
-
-static const struct devs_id __initconst device_ids[] = {
-	{"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data, NULL},
-	{"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data, NULL},
-	{"pmic_gpio", SFI_DEV_TYPE_IPC, 1, &pmic_gpio_platform_data, &ipc_device_handler},
-	{"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data, NULL},
-	{"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data, NULL},
-	{"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data, NULL},
-	{"tca6416", SFI_DEV_TYPE_I2C, 1, &tca6416_platform_data, NULL},
-	{"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data, NULL},
-	{"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data, NULL},
-	{"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data, &ipc_device_handler},
-	{"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data, NULL},
-	{"i2c_disp_brig", SFI_DEV_TYPE_I2C, 0, &tc35876x_platform_data, NULL},
-
-	/* MSIC subdevices */
-	{"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data, &ipc_device_handler},
-	{"msic_gpio", SFI_DEV_TYPE_IPC, 1, &msic_gpio_platform_data, &ipc_device_handler},
-	{"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data, &ipc_device_handler},
-	{"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data, &ipc_device_handler},
-	{"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data, &ipc_device_handler},
-	{"msic_thermal", SFI_DEV_TYPE_IPC, 1, &msic_thermal_platform_data, &ipc_device_handler},
-	{ 0 }
-};
-
-static void __init ipc_device_handler(struct sfi_device_table_entry *pentry,
-				struct devs_id *dev)
-{
-	struct platform_device *pdev;
-	void *pdata = NULL;
-	static struct resource res __initdata = {
-		.name = "IRQ",
-		.flags = IORESOURCE_IRQ,
-	};
-
-	pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n",
-		pentry->name, pentry->irq);
-
-	/*
-	 * We need to call platform init of IPC devices to fill misc_pdata
-	 * structure. It will be used in msic_init for initialization.
-	 */
-	if (dev != NULL)
-		pdata = dev->get_platform_data(pentry);
-
-	/*
-	 * On Medfield the platform device creation is handled by the MSIC
-	 * MFD driver so we don't need to do it here.
-	 */
-	if (intel_mid_has_msic())
-		return;
-
-	pdev = platform_device_alloc(pentry->name, 0);
-	if (pdev == NULL) {
-		pr_err("out of memory for SFI platform device '%s'.\n",
-			pentry->name);
-		return;
-	}
-	res.start = pentry->irq;
-	platform_device_add_resources(pdev, &res, 1);
-
-	pdev->dev.platform_data = pdata;
-	intel_scu_device_register(pdev);
-}
-
-
-/*
- * we will search these buttons in SFI GPIO table (by name)
- * and register them dynamically. Please add all possible
- * buttons here, we will shrink them if no GPIO found.
- */
-static struct gpio_keys_button gpio_button[] = {
-	{KEY_POWER,		-1, 1, "power_btn",	EV_KEY, 0, 3000},
-	{KEY_PROG1,		-1, 1, "prog_btn1",	EV_KEY, 0, 20},
-	{KEY_PROG2,		-1, 1, "prog_btn2",	EV_KEY, 0, 20},
-	{SW_LID,		-1, 1, "lid_switch",	EV_SW,  0, 20},
-	{KEY_VOLUMEUP,		-1, 1, "vol_up",	EV_KEY, 0, 20},
-	{KEY_VOLUMEDOWN,	-1, 1, "vol_down",	EV_KEY, 0, 20},
-	{KEY_CAMERA,		-1, 1, "camera_full",	EV_KEY, 0, 20},
-	{KEY_CAMERA_FOCUS,	-1, 1, "camera_half",	EV_KEY, 0, 20},
-	{SW_KEYPAD_SLIDE,	-1, 1, "MagSw1",	EV_SW,  0, 20},
-	{SW_KEYPAD_SLIDE,	-1, 1, "MagSw2",	EV_SW,  0, 20},
-};
-
-static struct gpio_keys_platform_data intel_mid_gpio_keys = {
-	.buttons	= gpio_button,
-	.rep		= 1,
-	.nbuttons	= -1, /* will fill it after search */
-};
-
-static struct platform_device pb_device = {
-	.name		= "gpio-keys",
-	.id		= -1,
-	.dev		= {
-		.platform_data	= &intel_mid_gpio_keys,
-	},
-};
-
-/*
- * Shrink the non-existent buttons, register the gpio button
- * device if there is some
- */
-static int __init pb_keys_init(void)
-{
-	struct gpio_keys_button *gb = gpio_button;
-	int i, num, good = 0;
-
-	num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
-	for (i = 0; i < num; i++) {
-		gb[i].gpio = get_gpio_by_name(gb[i].desc);
-		pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc,
-					gb[i].gpio);
-		if (gb[i].gpio == -1)
-			continue;
-
-		if (i != good)
-			gb[good] = gb[i];
-		good++;
-	}
-
-	if (good) {
-		intel_mid_gpio_keys.nbuttons = good;
-		return platform_device_register(&pb_device);
-	}
-	return 0;
-}
-late_initcall(pb_keys_init);
\ No newline at end of file
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 3f1c171e5ce3..c84c1ca396bf 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -42,7 +42,6 @@
 #include <asm/intel_scu_ipc.h>
 #include <asm/apb_timer.h>
 #include <asm/reboot.h>
-#include "intel_mid_weak_decls.h"
 
 #define	SFI_SIG_OEM0	"OEM0"
 #define MAX_IPCDEVS	24
@@ -403,19 +402,20 @@ static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry,
 		i2c_register_board_info(pentry->host_num, &i2c_info, 1);
 }
 
+extern struct devs_id *const __x86_intel_mid_dev_start[],
+		      *const __x86_intel_mid_dev_end[];
+
 static struct devs_id __init *get_device_id(u8 type, char *name)
 {
-	struct devs_id *dev = device_ids;
-
-	if (device_ids == NULL)
-		return NULL;
+	struct devs_id *const *dev_table;
 
-	while (dev->name[0]) {
+	for (dev_table = __x86_intel_mid_dev_start;
+			dev_table < __x86_intel_mid_dev_end; dev_table++) {
+		struct devs_id *dev = *dev_table;
 		if (dev->type == type &&
 			!strncmp(dev->name, name, SFI_NAME_LEN)) {
 			return dev;
 		}
-		dev++;
 	}
 
 	return NULL;
diff --git a/include/linux/sfi.h b/include/linux/sfi.h
index fe817918b30e..d9b436f09925 100644
--- a/include/linux/sfi.h
+++ b/include/linux/sfi.h
@@ -59,6 +59,9 @@
 #ifndef _LINUX_SFI_H
 #define _LINUX_SFI_H
 
+#include <linux/init.h>
+#include <linux/types.h>
+
 /* Table signatures reserved by the SFI specification */
 #define SFI_SIG_SYST		"SYST"
 #define SFI_SIG_FREQ		"FREQ"
-- 
cgit v1.2.3


From a06ccd9c3785fa5550917ae036944f4e080b5749 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 15 Oct 2013 20:14:20 +0100
Subject: regulator: core: Add ability to create a lookup alias for supply

These patches add the ability to create an alternative device on which
a lookup for a certain supply should be conducted.

A common use-case for this would be devices that are logically
represented as a collection of drivers within Linux but are are
presented as a single device from device tree. It this case it is
necessary for each sub device to locate their supply data on the main
device.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/core.c           | 170 +++++++++++++++++++++++++++++++++++++
 drivers/regulator/devres.c         | 163 +++++++++++++++++++++++++++++++++++
 include/linux/regulator/consumer.h |  79 +++++++++++++++++
 3 files changed, 412 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 906deb7354ed..16427de56ce8 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -53,6 +53,7 @@ static DEFINE_MUTEX(regulator_list_mutex);
 static LIST_HEAD(regulator_list);
 static LIST_HEAD(regulator_map_list);
 static LIST_HEAD(regulator_ena_gpio_list);
+static LIST_HEAD(regulator_supply_alias_list);
 static bool has_full_constraints;
 static bool board_wants_dummy_regulator;
 
@@ -83,6 +84,19 @@ struct regulator_enable_gpio {
 	unsigned int ena_gpio_invert:1;
 };
 
+/*
+ * struct regulator_supply_alias
+ *
+ * Used to map lookups for a supply onto an alternative device.
+ */
+struct regulator_supply_alias {
+	struct list_head list;
+	struct device *src_dev;
+	const char *src_supply;
+	struct device *alias_dev;
+	const char *alias_supply;
+};
+
 static int _regulator_is_enabled(struct regulator_dev *rdev);
 static int _regulator_disable(struct regulator_dev *rdev);
 static int _regulator_get_voltage(struct regulator_dev *rdev);
@@ -1173,6 +1187,32 @@ static int _regulator_get_enable_time(struct regulator_dev *rdev)
 	return rdev->desc->ops->enable_time(rdev);
 }
 
+static struct regulator_supply_alias *regulator_find_supply_alias(
+		struct device *dev, const char *supply)
+{
+	struct regulator_supply_alias *map;
+
+	list_for_each_entry(map, &regulator_supply_alias_list, list)
+		if (map->src_dev == dev && strcmp(map->src_supply, supply) == 0)
+			return map;
+
+	return NULL;
+}
+
+static void regulator_supply_alias(struct device **dev, const char **supply)
+{
+	struct regulator_supply_alias *map;
+
+	map = regulator_find_supply_alias(*dev, *supply);
+	if (map) {
+		dev_dbg(*dev, "Mapping supply %s to %s,%s\n",
+				*supply, map->alias_supply,
+				dev_name(map->alias_dev));
+		*dev = map->alias_dev;
+		*supply = map->alias_supply;
+	}
+}
+
 static struct regulator_dev *regulator_dev_lookup(struct device *dev,
 						  const char *supply,
 						  int *ret)
@@ -1182,6 +1222,8 @@ static struct regulator_dev *regulator_dev_lookup(struct device *dev,
 	struct regulator_map *map;
 	const char *devname = NULL;
 
+	regulator_supply_alias(&dev, &supply);
+
 	/* first do a dt based lookup */
 	if (dev && dev->of_node) {
 		node = of_get_regulator(dev, supply);
@@ -1432,6 +1474,134 @@ void regulator_put(struct regulator *regulator)
 }
 EXPORT_SYMBOL_GPL(regulator_put);
 
+/**
+ * regulator_register_supply_alias - Provide device alias for supply lookup
+ *
+ * @dev: device that will be given as the regulator "consumer"
+ * @id: Supply name or regulator ID
+ * @alias_dev: device that should be used to lookup the supply
+ * @alias_id: Supply name or regulator ID that should be used to lookup the
+ * supply
+ *
+ * All lookups for id on dev will instead be conducted for alias_id on
+ * alias_dev.
+ */
+int regulator_register_supply_alias(struct device *dev, const char *id,
+				    struct device *alias_dev,
+				    const char *alias_id)
+{
+	struct regulator_supply_alias *map;
+
+	map = regulator_find_supply_alias(dev, id);
+	if (map)
+		return -EEXIST;
+
+	map = kzalloc(sizeof(struct regulator_supply_alias), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	map->src_dev = dev;
+	map->src_supply = id;
+	map->alias_dev = alias_dev;
+	map->alias_supply = alias_id;
+
+	list_add(&map->list, &regulator_supply_alias_list);
+
+	pr_info("Adding alias for supply %s,%s -> %s,%s\n",
+		id, dev_name(dev), alias_id, dev_name(alias_dev));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(regulator_register_supply_alias);
+
+/**
+ * regulator_unregister_supply_alias - Remove device alias
+ *
+ * @dev: device that will be given as the regulator "consumer"
+ * @id: Supply name or regulator ID
+ *
+ * Remove a lookup alias if one exists for id on dev.
+ */
+void regulator_unregister_supply_alias(struct device *dev, const char *id)
+{
+	struct regulator_supply_alias *map;
+
+	map = regulator_find_supply_alias(dev, id);
+	if (map) {
+		list_del(&map->list);
+		kfree(map);
+	}
+}
+EXPORT_SYMBOL_GPL(regulator_unregister_supply_alias);
+
+/**
+ * regulator_bulk_register_supply_alias - register multiple aliases
+ *
+ * @dev: device that will be given as the regulator "consumer"
+ * @id: List of supply names or regulator IDs
+ * @alias_dev: device that should be used to lookup the supply
+ * @alias_id: List of supply names or regulator IDs that should be used to
+ * lookup the supply
+ * @num_id: Number of aliases to register
+ *
+ * @return 0 on success, an errno on failure.
+ *
+ * This helper function allows drivers to register several supply
+ * aliases in one operation.  If any of the aliases cannot be
+ * registered any aliases that were registered will be removed
+ * before returning to the caller.
+ */
+int regulator_bulk_register_supply_alias(struct device *dev, const char **id,
+					 struct device *alias_dev,
+					 const char **alias_id,
+					 int num_id)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < num_id; ++i) {
+		ret = regulator_register_supply_alias(dev, id[i], alias_dev,
+						      alias_id[i]);
+		if (ret < 0)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	dev_err(dev,
+		"Failed to create supply alias %s,%s -> %s,%s\n",
+		id[i], dev_name(dev), alias_id[i], dev_name(alias_dev));
+
+	while (--i >= 0)
+		regulator_unregister_supply_alias(dev, id[i]);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_bulk_register_supply_alias);
+
+/**
+ * regulator_bulk_unregister_supply_alias - unregister multiple aliases
+ *
+ * @dev: device that will be given as the regulator "consumer"
+ * @id: List of supply names or regulator IDs
+ * @num_id: Number of aliases to unregister
+ *
+ * This helper function allows drivers to unregister several supply
+ * aliases in one operation.
+ */
+void regulator_bulk_unregister_supply_alias(struct device *dev,
+					    const char **id,
+					    int num_id)
+{
+	int i;
+
+	for (i = 0; i < num_id; ++i)
+		regulator_unregister_supply_alias(dev, id[i]);
+}
+EXPORT_SYMBOL_GPL(regulator_bulk_unregister_supply_alias);
+
+
 /* Manage enable GPIO list. Same GPIO pin can be shared among regulators */
 static int regulator_ena_gpio_request(struct regulator_dev *rdev,
 				const struct regulator_config *config)
diff --git a/drivers/regulator/devres.c b/drivers/regulator/devres.c
index 2672a029fa25..f44818b838dc 100644
--- a/drivers/regulator/devres.c
+++ b/drivers/regulator/devres.c
@@ -250,3 +250,166 @@ void devm_regulator_unregister(struct device *dev, struct regulator_dev *rdev)
 		WARN_ON(rc);
 }
 EXPORT_SYMBOL_GPL(devm_regulator_unregister);
+
+struct regulator_supply_alias_match {
+	struct device *dev;
+	const char *id;
+};
+
+static int devm_regulator_match_supply_alias(struct device *dev, void *res,
+					     void *data)
+{
+	struct regulator_supply_alias_match *match = res;
+	struct regulator_supply_alias_match *target = data;
+
+	return match->dev == target->dev && strcmp(match->id, target->id) == 0;
+}
+
+static void devm_regulator_destroy_supply_alias(struct device *dev, void *res)
+{
+	struct regulator_supply_alias_match *match = res;
+
+	regulator_unregister_supply_alias(match->dev, match->id);
+}
+
+/**
+ * devm_regulator_register_supply_alias - Resource managed
+ * regulator_register_supply_alias()
+ *
+ * @dev: device that will be given as the regulator "consumer"
+ * @id: Supply name or regulator ID
+ * @alias_dev: device that should be used to lookup the supply
+ * @alias_id: Supply name or regulator ID that should be used to lookup the
+ * supply
+ *
+ * The supply alias will automatically be unregistered when the source
+ * device is unbound.
+ */
+int devm_regulator_register_supply_alias(struct device *dev, const char *id,
+					 struct device *alias_dev,
+					 const char *alias_id)
+{
+	struct regulator_supply_alias_match *match;
+	int ret;
+
+	match = devres_alloc(devm_regulator_destroy_supply_alias,
+			   sizeof(struct regulator_supply_alias_match),
+			   GFP_KERNEL);
+	if (!match)
+		return -ENOMEM;
+
+	match->dev = dev;
+	match->id = id;
+
+	ret = regulator_register_supply_alias(dev, id, alias_dev, alias_id);
+	if (ret < 0) {
+		devres_free(match);
+		return ret;
+	}
+
+	devres_add(dev, match);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_regulator_register_supply_alias);
+
+/**
+ * devm_regulator_unregister_supply_alias - Resource managed
+ * regulator_unregister_supply_alias()
+ *
+ * @dev: device that will be given as the regulator "consumer"
+ * @id: Supply name or regulator ID
+ *
+ * Unregister an alias registered with
+ * devm_regulator_register_supply_alias(). Normally this function
+ * will not need to be called and the resource management code
+ * will ensure that the resource is freed.
+ */
+void devm_regulator_unregister_supply_alias(struct device *dev, const char *id)
+{
+	struct regulator_supply_alias_match match;
+	int rc;
+
+	match.dev = dev;
+	match.id = id;
+
+	rc = devres_release(dev, devm_regulator_destroy_supply_alias,
+			    devm_regulator_match_supply_alias, &match);
+	if (rc != 0)
+		WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_regulator_unregister_supply_alias);
+
+/**
+ * devm_regulator_bulk_register_supply_alias - Managed register
+ * multiple aliases
+ *
+ * @dev: device that will be given as the regulator "consumer"
+ * @id: List of supply names or regulator IDs
+ * @alias_dev: device that should be used to lookup the supply
+ * @alias_id: List of supply names or regulator IDs that should be used to
+ * lookup the supply
+ * @num_id: Number of aliases to register
+ *
+ * @return 0 on success, an errno on failure.
+ *
+ * This helper function allows drivers to register several supply
+ * aliases in one operation, the aliases will be automatically
+ * unregisters when the source device is unbound.  If any of the
+ * aliases cannot be registered any aliases that were registered
+ * will be removed before returning to the caller.
+ */
+int devm_regulator_bulk_register_supply_alias(struct device *dev,
+					      const char **id,
+					      struct device *alias_dev,
+					      const char **alias_id,
+					      int num_id)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < num_id; ++i) {
+		ret = devm_regulator_register_supply_alias(dev, id[i],
+							   alias_dev,
+							   alias_id[i]);
+		if (ret < 0)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	dev_err(dev,
+		"Failed to create supply alias %s,%s -> %s,%s\n",
+		id[i], dev_name(dev), alias_id[i], dev_name(alias_dev));
+
+	while (--i >= 0)
+		devm_regulator_unregister_supply_alias(dev, id[i]);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_regulator_bulk_register_supply_alias);
+
+/**
+ * devm_regulator_bulk_unregister_supply_alias - Managed unregister
+ * multiple aliases
+ *
+ * @dev: device that will be given as the regulator "consumer"
+ * @id: List of supply names or regulator IDs
+ * @num_id: Number of aliases to unregister
+ *
+ * Unregister aliases registered with
+ * devm_regulator_bulk_register_supply_alias(). Normally this function
+ * will not need to be called and the resource management code
+ * will ensure that the resource is freed.
+ */
+void devm_regulator_bulk_unregister_supply_alias(struct device *dev,
+						 const char **id,
+						 int num_id)
+{
+	int i;
+
+	for (i = 0; i < num_id; ++i)
+		devm_regulator_unregister_supply_alias(dev, id[i]);
+}
+EXPORT_SYMBOL_GPL(devm_regulator_bulk_unregister_supply_alias);
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 27be915caa96..e530681bea70 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -146,6 +146,32 @@ struct regulator *__must_check devm_regulator_get_optional(struct device *dev,
 void regulator_put(struct regulator *regulator);
 void devm_regulator_put(struct regulator *regulator);
 
+int regulator_register_supply_alias(struct device *dev, const char *id,
+				    struct device *alias_dev,
+				    const char *alias_id);
+void regulator_unregister_supply_alias(struct device *dev, const char *id);
+
+int regulator_bulk_register_supply_alias(struct device *dev, const char **id,
+					 struct device *alias_dev,
+					 const char **alias_id, int num_id);
+void regulator_bulk_unregister_supply_alias(struct device *dev,
+					    const char **id, int num_id);
+
+int devm_regulator_register_supply_alias(struct device *dev, const char *id,
+					 struct device *alias_dev,
+					 const char *alias_id);
+void devm_regulator_unregister_supply_alias(struct device *dev,
+					    const char *id);
+
+int devm_regulator_bulk_register_supply_alias(struct device *dev,
+					      const char **id,
+					      struct device *alias_dev,
+					      const char **alias_id,
+					      int num_id);
+void devm_regulator_bulk_unregister_supply_alias(struct device *dev,
+						 const char **id,
+						 int num_id);
+
 /* regulator output control and status */
 int __must_check regulator_enable(struct regulator *regulator);
 int regulator_disable(struct regulator *regulator);
@@ -250,6 +276,59 @@ static inline void devm_regulator_put(struct regulator *regulator)
 {
 }
 
+static inline int regulator_register_supply_alias(struct device *dev,
+						  const char *id,
+						  struct device *alias_dev,
+						  const char *alias_id)
+{
+	return 0;
+}
+
+static inline void regulator_unregister_supply_alias(struct device *dev,
+						    const char *id)
+{
+}
+
+static inline int regulator_bulk_register_supply_alias(struct device *dev,
+						       const char **id,
+						       struct device *alias_dev,
+						       const char **alias_id,
+						       int num_id)
+{
+	return 0;
+}
+
+static inline void regulator_bulk_unregister_supply_alias(struct device *dev,
+							  const char **id,
+							  int num_id)
+{
+}
+
+static inline int devm_regulator_register_supply_alias(struct device *dev,
+						       const char *id,
+						       struct device *alias_dev,
+						       const char *alias_id)
+{
+	return 0;
+}
+
+static inline void devm_regulator_unregister_supply_alias(struct device *dev,
+							  const char *id)
+{
+}
+
+static inline int devm_regulator_bulk_register_supply_alias(
+		struct device *dev, const char **id, struct device *alias_dev,
+		const char **alias_id, int num_id)
+{
+	return 0;
+}
+
+static inline void devm_regulator_bulk_unregister_supply_alias(
+		struct device *dev, const char **id, int num_id)
+{
+}
+
 static inline int regulator_enable(struct regulator *regulator)
 {
 	return 0;
-- 
cgit v1.2.3


From 400dfd3ae899849b27d398ca7894e1b44430887f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 17 Oct 2013 16:27:07 -0700
Subject: net: refactor sk_page_frag_refill()

While working on virtio_net new allocation strategy to increase
payload/truesize ratio, we found that refactoring sk_page_frag_refill()
was needed.

This patch splits sk_page_frag_refill() into two parts, adding
skb_page_frag_refill() which can be used without a socket.

While we are at it, add a minimum frag size of 32 for
sk_page_frag_refill()

Michael will either use netdev_alloc_frag() from softirq context,
or skb_page_frag_refill() from process context in refill_work()
 (GFP_KERNEL allocations)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Michael Dalton <mwdalton@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/sock.c        | 27 +++++++++++++++++++++++----
 2 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1cd32f96055e..ba74474836c0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2062,6 +2062,8 @@ static inline void skb_frag_set_page(struct sk_buff *skb, int f,
 	__skb_frag_set_page(&skb_shinfo(skb)->frags[f], page);
 }
 
+bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
+
 /**
  * skb_frag_dma_map - maps a paged fragment via the DMA API
  * @dev: the device to map the fragment to
diff --git a/net/core/sock.c b/net/core/sock.c
index fd6afa267475..440afdca1e8f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1847,7 +1847,17 @@ EXPORT_SYMBOL(sock_alloc_send_skb);
 /* On 32bit arches, an skb frag is limited to 2^15 */
 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
 
-bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+/**
+ * skb_page_frag_refill - check that a page_frag contains enough room
+ * @sz: minimum size of the fragment we want to get
+ * @pfrag: pointer to page_frag
+ * @prio: priority for memory allocation
+ *
+ * Note: While this allocator tries to use high order pages, there is
+ * no guarantee that allocations succeed. Therefore, @sz MUST be
+ * less or equal than PAGE_SIZE.
+ */
+bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
 {
 	int order;
 
@@ -1856,16 +1866,16 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 			pfrag->offset = 0;
 			return true;
 		}
-		if (pfrag->offset < pfrag->size)
+		if (pfrag->offset + sz <= pfrag->size)
 			return true;
 		put_page(pfrag->page);
 	}
 
 	/* We restrict high order allocations to users that can afford to wait */
-	order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
+	order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
 
 	do {
-		gfp_t gfp = sk->sk_allocation;
+		gfp_t gfp = prio;
 
 		if (order)
 			gfp |= __GFP_COMP | __GFP_NOWARN;
@@ -1877,6 +1887,15 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 		}
 	} while (--order >= 0);
 
+	return false;
+}
+EXPORT_SYMBOL(skb_page_frag_refill);
+
+bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+{
+	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
+		return true;
+
 	sk_enter_memory_pressure(sk);
 	sk_stream_moderate_sndbuf(sk);
 	return false;
-- 
cgit v1.2.3


From 7cc7c5e54b7128195a1403747a63971c3c3f8e25 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 14 Oct 2013 21:49:21 +0100
Subject: net: Delete trailing semi-colon from definition of netdev_WARN()

Macro definitions should not normally end with a semi-colon, as this
makes it dangerous to use them an if...else statement.  Happily this
has not happened yet.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e53b44454ad..27f62f746621 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3049,7 +3049,7 @@ do {								\
  * file/line information and a backtrace.
  */
 #define netdev_WARN(dev, format, args...)			\
-	WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args);
+	WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args)
 
 /* netif printk helpers, similar to netdev_printk */
 
-- 
cgit v1.2.3


From c75b505ddaa08098940441226bb3a87c5b3179c0 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Tue, 8 Oct 2013 10:56:11 +0200
Subject: cpufreq: Add dummy cpufreq_cpu_get/put for CONFIG_CPU_FREQ=n

The drm/i915 driver wants to adjust it's own power policies using the
cpu policies as a guideline (we can implicitly boost the cpus through
the gpus on some platforms). To avoid a dreaded select (since a
depends will leave users wondering where where their driver has gone
too) add dummy functions.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Cc: kbuild test robot <fengguang.wu@intel.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: cpufreq@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 include/linux/cpufreq.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index fcabc42d66ab..5ad9a4e2bc59 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -93,8 +93,16 @@ struct cpufreq_policy {
 #define CPUFREQ_SHARED_TYPE_ALL	 (2) /* All dependent CPUs should set freq */
 #define CPUFREQ_SHARED_TYPE_ANY	 (3) /* Freq can be set from any dependent CPU*/
 
+#ifdef CONFIG_CPU_FREQ
 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu);
 void cpufreq_cpu_put(struct cpufreq_policy *policy);
+#else
+static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
+{
+	return NULL;
+}
+static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { }
+#endif
 
 static inline bool policy_is_shared(struct cpufreq_policy *policy)
 {
-- 
cgit v1.2.3


From bd6a9ddcb9f2cb19dae0509341a4cbaf40dc66f0 Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@gmail.com>
Date: Wed, 16 Oct 2013 19:19:02 +0200
Subject: ARM: tegra: Add Tegra114 powergate support

Extend the list of power gates found on Tegra114. Note that there are
now holes in the list, so perhaps a simple array is no longer the best
data structure to represent it.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/mach-tegra/powergate.c | 43 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/tegra-powergate.h |  9 ++++++++-
 2 files changed, 50 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-tegra/powergate.c b/arch/arm/mach-tegra/powergate.c
index bd10822f8b66..85d28e756bb7 100644
--- a/arch/arm/mach-tegra/powergate.c
+++ b/arch/arm/mach-tegra/powergate.c
@@ -45,6 +45,13 @@ static int tegra_num_cpu_domains;
 static const u8 *tegra_cpu_domains;
 
 static const u8 tegra30_cpu_domains[] = {
+	TEGRA_POWERGATE_CPU,
+	TEGRA_POWERGATE_CPU1,
+	TEGRA_POWERGATE_CPU2,
+	TEGRA_POWERGATE_CPU3,
+};
+
+static const u8 tegra114_cpu_domains[] = {
 	TEGRA_POWERGATE_CPU0,
 	TEGRA_POWERGATE_CPU1,
 	TEGRA_POWERGATE_CPU2,
@@ -190,6 +197,11 @@ int __init tegra_powergate_init(void)
 		tegra_num_cpu_domains = 4;
 		tegra_cpu_domains = tegra30_cpu_domains;
 		break;
+	case TEGRA114:
+		tegra_num_powerdomains = 23;
+		tegra_num_cpu_domains = 4;
+		tegra_cpu_domains = tegra114_cpu_domains;
+		break;
 	default:
 		/* Unknown Tegra variant. Disable powergating */
 		tegra_num_powerdomains = 0;
@@ -230,6 +242,27 @@ static const char * const powergate_name_t30[] = {
 	[TEGRA_POWERGATE_3D1]	= "3d1",
 };
 
+static const char * const powergate_name_t114[] = {
+	[TEGRA_POWERGATE_CPU]	= "cpu0",
+	[TEGRA_POWERGATE_3D]	= "3d",
+	[TEGRA_POWERGATE_VENC]	= "venc",
+	[TEGRA_POWERGATE_VDEC]	= "vdec",
+	[TEGRA_POWERGATE_MPE]	= "mpe",
+	[TEGRA_POWERGATE_HEG]	= "heg",
+	[TEGRA_POWERGATE_CPU1]	= "cpu1",
+	[TEGRA_POWERGATE_CPU2]	= "cpu2",
+	[TEGRA_POWERGATE_CPU3]	= "cpu3",
+	[TEGRA_POWERGATE_CELP]	= "celp",
+	[TEGRA_POWERGATE_CPU0]	= "cpu0",
+	[TEGRA_POWERGATE_C0NC]	= "c0nc",
+	[TEGRA_POWERGATE_C1NC]	= "c1nc",
+	[TEGRA_POWERGATE_DIS]	= "dis",
+	[TEGRA_POWERGATE_DISB]	= "disb",
+	[TEGRA_POWERGATE_XUSBA]	= "xusba",
+	[TEGRA_POWERGATE_XUSBB]	= "xusbb",
+	[TEGRA_POWERGATE_XUSBC]	= "xusbc",
+};
+
 static int powergate_show(struct seq_file *s, void *data)
 {
 	int i;
@@ -237,9 +270,14 @@ static int powergate_show(struct seq_file *s, void *data)
 	seq_printf(s, " powergate powered\n");
 	seq_printf(s, "------------------\n");
 
-	for (i = 0; i < tegra_num_powerdomains; i++)
+	for (i = 0; i < tegra_num_powerdomains; i++) {
+		if (!powergate_name[i])
+			continue;
+
 		seq_printf(s, " %9s %7s\n", powergate_name[i],
 			tegra_powergate_is_powered(i) ? "yes" : "no");
+	}
+
 	return 0;
 }
 
@@ -266,6 +304,9 @@ int __init tegra_powergate_debugfs_init(void)
 	case TEGRA30:
 		powergate_name = powergate_name_t30;
 		break;
+	case TEGRA114:
+		powergate_name = powergate_name_t114;
+		break;
 	}
 
 	if (powergate_name) {
diff --git a/include/linux/tegra-powergate.h b/include/linux/tegra-powergate.h
index 55c29a8d5015..c98cfa406952 100644
--- a/include/linux/tegra-powergate.h
+++ b/include/linux/tegra-powergate.h
@@ -34,8 +34,15 @@ struct clk;
 #define TEGRA_POWERGATE_CPU3	11
 #define TEGRA_POWERGATE_CELP	12
 #define TEGRA_POWERGATE_3D1	13
+#define TEGRA_POWERGATE_CPU0	14
+#define TEGRA_POWERGATE_C0NC	15
+#define TEGRA_POWERGATE_C1NC	16
+#define TEGRA_POWERGATE_DIS	18
+#define TEGRA_POWERGATE_DISB	19
+#define TEGRA_POWERGATE_XUSBA	20
+#define TEGRA_POWERGATE_XUSBB	21
+#define TEGRA_POWERGATE_XUSBC	22
 
-#define TEGRA_POWERGATE_CPU0	TEGRA_POWERGATE_CPU
 #define TEGRA_POWERGATE_3D0	TEGRA_POWERGATE_3D
 
 int tegra_powergate_is_powered(int id);
-- 
cgit v1.2.3


From 29ad23b00474c34e3b5040dda508c78d33a1a3eb Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 14 Oct 2013 17:24:26 +0900
Subject: ftrace: Add set_graph_notrace filter

The set_graph_notrace filter is analogous to set_ftrace_notrace and
can be used for eliminating uninteresting part of function graph trace
output.  It also works with set_graph_function nicely.

  # cd /sys/kernel/debug/tracing/
  # echo do_page_fault > set_graph_function
  # perf ftrace live true
   2)               |  do_page_fault() {
   2)               |    __do_page_fault() {
   2)   0.381 us    |      down_read_trylock();
   2)   0.055 us    |      __might_sleep();
   2)   0.696 us    |      find_vma();
   2)               |      handle_mm_fault() {
   2)               |        handle_pte_fault() {
   2)               |          __do_fault() {
   2)               |            filemap_fault() {
   2)               |              find_get_page() {
   2)   0.033 us    |                __rcu_read_lock();
   2)   0.035 us    |                __rcu_read_unlock();
   2)   1.696 us    |              }
   2)   0.031 us    |              __might_sleep();
   2)   2.831 us    |            }
   2)               |            _raw_spin_lock() {
   2)   0.046 us    |              add_preempt_count();
   2)   0.841 us    |            }
   2)   0.033 us    |            page_add_file_rmap();
   2)               |            _raw_spin_unlock() {
   2)   0.057 us    |              sub_preempt_count();
   2)   0.568 us    |            }
   2)               |            unlock_page() {
   2)   0.084 us    |              page_waitqueue();
   2)   0.126 us    |              __wake_up_bit();
   2)   1.117 us    |            }
   2)   7.729 us    |          }
   2)   8.397 us    |        }
   2)   8.956 us    |      }
   2)   0.085 us    |      up_read();
   2) + 12.745 us   |    }
   2) + 13.401 us   |  }
  ...

  # echo handle_mm_fault > set_graph_notrace
  # perf ftrace live true
   1)               |  do_page_fault() {
   1)               |    __do_page_fault() {
   1)   0.205 us    |      down_read_trylock();
   1)   0.041 us    |      __might_sleep();
   1)   0.344 us    |      find_vma();
   1)   0.069 us    |      up_read();
   1)   4.692 us    |    }
   1)   5.311 us    |  }
  ...

Link: http://lkml.kernel.org/r/1381739066-7531-5-git-send-email-namhyung@kernel.org

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h               |  1 +
 kernel/trace/ftrace.c                | 33 +++++++++++++++++++++
 kernel/trace/trace.h                 | 22 ++++++++++++++
 kernel/trace/trace_functions_graph.c | 56 ++++++++++++++++++++++++++++++++++--
 4 files changed, 109 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9f15c0064c50..ec85d48619e1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -721,6 +721,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 extern char __irqentry_text_start[];
 extern char __irqentry_text_end[];
 
+#define FTRACE_NOTRACE_DEPTH 65536
 #define FTRACE_RETFUNC_DEPTH 50
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
 extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 26a229ab0c19..44e826a79665 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3776,7 +3776,9 @@ static const struct file_operations ftrace_notrace_fops = {
 static DEFINE_MUTEX(graph_lock);
 
 int ftrace_graph_count;
+int ftrace_graph_notrace_count;
 unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
 
 struct ftrace_graph_data {
 	unsigned long *table;
@@ -3890,6 +3892,26 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 	return __ftrace_graph_open(inode, file, fgd);
 }
 
+static int
+ftrace_graph_notrace_open(struct inode *inode, struct file *file)
+{
+	struct ftrace_graph_data *fgd;
+
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
+
+	fgd = kmalloc(sizeof(*fgd), GFP_KERNEL);
+	if (fgd == NULL)
+		return -ENOMEM;
+
+	fgd->table = ftrace_graph_notrace_funcs;
+	fgd->size = FTRACE_GRAPH_MAX_FUNCS;
+	fgd->count = &ftrace_graph_notrace_count;
+	fgd->seq_ops = &ftrace_graph_seq_ops;
+
+	return __ftrace_graph_open(inode, file, fgd);
+}
+
 static int
 ftrace_graph_release(struct inode *inode, struct file *file)
 {
@@ -4011,6 +4033,14 @@ static const struct file_operations ftrace_graph_fops = {
 	.llseek		= ftrace_filter_lseek,
 	.release	= ftrace_graph_release,
 };
+
+static const struct file_operations ftrace_graph_notrace_fops = {
+	.open		= ftrace_graph_notrace_open,
+	.read		= seq_read,
+	.write		= ftrace_graph_write,
+	.llseek		= ftrace_filter_lseek,
+	.release	= ftrace_graph_release,
+};
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
@@ -4032,6 +4062,9 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 	trace_create_file("set_graph_function", 0444, d_tracer,
 				    NULL,
 				    &ftrace_graph_fops);
+	trace_create_file("set_graph_notrace", 0444, d_tracer,
+				    NULL,
+				    &ftrace_graph_notrace_fops);
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 	return 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 40211cef2796..d1cf5159bec0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -732,6 +732,8 @@ extern void __trace_graph_return(struct trace_array *tr,
 #define FTRACE_GRAPH_MAX_FUNCS		32
 extern int ftrace_graph_count;
 extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
+extern int ftrace_graph_notrace_count;
+extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];
 
 static inline int ftrace_graph_addr(unsigned long addr)
 {
@@ -757,11 +759,31 @@ static inline int ftrace_graph_addr(unsigned long addr)
 
 	return 0;
 }
+
+static inline int ftrace_graph_notrace_addr(unsigned long addr)
+{
+	int i;
+
+	if (!ftrace_graph_notrace_count)
+		return 0;
+
+	for (i = 0; i < ftrace_graph_notrace_count; i++) {
+		if (addr == ftrace_graph_notrace_funcs[i])
+			return 1;
+	}
+
+	return 0;
+}
 #else
 static inline int ftrace_graph_addr(unsigned long addr)
 {
 	return 1;
 }
+
+static inline int ftrace_graph_notrace_addr(unsigned long addr)
+{
+	return 0;
+}
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b5c09242683d..e08c030b8f38 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -114,16 +114,37 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 		return -EBUSY;
 	}
 
+	/*
+	 * The curr_ret_stack is an index to ftrace return stack of
+	 * current task.  Its value should be in [0, FTRACE_RETFUNC_
+	 * DEPTH) when the function graph tracer is used.  To support
+	 * filtering out specific functions, it makes the index
+	 * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH)
+	 * so when it sees a negative index the ftrace will ignore
+	 * the record.  And the index gets recovered when returning
+	 * from the filtered function by adding the FTRACE_NOTRACE_
+	 * DEPTH and then it'll continue to record functions normally.
+	 *
+	 * The curr_ret_stack is initialized to -1 and get increased
+	 * in this function.  So it can be less than -1 only if it was
+	 * filtered out via ftrace_graph_notrace_addr() which can be
+	 * set from set_graph_notrace file in debugfs by user.
+	 */
+	if (current->curr_ret_stack < -1)
+		return -EBUSY;
+
 	calltime = trace_clock_local();
 
 	index = ++current->curr_ret_stack;
+	if (ftrace_graph_notrace_addr(func))
+		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
 	barrier();
 	current->ret_stack[index].ret = ret;
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
 	current->ret_stack[index].subtime = 0;
 	current->ret_stack[index].fp = frame_pointer;
-	*depth = index;
+	*depth = current->curr_ret_stack;
 
 	return 0;
 }
@@ -137,7 +158,17 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
 
 	index = current->curr_ret_stack;
 
-	if (unlikely(index < 0)) {
+	/*
+	 * A negative index here means that it's just returned from a
+	 * notrace'd function.  Recover index to get an original
+	 * return address.  See ftrace_push_return_trace().
+	 *
+	 * TODO: Need to check whether the stack gets corrupted.
+	 */
+	if (index < 0)
+		index += FTRACE_NOTRACE_DEPTH;
+
+	if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
 		ftrace_graph_stop();
 		WARN_ON(1);
 		/* Might as well panic, otherwise we have no where to go */
@@ -193,6 +224,15 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 	trace.rettime = trace_clock_local();
 	barrier();
 	current->curr_ret_stack--;
+	/*
+	 * The curr_ret_stack can be less than -1 only if it was
+	 * filtered out and it's about to return from the function.
+	 * Recover the index and continue to trace normal functions.
+	 */
+	if (current->curr_ret_stack < -1) {
+		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
+		return ret;
+	}
 
 	/*
 	 * The trace should run after decrementing the ret counter
@@ -259,10 +299,20 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 
 	/* trace it when it is-nested-in or is a function enabled. */
 	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
-	     ftrace_graph_ignore_irqs()) ||
+	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
 	    (max_depth && trace->depth >= max_depth))
 		return 0;
 
+	/*
+	 * Do not trace a function if it's filtered by set_graph_notrace.
+	 * Make the index of ret stack negative to indicate that it should
+	 * ignore further functions.  But it needs its own ret stack entry
+	 * to recover the original index in order to continue tracing after
+	 * returning from the function.
+	 */
+	if (ftrace_graph_notrace_addr(trace->func))
+		return 1;
+
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
-- 
cgit v1.2.3


From 1653d2f88f43f6780eace1faeeb7b7adde8c10fb Mon Sep 17 00:00:00 2001
From: Thomas Pugliese <thomas.pugliese@gmail.com>
Date: Mon, 7 Oct 2013 10:07:51 -0500
Subject: usb: wusbcore: preserve endianness of cached descriptors

Do not overwrite the multi-byte fields of usb_wa_descriptor with their
cpu format values after reading the descriptor.  Leave the values as
__le16 and swap on use.  This is more consistent with other uses of USB
descriptors.

Signed-off-by: Thomas Pugliese <thomas.pugliese@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/hwa-hc.c       | 10 +++-------
 drivers/usb/wusbcore/wa-rpipe.c |  2 +-
 include/linux/usb/wusb-wa.h     |  6 +++---
 3 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index e58b92491eb1..e5fb3cfd57a9 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -570,14 +570,10 @@ found:
 		goto error;
 	}
 	wa->wa_descr = wa_descr = (struct usb_wa_descriptor *) hdr;
-	/* Make LE fields CPU order */
-	wa_descr->bcdWAVersion = le16_to_cpu(wa_descr->bcdWAVersion);
-	wa_descr->wNumRPipes = le16_to_cpu(wa_descr->wNumRPipes);
-	wa_descr->wRPipeMaxBlock = le16_to_cpu(wa_descr->wRPipeMaxBlock);
-	if (wa_descr->bcdWAVersion > 0x0100)
+	if (le16_to_cpu(wa_descr->bcdWAVersion) > 0x0100)
 		dev_warn(dev, "Wire Adapter v%d.%d newer than groked v1.0\n",
-			 wa_descr->bcdWAVersion & 0xff00 >> 8,
-			 wa_descr->bcdWAVersion & 0x00ff);
+			 le16_to_cpu(wa_descr->bcdWAVersion) & 0xff00 >> 8,
+			 le16_to_cpu(wa_descr->bcdWAVersion) & 0x00ff);
 	result = 0;
 error:
 	return result;
diff --git a/drivers/usb/wusbcore/wa-rpipe.c b/drivers/usb/wusbcore/wa-rpipe.c
index 50de1d2c7b72..1ed068accb76 100644
--- a/drivers/usb/wusbcore/wa-rpipe.c
+++ b/drivers/usb/wusbcore/wa-rpipe.c
@@ -480,7 +480,7 @@ error:
  */
 int wa_rpipes_create(struct wahc *wa)
 {
-	wa->rpipes = wa->wa_descr->wNumRPipes;
+	wa->rpipes = le16_to_cpu(wa->wa_descr->wNumRPipes);
 	wa->rpipe_bm = kzalloc(BITS_TO_LONGS(wa->rpipes)*sizeof(unsigned long),
 			       GFP_KERNEL);
 	if (wa->rpipe_bm == NULL)
diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h
index 9ae7e299bf77..c1257130769b 100644
--- a/include/linux/usb/wusb-wa.h
+++ b/include/linux/usb/wusb-wa.h
@@ -279,11 +279,11 @@ struct wa_xfer_result {
 struct usb_wa_descriptor {
 	u8	bLength;
 	u8	bDescriptorType;
-	u16	bcdWAVersion;
+	__le16	bcdWAVersion;
 	u8	bNumPorts;		/* don't use!! */
 	u8	bmAttributes;		/* Reserved == 0 */
-	u16	wNumRPipes;
-	u16	wRPipeMaxBlock;
+	__le16	wNumRPipes;
+	__le16	wRPipeMaxBlock;
 	u8	bRPipeBlockSize;
 	u8	bPwrOn2PwrGood;
 	u8	bNumMMCIEs;
-- 
cgit v1.2.3


From 79a9becda8940deb2274b5aa4577c86d52ee7ecb Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 17 Oct 2013 10:21:36 -0700
Subject: gpiolib: export descriptor-based GPIO interface

This patch exports the gpiod_* family of API functions, a safer
alternative to the legacy GPIO interface. Differences between the gpiod
and legacy gpio APIs are:

- gpio works with integers, whereas gpiod operates on opaque handlers
  which cannot be forged or used before proper acquisition
- gpiod get/set functions are aware of the active low state of a GPIO
- gpio consumers should now include <linux/gpio/consumer.h> to access
  the new interface, whereas chips drivers will use
  <linux/gpio/driver.h>

The legacy gpio API is now built as inline functions on top of gpiod.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c        | 422 ++++++++++++++++++++++--------------------
 include/asm-generic/gpio.h    | 222 +++++++---------------
 include/linux/gpio.h          |  11 +-
 include/linux/gpio/consumer.h | 238 ++++++++++++++++++++++++
 include/linux/gpio/driver.h   | 127 +++++++++++++
 5 files changed, 658 insertions(+), 362 deletions(-)
 create mode 100644 include/linux/gpio/consumer.h
 create mode 100644 include/linux/gpio/driver.h

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index d66139dc410d..224abdc4b095 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -16,16 +16,11 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/gpio.h>
 
-/* Optional implementation infrastructure for GPIO interfaces.
+/* Implementation infrastructure for GPIO interfaces.
  *
- * Platforms may want to use this if they tend to use very many GPIOs
- * that aren't part of a System-On-Chip core; or across I2C/SPI/etc.
- *
- * When kernel footprint or instruction count is an issue, simpler
- * implementations may be preferred.  The GPIO programming interface
- * allows for inlining speed-critical get/set operations for common
- * cases, so that access to SOC-integrated GPIOs can sometimes cost
- * only an instruction or two per bit.
+ * The GPIO programming interface allows for inlining speed-critical
+ * get/set operations for common cases, so that access to SOC-integrated
+ * GPIOs can sometimes cost only an instruction or two per bit.
  */
 
 
@@ -57,7 +52,7 @@ struct gpio_desc {
 #define FLAG_SYSFS	3	/* exported via /sys/class/gpio/control */
 #define FLAG_TRIG_FALL	4	/* trigger on falling edge */
 #define FLAG_TRIG_RISE	5	/* trigger on rising edge */
-#define FLAG_ACTIVE_LOW	6	/* sysfs value has active low */
+#define FLAG_ACTIVE_LOW	6	/* value has active low */
 #define FLAG_OPEN_DRAIN	7	/* Gpio is open drain type */
 #define FLAG_OPEN_SOURCE 8	/* Gpio is open source type */
 #define FLAG_USED_AS_IRQ 9	/* GPIO is connected to an IRQ */
@@ -81,29 +76,8 @@ static LIST_HEAD(gpio_chips);
 static DEFINE_IDR(dirent_idr);
 #endif
 
-/*
- * Internal gpiod_* API using descriptors instead of the integer namespace.
- * Most of this should eventually go public.
- */
 static int gpiod_request(struct gpio_desc *desc, const char *label);
 static void gpiod_free(struct gpio_desc *desc);
-static int gpiod_direction_input(struct gpio_desc *desc);
-static int gpiod_direction_output(struct gpio_desc *desc, int value);
-static int gpiod_get_direction(const struct gpio_desc *desc);
-static int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce);
-static int gpiod_get_value_cansleep(const struct gpio_desc *desc);
-static void gpiod_set_value_cansleep(struct gpio_desc *desc, int value);
-static int gpiod_get_value(const struct gpio_desc *desc);
-static void gpiod_set_value(struct gpio_desc *desc, int value);
-static int gpiod_cansleep(const struct gpio_desc *desc);
-static int gpiod_to_irq(const struct gpio_desc *desc);
-static int gpiod_lock_as_irq(struct gpio_desc *desc);
-static void gpiod_unlock_as_irq(struct gpio_desc *desc);
-static int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
-static int gpiod_export_link(struct device *dev, const char *name,
-			     struct gpio_desc *desc);
-static int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value);
-static void gpiod_unexport(struct gpio_desc *desc);
 
 #ifdef CONFIG_DEBUG_FS
 #define gpiod_emerg(desc, fmt, ...)			                \
@@ -157,13 +131,14 @@ static int gpio_chip_hwgpio(const struct gpio_desc *desc)
 /**
  * Convert a GPIO number to its descriptor
  */
-static struct gpio_desc *gpio_to_desc(unsigned gpio)
+struct gpio_desc *gpio_to_desc(unsigned gpio)
 {
 	if (WARN(!gpio_is_valid(gpio), "invalid GPIO %d\n", gpio))
 		return NULL;
 	else
 		return &gpio_desc[gpio];
 }
+EXPORT_SYMBOL_GPL(gpio_to_desc);
 
 /**
  * Convert an offset on a certain chip to a corresponding descriptor
@@ -181,10 +156,11 @@ static struct gpio_desc *gpiochip_offset_to_desc(struct gpio_chip *chip,
  * This should disappear in the future but is needed since we still
  * use GPIO numbers for error messages and sysfs nodes
  */
-static int desc_to_gpio(const struct gpio_desc *desc)
+int desc_to_gpio(const struct gpio_desc *desc)
 {
 	return desc - &gpio_desc[0];
 }
+EXPORT_SYMBOL_GPL(desc_to_gpio);
 
 
 /* Warn when drivers omit gpio_request() calls -- legal but ill-advised
@@ -219,16 +195,15 @@ static int gpio_ensure_requested(struct gpio_desc *desc)
 	return 0;
 }
 
-static struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
+/**
+ * gpiod_to_chip - Return the GPIO chip to which a GPIO descriptor belongs
+ * @desc:	descriptor to return the chip of
+ */
+struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
 {
 	return desc ? desc->chip : NULL;
 }
-
-/* caller holds gpio_lock *OR* gpio is marked as requested */
-struct gpio_chip *gpio_to_chip(unsigned gpio)
-{
-	return gpiod_to_chip(gpio_to_desc(gpio));
-}
+EXPORT_SYMBOL_GPL(gpiod_to_chip);
 
 /* dynamic allocation of GPIOs, e.g. on a hotplugged device */
 static int gpiochip_find_base(int ngpio)
@@ -254,8 +229,15 @@ static int gpiochip_find_base(int ngpio)
 	}
 }
 
-/* caller ensures gpio is valid and requested, chip->get_direction may sleep  */
-static int gpiod_get_direction(const struct gpio_desc *desc)
+/**
+ * gpiod_get_direction - return the current direction of a GPIO
+ * @desc:	GPIO to get the direction of
+ *
+ * Return GPIOF_DIR_IN or GPIOF_DIR_OUT, or an error code in case of error.
+ *
+ * This function may sleep if gpiod_cansleep() is true.
+ */
+int gpiod_get_direction(const struct gpio_desc *desc)
 {
 	struct gpio_chip	*chip;
 	unsigned		offset;
@@ -281,6 +263,7 @@ static int gpiod_get_direction(const struct gpio_desc *desc)
 	}
 	return status;
 }
+EXPORT_SYMBOL_GPL(gpiod_get_direction);
 
 #ifdef CONFIG_GPIO_SYSFS
 
@@ -365,17 +348,10 @@ static ssize_t gpio_value_show(struct device *dev,
 
 	mutex_lock(&sysfs_lock);
 
-	if (!test_bit(FLAG_EXPORT, &desc->flags)) {
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
 		status = -EIO;
-	} else {
-		int value;
-
-		value = !!gpiod_get_value_cansleep(desc);
-		if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
-			value = !value;
-
-		status = sprintf(buf, "%d\n", value);
-	}
+	else
+		status = sprintf(buf, "%d\n", gpiod_get_value_cansleep(desc));
 
 	mutex_unlock(&sysfs_lock);
 	return status;
@@ -398,9 +374,7 @@ static ssize_t gpio_value_store(struct device *dev,
 
 		status = kstrtol(buf, 0, &value);
 		if (status == 0) {
-			if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
-				value = !value;
-			gpiod_set_value_cansleep(desc, value != 0);
+			gpiod_set_value_cansleep(desc, value);
 			status = size;
 		}
 	}
@@ -790,7 +764,7 @@ static struct class gpio_class = {
 
 
 /**
- * gpio_export - export a GPIO through sysfs
+ * gpiod_export - export a GPIO through sysfs
  * @gpio: gpio to make available, already requested
  * @direction_may_change: true if userspace may change gpio direction
  * Context: arch_initcall or later
@@ -804,7 +778,7 @@ static struct class gpio_class = {
  *
  * Returns zero on success, else an error.
  */
-static int gpiod_export(struct gpio_desc *desc, bool direction_may_change)
+int gpiod_export(struct gpio_desc *desc, bool direction_may_change)
 {
 	unsigned long		flags;
 	int			status;
@@ -882,12 +856,7 @@ fail_unlock:
 		 status);
 	return status;
 }
-
-int gpio_export(unsigned gpio, bool direction_may_change)
-{
-	return gpiod_export(gpio_to_desc(gpio), direction_may_change);
-}
-EXPORT_SYMBOL_GPL(gpio_export);
+EXPORT_SYMBOL_GPL(gpiod_export);
 
 static int match_export(struct device *dev, const void *data)
 {
@@ -895,7 +864,7 @@ static int match_export(struct device *dev, const void *data)
 }
 
 /**
- * gpio_export_link - create a sysfs link to an exported GPIO node
+ * gpiod_export_link - create a sysfs link to an exported GPIO node
  * @dev: device under which to create symlink
  * @name: name of the symlink
  * @gpio: gpio to create symlink to, already exported
@@ -905,8 +874,8 @@ static int match_export(struct device *dev, const void *data)
  *
  * Returns zero on success, else an error.
  */
-static int gpiod_export_link(struct device *dev, const char *name,
-			     struct gpio_desc *desc)
+int gpiod_export_link(struct device *dev, const char *name,
+		      struct gpio_desc *desc)
 {
 	int			status = -EINVAL;
 
@@ -937,15 +906,10 @@ static int gpiod_export_link(struct device *dev, const char *name,
 
 	return status;
 }
-
-int gpio_export_link(struct device *dev, const char *name, unsigned gpio)
-{
-	return gpiod_export_link(dev, name, gpio_to_desc(gpio));
-}
-EXPORT_SYMBOL_GPL(gpio_export_link);
+EXPORT_SYMBOL_GPL(gpiod_export_link);
 
 /**
- * gpio_sysfs_set_active_low - set the polarity of gpio sysfs value
+ * gpiod_sysfs_set_active_low - set the polarity of gpio sysfs value
  * @gpio: gpio to change
  * @value: non-zero to use active low, i.e. inverted values
  *
@@ -956,7 +920,7 @@ EXPORT_SYMBOL_GPL(gpio_export_link);
  *
  * Returns zero on success, else an error.
  */
-static int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value)
+int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value)
 {
 	struct device		*dev = NULL;
 	int			status = -EINVAL;
@@ -987,20 +951,15 @@ unlock:
 
 	return status;
 }
-
-int gpio_sysfs_set_active_low(unsigned gpio, int value)
-{
-	return gpiod_sysfs_set_active_low(gpio_to_desc(gpio), value);
-}
-EXPORT_SYMBOL_GPL(gpio_sysfs_set_active_low);
+EXPORT_SYMBOL_GPL(gpiod_sysfs_set_active_low);
 
 /**
- * gpio_unexport - reverse effect of gpio_export()
+ * gpiod_unexport - reverse effect of gpio_export()
  * @gpio: gpio to make unavailable
  *
  * This is implicit on gpio_free().
  */
-static void gpiod_unexport(struct gpio_desc *desc)
+void gpiod_unexport(struct gpio_desc *desc)
 {
 	int			status = 0;
 	struct device		*dev = NULL;
@@ -1033,12 +992,7 @@ static void gpiod_unexport(struct gpio_desc *desc)
 		pr_debug("%s: gpio%d status %d\n", __func__, desc_to_gpio(desc),
 			 status);
 }
-
-void gpio_unexport(unsigned gpio)
-{
-	gpiod_unexport(gpio_to_desc(gpio));
-}
-EXPORT_SYMBOL_GPL(gpio_unexport);
+EXPORT_SYMBOL_GPL(gpiod_unexport);
 
 static int gpiochip_export(struct gpio_chip *chip)
 {
@@ -1145,27 +1099,6 @@ static inline void gpiochip_unexport(struct gpio_chip *chip)
 {
 }
 
-static inline int gpiod_export(struct gpio_desc *desc,
-			       bool direction_may_change)
-{
-	return -ENOSYS;
-}
-
-static inline int gpiod_export_link(struct device *dev, const char *name,
-				    struct gpio_desc *desc)
-{
-	return -ENOSYS;
-}
-
-static inline int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value)
-{
-	return -ENOSYS;
-}
-
-static inline void gpiod_unexport(struct gpio_desc *desc)
-{
-}
-
 #endif /* CONFIG_GPIO_SYSFS */
 
 /*
@@ -1677,7 +1610,16 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested);
  * rely on gpio_request() having been called beforehand.
  */
 
-static int gpiod_direction_input(struct gpio_desc *desc)
+/**
+ * gpiod_direction_input - set the GPIO direction to input
+ * @desc:	GPIO to set to input
+ *
+ * Set the direction of the passed GPIO to input, such as gpiod_get_value() can
+ * be called safely on it.
+ *
+ * Return 0 in case of success, else an error code.
+ */
+int gpiod_direction_input(struct gpio_desc *desc)
 {
 	unsigned long		flags;
 	struct gpio_chip	*chip;
@@ -1734,14 +1676,19 @@ fail:
 		gpiod_dbg(desc, "%s status %d\n", __func__, status);
 	return status;
 }
+EXPORT_SYMBOL_GPL(gpiod_direction_input);
 
-int gpio_direction_input(unsigned gpio)
-{
-	return gpiod_direction_input(gpio_to_desc(gpio));
-}
-EXPORT_SYMBOL_GPL(gpio_direction_input);
-
-static int gpiod_direction_output(struct gpio_desc *desc, int value)
+/**
+ * gpiod_direction_output - set the GPIO direction to input
+ * @desc:	GPIO to set to output
+ * @value:	initial output value of the GPIO
+ *
+ * Set the direction of the passed GPIO to output, such as gpiod_set_value() can
+ * be called safely on it. The initial value of the output must be specified.
+ *
+ * Return 0 in case of success, else an error code.
+ */
+int gpiod_direction_output(struct gpio_desc *desc, int value)
 {
 	unsigned long		flags;
 	struct gpio_chip	*chip;
@@ -1814,22 +1761,17 @@ fail:
 		gpiod_dbg(desc, "%s: gpio status %d\n", __func__, status);
 	return status;
 }
-
-int gpio_direction_output(unsigned gpio, int value)
-{
-	return gpiod_direction_output(gpio_to_desc(gpio), value);
-}
-EXPORT_SYMBOL_GPL(gpio_direction_output);
+EXPORT_SYMBOL_GPL(gpiod_direction_output);
 
 /**
- * gpio_set_debounce - sets @debounce time for a @gpio
+ * gpiod_set_debounce - sets @debounce time for a @gpio
  * @gpio: the gpio to set debounce time
  * @debounce: debounce time is microseconds
  *
  * returns -ENOTSUPP if the controller does not support setting
  * debounce.
  */
-static int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
+int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
 {
 	unsigned long		flags;
 	struct gpio_chip	*chip;
@@ -1871,12 +1813,19 @@ fail:
 
 	return status;
 }
+EXPORT_SYMBOL_GPL(gpiod_set_debounce);
 
-int gpio_set_debounce(unsigned gpio, unsigned debounce)
+/**
+ * gpiod_is_active_low - test whether a GPIO is active-low or not
+ * @desc: the gpio descriptor to test
+ *
+ * Returns 1 if the GPIO is active-low, 0 otherwise.
+ */
+int gpiod_is_active_low(const struct gpio_desc *desc)
 {
-	return gpiod_set_debounce(gpio_to_desc(gpio), debounce);
+	return test_bit(FLAG_ACTIVE_LOW, &desc->flags);
 }
-EXPORT_SYMBOL_GPL(gpio_set_debounce);
+EXPORT_SYMBOL_GPL(gpiod_is_active_low);
 
 /* I/O calls are only valid after configuration completed; the relevant
  * "is this a valid GPIO" error checks should already have been done.
@@ -1900,7 +1849,7 @@ EXPORT_SYMBOL_GPL(gpio_set_debounce);
  * that the GPIO was actually requested.
  */
 
-static int _gpiod_get_value(const struct gpio_desc *desc)
+static int _gpiod_get_raw_value(const struct gpio_desc *desc)
 {
 	struct gpio_chip	*chip;
 	int value;
@@ -1914,33 +1863,54 @@ static int _gpiod_get_value(const struct gpio_desc *desc)
 }
 
 /**
- * __gpio_get_value() - return a gpio's value
- * @gpio: gpio whose value will be returned
- * Context: any
+ * gpiod_get_raw_value() - return a gpio's raw value
+ * @desc: gpio whose value will be returned
  *
- * This is used directly or indirectly to implement gpio_get_value().
- * It returns the zero or nonzero value provided by the associated
- * gpio_chip.get() method; or zero if no such method is provided.
+ * Return the GPIO's raw value, i.e. the value of the physical line disregarding
+ * its ACTIVE_LOW status.
+ *
+ * This function should be called from contexts where we cannot sleep, and will
+ * complain if the GPIO chip functions potentially sleep.
  */
-static int gpiod_get_value(const struct gpio_desc *desc)
+int gpiod_get_raw_value(const struct gpio_desc *desc)
 {
 	if (!desc)
 		return 0;
 	/* Should be using gpio_get_value_cansleep() */
 	WARN_ON(desc->chip->can_sleep);
-	return _gpiod_get_value(desc);
+	return _gpiod_get_raw_value(desc);
 }
+EXPORT_SYMBOL_GPL(gpiod_get_raw_value);
 
-int __gpio_get_value(unsigned gpio)
+/**
+ * gpiod_get_value() - return a gpio's value
+ * @desc: gpio whose value will be returned
+ *
+ * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into
+ * account.
+ *
+ * This function should be called from contexts where we cannot sleep, and will
+ * complain if the GPIO chip functions potentially sleep.
+ */
+int gpiod_get_value(const struct gpio_desc *desc)
 {
-	return gpiod_get_value(gpio_to_desc(gpio));
+	int value;
+	if (!desc)
+		return 0;
+	/* Should be using gpio_get_value_cansleep() */
+	WARN_ON(desc->chip->can_sleep);
+
+	value = _gpiod_get_raw_value(desc);
+	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+		value = !value;
+
+	return value;
 }
-EXPORT_SYMBOL_GPL(__gpio_get_value);
+EXPORT_SYMBOL_GPL(gpiod_get_value);
 
 /*
  *  _gpio_set_open_drain_value() - Set the open drain gpio's value.
- * @gpio: Gpio whose state need to be set.
- * @chip: Gpio chip.
+ * @desc: gpio descriptor whose state need to be set.
  * @value: Non-zero for setting it HIGH otherise it will set to LOW.
  */
 static void _gpio_set_open_drain_value(struct gpio_desc *desc, int value)
@@ -1966,9 +1936,8 @@ static void _gpio_set_open_drain_value(struct gpio_desc *desc, int value)
 }
 
 /*
- *  _gpio_set_open_source() - Set the open source gpio's value.
- * @gpio: Gpio whose state need to be set.
- * @chip: Gpio chip.
+ *  _gpio_set_open_source_value() - Set the open source gpio's value.
+ * @desc: gpio descriptor whose state need to be set.
  * @value: Non-zero for setting it HIGH otherise it will set to LOW.
  */
 static void _gpio_set_open_source_value(struct gpio_desc *desc, int value)
@@ -1993,7 +1962,7 @@ static void _gpio_set_open_source_value(struct gpio_desc *desc, int value)
 			  __func__, err);
 }
 
-static void _gpiod_set_value(struct gpio_desc *desc, int value)
+static void _gpiod_set_raw_value(struct gpio_desc *desc, int value)
 {
 	struct gpio_chip	*chip;
 
@@ -2008,62 +1977,70 @@ static void _gpiod_set_value(struct gpio_desc *desc, int value)
 }
 
 /**
- * __gpio_set_value() - assign a gpio's value
- * @gpio: gpio whose value will be assigned
+ * gpiod_set_raw_value() - assign a gpio's raw value
+ * @desc: gpio whose value will be assigned
  * @value: value to assign
- * Context: any
  *
- * This is used directly or indirectly to implement gpio_set_value().
- * It invokes the associated gpio_chip.set() method.
+ * Set the raw value of the GPIO, i.e. the value of its physical line without
+ * regard for its ACTIVE_LOW status.
+ *
+ * This function should be called from contexts where we cannot sleep, and will
+ * complain if the GPIO chip functions potentially sleep.
  */
-static void gpiod_set_value(struct gpio_desc *desc, int value)
+void gpiod_set_raw_value(struct gpio_desc *desc, int value)
 {
-
 	if (!desc)
 		return;
 	/* Should be using gpio_set_value_cansleep() */
 	WARN_ON(desc->chip->can_sleep);
-	_gpiod_set_value(desc, value);
+	_gpiod_set_raw_value(desc, value);
 }
+EXPORT_SYMBOL_GPL(gpiod_set_raw_value);
 
-void __gpio_set_value(unsigned gpio, int value)
+/**
+ * gpiod_set_value() - assign a gpio's value
+ * @desc: gpio whose value will be assigned
+ * @value: value to assign
+ *
+ * Set the logical value of the GPIO, i.e. taking its ACTIVE_LOW status into
+ * account
+ *
+ * This function should be called from contexts where we cannot sleep, and will
+ * complain if the GPIO chip functions potentially sleep.
+ */
+void gpiod_set_value(struct gpio_desc *desc, int value)
 {
-	return gpiod_set_value(gpio_to_desc(gpio), value);
+	if (!desc)
+		return;
+	/* Should be using gpio_set_value_cansleep() */
+	WARN_ON(desc->chip->can_sleep);
+	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+		value = !value;
+	_gpiod_set_raw_value(desc, value);
 }
-EXPORT_SYMBOL_GPL(__gpio_set_value);
+EXPORT_SYMBOL_GPL(gpiod_set_value);
 
 /**
- * __gpio_cansleep() - report whether gpio value access will sleep
- * @gpio: gpio in question
- * Context: any
+ * gpiod_cansleep() - report whether gpio value access may sleep
+ * @desc: gpio to check
  *
- * This is used directly or indirectly to implement gpio_cansleep().  It
- * returns nonzero if access reading or writing the GPIO value can sleep.
  */
-static int gpiod_cansleep(const struct gpio_desc *desc)
+int gpiod_cansleep(const struct gpio_desc *desc)
 {
 	if (!desc)
 		return 0;
-	/* only call this on GPIOs that are valid! */
 	return desc->chip->can_sleep;
 }
-
-int __gpio_cansleep(unsigned gpio)
-{
-	return gpiod_cansleep(gpio_to_desc(gpio));
-}
-EXPORT_SYMBOL_GPL(__gpio_cansleep);
+EXPORT_SYMBOL_GPL(gpiod_cansleep);
 
 /**
- * __gpio_to_irq() - return the IRQ corresponding to a GPIO
- * @gpio: gpio whose IRQ will be returned (already requested)
- * Context: any
+ * gpiod_to_irq() - return the IRQ corresponding to a GPIO
+ * @desc: gpio whose IRQ will be returned (already requested)
  *
- * This is used directly or indirectly to implement gpio_to_irq().
- * It returns the number of the IRQ signaled by this (input) GPIO,
- * or a negative errno.
+ * Return the IRQ corresponding to the passed GPIO, or an error code in case of
+ * error.
  */
-static int gpiod_to_irq(const struct gpio_desc *desc)
+int gpiod_to_irq(const struct gpio_desc *desc)
 {
 	struct gpio_chip	*chip;
 	int			offset;
@@ -2074,12 +2051,7 @@ static int gpiod_to_irq(const struct gpio_desc *desc)
 	offset = gpio_chip_hwgpio(desc);
 	return chip->to_irq ? chip->to_irq(chip, offset) : -ENXIO;
 }
-
-int __gpio_to_irq(unsigned gpio)
-{
-	return gpiod_to_irq(gpio_to_desc(gpio));
-}
-EXPORT_SYMBOL_GPL(__gpio_to_irq);
+EXPORT_SYMBOL_GPL(gpiod_to_irq);
 
 /**
  * gpiod_lock_as_irq() - lock a GPIO to be used as IRQ
@@ -2091,7 +2063,7 @@ EXPORT_SYMBOL_GPL(__gpio_to_irq);
  * of its irq_chip implementation if the GPIO is known from that
  * code.
  */
-static int gpiod_lock_as_irq(struct gpio_desc *desc)
+int gpiod_lock_as_irq(struct gpio_desc *desc)
 {
 	if (!desc)
 		return -EINVAL;
@@ -2106,6 +2078,7 @@ static int gpiod_lock_as_irq(struct gpio_desc *desc)
 	set_bit(FLAG_USED_AS_IRQ, &desc->flags);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(gpiod_lock_as_irq);
 
 int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
 {
@@ -2120,13 +2093,14 @@ EXPORT_SYMBOL_GPL(gpio_lock_as_irq);
  * This is used directly by GPIO drivers that want to indicate
  * that a certain GPIO is no longer used exclusively for IRQ.
  */
-static void gpiod_unlock_as_irq(struct gpio_desc *desc)
+void gpiod_unlock_as_irq(struct gpio_desc *desc)
 {
 	if (!desc)
 		return;
 
 	clear_bit(FLAG_USED_AS_IRQ, &desc->flags);
 }
+EXPORT_SYMBOL_GPL(gpiod_unlock_as_irq);
 
 void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset)
 {
@@ -2134,37 +2108,89 @@ void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset)
 }
 EXPORT_SYMBOL_GPL(gpio_unlock_as_irq);
 
-/* There's no value in making it easy to inline GPIO calls that may sleep.
- * Common examples include ones connected to I2C or SPI chips.
+/**
+ * gpiod_get_raw_value_cansleep() - return a gpio's raw value
+ * @desc: gpio whose value will be returned
+ *
+ * Return the GPIO's raw value, i.e. the value of the physical line disregarding
+ * its ACTIVE_LOW status.
+ *
+ * This function is to be called from contexts that can sleep.
  */
-
-static int gpiod_get_value_cansleep(const struct gpio_desc *desc)
+int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
 {
 	might_sleep_if(extra_checks);
 	if (!desc)
 		return 0;
-	return _gpiod_get_value(desc);
+	return _gpiod_get_raw_value(desc);
 }
+EXPORT_SYMBOL_GPL(gpiod_get_raw_value_cansleep);
 
-int gpio_get_value_cansleep(unsigned gpio)
+/**
+ * gpiod_get_value_cansleep() - return a gpio's value
+ * @desc: gpio whose value will be returned
+ *
+ * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into
+ * account.
+ *
+ * This function is to be called from contexts that can sleep.
+ */
+int gpiod_get_value_cansleep(const struct gpio_desc *desc)
 {
-	return gpiod_get_value_cansleep(gpio_to_desc(gpio));
+	int value;
+
+	might_sleep_if(extra_checks);
+	if (!desc)
+		return 0;
+
+	value = _gpiod_get_raw_value(desc);
+	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+		value = !value;
+
+	return value;
 }
-EXPORT_SYMBOL_GPL(gpio_get_value_cansleep);
+EXPORT_SYMBOL_GPL(gpiod_get_value_cansleep);
 
-static void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
+/**
+ * gpiod_set_raw_value_cansleep() - assign a gpio's raw value
+ * @desc: gpio whose value will be assigned
+ * @value: value to assign
+ *
+ * Set the raw value of the GPIO, i.e. the value of its physical line without
+ * regard for its ACTIVE_LOW status.
+ *
+ * This function is to be called from contexts that can sleep.
+ */
+void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value)
 {
 	might_sleep_if(extra_checks);
 	if (!desc)
 		return;
-	_gpiod_set_value(desc, value);
+	_gpiod_set_raw_value(desc, value);
 }
+EXPORT_SYMBOL_GPL(gpiod_set_raw_value_cansleep);
 
-void gpio_set_value_cansleep(unsigned gpio, int value)
+/**
+ * gpiod_set_value_cansleep() - assign a gpio's value
+ * @desc: gpio whose value will be assigned
+ * @value: value to assign
+ *
+ * Set the logical value of the GPIO, i.e. taking its ACTIVE_LOW status into
+ * account
+ *
+ * This function is to be called from contexts that can sleep.
+ */
+void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 {
-	return gpiod_set_value_cansleep(gpio_to_desc(gpio), value);
+	might_sleep_if(extra_checks);
+	if (!desc)
+		return;
+
+	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+		value = !value;
+	_gpiod_set_raw_value(desc, value);
 }
-EXPORT_SYMBOL_GPL(gpio_set_value_cansleep);
+EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index b309a5c0019e..00f8f0a9edcd 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -10,6 +10,8 @@
 #ifdef CONFIG_GPIOLIB
 
 #include <linux/compiler.h>
+#include <linux/gpio/driver.h>
+#include <linux/gpio/consumer.h>
 
 /* Platforms may implement their GPIO interface with library code,
  * at a small performance cost for non-inlined operations and some
@@ -49,122 +51,11 @@ struct module;
 struct device_node;
 struct gpio_desc;
 
-/**
- * struct gpio_chip - abstract a GPIO controller
- * @label: for diagnostics
- * @dev: optional device providing the GPIOs
- * @owner: helps prevent removal of modules exporting active GPIOs
- * @list: links gpio_chips together for traversal
- * @request: optional hook for chip-specific activation, such as
- *	enabling module power and clock; may sleep
- * @free: optional hook for chip-specific deactivation, such as
- *	disabling module power and clock; may sleep
- * @get_direction: returns direction for signal "offset", 0=out, 1=in,
- *	(same as GPIOF_DIR_XXX), or negative error
- * @direction_input: configures signal "offset" as input, or returns error
- * @get: returns value for signal "offset"; for output signals this
- *	returns either the value actually sensed, or zero
- * @direction_output: configures signal "offset" as output, or returns error
- * @set_debounce: optional hook for setting debounce time for specified gpio in
- *      interrupt triggered gpio chips
- * @set: assigns output value for signal "offset"
- * @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
- *	implementation may not sleep
- * @dbg_show: optional routine to show contents in debugfs; default code
- *	will be used when this is omitted, but custom code can show extra
- *	state (such as pullup/pulldown configuration).
- * @base: identifies the first GPIO number handled by this chip; or, if
- *	negative during registration, requests dynamic ID allocation.
- * @ngpio: the number of GPIOs handled by this controller; the last GPIO
- *	handled is (base + ngpio - 1).
- * @desc: array of ngpio descriptors. Private.
- * @can_sleep: flag must be set iff get()/set() methods sleep, as they
- *	must while accessing GPIO expander chips over I2C or SPI
- * @names: if set, must be an array of strings to use as alternative
- *      names for the GPIOs in this chip. Any entry in the array
- *      may be NULL if there is no alias for the GPIO, however the
- *      array must be @ngpio entries long.  A name can include a single printk
- *      format specifier for an unsigned int.  It is substituted by the actual
- *      number of the gpio.
- *
- * A gpio_chip can help platforms abstract various sources of GPIOs so
- * they can all be accessed through a common programing interface.
- * Example sources would be SOC controllers, FPGAs, multifunction
- * chips, dedicated GPIO expanders, and so on.
- *
- * Each chip controls a number of signals, identified in method calls
- * by "offset" values in the range 0..(@ngpio - 1).  When those signals
- * are referenced through calls like gpio_get_value(gpio), the offset
- * is calculated by subtracting @base from the gpio number.
- */
-struct gpio_chip {
-	const char		*label;
-	struct device		*dev;
-	struct module		*owner;
-	struct list_head        list;
-
-	int			(*request)(struct gpio_chip *chip,
-						unsigned offset);
-	void			(*free)(struct gpio_chip *chip,
-						unsigned offset);
-	int			(*get_direction)(struct gpio_chip *chip,
-						unsigned offset);
-	int			(*direction_input)(struct gpio_chip *chip,
-						unsigned offset);
-	int			(*get)(struct gpio_chip *chip,
-						unsigned offset);
-	int			(*direction_output)(struct gpio_chip *chip,
-						unsigned offset, int value);
-	int			(*set_debounce)(struct gpio_chip *chip,
-						unsigned offset, unsigned debounce);
-
-	void			(*set)(struct gpio_chip *chip,
-						unsigned offset, int value);
-
-	int			(*to_irq)(struct gpio_chip *chip,
-						unsigned offset);
-
-	void			(*dbg_show)(struct seq_file *s,
-						struct gpio_chip *chip);
-	int			base;
-	u16			ngpio;
-	struct gpio_desc	*desc;
-	const char		*const *names;
-	unsigned		can_sleep:1;
-	unsigned		exported:1;
-
-#if defined(CONFIG_OF_GPIO)
-	/*
-	 * If CONFIG_OF is enabled, then all GPIO controllers described in the
-	 * device tree automatically may have an OF translation
-	 */
-	struct device_node *of_node;
-	int of_gpio_n_cells;
-	int (*of_xlate)(struct gpio_chip *gc,
-		        const struct of_phandle_args *gpiospec, u32 *flags);
-#endif
-#ifdef CONFIG_PINCTRL
-	/*
-	 * If CONFIG_PINCTRL is enabled, then gpio controllers can optionally
-	 * describe the actual pin range which they serve in an SoC. This
-	 * information would be used by pinctrl subsystem to configure
-	 * corresponding pins for gpio usage.
-	 */
-	struct list_head pin_ranges;
-#endif
-};
-
-extern const char *gpiochip_is_requested(struct gpio_chip *chip,
-			unsigned offset);
-extern struct gpio_chip *gpio_to_chip(unsigned gpio);
-
-/* add/remove chips */
-extern int gpiochip_add(struct gpio_chip *chip);
-extern int __must_check gpiochip_remove(struct gpio_chip *chip);
-extern struct gpio_chip *gpiochip_find(void *data,
-					int (*match)(struct gpio_chip *chip,
-						     void *data));
-
+/* caller holds gpio_lock *OR* gpio is marked as requested */
+static inline struct gpio_chip *gpio_to_chip(unsigned gpio)
+{
+	return gpiod_to_chip(gpio_to_desc(gpio));
+}
 
 /* Always use the library code for GPIO management calls,
  * or when sleeping may be involved.
@@ -172,25 +63,52 @@ extern struct gpio_chip *gpiochip_find(void *data,
 extern int gpio_request(unsigned gpio, const char *label);
 extern void gpio_free(unsigned gpio);
 
-extern int gpio_direction_input(unsigned gpio);
-extern int gpio_direction_output(unsigned gpio, int value);
+static inline int gpio_direction_input(unsigned gpio)
+{
+	return gpiod_direction_input(gpio_to_desc(gpio));
+}
+static inline int gpio_direction_output(unsigned gpio, int value)
+{
+	return gpiod_direction_output(gpio_to_desc(gpio), value);
+}
 
-extern int gpio_set_debounce(unsigned gpio, unsigned debounce);
+static inline int gpio_set_debounce(unsigned gpio, unsigned debounce)
+{
+	return gpiod_set_debounce(gpio_to_desc(gpio), debounce);
+}
 
-extern int gpio_get_value_cansleep(unsigned gpio);
-extern void gpio_set_value_cansleep(unsigned gpio, int value);
+static inline int gpio_get_value_cansleep(unsigned gpio)
+{
+	return gpiod_get_raw_value_cansleep(gpio_to_desc(gpio));
+}
+static inline void gpio_set_value_cansleep(unsigned gpio, int value)
+{
+	return gpiod_set_raw_value_cansleep(gpio_to_desc(gpio), value);
+}
 
 
 /* A platform's <asm/gpio.h> code may want to inline the I/O calls when
  * the GPIO is constant and refers to some always-present controller,
  * giving direct access to chip registers and tight bitbanging loops.
  */
-extern int __gpio_get_value(unsigned gpio);
-extern void __gpio_set_value(unsigned gpio, int value);
+static inline int __gpio_get_value(unsigned gpio)
+{
+	return gpiod_get_raw_value(gpio_to_desc(gpio));
+}
+static inline void __gpio_set_value(unsigned gpio, int value)
+{
+	return gpiod_set_raw_value(gpio_to_desc(gpio), value);
+}
 
-extern int __gpio_cansleep(unsigned gpio);
+static inline int __gpio_cansleep(unsigned gpio)
+{
+	return gpiod_cansleep(gpio_to_desc(gpio));
+}
 
-extern int __gpio_to_irq(unsigned gpio);
+static inline int __gpio_to_irq(unsigned gpio)
+{
+	return gpiod_to_irq(gpio_to_desc(gpio));
+}
 
 extern int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset);
 extern void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
@@ -199,19 +117,30 @@ extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *labe
 extern int gpio_request_array(const struct gpio *array, size_t num);
 extern void gpio_free_array(const struct gpio *array, size_t num);
 
-#ifdef CONFIG_GPIO_SYSFS
-
 /*
  * A sysfs interface can be exported by individual drivers if they want,
  * but more typically is configured entirely from userspace.
  */
-extern int gpio_export(unsigned gpio, bool direction_may_change);
-extern int gpio_export_link(struct device *dev, const char *name,
-			unsigned gpio);
-extern int gpio_sysfs_set_active_low(unsigned gpio, int value);
-extern void gpio_unexport(unsigned gpio);
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	return gpiod_export(gpio_to_desc(gpio), direction_may_change);
+}
 
-#endif	/* CONFIG_GPIO_SYSFS */
+static inline int gpio_export_link(struct device *dev, const char *name,
+				   unsigned gpio)
+{
+	return gpiod_export_link(dev, name, gpio_to_desc(gpio));
+}
+
+static inline int gpio_sysfs_set_active_low(unsigned gpio, int value)
+{
+	return gpiod_sysfs_set_active_low(gpio_to_desc(gpio), value);
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+	gpiod_unexport(gpio_to_desc(gpio));
+}
 
 #ifdef CONFIG_PINCTRL
 
@@ -281,31 +210,4 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
 
 #endif /* !CONFIG_GPIOLIB */
 
-#ifndef CONFIG_GPIO_SYSFS
-
-struct device;
-
-/* sysfs support is only available with gpiolib, where it's optional */
-
-static inline int gpio_export(unsigned gpio, bool direction_may_change)
-{
-	return -ENOSYS;
-}
-
-static inline int gpio_export_link(struct device *dev, const char *name,
-				unsigned gpio)
-{
-	return -ENOSYS;
-}
-
-static inline int gpio_sysfs_set_active_low(unsigned gpio, int value)
-{
-	return -ENOSYS;
-}
-
-static inline void gpio_unexport(unsigned gpio)
-{
-}
-#endif	/* CONFIG_GPIO_SYSFS */
-
 #endif /* _ASM_GENERIC_GPIO_H */
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index a06ec3e85ba3..c691df044458 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -16,14 +16,17 @@
 #define GPIOF_OUT_INIT_LOW	(GPIOF_DIR_OUT | GPIOF_INIT_LOW)
 #define GPIOF_OUT_INIT_HIGH	(GPIOF_DIR_OUT | GPIOF_INIT_HIGH)
 
+/* Gpio pin is active-low */
+#define GPIOF_ACTIVE_LOW        (1 << 2)
+
 /* Gpio pin is open drain */
-#define GPIOF_OPEN_DRAIN	(1 << 2)
+#define GPIOF_OPEN_DRAIN	(1 << 3)
 
 /* Gpio pin is open source */
-#define GPIOF_OPEN_SOURCE	(1 << 3)
+#define GPIOF_OPEN_SOURCE	(1 << 4)
 
-#define GPIOF_EXPORT		(1 << 4)
-#define GPIOF_EXPORT_CHANGEABLE	(1 << 5)
+#define GPIOF_EXPORT		(1 << 5)
+#define GPIOF_EXPORT_CHANGEABLE	(1 << 6)
 #define GPIOF_EXPORT_DIR_FIXED	(GPIOF_EXPORT)
 #define GPIOF_EXPORT_DIR_CHANGEABLE (GPIOF_EXPORT | GPIOF_EXPORT_CHANGEABLE)
 
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
new file mode 100644
index 000000000000..2088eb50421c
--- /dev/null
+++ b/include/linux/gpio/consumer.h
@@ -0,0 +1,238 @@
+#ifndef __LINUX_GPIO_CONSUMER_H
+#define __LINUX_GPIO_CONSUMER_H
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_GPIOLIB
+
+struct device;
+struct gpio_chip;
+
+/**
+ * Opaque descriptor for a GPIO. These are obtained using gpiod_get() and are
+ * preferable to the old integer-based handles.
+ *
+ * Contrary to integers, a pointer to a gpio_desc is guaranteed to be valid
+ * until the GPIO is released.
+ */
+struct gpio_desc;
+
+int gpiod_get_direction(const struct gpio_desc *desc);
+int gpiod_direction_input(struct gpio_desc *desc);
+int gpiod_direction_output(struct gpio_desc *desc, int value);
+
+/* Value get/set from non-sleeping context */
+int gpiod_get_value(const struct gpio_desc *desc);
+void gpiod_set_value(struct gpio_desc *desc, int value);
+int gpiod_get_raw_value(const struct gpio_desc *desc);
+void gpiod_set_raw_value(struct gpio_desc *desc, int value);
+
+/* Value get/set from sleeping context */
+int gpiod_get_value_cansleep(const struct gpio_desc *desc);
+void gpiod_set_value_cansleep(struct gpio_desc *desc, int value);
+int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc);
+void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value);
+
+int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce);
+
+int gpiod_is_active_low(const struct gpio_desc *desc);
+int gpiod_cansleep(const struct gpio_desc *desc);
+
+int gpiod_to_irq(const struct gpio_desc *desc);
+
+/* Convert between the old gpio_ and new gpiod_ interfaces */
+struct gpio_desc *gpio_to_desc(unsigned gpio);
+int desc_to_gpio(const struct gpio_desc *desc);
+struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc);
+
+#else /* CONFIG_GPIOLIB */
+
+static inline struct gpio_desc *__must_check gpiod_get(struct device *dev,
+						       const char *con_id)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
+							     const char *con_id,
+							     unsigned int idx)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline void gpiod_put(struct gpio_desc *desc)
+{
+	might_sleep();
+
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
+static inline struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
+							    const char *con_id)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline
+struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
+						    const char *con_id,
+						    unsigned int idx)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc)
+{
+	might_sleep();
+
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
+
+static inline int gpiod_get_direction(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -ENOSYS;
+}
+static inline int gpiod_direction_input(struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -ENOSYS;
+}
+static inline int gpiod_direction_output(struct gpio_desc *desc, int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -ENOSYS;
+}
+
+
+static inline int gpiod_get_value(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline void gpiod_set_value(struct gpio_desc *desc, int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+static inline int gpiod_get_raw_value(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
+static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc,
+						int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
+static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -ENOSYS;
+}
+
+static inline int gpiod_is_active_low(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline int gpiod_cansleep(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+
+static inline int gpiod_to_irq(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+static inline struct gpio_desc *gpio_to_desc(unsigned gpio)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline int desc_to_gpio(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -EINVAL;
+}
+static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return ERR_PTR(-ENODEV);
+}
+
+
+#endif /* CONFIG_GPIOLIB */
+
+#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS)
+
+int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
+int gpiod_export_link(struct device *dev, const char *name,
+		      struct gpio_desc *desc);
+int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value);
+void gpiod_unexport(struct gpio_desc *desc);
+
+#else  /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */
+
+static inline int gpiod_export(struct gpio_desc *desc,
+			       bool direction_may_change)
+{
+	return -ENOSYS;
+}
+
+static inline int gpiod_export_link(struct device *dev, const char *name,
+				    struct gpio_desc *desc)
+{
+	return -ENOSYS;
+}
+
+static inline int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value)
+{
+	return -ENOSYS;
+}
+
+static inline void gpiod_unexport(struct gpio_desc *desc)
+{
+}
+
+#endif /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */
+
+#endif
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
new file mode 100644
index 000000000000..5dc172c72f0f
--- /dev/null
+++ b/include/linux/gpio/driver.h
@@ -0,0 +1,127 @@
+#ifndef __LINUX_GPIO_DRIVER_H
+#define __LINUX_GPIO_DRIVER_H
+
+#include <linux/types.h>
+
+struct device;
+struct gpio_desc;
+
+/**
+ * struct gpio_chip - abstract a GPIO controller
+ * @label: for diagnostics
+ * @dev: optional device providing the GPIOs
+ * @owner: helps prevent removal of modules exporting active GPIOs
+ * @list: links gpio_chips together for traversal
+ * @request: optional hook for chip-specific activation, such as
+ *	enabling module power and clock; may sleep
+ * @free: optional hook for chip-specific deactivation, such as
+ *	disabling module power and clock; may sleep
+ * @get_direction: returns direction for signal "offset", 0=out, 1=in,
+ *	(same as GPIOF_DIR_XXX), or negative error
+ * @direction_input: configures signal "offset" as input, or returns error
+ * @direction_output: configures signal "offset" as output, or returns error
+ * @get: returns value for signal "offset"; for output signals this
+ *	returns either the value actually sensed, or zero
+ * @set: assigns output value for signal "offset"
+ * @set_debounce: optional hook for setting debounce time for specified gpio in
+ *      interrupt triggered gpio chips
+ * @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
+ *	implementation may not sleep
+ * @dbg_show: optional routine to show contents in debugfs; default code
+ *	will be used when this is omitted, but custom code can show extra
+ *	state (such as pullup/pulldown configuration).
+ * @base: identifies the first GPIO number handled by this chip; or, if
+ *	negative during registration, requests dynamic ID allocation.
+ * @ngpio: the number of GPIOs handled by this controller; the last GPIO
+ *	handled is (base + ngpio - 1).
+ * @desc: array of ngpio descriptors. Private.
+ * @can_sleep: flag must be set iff get()/set() methods sleep, as they
+ *	must while accessing GPIO expander chips over I2C or SPI
+ * @names: if set, must be an array of strings to use as alternative
+ *      names for the GPIOs in this chip. Any entry in the array
+ *      may be NULL if there is no alias for the GPIO, however the
+ *      array must be @ngpio entries long.  A name can include a single printk
+ *      format specifier for an unsigned int.  It is substituted by the actual
+ *      number of the gpio.
+ *
+ * A gpio_chip can help platforms abstract various sources of GPIOs so
+ * they can all be accessed through a common programing interface.
+ * Example sources would be SOC controllers, FPGAs, multifunction
+ * chips, dedicated GPIO expanders, and so on.
+ *
+ * Each chip controls a number of signals, identified in method calls
+ * by "offset" values in the range 0..(@ngpio - 1).  When those signals
+ * are referenced through calls like gpio_get_value(gpio), the offset
+ * is calculated by subtracting @base from the gpio number.
+ */
+struct gpio_chip {
+	const char		*label;
+	struct device		*dev;
+	struct module		*owner;
+	struct list_head        list;
+
+	int			(*request)(struct gpio_chip *chip,
+						unsigned offset);
+	void			(*free)(struct gpio_chip *chip,
+						unsigned offset);
+	int			(*get_direction)(struct gpio_chip *chip,
+						unsigned offset);
+	int			(*direction_input)(struct gpio_chip *chip,
+						unsigned offset);
+	int			(*direction_output)(struct gpio_chip *chip,
+						unsigned offset, int value);
+	int			(*get)(struct gpio_chip *chip,
+						unsigned offset);
+	void			(*set)(struct gpio_chip *chip,
+						unsigned offset, int value);
+	int			(*set_debounce)(struct gpio_chip *chip,
+						unsigned offset,
+						unsigned debounce);
+
+	int			(*to_irq)(struct gpio_chip *chip,
+						unsigned offset);
+
+	void			(*dbg_show)(struct seq_file *s,
+						struct gpio_chip *chip);
+	int			base;
+	u16			ngpio;
+	struct gpio_desc	*desc;
+	const char		*const *names;
+	unsigned		can_sleep:1;
+	unsigned		exported:1;
+
+#if defined(CONFIG_OF_GPIO)
+	/*
+	 * If CONFIG_OF is enabled, then all GPIO controllers described in the
+	 * device tree automatically may have an OF translation
+	 */
+	struct device_node *of_node;
+	int of_gpio_n_cells;
+	int (*of_xlate)(struct gpio_chip *gc,
+			const struct of_phandle_args *gpiospec, u32 *flags);
+#endif
+#ifdef CONFIG_PINCTRL
+	/*
+	 * If CONFIG_PINCTRL is enabled, then gpio controllers can optionally
+	 * describe the actual pin range which they serve in an SoC. This
+	 * information would be used by pinctrl subsystem to configure
+	 * corresponding pins for gpio usage.
+	 */
+	struct list_head pin_ranges;
+#endif
+};
+
+extern const char *gpiochip_is_requested(struct gpio_chip *chip,
+			unsigned offset);
+
+/* add/remove chips */
+extern int gpiochip_add(struct gpio_chip *chip);
+extern int __must_check gpiochip_remove(struct gpio_chip *chip);
+extern struct gpio_chip *gpiochip_find(void *data,
+			      int (*match)(struct gpio_chip *chip, void *data));
+
+/* lock/unlock as IRQ */
+int gpiod_lock_as_irq(struct gpio_desc *desc);
+void gpiod_unlock_as_irq(struct gpio_desc *desc);
+
+#endif
-- 
cgit v1.2.3


From af8b6375a8291fe2cf77707f3edec86b98a999cc Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 17 Oct 2013 10:21:37 -0700
Subject: gpiolib: port of_ functions to use gpiod

Refactor the of_ functions of gpiolib to use the now public gpiod
interface, and export of_get_named_gpiod_flags() and
of_get_gpiod_flags() functions.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 28 +++++++++++++++++-----------
 include/linux/of_gpio.h   | 29 ++++++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 0dfaf20e4dad..32a396d891be 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -15,19 +15,21 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/io.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_gpio.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/slab.h>
 
+struct gpio_desc;
+
 /* Private data structure for of_gpiochip_find_and_xlate */
 struct gg_data {
 	enum of_gpio_flags *flags;
 	struct of_phandle_args gpiospec;
 
-	int out_gpio;
+	struct gpio_desc *out_gpio;
 };
 
 /* Private function for resolving node pointer to gpio_chip */
@@ -45,28 +47,31 @@ static int of_gpiochip_find_and_xlate(struct gpio_chip *gc, void *data)
 	if (ret < 0)
 		return false;
 
-	gg_data->out_gpio = ret + gc->base;
+	gg_data->out_gpio = gpio_to_desc(ret + gc->base);
 	return true;
 }
 
 /**
- * of_get_named_gpio_flags() - Get a GPIO number and flags to use with GPIO API
+ * of_get_named_gpiod_flags() - Get a GPIO descriptor and flags for GPIO API
  * @np:		device node to get GPIO from
  * @propname:	property name containing gpio specifier(s)
  * @index:	index of the GPIO
  * @flags:	a flags pointer to fill in
  *
- * Returns GPIO number to use with Linux generic GPIO API, or one of the errno
+ * Returns GPIO descriptor to use with Linux GPIO API, or one of the errno
  * value on the error condition. If @flags is not NULL the function also fills
  * in flags for the GPIO.
  */
-int of_get_named_gpio_flags(struct device_node *np, const char *propname,
-			   int index, enum of_gpio_flags *flags)
+struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
+		     const char *propname, int index, enum of_gpio_flags *flags)
 {
 	/* Return -EPROBE_DEFER to support probe() functions to be called
 	 * later when the GPIO actually becomes available
 	 */
-	struct gg_data gg_data = { .flags = flags, .out_gpio = -EPROBE_DEFER };
+	struct gg_data gg_data = {
+		.flags = flags,
+		.out_gpio = ERR_PTR(-EPROBE_DEFER)
+	};
 	int ret;
 
 	/* .of_xlate might decide to not fill in the flags, so clear it. */
@@ -78,16 +83,17 @@ int of_get_named_gpio_flags(struct device_node *np, const char *propname,
 	if (ret) {
 		pr_debug("%s: can't parse gpios property of node '%s[%d]'\n",
 			__func__, np->full_name, index);
-		return ret;
+		return ERR_PTR(ret);
 	}
 
 	gpiochip_find(&gg_data, of_gpiochip_find_and_xlate);
 
 	of_node_put(gg_data.gpiospec.np);
-	pr_debug("%s exited with status %d\n", __func__, gg_data.out_gpio);
+	pr_debug("%s exited with status %d\n", __func__,
+		 PTR_RET(gg_data.out_gpio));
 	return gg_data.out_gpio;
 }
-EXPORT_SYMBOL(of_get_named_gpio_flags);
+EXPORT_SYMBOL(of_get_named_gpiod_flags);
 
 /**
  * of_gpio_simple_xlate - translate gpio_spec to the GPIO number and flags
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index a83dc6f5008e..d71f2cc141ae 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -21,6 +21,7 @@
 #include <linux/of.h>
 
 struct device_node;
+struct gpio_desc;
 
 /*
  * This is Linux-specific flags. By default controllers' and Linux' mapping
@@ -47,7 +48,7 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc)
 	return container_of(gc, struct of_mm_gpio_chip, gc);
 }
 
-extern int of_get_named_gpio_flags(struct device_node *np,
+extern struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags);
 
 extern int of_mm_gpiochip_add(struct device_node *np,
@@ -62,10 +63,10 @@ extern int of_gpio_simple_xlate(struct gpio_chip *gc,
 #else /* CONFIG_OF_GPIO */
 
 /* Drivers may not strictly depend on the GPIO support, so let them link. */
-static inline int of_get_named_gpio_flags(struct device_node *np,
+static inline struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags)
 {
-	return -ENOSYS;
+	return ERR_PTR(-ENOSYS);
 }
 
 static inline int of_gpio_simple_xlate(struct gpio_chip *gc,
@@ -80,6 +81,18 @@ static inline void of_gpiochip_remove(struct gpio_chip *gc) { }
 
 #endif /* CONFIG_OF_GPIO */
 
+static inline int of_get_named_gpio_flags(struct device_node *np,
+		const char *list_name, int index, enum of_gpio_flags *flags)
+{
+	struct gpio_desc *desc;
+	desc = of_get_named_gpiod_flags(np, list_name, index, flags);
+
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+	else
+		return desc_to_gpio(desc);
+}
+
 /**
  * of_gpio_named_count() - Count GPIOs for a device
  * @np:		device node to count GPIOs for
@@ -117,15 +130,21 @@ static inline int of_gpio_count(struct device_node *np)
 }
 
 /**
- * of_get_gpio_flags() - Get a GPIO number and flags to use with GPIO API
+ * of_get_gpiod_flags() - Get a GPIO descriptor and flags to use with GPIO API
  * @np:		device node to get GPIO from
  * @index:	index of the GPIO
  * @flags:	a flags pointer to fill in
  *
- * Returns GPIO number to use with Linux generic GPIO API, or one of the errno
+ * Returns GPIO descriptor to use with Linux generic GPIO API, or a errno
  * value on the error condition. If @flags is not NULL the function also fills
  * in flags for the GPIO.
  */
+static inline struct gpio_desc *of_get_gpiod_flags(struct device_node *np,
+					int index, enum of_gpio_flags *flags)
+{
+	return of_get_named_gpiod_flags(np, "gpios", index, flags);
+}
+
 static inline int of_get_gpio_flags(struct device_node *np, int index,
 		      enum of_gpio_flags *flags)
 {
-- 
cgit v1.2.3


From bae48da237fcedd7ad09569025483b988635efb7 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 17 Oct 2013 10:21:38 -0700
Subject: gpiolib: add gpiod_get() and gpiod_put() functions

Add gpiod_get(), gpiod_get_index() and gpiod_put() functions that
provide safer management of GPIOs.

These functions put the GPIO framework in line with the conventions of
other frameworks in the kernel, and help ensure every GPIO is declared
properly and valid while it is used.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/devres.c         |  83 +++++++++++++++++
 drivers/gpio/gpiolib.c        | 203 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/gpio/consumer.h |  15 ++++
 include/linux/gpio/driver.h   |  56 ++++++++++++
 4 files changed, 357 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpio/devres.c b/drivers/gpio/devres.c
index 3e7812f0405e..2caa2571734e 100644
--- a/drivers/gpio/devres.c
+++ b/drivers/gpio/devres.c
@@ -19,6 +19,89 @@
 #include <linux/device.h>
 #include <linux/gfp.h>
 
+static void devm_gpiod_release(struct device *dev, void *res)
+{
+	struct gpio_desc **desc = res;
+
+	gpiod_put(*desc);
+}
+
+static int devm_gpiod_match(struct device *dev, void *res, void *data)
+{
+	struct gpio_desc **this = res, **gpio = data;
+
+	return *this == *gpio;
+}
+
+/**
+ * devm_gpiod_get - Resource-managed gpiod_get()
+ * @dev:	GPIO consumer
+ * @con_id:	function within the GPIO consumer
+ *
+ * Managed gpiod_get(). GPIO descriptors returned from this function are
+ * automatically disposed on driver detach. See gpiod_get() for detailed
+ * information about behavior and return values.
+ */
+struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
+					      const char *con_id)
+{
+	return devm_gpiod_get_index(dev, con_id, 0);
+}
+EXPORT_SYMBOL(devm_gpiod_get);
+
+/**
+ * devm_gpiod_get_index - Resource-managed gpiod_get_index()
+ * @dev:	GPIO consumer
+ * @con_id:	function within the GPIO consumer
+ * @idx:	index of the GPIO to obtain in the consumer
+ *
+ * Managed gpiod_get_index(). GPIO descriptors returned from this function are
+ * automatically disposed on driver detach. See gpiod_get_index() for detailed
+ * information about behavior and return values.
+ */
+struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
+						    const char *con_id,
+						    unsigned int idx)
+{
+	struct gpio_desc **dr;
+	struct gpio_desc *desc;
+
+	dr = devres_alloc(devm_gpiod_release, sizeof(struct gpiod_desc *),
+			  GFP_KERNEL);
+	if (!dr)
+		return ERR_PTR(-ENOMEM);
+
+	desc = gpiod_get_index(dev, con_id, idx);
+	if (IS_ERR(desc)) {
+		devres_free(dr);
+		return desc;
+	}
+
+	*dr = desc;
+	devres_add(dev, dr);
+
+	return 0;
+}
+EXPORT_SYMBOL(devm_gpiod_get_index);
+
+/**
+ * devm_gpiod_put - Resource-managed gpiod_put()
+ * @desc:	GPIO descriptor to dispose of
+ *
+ * Dispose of a GPIO descriptor obtained with devm_gpiod_get() or
+ * devm_gpiod_get_index(). Normally this function will not be called as the GPIO
+ * will be disposed of by the resource management code.
+ */
+void devm_gpiod_put(struct device *dev, struct gpio_desc *desc)
+{
+	WARN_ON(devres_release(dev, devm_gpiod_release, devm_gpiod_match,
+		&desc));
+}
+EXPORT_SYMBOL(devm_gpiod_put);
+
+
+
+
 static void devm_gpio_release(struct device *dev, void *res)
 {
 	unsigned *gpio = res;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 224abdc4b095..9263c7b359b7 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -70,6 +70,8 @@ static struct gpio_desc gpio_desc[ARCH_NR_GPIOS];
 
 #define GPIO_OFFSET_VALID(chip, offset) (offset >= 0 && offset < chip->ngpio)
 
+static DEFINE_MUTEX(gpio_lookup_lock);
+static LIST_HEAD(gpio_lookup_list);
 static LIST_HEAD(gpio_chips);
 
 #ifdef CONFIG_GPIO_SYSFS
@@ -2192,6 +2194,207 @@ void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 }
 EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
 
+/**
+ * gpiod_add_table() - register GPIO device consumers
+ * @table: array of consumers to register
+ * @num: number of consumers in table
+ */
+void gpiod_add_table(struct gpiod_lookup *table, size_t size)
+{
+	mutex_lock(&gpio_lookup_lock);
+
+	while (size--) {
+		list_add_tail(&table->list, &gpio_lookup_list);
+		table++;
+	}
+
+	mutex_unlock(&gpio_lookup_lock);
+}
+
+/*
+ * Caller must have a acquired gpio_lookup_lock
+ */
+static struct gpio_chip *find_chip_by_name(const char *name)
+{
+	struct gpio_chip *chip = NULL;
+
+	list_for_each_entry(chip, &gpio_lookup_list, list) {
+		if (chip->label == NULL)
+			continue;
+		if (!strcmp(chip->label, name))
+			break;
+	}
+
+	return chip;
+}
+
+#ifdef CONFIG_OF
+static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
+				      unsigned int idx, unsigned long *flags)
+{
+	char prop_name[32]; /* 32 is max size of property name */
+	enum of_gpio_flags of_flags;
+	struct gpio_desc *desc;
+
+	if (con_id)
+		snprintf(prop_name, 32, "%s-gpios", con_id);
+	else
+		snprintf(prop_name, 32, "gpios");
+
+	desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx,
+					&of_flags);
+
+	if (IS_ERR(desc))
+		return desc;
+
+	if (of_flags & OF_GPIO_ACTIVE_LOW)
+		*flags |= GPIOF_ACTIVE_LOW;
+
+	return desc;
+}
+#else
+static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
+				      unsigned int idx, unsigned long *flags)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
+
+static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id,
+				    unsigned int idx, unsigned long *flags)
+{
+	const char *dev_id = dev ? dev_name(dev) : NULL;
+	struct gpio_desc *desc = ERR_PTR(-ENODEV);
+	unsigned int match, best = 0;
+	struct gpiod_lookup *p;
+
+	mutex_lock(&gpio_lookup_lock);
+
+	list_for_each_entry(p, &gpio_lookup_list, list) {
+		match = 0;
+
+		if (p->dev_id) {
+			if (!dev_id || strcmp(p->dev_id, dev_id))
+				continue;
+
+			match += 2;
+		}
+
+		if (p->con_id) {
+			if (!con_id || strcmp(p->con_id, con_id))
+				continue;
+
+			match += 1;
+		}
+
+		if (p->idx != idx)
+			continue;
+
+		if (match > best) {
+			struct gpio_chip *chip;
+
+			chip = find_chip_by_name(p->chip_label);
+
+			if (!chip) {
+				dev_warn(dev, "cannot find GPIO chip %s\n",
+					 p->chip_label);
+				continue;
+			}
+
+			if (chip->ngpio >= p->chip_hwnum) {
+				dev_warn(dev, "GPIO chip %s has %d GPIOs\n",
+					 chip->label, chip->ngpio);
+				continue;
+			}
+
+			desc = gpio_to_desc(chip->base + p->chip_hwnum);
+			*flags = p->flags;
+
+			if (match != 3)
+				best = match;
+			else
+				break;
+		}
+	}
+
+	mutex_unlock(&gpio_lookup_lock);
+
+	return desc;
+}
+
+/**
+ * gpio_get - obtain a GPIO for a given GPIO function
+ * @dev:	GPIO consumer
+ * @con_id:	function within the GPIO consumer
+ *
+ * Return the GPIO descriptor corresponding to the function con_id of device
+ * dev, or an IS_ERR() condition if an error occured.
+ */
+struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id)
+{
+	return gpiod_get_index(dev, con_id, 0);
+}
+EXPORT_SYMBOL_GPL(gpiod_get);
+
+/**
+ * gpiod_get_index - obtain a GPIO from a multi-index GPIO function
+ * @dev:	GPIO consumer
+ * @con_id:	function within the GPIO consumer
+ * @idx:	index of the GPIO to obtain in the consumer
+ *
+ * This variant of gpiod_get() allows to access GPIOs other than the first
+ * defined one for functions that define several GPIOs.
+ *
+ * Return a valid GPIO descriptor, or an IS_ERR() condition in case of error.
+ */
+struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
+					       const char *con_id,
+					       unsigned int idx)
+{
+	struct gpio_desc *desc;
+	int status;
+	unsigned long flags = 0;
+
+	dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id);
+
+	/* Using device tree? */
+	if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node) {
+		dev_dbg(dev, "using device tree for GPIO lookup\n");
+		desc = of_find_gpio(dev, con_id, idx, &flags);
+	} else {
+		dev_dbg(dev, "using lookup tables for GPIO lookup");
+		desc = gpiod_find(dev, con_id, idx, &flags);
+	}
+
+	if (IS_ERR(desc)) {
+		dev_warn(dev, "lookup for GPIO %s failed\n", con_id);
+		return desc;
+	}
+
+	status = gpiod_request(desc, con_id);
+
+	if (status < 0)
+		return ERR_PTR(status);
+
+	if (flags & GPIOF_ACTIVE_LOW)
+		set_bit(FLAG_ACTIVE_LOW, &desc->flags);
+
+	return desc;
+}
+EXPORT_SYMBOL_GPL(gpiod_get_index);
+
+/**
+ * gpiod_put - dispose of a GPIO descriptor
+ * @desc:	GPIO descriptor to dispose of
+ *
+ * No descriptor can be used after gpiod_put() has been called on it.
+ */
+void gpiod_put(struct gpio_desc *desc)
+{
+	gpiod_free(desc);
+}
+EXPORT_SYMBOL_GPL(gpiod_put);
+
 #ifdef CONFIG_DEBUG_FS
 
 static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 2088eb50421c..4d34dbbbad4d 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -18,6 +18,21 @@ struct gpio_chip;
  */
 struct gpio_desc;
 
+/* Acquire and dispose GPIOs */
+struct gpio_desc *__must_check gpiod_get(struct device *dev,
+					 const char *con_id);
+struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
+					       const char *con_id,
+					       unsigned int idx);
+void gpiod_put(struct gpio_desc *desc);
+
+struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
+					      const char *con_id);
+struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
+						    const char *con_id,
+						    unsigned int idx);
+void devm_gpiod_put(struct device *dev, struct gpio_desc *desc);
+
 int gpiod_get_direction(const struct gpio_desc *desc);
 int gpiod_direction_input(struct gpio_desc *desc);
 int gpiod_direction_output(struct gpio_desc *desc, int value);
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 5dc172c72f0f..cd9da3885d79 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -124,4 +124,60 @@ extern struct gpio_chip *gpiochip_find(void *data,
 int gpiod_lock_as_irq(struct gpio_desc *desc);
 void gpiod_unlock_as_irq(struct gpio_desc *desc);
 
+/**
+ * Lookup table for associating GPIOs to specific devices and functions using
+ * platform data.
+ */
+struct gpiod_lookup {
+	struct list_head list;
+	/*
+	 * name of the chip the GPIO belongs to
+	 */
+	const char *chip_label;
+	/*
+	 * hardware number (i.e. relative to the chip) of the GPIO
+	 */
+	u16 chip_hwnum;
+	/*
+	 * name of device that can claim this GPIO
+	 */
+	const char *dev_id;
+	/*
+	 * name of the GPIO from the device's point of view
+	 */
+	const char *con_id;
+	/*
+	 * index of the GPIO in case several GPIOs share the same name
+	 */
+	unsigned int idx;
+	/*
+	 * mask of GPIOF_* values
+	 */
+	unsigned long flags;
+};
+
+/*
+ * Simple definition of a single GPIO under a con_id
+ */
+#define GPIO_LOOKUP(_chip_label, _chip_hwnum, _dev_id, _con_id, _flags) \
+	GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _dev_id, _con_id, 0, _flags)
+
+/*
+ * Use this macro if you need to have several GPIOs under the same con_id.
+ * Each GPIO needs to use a different index and can be accessed using
+ * gpiod_get_index()
+ */
+#define GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _dev_id, _con_id, _idx, \
+			_flags)                                           \
+{                                                                         \
+	.chip_label = _chip_label,                                        \
+	.chip_hwnum = _chip_hwnum,                                        \
+	.dev_id = _dev_id,                                                \
+	.con_id = _con_id,                                                \
+	.idx = _idx,                                                      \
+	.flags = _flags,                                                  \
+}
+
+void gpiod_add_table(struct gpiod_lookup *table, size_t size);
+
 #endif
-- 
cgit v1.2.3


From 936e15dd2128eb5aa71251766f1176552b45f43c Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 10 Oct 2013 11:01:08 +0300
Subject: gpiolib / ACPI: convert to gpiod interfaces

The new GPIO descriptor based interface is now preferred over the old
integer based one. This patch converts the ACPI GPIO helpers to use this
new interface internally. In addition to that provide compatibility
function acpi_get_gpio_by_index() that converts the returned GPIO
descriptor to an integer.

We also drop acpi_get_gpio() as it is not used anywhere outside
gpiolib-acpi and even there we use acpi_get_gpiod() instead.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 51 ++++++++++++++++++++++-----------------------
 include/linux/acpi_gpio.h   | 29 ++++++++++++++++----------
 2 files changed, 43 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 1745ce5983d6..03187d0a3045 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -11,7 +11,7 @@
  */
 
 #include <linux/errno.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/export.h>
 #include <linux/acpi_gpio.h>
 #include <linux/acpi.h>
@@ -33,14 +33,15 @@ static int acpi_gpiochip_find(struct gpio_chip *gc, void *data)
 }
 
 /**
- * acpi_get_gpio() - Translate ACPI GPIO pin to GPIO number usable with GPIO API
+ * acpi_get_gpiod() - Translate ACPI GPIO pin to GPIO descriptor usable with GPIO API
  * @path:	ACPI GPIO controller full path name, (e.g. "\\_SB.GPO1")
  * @pin:	ACPI GPIO pin number (0-based, controller-relative)
  *
- * Returns GPIO number to use with Linux generic GPIO API, or errno error value
+ * Returns GPIO descriptor to use with Linux generic GPIO API, or ERR_PTR
+ * error value
  */
 
-int acpi_get_gpio(char *path, int pin)
+static struct gpio_desc *acpi_get_gpiod(char *path, int pin)
 {
 	struct gpio_chip *chip;
 	acpi_handle handle;
@@ -48,18 +49,17 @@ int acpi_get_gpio(char *path, int pin)
 
 	status = acpi_get_handle(NULL, path, &handle);
 	if (ACPI_FAILURE(status))
-		return -ENODEV;
+		return ERR_PTR(-ENODEV);
 
 	chip = gpiochip_find(handle, acpi_gpiochip_find);
 	if (!chip)
-		return -ENODEV;
+		return ERR_PTR(-ENODEV);
 
-	if (!gpio_is_valid(chip->base + pin))
-		return -EINVAL;
+	if (pin < 0 || pin > chip->ngpio)
+		return ERR_PTR(-EINVAL);
 
-	return chip->base + pin;
+	return gpio_to_desc(chip->base + pin);
 }
-EXPORT_SYMBOL_GPL(acpi_get_gpio);
 
 static irqreturn_t acpi_gpio_irq_handler(int irq, void *data)
 {
@@ -235,7 +235,7 @@ EXPORT_SYMBOL(acpi_gpiochip_free_interrupts);
 struct acpi_gpio_lookup {
 	struct acpi_gpio_info info;
 	int index;
-	int gpio;
+	struct gpio_desc *desc;
 	int n;
 };
 
@@ -246,11 +246,11 @@ static int acpi_find_gpio(struct acpi_resource *ares, void *data)
 	if (ares->type != ACPI_RESOURCE_TYPE_GPIO)
 		return 1;
 
-	if (lookup->n++ == lookup->index && lookup->gpio < 0) {
+	if (lookup->n++ == lookup->index && !lookup->desc) {
 		const struct acpi_resource_gpio *agpio = &ares->data.gpio;
 
-		lookup->gpio = acpi_get_gpio(agpio->resource_source.string_ptr,
-					     agpio->pin_table[0]);
+		lookup->desc = acpi_get_gpiod(agpio->resource_source.string_ptr,
+					      agpio->pin_table[0]);
 		lookup->info.gpioint =
 			agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT;
 	}
@@ -259,24 +259,24 @@ static int acpi_find_gpio(struct acpi_resource *ares, void *data)
 }
 
 /**
- * acpi_get_gpio_by_index() - get a GPIO number from device resources
+ * acpi_get_gpiod_by_index() - get a GPIO descriptor from device resources
  * @dev: pointer to a device to get GPIO from
  * @index: index of GpioIo/GpioInt resource (starting from %0)
  * @info: info pointer to fill in (optional)
  *
  * Function goes through ACPI resources for @dev and based on @index looks
- * up a GpioIo/GpioInt resource, translates it to the Linux GPIO number,
+ * up a GpioIo/GpioInt resource, translates it to the Linux GPIO descriptor,
  * and returns it. @index matches GpioIo/GpioInt resources only so if there
  * are total %3 GPIO resources, the index goes from %0 to %2.
  *
- * If the GPIO cannot be translated or there is an error, negative errno is
+ * If the GPIO cannot be translated or there is an error an ERR_PTR is
  * returned.
  *
  * Note: if the GPIO resource has multiple entries in the pin list, this
  * function only returns the first.
  */
-int acpi_get_gpio_by_index(struct device *dev, int index,
-			   struct acpi_gpio_info *info)
+struct gpio_desc *acpi_get_gpiod_by_index(struct device *dev, int index,
+					  struct acpi_gpio_info *info)
 {
 	struct acpi_gpio_lookup lookup;
 	struct list_head resource_list;
@@ -285,27 +285,26 @@ int acpi_get_gpio_by_index(struct device *dev, int index,
 	int ret;
 
 	if (!dev)
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	handle = ACPI_HANDLE(dev);
 	if (!handle || acpi_bus_get_device(handle, &adev))
-		return -ENODEV;
+		return ERR_PTR(-ENODEV);
 
 	memset(&lookup, 0, sizeof(lookup));
 	lookup.index = index;
-	lookup.gpio = -ENODEV;
 
 	INIT_LIST_HEAD(&resource_list);
 	ret = acpi_dev_get_resources(adev, &resource_list, acpi_find_gpio,
 				     &lookup);
 	if (ret < 0)
-		return ret;
+		return ERR_PTR(ret);
 
 	acpi_dev_free_resource_list(&resource_list);
 
-	if (lookup.gpio >= 0 && info)
+	if (lookup.desc && info)
 		*info = lookup.info;
 
-	return lookup.gpio;
+	return lookup.desc ? lookup.desc : ERR_PTR(-ENODEV);
 }
-EXPORT_SYMBOL_GPL(acpi_get_gpio_by_index);
+EXPORT_SYMBOL_GPL(acpi_get_gpiod_by_index);
diff --git a/include/linux/acpi_gpio.h b/include/linux/acpi_gpio.h
index 4c120a1e0ca3..b6ce601e55a2 100644
--- a/include/linux/acpi_gpio.h
+++ b/include/linux/acpi_gpio.h
@@ -2,8 +2,10 @@
 #define _LINUX_ACPI_GPIO_H_
 
 #include <linux/device.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 
 /**
  * struct acpi_gpio_info - ACPI GPIO specific information
@@ -15,23 +17,18 @@ struct acpi_gpio_info {
 
 #ifdef CONFIG_GPIO_ACPI
 
-int acpi_get_gpio(char *path, int pin);
-int acpi_get_gpio_by_index(struct device *dev, int index,
-			   struct acpi_gpio_info *info);
+struct gpio_desc *acpi_get_gpiod_by_index(struct device *dev, int index,
+					  struct acpi_gpio_info *info);
 void acpi_gpiochip_request_interrupts(struct gpio_chip *chip);
 void acpi_gpiochip_free_interrupts(struct gpio_chip *chip);
 
 #else /* CONFIG_GPIO_ACPI */
 
-static inline int acpi_get_gpio(char *path, int pin)
+static inline struct gpio_desc *
+acpi_get_gpiod_by_index(struct device *dev, int index,
+			struct acpi_gpio_info *info)
 {
-	return -ENODEV;
-}
-
-static inline int acpi_get_gpio_by_index(struct device *dev, int index,
-					 struct acpi_gpio_info *info)
-{
-	return -ENODEV;
+	return ERR_PTR(-ENOSYS);
 }
 
 static inline void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { }
@@ -39,4 +36,14 @@ static inline void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) { }
 
 #endif /* CONFIG_GPIO_ACPI */
 
+static inline int acpi_get_gpio_by_index(struct device *dev, int index,
+					 struct acpi_gpio_info *info)
+{
+	struct gpio_desc *desc = acpi_get_gpiod_by_index(dev, index, info);
+
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+	return desc_to_gpio(desc);
+}
+
 #endif /* _LINUX_ACPI_GPIO_H_ */
-- 
cgit v1.2.3


From e01f440a689aeb2d0e81c696fe2069f8d01d5d49 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 10 Oct 2013 11:01:10 +0300
Subject: gpiolib / ACPI: allow passing GPIOF_ACTIVE_LOW for GpioInt resources

The ACPI GpioInt resources contain polarity field that is used to specify
whether the interrupt is active high or low. Since gpiolib supports
GPIOF_ACTIVE_LOW we can pass this information in the flags field in
acpi_find_gpio(), analogous to the DeviceTree version.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c |  2 ++
 drivers/gpio/gpiolib.c      | 12 +++++++++++-
 include/linux/acpi_gpio.h   |  2 ++
 3 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 03187d0a3045..ae0ffdce8bd5 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -253,6 +253,8 @@ static int acpi_find_gpio(struct acpi_resource *ares, void *data)
 					      agpio->pin_table[0]);
 		lookup->info.gpioint =
 			agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT;
+		lookup->info.active_low =
+			agpio->polarity == ACPI_ACTIVE_LOW;
 	}
 
 	return 1;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index bee93c8c2361..9f3326b95e60 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2264,7 +2264,17 @@ static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
 static struct gpio_desc *acpi_find_gpio(struct device *dev, const char *con_id,
 					unsigned int idx, unsigned long *flags)
 {
-	return acpi_get_gpiod_by_index(dev, idx, NULL);
+	struct acpi_gpio_info info;
+	struct gpio_desc *desc;
+
+	desc = acpi_get_gpiod_by_index(dev, idx, &info);
+	if (IS_ERR(desc))
+		return desc;
+
+	if (info.gpioint && info.active_low)
+		*flags |= GPIOF_ACTIVE_LOW;
+
+	return desc;
 }
 
 static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id,
diff --git a/include/linux/acpi_gpio.h b/include/linux/acpi_gpio.h
index b6ce601e55a2..d875bc3dba3c 100644
--- a/include/linux/acpi_gpio.h
+++ b/include/linux/acpi_gpio.h
@@ -10,9 +10,11 @@
 /**
  * struct acpi_gpio_info - ACPI GPIO specific information
  * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo
+ * @active_low: in case of @gpioint, the pin is active low
  */
 struct acpi_gpio_info {
 	bool gpioint;
+	bool active_low;
 };
 
 #ifdef CONFIG_GPIO_ACPI
-- 
cgit v1.2.3


From 3347c960295583eee3fd58e5c539fb1972fbc005 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 19 Oct 2013 11:42:56 -0700
Subject: ipv4: gso: make inet_gso_segment() stackable

In order to support GSO on IPIP, we need to make
inet_gso_segment() stackable.

It should not assume network header starts right after mac
header.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  7 +++++--
 net/core/dev.c         |  2 ++
 net/ipv4/af_inet.c     | 25 ++++++++++++++++++-------
 3 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ba74474836c0..cad1e0c5cc04 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2722,9 +2722,12 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 /* Keeps track of mac header offset relative to skb->head.
  * It is useful for TSO of Tunneling protocol. e.g. GRE.
  * For non-tunnel skb it points to skb_mac_header() and for
- * tunnel skb it points to outer mac header. */
+ * tunnel skb it points to outer mac header.
+ * Keeps track of level of encapsulation of network headers.
+ */
 struct skb_gso_cb {
-	int mac_offset;
+	int	mac_offset;
+	int	encap_level;
 };
 #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 1b6eadf69289..0918aadc20fd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2377,6 +2377,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 	}
 
 	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+	SKB_GSO_CB(skb)->encap_level = 0;
+
 	skb_reset_mac_header(skb);
 	skb_reset_mac_len(skb);
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4f8cd4fc451d..5783ab5b5ef8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1273,16 +1273,17 @@ out:
 }
 
 static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
-	netdev_features_t features)
+					netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	const struct net_offload *ops;
+	unsigned int offset = 0;
 	struct iphdr *iph;
+	bool tunnel;
 	int proto;
+	int nhoff;
 	int ihl;
 	int id;
-	unsigned int offset = 0;
-	bool tunnel;
 
 	if (unlikely(skb_shinfo(skb)->gso_type &
 		     ~(SKB_GSO_TCPV4 |
@@ -1296,6 +1297,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       0)))
 		goto out;
 
+	skb_reset_network_header(skb);
+	nhoff = skb_network_header(skb) - skb_mac_header(skb);
 	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
@@ -1312,7 +1315,10 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		goto out;
 	__skb_pull(skb, ihl);
 
-	tunnel = !!skb->encapsulation;
+	tunnel = SKB_GSO_CB(skb)->encap_level > 0;
+	if (tunnel)
+		features = skb->dev->hw_enc_features & netif_skb_features(skb);
+	SKB_GSO_CB(skb)->encap_level += ihl;
 
 	skb_reset_transport_header(skb);
 
@@ -1327,18 +1333,23 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 
 	skb = segs;
 	do {
-		iph = ip_hdr(skb);
+		iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
 		if (!tunnel && proto == IPPROTO_UDP) {
 			iph->id = htons(id);
 			iph->frag_off = htons(offset >> 3);
 			if (skb->next != NULL)
 				iph->frag_off |= htons(IP_MF);
-			offset += (skb->len - skb->mac_len - iph->ihl * 4);
+			offset += skb->len - nhoff - ihl;
 		} else  {
 			iph->id = htons(id++);
 		}
-		iph->tot_len = htons(skb->len - skb->mac_len);
+		iph->tot_len = htons(skb->len - nhoff);
 		ip_send_check(iph);
+		if (tunnel) {
+			skb_reset_inner_headers(skb);
+			skb->encapsulation = 1;
+		}
+		skb->network_header = (u8 *)iph - skb->head;
 	} while ((skb = skb->next));
 
 out:
-- 
cgit v1.2.3


From cb32f511a70be8967ac9025cf49c44324ced9a39 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 19 Oct 2013 11:42:57 -0700
Subject: ipip: add GSO/TSO support

Now inet_gso_segment() is stackable, its relatively easy to
implement GSO/TSO support for IPIP

Performance results, when segmentation is done after tunnel
device (as no NIC is yet enabled for TSO IPIP support) :

Before patch :

lpq83:~# ./netperf -H 7.7.9.84 -Cc
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.9.84 () port 0 AF_INET
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  16384  16384    10.00      3357.88   5.09     3.70     2.983   2.167

After patch :

lpq83:~# ./netperf -H 7.7.9.84 -Cc
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.9.84 () port 0 AF_INET
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  16384  16384    10.00      7710.19   4.52     6.62     1.152   1.687

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  2 ++
 include/linux/skbuff.h          |  6 ++++--
 net/core/ethtool.c              |  1 +
 net/ipv4/af_inet.c              |  9 +++++++++
 net/ipv4/gre_offload.c          |  3 ++-
 net/ipv4/ipip.c                 | 11 ++++++-----
 net/ipv4/tcp_offload.c          |  1 +
 net/ipv4/udp_offload.c          |  1 +
 net/ipv6/ip6_offload.c          |  1 +
 net/ipv6/udp_offload.c          |  1 +
 net/mpls/mpls_gso.c             |  1 +
 11 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index a2a89a5c7be5..8dad68cede1c 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -42,6 +42,7 @@ enum {
 	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
 	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
+	NETIF_F_GSO_IPIP_BIT,		/* ... IPIP tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
 	NETIF_F_GSO_MPLS_BIT,		/* ... MPLS segmentation */
 	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
@@ -107,6 +108,7 @@ enum {
 #define NETIF_F_RXFCS		__NETIF_F(RXFCS)
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
 #define NETIF_F_GSO_GRE		__NETIF_F(GSO_GRE)
+#define NETIF_F_GSO_IPIP	__NETIF_F(GSO_IPIP)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
 #define NETIF_F_GSO_MPLS	__NETIF_F(GSO_MPLS)
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cad1e0c5cc04..60729134d253 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -318,9 +318,11 @@ enum {
 
 	SKB_GSO_GRE = 1 << 6,
 
-	SKB_GSO_UDP_TUNNEL = 1 << 7,
+	SKB_GSO_IPIP = 1 << 7,
 
-	SKB_GSO_MPLS = 1 << 8,
+	SKB_GSO_UDP_TUNNEL = 1 << 8,
+
+	SKB_GSO_MPLS = 1 << 9,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 78e9d9223e40..8cab7744790e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -81,6 +81,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
 	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
 	[NETIF_F_GSO_GRE_BIT] =		 "tx-gre-segmentation",
+	[NETIF_F_GSO_IPIP_BIT] =	 "tx-ipip-segmentation",
 	[NETIF_F_GSO_UDP_TUNNEL_BIT] =	 "tx-udp_tnl-segmentation",
 	[NETIF_F_GSO_MPLS_BIT] =	 "tx-mpls-segmentation",
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5783ab5b5ef8..4049906010f7 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1291,6 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_IPIP |
 		       SKB_GSO_TCPV6 |
 		       SKB_GSO_UDP_TUNNEL |
 		       SKB_GSO_MPLS |
@@ -1656,6 +1657,13 @@ static struct packet_offload ip_packet_offload __read_mostly = {
 	},
 };
 
+static const struct net_offload ipip_offload = {
+	.callbacks = {
+		.gso_send_check = inet_gso_send_check,
+		.gso_segment	= inet_gso_segment,
+	},
+};
+
 static int __init ipv4_offload_init(void)
 {
 	/*
@@ -1667,6 +1675,7 @@ static int __init ipv4_offload_init(void)
 		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
 
 	dev_add_offload(&ip_packet_offload);
+	inet_add_offload(&ipip_offload, IPPROTO_IPIP);
 	return 0;
 }
 
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 55e6bfb3a289..e5d436188464 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -39,7 +39,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_UDP |
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
-				  SKB_GSO_GRE)))
+				  SKB_GSO_GRE |
+				  SKB_GSO_IPIP)))
 		goto out;
 
 	if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 7f80fb4b82d3..fe3e9f7f1f0b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -220,17 +220,17 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (unlikely(skb->protocol != htons(ETH_P_IP)))
 		goto tx_error;
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+	if (IS_ERR(skb))
+		goto out;
 
 	ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
 	return NETDEV_TX_OK;
 
 tx_error:
-	dev->stats.tx_errors++;
 	dev_kfree_skb(skb);
+out:
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 
@@ -275,6 +275,7 @@ static const struct net_device_ops ipip_netdev_ops = {
 #define IPIP_FEATURES (NETIF_F_SG |		\
 		       NETIF_F_FRAGLIST |	\
 		       NETIF_F_HIGHDMA |	\
+		       NETIF_F_GSO_SOFTWARE |	\
 		       NETIF_F_HW_CSUM)
 
 static void ipip_tunnel_setup(struct net_device *dev)
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 8e3113f46ec1..dfc96b00673e 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -56,6 +56,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
 			       SKB_GSO_GRE |
+			       SKB_GSO_IPIP |
 			       SKB_GSO_MPLS |
 			       SKB_GSO_UDP_TUNNEL |
 			       0) ||
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index f35eccaa855e..83206de2bc76 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -52,6 +52,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 
 		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_IPIP |
 				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b405fba91c72..5c2fc1d04196 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -96,6 +96,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_IPIP |
 		       SKB_GSO_UDP_TUNNEL |
 		       SKB_GSO_MPLS |
 		       SKB_GSO_TCPV6 |
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 60559511bd9c..f63780ff3732 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -64,6 +64,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 				      SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_GRE |
+				      SKB_GSO_IPIP |
 				      SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 1bec1219ab81..851cd880b0c0 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
+				  SKB_GSO_IPIP |
 				  SKB_GSO_MPLS)))
 		goto out;
 
-- 
cgit v1.2.3


From c4b2c0c5f647aa1093e8f9097a30c17ce0f94d4d Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 19 Oct 2013 21:48:53 +0200
Subject: static_key: WARN on usage before jump_label_init was called

Usage of the static key primitives to toggle a branch must not be used
before jump_label_init() is called from init/main.c. jump_label_init
reorganizes and wires up the jump_entries so usage before that could
have unforeseen consequences.

Following primitives are now checked for correct use:
* static_key_slow_inc
* static_key_slow_dec
* static_key_slow_dec_deferred
* jump_label_rate_limit

The x86 architecture already checks this by testing if the default_nop
was already replaced with an optimal nop or with a branch instruction. It
will panic then. Other architectures don't check for this.

Because we need to relax this check for the x86 arch to allow code to
transition from default_nop to the enabled state and other architectures
did not check for this at all this patch introduces checking on the
static_key primitives in a non-arch dependent manner.

All checked functions are considered slow-path so the additional check
does no harm to performance.

The warnings are best observed with earlyprintk.

Based on a patch from Andi Kleen.

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/jump_label.h           | 10 ++++++++++
 include/linux/jump_label_ratelimit.h |  2 ++
 init/main.c                          |  7 +++++++
 kernel/jump_label.c                  |  5 +++++
 4 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a5079072da66..e96be7245717 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -48,6 +48,13 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <linux/bug.h>
+
+extern bool static_key_initialized;
+
+#define STATIC_KEY_CHECK_USE() WARN(!static_key_initialized,		      \
+				    "%s used before call to jump_label_init", \
+				    __func__)
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
 
@@ -128,6 +135,7 @@ struct static_key {
 
 static __always_inline void jump_label_init(void)
 {
+	static_key_initialized = true;
 }
 
 static __always_inline bool static_key_false(struct static_key *key)
@@ -146,11 +154,13 @@ static __always_inline bool static_key_true(struct static_key *key)
 
 static inline void static_key_slow_inc(struct static_key *key)
 {
+	STATIC_KEY_CHECK_USE();
 	atomic_inc(&key->enabled);
 }
 
 static inline void static_key_slow_dec(struct static_key *key)
 {
+	STATIC_KEY_CHECK_USE();
 	atomic_dec(&key->enabled);
 }
 
diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
index 113788389b3d..089f70f83e97 100644
--- a/include/linux/jump_label_ratelimit.h
+++ b/include/linux/jump_label_ratelimit.h
@@ -23,12 +23,14 @@ struct static_key_deferred {
 };
 static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
+	STATIC_KEY_CHECK_USE();
 	static_key_slow_dec(&key->key);
 }
 static inline void
 jump_label_rate_limit(struct static_key_deferred *key,
 		unsigned long rl)
 {
+	STATIC_KEY_CHECK_USE();
 }
 #endif	/* HAVE_JUMP_LABEL */
 #endif	/* _LINUX_JUMP_LABEL_RATELIMIT_H */
diff --git a/init/main.c b/init/main.c
index af310afbef28..27bbec1a5b35 100644
--- a/init/main.c
+++ b/init/main.c
@@ -135,6 +135,13 @@ static char *static_command_line;
 static char *execute_command;
 static char *ramdisk_execute_command;
 
+/*
+ * Used to generate warnings if static_key manipulation functions are used
+ * before jump_label_init is called.
+ */
+bool static_key_initialized __read_mostly = false;
+EXPORT_SYMBOL_GPL(static_key_initialized);
+
 /*
  * If set, this is an indication to the drivers that reset the underlying
  * device before going ahead with the initialization otherwise driver might
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 297a9247a3b3..9019f15deab2 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -58,6 +58,7 @@ static void jump_label_update(struct static_key *key, int enable);
 
 void static_key_slow_inc(struct static_key *key)
 {
+	STATIC_KEY_CHECK_USE();
 	if (atomic_inc_not_zero(&key->enabled))
 		return;
 
@@ -103,12 +104,14 @@ static void jump_label_update_timeout(struct work_struct *work)
 
 void static_key_slow_dec(struct static_key *key)
 {
+	STATIC_KEY_CHECK_USE();
 	__static_key_slow_dec(key, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec);
 
 void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
+	STATIC_KEY_CHECK_USE();
 	__static_key_slow_dec(&key->key, key->timeout, &key->work);
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
@@ -116,6 +119,7 @@ EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
 void jump_label_rate_limit(struct static_key_deferred *key,
 		unsigned long rl)
 {
+	STATIC_KEY_CHECK_USE();
 	key->timeout = rl;
 	INIT_DELAYED_WORK(&key->work, jump_label_update_timeout);
 }
@@ -212,6 +216,7 @@ void __init jump_label_init(void)
 		key->next = NULL;
 #endif
 	}
+	static_key_initialized = true;
 	jump_label_unlock();
 }
 
-- 
cgit v1.2.3


From a48e42920ff38bc90bbf75143fff4555723d4540 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 19 Oct 2013 21:48:55 +0200
Subject: net: introduce new macro net_get_random_once

net_get_random_once is a new macro which handles the initialization
of secret keys. It is possible to call it in the fast path. Only the
initialization depends on the spinlock and is rather slow. Otherwise
it should get used just before the key is used to delay the entropy
extration as late as possible to get better randomness. It returns true
if the key got initialized.

The usage of static_keys for net_get_random_once is a bit uncommon so
it needs some further explanation why this actually works:

=== In the simple non-HAVE_JUMP_LABEL case we actually have ===
no constrains to use static_key_(true|false) on keys initialized with
STATIC_KEY_INIT_(FALSE|TRUE). So this path just expands in favor of
the likely case that the initialization is already done. The key is
initialized like this:

___done_key = { .enabled = ATOMIC_INIT(0) }

The check

                if (!static_key_true(&___done_key))                     \

expands into (pseudo code)

                if (!likely(___done_key > 0))

, so we take the fast path as soon as ___done_key is increased from the
helper function.

=== If HAVE_JUMP_LABELs are available this depends ===
on patching of jumps into the prepared NOPs, which is done in
jump_label_init at boot-up time (from start_kernel). It is forbidden
and dangerous to use net_get_random_once in functions which are called
before that!

At compilation time NOPs are generated at the call sites of
net_get_random_once. E.g. net/ipv6/inet6_hashtable.c:inet6_ehashfn (we
need to call net_get_random_once two times in inet6_ehashfn, so two NOPs):

      71:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
      76:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)

Both will be patched to the actual jumps to the end of the function to
call __net_get_random_once at boot time as explained above.

arch_static_branch is optimized and inlined for false as return value and
actually also returns false in case the NOP is placed in the instruction
stream. So in the fast case we get a "return false". But because we
initialize ___done_key with (enabled != (entries & 1)) this call-site
will get patched up at boot thus returning true. The final check looks
like this:

                if (!static_key_true(&___done_key))                     \
                        ___ret = __net_get_random_once(buf,             \

expands to

                if (!!static_key_false(&___done_key))                     \
                        ___ret = __net_get_random_once(buf,             \

So we get true at boot time and as soon as static_key_slow_inc is called
on the key it will invert the logic and return false for the fast path.
static_key_slow_inc will change the branch because it got initialized
with .enabled == 0. After static_key_slow_inc is called on the key the
branch is replaced with a nop again.

=== Misc: ===
The helper defers the increment into a workqueue so we don't
have problems calling this code from atomic sections. A seperate boolean
(___done) guards the case where we enter net_get_random_once again before
the increment happend.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 25 +++++++++++++++++++++++++
 net/core/utils.c    | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index ca9ec8540905..a489705f6fa3 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -239,6 +239,31 @@ do {								\
 #define net_random()		prandom_u32()
 #define net_srandom(seed)	prandom_seed((__force u32)(seed))
 
+bool __net_get_random_once(void *buf, int nbytes, bool *done,
+			   struct static_key *done_key);
+
+#ifdef HAVE_JUMP_LABEL
+#define ___NET_RANDOM_STATIC_KEY_INIT ((struct static_key) \
+		{ .enabled = ATOMIC_INIT(0), .entries = (void *)1 })
+#else /* !HAVE_JUMP_LABEL */
+#define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
+#endif /* HAVE_JUMP_LABEL */
+
+/* BE CAREFUL: this function is not interrupt safe */
+#define net_get_random_once(buf, nbytes)				\
+	({								\
+		bool ___ret = false;					\
+		static bool ___done = false;				\
+		static struct static_key ___done_key =			\
+			___NET_RANDOM_STATIC_KEY_INIT;			\
+		if (!static_key_true(&___done_key))			\
+			___ret = __net_get_random_once(buf,		\
+						       nbytes,		\
+						       &___done,	\
+						       &___done_key);	\
+		___ret;							\
+	})
+
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
 		   size_t num, size_t len);
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
diff --git a/net/core/utils.c b/net/core/utils.c
index aa88e23fc87a..bf09371e19b1 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -338,3 +338,51 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
 				  csum_unfold(*sum)));
 }
 EXPORT_SYMBOL(inet_proto_csum_replace16);
+
+struct __net_random_once_work {
+	struct work_struct work;
+	struct static_key *key;
+};
+
+static void __net_random_once_deferred(struct work_struct *w)
+{
+	struct __net_random_once_work *work =
+		container_of(w, struct __net_random_once_work, work);
+	if (!static_key_enabled(work->key))
+		static_key_slow_inc(work->key);
+	kfree(work);
+}
+
+static void __net_random_once_disable_jump(struct static_key *key)
+{
+	struct __net_random_once_work *w;
+
+	w = kmalloc(sizeof(*w), GFP_ATOMIC);
+	if (!w)
+		return;
+
+	INIT_WORK(&w->work, __net_random_once_deferred);
+	w->key = key;
+	schedule_work(&w->work);
+}
+
+bool __net_get_random_once(void *buf, int nbytes, bool *done,
+			   struct static_key *done_key)
+{
+	static DEFINE_SPINLOCK(lock);
+
+	spin_lock_bh(&lock);
+	if (*done) {
+		spin_unlock_bh(&lock);
+		return false;
+	}
+
+	get_random_bytes(buf, nbytes);
+	*done = true;
+	spin_unlock_bh(&lock);
+
+	__net_random_once_disable_jump(done_key);
+
+	return true;
+}
+EXPORT_SYMBOL(__net_get_random_once);
-- 
cgit v1.2.3


From 1fecf8958eb7f90791f2c7e99afac393b64fa976 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Mon, 21 Oct 2013 05:32:48 +0900
Subject: ARM: S3C24XX: add dma pdata for s3c2410, s3c2440 and s3c2442

s3c2410 and s3c2442 share the same dma channels while s3c2440 has
slight differences. But on all three the reachable sources per dma
channel has constraints attached and thus encodes the usable
combinations using the S3C24XX_DMA_CHANREQ macro.

This also fixes the warning about s3c2410_dma_resource being unused
as reported by Olof Johansson.

Reported-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-s3c24xx/common.c            | 100 ++++++++++++++++++++++++++++++
 arch/arm/mach-s3c24xx/common.h            |   2 +
 include/linux/platform_data/dma-s3c24xx.h |   3 +
 3 files changed, 105 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/mach-s3c24xx/common.c b/arch/arm/mach-s3c24xx/common.c
index bdcb29f4ea31..4adaa4b43ffe 100644
--- a/arch/arm/mach-s3c24xx/common.c
+++ b/arch/arm/mach-s3c24xx/common.c
@@ -343,6 +343,50 @@ static struct resource s3c2410_dma_resource[] = {
 };
 #endif
 
+#if defined(CONFIG_CPU_S3C2410) || defined(CONFIG_CPU_S3C2442)
+static struct s3c24xx_dma_channel s3c2410_dma_channels[DMACH_MAX] = {
+	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 0), },
+	[DMACH_XD1] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 1), },
+	[DMACH_SDI] = { S3C24XX_DMA_APB, false, S3C24XX_DMA_CHANREQ(2, 0) |
+						S3C24XX_DMA_CHANREQ(2, 2) |
+						S3C24XX_DMA_CHANREQ(1, 3),
+	},
+	[DMACH_SPI0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 1), },
+	[DMACH_SPI1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 3), },
+	[DMACH_UART0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 0), },
+	[DMACH_UART1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 1), },
+	[DMACH_UART2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(0, 3), },
+	[DMACH_TIMER] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 0) |
+						 S3C24XX_DMA_CHANREQ(3, 2) |
+						 S3C24XX_DMA_CHANREQ(3, 3),
+	},
+	[DMACH_I2S_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 1) |
+						  S3C24XX_DMA_CHANREQ(1, 2),
+	},
+	[DMACH_I2S_OUT] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(0, 2), },
+	[DMACH_USB_EP1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 0), },
+	[DMACH_USB_EP2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 1), },
+	[DMACH_USB_EP3] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 2), },
+	[DMACH_USB_EP4] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 3), },
+};
+
+static struct s3c24xx_dma_platdata s3c2410_dma_platdata = {
+	.num_phy_channels = 4,
+	.channels = s3c2410_dma_channels,
+	.num_channels = DMACH_MAX,
+};
+
+struct platform_device s3c2410_device_dma = {
+	.name		= "s3c2410-dma",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(s3c2410_dma_resource),
+	.resource	= s3c2410_dma_resource,
+	.dev	= {
+		.platform_data	= &s3c2410_dma_platdata,
+	},
+};
+#endif
+
 #ifdef CONFIG_CPU_S3C2412
 static struct s3c24xx_dma_channel s3c2412_dma_channels[DMACH_MAX] = {
 	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, 17 },
@@ -384,6 +428,62 @@ struct platform_device s3c2412_device_dma = {
 };
 #endif
 
+#if defined(CONFIG_CPU_S3C2440)
+static struct s3c24xx_dma_channel s3c2440_dma_channels[DMACH_MAX] = {
+	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 0), },
+	[DMACH_XD1] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 1), },
+	[DMACH_SDI] = { S3C24XX_DMA_APB, false, S3C24XX_DMA_CHANREQ(2, 0) |
+						S3C24XX_DMA_CHANREQ(6, 1) |
+						S3C24XX_DMA_CHANREQ(2, 2) |
+						S3C24XX_DMA_CHANREQ(1, 3),
+	},
+	[DMACH_SPI0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 1), },
+	[DMACH_SPI1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 3), },
+	[DMACH_UART0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 0), },
+	[DMACH_UART1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 1), },
+	[DMACH_UART2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(0, 3), },
+	[DMACH_TIMER] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 0) |
+						 S3C24XX_DMA_CHANREQ(3, 2) |
+						 S3C24XX_DMA_CHANREQ(3, 3),
+	},
+	[DMACH_I2S_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 1) |
+						  S3C24XX_DMA_CHANREQ(1, 2),
+	},
+	[DMACH_I2S_OUT] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(5, 0) |
+						   S3C24XX_DMA_CHANREQ(0, 2),
+	},
+	[DMACH_PCM_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(6, 0) |
+						  S3C24XX_DMA_CHANREQ(5, 2),
+	},
+	[DMACH_PCM_OUT] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(5, 1) |
+						  S3C24XX_DMA_CHANREQ(6, 3),
+	},
+	[DMACH_MIC_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(6, 2) |
+						  S3C24XX_DMA_CHANREQ(5, 3),
+	},
+	[DMACH_USB_EP1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 0), },
+	[DMACH_USB_EP2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 1), },
+	[DMACH_USB_EP3] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 2), },
+	[DMACH_USB_EP4] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 3), },
+};
+
+static struct s3c24xx_dma_platdata s3c2440_dma_platdata = {
+	.num_phy_channels = 4,
+	.channels = s3c2440_dma_channels,
+	.num_channels = DMACH_MAX,
+};
+
+struct platform_device s3c2440_device_dma = {
+	.name		= "s3c2410-dma",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(s3c2410_dma_resource),
+	.resource	= s3c2410_dma_resource,
+	.dev	= {
+		.platform_data	= &s3c2440_dma_platdata,
+	},
+};
+#endif
+
 #if defined(CONFIG_CPUS_3C2443) || defined(CONFIG_CPU_S3C2416)
 static struct resource s3c2443_dma_resource[] = {
 	[0] = DEFINE_RES_MEM(S3C24XX_PA_DMA, S3C24XX_SZ_DMA),
diff --git a/arch/arm/mach-s3c24xx/common.h b/arch/arm/mach-s3c24xx/common.h
index fe071893275f..e46c10417216 100644
--- a/arch/arm/mach-s3c24xx/common.h
+++ b/arch/arm/mach-s3c24xx/common.h
@@ -109,7 +109,9 @@ extern void s3c2443_init_irq(void);
 
 extern struct syscore_ops s3c24xx_irq_syscore_ops;
 
+extern struct platform_device s3c2410_device_dma;
 extern struct platform_device s3c2412_device_dma;
+extern struct platform_device s3c2440_device_dma;
 extern struct platform_device s3c2443_device_dma;
 
 #endif /* __ARCH_ARM_MACH_S3C24XX_COMMON_H */
diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h
index 5a0cfffe3bbb..89ba1b0c90e4 100644
--- a/include/linux/platform_data/dma-s3c24xx.h
+++ b/include/linux/platform_data/dma-s3c24xx.h
@@ -9,6 +9,9 @@
  * any later version.
  */
 
+/* Helper to encode the source selection constraints for early s3c socs. */
+#define S3C24XX_DMA_CHANREQ(src, chan)	((BIT(3) | src) << chan * 4)
+
 enum s3c24xx_dma_bus {
 	S3C24XX_DMA_APB,
 	S3C24XX_DMA_AHB,
-- 
cgit v1.2.3


From 7f6ac89c1d1e0c654ea02c8c2dd0ee8e1ce2795f Mon Sep 17 00:00:00 2001
From: Fugang Duan <B38611@freescale.com>
Date: Tue, 3 Sep 2013 12:26:24 +0800
Subject: ARM: imx6sl: add imx6sl iomux-gpr field define

Add imx6sl iomux-gpr register field define in "imx6q-iomuxc-gpr.h" header
file, which is not fully define all iomux-gpr registers and fields, only
add fec related macro define.

Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index b6bdcd66c07d..7086b2248c8f 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -363,4 +363,9 @@
 #define IMX6Q_GPR13_SATA_TX_LVL_1_240_V		(0x1f << 2)
 #define IMX6Q_GPR13_SATA_MPLL_CLK_EN		BIT(1)
 #define IMX6Q_GPR13_SATA_TX_EDGE_RATE		BIT(0)
+
+/* For imx6sl iomux gpr register field define */
+#define IMX6SL_GPR1_FEC_CLOCK_MUX1_SEL_MASK    (0x3 << 17)
+#define IMX6SL_GPR1_FEC_CLOCK_MUX2_SEL_MASK    (0x1 << 14)
+
 #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */
-- 
cgit v1.2.3


From 602519b2bd63adfdf9e24b2f94aaddcfeb464e9e Mon Sep 17 00:00:00 2001
From: Dong Aisheng <b29396@freescale.com>
Date: Fri, 18 Oct 2013 19:48:47 +0800
Subject: mmc: sdhci-esdhc-imx: add delay line setting support

The DLL(Delay Line) is newly added to assist in sampling read data.
The DLL provides the ability to programmatically select a quantized
delay (in fractions of the clock period) regardless of on-chip variations
such as process, voltage and temperature (PVT).

This patch adds a user interface to set slave delay line via device tree.
It's usually used in high speed mode like mmc DDR mode when the signal
quality is not good caused by board design, e.g. the signal path is too
long.  User can manually set delay line to find a suitable data sampling
window for card to work properly.

Signed-off-by: Dong Aisheng <b29396@freescale.com>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 .../devicetree/bindings/mmc/fsl-imx-esdhc.txt          |  5 +++++
 drivers/mmc/host/sdhci-esdhc-imx.c                     | 18 ++++++++++++++++++
 include/linux/platform_data/mmc-esdhc-imx.h            |  1 +
 3 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
index 1dd622546d06..9046ba06c47a 100644
--- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
+++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
@@ -12,6 +12,11 @@ Required properties:
 Optional properties:
 - fsl,cd-controller : Indicate to use controller internal card detection
 - fsl,wp-controller : Indicate to use controller internal write protection
+- fsl,delay-line : Specify the number of delay cells for override mode.
+  This is used to set the clock delay for DLL(Delay Line) on override mode
+  to select a proper data sampling window in case the clock quality is not good
+  due to signal path is too long on the board. Please refer to eSDHC/uSDHC
+  chapter, DLL (Delay Line) section in RM for details.
 
 Examples:
 
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 915fa68e5122..260a81fde18d 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -46,6 +46,11 @@
 /* Bits 3 and 6 are not SDHCI standard definitions */
 #define  ESDHC_MIX_CTRL_SDHCI_MASK	0xb7
 
+/* dll control register */
+#define ESDHC_DLL_CTRL			0x60
+#define ESDHC_DLL_OVERRIDE_VAL_SHIFT	9
+#define ESDHC_DLL_OVERRIDE_EN_SHIFT	8
+
 /* tune control register */
 #define ESDHC_TUNE_CTRL_STATUS		0x68
 #define  ESDHC_TUNE_CTRL_STEP		1
@@ -817,6 +822,7 @@ static int esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct esdhc_platform_data *boarddata = &imx_data->boarddata;
 
 	switch (uhs) {
 	case MMC_TIMING_UHS_SDR12:
@@ -837,6 +843,15 @@ static int esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
 				ESDHC_MIX_CTRL_DDREN,
 				host->ioaddr + ESDHC_MIX_CTRL);
 		imx_data->is_ddr = 1;
+		if (boarddata->delay_line) {
+			u32 v;
+			v = boarddata->delay_line <<
+				ESDHC_DLL_OVERRIDE_VAL_SHIFT |
+				(1 << ESDHC_DLL_OVERRIDE_EN_SHIFT);
+			if (is_imx53_esdhc(imx_data))
+				v <<= 1;
+			writel(v, host->ioaddr + ESDHC_DLL_CTRL);
+		}
 		break;
 	}
 
@@ -901,6 +916,9 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
 	else
 		boarddata->support_vsel = true;
 
+	if (of_property_read_u32(np, "fsl,delay-line", &boarddata->delay_line))
+		boarddata->delay_line = 0;
+
 	return 0;
 }
 #else
diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h
index a0f5a8f9b3bc..75f70f6ac137 100644
--- a/include/linux/platform_data/mmc-esdhc-imx.h
+++ b/include/linux/platform_data/mmc-esdhc-imx.h
@@ -45,5 +45,6 @@ struct esdhc_platform_data {
 	int max_bus_width;
 	unsigned int f_max;
 	bool support_vsel;
+	unsigned int delay_line;
 };
 #endif /* __ASM_ARCH_IMX_ESDHC_H */
-- 
cgit v1.2.3


From c68c7f5a88328fbcd992c68e99ebd6bf7d49e9d2 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sun, 20 Oct 2013 06:26:02 +0200
Subject: net: fix build warnings because of net_get_random_once merge

This patch fixes the following warning:

   In file included from include/linux/skbuff.h:27:0,
                    from include/linux/netfilter.h:5,
                    from include/net/netns/netfilter.h:5,
                    from include/net/net_namespace.h:20,
                    from include/linux/init_task.h:14,
                    from init/init_task.c:1:
include/linux/net.h:243:14: warning: 'struct static_key' declared inside parameter list [enabled by default]
          struct static_key *done_key);

on x86_64 allnoconfig, um defconfig and ia64 allmodconfig and maybe others as well.

Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index a489705f6fa3..aca446b46754 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -24,6 +24,7 @@
 #include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/kmemcheck.h>
 #include <linux/rcupdate.h>
+#include <linux/jump_label.h>
 #include <uapi/linux/net.h>
 
 struct poll_table_struct;
-- 
cgit v1.2.3


From 88f074f4871a8c212b212b725e4dcdcdb09613c1 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:28:59 -0700
Subject: ACPI, CPER: Update cper info

We have a lot of confusing names of functions and data structures in
amongs the the error reporting code.  In particular the "apei" prefix
has been applied to many objects that are not part of APEI.  Since we
will be using these routines for extended error log reporting it will
be clearer if we fix up the names first.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/apei-internal.h | 12 ++++----
 drivers/acpi/apei/cper.c          | 58 +++++++++++++++++++--------------------
 drivers/acpi/apei/ghes.c          | 54 ++++++++++++++++++------------------
 include/acpi/actbl1.h             | 14 +++++-----
 include/acpi/ghes.h               |  2 +-
 include/linux/cper.h              |  2 +-
 6 files changed, 71 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index f220d642136e..21ba34a73883 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -122,11 +122,11 @@ struct dentry;
 struct dentry *apei_get_debugfs_dir(void);
 
 #define apei_estatus_for_each_section(estatus, section)			\
-	for (section = (struct acpi_hest_generic_data *)(estatus + 1);	\
+	for (section = (struct acpi_generic_data *)(estatus + 1);	\
 	     (void *)section - (void *)estatus < estatus->data_length;	\
 	     section = (void *)(section+1) + section->error_data_length)
 
-static inline u32 apei_estatus_len(struct acpi_hest_generic_status *estatus)
+static inline u32 cper_estatus_len(struct acpi_generic_status *estatus)
 {
 	if (estatus->raw_data_length)
 		return estatus->raw_data_offset + \
@@ -135,10 +135,10 @@ static inline u32 apei_estatus_len(struct acpi_hest_generic_status *estatus)
 		return sizeof(*estatus) + estatus->data_length;
 }
 
-void apei_estatus_print(const char *pfx,
-			const struct acpi_hest_generic_status *estatus);
-int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
-int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
+void cper_estatus_print(const char *pfx,
+			const struct acpi_generic_status *estatus);
+int cper_estatus_check_header(const struct acpi_generic_status *estatus);
+int cper_estatus_check(const struct acpi_generic_status *estatus);
 
 int apei_osc_setup(void);
 #endif
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index f827f02bfbe8..eb5f6d6d7dbc 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -5,7 +5,7 @@
  *	Author: Huang Ying <ying.huang@intel.com>
  *
  * CPER is the format used to describe platform hardware error by
- * various APEI tables, such as ERST, BERT and HEST etc.
+ * various tables, such as ERST, BERT and HEST etc.
  *
  * For more information about CPER, please refer to Appendix N of UEFI
  * Specification version 2.3.
@@ -73,7 +73,7 @@ static const char *cper_severity_str(unsigned int severity)
  * printed, with @pfx is printed at the beginning of each line.
  */
 void cper_print_bits(const char *pfx, unsigned int bits,
-		     const char *strs[], unsigned int strs_size)
+		     const char * const strs[], unsigned int strs_size)
 {
 	int i, len = 0;
 	const char *str;
@@ -98,32 +98,32 @@ void cper_print_bits(const char *pfx, unsigned int bits,
 		printk("%s\n", buf);
 }
 
-static const char *cper_proc_type_strs[] = {
+static const char * const cper_proc_type_strs[] = {
 	"IA32/X64",
 	"IA64",
 };
 
-static const char *cper_proc_isa_strs[] = {
+static const char * const cper_proc_isa_strs[] = {
 	"IA32",
 	"IA64",
 	"X64",
 };
 
-static const char *cper_proc_error_type_strs[] = {
+static const char * const cper_proc_error_type_strs[] = {
 	"cache error",
 	"TLB error",
 	"bus error",
 	"micro-architectural error",
 };
 
-static const char *cper_proc_op_strs[] = {
+static const char * const cper_proc_op_strs[] = {
 	"unknown or generic",
 	"data read",
 	"data write",
 	"instruction execution",
 };
 
-static const char *cper_proc_flag_strs[] = {
+static const char * const cper_proc_flag_strs[] = {
 	"restartable",
 	"precise IP",
 	"overflow",
@@ -248,7 +248,7 @@ static const char *cper_pcie_port_type_strs[] = {
 };
 
 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
-			    const struct acpi_hest_generic_data *gdata)
+			    const struct acpi_generic_data *gdata)
 {
 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
@@ -283,17 +283,17 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 }
 
-static const char *apei_estatus_section_flag_strs[] = {
+static const char * const cper_estatus_section_flag_strs[] = {
 	"primary",
 	"containment warning",
 	"reset",
-	"threshold exceeded",
+	"error threshold exceeded",
 	"resource not accessible",
 	"latent error",
 };
 
-static void apei_estatus_print_section(
-	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
+static void cper_estatus_print_section(
+	const char *pfx, const struct acpi_generic_data *gdata, int sec_no)
 {
 	uuid_le *sec_type = (uuid_le *)gdata->section_type;
 	__u16 severity;
@@ -302,8 +302,8 @@ static void apei_estatus_print_section(
 	printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
 	       cper_severity_str(severity));
 	printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
-	cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
-			ARRAY_SIZE(apei_estatus_section_flag_strs));
+	cper_print_bits(pfx, gdata->flags, cper_estatus_section_flag_strs,
+			ARRAY_SIZE(cper_estatus_section_flag_strs));
 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
 		printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
@@ -339,34 +339,34 @@ err_section_too_small:
 	pr_err(FW_WARN "error section length is too small\n");
 }
 
-void apei_estatus_print(const char *pfx,
-			const struct acpi_hest_generic_status *estatus)
+void cper_estatus_print(const char *pfx,
+			const struct acpi_generic_status *estatus)
 {
-	struct acpi_hest_generic_data *gdata;
+	struct acpi_generic_data *gdata;
 	unsigned int data_len, gedata_len;
 	int sec_no = 0;
 	__u16 severity;
 
-	printk("%s""APEI generic hardware error status\n", pfx);
+	printk("%s""Generic Hardware Error Status\n", pfx);
 	severity = estatus->error_severity;
 	printk("%s""severity: %d, %s\n", pfx, severity,
 	       cper_severity_str(severity));
 	data_len = estatus->data_length;
-	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
+	gdata = (struct acpi_generic_data *)(estatus + 1);
 	while (data_len >= sizeof(*gdata)) {
 		gedata_len = gdata->error_data_length;
-		apei_estatus_print_section(pfx, gdata, sec_no);
+		cper_estatus_print_section(pfx, gdata, sec_no);
 		data_len -= gedata_len + sizeof(*gdata);
 		gdata = (void *)(gdata + 1) + gedata_len;
 		sec_no++;
 	}
 }
-EXPORT_SYMBOL_GPL(apei_estatus_print);
+EXPORT_SYMBOL_GPL(cper_estatus_print);
 
-int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
+int cper_estatus_check_header(const struct acpi_generic_status *estatus)
 {
 	if (estatus->data_length &&
-	    estatus->data_length < sizeof(struct acpi_hest_generic_data))
+	    estatus->data_length < sizeof(struct acpi_generic_data))
 		return -EINVAL;
 	if (estatus->raw_data_length &&
 	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
@@ -374,19 +374,19 @@ int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(apei_estatus_check_header);
+EXPORT_SYMBOL_GPL(cper_estatus_check_header);
 
-int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
+int cper_estatus_check(const struct acpi_generic_status *estatus)
 {
-	struct acpi_hest_generic_data *gdata;
+	struct acpi_generic_data *gdata;
 	unsigned int data_len, gedata_len;
 	int rc;
 
-	rc = apei_estatus_check_header(estatus);
+	rc = cper_estatus_check_header(estatus);
 	if (rc)
 		return rc;
 	data_len = estatus->data_length;
-	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
+	gdata = (struct acpi_generic_data *)(estatus + 1);
 	while (data_len >= sizeof(*gdata)) {
 		gedata_len = gdata->error_data_length;
 		if (gedata_len > data_len - sizeof(*gdata))
@@ -399,4 +399,4 @@ int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(apei_estatus_check);
+EXPORT_SYMBOL_GPL(cper_estatus_check);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 8ec37bbdd699..0db6e4ff6501 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -75,13 +75,13 @@
 #define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
 	(sizeof(struct ghes_estatus_cache) + (estatus_len))
 #define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
-	((struct acpi_hest_generic_status *)			\
+	((struct acpi_generic_status *)				\
 	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
 
 #define GHES_ESTATUS_NODE_LEN(estatus_len)			\
 	(sizeof(struct ghes_estatus_node) + (estatus_len))
-#define GHES_ESTATUS_FROM_NODE(estatus_node)				\
-	((struct acpi_hest_generic_status *)				\
+#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
+	((struct acpi_generic_status *)				\
 	 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
 bool ghes_disable;
@@ -378,17 +378,17 @@ static int ghes_read_estatus(struct ghes *ghes, int silent)
 	ghes->flags |= GHES_TO_CLEAR;
 
 	rc = -EIO;
-	len = apei_estatus_len(ghes->estatus);
+	len = cper_estatus_len(ghes->estatus);
 	if (len < sizeof(*ghes->estatus))
 		goto err_read_block;
 	if (len > ghes->generic->error_block_length)
 		goto err_read_block;
-	if (apei_estatus_check_header(ghes->estatus))
+	if (cper_estatus_check_header(ghes->estatus))
 		goto err_read_block;
 	ghes_copy_tofrom_phys(ghes->estatus + 1,
 			      buf_paddr + sizeof(*ghes->estatus),
 			      len - sizeof(*ghes->estatus), 1);
-	if (apei_estatus_check(ghes->estatus))
+	if (cper_estatus_check(ghes->estatus))
 		goto err_read_block;
 	rc = 0;
 
@@ -409,7 +409,7 @@ static void ghes_clear_estatus(struct ghes *ghes)
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
-static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
+static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
 {
 #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
 	unsigned long pfn;
@@ -438,10 +438,10 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
 }
 
 static void ghes_do_proc(struct ghes *ghes,
-			 const struct acpi_hest_generic_status *estatus)
+			 const struct acpi_generic_status *estatus)
 {
 	int sev, sec_sev;
-	struct acpi_hest_generic_data *gdata;
+	struct acpi_generic_data *gdata;
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
@@ -496,7 +496,7 @@ static void ghes_do_proc(struct ghes *ghes,
 
 static void __ghes_print_estatus(const char *pfx,
 				 const struct acpi_hest_generic *generic,
-				 const struct acpi_hest_generic_status *estatus)
+				 const struct acpi_generic_status *estatus)
 {
 	static atomic_t seqno;
 	unsigned int curr_seqno;
@@ -513,12 +513,12 @@ static void __ghes_print_estatus(const char *pfx,
 	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
 	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
 	       pfx_seq, generic->header.source_id);
-	apei_estatus_print(pfx_seq, estatus);
+	cper_estatus_print(pfx_seq, estatus);
 }
 
 static int ghes_print_estatus(const char *pfx,
 			      const struct acpi_hest_generic *generic,
-			      const struct acpi_hest_generic_status *estatus)
+			      const struct acpi_generic_status *estatus)
 {
 	/* Not more than 2 messages every 5 seconds */
 	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
@@ -540,15 +540,15 @@ static int ghes_print_estatus(const char *pfx,
  * GHES error status reporting throttle, to report more kinds of
  * errors, instead of just most frequently occurred errors.
  */
-static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
+static int ghes_estatus_cached(struct acpi_generic_status *estatus)
 {
 	u32 len;
 	int i, cached = 0;
 	unsigned long long now;
 	struct ghes_estatus_cache *cache;
-	struct acpi_hest_generic_status *cache_estatus;
+	struct acpi_generic_status *cache_estatus;
 
-	len = apei_estatus_len(estatus);
+	len = cper_estatus_len(estatus);
 	rcu_read_lock();
 	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
 		cache = rcu_dereference(ghes_estatus_caches[i]);
@@ -571,19 +571,19 @@ static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
 
 static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
 	struct acpi_hest_generic *generic,
-	struct acpi_hest_generic_status *estatus)
+	struct acpi_generic_status *estatus)
 {
 	int alloced;
 	u32 len, cache_len;
 	struct ghes_estatus_cache *cache;
-	struct acpi_hest_generic_status *cache_estatus;
+	struct acpi_generic_status *cache_estatus;
 
 	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
 	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
 		atomic_dec(&ghes_estatus_cache_alloced);
 		return NULL;
 	}
-	len = apei_estatus_len(estatus);
+	len = cper_estatus_len(estatus);
 	cache_len = GHES_ESTATUS_CACHE_LEN(len);
 	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
 	if (!cache) {
@@ -603,7 +603,7 @@ static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
 {
 	u32 len;
 
-	len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+	len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
 	len = GHES_ESTATUS_CACHE_LEN(len);
 	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
 	atomic_dec(&ghes_estatus_cache_alloced);
@@ -619,7 +619,7 @@ static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
 
 static void ghes_estatus_cache_add(
 	struct acpi_hest_generic *generic,
-	struct acpi_hest_generic_status *estatus)
+	struct acpi_generic_status *estatus)
 {
 	int i, slot = -1, count;
 	unsigned long long now, duration, period, max_period = 0;
@@ -751,7 +751,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
 	struct llist_node *llnode, *next;
 	struct ghes_estatus_node *estatus_node;
 	struct acpi_hest_generic *generic;
-	struct acpi_hest_generic_status *estatus;
+	struct acpi_generic_status *estatus;
 	u32 len, node_len;
 
 	llnode = llist_del_all(&ghes_estatus_llist);
@@ -765,7 +765,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
 					   llnode);
 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
-		len = apei_estatus_len(estatus);
+		len = cper_estatus_len(estatus);
 		node_len = GHES_ESTATUS_NODE_LEN(len);
 		ghes_do_proc(estatus_node->ghes, estatus);
 		if (!ghes_estatus_cached(estatus)) {
@@ -784,7 +784,7 @@ static void ghes_print_queued_estatus(void)
 	struct llist_node *llnode;
 	struct ghes_estatus_node *estatus_node;
 	struct acpi_hest_generic *generic;
-	struct acpi_hest_generic_status *estatus;
+	struct acpi_generic_status *estatus;
 	u32 len, node_len;
 
 	llnode = llist_del_all(&ghes_estatus_llist);
@@ -797,7 +797,7 @@ static void ghes_print_queued_estatus(void)
 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
 					   llnode);
 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
-		len = apei_estatus_len(estatus);
+		len = cper_estatus_len(estatus);
 		node_len = GHES_ESTATUS_NODE_LEN(len);
 		generic = estatus_node->generic;
 		ghes_print_estatus(NULL, generic, estatus);
@@ -843,7 +843,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
 		u32 len, node_len;
 		struct ghes_estatus_node *estatus_node;
-		struct acpi_hest_generic_status *estatus;
+		struct acpi_generic_status *estatus;
 #endif
 		if (!(ghes->flags & GHES_TO_CLEAR))
 			continue;
@@ -851,7 +851,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 		if (ghes_estatus_cached(ghes->estatus))
 			goto next;
 		/* Save estatus for further processing in IRQ context */
-		len = apei_estatus_len(ghes->estatus);
+		len = cper_estatus_len(ghes->estatus);
 		node_len = GHES_ESTATUS_NODE_LEN(len);
 		estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
 						      node_len);
@@ -923,7 +923,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
 
 	rc = -EIO;
 	if (generic->error_block_length <
-	    sizeof(struct acpi_hest_generic_status)) {
+	    sizeof(struct acpi_generic_status)) {
 		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
 			   generic->error_block_length,
 			   generic->header.source_id);
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 0bd750ebeb49..556c83ee6b42 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -596,7 +596,7 @@ struct acpi_hest_generic {
 
 /* Generic Error Status block */
 
-struct acpi_hest_generic_status {
+struct acpi_generic_status {
 	u32 block_status;
 	u32 raw_data_offset;
 	u32 raw_data_length;
@@ -606,15 +606,15 @@ struct acpi_hest_generic_status {
 
 /* Values for block_status flags above */
 
-#define ACPI_HEST_UNCORRECTABLE             (1)
-#define ACPI_HEST_CORRECTABLE               (1<<1)
-#define ACPI_HEST_MULTIPLE_UNCORRECTABLE    (1<<2)
-#define ACPI_HEST_MULTIPLE_CORRECTABLE      (1<<3)
-#define ACPI_HEST_ERROR_ENTRY_COUNT         (0xFF<<4)	/* 8 bits, error count */
+#define ACPI_GEN_ERR_UC			BIT(0)
+#define ACPI_GEN_ERR_CE			BIT(1)
+#define ACPI_GEN_ERR_MULTI_UC		BIT(2)
+#define ACPI_GEN_ERR_MULTI_CE		BIT(3)
+#define ACPI_GEN_ERR_COUNT_SHIFT	(0xFF<<4) /* 8 bits, error count */
 
 /* Generic Error Data entry */
 
-struct acpi_hest_generic_data {
+struct acpi_generic_data {
 	u8 section_type[16];
 	u32 error_severity;
 	u16 revision;
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 720446cb243e..dfd60d0bfd27 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -14,7 +14,7 @@
 
 struct ghes {
 	struct acpi_hest_generic *generic;
-	struct acpi_hest_generic_status *estatus;
+	struct acpi_generic_status *estatus;
 	u64 buffer_paddr;
 	unsigned long flags;
 	union {
diff --git a/include/linux/cper.h b/include/linux/cper.h
index c23049496531..09ebe2113641 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -389,6 +389,6 @@ struct cper_sec_pcie {
 
 u64 cper_next_record_id(void);
 void cper_print_bits(const char *prefix, unsigned int bits,
-		     const char *strs[], unsigned int strs_size);
+		     const char * const strs[], unsigned int strs_size);
 
 #endif
-- 
cgit v1.2.3


From 10ef6b0dffe404bcc54e94cb2ca1a5b18445a66b Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:29:07 -0700
Subject: bitops: Introduce a more generic BITMASK macro

GENMASK is used to create a contiguous bitmask([hi:lo]). It is
implemented twice in current kernel. One is in EDAC driver, the other
is in SiS/XGI FB driver. Move it to a more generic place for other
usage.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Acked-by: Borislav Petkov <bp@suse.de>
Acked-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/edac/amd64_edac.c | 46 ++++++++++++++++++++++++----------------------
 drivers/edac/amd64_edac.h |  8 --------
 drivers/edac/sb_edac.c    |  2 +-
 drivers/video/sis/init.c  |  5 ++---
 include/linux/bitops.h    |  8 ++++++++
 5 files changed, 35 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 3c9e4e98c651..b53d0de17e15 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -339,8 +339,8 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
 	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
 		csbase		= pvt->csels[dct].csbases[csrow];
 		csmask		= pvt->csels[dct].csmasks[csrow];
-		base_bits	= GENMASK(21, 31) | GENMASK(9, 15);
-		mask_bits	= GENMASK(21, 29) | GENMASK(9, 15);
+		base_bits	= GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9);
+		mask_bits	= GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9);
 		addr_shift	= 4;
 
 	/*
@@ -352,16 +352,16 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
 		csbase          = pvt->csels[dct].csbases[csrow];
 		csmask          = pvt->csels[dct].csmasks[csrow >> 1];
 
-		*base  = (csbase & GENMASK(5,  15)) << 6;
-		*base |= (csbase & GENMASK(19, 30)) << 8;
+		*base  = (csbase & GENMASK_ULL(15,  5)) << 6;
+		*base |= (csbase & GENMASK_ULL(30, 19)) << 8;
 
 		*mask = ~0ULL;
 		/* poke holes for the csmask */
-		*mask &= ~((GENMASK(5, 15)  << 6) |
-			   (GENMASK(19, 30) << 8));
+		*mask &= ~((GENMASK_ULL(15, 5)  << 6) |
+			   (GENMASK_ULL(30, 19) << 8));
 
-		*mask |= (csmask & GENMASK(5, 15))  << 6;
-		*mask |= (csmask & GENMASK(19, 30)) << 8;
+		*mask |= (csmask & GENMASK_ULL(15, 5))  << 6;
+		*mask |= (csmask & GENMASK_ULL(30, 19)) << 8;
 
 		return;
 	} else {
@@ -370,9 +370,11 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
 		addr_shift	= 8;
 
 		if (pvt->fam == 0x15)
-			base_bits = mask_bits = GENMASK(19,30) | GENMASK(5,13);
+			base_bits = mask_bits =
+				GENMASK_ULL(30,19) | GENMASK_ULL(13,5);
 		else
-			base_bits = mask_bits = GENMASK(19,28) | GENMASK(5,13);
+			base_bits = mask_bits =
+				GENMASK_ULL(28,19) | GENMASK_ULL(13,5);
 	}
 
 	*base  = (csbase & base_bits) << addr_shift;
@@ -561,7 +563,7 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
 	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
 	 * Programmer's Manual Volume 1 Application Programming.
 	 */
-	dram_addr = (sys_addr & GENMASK(0, 39)) - dram_base;
+	dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base;
 
 	edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
 		 (unsigned long)sys_addr, (unsigned long)dram_addr);
@@ -597,7 +599,7 @@ static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
 	 * concerning translating a DramAddr to an InputAddr.
 	 */
 	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
-	input_addr = ((dram_addr >> intlv_shift) & GENMASK(12, 35)) +
+	input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) +
 		      (dram_addr & 0xfff);
 
 	edac_dbg(2, "  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
@@ -849,7 +851,7 @@ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
 		end_bit   = 39;
 	}
 
-	addr = m->addr & GENMASK(start_bit, end_bit);
+	addr = m->addr & GENMASK_ULL(end_bit, start_bit);
 
 	/*
 	 * Erratum 637 workaround
@@ -861,7 +863,7 @@ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
 		u16 mce_nid;
 		u8 intlv_en;
 
-		if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
+		if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7)
 			return addr;
 
 		mce_nid	= amd_get_nb_id(m->extcpu);
@@ -871,7 +873,7 @@ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
 		intlv_en = tmp >> 21 & 0x7;
 
 		/* add [47:27] + 3 trailing bits */
-		cc6_base  = (tmp & GENMASK(0, 20)) << 3;
+		cc6_base  = (tmp & GENMASK_ULL(20, 0)) << 3;
 
 		/* reverse and add DramIntlvEn */
 		cc6_base |= intlv_en ^ 0x7;
@@ -880,18 +882,18 @@ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
 		cc6_base <<= 24;
 
 		if (!intlv_en)
-			return cc6_base | (addr & GENMASK(0, 23));
+			return cc6_base | (addr & GENMASK_ULL(23, 0));
 
 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);
 
 							/* faster log2 */
-		tmp_addr  = (addr & GENMASK(12, 23)) << __fls(intlv_en + 1);
+		tmp_addr  = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1);
 
 		/* OR DramIntlvSel into bits [14:12] */
-		tmp_addr |= (tmp & GENMASK(21, 23)) >> 9;
+		tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9;
 
 		/* add remaining [11:0] bits from original MC4_ADDR */
-		tmp_addr |= addr & GENMASK(0, 11);
+		tmp_addr |= addr & GENMASK_ULL(11, 0);
 
 		return cc6_base | tmp_addr;
 	}
@@ -952,12 +954,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 
 	amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
 
-	pvt->ranges[range].lim.lo &= GENMASK(0, 15);
+	pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0);
 
 				    /* {[39:27],111b} */
 	pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
 
-	pvt->ranges[range].lim.hi &= GENMASK(0, 7);
+	pvt->ranges[range].lim.hi &= GENMASK_ULL(7, 0);
 
 				    /* [47:40] */
 	pvt->ranges[range].lim.hi |= llim >> 13;
@@ -1330,7 +1332,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
 			chan_off = dram_base;
 	}
 
-	return (sys_addr & GENMASK(6,47)) - (chan_off & GENMASK(23,47));
+	return (sys_addr & GENMASK_ULL(47,6)) - (chan_off & GENMASK_ULL(47,23));
 }
 
 /*
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index d2443cfa0698..6dc1fcc25afb 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -159,14 +159,6 @@
 #define ON true
 #define OFF false
 
-/*
- * Create a contiguous bitmask starting at bit position @lo and ending at
- * position @hi. For example
- *
- * GENMASK(21, 39) gives us the 64bit vector 0x000000ffffe00000.
- */
-#define GENMASK(lo, hi)			(((1ULL << ((hi) - (lo) + 1)) - 1) << (lo))
-
 /*
  * PCI-defined configuration space registers
  */
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index e04462b60756..88f60c5fecbc 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -50,7 +50,7 @@ static int probed;
  * Get a bit field at register value <v>, from bit <lo> to bit <hi>
  */
 #define GET_BITFIELD(v, lo, hi)	\
-	(((v) & ((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) >> (lo))
+	(((v) & GENMASK_ULL(hi, lo)) >> (lo))
 
 /*
  * sbridge Memory Controller Registers
diff --git a/drivers/video/sis/init.c b/drivers/video/sis/init.c
index f082ae55c0c9..4f26bc28e60b 100644
--- a/drivers/video/sis/init.c
+++ b/drivers/video/sis/init.c
@@ -3320,9 +3320,8 @@ SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo)
 }
 
 #ifndef GETBITSTR
-#define BITMASK(h,l)    	(((unsigned)(1U << ((h)-(l)+1))-1)<<(l))
-#define GENMASK(mask)   	BITMASK(1?mask,0?mask)
-#define GETBITS(var,mask)   	(((var) & GENMASK(mask)) >> (0?mask))
+#define GENBITSMASK(mask)   	GENMASK(1?mask,0?mask)
+#define GETBITS(var,mask)   	(((var) & GENBITSMASK(mask)) >> (0?mask))
 #define GETBITSTR(val,from,to)  ((GETBITS(val,from)) << (0?to))
 #endif
 
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3b6b82108b9..bd0c4598d03b 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -10,6 +10,14 @@
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
 #endif
 
+/*
+ * Create a contiguous bitmask starting at bit position @l and ending at
+ * position @h. For example
+ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
+ */
+#define GENMASK(h, l)		(((U32_C(1) << ((h) - (l) + 1)) - 1) << (l))
+#define GENMASK_ULL(h, l)	(((U64_C(1) << ((h) - (l) + 1)) - 1) << (l))
+
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
 extern unsigned int __sw_hweight32(unsigned int w);
-- 
cgit v1.2.3


From 61c1db7fae21ed33c614356a43bf6580c5e53118 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 20 Oct 2013 20:47:30 -0700
Subject: ipv6: sit: add GSO/TSO support

Now ipv6_gso_segment() is stackable, its relatively easy to
implement GSO/TSO support for SIT tunnels

Performance results, when segmentation is done after tunnel
device (as no NIC is yet enabled for TSO SIT support) :

Before patch :

lpq84:~# ./netperf -H 2002:af6:1153:: -Cc
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:1153:: () port 0 AF_INET6
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  16384  16384    10.00      3168.31   4.81     4.64     2.988   2.877

After patch :

lpq84:~# ./netperf -H 2002:af6:1153:: -Cc
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:1153:: () port 0 AF_INET6
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  16384  16384    10.00      5525.00   7.76     5.17     2.763   1.840

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  2 ++
 include/linux/skbuff.h          |  6 ++++--
 net/core/ethtool.c              |  1 +
 net/ipv4/af_inet.c              |  1 +
 net/ipv4/tcp_offload.c          |  1 +
 net/ipv6/ip6_offload.c          | 11 +++++++++++
 net/ipv6/sit.c                  | 28 +++++++++++++++++++---------
 net/ipv6/udp_offload.c          |  1 +
 8 files changed, 40 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 8dad68cede1c..b05a4b501ab5 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -43,6 +43,7 @@ enum {
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
 	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
 	NETIF_F_GSO_IPIP_BIT,		/* ... IPIP tunnel with TSO */
+	NETIF_F_GSO_SIT_BIT,		/* ... SIT tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
 	NETIF_F_GSO_MPLS_BIT,		/* ... MPLS segmentation */
 	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
@@ -109,6 +110,7 @@ enum {
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
 #define NETIF_F_GSO_GRE		__NETIF_F(GSO_GRE)
 #define NETIF_F_GSO_IPIP	__NETIF_F(GSO_IPIP)
+#define NETIF_F_GSO_SIT		__NETIF_F(GSO_SIT)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
 #define NETIF_F_GSO_MPLS	__NETIF_F(GSO_MPLS)
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 60729134d253..2c154976394b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -320,9 +320,11 @@ enum {
 
 	SKB_GSO_IPIP = 1 << 7,
 
-	SKB_GSO_UDP_TUNNEL = 1 << 8,
+	SKB_GSO_SIT = 1 << 8,
 
-	SKB_GSO_MPLS = 1 << 9,
+	SKB_GSO_UDP_TUNNEL = 1 << 9,
+
+	SKB_GSO_MPLS = 1 << 10,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 8cab7744790e..862989898f61 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
 	[NETIF_F_GSO_GRE_BIT] =		 "tx-gre-segmentation",
 	[NETIF_F_GSO_IPIP_BIT] =	 "tx-ipip-segmentation",
+	[NETIF_F_GSO_SIT_BIT] =		 "tx-sit-segmentation",
 	[NETIF_F_GSO_UDP_TUNNEL_BIT] =	 "tx-udp_tnl-segmentation",
 	[NETIF_F_GSO_MPLS_BIT] =	 "tx-mpls-segmentation",
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 24a53fc275b0..f4a159e705c0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1265,6 +1265,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
 		       SKB_GSO_IPIP |
+		       SKB_GSO_SIT |
 		       SKB_GSO_TCPV6 |
 		       SKB_GSO_UDP_TUNNEL |
 		       SKB_GSO_MPLS |
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index dfc96b00673e..a7a5583eab04 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -57,6 +57,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 			       SKB_GSO_TCPV6 |
 			       SKB_GSO_GRE |
 			       SKB_GSO_IPIP |
+			       SKB_GSO_SIT |
 			       SKB_GSO_MPLS |
 			       SKB_GSO_UDP_TUNNEL |
 			       0) ||
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index f9b33d82bb9d..4b851692b1f6 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -98,6 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
 		       SKB_GSO_IPIP |
+		       SKB_GSO_SIT |
 		       SKB_GSO_UDP_TUNNEL |
 		       SKB_GSO_MPLS |
 		       SKB_GSO_TCPV6 |
@@ -276,6 +277,13 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
 	},
 };
 
+static const struct net_offload sit_offload = {
+	.callbacks = {
+		.gso_send_check = ipv6_gso_send_check,
+		.gso_segment	= ipv6_gso_segment,
+	},
+};
+
 static int __init ipv6_offload_init(void)
 {
 
@@ -287,6 +295,9 @@ static int __init ipv6_offload_init(void)
 		pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
 
 	dev_add_offload(&ipv6_packet_offload);
+
+	inet_add_offload(&sit_offload, IPPROTO_IPV6);
+
 	return 0;
 }
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 19269453a8ea..3a9038dd818d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -933,10 +933,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		ttl = iph6->hop_limit;
 	tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+	if (IS_ERR(skb))
+		goto out;
 
 	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
 			    ttl, df, !net_eq(tunnel->net, dev_net(dev)));
@@ -946,8 +945,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 tx_error_icmp:
 	dst_link_failure(skb);
 tx_error:
-	dev->stats.tx_errors++;
 	dev_kfree_skb(skb);
+out:
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 
@@ -956,13 +956,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr  *tiph = &tunnel->parms.iph;
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+	if (IS_ERR(skb))
+		goto out;
 
 	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
 	return NETDEV_TX_OK;
+out:
+	dev->stats.tx_errors++;
+	return NETDEV_TX_OK;
 }
 
 static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
@@ -1292,6 +1294,12 @@ static void ipip6_dev_free(struct net_device *dev)
 	free_netdev(dev);
 }
 
+#define SIT_FEATURES (NETIF_F_SG	   | \
+		      NETIF_F_FRAGLIST	   | \
+		      NETIF_F_HIGHDMA	   | \
+		      NETIF_F_GSO_SOFTWARE | \
+		      NETIF_F_HW_CSUM)
+
 static void ipip6_tunnel_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &ipip6_netdev_ops;
@@ -1305,6 +1313,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
+	dev->features		|= SIT_FEATURES;
+	dev->hw_features	|= SIT_FEATURES;
 }
 
 static int ipip6_tunnel_init(struct net_device *dev)
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index f63780ff3732..08e23b0bf302 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -65,6 +65,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_GRE |
 				      SKB_GSO_IPIP |
+				      SKB_GSO_SIT |
 				      SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
-- 
cgit v1.2.3


From 93302880d8a3e5dc6b7da3f9825beb839152c940 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 18 Oct 2013 11:41:55 +0200
Subject: netfilter: ipset: Use netlink callback dump args only

Instead of cb->data, use callback dump args only and introduce symbolic
names instead of plain numbers at accessing the argument members.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h  | 10 +++++
 net/netfilter/ipset/ip_set_bitmap_gen.h | 11 +++---
 net/netfilter/ipset/ip_set_core.c       | 70 ++++++++++++++++-----------------
 net/netfilter/ipset/ip_set_hash_gen.h   | 20 +++++-----
 net/netfilter/ipset/ip_set_list_set.c   | 11 +++---
 5 files changed, 68 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 7967516adc0d..c7174b816674 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -316,6 +316,16 @@ ip_set_init_counter(struct ip_set_counter *counter,
 		atomic64_set(&(counter)->packets, (long long)(ext->packets));
 }
 
+/* Netlink CB args */
+enum {
+	IPSET_CB_NET = 0,
+	IPSET_CB_DUMP,
+	IPSET_CB_INDEX,
+	IPSET_CB_ARG0,
+	IPSET_CB_ARG1,
+	IPSET_CB_ARG2,
+};
+
 /* register and unregister set references */
 extern ip_set_id_t ip_set_get_byname(struct net *net,
 				     const char *name, struct ip_set **set);
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index a13e15be7911..f2c7d83dc23f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -198,13 +198,14 @@ mtype_list(const struct ip_set *set,
 	struct mtype *map = set->data;
 	struct nlattr *adt, *nested;
 	void *x;
-	u32 id, first = cb->args[2];
+	u32 id, first = cb->args[IPSET_CB_ARG0];
 
 	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
 	if (!adt)
 		return -EMSGSIZE;
-	for (; cb->args[2] < map->elements; cb->args[2]++) {
-		id = cb->args[2];
+	for (; cb->args[IPSET_CB_ARG0] < map->elements;
+	     cb->args[IPSET_CB_ARG0]++) {
+		id = cb->args[IPSET_CB_ARG0];
 		x = get_ext(set, map, id);
 		if (!test_bit(id, map->members) ||
 		    (SET_WITH_TIMEOUT(set) &&
@@ -231,14 +232,14 @@ mtype_list(const struct ip_set *set,
 	ipset_nest_end(skb, adt);
 
 	/* Set listing finished */
-	cb->args[2] = 0;
+	cb->args[IPSET_CB_ARG0] = 0;
 
 	return 0;
 
 nla_put_failure:
 	nla_nest_cancel(skb, nested);
 	if (unlikely(id == first)) {
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 		return -EMSGSIZE;
 	}
 	ipset_nest_end(skb, adt);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index dc9284bdd2dd..bac7e01df67f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1182,10 +1182,12 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 static int
 ip_set_dump_done(struct netlink_callback *cb)
 {
-	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
-	if (cb->args[2]) {
-		pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name);
-		__ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]);
+	struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
+	if (cb->args[IPSET_CB_ARG0]) {
+		pr_debug("release set %s\n",
+			 nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name);
+		__ip_set_put_byindex(inst,
+			(ip_set_id_t) cb->args[IPSET_CB_INDEX]);
 	}
 	return 0;
 }
@@ -1203,7 +1205,7 @@ dump_attrs(struct nlmsghdr *nlh)
 }
 
 static int
-dump_init(struct netlink_callback *cb)
+dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
 	int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
@@ -1211,15 +1213,15 @@ dump_init(struct netlink_callback *cb)
 	struct nlattr *attr = (void *)nlh + min_len;
 	u32 dump_type;
 	ip_set_id_t index;
-	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 
 	/* Second pass, so parser can't fail */
 	nla_parse(cda, IPSET_ATTR_CMD_MAX,
 		  attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
 
-	/* cb->args[0] : dump single set/all sets
-	 *         [1] : set index
-	 *         [..]: type specific
+	/* cb->args[IPSET_CB_NET]:	net namespace
+	 *         [IPSET_CB_DUMP]:	dump single set/all sets
+	 *         [IPSET_CB_INDEX]: 	set index
+	 *         [IPSET_CB_ARG0]:	type specific
 	 */
 
 	if (cda[IPSET_ATTR_SETNAME]) {
@@ -1231,7 +1233,7 @@ dump_init(struct netlink_callback *cb)
 			return -ENOENT;
 
 		dump_type = DUMP_ONE;
-		cb->args[1] = index;
+		cb->args[IPSET_CB_INDEX] = index;
 	} else
 		dump_type = DUMP_ALL;
 
@@ -1239,7 +1241,8 @@ dump_init(struct netlink_callback *cb)
 		u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
 		dump_type |= (f << 16);
 	}
-	cb->args[0] = dump_type;
+	cb->args[IPSET_CB_NET] = (unsigned long)inst;
+	cb->args[IPSET_CB_DUMP] = dump_type;
 
 	return 0;
 }
@@ -1251,12 +1254,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	struct ip_set *set = NULL;
 	struct nlmsghdr *nlh = NULL;
 	unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
+	struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
 	u32 dump_type, dump_flags;
 	int ret = 0;
-	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 
-	if (!cb->args[0]) {
-		ret = dump_init(cb);
+	if (!cb->args[IPSET_CB_DUMP]) {
+		ret = dump_init(cb, inst);
 		if (ret < 0) {
 			nlh = nlmsg_hdr(cb->skb);
 			/* We have to create and send the error message
@@ -1267,17 +1270,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		}
 	}
 
-	if (cb->args[1] >= inst->ip_set_max)
+	if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
 		goto out;
 
-	dump_type = DUMP_TYPE(cb->args[0]);
-	dump_flags = DUMP_FLAGS(cb->args[0]);
-	max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max;
+	dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]);
+	dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]);
+	max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1
+				    : inst->ip_set_max;
 dump_last:
-	pr_debug("args[0]: %u %u args[1]: %ld\n",
-		 dump_type, dump_flags, cb->args[1]);
-	for (; cb->args[1] < max; cb->args[1]++) {
-		index = (ip_set_id_t) cb->args[1];
+	pr_debug("dump type, flag: %u %u index: %ld\n",
+		 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
+	for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
+		index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
 		set = nfnl_set(inst, index);
 		if (set == NULL) {
 			if (dump_type == DUMP_ONE) {
@@ -1294,7 +1298,7 @@ dump_last:
 		     !!(set->type->features & IPSET_DUMP_LAST)))
 			continue;
 		pr_debug("List set: %s\n", set->name);
-		if (!cb->args[2]) {
+		if (!cb->args[IPSET_CB_ARG0]) {
 			/* Start listing: make sure set won't be destroyed */
 			pr_debug("reference set\n");
 			__ip_set_get(set);
@@ -1311,7 +1315,7 @@ dump_last:
 			goto nla_put_failure;
 		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
 			goto next_set;
-		switch (cb->args[2]) {
+		switch (cb->args[IPSET_CB_ARG0]) {
 		case 0:
 			/* Core header data */
 			if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
@@ -1331,7 +1335,7 @@ dump_last:
 			read_lock_bh(&set->lock);
 			ret = set->variant->list(set, skb, cb);
 			read_unlock_bh(&set->lock);
-			if (!cb->args[2])
+			if (!cb->args[IPSET_CB_ARG0])
 				/* Set is done, proceed with next one */
 				goto next_set;
 			goto release_refcount;
@@ -1340,8 +1344,8 @@ dump_last:
 	/* If we dump all sets, continue with dumping last ones */
 	if (dump_type == DUMP_ALL) {
 		dump_type = DUMP_LAST;
-		cb->args[0] = dump_type | (dump_flags << 16);
-		cb->args[1] = 0;
+		cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
+		cb->args[IPSET_CB_INDEX] = 0;
 		goto dump_last;
 	}
 	goto out;
@@ -1350,15 +1354,15 @@ nla_put_failure:
 	ret = -EFAULT;
 next_set:
 	if (dump_type == DUMP_ONE)
-		cb->args[1] = IPSET_INVALID_ID;
+		cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID;
 	else
-		cb->args[1]++;
+		cb->args[IPSET_CB_INDEX]++;
 release_refcount:
 	/* If there was an error or set is done, release set */
-	if (ret || !cb->args[2]) {
+	if (ret || !cb->args[IPSET_CB_ARG0]) {
 		pr_debug("release set %s\n", nfnl_set(inst, index)->name);
 		__ip_set_put_byindex(inst, index);
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 	}
 out:
 	if (nlh) {
@@ -1375,8 +1379,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
-
 	if (unlikely(protocol_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
@@ -1384,7 +1386,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 		struct netlink_dump_control c = {
 			.dump = ip_set_dump_start,
 			.done = ip_set_dump_done,
-			.data = (void *)inst
 		};
 		return netlink_dump_start(ctnl, skb, nlh, &c);
 	}
@@ -1961,7 +1962,6 @@ static int __net_init
 ip_set_net_init(struct net *net)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
-
 	struct ip_set **list;
 
 	inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 6a80dbd30df7..2f80c74c871f 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -931,7 +931,7 @@ mtype_list(const struct ip_set *set,
 	struct nlattr *atd, *nested;
 	const struct hbucket *n;
 	const struct mtype_elem *e;
-	u32 first = cb->args[2];
+	u32 first = cb->args[IPSET_CB_ARG0];
 	/* We assume that one hash bucket fills into one page */
 	void *incomplete;
 	int i;
@@ -940,20 +940,22 @@ mtype_list(const struct ip_set *set,
 	if (!atd)
 		return -EMSGSIZE;
 	pr_debug("list hash set %s\n", set->name);
-	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
+	for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
+	     cb->args[IPSET_CB_ARG0]++) {
 		incomplete = skb_tail_pointer(skb);
-		n = hbucket(t, cb->args[2]);
-		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
+		n = hbucket(t, cb->args[IPSET_CB_ARG0]);
+		pr_debug("cb->arg bucket: %lu, t %p n %p\n",
+			 cb->args[IPSET_CB_ARG0], t, n);
 		for (i = 0; i < n->pos; i++) {
 			e = ahash_data(n, i, set->dsize);
 			if (SET_WITH_TIMEOUT(set) &&
 			    ip_set_timeout_expired(ext_timeout(e, set)))
 				continue;
 			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
-				 cb->args[2], n, i, e);
+				 cb->args[IPSET_CB_ARG0], n, i, e);
 			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 			if (!nested) {
-				if (cb->args[2] == first) {
+				if (cb->args[IPSET_CB_ARG0] == first) {
 					nla_nest_cancel(skb, atd);
 					return -EMSGSIZE;
 				} else
@@ -968,16 +970,16 @@ mtype_list(const struct ip_set *set,
 	}
 	ipset_nest_end(skb, atd);
 	/* Set listing finished */
-	cb->args[2] = 0;
+	cb->args[IPSET_CB_ARG0] = 0;
 
 	return 0;
 
 nla_put_failure:
 	nlmsg_trim(skb, incomplete);
-	if (unlikely(first == cb->args[2])) {
+	if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
 		pr_warning("Can't list set %s: one bucket does not fit into "
 			   "a message. Please report it!\n", set->name);
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 		return -EMSGSIZE;
 	}
 	ipset_nest_end(skb, atd);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index ec6f6d15dded..3e2317f3cf68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -490,14 +490,15 @@ list_set_list(const struct ip_set *set,
 {
 	const struct list_set *map = set->data;
 	struct nlattr *atd, *nested;
-	u32 i, first = cb->args[2];
+	u32 i, first = cb->args[IPSET_CB_ARG0];
 	const struct set_elem *e;
 
 	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
 	if (!atd)
 		return -EMSGSIZE;
-	for (; cb->args[2] < map->size; cb->args[2]++) {
-		i = cb->args[2];
+	for (; cb->args[IPSET_CB_ARG0] < map->size;
+	     cb->args[IPSET_CB_ARG0]++) {
+		i = cb->args[IPSET_CB_ARG0];
 		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto finish;
@@ -522,13 +523,13 @@ list_set_list(const struct ip_set *set,
 finish:
 	ipset_nest_end(skb, atd);
 	/* Set listing finished */
-	cb->args[2] = 0;
+	cb->args[IPSET_CB_ARG0] = 0;
 	return 0;
 
 nla_put_failure:
 	nla_nest_cancel(skb, nested);
 	if (unlikely(i == first)) {
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 		return -EMSGSIZE;
 	}
 	ipset_nest_end(skb, atd);
-- 
cgit v1.2.3


From 4023fe6ff2192d6050647571ea54f5497b2ec8f6 Mon Sep 17 00:00:00 2001
From: Jyri Sarha <jsarha@ti.com>
Date: Fri, 18 Oct 2013 18:37:43 +0300
Subject: ASoC: davinci-mcasp: Extract DMA channels directly from DT

Extract DMA channels directly from DT as they can not be found from
platform resources anymore. This is a work-around until davinci audio
driver is updated to use dmaengine.

Signed-off-by: Jyri Sarha <jsarha@ti.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 .../bindings/sound/davinci-mcasp-audio.txt         |  5 +++
 include/linux/platform_data/davinci_asp.h          |  2 +
 sound/soc/davinci/davinci-mcasp.c                  | 45 +++++++++++++++-------
 3 files changed, 38 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt
index c2ab8697e24a..c3ccde71f97a 100644
--- a/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt
+++ b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt
@@ -18,6 +18,11 @@ Required properties:
 - serial-dir : A list of serializer pin mode. The list number should be equal
 		to "num-serializer" parameter. Each entry is a number indication
 		serializer pin direction. (0 - INACTIVE, 1 - TX, 2 - RX)
+- dmas: two element list of DMA controller phandles and DMA request line
+        ordered pairs.
+- dma-names: identifier string for each DMA request line in the dmas property.
+	     These strings correspond 1:1 with the ordered pairs in dmas. The dma
+	     identifiers must be "rx" and "tx".
 
 Optional properties:
 
diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h
index 8db5ae03b6e3..689a856b86f9 100644
--- a/include/linux/platform_data/davinci_asp.h
+++ b/include/linux/platform_data/davinci_asp.h
@@ -84,6 +84,8 @@ struct snd_platform_data {
 	u8 version;
 	u8 txnumevt;
 	u8 rxnumevt;
+	int tx_dma_channel;
+	int rx_dma_channel;
 };
 
 enum {
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 806bec34e4d9..4c207508348f 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -1047,6 +1047,7 @@ static struct snd_platform_data *davinci_mcasp_set_pdata_from_of(
 	struct snd_platform_data *pdata = NULL;
 	const struct of_device_id *match =
 			of_match_device(mcasp_dt_ids, &pdev->dev);
+	struct of_phandle_args dma_spec;
 
 	const u32 *of_serial_dir32;
 	u8 *of_serial_dir;
@@ -1109,6 +1110,28 @@ static struct snd_platform_data *davinci_mcasp_set_pdata_from_of(
 		pdata->serial_dir = of_serial_dir;
 	}
 
+	ret = of_property_match_string(np, "dma-names", "tx");
+	if (ret < 0)
+		goto nodata;
+
+	ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", ret,
+					 &dma_spec);
+	if (ret < 0)
+		goto nodata;
+
+	pdata->tx_dma_channel = dma_spec.args[0];
+
+	ret = of_property_match_string(np, "dma-names", "rx");
+	if (ret < 0)
+		goto nodata;
+
+	ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", ret,
+					 &dma_spec);
+	if (ret < 0)
+		goto nodata;
+
+	pdata->rx_dma_channel = dma_spec.args[0];
+
 	ret = of_property_read_u32(np, "tx-num-evt", &val);
 	if (ret >= 0)
 		pdata->txnumevt = val;
@@ -1213,15 +1236,11 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
 	dma_data->sram_size = pdata->sram_size_playback;
 	dma_data->dma_addr = dat->start + pdata->tx_dma_offset;
 
-	/* first TX, then RX */
 	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "no DMA resource\n");
-		ret = -ENODEV;
-		goto err_release_clk;
-	}
-
-	dma_data->channel = res->start;
+	if (res)
+		dma_data->channel = res->start;
+	else
+		dma_data->channel = pdata->tx_dma_channel;
 
 	dma_data = &dev->dma_params[SNDRV_PCM_STREAM_CAPTURE];
 	dma_data->asp_chan_q = pdata->asp_chan_q;
@@ -1231,13 +1250,11 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
 	dma_data->dma_addr = dat->start + pdata->rx_dma_offset;
 
 	res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-	if (!res) {
-		dev_err(&pdev->dev, "no DMA resource\n");
-		ret = -ENODEV;
-		goto err_release_clk;
-	}
+	if (res)
+		dma_data->channel = res->start;
+	else
+		dma_data->channel = pdata->rx_dma_channel;
 
-	dma_data->channel = res->start;
 	dev_set_drvdata(&pdev->dev, dev);
 	ret = snd_soc_register_component(&pdev->dev, &davinci_mcasp_component,
 					 &davinci_mcasp_dai[pdata->op_mode], 1);
-- 
cgit v1.2.3


From f65f0a1a9836abbfbe5c9b8fa0452e4d8eb7bf00 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 15 Sep 2013 03:50:17 -0700
Subject: leds: lp55xx: enable setting default trigger

This enables setting a default trigger on an LP55xx channel,
either from platform data or device tree. This mechanism is
identical to the mechanism for GPIO LEDs and references the
common LEDs device tree bindings.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Milo Kim <milo.kim@ti.com>
Acked-by: Milo Kim <milo.kim@ti.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 Documentation/devicetree/bindings/leds/leds-lp55xx.txt | 10 +++++++---
 drivers/leds/leds-lp55xx-common.c                      |  3 +++
 include/linux/platform_data/leds-lp55xx.h              |  1 +
 3 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
index a61727f9a6d1..d221e75d90fa 100644
--- a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
+++ b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
@@ -17,12 +17,15 @@ Optional properties:
          2: D1~6 with VOUT, D7~9 with VDD
          3: D1~9 are connected to VOUT
 
-Alternatively, each child can have specific channel name
-- chan-name: Name of each channel name
+Alternatively, each child can have a specific channel name and trigger:
+- chan-name (optional): name of channel
+- linux,default-trigger (optional): see
+  Documentation/devicetree/bindings/leds/common.txt
 
 example 1) LP5521
 3 LED channels, external clock used. Channel names are 'lp5521_pri:channel0',
-'lp5521_pri:channel1' and 'lp5521_pri:channel2'
+'lp5521_pri:channel1' and 'lp5521_pri:channel2', with a heartbeat trigger
+on channel 0.
 
 lp5521@32 {
 	compatible = "national,lp5521";
@@ -33,6 +36,7 @@ lp5521@32 {
 	chan0 {
 		led-cur = /bits/ 8 <0x2f>;
 		max-cur = /bits/ 8 <0x5f>;
+		linux,default-trigger = "heartbeat";
 	};
 
 	chan1 {
diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c
index 351825b96f16..075acf5b9fab 100644
--- a/drivers/leds/leds-lp55xx-common.c
+++ b/drivers/leds/leds-lp55xx-common.c
@@ -165,6 +165,7 @@ static int lp55xx_init_led(struct lp55xx_led *led,
 	led->led_current = pdata->led_config[chan].led_current;
 	led->max_current = pdata->led_config[chan].max_current;
 	led->chan_nr = pdata->led_config[chan].chan_nr;
+	led->cdev.default_trigger = pdata->led_config[chan].default_trigger;
 
 	if (led->chan_nr >= max_channel) {
 		dev_err(dev, "Use channel numbers between 0 and %d\n",
@@ -586,6 +587,8 @@ int lp55xx_of_populate_pdata(struct device *dev, struct device_node *np)
 		of_property_read_string(child, "chan-name", &cfg[i].name);
 		of_property_read_u8(child, "led-cur", &cfg[i].led_current);
 		of_property_read_u8(child, "max-cur", &cfg[i].max_current);
+		cfg[i].default_trigger =
+			of_get_property(child, "linux,default-trigger", NULL);
 
 		i++;
 	}
diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h
index 51a2ff579d60..c32de4dcec54 100644
--- a/include/linux/platform_data/leds-lp55xx.h
+++ b/include/linux/platform_data/leds-lp55xx.h
@@ -22,6 +22,7 @@
 
 struct lp55xx_led_config {
 	const char *name;
+	const char *default_trigger;
 	u8 chan_nr;
 	u8 led_current; /* mA x10, 0 if led is not connected */
 	u8 max_current;
-- 
cgit v1.2.3


From bb6febdc90efe7f664328075c204eed8e9af7ec9 Mon Sep 17 00:00:00 2001
From: Maximilian Güntner <maximilian.guentner@gmail.com>
Date: Wed, 16 Oct 2013 18:09:17 -0700
Subject: leds: Added driver for the NXP PCA9685 I2C chip
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The NXP PCA9685 supports 16 channels/leds using a 12-bit PWM (4095
levels of brightness)
This driver supports configuration using platform_data.

Signed-off-by: Maximilian Güntner <maximilian.guentner@gmail.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/Kconfig                       |  10 ++
 drivers/leds/Makefile                      |   1 +
 drivers/leds/leds-pca9685.c                | 213 +++++++++++++++++++++++++++++
 include/linux/platform_data/leds-pca9685.h |  35 +++++
 4 files changed, 259 insertions(+)
 create mode 100644 drivers/leds/leds-pca9685.c
 create mode 100644 include/linux/platform_data/leds-pca9685.h

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 875bbe4c962e..72156c123033 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -300,6 +300,16 @@ config LEDS_PCA963X
 	  LED driver chip accessed via the I2C bus. Supported
 	  devices include PCA9633 and PCA9634
 
+config LEDS_PCA9685
+	tristate "LED support for PCA9685 I2C chip"
+	depends on LEDS_CLASS
+	depends on I2C
+	help
+	  This option enables support for LEDs connected to the PCA9685
+	  LED driver chip accessed via the I2C bus.
+	  The PCA9685 offers 12-bit PWM (4095 levels of brightness) on
+	  16 individual channels.
+
 config LEDS_WM831X_STATUS
 	tristate "LED support for status LEDs on WM831x PMICs"
 	depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 8979b0b2c85e..3cd76dbd9be2 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_LEDS_OT200)		+= leds-ot200.o
 obj-$(CONFIG_LEDS_FSG)			+= leds-fsg.o
 obj-$(CONFIG_LEDS_PCA955X)		+= leds-pca955x.o
 obj-$(CONFIG_LEDS_PCA963X)		+= leds-pca963x.o
+obj-$(CONFIG_LEDS_PCA9685)		+= leds-pca9685.o
 obj-$(CONFIG_LEDS_DA903X)		+= leds-da903x.o
 obj-$(CONFIG_LEDS_DA9052)		+= leds-da9052.o
 obj-$(CONFIG_LEDS_WM831X_STATUS)	+= leds-wm831x-status.o
diff --git a/drivers/leds/leds-pca9685.c b/drivers/leds/leds-pca9685.c
new file mode 100644
index 000000000000..6e1ef3a9d6ef
--- /dev/null
+++ b/drivers/leds/leds-pca9685.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2013 Maximilian Güntner <maximilian.guentner@gmail.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of
+ * the GNU General Public License.  See the file COPYING in the main
+ * directory of this archive for more details.
+ *
+ * Based on leds-pca963x.c driver by
+ * Peter Meerwald <p.meerwald@bct-electronic.com>
+ *
+ * Driver for the NXP PCA9685 12-Bit PWM LED driver chip.
+ *
+ */
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+
+#include <linux/platform_data/leds-pca9685.h>
+
+/* Register Addresses */
+#define PCA9685_MODE1 0x00
+#define PCA9685_MODE2 0x01
+#define PCA9685_LED0_ON_L 0x06
+#define PCA9685_ALL_LED_ON_L 0xFA
+
+/* MODE1 Register */
+#define PCA9685_ALLCALL 0x00
+#define PCA9685_SLEEP   0x04
+#define PCA9685_AI      0x05
+
+/* MODE2 Register */
+#define PCA9685_INVRT   0x04
+#define PCA9685_OUTDRV  0x02
+
+static const struct i2c_device_id pca9685_id[] = {
+	{ "pca9685", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, pca9685_id);
+
+struct pca9685_led {
+	struct i2c_client *client;
+	struct work_struct work;
+	u16 brightness;
+	struct led_classdev led_cdev;
+	int led_num; /* 0-15 */
+	char name[32];
+};
+
+static void pca9685_write_msg(struct i2c_client *client, u8 *buf, u8 len)
+{
+	struct i2c_msg msg = {
+		.addr = client->addr,
+		.flags = 0x00,
+		.len = len,
+		.buf = buf
+	};
+	i2c_transfer(client->adapter, &msg, 1);
+}
+
+static void pca9685_all_off(struct i2c_client *client)
+{
+	u8 i2c_buffer[5] = {PCA9685_ALL_LED_ON_L, 0x00, 0x00, 0x00, 0x10};
+	pca9685_write_msg(client, i2c_buffer, 5);
+}
+
+static void pca9685_led_work(struct work_struct *work)
+{
+	struct pca9685_led *pca9685;
+	u8 i2c_buffer[5];
+
+	pca9685 = container_of(work, struct pca9685_led, work);
+	i2c_buffer[0] = PCA9685_LED0_ON_L + 4 * pca9685->led_num;
+	/*
+	 * 4095 is the maximum brightness, so we set the ON time to 0x1000
+	 * which disables the PWM generator for that LED
+	 */
+	if (pca9685->brightness == 4095)
+		*((__le16 *)(i2c_buffer+1)) = cpu_to_le16(0x1000);
+	else
+		*((__le16 *)(i2c_buffer+1)) = 0x0000;
+
+	if (pca9685->brightness == 0)
+		*((__le16 *)(i2c_buffer+3)) = cpu_to_le16(0x1000);
+	else if (pca9685->brightness == 4095)
+		*((__le16 *)(i2c_buffer+3)) = 0x0000;
+	else
+		*((__le16 *)(i2c_buffer+3)) = cpu_to_le16(pca9685->brightness);
+
+	pca9685_write_msg(pca9685->client, i2c_buffer, 5);
+}
+
+static void pca9685_led_set(struct led_classdev *led_cdev,
+		enum led_brightness value)
+{
+	struct pca9685_led *pca9685;
+	pca9685 = container_of(led_cdev, struct pca9685_led, led_cdev);
+	pca9685->brightness = value;
+
+	schedule_work(&pca9685->work);
+}
+
+static int pca9685_probe(struct i2c_client *client,
+		const struct i2c_device_id *id)
+{
+	struct pca9685_led *pca9685;
+	struct pca9685_platform_data *pdata;
+	int err;
+	u8 i;
+
+	pdata = dev_get_platdata(&client->dev);
+	if (pdata) {
+		if (pdata->leds.num_leds < 1 || pdata->leds.num_leds > 15) {
+			dev_err(&client->dev, "board info must claim 1-16 LEDs");
+			return -EINVAL;
+		}
+	}
+
+	pca9685 = devm_kzalloc(&client->dev, 16 * sizeof(*pca9685), GFP_KERNEL);
+	if (!pca9685)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, pca9685);
+	pca9685_all_off(client);
+
+	for (i = 0; i < 16; i++) {
+		pca9685[i].client = client;
+		pca9685[i].led_num = i;
+		pca9685[i].name[0] = '\0';
+		if (pdata && i < pdata->leds.num_leds) {
+			if (pdata->leds.leds[i].name)
+				strncpy(pca9685[i].name,
+					pdata->leds.leds[i].name,
+					sizeof(pca9685[i].name)-1);
+			if (pdata->leds.leds[i].default_trigger)
+				pca9685[i].led_cdev.default_trigger =
+					pdata->leds.leds[i].default_trigger;
+		}
+		if (strlen(pca9685[i].name) == 0) {
+			/*
+			 * Write adapter and address to the name as well.
+			 * Otherwise multiple chips attached to one host would
+			 * not work.
+			 */
+			snprintf(pca9685[i].name, sizeof(pca9685[i].name),
+					"pca9685:%d:x%.2x:%d",
+					client->adapter->nr, client->addr, i);
+		}
+		pca9685[i].led_cdev.name = pca9685[i].name;
+		pca9685[i].led_cdev.max_brightness = 0xfff;
+		pca9685[i].led_cdev.brightness_set = pca9685_led_set;
+
+		INIT_WORK(&pca9685[i].work, pca9685_led_work);
+		err = led_classdev_register(&client->dev, &pca9685[i].led_cdev);
+		if (err < 0)
+			goto exit;
+	}
+
+	if (pdata)
+		i2c_smbus_write_byte_data(client, PCA9685_MODE2,
+			pdata->outdrv << PCA9685_OUTDRV |
+			pdata->inverted << PCA9685_INVRT);
+	else
+		i2c_smbus_write_byte_data(client, PCA9685_MODE2,
+			PCA9685_TOTEM_POLE << PCA9685_OUTDRV);
+	/* Enable Auto-Increment, enable oscillator, ALLCALL/SUBADDR disabled */
+	i2c_smbus_write_byte_data(client, PCA9685_MODE1, BIT(PCA9685_AI));
+
+	return 0;
+
+exit:
+	while (i--) {
+		led_classdev_unregister(&pca9685[i].led_cdev);
+		cancel_work_sync(&pca9685[i].work);
+	}
+	return err;
+}
+
+static int pca9685_remove(struct i2c_client *client)
+{
+	struct pca9685_led *pca9685 = i2c_get_clientdata(client);
+	u8 i;
+
+	for (i = 0; i < 16; i++) {
+		led_classdev_unregister(&pca9685[i].led_cdev);
+		cancel_work_sync(&pca9685[i].work);
+	}
+	pca9685_all_off(client);
+	return 0;
+}
+
+static struct i2c_driver pca9685_driver = {
+	.driver = {
+		.name = "leds-pca9685",
+		.owner = THIS_MODULE,
+	},
+	.probe = pca9685_probe,
+	.remove = pca9685_remove,
+	.id_table = pca9685_id,
+};
+
+module_i2c_driver(pca9685_driver);
+
+MODULE_AUTHOR("Maximilian Güntner <maximilian.guentner@gmail.com>");
+MODULE_DESCRIPTION("PCA9685 LED Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/leds-pca9685.h b/include/linux/platform_data/leds-pca9685.h
new file mode 100644
index 000000000000..778e9e4249cc
--- /dev/null
+++ b/include/linux/platform_data/leds-pca9685.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2013 Maximilian Güntner <maximilian.guentner@gmail.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of
+ * the GNU General Public License.  See the file COPYING in the main
+ * directory of this archive for more details.
+ *
+ * Based on leds-pca963x.h by Peter Meerwald <p.meerwald@bct-electronic.com>
+ *
+ * LED driver for the NXP PCA9685 PWM chip
+ *
+ */
+
+#ifndef __LINUX_PCA9685_H
+#define __LINUX_PCA9685_H
+
+#include <linux/leds.h>
+
+enum pca9685_outdrv {
+	PCA9685_OPEN_DRAIN,
+	PCA9685_TOTEM_POLE,
+};
+
+enum pca9685_inverted {
+	PCA9685_NOT_INVERTED,
+	PCA9685_INVERTED,
+};
+
+struct pca9685_platform_data {
+	struct led_platform_data leds;
+	enum pca9685_outdrv outdrv;
+	enum pca9685_inverted inverted;
+};
+
+#endif /* __LINUX_PCA9685_H */
-- 
cgit v1.2.3


From 7fcd427465e710d0c4e2737d2f02b2ffa14b9bb3 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 15 Oct 2013 20:14:21 +0100
Subject: mfd: Allow mapping regulator supplies to MFD device from children

Occasionally, it is useful to map supplies from a child device onto the
MFD device. A typical usecase for this would be if the MFD device is
represented as a single node in device tree. All supplies will be
defined in device tree as existing on the MFD device. When a child
depends on frameworks which might have no knowledge of MFD to lookup
supplies on its behalf the supply will not be found.

This patch adds a list of supplies that should be looked up on the
parent rather than the child as part of the mfd_cell structure.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/mfd/mfd-core.c   | 22 +++++++++++++++++-----
 include/linux/mfd/core.h |  6 ++++++
 2 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index f421586f29fb..adc8ea36e7c4 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/irqdomain.h>
 #include <linux/of.h>
+#include <linux/regulator/consumer.h>
 
 static struct device_type mfd_dev_type = {
 	.name	= "mfd_device",
@@ -99,6 +100,13 @@ static int mfd_add_device(struct device *parent, int id,
 	pdev->dev.dma_mask = parent->dma_mask;
 	pdev->dev.dma_parms = parent->dma_parms;
 
+	ret = devm_regulator_bulk_register_supply_alias(
+			&pdev->dev, cell->parent_supplies,
+			parent, cell->parent_supplies,
+			cell->num_parent_supplies);
+	if (ret < 0)
+		goto fail_res;
+
 	if (parent->of_node && cell->of_compatible) {
 		for_each_child_of_node(parent->of_node, np) {
 			if (of_device_is_compatible(np, cell->of_compatible)) {
@@ -112,12 +120,12 @@ static int mfd_add_device(struct device *parent, int id,
 		ret = platform_device_add_data(pdev,
 					cell->platform_data, cell->pdata_size);
 		if (ret)
-			goto fail_res;
+			goto fail_alias;
 	}
 
 	ret = mfd_platform_add_cell(pdev, cell);
 	if (ret)
-		goto fail_res;
+		goto fail_alias;
 
 	for (r = 0; r < cell->num_resources; r++) {
 		res[r].name = cell->resources[r].name;
@@ -152,17 +160,17 @@ static int mfd_add_device(struct device *parent, int id,
 		if (!cell->ignore_resource_conflicts) {
 			ret = acpi_check_resource_conflict(&res[r]);
 			if (ret)
-				goto fail_res;
+				goto fail_alias;
 		}
 	}
 
 	ret = platform_device_add_resources(pdev, res, cell->num_resources);
 	if (ret)
-		goto fail_res;
+		goto fail_alias;
 
 	ret = platform_device_add(pdev);
 	if (ret)
-		goto fail_res;
+		goto fail_alias;
 
 	if (cell->pm_runtime_no_callbacks)
 		pm_runtime_no_callbacks(&pdev->dev);
@@ -171,6 +179,10 @@ static int mfd_add_device(struct device *parent, int id,
 
 	return 0;
 
+fail_alias:
+	devm_regulator_bulk_unregister_supply_alias(&pdev->dev,
+						    cell->parent_supplies,
+						    cell->num_parent_supplies);
 fail_res:
 	kfree(res);
 fail_device:
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index cebe97ee98b8..7314fc4e6d25 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -59,6 +59,12 @@ struct mfd_cell {
 	 * pm_runtime_no_callbacks().
 	 */
 	bool			pm_runtime_no_callbacks;
+
+	/* A list of regulator supplies that should be mapped to the MFD
+	 * device rather than the child device when requested
+	 */
+	const char		**parent_supplies;
+	int			num_parent_supplies;
 };
 
 /*
-- 
cgit v1.2.3


From 92ec11809565cf6429c75204e99e0f583b5c9d7c Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@gmail.com>
Date: Wed, 23 Oct 2013 13:40:55 +0200
Subject: sched/wait: Fix build breakage

The wait_event_interruptible_lock_irq() macro is missing a
semi-colon which causes a build failure in the i915 DRM driver.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1382528455-29911-1-git-send-email-treding@nvidia.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ec099b03e11b..3b23afa04d6b 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -732,7 +732,7 @@ do {									\
 	int __ret = 0;							\
 	if (!(condition))						\
 		__ret = __wait_event_interruptible_lock_irq(wq,		\
-						condition, lock,)	\
+						condition, lock,);	\
 	__ret;								\
 })
 
-- 
cgit v1.2.3


From 43e30f23b589642a7eaff005bc30444a5247976c Mon Sep 17 00:00:00 2001
From: David Jander <david@protonic.nl>
Date: Mon, 2 Sep 2013 09:46:11 +0200
Subject: mfd: da9052: Avoid multiwrite mode due to silicon errata

DA9053 (up to revision bc) can corrupt internal registers when multi-write
mode is enabled and power is removed or during shutdown.

Signed-off-by: David Jander <david@protonic.nl>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9052-i2c.c          | 12 ++++++++----
 include/linux/mfd/da9052/da9052.h | 20 ++++++++++++++------
 2 files changed, 22 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9052-i2c.c b/drivers/mfd/da9052-i2c.c
index 6a9fec40d018..c319c4ef5d49 100644
--- a/drivers/mfd/da9052-i2c.c
+++ b/drivers/mfd/da9052-i2c.c
@@ -86,7 +86,11 @@ static int da9052_i2c_fix(struct da9052 *da9052, unsigned char reg)
 	return 0;
 }
 
-static int da9052_i2c_enable_multiwrite(struct da9052 *da9052)
+/*
+ * According to errata item 24, multiwrite mode should be avoided
+ * in order to prevent register data corruption after power-down.
+ */
+static int da9052_i2c_disable_multiwrite(struct da9052 *da9052)
 {
 	int reg_val, ret;
 
@@ -94,8 +98,8 @@ static int da9052_i2c_enable_multiwrite(struct da9052 *da9052)
 	if (ret < 0)
 		return ret;
 
-	if (reg_val & DA9052_CONTROL_B_WRITEMODE) {
-		reg_val &= ~DA9052_CONTROL_B_WRITEMODE;
+	if (!(reg_val & DA9052_CONTROL_B_WRITEMODE)) {
+		reg_val |= DA9052_CONTROL_B_WRITEMODE;
 		ret = regmap_write(da9052->regmap, DA9052_CONTROL_B_REG,
 				   reg_val);
 		if (ret < 0)
@@ -154,7 +158,7 @@ static int da9052_i2c_probe(struct i2c_client *client,
 		return ret;
 	}
 
-	ret = da9052_i2c_enable_multiwrite(da9052);
+	ret = da9052_i2c_disable_multiwrite(da9052);
 	if (ret < 0)
 		return ret;
 
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index 786d02eb79d2..21e21b81cc75 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -148,10 +148,15 @@ static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg,
 				     unsigned reg_cnt, unsigned char *val)
 {
 	int ret;
+	unsigned int tmp;
+	int i;
 
-	ret = regmap_bulk_read(da9052->regmap, reg, val, reg_cnt);
-	if (ret < 0)
-		return ret;
+	for (i = 0; i < reg_cnt; i++) {
+		ret = regmap_read(da9052->regmap, reg + i, &tmp);
+		val[i] = (unsigned char)tmp;
+		if (ret < 0)
+			return ret;
+	}
 
 	if (da9052->fix_io) {
 		ret = da9052->fix_io(da9052, reg);
@@ -166,10 +171,13 @@ static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg,
 				      unsigned reg_cnt, unsigned char *val)
 {
 	int ret;
+	int i;
 
-	ret = regmap_raw_write(da9052->regmap, reg, val, reg_cnt);
-	if (ret < 0)
-		return ret;
+	for (i = 0; i < reg_cnt; i++) {
+		ret = regmap_write(da9052->regmap, reg + i, val[i]);
+		if (ret < 0)
+			return ret;
+	}
 
 	if (da9052->fix_io) {
 		ret = da9052->fix_io(da9052, reg);
-- 
cgit v1.2.3


From b5f90240e1ef0568a8c666da3c3be4c6a682c5a6 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Fri, 6 Sep 2013 16:14:28 +0100
Subject: mfd: wm8994: Inline register I/O functions

Since the register I/O functions are all simple wrappers for the regmap
equivalents inline them to provide a small code size saving and an example
of good practice.

Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/wm8994-core.c       | 78 -----------------------------------------
 include/linux/mfd/wm8994/core.h | 45 +++++++++++++++++++-----
 2 files changed, 36 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index e1c283e6d4e5..030827511667 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -33,84 +33,6 @@
 
 #include "wm8994.h"
 
-/**
- * wm8994_reg_read: Read a single WM8994 register.
- *
- * @wm8994: Device to read from.
- * @reg: Register to read.
- */
-int wm8994_reg_read(struct wm8994 *wm8994, unsigned short reg)
-{
-	unsigned int val;
-	int ret;
-
-	ret = regmap_read(wm8994->regmap, reg, &val);
-
-	if (ret < 0)
-		return ret;
-	else
-		return val;
-}
-EXPORT_SYMBOL_GPL(wm8994_reg_read);
-
-/**
- * wm8994_bulk_read: Read multiple WM8994 registers
- *
- * @wm8994: Device to read from
- * @reg: First register
- * @count: Number of registers
- * @buf: Buffer to fill.  The data will be returned big endian.
- */
-int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg,
-		     int count, u16 *buf)
-{
-	return regmap_bulk_read(wm8994->regmap, reg, buf, count);
-}
-
-/**
- * wm8994_reg_write: Write a single WM8994 register.
- *
- * @wm8994: Device to write to.
- * @reg: Register to write to.
- * @val: Value to write.
- */
-int wm8994_reg_write(struct wm8994 *wm8994, unsigned short reg,
-		     unsigned short val)
-{
-	return regmap_write(wm8994->regmap, reg, val);
-}
-EXPORT_SYMBOL_GPL(wm8994_reg_write);
-
-/**
- * wm8994_bulk_write: Write multiple WM8994 registers
- *
- * @wm8994: Device to write to
- * @reg: First register
- * @count: Number of registers
- * @buf: Buffer to write from.  Data must be big-endian formatted.
- */
-int wm8994_bulk_write(struct wm8994 *wm8994, unsigned short reg,
-		      int count, const u16 *buf)
-{
-	return regmap_raw_write(wm8994->regmap, reg, buf, count * sizeof(u16));
-}
-EXPORT_SYMBOL_GPL(wm8994_bulk_write);
-
-/**
- * wm8994_set_bits: Set the value of a bitfield in a WM8994 register
- *
- * @wm8994: Device to write to.
- * @reg: Register to write to.
- * @mask: Mask of bits to set.
- * @val: Value to set (unshifted)
- */
-int wm8994_set_bits(struct wm8994 *wm8994, unsigned short reg,
-		    unsigned short mask, unsigned short val)
-{
-	return regmap_update_bits(wm8994->regmap, reg, mask, val);
-}
-EXPORT_SYMBOL_GPL(wm8994_set_bits);
-
 static struct mfd_cell wm8994_regulator_devs[] = {
 	{
 		.name = "wm8994-ldo",
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 40854ac0ba3d..3fbcf3d4a0fe 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -85,16 +85,43 @@ struct wm8994 {
 };
 
 /* Device I/O API */
-int wm8994_reg_read(struct wm8994 *wm8994, unsigned short reg);
-int wm8994_reg_write(struct wm8994 *wm8994, unsigned short reg,
-		 unsigned short val);
-int wm8994_set_bits(struct wm8994 *wm8994, unsigned short reg,
-		    unsigned short mask, unsigned short val);
-int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg,
-		     int count, u16 *buf);
-int wm8994_bulk_write(struct wm8994 *wm8994, unsigned short reg,
-		     int count, const u16 *buf);
 
+static inline int wm8994_reg_read(struct wm8994 *wm8994, unsigned short reg)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(wm8994->regmap, reg, &val);
+
+	if (ret < 0)
+		return ret;
+	else
+		return val;
+}
+
+static inline int wm8994_reg_write(struct wm8994 *wm8994, unsigned short reg,
+				   unsigned short val)
+{
+	return regmap_write(wm8994->regmap, reg, val);
+}
+
+static inline int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg,
+				   int count, u16 *buf)
+{
+	return regmap_bulk_read(wm8994->regmap, reg, buf, count);
+}
+
+static inline int wm8994_bulk_write(struct wm8994 *wm8994, unsigned short reg,
+				    int count, const u16 *buf)
+{
+	return regmap_raw_write(wm8994->regmap, reg, buf, count * sizeof(u16));
+}
+
+static inline int wm8994_set_bits(struct wm8994 *wm8994, unsigned short reg,
+		    unsigned short mask, unsigned short val)
+{
+	return regmap_update_bits(wm8994->regmap, reg, mask, val);
+}
 
 /* Helper to save on boilerplate */
 static inline int wm8994_request_irq(struct wm8994 *wm8994, int irq,
-- 
cgit v1.2.3


From dae188c6092a3f085bf7fc335b6c0e0420d3dd8f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Fri, 6 Sep 2013 17:48:35 +0100
Subject: mfd: wm8994: Remove unused irq_lock

Since the conversion to regmap-irq irq_lock has been unused.

Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/wm8994/core.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 3fbcf3d4a0fe..eefafa62d304 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -56,8 +56,6 @@ struct irq_domain;
 #define WM8994_IRQ_GPIO(x) (x + WM8994_IRQ_TEMP_WARN)
 
 struct wm8994 {
-	struct mutex irq_lock;
-
 	struct wm8994_pdata pdata;
 
 	enum wm8994_type type;
-- 
cgit v1.2.3


From 1a54b7dabf8f20df2894aed9683155ff89fc20e8 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias.list@kaehlcke.net>
Date: Tue, 10 Sep 2013 23:02:18 +0200
Subject: mfd: ti_am335x_tscadc: Fix idle timeout value

The old timeout value was based on the assumption that the minimum values are
used for the open and sample delay and no averaging is done. In fact the ADC
and touchscreen driver both use an open delay of 152 cycles and averaging over
16 samples. This patch adjusts the timeout value accordingly

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/ti_am335x_tscadc.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 25f2c611ab01..4befdb85dd9c 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -125,13 +125,18 @@
 #define TOTAL_CHANNELS		8
 
 /*
-* ADC runs at 3MHz, and it takes
-* 15 cycles to latch one data output.
-* Hence the idle time for ADC to
-* process one sample data would be
-* around 5 micro seconds.
-*/
-#define IDLE_TIMEOUT 5 /* microsec */
+ * time in us for processing a single channel, calculated as follows:
+ *
+ * num cycles = open delay + (sample delay + conv time) * averaging
+ *
+ * num cycles: 152 + (1 + 13) * 16 = 376
+ *
+ * clock frequency: 26MHz / 8 = 3.25MHz
+ * clock period: 1 / 3.25MHz = 308ns
+ *
+ * processing time: 376 * 308ns = 116us
+ */
+#define IDLE_TIMEOUT 116 /* microsec */
 
 #define TSCADC_CELLS		2
 
-- 
cgit v1.2.3


From 26b818511c6562ce372566c219a2ef1afea35fe6 Mon Sep 17 00:00:00 2001
From: Wei WANG <wei_wang@realsil.com.cn>
Date: Fri, 13 Sep 2013 17:45:43 +0800
Subject: mfd: rtsx: Modify rts5249_optimize_phy

In some platforms, specially Thinkpad series, rts5249 won't be
initialized properly. So we need adjust some phy parameters to
improve the compatibility issue.

It is a little different between simulation and real chip. We have
no idea about which configuration is better before tape-out. We set
default settings according to simulation, but need to tune these
parameters after getting the real chip.

I can't explain every change in detail here. The below information is
just a rough description:

PHY_REG_REV: Disable internal clkreq_tx, enable rx_pwst
PHY_BPCR: No change, just turn the magic number to macro definitions
PHY_PCR: Change OOBS sensitivity, from 60mV to 90mV
PHY_RCR2: Control charge-pump current automatically
PHY_FLD4: Use TX cmu reference clock
PHY_RDR: Change RXDSEL from 30nF to 1.9nF
PHY_RCR1: Change the duration between adp_st and asserting cp_en from
0.32 us to 0.64us
PHY_FLD3: Adjust internal timers
PHY_TUNE: Fine tune the regulator12 output voltage

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/rts5249.c        | 48 +++++++++++++++++++++++++++++++++++++--
 include/linux/mfd/rtsx_pci.h | 53 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rts5249.c b/drivers/mfd/rts5249.c
index 3b835f593e35..573de7bfcced 100644
--- a/drivers/mfd/rts5249.c
+++ b/drivers/mfd/rts5249.c
@@ -130,13 +130,57 @@ static int rts5249_optimize_phy(struct rtsx_pcr *pcr)
 {
 	int err;
 
-	err = rtsx_pci_write_phy_register(pcr, PHY_REG_REV, 0xFE46);
+	err = rtsx_pci_write_phy_register(pcr, PHY_REG_REV,
+			PHY_REG_REV_RESV | PHY_REG_REV_RXIDLE_LATCHED |
+			PHY_REG_REV_P1_EN | PHY_REG_REV_RXIDLE_EN |
+			PHY_REG_REV_RX_PWST | PHY_REG_REV_CLKREQ_DLY_TIMER_1_0 |
+			PHY_REG_REV_STOP_CLKRD | PHY_REG_REV_STOP_CLKWR);
 	if (err < 0)
 		return err;
 
 	msleep(1);
 
-	return rtsx_pci_write_phy_register(pcr, PHY_BPCR, 0x05C0);
+	err = rtsx_pci_write_phy_register(pcr, PHY_BPCR,
+			PHY_BPCR_IBRXSEL | PHY_BPCR_IBTXSEL |
+			PHY_BPCR_IB_FILTER | PHY_BPCR_CMIRROR_EN);
+	if (err < 0)
+		return err;
+	err = rtsx_pci_write_phy_register(pcr, PHY_PCR,
+			PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 |
+			PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 |
+			PHY_PCR_RSSI_EN);
+	if (err < 0)
+		return err;
+	err = rtsx_pci_write_phy_register(pcr, PHY_RCR2,
+			PHY_RCR2_EMPHASE_EN | PHY_RCR2_NADJR |
+			PHY_RCR2_CDR_CP_10 | PHY_RCR2_CDR_SR_2 |
+			PHY_RCR2_FREQSEL_12 | PHY_RCR2_CPADJEN |
+			PHY_RCR2_CDR_SC_8 | PHY_RCR2_CALIB_LATE);
+	if (err < 0)
+		return err;
+	err = rtsx_pci_write_phy_register(pcr, PHY_FLD4,
+			PHY_FLD4_FLDEN_SEL | PHY_FLD4_REQ_REF |
+			PHY_FLD4_RXAMP_OFF | PHY_FLD4_REQ_ADDA |
+			PHY_FLD4_BER_COUNT | PHY_FLD4_BER_TIMER |
+			PHY_FLD4_BER_CHK_EN);
+	if (err < 0)
+		return err;
+	err = rtsx_pci_write_phy_register(pcr, PHY_RDR, PHY_RDR_RXDSEL_1_9);
+	if (err < 0)
+		return err;
+	err = rtsx_pci_write_phy_register(pcr, PHY_RCR1,
+			PHY_RCR1_ADP_TIME | PHY_RCR1_VCO_COARSE);
+	if (err < 0)
+		return err;
+	err = rtsx_pci_write_phy_register(pcr, PHY_FLD3,
+			PHY_FLD3_TIMER_4 | PHY_FLD3_TIMER_6 |
+			PHY_FLD3_RXDELINK);
+	if (err < 0)
+		return err;
+	return rtsx_pci_write_phy_register(pcr, PHY_TUNE,
+			PHY_TUNE_TUNEREF_1_0 | PHY_TUNE_VBGSEL_1252 |
+			PHY_TUNE_SDBUS_33 | PHY_TUNE_TUNED18 |
+			PHY_TUNE_TUNED12);
 }
 
 static int rts5249_turn_on_led(struct rtsx_pcr *pcr)
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index d1382dfbeff0..0ce772105508 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -756,6 +756,59 @@
 #define PCR_SETTING_REG2		0x814
 #define PCR_SETTING_REG3		0x747
 
+/* Phy bits */
+#define PHY_PCR_FORCE_CODE			0xB000
+#define PHY_PCR_OOBS_CALI_50			0x0800
+#define PHY_PCR_OOBS_VCM_08			0x0200
+#define PHY_PCR_OOBS_SEN_90			0x0040
+#define PHY_PCR_RSSI_EN				0x0002
+
+#define PHY_RCR1_ADP_TIME			0x0100
+#define PHY_RCR1_VCO_COARSE			0x001F
+
+#define PHY_RCR2_EMPHASE_EN			0x8000
+#define PHY_RCR2_NADJR				0x4000
+#define PHY_RCR2_CDR_CP_10			0x0400
+#define PHY_RCR2_CDR_SR_2			0x0100
+#define PHY_RCR2_FREQSEL_12			0x0040
+#define PHY_RCR2_CPADJEN			0x0020
+#define PHY_RCR2_CDR_SC_8			0x0008
+#define PHY_RCR2_CALIB_LATE			0x0002
+
+#define PHY_RDR_RXDSEL_1_9			0x4000
+
+#define PHY_TUNE_TUNEREF_1_0			0x4000
+#define PHY_TUNE_VBGSEL_1252			0x0C00
+#define PHY_TUNE_SDBUS_33			0x0200
+#define PHY_TUNE_TUNED18			0x01C0
+#define PHY_TUNE_TUNED12			0X0020
+
+#define PHY_BPCR_IBRXSEL			0x0400
+#define PHY_BPCR_IBTXSEL			0x0100
+#define PHY_BPCR_IB_FILTER			0x0080
+#define PHY_BPCR_CMIRROR_EN			0x0040
+
+#define PHY_REG_REV_RESV			0xE000
+#define PHY_REG_REV_RXIDLE_LATCHED		0x1000
+#define PHY_REG_REV_P1_EN			0x0800
+#define PHY_REG_REV_RXIDLE_EN			0x0400
+#define PHY_REG_REV_CLKREQ_DLY_TIMER_1_0	0x0040
+#define PHY_REG_REV_STOP_CLKRD			0x0020
+#define PHY_REG_REV_RX_PWST			0x0008
+#define PHY_REG_REV_STOP_CLKWR			0x0004
+
+#define PHY_FLD3_TIMER_4			0x7800
+#define PHY_FLD3_TIMER_6			0x00E0
+#define PHY_FLD3_RXDELINK			0x0004
+
+#define PHY_FLD4_FLDEN_SEL			0x4000
+#define PHY_FLD4_REQ_REF			0x2000
+#define PHY_FLD4_RXAMP_OFF			0x1000
+#define PHY_FLD4_REQ_ADDA			0x0800
+#define PHY_FLD4_BER_COUNT			0x00E0
+#define PHY_FLD4_BER_TIMER			0x000A
+#define PHY_FLD4_BER_CHK_EN			0x0001
+
 #define rtsx_pci_init_cmd(pcr)		((pcr)->ci = 0)
 
 struct rtsx_pcr;
-- 
cgit v1.2.3


From ca13ce3701900c5b64c2c477a9cfea396c6e79c3 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Sat, 21 Sep 2013 11:02:04 +0100
Subject: mfd: arizona: Correct register definition for FLL2_SYNC_BW

We had specified the mask twice for FLL2_SYNC_BW change the first mask
definition in a bit definition to match the other fields.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/arizona/registers.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 4706d3d46e56..cb49417f8ba9 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -1908,7 +1908,7 @@
 #define ARIZONA_FLL2_SYNC_GAIN_MASK              0x003c  /* FLL2_SYNC_GAIN */
 #define ARIZONA_FLL2_SYNC_GAIN_SHIFT                  2  /* FLL2_SYNC_GAIN */
 #define ARIZONA_FLL2_SYNC_GAIN_WIDTH                  4  /* FLL2_SYNC_GAIN */
-#define ARIZONA_FLL2_SYNC_BW_MASK                0x0001  /* FLL2_SYNC_BW */
+#define ARIZONA_FLL2_SYNC_BW                     0x0001  /* FLL2_SYNC_BW */
 #define ARIZONA_FLL2_SYNC_BW_MASK                0x0001  /* FLL2_SYNC_BW */
 #define ARIZONA_FLL2_SYNC_BW_SHIFT                    0  /* FLL2_SYNC_BW */
 #define ARIZONA_FLL2_SYNC_BW_WIDTH                    1  /* FLL2_SYNC_BW */
-- 
cgit v1.2.3


From 60013b94d9530346db963474f7fde8aecabaff25 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 22 Sep 2013 21:49:18 +0200
Subject: mfd: Add STw481x driver

This adds a driver for the STw481x PMICs found in the Nomadik
family of platforms. This one uses pure device tree probing.
Print some of the OTP registers on boot and register a regulator
MFD child.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig         |  10 ++
 drivers/mfd/Makefile        |   1 +
 drivers/mfd/stw481x.c       | 250 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/stw481x.h |  56 ++++++++++
 4 files changed, 317 insertions(+)
 create mode 100644 drivers/mfd/stw481x.c
 create mode 100644 include/linux/mfd/stw481x.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 914c3d142f78..d078fe4a82ef 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1151,6 +1151,16 @@ config MFD_WM8994
 	  core support for the WM8994, in order to use the actual
 	  functionaltiy of the device other drivers must be enabled.
 
+config MFD_STW481X
+	bool "Support for ST Microelectronics STw481x"
+	depends on I2C && ARCH_NOMADIK
+	select REGMAP_I2C
+	select MFD_CORE
+	help
+	  Select this option to enable the STw481x chip driver used
+	  in various ST Microelectronics and ST-Ericsson embedded
+	  Nomadik series.
+
 endmenu
 endif
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 15b905c6553c..656cec90a1e5 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -162,3 +162,4 @@ obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
 obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o vexpress-sysreg.o
 obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
 obj-$(CONFIG_MFD_AS3711)	+= as3711.o
+obj-$(CONFIG_MFD_STW481X)	+= stw481x.o
diff --git a/drivers/mfd/stw481x.c b/drivers/mfd/stw481x.c
new file mode 100644
index 000000000000..1243d5c6a448
--- /dev/null
+++ b/drivers/mfd/stw481x.c
@@ -0,0 +1,250 @@
+/*
+ * Core driver for STw4810/STw4811
+ *
+ * Copyright (C) 2013 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/stw481x.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+/*
+ * This driver can only access the non-USB portions of STw4811, the register
+ * range 0x00-0x10 dealing with USB is bound to the two special I2C pins used
+ * for USB control.
+ */
+
+/* Registers inside the power control address space */
+#define STW_PC_VCORE_SEL	0x05U
+#define STW_PC_VAUX_SEL		0x06U
+#define STW_PC_VPLL_SEL		0x07U
+
+/**
+ * stw481x_get_pctl_reg() - get a power control register
+ * @stw481x: handle to the stw481x chip
+ * @reg: power control register to fetch
+ *
+ * The power control registers is a set of one-time-programmable registers
+ * in its own register space, accessed by writing addess bits to these
+ * two registers: bits 7,6,5 of PCTL_REG_LO corresponds to the 3 LSBs of
+ * the address and bits 8,9 of PCTL_REG_HI corresponds to the 2 MSBs of
+ * the address, forming an address space of 5 bits, i.e. 32 registers
+ * 0x00 ... 0x1f can be obtained.
+ */
+static int stw481x_get_pctl_reg(struct stw481x *stw481x, u8 reg)
+{
+	u8 msb = (reg >> 3) & 0x03;
+	u8 lsb = (reg << 5) & 0xe0;
+	unsigned int val;
+	u8 vrfy;
+	int ret;
+
+	ret = regmap_write(stw481x->map, STW_PCTL_REG_HI, msb);
+	if (ret)
+		return ret;
+	ret = regmap_write(stw481x->map, STW_PCTL_REG_LO, lsb);
+	if (ret)
+		return ret;
+	ret = regmap_read(stw481x->map, STW_PCTL_REG_HI, &val);
+	if (ret)
+		return ret;
+	vrfy = (val & 0x03) << 3;
+	ret = regmap_read(stw481x->map, STW_PCTL_REG_LO, &val);
+	if (ret)
+		return ret;
+	vrfy |= ((val >> 5) & 0x07);
+	if (vrfy != reg)
+		return -EIO;
+	return (val >> 1) & 0x0f;
+}
+
+static int stw481x_startup(struct stw481x *stw481x)
+{
+	/* Voltages multiplied by 100 */
+	u8 vcore_val[] = { 100, 105, 110, 115, 120, 122, 124, 126, 128,
+			   130, 132, 134, 136, 138, 140, 145 };
+	u8 vpll_val[] = { 105, 120, 130, 180 };
+	u8 vaux_val[] = { 15, 18, 25, 28 };
+	u8 vcore;
+	u8 vcore_slp;
+	u8 vpll;
+	u8 vaux;
+	bool vaux_en;
+	bool it_warn;
+	int ret;
+	unsigned int val;
+
+	ret = regmap_read(stw481x->map, STW_CONF1, &val);
+	if (ret)
+		return ret;
+	vaux_en = !!(val & STW_CONF1_PDN_VAUX);
+	it_warn = !!(val & STW_CONF1_IT_WARN);
+
+	dev_info(&stw481x->client->dev, "voltages %s\n",
+		(val & STW_CONF1_V_MONITORING) ? "OK" : "LOW");
+	dev_info(&stw481x->client->dev, "MMC level shifter %s\n",
+		(val & STW_CONF1_MMC_LS_STATUS) ? "high impedance" : "ON");
+	dev_info(&stw481x->client->dev, "VMMC: %s\n",
+		(val & STW_CONF1_PDN_VMMC) ? "ON" : "disabled");
+
+	dev_info(&stw481x->client->dev, "STw481x power control registers:\n");
+
+	ret = stw481x_get_pctl_reg(stw481x, STW_PC_VCORE_SEL);
+	if (ret < 0)
+		return ret;
+	vcore = ret & 0x0f;
+
+	ret = stw481x_get_pctl_reg(stw481x, STW_PC_VAUX_SEL);
+	if (ret < 0)
+		return ret;
+	vaux = (ret >> 2) & 3;
+	vpll = (ret >> 4) & 1; /* Save bit 4 */
+
+	ret = stw481x_get_pctl_reg(stw481x, STW_PC_VPLL_SEL);
+	if (ret < 0)
+		return ret;
+	vpll |= (ret >> 1) & 2;
+
+	dev_info(&stw481x->client->dev, "VCORE: %u.%uV %s\n",
+		vcore_val[vcore] / 100, vcore_val[vcore] % 100,
+		(ret & 4) ? "ON" : "OFF");
+
+	dev_info(&stw481x->client->dev, "VPLL:  %u.%uV %s\n",
+		vpll_val[vpll] / 100, vpll_val[vpll] % 100,
+		(ret & 0x10) ? "ON" : "OFF");
+
+	dev_info(&stw481x->client->dev, "VAUX:  %u.%uV %s\n",
+		vaux_val[vaux] / 10, vaux_val[vaux] % 10,
+		vaux_en ? "ON" : "OFF");
+
+	ret = regmap_read(stw481x->map, STW_CONF2, &val);
+	if (ret)
+		return ret;
+
+	dev_info(&stw481x->client->dev, "TWARN: %s threshold, %s\n",
+		it_warn ? "below" : "above",
+		(val & STW_CONF2_MASK_TWARN) ?
+		 "enabled" : "mask through VDDOK");
+	dev_info(&stw481x->client->dev, "VMMC: %s\n",
+		(val & STW_CONF2_VMMC_EXT) ? "internal" : "external");
+	dev_info(&stw481x->client->dev, "IT WAKE UP: %s\n",
+		(val & STW_CONF2_MASK_IT_WAKE_UP) ? "enabled" : "masked");
+	dev_info(&stw481x->client->dev, "GPO1: %s\n",
+		(val & STW_CONF2_GPO1) ? "low" : "high impedance");
+	dev_info(&stw481x->client->dev, "GPO2: %s\n",
+		(val & STW_CONF2_GPO2) ? "low" : "high impedance");
+
+	ret = regmap_read(stw481x->map, STW_VCORE_SLEEP, &val);
+	if (ret)
+		return ret;
+	vcore_slp = val & 0x0f;
+	dev_info(&stw481x->client->dev, "VCORE SLEEP: %u.%uV\n",
+		vcore_val[vcore_slp] / 100, vcore_val[vcore_slp] % 100);
+
+	return 0;
+}
+
+/*
+ * MFD cells - we have one cell which is selected operation
+ * mode, and we always have a GPIO cell.
+ */
+static struct mfd_cell stw481x_cells[] = {
+	{
+		.of_compatible = "st,stw481x-vmmc",
+		.name = "stw481x-vmmc-regulator",
+		.id = -1,
+	},
+};
+
+const struct regmap_config stw481x_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
+static int stw481x_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct stw481x			*stw481x;
+	int ret;
+	int i;
+
+	stw481x = devm_kzalloc(&client->dev, sizeof(*stw481x), GFP_KERNEL);
+	if (!stw481x)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, stw481x);
+	stw481x->client = client;
+	stw481x->map = devm_regmap_init_i2c(client, &stw481x_regmap_config);
+
+	ret = stw481x_startup(stw481x);
+	if (ret) {
+		dev_err(&client->dev, "chip initialization failed\n");
+		return ret;
+	}
+
+	/* Set up and register the platform devices. */
+	for (i = 0; i < ARRAY_SIZE(stw481x_cells); i++) {
+		/* One state holder for all drivers, this is simple */
+		stw481x_cells[i].platform_data = stw481x;
+		stw481x_cells[i].pdata_size = sizeof(*stw481x);
+	}
+
+	ret = mfd_add_devices(&client->dev, 0, stw481x_cells,
+			ARRAY_SIZE(stw481x_cells), NULL, 0, NULL);
+	if (ret)
+		return ret;
+
+	dev_info(&client->dev, "initialized STw481x device\n");
+
+	return ret;
+}
+
+static int stw481x_remove(struct i2c_client *client)
+{
+	mfd_remove_devices(&client->dev);
+	return 0;
+}
+
+/*
+ * This ID table is completely unused, as this is a pure
+ * device-tree probed driver, but it has to be here due to
+ * the structure of the I2C core.
+ */
+static const struct i2c_device_id stw481x_id[] = {
+	{ "stw481x", 0 },
+	{ },
+};
+
+static const struct of_device_id stw481x_match[] = {
+	{ .compatible = "st,stw4810", },
+	{ .compatible = "st,stw4811", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, stw481x_match);
+
+static struct i2c_driver stw481x_driver = {
+	.driver = {
+		.name	= "stw481x",
+		.of_match_table = stw481x_match,
+	},
+	.probe		= stw481x_probe,
+	.remove		= stw481x_remove,
+	.id_table	= stw481x_id,
+};
+
+module_i2c_driver(stw481x_driver);
+
+MODULE_AUTHOR("Linus Walleij");
+MODULE_DESCRIPTION("STw481x PMIC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/stw481x.h b/include/linux/mfd/stw481x.h
new file mode 100644
index 000000000000..eda121556e5d
--- /dev/null
+++ b/include/linux/mfd/stw481x.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2011 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#ifndef MFD_STW481X_H
+#define MFD_STW481X_H
+
+#include <linux/i2c.h>
+#include <linux/regulator/machine.h>
+#include <linux/regmap.h>
+#include <linux/bitops.h>
+
+/* These registers are accessed from more than one driver */
+#define STW_CONF1			0x11U
+#define STW_CONF1_PDN_VMMC		0x01U
+#define STW_CONF1_VMMC_MASK		0x0eU
+#define STW_CONF1_VMMC_1_8V		0x02U
+#define STW_CONF1_VMMC_2_85V		0x04U
+#define STW_CONF1_VMMC_3V		0x06U
+#define STW_CONF1_VMMC_1_85V		0x08U
+#define STW_CONF1_VMMC_2_6V		0x0aU
+#define STW_CONF1_VMMC_2_7V		0x0cU
+#define STW_CONF1_VMMC_3_3V		0x0eU
+#define STW_CONF1_MMC_LS_STATUS		0x10U
+#define STW_PCTL_REG_LO			0x1eU
+#define STW_PCTL_REG_HI			0x1fU
+#define STW_CONF1_V_MONITORING		0x20U
+#define STW_CONF1_IT_WARN		0x40U
+#define STW_CONF1_PDN_VAUX		0x80U
+#define STW_CONF2			0x20U
+#define STW_CONF2_MASK_TWARN		0x01U
+#define STW_CONF2_VMMC_EXT		0x02U
+#define STW_CONF2_MASK_IT_WAKE_UP	0x04U
+#define STW_CONF2_GPO1			0x08U
+#define STW_CONF2_GPO2			0x10U
+#define STW_VCORE_SLEEP			0x21U
+
+/**
+ * struct stw481x - state holder for the Stw481x drivers
+ * @mutex: mutex to serialize I2C accesses
+ * @i2c_client: corresponding I2C client
+ * @regulator: regulator device for regulator children
+ * @map: regmap handle to access device registers
+ */
+struct stw481x {
+	struct mutex		lock;
+	struct i2c_client	*client;
+	struct regulator_dev	*vmmc_regulator;
+	struct regmap		*map;
+};
+
+#endif
-- 
cgit v1.2.3


From e90f875419967589d75d1a3e2b89c5f2720e794e Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias.list@kaehlcke.net>
Date: Mon, 23 Sep 2013 22:43:29 +0200
Subject: mfd: ti_am335x_tscadc: Restore clock divider on resume

The ADC clock divider needs to be restored on resume as the register content
is lost when the ADC is powered down

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/ti_am335x_tscadc.c       | 10 ++++++----
 include/linux/mfd/ti_am335x_tscadc.h |  1 +
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index baaf5a8123bb..a3685d6ef674 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -95,7 +95,7 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	const __be32            *cur;
 	u32			val;
 	int			err, ctrl;
-	int			clk_value, clock_rate;
+	int			clock_rate;
 	int			tsc_wires = 0, adc_channels = 0, total_channels;
 	int			readouts = 0;
 
@@ -196,11 +196,11 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	}
 	clock_rate = clk_get_rate(clk);
 	clk_put(clk);
-	clk_value = clock_rate / ADC_CLK;
+	tscadc->clk_div = clock_rate / ADC_CLK;
 
 	/* TSCADC_CLKDIV needs to be configured to the value minus 1 */
-	clk_value = clk_value - 1;
-	tscadc_writel(tscadc, REG_CLKDIV, clk_value);
+	tscadc->clk_div--;
+	tscadc_writel(tscadc, REG_CLKDIV, tscadc->clk_div);
 
 	/* Set the control register bits */
 	ctrl = CNTRLREG_STEPCONFIGWRT |
@@ -303,6 +303,8 @@ static int tscadc_resume(struct device *dev)
 	tscadc_writel(tscadc_dev, REG_CTRL,
 			(restore | CNTRLREG_TSCSSENB));
 
+	tscadc_writel(tscadc_dev, REG_CLKDIV, tscadc_dev->clk_div);
+
 	return 0;
 }
 
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 4befdb85dd9c..7b68a061cd60 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -151,6 +151,7 @@ struct ti_tscadc_dev {
 	struct mfd_cell cells[TSCADC_CELLS];
 	u32 reg_se_cache;
 	spinlock_t reg_lock;
+	unsigned int clk_div;
 
 	/* tsc device */
 	struct titsc *tsc;
-- 
cgit v1.2.3


From 0248b4bfe56f0545c051e6230939ca8b95f1b037 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Mon, 23 Sep 2013 19:14:32 +0100
Subject: mfd: mc13xxx: Move SPI erratum workaround into SPI I/O function

Move the workaround for double sending AUDIO_CODEC and AUDIO_DAC writes
into the SPI core, aiding refactoring to eliminate the ASoC custom I/O
functions and avoiding the extra writes for I2C.

Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/mc13xxx-spi.c   | 5 +++++
 include/linux/mfd/mc13xxx.h | 7 +++++++
 sound/soc/codecs/mc13783.c  | 4 ----
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c
index 77189daadf1e..5f14ef6693c2 100644
--- a/drivers/mfd/mc13xxx-spi.c
+++ b/drivers/mfd/mc13xxx-spi.c
@@ -94,10 +94,15 @@ static int mc13xxx_spi_write(void *context, const void *data, size_t count)
 {
 	struct device *dev = context;
 	struct spi_device *spi = to_spi_device(dev);
+	const char *reg = data;
 
 	if (count != 4)
 		return -ENOTSUPP;
 
+	/* include errata fix for spi audio problems */
+	if (*reg == MC13783_AUDIO_CODEC || *reg == MC13783_AUDIO_DAC)
+		spi_write(spi, data, count);
+
 	return spi_write(spi, data, count);
 }
 
diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index 41ed59276c00..67c17b5a6f44 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -41,6 +41,13 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx,
 		unsigned int mode, unsigned int channel,
 		u8 ato, bool atox, unsigned int *sample);
 
+#define MC13783_AUDIO_RX0	36
+#define MC13783_AUDIO_RX1	37
+#define MC13783_AUDIO_TX	38
+#define MC13783_SSI_NETWORK	39
+#define MC13783_AUDIO_CODEC	40
+#define MC13783_AUDIO_DAC	41
+
 #define MC13XXX_IRQ_ADCDONE	0
 #define MC13XXX_IRQ_ADCBISDONE	1
 #define MC13XXX_IRQ_TS		2
diff --git a/sound/soc/codecs/mc13783.c b/sound/soc/codecs/mc13783.c
index ea141e1d6f28..4d3c8fd8c5db 100644
--- a/sound/soc/codecs/mc13783.c
+++ b/sound/soc/codecs/mc13783.c
@@ -125,10 +125,6 @@ static int mc13783_write(struct snd_soc_codec *codec,
 
 	ret = mc13xxx_reg_write(priv->mc13xxx, reg, value);
 
-	/* include errata fix for spi audio problems */
-	if (reg == MC13783_AUDIO_CODEC || reg == MC13783_AUDIO_DAC)
-		ret = mc13xxx_reg_write(priv->mc13xxx, reg, value);
-
 	mc13xxx_unlock(priv->mc13xxx);
 
 	return ret;
-- 
cgit v1.2.3


From 4233a0aafb72985a4692a9e6af5c528811226ac1 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Wed, 21 Aug 2013 18:53:33 +0200
Subject: mfd: max77693: Remove device wakeup from driver

The patch removes wakeup related code from the driver and plaftorm
data - it is already handled by i2c core using I2C_CLIENT_WAKE flag
from struct i2c_board_info. As a result MFD itself do not requires
platform data.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max77693.c               | 10 ----------
 include/linux/mfd/max77693-private.h |  1 -
 include/linux/mfd/max77693.h         |  2 --
 3 files changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c
index c04723efc707..27f5da3bc63a 100644
--- a/drivers/mfd/max77693.c
+++ b/drivers/mfd/max77693.c
@@ -110,15 +110,9 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
 			      const struct i2c_device_id *id)
 {
 	struct max77693_dev *max77693;
-	struct max77693_platform_data *pdata = dev_get_platdata(&i2c->dev);
 	u8 reg_data;
 	int ret = 0;
 
-	if (!pdata) {
-		dev_err(&i2c->dev, "No platform data found.\n");
-		return -EINVAL;
-	}
-
 	max77693 = devm_kzalloc(&i2c->dev,
 			sizeof(struct max77693_dev), GFP_KERNEL);
 	if (max77693 == NULL)
@@ -138,8 +132,6 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
 		return ret;
 	}
 
-	max77693->wakeup = pdata->wakeup;
-
 	ret = max77693_read_reg(max77693->regmap, MAX77693_PMIC_REG_PMIC_ID2,
 				&reg_data);
 	if (ret < 0) {
@@ -179,8 +171,6 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
 	if (ret < 0)
 		goto err_mfd;
 
-	device_init_wakeup(max77693->dev, pdata->wakeup);
-
 	return ret;
 
 err_mfd:
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index 244fb0d51589..3e050b933dd0 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -323,7 +323,6 @@ struct max77693_dev {
 
 	int irq;
 	int irq_gpio;
-	bool wakeup;
 	struct mutex irqlock;
 	int irq_masks_cur[MAX77693_IRQ_GROUP_NR];
 	int irq_masks_cache[MAX77693_IRQ_GROUP_NR];
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
index 676f0f388992..3f3dc45f93ee 100644
--- a/include/linux/mfd/max77693.h
+++ b/include/linux/mfd/max77693.h
@@ -64,8 +64,6 @@ struct max77693_muic_platform_data {
 };
 
 struct max77693_platform_data {
-	int wakeup;
-
 	/* regulator data */
 	struct max77693_regulator_data *regulators;
 	int num_regulators;
-- 
cgit v1.2.3


From d460a6f3d67a8558fb58299518077888b7dbf5f3 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 22 Oct 2013 13:08:46 +0530
Subject: mfd: Add support for ams AS3722 PMIC

The ams AS3722 is a compact system PMU suitable for mobile phones,
tablets etc. It has 4 DC/DC step-down regulators, 3 DC/DC step-down
controller, 11 LDOs, RTC, automatic battery, temperature and
over-current monitoring, 8 GPIOs, ADC and a watchdog.

Add MFD core driver for the AS3722 to support core functionality.

Acked-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Florian Lobmaier <florian.lobmaier@ams.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/as3722.txt | 194 ++++++++++
 drivers/mfd/Kconfig                              |  12 +
 drivers/mfd/Makefile                             |   1 +
 drivers/mfd/as3722.c                             | 449 +++++++++++++++++++++++
 include/dt-bindings/mfd/as3722.h                 |  52 +++
 include/linux/mfd/as3722.h                       | 423 +++++++++++++++++++++
 6 files changed, 1131 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mfd/as3722.txt
 create mode 100644 drivers/mfd/as3722.c
 create mode 100644 include/dt-bindings/mfd/as3722.h
 create mode 100644 include/linux/mfd/as3722.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mfd/as3722.txt b/Documentation/devicetree/bindings/mfd/as3722.txt
new file mode 100644
index 000000000000..fc2191ecfd6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/as3722.txt
@@ -0,0 +1,194 @@
+* ams AS3722 Power management IC.
+
+Required properties:
+-------------------
+- compatible: Must be "ams,as3722".
+- reg: I2C device address.
+- interrupt-controller: AS3722 has internal interrupt controller which takes the
+  interrupt request from internal sub-blocks like RTC, regulators, GPIOs as well
+  as external input.
+- #interrupt-cells: Should be set to 2 for IRQ number and flags.
+  The first cell is the IRQ number. IRQ numbers for different interrupt source
+  of AS3722 are defined at dt-bindings/mfd/as3722.h
+  The second cell is the flags, encoded as the trigger masks from binding document
+	interrupts.txt, using dt-bindings/irq.
+
+Optional submodule and their properties:
+=======================================
+
+Pinmux and GPIO:
+===============
+Device has 8 GPIO pins which can be configured as GPIO as well as the special IO
+functions.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Following are properties which is needed if GPIO and pinmux functionality
+is required:
+    Required properties:
+    -------------------
+	- gpio-controller: Marks the device node as a GPIO controller.
+	- #gpio-cells: Number of GPIO cells. Refer to binding document
+			gpio/gpio.txt
+
+    Optional properties:
+    --------------------
+	Following properties are require if pin control setting is required
+	at boot.
+	- pinctrl-names: A pinctrl state named "default" be defined, using the
+		bindings in pinctrl/pinctrl-binding.txt.
+	- pinctrl[0...n]: Properties to contain the phandle that refer to
+		different nodes of pin control settings. These nodes represents
+		the pin control setting of state 0 to state n. Each of these
+		nodes contains different subnodes to represents some desired
+		configuration for a list of pins. This configuration can
+		include the mux function to select on those pin(s), and
+		various pin configuration parameters, such as pull-up,
+		open drain.
+
+		Each subnode have following properties:
+		Required properties:
+		    - pins: List of pins. Valid values of pins properties are:
+				gpio0, gpio1, gpio2, gpio3, gpio4, gpio5,
+				gpio6, gpio7
+
+		Optional properties:
+			function, bias-disable, bias-pull-up, bias-pull-down,
+			bias-high-impedance, drive-open-drain.
+
+			Valid values for function properties are:
+				gpio, interrupt-out, gpio-in-interrupt,
+				vsup-vbat-low-undebounce-out,
+				vsup-vbat-low-debounce-out,
+				voltage-in-standby, oc-pg-sd0, oc-pg-sd6,
+				powergood-out, pwm-in, pwm-out, clk32k-out,
+				watchdog-in, soft-reset-in
+
+Regulators:
+===========
+Device has multiple DCDC and LDOs. The node "regulators" is require if regulator
+functionality is needed.
+
+Following are properties of regulator subnode.
+
+    Optional properties:
+    -------------------
+	The input supply of regulators are the optional properties on the
+	regulator node. The input supply of these regulators are provided
+	through following properties:
+		vsup-sd2-supply: Input supply for SD2.
+		vsup-sd3-supply: Input supply for SD3.
+		vsup-sd4-supply: Input supply for SD4.
+		vsup-sd5-supply: Input supply for SD5.
+		vin-ldo0-supply: Input supply for LDO0.
+		vin-ldo1-6-supply: Input supply for LDO1 and LDO6.
+		vin-ldo2-5-7-supply: Input supply for LDO2, LDO5 and LDO7.
+		vin-ldo3-4-supply: Input supply for LDO3 and LDO4.
+		vin-ldo9-10-supply: Input supply for LDO9 and LDO10.
+		vin-ldo11-supply: Input supply for LDO11.
+
+    Optional sub nodes for regulators:
+    ---------------------------------
+	The subnodes name is the name of regulator and it must be one of:
+	sd[0-6], ldo[0-7], ldo[9-11]
+
+	Each sub-node should contain the constraints and initialization
+	information for that regulator. See regulator.txt for a description
+	of standard properties for these sub-nodes.
+	Additional optional custom properties  are listed below.
+		ams,ext-control: External control of the rail. The option of
+			this properties will tell which external input is
+			controlling this rail. Valid values are 0, 1, 2 ad 3.
+			0: There is no external control of this rail.
+			1: Rail is controlled by ENABLE1 input pin.
+			2: Rail is controlled by ENABLE2 input pin.
+			3: Rail is controlled by ENABLE3 input pin.
+			Missing this property on DT will be assume as no
+			external control. The external control pin macros
+			are defined @dt-bindings/mfd/as3722.h
+
+		ams,enable-tracking: Enable tracking with SD1, only supported
+			by LDO3.
+
+Example:
+--------
+#include <dt-bindings/mfd/as3722.h>
+...
+ams3722 {
+	compatible = "ams,as3722";
+	reg = <0x48>;
+
+	interrupt-parent = <&intc>;
+	interrupt-controller;
+	#interrupt-cells = <2>;
+
+	gpio-controller;
+	#gpio-cells = <2>;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&as3722_default>;
+
+	as3722_default: pinmux {
+			gpio0 {
+				pins = "gpio0";
+				function = "gpio";
+				bias-pull-down;
+			};
+
+			gpio1_2_4_7 {
+				pins = "gpio1", "gpio2", "gpio4", "gpio7";
+				function = "gpio";
+				bias-pull-up;
+			};
+
+			gpio5 {
+				pins = "gpio5";
+				function = "clk32k_out";
+			};
+	}
+
+	regulators {
+			vsup-sd2-supply = <...>;
+			...
+
+			sd0 {
+				regulator-name = "vdd_cpu";
+				regulator-min-microvolt = <700000>;
+				regulator-max-microvolt = <1400000>;
+				regulator-always-on;
+				ams,ext-control = <2>;
+			};
+
+			sd1 {
+				regulator-name = "vdd_core";
+				regulator-min-microvolt = <700000>;
+				regulator-max-microvolt = <1400000>;
+				regulator-always-on;
+				ams,ext-control = <1>;
+			};
+
+			sd2 {
+				regulator-name = "vddio_ddr";
+				regulator-min-microvolt = <1350000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-always-on;
+			};
+
+			sd4 {
+				regulator-name = "avdd-hdmi-pex";
+				regulator-min-microvolt = <1050000>;
+				regulator-max-microvolt = <1050000>;
+				regulator-always-on;
+			};
+
+			sd5 {
+				regulator-name = "vdd-1v8";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+			};
+			....
+	};
+};
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index d078fe4a82ef..b7c74a73d371 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -27,6 +27,18 @@ config MFD_AS3711
 	help
 	  Support for the AS3711 PMIC from AMS
 
+config MFD_AS3722
+	bool "ams AS3722 Power Management IC"
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	depends on I2C && OF
+	help
+	  The ams AS3722 is a compact system PMU suitable for mobile phones,
+	  tablets etc. It has 4 DC/DC step-down regulators, 3 DC/DC step-down
+	  controllers, 11 LDOs, RTC, automatic battery, temperature and
+	  over current monitoring, GPIOs, ADC and a watchdog.
+
 config PMIC_ADP5520
 	bool "Analog Devices ADP5520/01 MFD PMIC Core Support"
 	depends on I2C=y
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 656cec90a1e5..8a28dc90fe78 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -162,4 +162,5 @@ obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
 obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o vexpress-sysreg.o
 obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
 obj-$(CONFIG_MFD_AS3711)	+= as3711.o
+obj-$(CONFIG_MFD_AS3722)	+= as3722.o
 obj-$(CONFIG_MFD_STW481X)	+= stw481x.o
diff --git a/drivers/mfd/as3722.c b/drivers/mfd/as3722.c
new file mode 100644
index 000000000000..cd8ee593cfa8
--- /dev/null
+++ b/drivers/mfd/as3722.c
@@ -0,0 +1,449 @@
+/*
+ * Core driver for ams AS3722 PMICs
+ *
+ * Copyright (C) 2013 AMS AG
+ * Copyright (c) 2013, NVIDIA Corporation. All rights reserved.
+ *
+ * Author: Florian Lobmaier <florian.lobmaier@ams.com>
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/as3722.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define AS3722_DEVICE_ID	0x0C
+
+static const struct resource as3722_rtc_resource[] = {
+	{
+		.name = "as3722-rtc-alarm",
+		.start = AS3722_IRQ_RTC_ALARM,
+		.end = AS3722_IRQ_RTC_ALARM,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static const struct resource as3722_adc_resource[] = {
+	{
+		.name = "as3722-adc",
+		.start = AS3722_IRQ_ADC,
+		.end = AS3722_IRQ_ADC,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell as3722_devs[] = {
+	{
+		.name = "as3722-pinctrl",
+	},
+	{
+		.name = "as3722-regulator",
+	},
+	{
+		.name = "as3722-rtc",
+		.num_resources = ARRAY_SIZE(as3722_rtc_resource),
+		.resources = as3722_rtc_resource,
+	},
+	{
+		.name = "as3722-adc",
+		.num_resources = ARRAY_SIZE(as3722_adc_resource),
+		.resources = as3722_adc_resource,
+	},
+	{
+		.name = "as3722-power-off",
+	},
+};
+
+static const struct regmap_irq as3722_irqs[] = {
+	/* INT1 IRQs */
+	[AS3722_IRQ_LID] = {
+		.mask = AS3722_INTERRUPT_MASK1_LID,
+	},
+	[AS3722_IRQ_ACOK] = {
+		.mask = AS3722_INTERRUPT_MASK1_ACOK,
+	},
+	[AS3722_IRQ_ENABLE1] = {
+		.mask = AS3722_INTERRUPT_MASK1_ENABLE1,
+	},
+	[AS3722_IRQ_OCCUR_ALARM_SD0] = {
+		.mask = AS3722_INTERRUPT_MASK1_OCURR_ALARM_SD0,
+	},
+	[AS3722_IRQ_ONKEY_LONG_PRESS] = {
+		.mask = AS3722_INTERRUPT_MASK1_ONKEY_LONG,
+	},
+	[AS3722_IRQ_ONKEY] = {
+		.mask = AS3722_INTERRUPT_MASK1_ONKEY,
+	},
+	[AS3722_IRQ_OVTMP] = {
+		.mask = AS3722_INTERRUPT_MASK1_OVTMP,
+	},
+	[AS3722_IRQ_LOWBAT] = {
+		.mask = AS3722_INTERRUPT_MASK1_LOWBAT,
+	},
+
+	/* INT2 IRQs */
+	[AS3722_IRQ_SD0_LV] = {
+		.mask = AS3722_INTERRUPT_MASK2_SD0_LV,
+		.reg_offset = 1,
+	},
+	[AS3722_IRQ_SD1_LV] = {
+		.mask = AS3722_INTERRUPT_MASK2_SD1_LV,
+		.reg_offset = 1,
+	},
+	[AS3722_IRQ_SD2_LV] = {
+		.mask = AS3722_INTERRUPT_MASK2_SD2345_LV,
+		.reg_offset = 1,
+	},
+	[AS3722_IRQ_PWM1_OV_PROT] = {
+		.mask = AS3722_INTERRUPT_MASK2_PWM1_OV_PROT,
+		.reg_offset = 1,
+	},
+	[AS3722_IRQ_PWM2_OV_PROT] = {
+		.mask = AS3722_INTERRUPT_MASK2_PWM2_OV_PROT,
+		.reg_offset = 1,
+	},
+	[AS3722_IRQ_ENABLE2] = {
+		.mask = AS3722_INTERRUPT_MASK2_ENABLE2,
+		.reg_offset = 1,
+	},
+	[AS3722_IRQ_SD6_LV] = {
+		.mask = AS3722_INTERRUPT_MASK2_SD6_LV,
+		.reg_offset = 1,
+	},
+	[AS3722_IRQ_RTC_REP] = {
+		.mask = AS3722_INTERRUPT_MASK2_RTC_REP,
+		.reg_offset = 1,
+	},
+
+	/* INT3 IRQs */
+	[AS3722_IRQ_RTC_ALARM] = {
+		.mask = AS3722_INTERRUPT_MASK3_RTC_ALARM,
+		.reg_offset = 2,
+	},
+	[AS3722_IRQ_GPIO1] = {
+		.mask = AS3722_INTERRUPT_MASK3_GPIO1,
+		.reg_offset = 2,
+	},
+	[AS3722_IRQ_GPIO2] = {
+		.mask = AS3722_INTERRUPT_MASK3_GPIO2,
+		.reg_offset = 2,
+	},
+	[AS3722_IRQ_GPIO3] = {
+		.mask = AS3722_INTERRUPT_MASK3_GPIO3,
+		.reg_offset = 2,
+	},
+	[AS3722_IRQ_GPIO4] = {
+		.mask = AS3722_INTERRUPT_MASK3_GPIO4,
+		.reg_offset = 2,
+	},
+	[AS3722_IRQ_GPIO5] = {
+		.mask = AS3722_INTERRUPT_MASK3_GPIO5,
+		.reg_offset = 2,
+	},
+	[AS3722_IRQ_WATCHDOG] = {
+		.mask = AS3722_INTERRUPT_MASK3_WATCHDOG,
+		.reg_offset = 2,
+	},
+	[AS3722_IRQ_ENABLE3] = {
+		.mask = AS3722_INTERRUPT_MASK3_ENABLE3,
+		.reg_offset = 2,
+	},
+
+	/* INT4 IRQs */
+	[AS3722_IRQ_TEMP_SD0_SHUTDOWN] = {
+		.mask = AS3722_INTERRUPT_MASK4_TEMP_SD0_SHUTDOWN,
+		.reg_offset = 3,
+	},
+	[AS3722_IRQ_TEMP_SD1_SHUTDOWN] = {
+		.mask = AS3722_INTERRUPT_MASK4_TEMP_SD1_SHUTDOWN,
+		.reg_offset = 3,
+	},
+	[AS3722_IRQ_TEMP_SD2_SHUTDOWN] = {
+		.mask = AS3722_INTERRUPT_MASK4_TEMP_SD6_SHUTDOWN,
+		.reg_offset = 3,
+	},
+	[AS3722_IRQ_TEMP_SD0_ALARM] = {
+		.mask = AS3722_INTERRUPT_MASK4_TEMP_SD0_ALARM,
+		.reg_offset = 3,
+	},
+	[AS3722_IRQ_TEMP_SD1_ALARM] = {
+		.mask = AS3722_INTERRUPT_MASK4_TEMP_SD1_ALARM,
+		.reg_offset = 3,
+	},
+	[AS3722_IRQ_TEMP_SD6_ALARM] = {
+		.mask = AS3722_INTERRUPT_MASK4_TEMP_SD6_ALARM,
+		.reg_offset = 3,
+	},
+	[AS3722_IRQ_OCCUR_ALARM_SD6] = {
+		.mask = AS3722_INTERRUPT_MASK4_OCCUR_ALARM_SD6,
+		.reg_offset = 3,
+	},
+	[AS3722_IRQ_ADC] = {
+		.mask = AS3722_INTERRUPT_MASK4_ADC,
+		.reg_offset = 3,
+	},
+};
+
+static const struct regmap_irq_chip as3722_irq_chip = {
+	.name = "as3722",
+	.irqs = as3722_irqs,
+	.num_irqs = ARRAY_SIZE(as3722_irqs),
+	.num_regs = 4,
+	.status_base = AS3722_INTERRUPT_STATUS1_REG,
+	.mask_base = AS3722_INTERRUPT_MASK1_REG,
+};
+
+static int as3722_check_device_id(struct as3722 *as3722)
+{
+	u32 val;
+	int ret;
+
+	/* Check that this is actually a AS3722 */
+	ret = as3722_read(as3722, AS3722_ASIC_ID1_REG, &val);
+	if (ret < 0) {
+		dev_err(as3722->dev, "ASIC_ID1 read failed: %d\n", ret);
+		return ret;
+	}
+
+	if (val != AS3722_DEVICE_ID) {
+		dev_err(as3722->dev, "Device is not AS3722, ID is 0x%x\n", val);
+		return -ENODEV;
+	}
+
+	ret = as3722_read(as3722, AS3722_ASIC_ID2_REG, &val);
+	if (ret < 0) {
+		dev_err(as3722->dev, "ASIC_ID2 read failed: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(as3722->dev, "AS3722 with revision 0x%x found\n", val);
+	return 0;
+}
+
+static int as3722_configure_pullups(struct as3722 *as3722)
+{
+	int ret;
+	u32 val = 0;
+
+	if (as3722->en_intern_int_pullup)
+		val |= AS3722_INT_PULL_UP;
+	if (as3722->en_intern_i2c_pullup)
+		val |= AS3722_I2C_PULL_UP;
+
+	ret = as3722_update_bits(as3722, AS3722_IOVOLTAGE_REG,
+			AS3722_INT_PULL_UP | AS3722_I2C_PULL_UP, val);
+	if (ret < 0)
+		dev_err(as3722->dev, "IOVOLTAGE_REG update failed: %d\n", ret);
+	return ret;
+}
+
+static const struct regmap_range as3722_readable_ranges[] = {
+	regmap_reg_range(AS3722_SD0_VOLTAGE_REG, AS3722_SD6_VOLTAGE_REG),
+	regmap_reg_range(AS3722_GPIO0_CONTROL_REG, AS3722_LDO7_VOLTAGE_REG),
+	regmap_reg_range(AS3722_LDO9_VOLTAGE_REG, AS3722_REG_SEQU_MOD3_REG),
+	regmap_reg_range(AS3722_SD_PHSW_CTRL_REG, AS3722_PWM_CONTROL_H_REG),
+	regmap_reg_range(AS3722_WATCHDOG_TIMER_REG, AS3722_WATCHDOG_TIMER_REG),
+	regmap_reg_range(AS3722_WATCHDOG_SOFTWARE_SIGNAL_REG,
+					AS3722_BATTERY_VOLTAGE_MONITOR2_REG),
+	regmap_reg_range(AS3722_SD_CONTROL_REG, AS3722_PWM_VCONTROL4_REG),
+	regmap_reg_range(AS3722_BB_CHARGER_REG, AS3722_SRAM_REG),
+	regmap_reg_range(AS3722_RTC_ACCESS_REG, AS3722_RTC_ACCESS_REG),
+	regmap_reg_range(AS3722_RTC_STATUS_REG, AS3722_TEMP_STATUS_REG),
+	regmap_reg_range(AS3722_ADC0_CONTROL_REG, AS3722_ADC_CONFIGURATION_REG),
+	regmap_reg_range(AS3722_ASIC_ID1_REG, AS3722_ASIC_ID2_REG),
+	regmap_reg_range(AS3722_LOCK_REG, AS3722_LOCK_REG),
+};
+
+static const struct regmap_access_table as3722_readable_table = {
+	.yes_ranges = as3722_readable_ranges,
+	.n_yes_ranges = ARRAY_SIZE(as3722_readable_ranges),
+};
+
+static const struct regmap_range as3722_writable_ranges[] = {
+	regmap_reg_range(AS3722_SD0_VOLTAGE_REG, AS3722_SD6_VOLTAGE_REG),
+	regmap_reg_range(AS3722_GPIO0_CONTROL_REG, AS3722_LDO7_VOLTAGE_REG),
+	regmap_reg_range(AS3722_LDO9_VOLTAGE_REG, AS3722_GPIO_SIGNAL_OUT_REG),
+	regmap_reg_range(AS3722_REG_SEQU_MOD1_REG, AS3722_REG_SEQU_MOD3_REG),
+	regmap_reg_range(AS3722_SD_PHSW_CTRL_REG, AS3722_PWM_CONTROL_H_REG),
+	regmap_reg_range(AS3722_WATCHDOG_TIMER_REG, AS3722_WATCHDOG_TIMER_REG),
+	regmap_reg_range(AS3722_WATCHDOG_SOFTWARE_SIGNAL_REG,
+					AS3722_BATTERY_VOLTAGE_MONITOR2_REG),
+	regmap_reg_range(AS3722_SD_CONTROL_REG, AS3722_PWM_VCONTROL4_REG),
+	regmap_reg_range(AS3722_BB_CHARGER_REG, AS3722_SRAM_REG),
+	regmap_reg_range(AS3722_INTERRUPT_MASK1_REG, AS3722_TEMP_STATUS_REG),
+	regmap_reg_range(AS3722_ADC0_CONTROL_REG, AS3722_ADC1_CONTROL_REG),
+	regmap_reg_range(AS3722_ADC1_THRESHOLD_HI_MSB_REG,
+					AS3722_ADC_CONFIGURATION_REG),
+	regmap_reg_range(AS3722_LOCK_REG, AS3722_LOCK_REG),
+};
+
+static const struct regmap_access_table as3722_writable_table = {
+	.yes_ranges = as3722_writable_ranges,
+	.n_yes_ranges = ARRAY_SIZE(as3722_writable_ranges),
+};
+
+static const struct regmap_range as3722_cacheable_ranges[] = {
+	regmap_reg_range(AS3722_SD0_VOLTAGE_REG, AS3722_LDO11_VOLTAGE_REG),
+	regmap_reg_range(AS3722_SD_CONTROL_REG, AS3722_LDOCONTROL1_REG),
+};
+
+static const struct regmap_access_table as3722_volatile_table = {
+	.no_ranges = as3722_cacheable_ranges,
+	.n_no_ranges = ARRAY_SIZE(as3722_cacheable_ranges),
+};
+
+const struct regmap_config as3722_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = AS3722_MAX_REGISTER,
+	.cache_type = REGCACHE_RBTREE,
+	.rd_table = &as3722_readable_table,
+	.wr_table = &as3722_writable_table,
+	.volatile_table = &as3722_volatile_table,
+};
+
+static int as3722_i2c_of_probe(struct i2c_client *i2c,
+			struct as3722 *as3722)
+{
+	struct device_node *np = i2c->dev.of_node;
+	struct irq_data *irq_data;
+
+	if (!np) {
+		dev_err(&i2c->dev, "Device Tree not found\n");
+		return -EINVAL;
+	}
+
+	irq_data = irq_get_irq_data(i2c->irq);
+	if (!irq_data) {
+		dev_err(&i2c->dev, "Invalid IRQ: %d\n", i2c->irq);
+		return -EINVAL;
+	}
+
+	as3722->en_intern_int_pullup = of_property_read_bool(np,
+					"ams,enable-internal-int-pullup");
+	as3722->en_intern_i2c_pullup = of_property_read_bool(np,
+					"ams,enable-internal-i2c-pullup");
+	as3722->irq_flags = irqd_get_trigger_type(irq_data);
+	dev_dbg(&i2c->dev, "IRQ flags are 0x%08lx\n", as3722->irq_flags);
+	return 0;
+}
+
+static int as3722_i2c_probe(struct i2c_client *i2c,
+			const struct i2c_device_id *id)
+{
+	struct as3722 *as3722;
+	unsigned long irq_flags;
+	int ret;
+
+	as3722 = devm_kzalloc(&i2c->dev, sizeof(struct as3722), GFP_KERNEL);
+	if (!as3722)
+		return -ENOMEM;
+
+	as3722->dev = &i2c->dev;
+	as3722->chip_irq = i2c->irq;
+	i2c_set_clientdata(i2c, as3722);
+
+	ret = as3722_i2c_of_probe(i2c, as3722);
+	if (ret < 0)
+		return ret;
+
+	as3722->regmap = devm_regmap_init_i2c(i2c, &as3722_regmap_config);
+	if (IS_ERR(as3722->regmap)) {
+		ret = PTR_ERR(as3722->regmap);
+		dev_err(&i2c->dev, "regmap init failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = as3722_check_device_id(as3722);
+	if (ret < 0)
+		return ret;
+
+	irq_flags = as3722->irq_flags | IRQF_ONESHOT;
+	ret = regmap_add_irq_chip(as3722->regmap, as3722->chip_irq,
+			irq_flags, -1, &as3722_irq_chip,
+			&as3722->irq_data);
+	if (ret < 0) {
+		dev_err(as3722->dev, "Failed to add regmap irq: %d\n", ret);
+		return ret;
+	}
+
+	ret = as3722_configure_pullups(as3722);
+	if (ret < 0)
+		goto scrub;
+
+	ret = mfd_add_devices(&i2c->dev, -1, as3722_devs,
+			ARRAY_SIZE(as3722_devs), NULL, 0,
+			regmap_irq_get_domain(as3722->irq_data));
+	if (ret) {
+		dev_err(as3722->dev, "Failed to add MFD devices: %d\n", ret);
+		goto scrub;
+	}
+
+	dev_dbg(as3722->dev, "AS3722 core driver initialized successfully\n");
+	return 0;
+
+scrub:
+	regmap_del_irq_chip(as3722->chip_irq, as3722->irq_data);
+	return ret;
+}
+
+static int as3722_i2c_remove(struct i2c_client *i2c)
+{
+	struct as3722 *as3722 = i2c_get_clientdata(i2c);
+
+	mfd_remove_devices(as3722->dev);
+	regmap_del_irq_chip(as3722->chip_irq, as3722->irq_data);
+	return 0;
+}
+
+static const struct of_device_id as3722_of_match[] = {
+	{ .compatible = "ams,as3722", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, as3722_of_match);
+
+static const struct i2c_device_id as3722_i2c_id[] = {
+	{ "as3722", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(i2c, as3722_i2c_id);
+
+static struct i2c_driver as3722_i2c_driver = {
+	.driver = {
+		.name = "as3722",
+		.owner = THIS_MODULE,
+		.of_match_table = as3722_of_match,
+	},
+	.probe = as3722_i2c_probe,
+	.remove = as3722_i2c_remove,
+	.id_table = as3722_i2c_id,
+};
+
+module_i2c_driver(as3722_i2c_driver);
+
+MODULE_DESCRIPTION("I2C support for AS3722 PMICs");
+MODULE_AUTHOR("Florian Lobmaier <florian.lobmaier@ams.com>");
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/dt-bindings/mfd/as3722.h b/include/dt-bindings/mfd/as3722.h
new file mode 100644
index 000000000000..0e692562d77b
--- /dev/null
+++ b/include/dt-bindings/mfd/as3722.h
@@ -0,0 +1,52 @@
+/*
+ * This header provides macros for ams AS3722 device bindings.
+ *
+ * Copyright (c) 2013, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ */
+
+#ifndef __DT_BINDINGS_AS3722_H__
+#define __DT_BINDINGS_AS3722_H__
+
+/* External control pins */
+#define AS3722_EXT_CONTROL_PIN_ENABLE1 1
+#define AS3722_EXT_CONTROL_PIN_ENABLE2 2
+#define AS3722_EXT_CONTROL_PIN_ENABLE2 3
+
+/* Interrupt numbers for AS3722 */
+#define AS3722_IRQ_LID			0
+#define AS3722_IRQ_ACOK			1
+#define AS3722_IRQ_ENABLE1		2
+#define AS3722_IRQ_OCCUR_ALARM_SD0	3
+#define AS3722_IRQ_ONKEY_LONG_PRESS	4
+#define AS3722_IRQ_ONKEY		5
+#define AS3722_IRQ_OVTMP		6
+#define AS3722_IRQ_LOWBAT		7
+#define AS3722_IRQ_SD0_LV		8
+#define AS3722_IRQ_SD1_LV		9
+#define AS3722_IRQ_SD2_LV		10
+#define AS3722_IRQ_PWM1_OV_PROT		11
+#define AS3722_IRQ_PWM2_OV_PROT		12
+#define AS3722_IRQ_ENABLE2		13
+#define AS3722_IRQ_SD6_LV		14
+#define AS3722_IRQ_RTC_REP		15
+#define AS3722_IRQ_RTC_ALARM		16
+#define AS3722_IRQ_GPIO1		17
+#define AS3722_IRQ_GPIO2		18
+#define AS3722_IRQ_GPIO3		19
+#define AS3722_IRQ_GPIO4		20
+#define AS3722_IRQ_GPIO5		21
+#define AS3722_IRQ_WATCHDOG		22
+#define AS3722_IRQ_ENABLE3		23
+#define AS3722_IRQ_TEMP_SD0_SHUTDOWN	24
+#define AS3722_IRQ_TEMP_SD1_SHUTDOWN	25
+#define AS3722_IRQ_TEMP_SD2_SHUTDOWN	26
+#define AS3722_IRQ_TEMP_SD0_ALARM	27
+#define AS3722_IRQ_TEMP_SD1_ALARM	28
+#define AS3722_IRQ_TEMP_SD6_ALARM	29
+#define AS3722_IRQ_OCCUR_ALARM_SD6	30
+#define AS3722_IRQ_ADC			31
+
+#endif /* __DT_BINDINGS_AS3722_H__ */
diff --git a/include/linux/mfd/as3722.h b/include/linux/mfd/as3722.h
new file mode 100644
index 000000000000..16bf8a0dcd97
--- /dev/null
+++ b/include/linux/mfd/as3722.h
@@ -0,0 +1,423 @@
+/*
+ * as3722 definitions
+ *
+ * Copyright (C) 2013 ams
+ * Copyright (c) 2013, NVIDIA Corporation. All rights reserved.
+ *
+ * Author: Florian Lobmaier <florian.lobmaier@ams.com>
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+#ifndef __LINUX_MFD_AS3722_H__
+#define __LINUX_MFD_AS3722_H__
+
+#include <linux/regmap.h>
+
+/* AS3722 registers */
+#define AS3722_SD0_VOLTAGE_REG				0x00
+#define AS3722_SD1_VOLTAGE_REG				0x01
+#define AS3722_SD2_VOLTAGE_REG				0x02
+#define AS3722_SD3_VOLTAGE_REG				0x03
+#define AS3722_SD4_VOLTAGE_REG				0x04
+#define AS3722_SD5_VOLTAGE_REG				0x05
+#define AS3722_SD6_VOLTAGE_REG				0x06
+#define AS3722_GPIO0_CONTROL_REG			0x08
+#define AS3722_GPIO1_CONTROL_REG			0x09
+#define AS3722_GPIO2_CONTROL_REG			0x0A
+#define AS3722_GPIO3_CONTROL_REG			0x0B
+#define AS3722_GPIO4_CONTROL_REG			0x0C
+#define AS3722_GPIO5_CONTROL_REG			0x0D
+#define AS3722_GPIO6_CONTROL_REG			0x0E
+#define AS3722_GPIO7_CONTROL_REG			0x0F
+#define AS3722_LDO0_VOLTAGE_REG				0x10
+#define AS3722_LDO1_VOLTAGE_REG				0x11
+#define AS3722_LDO2_VOLTAGE_REG				0x12
+#define AS3722_LDO3_VOLTAGE_REG				0x13
+#define AS3722_LDO4_VOLTAGE_REG				0x14
+#define AS3722_LDO5_VOLTAGE_REG				0x15
+#define AS3722_LDO6_VOLTAGE_REG				0x16
+#define AS3722_LDO7_VOLTAGE_REG				0x17
+#define AS3722_LDO9_VOLTAGE_REG				0x19
+#define AS3722_LDO10_VOLTAGE_REG			0x1A
+#define AS3722_LDO11_VOLTAGE_REG			0x1B
+#define AS3722_GPIO_DEB1_REG				0x1E
+#define AS3722_GPIO_DEB2_REG				0x1F
+#define AS3722_GPIO_SIGNAL_OUT_REG			0x20
+#define AS3722_GPIO_SIGNAL_IN_REG			0x21
+#define AS3722_REG_SEQU_MOD1_REG			0x22
+#define AS3722_REG_SEQU_MOD2_REG			0x23
+#define AS3722_REG_SEQU_MOD3_REG			0x24
+#define AS3722_SD_PHSW_CTRL_REG				0x27
+#define AS3722_SD_PHSW_STATUS				0x28
+#define AS3722_SD0_CONTROL_REG				0x29
+#define AS3722_SD1_CONTROL_REG				0x2A
+#define AS3722_SDmph_CONTROL_REG			0x2B
+#define AS3722_SD23_CONTROL_REG				0x2C
+#define AS3722_SD4_CONTROL_REG				0x2D
+#define AS3722_SD5_CONTROL_REG				0x2E
+#define AS3722_SD6_CONTROL_REG				0x2F
+#define AS3722_SD_DVM_REG				0x30
+#define AS3722_RESET_REASON_REG				0x31
+#define AS3722_BATTERY_VOLTAGE_MONITOR_REG		0x32
+#define AS3722_STARTUP_CONTROL_REG			0x33
+#define AS3722_RESET_TIMER_REG				0x34
+#define AS3722_REFERENCE_CONTROL_REG			0x35
+#define AS3722_RESET_CONTROL_REG			0x36
+#define AS3722_OVER_TEMP_CONTROL_REG			0x37
+#define AS3722_WATCHDOG_CONTROL_REG			0x38
+#define AS3722_REG_STANDBY_MOD1_REG			0x39
+#define AS3722_REG_STANDBY_MOD2_REG			0x3A
+#define AS3722_REG_STANDBY_MOD3_REG			0x3B
+#define AS3722_ENABLE_CTRL1_REG				0x3C
+#define AS3722_ENABLE_CTRL2_REG				0x3D
+#define AS3722_ENABLE_CTRL3_REG				0x3E
+#define AS3722_ENABLE_CTRL4_REG				0x3F
+#define AS3722_ENABLE_CTRL5_REG				0x40
+#define AS3722_PWM_CONTROL_L_REG			0x41
+#define AS3722_PWM_CONTROL_H_REG			0x42
+#define AS3722_WATCHDOG_TIMER_REG			0x46
+#define AS3722_WATCHDOG_SOFTWARE_SIGNAL_REG		0x48
+#define AS3722_IOVOLTAGE_REG				0x49
+#define AS3722_BATTERY_VOLTAGE_MONITOR2_REG		0x4A
+#define AS3722_SD_CONTROL_REG				0x4D
+#define AS3722_LDOCONTROL0_REG				0x4E
+#define AS3722_LDOCONTROL1_REG				0x4F
+#define AS3722_SD0_PROTECT_REG				0x50
+#define AS3722_SD6_PROTECT_REG				0x51
+#define AS3722_PWM_VCONTROL1_REG			0x52
+#define AS3722_PWM_VCONTROL2_REG			0x53
+#define AS3722_PWM_VCONTROL3_REG			0x54
+#define AS3722_PWM_VCONTROL4_REG			0x55
+#define AS3722_BB_CHARGER_REG				0x57
+#define AS3722_CTRL_SEQU1_REG				0x58
+#define AS3722_CTRL_SEQU2_REG				0x59
+#define AS3722_OVCURRENT_REG				0x5A
+#define AS3722_OVCURRENT_DEB_REG			0x5B
+#define AS3722_SDLV_DEB_REG				0x5C
+#define AS3722_OC_PG_CTRL_REG				0x5D
+#define AS3722_OC_PG_CTRL2_REG				0x5E
+#define AS3722_CTRL_STATUS				0x5F
+#define AS3722_RTC_CONTROL_REG				0x60
+#define AS3722_RTC_SECOND_REG				0x61
+#define AS3722_RTC_MINUTE_REG				0x62
+#define AS3722_RTC_HOUR_REG				0x63
+#define AS3722_RTC_DAY_REG				0x64
+#define AS3722_RTC_MONTH_REG				0x65
+#define AS3722_RTC_YEAR_REG				0x66
+#define AS3722_RTC_ALARM_SECOND_REG			0x67
+#define AS3722_RTC_ALARM_MINUTE_REG			0x68
+#define AS3722_RTC_ALARM_HOUR_REG			0x69
+#define AS3722_RTC_ALARM_DAY_REG			0x6A
+#define AS3722_RTC_ALARM_MONTH_REG			0x6B
+#define AS3722_RTC_ALARM_YEAR_REG			0x6C
+#define AS3722_SRAM_REG					0x6D
+#define AS3722_RTC_ACCESS_REG				0x6F
+#define AS3722_RTC_STATUS_REG				0x73
+#define AS3722_INTERRUPT_MASK1_REG			0x74
+#define AS3722_INTERRUPT_MASK2_REG			0x75
+#define AS3722_INTERRUPT_MASK3_REG			0x76
+#define AS3722_INTERRUPT_MASK4_REG			0x77
+#define AS3722_INTERRUPT_STATUS1_REG			0x78
+#define AS3722_INTERRUPT_STATUS2_REG			0x79
+#define AS3722_INTERRUPT_STATUS3_REG			0x7A
+#define AS3722_INTERRUPT_STATUS4_REG			0x7B
+#define AS3722_TEMP_STATUS_REG				0x7D
+#define AS3722_ADC0_CONTROL_REG				0x80
+#define AS3722_ADC1_CONTROL_REG				0x81
+#define AS3722_ADC0_MSB_RESULT_REG			0x82
+#define AS3722_ADC0_LSB_RESULT_REG			0x83
+#define AS3722_ADC1_MSB_RESULT_REG			0x84
+#define AS3722_ADC1_LSB_RESULT_REG			0x85
+#define AS3722_ADC1_THRESHOLD_HI_MSB_REG		0x86
+#define AS3722_ADC1_THRESHOLD_HI_LSB_REG		0x87
+#define AS3722_ADC1_THRESHOLD_LO_MSB_REG		0x88
+#define AS3722_ADC1_THRESHOLD_LO_LSB_REG		0x89
+#define AS3722_ADC_CONFIGURATION_REG			0x8A
+#define AS3722_ASIC_ID1_REG				0x90
+#define AS3722_ASIC_ID2_REG				0x91
+#define AS3722_LOCK_REG					0x9E
+#define AS3722_MAX_REGISTER				0xF4
+
+#define AS3722_SD0_EXT_ENABLE_MASK			0x03
+#define AS3722_SD1_EXT_ENABLE_MASK			0x0C
+#define AS3722_SD2_EXT_ENABLE_MASK			0x30
+#define AS3722_SD3_EXT_ENABLE_MASK			0xC0
+#define AS3722_SD4_EXT_ENABLE_MASK			0x03
+#define AS3722_SD5_EXT_ENABLE_MASK			0x0C
+#define AS3722_SD6_EXT_ENABLE_MASK			0x30
+#define AS3722_LDO0_EXT_ENABLE_MASK			0x03
+#define AS3722_LDO1_EXT_ENABLE_MASK			0x0C
+#define AS3722_LDO2_EXT_ENABLE_MASK			0x30
+#define AS3722_LDO3_EXT_ENABLE_MASK			0xC0
+#define AS3722_LDO4_EXT_ENABLE_MASK			0x03
+#define AS3722_LDO5_EXT_ENABLE_MASK			0x0C
+#define AS3722_LDO6_EXT_ENABLE_MASK			0x30
+#define AS3722_LDO7_EXT_ENABLE_MASK			0xC0
+#define AS3722_LDO9_EXT_ENABLE_MASK			0x0C
+#define AS3722_LDO10_EXT_ENABLE_MASK			0x30
+#define AS3722_LDO11_EXT_ENABLE_MASK			0xC0
+
+#define AS3722_OVCURRENT_SD0_ALARM_MASK			0x07
+#define AS3722_OVCURRENT_SD0_ALARM_SHIFT		0x01
+#define AS3722_OVCURRENT_SD0_TRIP_MASK			0x18
+#define AS3722_OVCURRENT_SD0_TRIP_SHIFT			0x03
+#define AS3722_OVCURRENT_SD1_TRIP_MASK			0x60
+#define AS3722_OVCURRENT_SD1_TRIP_SHIFT			0x05
+
+#define AS3722_OVCURRENT_SD6_ALARM_MASK			0x07
+#define AS3722_OVCURRENT_SD6_ALARM_SHIFT		0x01
+#define AS3722_OVCURRENT_SD6_TRIP_MASK			0x18
+#define AS3722_OVCURRENT_SD6_TRIP_SHIFT			0x03
+
+/* AS3722 register bits and bit masks */
+#define AS3722_LDO_ILIMIT_MASK				BIT(7)
+#define AS3722_LDO_ILIMIT_BIT				BIT(7)
+#define AS3722_LDO0_VSEL_MASK				0x1F
+#define AS3722_LDO0_VSEL_MIN				0x01
+#define AS3722_LDO0_VSEL_MAX				0x12
+#define AS3722_LDO0_NUM_VOLT				0x12
+#define AS3722_LDO3_VSEL_MASK				0x3F
+#define AS3722_LDO3_VSEL_MIN				0x01
+#define AS3722_LDO3_VSEL_MAX				0x2D
+#define AS3722_LDO3_NUM_VOLT				0x2D
+#define AS3722_LDO_VSEL_MASK				0x7F
+#define AS3722_LDO_VSEL_MIN				0x01
+#define AS3722_LDO_VSEL_MAX				0x7F
+#define AS3722_LDO_VSEL_DNU_MIN				0x25
+#define AS3722_LDO_VSEL_DNU_MAX				0x3F
+#define AS3722_LDO_NUM_VOLT				0x80
+
+#define AS3722_LDO0_CTRL				BIT(0)
+#define AS3722_LDO1_CTRL				BIT(1)
+#define AS3722_LDO2_CTRL				BIT(2)
+#define AS3722_LDO3_CTRL				BIT(3)
+#define AS3722_LDO4_CTRL				BIT(4)
+#define AS3722_LDO5_CTRL				BIT(5)
+#define AS3722_LDO6_CTRL				BIT(6)
+#define AS3722_LDO7_CTRL				BIT(7)
+#define AS3722_LDO9_CTRL				BIT(1)
+#define AS3722_LDO10_CTRL				BIT(2)
+#define AS3722_LDO11_CTRL				BIT(3)
+
+#define AS3722_LDO3_MODE_MASK				(3 << 6)
+#define AS3722_LDO3_MODE_VAL(n)				(((n) & 0x3) << 6)
+#define AS3722_LDO3_MODE_PMOS				AS3722_LDO3_MODE_VAL(0)
+#define AS3722_LDO3_MODE_PMOS_TRACKING			AS3722_LDO3_MODE_VAL(1)
+#define AS3722_LDO3_MODE_NMOS				AS3722_LDO3_MODE_VAL(2)
+#define AS3722_LDO3_MODE_SWITCH				AS3722_LDO3_MODE_VAL(3)
+
+#define AS3722_SD_VSEL_MASK				0x7F
+#define AS3722_SD0_VSEL_MIN				0x01
+#define AS3722_SD0_VSEL_MAX				0x5A
+#define AS3722_SD2_VSEL_MIN				0x01
+#define AS3722_SD2_VSEL_MAX				0x7F
+
+#define AS3722_SDn_CTRL(n)				BIT(n)
+
+#define AS3722_SD0_MODE_FAST				BIT(4)
+#define AS3722_SD1_MODE_FAST				BIT(4)
+#define AS3722_SD2_MODE_FAST				BIT(2)
+#define AS3722_SD3_MODE_FAST				BIT(6)
+#define AS3722_SD4_MODE_FAST				BIT(2)
+#define AS3722_SD5_MODE_FAST				BIT(2)
+#define AS3722_SD6_MODE_FAST				BIT(4)
+
+#define AS3722_POWER_OFF				BIT(1)
+
+#define AS3722_INTERRUPT_MASK1_LID			BIT(0)
+#define AS3722_INTERRUPT_MASK1_ACOK			BIT(1)
+#define AS3722_INTERRUPT_MASK1_ENABLE1			BIT(2)
+#define AS3722_INTERRUPT_MASK1_OCURR_ALARM_SD0		BIT(3)
+#define AS3722_INTERRUPT_MASK1_ONKEY_LONG		BIT(4)
+#define AS3722_INTERRUPT_MASK1_ONKEY			BIT(5)
+#define AS3722_INTERRUPT_MASK1_OVTMP			BIT(6)
+#define AS3722_INTERRUPT_MASK1_LOWBAT			BIT(7)
+
+#define AS3722_INTERRUPT_MASK2_SD0_LV			BIT(0)
+#define AS3722_INTERRUPT_MASK2_SD1_LV			BIT(1)
+#define AS3722_INTERRUPT_MASK2_SD2345_LV		BIT(2)
+#define AS3722_INTERRUPT_MASK2_PWM1_OV_PROT		BIT(3)
+#define AS3722_INTERRUPT_MASK2_PWM2_OV_PROT		BIT(4)
+#define AS3722_INTERRUPT_MASK2_ENABLE2			BIT(5)
+#define AS3722_INTERRUPT_MASK2_SD6_LV			BIT(6)
+#define AS3722_INTERRUPT_MASK2_RTC_REP			BIT(7)
+
+#define AS3722_INTERRUPT_MASK3_RTC_ALARM		BIT(0)
+#define AS3722_INTERRUPT_MASK3_GPIO1			BIT(1)
+#define AS3722_INTERRUPT_MASK3_GPIO2			BIT(2)
+#define AS3722_INTERRUPT_MASK3_GPIO3			BIT(3)
+#define AS3722_INTERRUPT_MASK3_GPIO4			BIT(4)
+#define AS3722_INTERRUPT_MASK3_GPIO5			BIT(5)
+#define AS3722_INTERRUPT_MASK3_WATCHDOG			BIT(6)
+#define AS3722_INTERRUPT_MASK3_ENABLE3			BIT(7)
+
+#define AS3722_INTERRUPT_MASK4_TEMP_SD0_SHUTDOWN	BIT(0)
+#define AS3722_INTERRUPT_MASK4_TEMP_SD1_SHUTDOWN	BIT(1)
+#define AS3722_INTERRUPT_MASK4_TEMP_SD6_SHUTDOWN	BIT(2)
+#define AS3722_INTERRUPT_MASK4_TEMP_SD0_ALARM		BIT(3)
+#define AS3722_INTERRUPT_MASK4_TEMP_SD1_ALARM		BIT(4)
+#define AS3722_INTERRUPT_MASK4_TEMP_SD6_ALARM		BIT(5)
+#define AS3722_INTERRUPT_MASK4_OCCUR_ALARM_SD6		BIT(6)
+#define AS3722_INTERRUPT_MASK4_ADC			BIT(7)
+
+#define AS3722_ADC1_INTERVAL_TIME			BIT(0)
+#define AS3722_ADC1_INT_MODE_ON				BIT(1)
+#define AS3722_ADC_BUF_ON				BIT(2)
+#define AS3722_ADC1_LOW_VOLTAGE_RANGE			BIT(5)
+#define AS3722_ADC1_INTEVAL_SCAN			BIT(6)
+#define AS3722_ADC1_INT_MASK				BIT(7)
+
+#define AS3722_ADC_MSB_VAL_MASK				0x7F
+#define AS3722_ADC_LSB_VAL_MASK				0x07
+
+#define AS3722_ADC0_CONV_START				BIT(7)
+#define AS3722_ADC0_CONV_NOTREADY			BIT(7)
+#define AS3722_ADC0_SOURCE_SELECT_MASK			0x1F
+
+#define AS3722_ADC1_CONV_START				BIT(7)
+#define AS3722_ADC1_CONV_NOTREADY			BIT(7)
+#define AS3722_ADC1_SOURCE_SELECT_MASK			0x1F
+
+/* GPIO modes */
+#define AS3722_GPIO_MODE_MASK				0x07
+#define AS3722_GPIO_MODE_INPUT				0x00
+#define AS3722_GPIO_MODE_OUTPUT_VDDH			0x01
+#define AS3722_GPIO_MODE_IO_OPEN_DRAIN			0x02
+#define AS3722_GPIO_MODE_ADC_IN				0x03
+#define AS3722_GPIO_MODE_INPUT_PULL_UP			0x04
+#define AS3722_GPIO_MODE_INPUT_PULL_DOWN		0x05
+#define AS3722_GPIO_MODE_IO_OPEN_DRAIN_PULL_UP		0x06
+#define AS3722_GPIO_MODE_OUTPUT_VDDL			0x07
+#define AS3722_GPIO_MODE_VAL(n)			((n) & AS3722_GPIO_MODE_MASK)
+
+#define AS3722_GPIO_INV					BIT(7)
+#define AS3722_GPIO_IOSF_MASK				0x78
+#define AS3722_GPIO_IOSF_VAL(n)				(((n) & 0xF) << 3)
+#define AS3722_GPIO_IOSF_NORMAL				AS3722_GPIO_IOSF_VAL(0)
+#define AS3722_GPIO_IOSF_INTERRUPT_OUT			AS3722_GPIO_IOSF_VAL(1)
+#define AS3722_GPIO_IOSF_VSUP_LOW_OUT			AS3722_GPIO_IOSF_VAL(2)
+#define AS3722_GPIO_IOSF_GPIO_INTERRUPT_IN		AS3722_GPIO_IOSF_VAL(3)
+#define AS3722_GPIO_IOSF_ISINK_PWM_IN			AS3722_GPIO_IOSF_VAL(4)
+#define AS3722_GPIO_IOSF_VOLTAGE_STBY			AS3722_GPIO_IOSF_VAL(5)
+#define AS3722_GPIO_IOSF_PWR_GOOD_OUT			AS3722_GPIO_IOSF_VAL(7)
+#define AS3722_GPIO_IOSF_Q32K_OUT			AS3722_GPIO_IOSF_VAL(8)
+#define AS3722_GPIO_IOSF_WATCHDOG_IN			AS3722_GPIO_IOSF_VAL(9)
+#define AS3722_GPIO_IOSF_SOFT_RESET_IN			AS3722_GPIO_IOSF_VAL(11)
+#define AS3722_GPIO_IOSF_PWM_OUT			AS3722_GPIO_IOSF_VAL(12)
+#define AS3722_GPIO_IOSF_VSUP_LOW_DEB_OUT		AS3722_GPIO_IOSF_VAL(13)
+#define AS3722_GPIO_IOSF_SD6_LOW_VOLT_LOW		AS3722_GPIO_IOSF_VAL(14)
+
+#define AS3722_GPIOn_SIGNAL(n)				BIT(n)
+#define AS3722_GPIOn_CONTROL_REG(n)		(AS3722_GPIO0_CONTROL_REG + n)
+#define AS3722_I2C_PULL_UP				BIT(4)
+#define AS3722_INT_PULL_UP				BIT(5)
+
+#define AS3722_RTC_REP_WAKEUP_EN			BIT(0)
+#define AS3722_RTC_ALARM_WAKEUP_EN			BIT(1)
+#define AS3722_RTC_ON					BIT(2)
+#define AS3722_RTC_IRQMODE				BIT(3)
+#define AS3722_RTC_CLK32K_OUT_EN			BIT(5)
+
+#define AS3722_WATCHDOG_TIMER_MAX			0x7F
+#define AS3722_WATCHDOG_ON				BIT(0)
+#define AS3722_WATCHDOG_SW_SIG				BIT(0)
+
+#define AS3722_EXT_CONTROL_ENABLE1			0x1
+#define AS3722_EXT_CONTROL_ENABLE2			0x2
+#define AS3722_EXT_CONTROL_ENABLE3			0x3
+
+/* Interrupt IDs */
+enum as3722_irq {
+	AS3722_IRQ_LID,
+	AS3722_IRQ_ACOK,
+	AS3722_IRQ_ENABLE1,
+	AS3722_IRQ_OCCUR_ALARM_SD0,
+	AS3722_IRQ_ONKEY_LONG_PRESS,
+	AS3722_IRQ_ONKEY,
+	AS3722_IRQ_OVTMP,
+	AS3722_IRQ_LOWBAT,
+	AS3722_IRQ_SD0_LV,
+	AS3722_IRQ_SD1_LV,
+	AS3722_IRQ_SD2_LV,
+	AS3722_IRQ_PWM1_OV_PROT,
+	AS3722_IRQ_PWM2_OV_PROT,
+	AS3722_IRQ_ENABLE2,
+	AS3722_IRQ_SD6_LV,
+	AS3722_IRQ_RTC_REP,
+	AS3722_IRQ_RTC_ALARM,
+	AS3722_IRQ_GPIO1,
+	AS3722_IRQ_GPIO2,
+	AS3722_IRQ_GPIO3,
+	AS3722_IRQ_GPIO4,
+	AS3722_IRQ_GPIO5,
+	AS3722_IRQ_WATCHDOG,
+	AS3722_IRQ_ENABLE3,
+	AS3722_IRQ_TEMP_SD0_SHUTDOWN,
+	AS3722_IRQ_TEMP_SD1_SHUTDOWN,
+	AS3722_IRQ_TEMP_SD2_SHUTDOWN,
+	AS3722_IRQ_TEMP_SD0_ALARM,
+	AS3722_IRQ_TEMP_SD1_ALARM,
+	AS3722_IRQ_TEMP_SD6_ALARM,
+	AS3722_IRQ_OCCUR_ALARM_SD6,
+	AS3722_IRQ_ADC,
+	AS3722_IRQ_MAX,
+};
+
+struct as3722 {
+	struct device *dev;
+	struct regmap *regmap;
+	int chip_irq;
+	unsigned long irq_flags;
+	bool en_intern_int_pullup;
+	bool en_intern_i2c_pullup;
+	struct regmap_irq_chip_data *irq_data;
+};
+
+static inline int as3722_read(struct as3722 *as3722, u32 reg, u32 *dest)
+{
+	return regmap_read(as3722->regmap, reg, dest);
+}
+
+static inline int as3722_write(struct as3722 *as3722, u32 reg, u32 value)
+{
+	return regmap_write(as3722->regmap, reg, value);
+}
+
+static inline int as3722_block_read(struct as3722 *as3722, u32 reg,
+		int count, u8 *buf)
+{
+	return regmap_bulk_read(as3722->regmap, reg, buf, count);
+}
+
+static inline int as3722_block_write(struct as3722 *as3722, u32 reg,
+		int count, u8 *data)
+{
+	return regmap_bulk_write(as3722->regmap, reg, data, count);
+}
+
+static inline int as3722_update_bits(struct as3722 *as3722, u32 reg,
+		u32 mask, u8 val)
+{
+	return regmap_update_bits(as3722->regmap, reg, mask, val);
+}
+
+static inline int as3722_irq_get_virq(struct as3722 *as3722, int irq)
+{
+	return regmap_irq_get_virq(as3722->irq_data, irq);
+}
+#endif /* __LINUX_MFD_AS3722_H__ */
-- 
cgit v1.2.3


From df73de9b0d412915384396637bf67ef9208161e9 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Tue, 22 Oct 2013 16:46:25 +0800
Subject: mfd: syscon: Return -ENOSYS if CONFIG_MFD_SYSCON is not enabled

Some platforms may not define CONFIG_MFD_SYSCON (or haven't syscon),
it can fix build error for these platforms.

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/syscon.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h
index b473577f36db..8789fa3c7fd9 100644
--- a/include/linux/mfd/syscon.h
+++ b/include/linux/mfd/syscon.h
@@ -17,10 +17,35 @@
 
 struct device_node;
 
+#ifdef CONFIG_MFD_SYSCON
 extern struct regmap *syscon_node_to_regmap(struct device_node *np);
 extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s);
 extern struct regmap *syscon_regmap_lookup_by_pdevname(const char *s);
 extern struct regmap *syscon_regmap_lookup_by_phandle(
 					struct device_node *np,
 					const char *property);
+#else
+static inline struct regmap *syscon_node_to_regmap(struct device_node *np)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline struct regmap *syscon_regmap_lookup_by_compatible(const char *s)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline struct regmap *syscon_regmap_lookup_by_pdevname(const char *s)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline struct regmap *syscon_regmap_lookup_by_phandle(
+					struct device_node *np,
+					const char *property)
+{
+	return ERR_PTR(-ENOSYS);
+}
+#endif
+
 #endif /* __LINUX_MFD_SYSCON_H__ */
-- 
cgit v1.2.3


From 4b3db708b114fc35ff1e0cd28a2bfb1490dbb5d3 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Mon, 21 Oct 2013 14:29:25 -0700
Subject: ACPI, x86: Extended error log driver for x86 platform

This H/W error log driver (a.k.a eMCA driver) is implemented based on
http://www.intel.com/content/www/us/en/architecture-and-technology/enhanced-mca-logging-xeon-paper.html

After errors are captured, more detailed platform specific information
can be got via this new enhanced H/W error log driver. Most notably we
can track memory errors back to the DIMM slot silk screen label.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/include/asm/mce.h |   1 +
 drivers/acpi/Kconfig       |  19 +++
 drivers/acpi/Makefile      |   2 +
 drivers/acpi/acpi_extlog.c | 327 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/acpi/bus.c         |   3 +-
 include/linux/acpi.h       |   1 +
 6 files changed, 352 insertions(+), 1 deletion(-)
 create mode 100644 drivers/acpi/acpi_extlog.c

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cbe6b9e404ce..c696a8687567 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -16,6 +16,7 @@
 #define MCG_EXT_CNT_SHIFT	16
 #define MCG_EXT_CNT(c)		(((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
 #define MCG_SER_P		(1ULL<<24)   /* MCA recovery/new status bits */
+#define MCG_ELOG_P		(1ULL<<26)   /* Extended error log supported */
 
 /* MCG_STATUS register defines */
 #define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 6efe2ac6902f..252f0e818a49 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -372,4 +372,23 @@ config ACPI_BGRT
 
 source "drivers/acpi/apei/Kconfig"
 
+config ACPI_EXTLOG
+	tristate "Extended Error Log support"
+	depends on X86_MCE && ACPI_APEI
+	default n
+	help
+	  Certain usages such as Predictive Failure Analysis (PFA) require
+	  more information about the error than what can be described in
+	  processor machine check banks. Most server processors log
+	  additional information about the error in processor uncore
+	  registers. Since the addresses and layout of these registers vary
+	  widely from one processor to another, system software cannot
+	  readily make use of them. To complicate matters further, some of
+	  the additional error information cannot be constructed without
+	  detailed knowledge about platform topology.
+
+	  Enhanced MCA Logging allows firmware to provide additional error
+	  information to system software, synchronous with MCE or CMCI. This
+	  driver adds support for that functionality.
+
 endif	# ACPI
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index cdaf68b58b00..bce34afadcd0 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -82,3 +82,5 @@ processor-$(CONFIG_CPU_FREQ)	+= processor_perflib.o
 obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
 
 obj-$(CONFIG_ACPI_APEI)		+= apei/
+
+obj-$(CONFIG_ACPI_EXTLOG)	+= acpi_extlog.o
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
new file mode 100644
index 000000000000..a6869e110ce5
--- /dev/null
+++ b/drivers/acpi/acpi_extlog.c
@@ -0,0 +1,327 @@
+/*
+ * Extended Error Log driver
+ *
+ * Copyright (C) 2013 Intel Corp.
+ * Author: Chen, Gong <gong.chen@intel.com>
+ *
+ * This file is licensed under GPLv2.
+ */
+
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <acpi/acpi_bus.h>
+#include <linux/cper.h>
+#include <linux/ratelimit.h>
+#include <asm/cpu.h>
+#include <asm/mce.h>
+
+#include "apei/apei-internal.h"
+
+#define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0) /* elog entry address mask */
+
+#define EXTLOG_DSM_REV		0x0
+#define	EXTLOG_FN_QUERY		0x0
+#define	EXTLOG_FN_ADDR		0x1
+
+#define FLAG_OS_OPTIN		BIT(0)
+#define EXTLOG_QUERY_L1_EXIST	BIT(1)
+#define ELOG_ENTRY_VALID	(1ULL<<63)
+#define ELOG_ENTRY_LEN		0x1000
+
+#define EMCA_BUG \
+	"Can not request iomem region <0x%016llx-0x%016llx> - eMCA disabled\n"
+
+struct extlog_l1_head {
+	u32 ver;	/* Header Version */
+	u32 hdr_len;	/* Header Length */
+	u64 total_len;	/* entire L1 Directory length including this header */
+	u64 elog_base;	/* MCA Error Log Directory base address */
+	u64 elog_len;	/* MCA Error Log Directory length */
+	u32 flags;	/* bit 0 - OS/VMM Opt-in */
+	u8  rev0[12];
+	u32 entries;	/* Valid L1 Directory entries per logical processor */
+	u8  rev1[12];
+};
+
+static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295";
+
+/* L1 table related physical address */
+static u64 elog_base;
+static size_t elog_size;
+static u64 l1_dirbase;
+static size_t l1_size;
+
+/* L1 table related virtual address */
+static void __iomem *extlog_l1_addr;
+static void __iomem *elog_addr;
+
+static void *elog_buf;
+
+static u64 *l1_entry_base;
+static u32 l1_percpu_entry;
+
+#define ELOG_IDX(cpu, bank) \
+	(cpu_physical_id(cpu) * l1_percpu_entry + (bank))
+
+#define ELOG_ENTRY_DATA(idx) \
+	(*(l1_entry_base + (idx)))
+
+#define ELOG_ENTRY_ADDR(phyaddr) \
+	(phyaddr - elog_base + (u8 *)elog_addr)
+
+static struct acpi_generic_status *extlog_elog_entry_check(int cpu, int bank)
+{
+	int idx;
+	u64 data;
+	struct acpi_generic_status *estatus;
+
+	WARN_ON(cpu < 0);
+	idx = ELOG_IDX(cpu, bank);
+	data = ELOG_ENTRY_DATA(idx);
+	if ((data & ELOG_ENTRY_VALID) == 0)
+		return NULL;
+
+	data &= EXT_ELOG_ENTRY_MASK;
+	estatus = (struct acpi_generic_status *)ELOG_ENTRY_ADDR(data);
+
+	/* if no valid data in elog entry, just return */
+	if (estatus->block_status == 0)
+		return NULL;
+
+	return estatus;
+}
+
+static void __print_extlog_rcd(const char *pfx,
+			       struct acpi_generic_status *estatus, int cpu)
+{
+	static atomic_t seqno;
+	unsigned int curr_seqno;
+	char pfx_seq[64];
+
+	if (!pfx) {
+		if (estatus->error_severity <= CPER_SEV_CORRECTED)
+			pfx = KERN_INFO;
+		else
+			pfx = KERN_ERR;
+	}
+	curr_seqno = atomic_inc_return(&seqno);
+	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}", pfx, curr_seqno);
+	printk("%s""Hardware error detected on CPU%d\n", pfx_seq, cpu);
+	cper_estatus_print(pfx_seq, estatus);
+}
+
+static int print_extlog_rcd(const char *pfx,
+			    struct acpi_generic_status *estatus, int cpu)
+{
+	/* Not more than 2 messages every 5 seconds */
+	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
+	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
+	struct ratelimit_state *ratelimit;
+
+	if (estatus->error_severity == CPER_SEV_CORRECTED ||
+	    (estatus->error_severity == CPER_SEV_INFORMATIONAL))
+		ratelimit = &ratelimit_corrected;
+	else
+		ratelimit = &ratelimit_uncorrected;
+	if (__ratelimit(ratelimit)) {
+		__print_extlog_rcd(pfx, estatus, cpu);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int extlog_print(struct notifier_block *nb, unsigned long val,
+			void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	int	bank = mce->bank;
+	int	cpu = mce->extcpu;
+	struct acpi_generic_status *estatus;
+	int rc;
+
+	estatus = extlog_elog_entry_check(cpu, bank);
+	if (estatus == NULL)
+		return NOTIFY_DONE;
+
+	memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
+	/* clear record status to enable BIOS to update it again */
+	estatus->block_status = 0;
+
+	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
+
+	return NOTIFY_DONE;
+}
+
+static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret)
+{
+	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct acpi_object_list input;
+	union acpi_object params[4], *obj;
+	u8 uuid[16];
+	int i;
+
+	acpi_str_to_uuid(extlog_dsm_uuid, uuid);
+	input.count = 4;
+	input.pointer = params;
+	params[0].type = ACPI_TYPE_BUFFER;
+	params[0].buffer.length = 16;
+	params[0].buffer.pointer = uuid;
+	params[1].type = ACPI_TYPE_INTEGER;
+	params[1].integer.value = rev;
+	params[2].type = ACPI_TYPE_INTEGER;
+	params[2].integer.value = func;
+	params[3].type = ACPI_TYPE_PACKAGE;
+	params[3].package.count = 0;
+	params[3].package.elements = NULL;
+
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_DSM", &input, &buf)))
+		return -1;
+
+	*ret = 0;
+	obj = (union acpi_object *)buf.pointer;
+	if (obj->type == ACPI_TYPE_INTEGER) {
+		*ret = obj->integer.value;
+	} else if (obj->type == ACPI_TYPE_BUFFER) {
+		if (obj->buffer.length <= 8) {
+			for (i = 0; i < obj->buffer.length; i++)
+				*ret |= (obj->buffer.pointer[i] << (i * 8));
+		}
+	}
+	kfree(buf.pointer);
+
+	return 0;
+}
+
+static bool extlog_get_l1addr(void)
+{
+	acpi_handle handle;
+	u64 ret;
+
+	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
+		return false;
+
+	if (extlog_get_dsm(handle, EXTLOG_DSM_REV, EXTLOG_FN_QUERY, &ret) ||
+	    !(ret & EXTLOG_QUERY_L1_EXIST))
+		return false;
+
+	if (extlog_get_dsm(handle, EXTLOG_DSM_REV, EXTLOG_FN_ADDR, &ret))
+		return false;
+
+	l1_dirbase = ret;
+	/* Spec says L1 directory must be 4K aligned, bail out if it isn't */
+	if (l1_dirbase & ((1 << 12) - 1)) {
+		pr_warn(FW_BUG "L1 Directory is invalid at physical %llx\n",
+			l1_dirbase);
+		return false;
+	}
+
+	return true;
+}
+static struct notifier_block extlog_mce_dec = {
+	.notifier_call	= extlog_print,
+};
+
+static int __init extlog_init(void)
+{
+	struct extlog_l1_head *l1_head;
+	void __iomem *extlog_l1_hdr;
+	size_t l1_hdr_size;
+	struct resource *r;
+	u64 cap;
+	int rc;
+
+	rc = -ENODEV;
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	if (!(cap & MCG_ELOG_P))
+		return rc;
+
+	if (!extlog_get_l1addr())
+		return rc;
+
+	rc = -EINVAL;
+	/* get L1 header to fetch necessary information */
+	l1_hdr_size = sizeof(struct extlog_l1_head);
+	r = request_mem_region(l1_dirbase, l1_hdr_size, "L1 DIR HDR");
+	if (!r) {
+		pr_warn(FW_BUG EMCA_BUG,
+			(unsigned long long)l1_dirbase,
+			(unsigned long long)l1_dirbase + l1_hdr_size);
+		goto err;
+	}
+
+	extlog_l1_hdr = acpi_os_map_memory(l1_dirbase, l1_hdr_size);
+	l1_head = (struct extlog_l1_head *)extlog_l1_hdr;
+	l1_size = l1_head->total_len;
+	l1_percpu_entry = l1_head->entries;
+	elog_base = l1_head->elog_base;
+	elog_size = l1_head->elog_len;
+	acpi_os_unmap_memory(extlog_l1_hdr, l1_hdr_size);
+	release_mem_region(l1_dirbase, l1_hdr_size);
+
+	/* remap L1 header again based on completed information */
+	r = request_mem_region(l1_dirbase, l1_size, "L1 Table");
+	if (!r) {
+		pr_warn(FW_BUG EMCA_BUG,
+			(unsigned long long)l1_dirbase,
+			(unsigned long long)l1_dirbase + l1_size);
+		goto err;
+	}
+	extlog_l1_addr = acpi_os_map_memory(l1_dirbase, l1_size);
+	l1_entry_base = (u64 *)((u8 *)extlog_l1_addr + l1_hdr_size);
+
+	/* remap elog table */
+	r = request_mem_region(elog_base, elog_size, "Elog Table");
+	if (!r) {
+		pr_warn(FW_BUG EMCA_BUG,
+			(unsigned long long)elog_base,
+			(unsigned long long)elog_base + elog_size);
+		goto err_release_l1_dir;
+	}
+	elog_addr = acpi_os_map_memory(elog_base, elog_size);
+
+	rc = -ENOMEM;
+	/* allocate buffer to save elog record */
+	elog_buf = kmalloc(ELOG_ENTRY_LEN, GFP_KERNEL);
+	if (elog_buf == NULL)
+		goto err_release_elog;
+
+	mce_register_decode_chain(&extlog_mce_dec);
+	/* enable OS to be involved to take over management from BIOS */
+	((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
+
+	return 0;
+
+err_release_elog:
+	if (elog_addr)
+		acpi_os_unmap_memory(elog_addr, elog_size);
+	release_mem_region(elog_base, elog_size);
+err_release_l1_dir:
+	if (extlog_l1_addr)
+		acpi_os_unmap_memory(extlog_l1_addr, l1_size);
+	release_mem_region(l1_dirbase, l1_size);
+err:
+	pr_warn(FW_BUG "Extended error log disabled because of problems parsing f/w tables\n");
+	return rc;
+}
+
+static void __exit extlog_exit(void)
+{
+	mce_unregister_decode_chain(&extlog_mce_dec);
+	((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
+	if (extlog_l1_addr)
+		acpi_os_unmap_memory(extlog_l1_addr, l1_size);
+	if (elog_addr)
+		acpi_os_unmap_memory(elog_addr, elog_size);
+	release_mem_region(elog_base, elog_size);
+	release_mem_region(l1_dirbase, l1_size);
+	kfree(elog_buf);
+}
+
+module_init(extlog_init);
+module_exit(extlog_exit);
+
+MODULE_AUTHOR("Chen, Gong <gong.chen@intel.com>");
+MODULE_DESCRIPTION("Extended MCA Error Log Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index b587ec8257b2..e1bd9a181117 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -174,7 +174,7 @@ static void acpi_print_osc_error(acpi_handle handle,
 	printk("\n");
 }
 
-static acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
+acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
 {
 	int i;
 	static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21,
@@ -195,6 +195,7 @@ static acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
 	}
 	return AE_OK;
 }
+EXPORT_SYMBOL_GPL(acpi_str_to_uuid);
 
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
 {
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a5db4aeefa36..c30bac8503bc 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -311,6 +311,7 @@ struct acpi_osc_context {
 #define OSC_INVALID_REVISION_ERROR	8
 #define OSC_CAPABILITIES_MASK_ERROR	16
 
+acpi_status acpi_str_to_uuid(char *str, u8 *uuid);
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
 /* platform-wide _OSC bits */
-- 
cgit v1.2.3


From dd6dad4288cb93e79bd7abfa6c6a338c47454d1a Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:29:25 -0700
Subject: DMI: Parse memory device (type 17) in SMBIOS

This patch adds a new interface to decode memory device (type 17)
to help error reporting on DIMMs.

Original-author: Tony Luck <tony.luck@intel.com>
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/setup.c    |  1 +
 arch/x86/kernel/setup.c     |  1 +
 drivers/firmware/dmi_scan.c | 60 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dmi.h         |  5 ++++
 4 files changed, 67 insertions(+)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4fc2e9569bb2..d86669bcdfb2 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -1063,6 +1063,7 @@ check_bugs (void)
 static int __init run_dmi_scan(void)
 {
 	dmi_scan_machine();
+	dmi_memdev_walk();
 	dmi_set_dump_stack_arch_desc();
 	return 0;
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f0de6294b955..918d489fa53d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -993,6 +993,7 @@ void __init setup_arch(char **cmdline_p)
 		efi_init();
 
 	dmi_scan_machine();
+	dmi_memdev_walk();
 	dmi_set_dump_stack_arch_desc();
 
 	/*
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index fa0affb699b4..59579a744d58 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -25,6 +25,13 @@ static int dmi_initialized;
 /* DMI system identification string used during boot */
 static char dmi_ids_string[128] __initdata;
 
+static struct dmi_memdev_info {
+	const char *device;
+	const char *bank;
+	u16 handle;
+} *dmi_memdev;
+static int dmi_memdev_nr;
+
 static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s)
 {
 	const u8 *bp = ((u8 *) dm) + dm->length;
@@ -322,6 +329,42 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm)
 	dmi_save_one_device(*d & 0x7f, dmi_string_nosave(dm, *(d - 1)));
 }
 
+static void __init count_mem_devices(const struct dmi_header *dm, void *v)
+{
+	if (dm->type != DMI_ENTRY_MEM_DEVICE)
+		return;
+	dmi_memdev_nr++;
+}
+
+static void __init save_mem_devices(const struct dmi_header *dm, void *v)
+{
+	const char *d = (const char *)dm;
+	static int nr;
+
+	if (dm->type != DMI_ENTRY_MEM_DEVICE)
+		return;
+	if (nr >= dmi_memdev_nr) {
+		pr_warn(FW_BUG "Too many DIMM entries in SMBIOS table\n");
+		return;
+	}
+	dmi_memdev[nr].handle = dm->handle;
+	dmi_memdev[nr].device = dmi_string(dm, d[0x10]);
+	dmi_memdev[nr].bank = dmi_string(dm, d[0x11]);
+	nr++;
+}
+
+void __init dmi_memdev_walk(void)
+{
+	if (!dmi_available)
+		return;
+
+	if (dmi_walk_early(count_mem_devices) == 0 && dmi_memdev_nr) {
+		dmi_memdev = dmi_alloc(sizeof(*dmi_memdev) * dmi_memdev_nr);
+		if (dmi_memdev)
+			dmi_walk_early(save_mem_devices);
+	}
+}
+
 /*
  *	Process a DMI table entry. Right now all we care about are the BIOS
  *	and machine entries. For 2.5 we should pull the smbus controller info
@@ -815,3 +858,20 @@ bool dmi_match(enum dmi_field f, const char *str)
 	return !strcmp(info, str);
 }
 EXPORT_SYMBOL_GPL(dmi_match);
+
+void dmi_memdev_name(u16 handle, const char **bank, const char **device)
+{
+	int n;
+
+	if (dmi_memdev == NULL)
+		return;
+
+	for (n = 0; n < dmi_memdev_nr; n++) {
+		if (handle == dmi_memdev[n].handle) {
+			*bank = dmi_memdev[n].bank;
+			*device = dmi_memdev[n].device;
+			break;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(dmi_memdev_name);
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index b6eb7a05d58e..f820f0a336c9 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -99,6 +99,7 @@ extern const char * dmi_get_system_info(int field);
 extern const struct dmi_device * dmi_find_device(int type, const char *name,
 	const struct dmi_device *from);
 extern void dmi_scan_machine(void);
+extern void dmi_memdev_walk(void);
 extern void dmi_set_dump_stack_arch_desc(void);
 extern bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp);
 extern int dmi_name_in_vendors(const char *str);
@@ -107,6 +108,7 @@ extern int dmi_available;
 extern int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	void *private_data);
 extern bool dmi_match(enum dmi_field f, const char *str);
+extern void dmi_memdev_name(u16 handle, const char **bank, const char **device);
 
 #else
 
@@ -115,6 +117,7 @@ static inline const char * dmi_get_system_info(int field) { return NULL; }
 static inline const struct dmi_device * dmi_find_device(int type, const char *name,
 	const struct dmi_device *from) { return NULL; }
 static inline void dmi_scan_machine(void) { return; }
+static inline void dmi_memdev_walk(void) { }
 static inline void dmi_set_dump_stack_arch_desc(void) { }
 static inline bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp)
 {
@@ -133,6 +136,8 @@ static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	void *private_data) { return -1; }
 static inline bool dmi_match(enum dmi_field f, const char *str)
 	{ return false; }
+static inline void dmi_memdev_name(u16 handle, const char **bank,
+		const char **device) { }
 static inline const struct dmi_system_id *
 	dmi_first_match(const struct dmi_system_id *list) { return NULL; }
 
-- 
cgit v1.2.3


From 147de14772ed897727dba7353916b02d1e0f17f4 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:30:13 -0700
Subject: ACPI, APEI, CPER: Add UEFI 2.4 support for memory error

In latest UEFI spec(by now it is 2.4) memory error definition
for CPER (UEFI 2.4 Appendix N Common Platform Error Record)
adds some new fields. These fields help people to locate
memory error to an actual DIMM location.

Original-author: Tony Luck <tony.luck@intel.com>
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce-apei.c |  3 +--
 drivers/acpi/apei/cper.c              |  7 ++++---
 drivers/acpi/apei/ghes.c              |  4 ++--
 drivers/edac/ghes_edac.c              |  5 ++---
 include/linux/cper.h                  | 11 +++++++++--
 5 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index cd8b166a1735..de8b60a53f69 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -42,8 +42,7 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
 	struct mce m;
 
 	/* Only corrected MC is reported */
-	if (!corrected || !(mem_err->validation_bits &
-				CPER_MEM_VALID_PHYSICAL_ADDRESS))
+	if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
 		return;
 
 	mce_setup(&m);
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index eb5f6d6d7dbc..946ef520186f 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -8,7 +8,7 @@
  * various tables, such as ERST, BERT and HEST etc.
  *
  * For more information about CPER, please refer to Appendix N of UEFI
- * Specification version 2.3.
+ * Specification version 2.4.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version
@@ -191,16 +191,17 @@ static const char *cper_mem_err_type_strs[] = {
 	"memory sparing",
 	"scrub corrected error",
 	"scrub uncorrected error",
+	"physical memory map-out event",
 };
 
 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 {
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
 		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
-	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
+	if (mem->validation_bits & CPER_MEM_VALID_PA)
 		printk("%s""physical_address: 0x%016llx\n",
 		       pfx, mem->physical_addr);
-	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
+	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
 		printk("%s""physical_address_mask: 0x%016llx\n",
 		       pfx, mem->physical_addr_mask);
 	if (mem->validation_bits & CPER_MEM_VALID_NODE)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0db6e4ff6501..a30bc313787b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -419,7 +419,7 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
 
 	if (sec_sev == GHES_SEV_CORRECTED &&
 	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) &&
-	    (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) {
+	    (mem_err->validation_bits & CPER_MEM_VALID_PA)) {
 		pfn = mem_err->physical_addr >> PAGE_SHIFT;
 		if (pfn_valid(pfn))
 			memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
@@ -430,7 +430,7 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
 	}
 	if (sev == GHES_SEV_RECOVERABLE &&
 	    sec_sev == GHES_SEV_RECOVERABLE &&
-	    mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
+	    mem_err->validation_bits & CPER_MEM_VALID_PA) {
 		pfn = mem_err->physical_addr >> PAGE_SHIFT;
 		memory_failure_queue(pfn, 0, 0);
 	}
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index bb534670ec02..0ad797b9db65 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -297,15 +297,14 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
 	}
 
 	/* Error address */
-	if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
+	if (mem_err->validation_bits & CPER_MEM_VALID_PA) {
 		e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
 		e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
 	}
 
 	/* Error grain */
-	if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) {
+	if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
 		e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
-	}
 
 	/* Memory error location, mapped on e->location */
 	p = e->location;
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 09ebe2113641..2fc0ec3d89cc 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -218,8 +218,8 @@ enum {
 #define CPER_PROC_VALID_IP			0x1000
 
 #define CPER_MEM_VALID_ERROR_STATUS		0x0001
-#define CPER_MEM_VALID_PHYSICAL_ADDRESS		0x0002
-#define CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK	0x0004
+#define CPER_MEM_VALID_PA			0x0002
+#define CPER_MEM_VALID_PA_MASK			0x0004
 #define CPER_MEM_VALID_NODE			0x0008
 #define CPER_MEM_VALID_CARD			0x0010
 #define CPER_MEM_VALID_MODULE			0x0020
@@ -232,6 +232,9 @@ enum {
 #define CPER_MEM_VALID_RESPONDER_ID		0x1000
 #define CPER_MEM_VALID_TARGET_ID		0x2000
 #define CPER_MEM_VALID_ERROR_TYPE		0x4000
+#define CPER_MEM_VALID_RANK_NUMBER		0x8000
+#define CPER_MEM_VALID_CARD_HANDLE		0x10000
+#define CPER_MEM_VALID_MODULE_HANDLE		0x20000
 
 #define CPER_PCIE_VALID_PORT_TYPE		0x0001
 #define CPER_PCIE_VALID_VERSION			0x0002
@@ -347,6 +350,10 @@ struct cper_sec_mem_err {
 	__u64	responder_id;
 	__u64	target_id;
 	__u8	error_type;
+	__u8	reserved;
+	__u16	rank;
+	__u16	mem_array_handle;	/* card handle in UEFI 2.4 */
+	__u16	mem_dev_handle;		/* module handle in UEFI 2.4 */
 };
 
 struct cper_sec_pcie {
-- 
cgit v1.2.3


From 56507694de3453076d73e0e9813349586ee67e59 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:30:38 -0700
Subject: EDAC, GHES: Update ghes error record info

In latest UEFI spec(by now it's 2.4) there are some new
fields for memory error reporting. Add these new fields for
ghes_edac interface.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Cc: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/edac/ghes_edac.c | 11 +++++++++++
 include/linux/edac.h     |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 0ad797b9db65..d5a98a45c062 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -314,6 +314,8 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
 		p += sprintf(p, "card:%d ", mem_err->card);
 	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
 		p += sprintf(p, "module:%d ", mem_err->module);
+	if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
+		p += sprintf(p, "rank:%d ", mem_err->rank);
 	if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
 		p += sprintf(p, "bank:%d ", mem_err->bank);
 	if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
@@ -322,6 +324,15 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
 		p += sprintf(p, "col:%d ", mem_err->column);
 	if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
 		p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
+	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
+		const char *bank = NULL, *device = NULL;
+		dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
+		if (bank != NULL && device != NULL)
+			p += sprintf(p, "DIMM location:%s %s ", bank, device);
+		else
+			p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
+				     mem_err->mem_dev_handle);
+	}
 	if (p > e->location)
 		*(p - 1) = '\0';
 
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 5c6d7fbaf89e..dbdffe8d4469 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -51,7 +51,7 @@ static inline void opstate_init(void)
 #define EDAC_MC_LABEL_LEN	31
 
 /* Maximum size of the location string */
-#define LOCATION_SIZE 80
+#define LOCATION_SIZE 256
 
 /* Defines the maximum number of labels that can be reported */
 #define EDAC_MAX_LABELS		8
-- 
cgit v1.2.3


From 0c02c8007ea5554d028f99fd3e29fc201fdeeab3 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Thu, 19 Sep 2013 11:22:36 -0500
Subject: of/irq: Rename of_irq_map_* functions to of_irq_parse_*

The OF irq handling code has been overloading the term 'map' to refer to
both parsing the data in the device tree and mapping it to the internal
linux irq system. This is probably because the device tree does have the
concept of an 'interrupt-map' function for translating interrupt
references from one node to another, but 'map' is still confusing when
the primary purpose of some of the functions are to parse the DT data.

This patch renames all the of_irq_map_* functions to of_irq_parse_*
which makes it clear that there is a difference between the parsing
phase and the mapping phase. Kernel code can make use of just the
parsing or just the mapping support as needed by the subsystem.

The patch was generated mechanically with a handful of sed commands.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/arm/mach-integrator/pci_v3.c              |  4 ++--
 arch/microblaze/pci/pci-common.c               |  2 +-
 arch/mips/pci/fixup-lantiq.c                   |  2 +-
 arch/mips/pci/pci-rt3883.c                     |  2 +-
 arch/powerpc/kernel/pci-common.c               |  2 +-
 arch/powerpc/platforms/cell/celleb_scc_pciex.c |  2 +-
 arch/powerpc/platforms/cell/celleb_scc_sio.c   |  2 +-
 arch/powerpc/platforms/cell/spider-pic.c       |  2 +-
 arch/powerpc/platforms/cell/spu_manage.c       |  2 +-
 arch/powerpc/platforms/fsl_uli1575.c           |  2 +-
 arch/powerpc/platforms/powermac/pic.c          |  2 +-
 arch/powerpc/platforms/pseries/event_sources.c |  2 +-
 arch/powerpc/sysdev/mpic_msi.c                 |  2 +-
 arch/x86/kernel/devicetree.c                   |  2 +-
 drivers/of/address.c                           |  4 ++--
 drivers/of/irq.c                               | 28 +++++++++++++-------------
 drivers/of/of_pci_irq.c                        | 10 ++++-----
 drivers/pci/host/pci-mvebu.c                   |  2 +-
 include/linux/of_irq.h                         |  8 ++++----
 include/linux/of_pci.h                         |  2 +-
 20 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c
index bef100527c42..0f496cc51f36 100644
--- a/arch/arm/mach-integrator/pci_v3.c
+++ b/arch/arm/mach-integrator/pci_v3.c
@@ -840,9 +840,9 @@ static int __init pci_v3_map_irq_dt(const struct pci_dev *dev, u8 slot, u8 pin)
 	struct of_irq oirq;
 	int ret;
 
-	ret = of_irq_map_pci(dev, &oirq);
+	ret = of_irq_parse_pci(dev, &oirq);
 	if (ret) {
-		dev_err(&dev->dev, "of_irq_map_pci() %d\n", ret);
+		dev_err(&dev->dev, "of_irq_parse_pci() %d\n", ret);
 		/* Proper return code 0 == NO_IRQ */
 		return 0;
 	}
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 1b93bf0892a0..4d5b1a9cd790 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -217,7 +217,7 @@ int pci_read_irq_line(struct pci_dev *pci_dev)
 	memset(&oirq, 0xff, sizeof(oirq));
 #endif
 	/* Try to get a mapping from the device-tree */
-	if (of_irq_map_pci(pci_dev, &oirq)) {
+	if (of_irq_parse_pci(pci_dev, &oirq)) {
 		u8 line, pin;
 
 		/* If that fails, lets fallback to what is in the config
diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c
index 6c829df28dc7..2e8dbfedae53 100644
--- a/arch/mips/pci/fixup-lantiq.c
+++ b/arch/mips/pci/fixup-lantiq.c
@@ -28,7 +28,7 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 	struct of_irq dev_irq;
 	int irq;
 
-	if (of_irq_map_pci(dev, &dev_irq)) {
+	if (of_irq_parse_pci(dev, &dev_irq)) {
 		dev_err(&dev->dev, "trying to map irq for unknown slot:%d pin:%d\n",
 			slot, pin);
 		return 0;
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index 95c9d41382e7..cae92a05ee70 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -587,7 +587,7 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 	int err;
 	int irq;
 
-	err = of_irq_map_pci(dev, &dev_irq);
+	err = of_irq_parse_pci(dev, &dev_irq);
 	if (err) {
 		pr_err("pci %s: unable to get irq map, err=%d\n",
 		       pci_name((struct pci_dev *) dev), err);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 905a24bb7acc..2f4185425ed5 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -237,7 +237,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
 	memset(&oirq, 0xff, sizeof(oirq));
 #endif
 	/* Try to get a mapping from the device-tree */
-	if (of_irq_map_pci(pci_dev, &oirq)) {
+	if (of_irq_parse_pci(pci_dev, &oirq)) {
 		u8 line, pin;
 
 		/* If that fails, lets fallback to what is in the config
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
index 14be2bd358b8..40bc371096b7 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -507,7 +507,7 @@ static __init int celleb_setup_pciex(struct device_node *node,
 	phb->ops = &scc_pciex_pci_ops;
 
 	/* internal interrupt handler */
-	if (of_irq_map_one(node, 1, &oirq)) {
+	if (of_irq_parse_one(node, 1, &oirq)) {
 		pr_err("PCIEXC:Failed to map irq\n");
 		goto error;
 	}
diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c
index 9c339ec646f5..96388b202f4e 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_sio.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c
@@ -53,7 +53,7 @@ static int __init txx9_serial_init(void)
 			if (!(txx9_serial_bitmap & (1<<i)))
 				continue;
 
-			if (of_irq_map_one(node, i, &irq))
+			if (of_irq_parse_one(node, i, &irq))
 				continue;
 			if (of_address_to_resource(node,
 				txx9_scc_tab[i].index, &res))
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 8e299447127e..b491f406560a 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -236,7 +236,7 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 	 * tree in case the device-tree is ever fixed
 	 */
 	struct of_irq oirq;
-	if (of_irq_map_one(pic->host->of_node, 0, &oirq) == 0) {
+	if (of_irq_parse_one(pic->host->of_node, 0, &oirq) == 0) {
 		virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
 					     oirq.size);
 		return virq;
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 2bb6977c0a5a..e6cdb81a0959 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -182,7 +182,7 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 	int i;
 
 	for (i=0; i < 3; i++) {
-		ret = of_irq_map_one(np, i, &oirq);
+		ret = of_irq_parse_one(np, i, &oirq);
 		if (ret) {
 			pr_debug("spu_new: failed to get irq %d\n", i);
 			goto err;
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index 92ac9b52b32d..ac539c1cd808 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -333,7 +333,7 @@ static void hpcd_final_uli5288(struct pci_dev *dev)
 
 	laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8);
 	laddr[1] = laddr[2] = 0;
-	of_irq_map_raw(hosenode, &pin, 1, laddr, &oirq);
+	of_irq_parse_raw(hosenode, &pin, 1, laddr, &oirq);
 	virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
 				     oirq.size);
 	dev->irq = virq;
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 31036b56670e..720663e29f47 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -393,7 +393,7 @@ static void __init pmac_pic_probe_oldstyle(void)
 #endif
 }
 
-int of_irq_map_oldworld(struct device_node *device, int index,
+int of_irq_parse_oldworld(struct device_node *device, int index,
 			struct of_irq *out_irq)
 {
 	const u32 *ints = NULL;
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index 2605c310166a..850c18c457ad 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -55,7 +55,7 @@ void request_event_sources_irqs(struct device_node *np,
 	/* Else use normal interrupt tree parsing */
 	else {
 		/* First try to do a proper OF tree parsing */
-		for (index = 0; of_irq_map_one(np, index, &oirq) == 0;
+		for (index = 0; of_irq_parse_one(np, index, &oirq) == 0;
 		     index++) {
 			if (count > 15)
 				break;
diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c
index bbf342c88314..463e3a7c193c 100644
--- a/arch/powerpc/sysdev/mpic_msi.c
+++ b/arch/powerpc/sysdev/mpic_msi.c
@@ -63,7 +63,7 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
 		pr_debug("mpic: mapping hwirqs for %s\n", np->full_name);
 
 		index = 0;
-		while (of_irq_map_one(np, index++, &oirq) == 0) {
+		while (of_irq_parse_one(np, index++, &oirq) == 0) {
 			ops->xlate(mpic->irqhost, NULL, oirq.specifier,
 						oirq.size, &hwirq, &flags);
 			msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, hwirq);
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 376dc7873447..3ac6398e5361 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -116,7 +116,7 @@ static int x86_of_pci_irq_enable(struct pci_dev *dev)
 	if (!pin)
 		return 0;
 
-	ret = of_irq_map_pci(dev, &oirq);
+	ret = of_irq_parse_pci(dev, &oirq);
 	if (ret)
 		return ret;
 
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 71180b91bfbe..994f293baebf 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -532,12 +532,12 @@ static u64 __of_translate_address(struct device_node *dev,
 		pbus->count_cells(dev, &pna, &pns);
 		if (!OF_CHECK_COUNTS(pna, pns)) {
 			printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
-			       dev->full_name);
+			       of_node_full_name(dev));
 			break;
 		}
 
 		pr_debug("OF: parent bus is %s (na=%d, ns=%d) on %s\n",
-		    pbus->name, pna, pns, parent->full_name);
+		    pbus->name, pna, pns, of_node_full_name(parent));
 
 		/* Apply bus translation */
 		if (of_translate_one(dev, bus, pbus, addr, na, ns, pna, rprop))
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 90068f914911..410aa2415f42 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -31,14 +31,14 @@
  * @dev: Device node of the device whose interrupt is to be mapped
  * @index: Index of the interrupt to map
  *
- * This function is a wrapper that chains of_irq_map_one() and
+ * This function is a wrapper that chains of_irq_parse_one() and
  * irq_create_of_mapping() to make things easier to callers
  */
 unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
 {
 	struct of_irq oirq;
 
-	if (of_irq_map_one(dev, index, &oirq))
+	if (of_irq_parse_one(dev, index, &oirq))
 		return 0;
 
 	return irq_create_of_mapping(oirq.controller, oirq.specifier,
@@ -79,7 +79,7 @@ struct device_node *of_irq_find_parent(struct device_node *child)
 }
 
 /**
- * of_irq_map_raw - Low level interrupt tree parsing
+ * of_irq_parse_raw - Low level interrupt tree parsing
  * @parent:	the device interrupt parent
  * @intspec:	interrupt specifier ("interrupts" property of the device)
  * @ointsize:   size of the passed in interrupt specifier
@@ -93,7 +93,7 @@ struct device_node *of_irq_find_parent(struct device_node *child)
  * properties, for example when resolving PCI interrupts when no device
  * node exist for the parent.
  */
-int of_irq_map_raw(struct device_node *parent, const __be32 *intspec,
+int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 		   u32 ointsize, const __be32 *addr, struct of_irq *out_irq)
 {
 	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
@@ -101,7 +101,7 @@ int of_irq_map_raw(struct device_node *parent, const __be32 *intspec,
 	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
 	int imaplen, match, i;
 
-	pr_debug("of_irq_map_raw: par=%s,intspec=[0x%08x 0x%08x...],ointsize=%d\n",
+	pr_debug("of_irq_parse_raw: par=%s,intspec=[0x%08x 0x%08x...],ointsize=%d\n",
 		 of_node_full_name(parent), be32_to_cpup(intspec),
 		 be32_to_cpup(intspec + 1), ointsize);
 
@@ -126,7 +126,7 @@ int of_irq_map_raw(struct device_node *parent, const __be32 *intspec,
 		goto fail;
 	}
 
-	pr_debug("of_irq_map_raw: ipar=%s, size=%d\n", of_node_full_name(ipar), intsize);
+	pr_debug("of_irq_parse_raw: ipar=%s, size=%d\n", of_node_full_name(ipar), intsize);
 
 	if (ointsize != intsize)
 		return -EINVAL;
@@ -269,29 +269,29 @@ int of_irq_map_raw(struct device_node *parent, const __be32 *intspec,
 
 	return -EINVAL;
 }
-EXPORT_SYMBOL_GPL(of_irq_map_raw);
+EXPORT_SYMBOL_GPL(of_irq_parse_raw);
 
 /**
- * of_irq_map_one - Resolve an interrupt for a device
+ * of_irq_parse_one - Resolve an interrupt for a device
  * @device: the device whose interrupt is to be resolved
  * @index: index of the interrupt to resolve
  * @out_irq: structure of_irq filled by this function
  *
  * This function resolves an interrupt, walking the tree, for a given
- * device-tree node. It's the high level pendant to of_irq_map_raw().
+ * device-tree node. It's the high level pendant to of_irq_parse_raw().
  */
-int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq)
+int of_irq_parse_one(struct device_node *device, int index, struct of_irq *out_irq)
 {
 	struct device_node *p;
 	const __be32 *intspec, *tmp, *addr;
 	u32 intsize, intlen;
 	int res = -EINVAL;
 
-	pr_debug("of_irq_map_one: dev=%s, index=%d\n", of_node_full_name(device), index);
+	pr_debug("of_irq_parse_one: dev=%s, index=%d\n", of_node_full_name(device), index);
 
 	/* OldWorld mac stuff is "special", handle out of line */
 	if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)
-		return of_irq_map_oldworld(device, index, out_irq);
+		return of_irq_parse_oldworld(device, index, out_irq);
 
 	/* Get the interrupts property */
 	intspec = of_get_property(device, "interrupts", &intlen);
@@ -322,13 +322,13 @@ int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq
 		goto out;
 
 	/* Get new specifier and map it */
-	res = of_irq_map_raw(p, intspec + index * intsize, intsize,
+	res = of_irq_parse_raw(p, intspec + index * intsize, intsize,
 			     addr, out_irq);
  out:
 	of_node_put(p);
 	return res;
 }
-EXPORT_SYMBOL_GPL(of_irq_map_one);
+EXPORT_SYMBOL_GPL(of_irq_parse_one);
 
 /**
  * of_irq_to_resource - Decode a node's IRQ and return it as a resource
diff --git a/drivers/of/of_pci_irq.c b/drivers/of/of_pci_irq.c
index 677053813211..dceec1048dab 100644
--- a/drivers/of/of_pci_irq.c
+++ b/drivers/of/of_pci_irq.c
@@ -5,7 +5,7 @@
 #include <asm/prom.h>
 
 /**
- * of_irq_map_pci - Resolve the interrupt for a PCI device
+ * of_irq_parse_pci - Resolve the interrupt for a PCI device
  * @pdev:       the device whose interrupt is to be resolved
  * @out_irq:    structure of_irq filled by this function
  *
@@ -15,7 +15,7 @@
  * PCI tree until an device-node is found, at which point it will finish
  * resolving using the OF tree walking.
  */
-int of_irq_map_pci(const struct pci_dev *pdev, struct of_irq *out_irq)
+int of_irq_parse_pci(const struct pci_dev *pdev, struct of_irq *out_irq)
 {
 	struct device_node *dn, *ppnode;
 	struct pci_dev *ppdev;
@@ -30,7 +30,7 @@ int of_irq_map_pci(const struct pci_dev *pdev, struct of_irq *out_irq)
 	 */
 	dn = pci_device_to_OF_node(pdev);
 	if (dn) {
-		rc = of_irq_map_one(dn, 0, out_irq);
+		rc = of_irq_parse_one(dn, 0, out_irq);
 		if (!rc)
 			return rc;
 	}
@@ -88,6 +88,6 @@ int of_irq_map_pci(const struct pci_dev *pdev, struct of_irq *out_irq)
 	lspec_be = cpu_to_be32(lspec);
 	laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8));
 	laddr[1]  = laddr[2] = cpu_to_be32(0);
-	return of_irq_map_raw(ppnode, &lspec_be, 1, laddr, out_irq);
+	return of_irq_parse_raw(ppnode, &lspec_be, 1, laddr, out_irq);
 }
-EXPORT_SYMBOL_GPL(of_irq_map_pci);
+EXPORT_SYMBOL_GPL(of_irq_parse_pci);
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index 729d5a101d62..05f81807d05b 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -650,7 +650,7 @@ static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 	struct of_irq oirq;
 	int ret;
 
-	ret = of_irq_map_pci(dev, &oirq);
+	ret = of_irq_parse_pci(dev, &oirq);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index fcd63baee5f2..a00bc71e62a3 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -35,12 +35,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 extern unsigned int of_irq_workarounds;
 extern struct device_node *of_irq_dflt_pic;
-extern int of_irq_map_oldworld(struct device_node *device, int index,
+extern int of_irq_parse_oldworld(struct device_node *device, int index,
 			       struct of_irq *out_irq);
 #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 #define of_irq_workarounds (0)
 #define of_irq_dflt_pic (NULL)
-static inline int of_irq_map_oldworld(struct device_node *device, int index,
+static inline int of_irq_parse_oldworld(struct device_node *device, int index,
 				      struct of_irq *out_irq)
 {
 	return -EINVAL;
@@ -48,10 +48,10 @@ static inline int of_irq_map_oldworld(struct device_node *device, int index,
 #endif /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 
 
-extern int of_irq_map_raw(struct device_node *parent, const __be32 *intspec,
+extern int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 			  u32 ointsize, const __be32 *addr,
 			  struct of_irq *out_irq);
-extern int of_irq_map_one(struct device_node *device, int index,
+extern int of_irq_parse_one(struct device_node *device, int index,
 			  struct of_irq *out_irq);
 extern unsigned int irq_create_of_mapping(struct device_node *controller,
 					  const u32 *intspec,
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index fd9c408631a0..839ba20808fe 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -6,7 +6,7 @@
 
 struct pci_dev;
 struct of_irq;
-int of_irq_map_pci(const struct pci_dev *pdev, struct of_irq *out_irq);
+int of_irq_parse_pci(const struct pci_dev *pdev, struct of_irq *out_irq);
 
 struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
-- 
cgit v1.2.3


From 530210c7814e83564c7ca7bca8192515042c0b63 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Sun, 15 Sep 2013 16:39:11 +0100
Subject: of/irq: Replace of_irq with of_phandle_args

struct of_irq and struct of_phandle_args are exactly the same structure.
This patch makes the kernel use of_phandle_args everywhere. This in
itself isn't a big deal, but it makes some follow-on patches simpler.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/arm/mach-integrator/pci_v3.c              |  5 ++---
 arch/microblaze/pci/pci-common.c               |  9 ++++-----
 arch/mips/pci/fixup-lantiq.c                   |  5 ++---
 arch/mips/pci/pci-rt3883.c                     |  6 ++----
 arch/powerpc/kernel/pci-common.c               |  9 ++++-----
 arch/powerpc/platforms/cell/celleb_scc_pciex.c |  5 ++---
 arch/powerpc/platforms/cell/celleb_scc_sio.c   |  5 ++---
 arch/powerpc/platforms/cell/spider-pic.c       |  6 +++---
 arch/powerpc/platforms/cell/spu_manage.c       | 12 ++++++------
 arch/powerpc/platforms/fsl_uli1575.c           |  8 +++-----
 arch/powerpc/platforms/powermac/pic.c          |  8 ++++----
 arch/powerpc/platforms/pseries/event_sources.c |  7 +++----
 arch/powerpc/sysdev/mpic_msi.c                 |  6 +++---
 arch/x86/kernel/devicetree.c                   |  5 ++---
 drivers/of/irq.c                               | 15 +++++++--------
 drivers/of/of_pci_irq.c                        |  2 +-
 drivers/pci/host/pci-mvebu.c                   |  5 ++---
 include/linux/of_irq.h                         | 24 ++++--------------------
 include/linux/of_pci.h                         |  4 ++--
 19 files changed, 58 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c
index 0f496cc51f36..2d6b4da90fb4 100644
--- a/arch/arm/mach-integrator/pci_v3.c
+++ b/arch/arm/mach-integrator/pci_v3.c
@@ -837,7 +837,7 @@ static struct hw_pci pci_v3 __initdata = {
 
 static int __init pci_v3_map_irq_dt(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	int ret;
 
 	ret = of_irq_parse_pci(dev, &oirq);
@@ -847,8 +847,7 @@ static int __init pci_v3_map_irq_dt(const struct pci_dev *dev, u8 slot, u8 pin)
 		return 0;
 	}
 
-	return irq_create_of_mapping(oirq.controller, oirq.specifier,
-				     oirq.size);
+	return irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
 }
 
 static int __init pci_v3_dtprobe(struct platform_device *pdev,
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 4d5b1a9cd790..c9302c4f8a57 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -199,7 +199,7 @@ void pcibios_set_master(struct pci_dev *dev)
  */
 int pci_read_irq_line(struct pci_dev *pci_dev)
 {
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	unsigned int virq;
 
 	/* The current device-tree that iSeries generates from the HV
@@ -243,11 +243,10 @@ int pci_read_irq_line(struct pci_dev *pci_dev)
 			irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
 	} else {
 		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
-			 oirq.size, oirq.specifier[0], oirq.specifier[1],
-			 of_node_full_name(oirq.controller));
+			 oirq.args_count, oirq.args[0], oirq.args[1],
+			 of_node_full_name(oirq.np));
 
-		virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
-					     oirq.size);
+		virq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
 	}
 	if (!virq) {
 		pr_debug(" Failed to map !\n");
diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c
index 2e8dbfedae53..81ff0b5e6efa 100644
--- a/arch/mips/pci/fixup-lantiq.c
+++ b/arch/mips/pci/fixup-lantiq.c
@@ -25,7 +25,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 
 int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-	struct of_irq dev_irq;
+	struct of_phandle_args dev_irq;
 	int irq;
 
 	if (of_irq_parse_pci(dev, &dev_irq)) {
@@ -33,8 +33,7 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 			slot, pin);
 		return 0;
 	}
-	irq = irq_create_of_mapping(dev_irq.controller, dev_irq.specifier,
-					dev_irq.size);
+	irq = irq_create_of_mapping(dev_irq.np, dev_irq.args, dev_irq.args_count);
 	dev_info(&dev->dev, "SLOT:%d PIN:%d IRQ:%d\n", slot, pin, irq);
 	return irq;
 }
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index cae92a05ee70..0f08a5ba0b2a 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -583,7 +583,7 @@ err_put_intc_node:
 
 int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-	struct of_irq dev_irq;
+	struct of_phandle_args dev_irq;
 	int err;
 	int irq;
 
@@ -594,9 +594,7 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 		return 0;
 	}
 
-	irq = irq_create_of_mapping(dev_irq.controller,
-				    dev_irq.specifier,
-				    dev_irq.size);
+	irq = irq_create_of_mapping(dev_irq.np, dev_irq.args, dev_irq.args_count);
 
 	if (irq == 0)
 		pr_crit("pci %s: no irq found for pin %u\n",
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 2f4185425ed5..96c46235fda2 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -228,7 +228,7 @@ int pcibios_add_platform_entries(struct pci_dev *pdev)
  */
 static int pci_read_irq_line(struct pci_dev *pci_dev)
 {
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	unsigned int virq;
 
 	pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
@@ -263,11 +263,10 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
 			irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
 	} else {
 		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
-			 oirq.size, oirq.specifier[0], oirq.specifier[1],
-			 of_node_full_name(oirq.controller));
+			 oirq.args_count, oirq.args[0], oirq.args[1],
+			 of_node_full_name(oirq.np));
 
-		virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
-					     oirq.size);
+		virq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
 	}
 	if(virq == NO_IRQ) {
 		pr_debug(" Failed to map !\n");
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
index 40bc371096b7..e8d34d1f640d 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -486,7 +486,7 @@ static __init int celleb_setup_pciex(struct device_node *node,
 				     struct pci_controller *phb)
 {
 	struct resource	r;
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	int virq;
 
 	/* SMMIO registers; used inside this file */
@@ -511,8 +511,7 @@ static __init int celleb_setup_pciex(struct device_node *node,
 		pr_err("PCIEXC:Failed to map irq\n");
 		goto error;
 	}
-	virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
-				     oirq.size);
+	virq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
 	if (request_irq(virq, pciex_handle_internal_irq,
 			0, "pciex", (void *)phb)) {
 		pr_err("PCIEXC:Failed to request irq\n");
diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c
index 96388b202f4e..06046d512130 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_sio.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c
@@ -45,7 +45,7 @@ static int __init txx9_serial_init(void)
 	struct device_node *node;
 	int i;
 	struct uart_port req;
-	struct of_irq irq;
+	struct of_phandle_args irq;
 	struct resource res;
 
 	for_each_compatible_node(node, "serial", "toshiba,sio-scc") {
@@ -66,8 +66,7 @@ static int __init txx9_serial_init(void)
 #ifdef CONFIG_SERIAL_TXX9_CONSOLE
 			req.membase = ioremap(req.mapbase, 0x24);
 #endif
-			req.irq = irq_create_of_mapping(irq.controller,
-				irq.specifier, irq.size);
+			req.irq = irq_create_of_mapping(irq.np, irq.args, irq.args_count);
 			req.flags |= UPF_IOREMAP | UPF_BUGGY_UART
 				/*HAVE_CTS_LINE*/;
 			req.uartclk = 83300000;
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index b491f406560a..6e842fdbfcab 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -235,10 +235,10 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 	/* First, we check whether we have a real "interrupts" in the device
 	 * tree in case the device-tree is ever fixed
 	 */
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	if (of_irq_parse_one(pic->host->of_node, 0, &oirq) == 0) {
-		virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
-					     oirq.size);
+		virq = irq_create_of_mapping(oirq.np, oirq.args,
+					     oirq.args_count);
 		return virq;
 	}
 
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index e6cdb81a0959..e9eb4f83b1d5 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -177,7 +177,7 @@ out:
 
 static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 {
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	int ret;
 	int i;
 
@@ -188,10 +188,10 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 			goto err;
 		}
 		ret = -EINVAL;
-		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.specifier[0],
-			 oirq.controller->full_name);
-		spu->irqs[i] = irq_create_of_mapping(oirq.controller,
-					oirq.specifier, oirq.size);
+		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.args[0],
+			 oirq.np->full_name);
+		spu->irqs[i] = irq_create_of_mapping(oirq.np,
+					oirq.args, oirq.args_count);
 		if (spu->irqs[i] == NO_IRQ) {
 			pr_debug("spu_new: failed to map it !\n");
 			goto err;
@@ -200,7 +200,7 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 	return 0;
 
 err:
-	pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier,
+	pr_debug("failed to map irq %x for spu %s\n", *oirq.args,
 		spu->name);
 	for (; i >= 0; i--) {
 		if (spu->irqs[i] != NO_IRQ)
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index ac539c1cd808..288226deffa3 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -321,8 +321,8 @@ static void hpcd_final_uli5288(struct pci_dev *dev)
 {
 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
 	struct device_node *hosenode = hose ? hose->dn : NULL;
-	struct of_irq oirq;
-	int virq, pin = 2;
+	struct of_phandle_args oirq;
+	int pin = 2;
 	u32 laddr[3];
 
 	if (!machine_is(mpc86xx_hpcd))
@@ -334,9 +334,7 @@ static void hpcd_final_uli5288(struct pci_dev *dev)
 	laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8);
 	laddr[1] = laddr[2] = 0;
 	of_irq_parse_raw(hosenode, &pin, 1, laddr, &oirq);
-	virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
-				     oirq.size);
-	dev->irq = virq;
+	dev->irq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, hpcd_quirk_uli1575);
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 720663e29f47..4c24bf60d39d 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -394,7 +394,7 @@ static void __init pmac_pic_probe_oldstyle(void)
 }
 
 int of_irq_parse_oldworld(struct device_node *device, int index,
-			struct of_irq *out_irq)
+			struct of_phandle_args *out_irq)
 {
 	const u32 *ints = NULL;
 	int intlen;
@@ -422,9 +422,9 @@ int of_irq_parse_oldworld(struct device_node *device, int index,
 	if (index >= intlen)
 		return -EINVAL;
 
-	out_irq->controller = NULL;
-	out_irq->specifier[0] = ints[index];
-	out_irq->size = 1;
+	out_irq->np = NULL;
+	out_irq->args[0] = ints[index];
+	out_irq->args_count = 1;
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index 850c18c457ad..6dcf9cc38ffb 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -25,7 +25,7 @@ void request_event_sources_irqs(struct device_node *np,
 				const char *name)
 {
 	int i, index, count = 0;
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	const u32 *opicprop;
 	unsigned int opicplen;
 	unsigned int virqs[16];
@@ -59,9 +59,8 @@ void request_event_sources_irqs(struct device_node *np,
 		     index++) {
 			if (count > 15)
 				break;
-			virqs[count] = irq_create_of_mapping(oirq.controller,
-							    oirq.specifier,
-							    oirq.size);
+			virqs[count] = irq_create_of_mapping(oirq.np, oirq.args,
+							    oirq.args_count);
 			if (virqs[count] == NO_IRQ) {
 				pr_err("event-sources: Unable to allocate "
 				       "interrupt number for %s\n",
diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c
index 463e3a7c193c..7dc39f35a4cc 100644
--- a/arch/powerpc/sysdev/mpic_msi.c
+++ b/arch/powerpc/sysdev/mpic_msi.c
@@ -35,7 +35,7 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
 	const struct irq_domain_ops *ops = mpic->irqhost->ops;
 	struct device_node *np;
 	int flags, index, i;
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 
 	pr_debug("mpic: found U3, guessing msi allocator setup\n");
 
@@ -64,8 +64,8 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
 
 		index = 0;
 		while (of_irq_parse_one(np, index++, &oirq) == 0) {
-			ops->xlate(mpic->irqhost, NULL, oirq.specifier,
-						oirq.size, &hwirq, &flags);
+			ops->xlate(mpic->irqhost, NULL, oirq.args,
+						oirq.args_count, &hwirq, &flags);
 			msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, hwirq);
 		}
 	}
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3ac6398e5361..00986988a10e 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -105,7 +105,7 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
 
 static int x86_of_pci_irq_enable(struct pci_dev *dev)
 {
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	u32 virq;
 	int ret;
 	u8 pin;
@@ -120,8 +120,7 @@ static int x86_of_pci_irq_enable(struct pci_dev *dev)
 	if (ret)
 		return ret;
 
-	virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
-			oirq.size);
+	virq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
 	if (virq == 0)
 		return -EINVAL;
 	dev->irq = virq;
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 410aa2415f42..a7db38a63403 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -36,13 +36,12 @@
  */
 unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
 {
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 
 	if (of_irq_parse_one(dev, index, &oirq))
 		return 0;
 
-	return irq_create_of_mapping(oirq.controller, oirq.specifier,
-				     oirq.size);
+	return irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
 }
 EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
 
@@ -94,7 +93,7 @@ struct device_node *of_irq_find_parent(struct device_node *child)
  * node exist for the parent.
  */
 int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
-		   u32 ointsize, const __be32 *addr, struct of_irq *out_irq)
+		   u32 ointsize, const __be32 *addr, struct of_phandle_args *out_irq)
 {
 	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
 	const __be32 *tmp, *imap, *imask;
@@ -156,10 +155,10 @@ int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 				NULL) {
 			pr_debug(" -> got it !\n");
 			for (i = 0; i < intsize; i++)
-				out_irq->specifier[i] =
+				out_irq->args[i] =
 						of_read_number(intspec +i, 1);
-			out_irq->size = intsize;
-			out_irq->controller = ipar;
+			out_irq->args_count = intsize;
+			out_irq->np = ipar;
 			of_node_put(old);
 			return 0;
 		}
@@ -280,7 +279,7 @@ EXPORT_SYMBOL_GPL(of_irq_parse_raw);
  * This function resolves an interrupt, walking the tree, for a given
  * device-tree node. It's the high level pendant to of_irq_parse_raw().
  */
-int of_irq_parse_one(struct device_node *device, int index, struct of_irq *out_irq)
+int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_args *out_irq)
 {
 	struct device_node *p;
 	const __be32 *intspec, *tmp, *addr;
diff --git a/drivers/of/of_pci_irq.c b/drivers/of/of_pci_irq.c
index dceec1048dab..ee3293d4b66b 100644
--- a/drivers/of/of_pci_irq.c
+++ b/drivers/of/of_pci_irq.c
@@ -15,7 +15,7 @@
  * PCI tree until an device-node is found, at which point it will finish
  * resolving using the OF tree walking.
  */
-int of_irq_parse_pci(const struct pci_dev *pdev, struct of_irq *out_irq)
+int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq)
 {
 	struct device_node *dn, *ppnode;
 	struct pci_dev *ppdev;
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index 05f81807d05b..c5e57f82b9af 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -647,15 +647,14 @@ static int __init mvebu_pcie_setup(int nr, struct pci_sys_data *sys)
 
 static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	int ret;
 
 	ret = of_irq_parse_pci(dev, &oirq);
 	if (ret)
 		return ret;
 
-	return irq_create_of_mapping(oirq.controller, oirq.specifier,
-				     oirq.size);
+	return irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
 }
 
 static struct pci_bus *mvebu_pcie_scan_bus(int nr, struct pci_sys_data *sys)
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index a00bc71e62a3..8d9f85560d48 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -8,22 +8,6 @@
 #include <linux/ioport.h>
 #include <linux/of.h>
 
-/**
- * of_irq - container for device_node/irq_specifier pair for an irq controller
- * @controller: pointer to interrupt controller device tree node
- * @size: size of interrupt specifier
- * @specifier: array of cells @size long specifing the specific interrupt
- *
- * This structure is returned when an interrupt is mapped. The controller
- * field needs to be put() after use
- */
-#define OF_MAX_IRQ_SPEC		4 /* We handle specifiers of at most 4 cells */
-struct of_irq {
-	struct device_node *controller; /* Interrupt controller node */
-	u32 size; /* Specifier size */
-	u32 specifier[OF_MAX_IRQ_SPEC]; /* Specifier copy */
-};
-
 typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 
 /*
@@ -36,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 extern unsigned int of_irq_workarounds;
 extern struct device_node *of_irq_dflt_pic;
 extern int of_irq_parse_oldworld(struct device_node *device, int index,
-			       struct of_irq *out_irq);
+			       struct of_phandle_args *out_irq);
 #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 #define of_irq_workarounds (0)
 #define of_irq_dflt_pic (NULL)
 static inline int of_irq_parse_oldworld(struct device_node *device, int index,
-				      struct of_irq *out_irq)
+				      struct of_phandle_args *out_irq)
 {
 	return -EINVAL;
 }
@@ -50,9 +34,9 @@ static inline int of_irq_parse_oldworld(struct device_node *device, int index,
 
 extern int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 			  u32 ointsize, const __be32 *addr,
-			  struct of_irq *out_irq);
+			  struct of_phandle_args *out_irq);
 extern int of_irq_parse_one(struct device_node *device, int index,
-			  struct of_irq *out_irq);
+			  struct of_phandle_args *out_irq);
 extern unsigned int irq_create_of_mapping(struct device_node *controller,
 					  const u32 *intspec,
 					  unsigned int intsize);
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 839ba20808fe..f297237349e8 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -5,8 +5,8 @@
 #include <linux/msi.h>
 
 struct pci_dev;
-struct of_irq;
-int of_irq_parse_pci(const struct pci_dev *pdev, struct of_irq *out_irq);
+struct of_phandle_args;
+int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq);
 
 struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
-- 
cgit v1.2.3


From e6d30ab1e7d1281784672c0fc2ffa385cfb7279e Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Sun, 15 Sep 2013 16:55:53 +0100
Subject: of/irq: simplify args to irq_create_of_mapping

All the callers of irq_create_of_mapping() pass the contents of a struct
of_phandle_args structure to the function. Since all the callers already
have an of_phandle_args pointer, why not pass it directly to
irq_create_of_mapping()?

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/arm/mach-integrator/pci_v3.c              |  2 +-
 arch/microblaze/pci/pci-common.c               |  2 +-
 arch/mips/pci/fixup-lantiq.c                   |  2 +-
 arch/mips/pci/pci-rt3883.c                     |  2 +-
 arch/powerpc/kernel/pci-common.c               |  2 +-
 arch/powerpc/platforms/cell/celleb_scc_pciex.c |  2 +-
 arch/powerpc/platforms/cell/celleb_scc_sio.c   |  2 +-
 arch/powerpc/platforms/cell/spider-pic.c       |  7 ++-----
 arch/powerpc/platforms/cell/spu_manage.c       |  3 +--
 arch/powerpc/platforms/fsl_uli1575.c           |  2 +-
 arch/powerpc/platforms/pseries/event_sources.c |  3 +--
 arch/x86/kernel/devicetree.c                   |  2 +-
 drivers/of/irq.c                               |  2 +-
 drivers/pci/host/pci-mvebu.c                   |  2 +-
 include/linux/of_irq.h                         |  4 +---
 kernel/irq/irqdomain.c                         | 13 ++++++-------
 16 files changed, 22 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c
index 2d6b4da90fb4..bb3aeb31a54e 100644
--- a/arch/arm/mach-integrator/pci_v3.c
+++ b/arch/arm/mach-integrator/pci_v3.c
@@ -847,7 +847,7 @@ static int __init pci_v3_map_irq_dt(const struct pci_dev *dev, u8 slot, u8 pin)
 		return 0;
 	}
 
-	return irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
+	return irq_create_of_mapping(&oirq);
 }
 
 static int __init pci_v3_dtprobe(struct platform_device *pdev,
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index c9302c4f8a57..60b386c72c57 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -246,7 +246,7 @@ int pci_read_irq_line(struct pci_dev *pci_dev)
 			 oirq.args_count, oirq.args[0], oirq.args[1],
 			 of_node_full_name(oirq.np));
 
-		virq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
+		virq = irq_create_of_mapping(&oirq);
 	}
 	if (!virq) {
 		pr_debug(" Failed to map !\n");
diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c
index 81ff0b5e6efa..aef60e75003e 100644
--- a/arch/mips/pci/fixup-lantiq.c
+++ b/arch/mips/pci/fixup-lantiq.c
@@ -33,7 +33,7 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 			slot, pin);
 		return 0;
 	}
-	irq = irq_create_of_mapping(dev_irq.np, dev_irq.args, dev_irq.args_count);
+	irq = irq_create_of_mapping(&dev_irq);
 	dev_info(&dev->dev, "SLOT:%d PIN:%d IRQ:%d\n", slot, pin, irq);
 	return irq;
 }
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index 0f08a5ba0b2a..eadc4310cd36 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -594,7 +594,7 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 		return 0;
 	}
 
-	irq = irq_create_of_mapping(dev_irq.np, dev_irq.args, dev_irq.args_count);
+	irq = irq_create_of_mapping(&dev_irq);
 
 	if (irq == 0)
 		pr_crit("pci %s: no irq found for pin %u\n",
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 96c46235fda2..a1e3e40ca3fd 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -266,7 +266,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
 			 oirq.args_count, oirq.args[0], oirq.args[1],
 			 of_node_full_name(oirq.np));
 
-		virq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
+		virq = irq_create_of_mapping(&oirq);
 	}
 	if(virq == NO_IRQ) {
 		pr_debug(" Failed to map !\n");
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
index e8d34d1f640d..b3ea96db5b06 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -511,7 +511,7 @@ static __init int celleb_setup_pciex(struct device_node *node,
 		pr_err("PCIEXC:Failed to map irq\n");
 		goto error;
 	}
-	virq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
+	virq = irq_create_of_mapping(&oirq);
 	if (request_irq(virq, pciex_handle_internal_irq,
 			0, "pciex", (void *)phb)) {
 		pr_err("PCIEXC:Failed to request irq\n");
diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c
index 06046d512130..c8eb57193826 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_sio.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c
@@ -66,7 +66,7 @@ static int __init txx9_serial_init(void)
 #ifdef CONFIG_SERIAL_TXX9_CONSOLE
 			req.membase = ioremap(req.mapbase, 0x24);
 #endif
-			req.irq = irq_create_of_mapping(irq.np, irq.args, irq.args_count);
+			req.irq = irq_create_of_mapping(&irq);
 			req.flags |= UPF_IOREMAP | UPF_BUGGY_UART
 				/*HAVE_CTS_LINE*/;
 			req.uartclk = 83300000;
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 6e842fdbfcab..d20680446174 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -236,11 +236,8 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 	 * tree in case the device-tree is ever fixed
 	 */
 	struct of_phandle_args oirq;
-	if (of_irq_parse_one(pic->host->of_node, 0, &oirq) == 0) {
-		virq = irq_create_of_mapping(oirq.np, oirq.args,
-					     oirq.args_count);
-		return virq;
-	}
+	if (of_irq_parse_one(pic->host->of_node, 0, &oirq) == 0)
+		return irq_create_of_mapping(&oirq);
 
 	/* Now do the horrible hacks */
 	tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index e9eb4f83b1d5..c3327f3d8cf7 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -190,8 +190,7 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 		ret = -EINVAL;
 		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.args[0],
 			 oirq.np->full_name);
-		spu->irqs[i] = irq_create_of_mapping(oirq.np,
-					oirq.args, oirq.args_count);
+		spu->irqs[i] = irq_create_of_mapping(&oirq);
 		if (spu->irqs[i] == NO_IRQ) {
 			pr_debug("spu_new: failed to map it !\n");
 			goto err;
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index 288226deffa3..8904046556ad 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -334,7 +334,7 @@ static void hpcd_final_uli5288(struct pci_dev *dev)
 	laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8);
 	laddr[1] = laddr[2] = 0;
 	of_irq_parse_raw(hosenode, &pin, 1, laddr, &oirq);
-	dev->irq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
+	dev->irq = irq_create_of_mapping(&oirq);
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, hpcd_quirk_uli1575);
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index 6dcf9cc38ffb..18380e8f6dfe 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -59,8 +59,7 @@ void request_event_sources_irqs(struct device_node *np,
 		     index++) {
 			if (count > 15)
 				break;
-			virqs[count] = irq_create_of_mapping(oirq.np, oirq.args,
-							    oirq.args_count);
+			virqs[count] = irq_create_of_mapping(&oirq);
 			if (virqs[count] == NO_IRQ) {
 				pr_err("event-sources: Unable to allocate "
 				       "interrupt number for %s\n",
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 00986988a10e..d39948f654a0 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -120,7 +120,7 @@ static int x86_of_pci_irq_enable(struct pci_dev *dev)
 	if (ret)
 		return ret;
 
-	virq = irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
+	virq = irq_create_of_mapping(&oirq);
 	if (virq == 0)
 		return -EINVAL;
 	dev->irq = virq;
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index a7db38a63403..84184c44e8db 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -41,7 +41,7 @@ unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
 	if (of_irq_parse_one(dev, index, &oirq))
 		return 0;
 
-	return irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
+	return irq_create_of_mapping(&oirq);
 }
 EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
 
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index c5e57f82b9af..3a8d01ec50f7 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -654,7 +654,7 @@ static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 	if (ret)
 		return ret;
 
-	return irq_create_of_mapping(oirq.np, oirq.args, oirq.args_count);
+	return irq_create_of_mapping(&oirq);
 }
 
 static struct pci_bus *mvebu_pcie_scan_bus(int nr, struct pci_sys_data *sys)
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 8d9f85560d48..3bbba8d6adc8 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -37,9 +37,7 @@ extern int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 			  struct of_phandle_args *out_irq);
 extern int of_irq_parse_one(struct device_node *device, int index,
 			  struct of_phandle_args *out_irq);
-extern unsigned int irq_create_of_mapping(struct device_node *controller,
-					  const u32 *intspec,
-					  unsigned int intsize);
+extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
 extern int of_irq_to_resource(struct device_node *dev, int index,
 			      struct resource *r);
 extern int of_irq_count(struct device_node *dev);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 706724e9835d..cf68bb36fe58 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -465,27 +465,26 @@ int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base,
 }
 EXPORT_SYMBOL_GPL(irq_create_strict_mappings);
 
-unsigned int irq_create_of_mapping(struct device_node *controller,
-				   const u32 *intspec, unsigned int intsize)
+unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data)
 {
 	struct irq_domain *domain;
 	irq_hw_number_t hwirq;
 	unsigned int type = IRQ_TYPE_NONE;
 	unsigned int virq;
 
-	domain = controller ? irq_find_host(controller) : irq_default_domain;
+	domain = irq_data->np ? irq_find_host(irq_data->np) : irq_default_domain;
 	if (!domain) {
 		pr_warn("no irq domain found for %s !\n",
-			of_node_full_name(controller));
+			of_node_full_name(irq_data->np));
 		return 0;
 	}
 
 	/* If domain has no translation, then we assume interrupt line */
 	if (domain->ops->xlate == NULL)
-		hwirq = intspec[0];
+		hwirq = irq_data->args[0];
 	else {
-		if (domain->ops->xlate(domain, controller, intspec, intsize,
-				     &hwirq, &type))
+		if (domain->ops->xlate(domain, irq_data->np, irq_data->args,
+					irq_data->args_count, &hwirq, &type))
 			return 0;
 	}
 
-- 
cgit v1.2.3


From 2361613206e66ce59cc0e08efa8d98ec15b84ed1 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Sun, 15 Sep 2013 22:32:39 +0100
Subject: of/irq: Refactor interrupt-map parsing

All the users of of_irq_parse_raw pass in a raw interrupt specifier from
the device tree and expect it to be returned (possibly modified) in an
of_phandle_args structure. However, the primary function of
of_irq_parse_raw() is to check for translations due to the presence of
one or more interrupt-map properties. The actual placing of the data
into an of_phandle_args structure is trivial. If it is refactored to
accept an of_phandle_args structure directly, then it becomes possible
to consume of_phandle_args from other sources. This is important for an
upcoming patch that allows a device to be connected to more than one
interrupt parent. It also simplifies the code a bit.

The biggest complication with this patch is that the old version works
on the interrupt specifiers in __be32 form, but the of_phandle_args
structure is intended to carry it in the cpu-native version. A bit of
churn was required to make this work. In the end it results in tighter
code, so the churn is worth it.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/fsl_uli1575.c |   6 +-
 drivers/of/irq.c                     | 108 ++++++++++++++++++-----------------
 drivers/of/of_pci_irq.c              |   7 ++-
 include/linux/of_irq.h               |   5 +-
 4 files changed, 67 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index 8904046556ad..b97f6f3d3c5b 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -322,7 +322,6 @@ static void hpcd_final_uli5288(struct pci_dev *dev)
 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
 	struct device_node *hosenode = hose ? hose->dn : NULL;
 	struct of_phandle_args oirq;
-	int pin = 2;
 	u32 laddr[3];
 
 	if (!machine_is(mpc86xx_hpcd))
@@ -331,9 +330,12 @@ static void hpcd_final_uli5288(struct pci_dev *dev)
 	if (!hosenode)
 		return;
 
+	oirq.np = hosenode;
+	oirq.args[0] = 2;
+	oirq.args_count = 1;
 	laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8);
 	laddr[1] = laddr[2] = 0;
-	of_irq_parse_raw(hosenode, &pin, 1, laddr, &oirq);
+	of_irq_parse_raw(laddr, &oirq);
 	dev->irq = irq_create_of_mapping(&oirq);
 }
 
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 84184c44e8db..0ed5ed4a5f9b 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -80,31 +80,32 @@ struct device_node *of_irq_find_parent(struct device_node *child)
 /**
  * of_irq_parse_raw - Low level interrupt tree parsing
  * @parent:	the device interrupt parent
- * @intspec:	interrupt specifier ("interrupts" property of the device)
- * @ointsize:   size of the passed in interrupt specifier
- * @addr:	address specifier (start of "reg" property of the device)
- * @out_irq:	structure of_irq filled by this function
+ * @addr:	address specifier (start of "reg" property of the device) in be32 format
+ * @out_irq:	structure of_irq updated by this function
  *
  * Returns 0 on success and a negative number on error
  *
  * This function is a low-level interrupt tree walking function. It
  * can be used to do a partial walk with synthetized reg and interrupts
  * properties, for example when resolving PCI interrupts when no device
- * node exist for the parent.
+ * node exist for the parent. It takes an interrupt specifier structure as
+ * input, walks the tree looking for any interrupt-map properties, translates
+ * the specifier for each map, and then returns the translated map.
  */
-int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
-		   u32 ointsize, const __be32 *addr, struct of_phandle_args *out_irq)
+int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 {
 	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
-	const __be32 *tmp, *imap, *imask;
+	__be32 initial_match_array[8];
+	const __be32 *match_array = initial_match_array;
+	const __be32 *tmp, *imap, *imask, dummy_imask[] = { ~0, ~0, ~0, ~0, ~0 };
 	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
 	int imaplen, match, i;
 
 	pr_debug("of_irq_parse_raw: par=%s,intspec=[0x%08x 0x%08x...],ointsize=%d\n",
-		 of_node_full_name(parent), be32_to_cpup(intspec),
-		 be32_to_cpup(intspec + 1), ointsize);
+		 of_node_full_name(out_irq->np), out_irq->args[0], out_irq->args[1],
+		 out_irq->args_count);
 
-	ipar = of_node_get(parent);
+	ipar = of_node_get(out_irq->np);
 
 	/* First get the #interrupt-cells property of the current cursor
 	 * that tells us how to interpret the passed-in intspec. If there
@@ -127,7 +128,7 @@ int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 
 	pr_debug("of_irq_parse_raw: ipar=%s, size=%d\n", of_node_full_name(ipar), intsize);
 
-	if (ointsize != intsize)
+	if (out_irq->args_count != intsize)
 		return -EINVAL;
 
 	/* Look for this #address-cells. We have to implement the old linux
@@ -146,6 +147,21 @@ int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 
 	pr_debug(" -> addrsize=%d\n", addrsize);
 
+	/* If we were passed no "reg" property and we attempt to parse
+	 * an interrupt-map, then #address-cells must be 0.
+	 * Fail if it's not.
+	 */
+	if (addr == NULL && addrsize != 0) {
+		pr_debug(" -> no reg passed in when needed !\n");
+		return -EINVAL;
+	}
+
+	/* Precalculate the match array - this simplifies match loop */
+	for (i = 0; i < addrsize; i++)
+		initial_match_array[i] = addr[i];
+	for (i = 0; i < intsize; i++)
+		initial_match_array[addrsize + i] = cpu_to_be32(out_irq->args[i]);
+
 	/* Now start the actual "proper" walk of the interrupt tree */
 	while (ipar != NULL) {
 		/* Now check if cursor is an interrupt-controller and if it is
@@ -154,11 +170,6 @@ int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 		if (of_get_property(ipar, "interrupt-controller", NULL) !=
 				NULL) {
 			pr_debug(" -> got it !\n");
-			for (i = 0; i < intsize; i++)
-				out_irq->args[i] =
-						of_read_number(intspec +i, 1);
-			out_irq->args_count = intsize;
-			out_irq->np = ipar;
 			of_node_put(old);
 			return 0;
 		}
@@ -175,34 +186,16 @@ int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 
 		/* Look for a mask */
 		imask = of_get_property(ipar, "interrupt-map-mask", NULL);
-
-		/* If we were passed no "reg" property and we attempt to parse
-		 * an interrupt-map, then #address-cells must be 0.
-		 * Fail if it's not.
-		 */
-		if (addr == NULL && addrsize != 0) {
-			pr_debug(" -> no reg passed in when needed !\n");
-			goto fail;
-		}
+		if (!imask)
+			imask = dummy_imask;
 
 		/* Parse interrupt-map */
 		match = 0;
 		while (imaplen > (addrsize + intsize + 1) && !match) {
 			/* Compare specifiers */
 			match = 1;
-			for (i = 0; i < addrsize && match; ++i) {
-				__be32 mask = imask ? imask[i]
-						    : cpu_to_be32(0xffffffffu);
-				match = ((addr[i] ^ imap[i]) & mask) == 0;
-			}
-			for (; i < (addrsize + intsize) && match; ++i) {
-				__be32 mask = imask ? imask[i]
-						    : cpu_to_be32(0xffffffffu);
-				match =
-				   ((intspec[i-addrsize] ^ imap[i]) & mask) == 0;
-			}
-			imap += addrsize + intsize;
-			imaplen -= addrsize + intsize;
+			for (i = 0; i < (addrsize + intsize); i++, imaplen--)
+				match = !((match_array[i] ^ *imap++) & imask[i]);
 
 			pr_debug(" -> match=%d (imaplen=%d)\n", match, imaplen);
 
@@ -247,12 +240,18 @@ int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 		if (!match)
 			goto fail;
 
-		of_node_put(old);
-		old = of_node_get(newpar);
+		/*
+		 * Successfully parsed an interrrupt-map translation; copy new
+		 * interrupt specifier into the out_irq structure
+		 */
+		of_node_put(out_irq->np);
+		out_irq->np = of_node_get(newpar);
+
+		match_array = imap - newaddrsize - newintsize;
+		for (i = 0; i < newintsize; i++)
+			out_irq->args[i] = be32_to_cpup(imap - newintsize + i);
+		out_irq->args_count = intsize = newintsize;
 		addrsize = newaddrsize;
-		intsize = newintsize;
-		intspec = imap - intsize;
-		addr = intspec - addrsize;
 
 	skiplevel:
 		/* Iterate again with new parent */
@@ -263,7 +262,7 @@ int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 	}
  fail:
 	of_node_put(ipar);
-	of_node_put(old);
+	of_node_put(out_irq->np);
 	of_node_put(newpar);
 
 	return -EINVAL;
@@ -276,15 +275,16 @@ EXPORT_SYMBOL_GPL(of_irq_parse_raw);
  * @index: index of the interrupt to resolve
  * @out_irq: structure of_irq filled by this function
  *
- * This function resolves an interrupt, walking the tree, for a given
- * device-tree node. It's the high level pendant to of_irq_parse_raw().
+ * This function resolves an interrupt for a node by walking the interrupt tree,
+ * finding which interrupt controller node it is attached to, and returning the
+ * interrupt specifier that can be used to retrieve a Linux IRQ number.
  */
 int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_args *out_irq)
 {
 	struct device_node *p;
 	const __be32 *intspec, *tmp, *addr;
 	u32 intsize, intlen;
-	int res = -EINVAL;
+	int i, res = -EINVAL;
 
 	pr_debug("of_irq_parse_one: dev=%s, index=%d\n", of_node_full_name(device), index);
 
@@ -320,9 +320,15 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar
 	if ((index + 1) * intsize > intlen)
 		goto out;
 
-	/* Get new specifier and map it */
-	res = of_irq_parse_raw(p, intspec + index * intsize, intsize,
-			     addr, out_irq);
+	/* Copy intspec into irq structure */
+	intspec += index * intsize;
+	out_irq->np = p;
+	out_irq->args_count = intsize;
+	for (i = 0; i < intsize; i++)
+		out_irq->args[i] = be32_to_cpup(intspec++);
+
+	/* Check if there are any interrupt-map translations to process */
+	res = of_irq_parse_raw(addr, out_irq);
  out:
 	of_node_put(p);
 	return res;
diff --git a/drivers/of/of_pci_irq.c b/drivers/of/of_pci_irq.c
index ee3293d4b66b..303afebc247a 100644
--- a/drivers/of/of_pci_irq.c
+++ b/drivers/of/of_pci_irq.c
@@ -85,9 +85,12 @@ int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq
 		pdev = ppdev;
 	}
 
+	out_irq->np = ppnode;
+	out_irq->args_count = 1;
+	out_irq->args[0] = lspec;
 	lspec_be = cpu_to_be32(lspec);
 	laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8));
-	laddr[1]  = laddr[2] = cpu_to_be32(0);
-	return of_irq_parse_raw(ppnode, &lspec_be, 1, laddr, out_irq);
+	laddr[1] = laddr[2] = cpu_to_be32(0);
+	return of_irq_parse_raw(laddr, out_irq);
 }
 EXPORT_SYMBOL_GPL(of_irq_parse_pci);
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 3bbba8d6adc8..c0d6dfe80895 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -31,10 +31,7 @@ static inline int of_irq_parse_oldworld(struct device_node *device, int index,
 }
 #endif /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 
-
-extern int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
-			  u32 ointsize, const __be32 *addr,
-			  struct of_phandle_args *out_irq);
+extern int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq);
 extern int of_irq_parse_one(struct device_node *device, int index,
 			  struct of_phandle_args *out_irq);
 extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
-- 
cgit v1.2.3


From 624cfca534f9b1ffb1326617b4e973a3d5ecff4a Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Fri, 11 Oct 2013 22:05:10 +0100
Subject: of: Add helper for printing an of_phandle_args structure

It is sometimes useful for debug to get the contents of an
of_phandle_args structure out into the kernel log.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/base.c  | 9 +++++++++
 drivers/of/irq.c   | 6 +++---
 include/linux/of.h | 1 +
 3 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 3ae106d89791..021db96245e7 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1185,6 +1185,15 @@ int of_property_count_strings(struct device_node *np, const char *propname)
 }
 EXPORT_SYMBOL_GPL(of_property_count_strings);
 
+void of_print_phandle_args(const char *msg, const struct of_phandle_args *args)
+{
+	int i;
+	printk("%s %s", msg, of_node_full_name(args->np));
+	for (i = 0; i < args->args_count; i++)
+		printk(i ? ",%08x" : ":%08x", args->args[i]);
+	printk("\n");
+}
+
 static int __of_parse_phandle_with_args(const struct device_node *np,
 					const char *list_name,
 					const char *cells_name,
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 0ed5ed4a5f9b..ddea945ec29e 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -101,9 +101,9 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
 	int imaplen, match, i;
 
-	pr_debug("of_irq_parse_raw: par=%s,intspec=[0x%08x 0x%08x...],ointsize=%d\n",
-		 of_node_full_name(out_irq->np), out_irq->args[0], out_irq->args[1],
-		 out_irq->args_count);
+#ifdef DEBUG
+	of_print_phandle_args("of_irq_parse_raw: ", out_irq);
+#endif
 
 	ipar = of_node_get(out_irq->np);
 
diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..374e03536135 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -275,6 +275,7 @@ extern int of_n_size_cells(struct device_node *np);
 extern const struct of_device_id *of_match_node(
 	const struct of_device_id *matches, const struct device_node *node);
 extern int of_modalias_node(struct device_node *node, char *modalias, int len);
+extern void of_print_phandle_args(const char *msg, const struct of_phandle_args *args);
 extern struct device_node *of_parse_phandle(const struct device_node *np,
 					    const char *phandle_name,
 					    int index);
-- 
cgit v1.2.3


From 16b84e5a505c790538e534ad8dfda9c288691e40 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Thu, 19 Sep 2013 16:44:55 -0500
Subject: of/irq: Create of_irq_parse_and_map_pci() to consolidate arch code.

Several architectures open code effectively the same code block for
finding and mapping PCI irqs. This patch consolidates it down to a
single function.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Michal Simek <monstr@monstr.eu>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/arm/mach-integrator/pci_v3.c | 17 +----------------
 arch/mips/pci/fixup-lantiq.c      | 12 +-----------
 arch/mips/pci/pci-rt3883.c        | 22 +---------------------
 arch/x86/kernel/devicetree.c      |  7 +------
 drivers/of/of_pci_irq.c           | 25 +++++++++++++++++++++++++
 drivers/pci/host/pci-mvebu.c      | 14 +-------------
 include/linux/of_pci.h            |  1 +
 7 files changed, 31 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c
index bb3aeb31a54e..a87e510fe0c4 100644
--- a/arch/arm/mach-integrator/pci_v3.c
+++ b/arch/arm/mach-integrator/pci_v3.c
@@ -835,21 +835,6 @@ static struct hw_pci pci_v3 __initdata = {
 
 #ifdef CONFIG_OF
 
-static int __init pci_v3_map_irq_dt(const struct pci_dev *dev, u8 slot, u8 pin)
-{
-	struct of_phandle_args oirq;
-	int ret;
-
-	ret = of_irq_parse_pci(dev, &oirq);
-	if (ret) {
-		dev_err(&dev->dev, "of_irq_parse_pci() %d\n", ret);
-		/* Proper return code 0 == NO_IRQ */
-		return 0;
-	}
-
-	return irq_create_of_mapping(&oirq);
-}
-
 static int __init pci_v3_dtprobe(struct platform_device *pdev,
 				struct device_node *np)
 {
@@ -918,7 +903,7 @@ static int __init pci_v3_dtprobe(struct platform_device *pdev,
 		return -EINVAL;
 	}
 
-	pci_v3.map_irq = pci_v3_map_irq_dt;
+	pci_v3.map_irq = of_irq_parse_and_map_pci;
 	pci_common_init_dev(&pdev->dev, &pci_v3);
 
 	return 0;
diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c
index aef60e75003e..c2ce41ea61d7 100644
--- a/arch/mips/pci/fixup-lantiq.c
+++ b/arch/mips/pci/fixup-lantiq.c
@@ -25,15 +25,5 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 
 int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-	struct of_phandle_args dev_irq;
-	int irq;
-
-	if (of_irq_parse_pci(dev, &dev_irq)) {
-		dev_err(&dev->dev, "trying to map irq for unknown slot:%d pin:%d\n",
-			slot, pin);
-		return 0;
-	}
-	irq = irq_create_of_mapping(&dev_irq);
-	dev_info(&dev->dev, "SLOT:%d PIN:%d IRQ:%d\n", slot, pin, irq);
-	return irq;
+	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index eadc4310cd36..adeff2bfe4cd 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -583,27 +583,7 @@ err_put_intc_node:
 
 int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-	struct of_phandle_args dev_irq;
-	int err;
-	int irq;
-
-	err = of_irq_parse_pci(dev, &dev_irq);
-	if (err) {
-		pr_err("pci %s: unable to get irq map, err=%d\n",
-		       pci_name((struct pci_dev *) dev), err);
-		return 0;
-	}
-
-	irq = irq_create_of_mapping(&dev_irq);
-
-	if (irq == 0)
-		pr_crit("pci %s: no irq found for pin %u\n",
-			pci_name((struct pci_dev *) dev), pin);
-	else
-		pr_info("pci %s: using irq %d for pin %u\n",
-			pci_name((struct pci_dev *) dev), irq, pin);
-
-	return irq;
+	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
 
 int pcibios_plat_dev_init(struct pci_dev *dev)
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index d39948f654a0..69c826a3d07e 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -105,7 +105,6 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
 
 static int x86_of_pci_irq_enable(struct pci_dev *dev)
 {
-	struct of_phandle_args oirq;
 	u32 virq;
 	int ret;
 	u8 pin;
@@ -116,11 +115,7 @@ static int x86_of_pci_irq_enable(struct pci_dev *dev)
 	if (!pin)
 		return 0;
 
-	ret = of_irq_parse_pci(dev, &oirq);
-	if (ret)
-		return ret;
-
-	virq = irq_create_of_mapping(&oirq);
+	virq = of_irq_parse_and_map_pci(dev, 0, 0);
 	if (virq == 0)
 		return -EINVAL;
 	dev->irq = virq;
diff --git a/drivers/of/of_pci_irq.c b/drivers/of/of_pci_irq.c
index 303afebc247a..8e92acd8253f 100644
--- a/drivers/of/of_pci_irq.c
+++ b/drivers/of/of_pci_irq.c
@@ -94,3 +94,28 @@ int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq
 	return of_irq_parse_raw(laddr, out_irq);
 }
 EXPORT_SYMBOL_GPL(of_irq_parse_pci);
+
+/**
+ * of_irq_parse_and_map_pci() - Decode a PCI irq from the device tree and map to a virq
+ * @dev: The pci device needing an irq
+ * @slot: PCI slot number; passed when used as map_irq callback. Unused
+ * @pin: PCI irq pin number; passed when used as map_irq callback. Unused
+ *
+ * @slot and @pin are unused, but included in the function so that this
+ * function can be used directly as the map_irq callback to pci_fixup_irqs().
+ */
+int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct of_phandle_args oirq;
+	int ret;
+
+	ret = of_irq_parse_pci(dev, &oirq);
+	if (ret) {
+		dev_err(&dev->dev, "of_irq_parse_pci() failed with rc=%d\n", ret);
+		return 0; /* Proper return code 0 == NO_IRQ */
+	}
+
+	return irq_create_of_mapping(&oirq);
+}
+EXPORT_SYMBOL_GPL(of_irq_parse_and_map_pci);
+
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index 3a8d01ec50f7..07ddb3a13c6e 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -645,18 +645,6 @@ static int __init mvebu_pcie_setup(int nr, struct pci_sys_data *sys)
 	return 1;
 }
 
-static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
-	struct of_phandle_args oirq;
-	int ret;
-
-	ret = of_irq_parse_pci(dev, &oirq);
-	if (ret)
-		return ret;
-
-	return irq_create_of_mapping(&oirq);
-}
-
 static struct pci_bus *mvebu_pcie_scan_bus(int nr, struct pci_sys_data *sys)
 {
 	struct mvebu_pcie *pcie = sys_to_pcie(sys);
@@ -705,7 +693,7 @@ static void __init mvebu_pcie_enable(struct mvebu_pcie *pcie)
 	hw.private_data   = (void **)&pcie;
 	hw.setup          = mvebu_pcie_setup;
 	hw.scan           = mvebu_pcie_scan_bus;
-	hw.map_irq        = mvebu_pcie_map_irq;
+	hw.map_irq        = of_irq_parse_and_map_pci;
 	hw.ops            = &mvebu_pcie_ops;
 	hw.align_resource = mvebu_pcie_align_resource;
 
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index f297237349e8..1a1f5ffd5288 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -7,6 +7,7 @@
 struct pci_dev;
 struct of_phandle_args;
 int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq);
+int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
 
 struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
-- 
cgit v1.2.3


From 68126702b419fd26ef4946e314bb3a1f57d3a53f Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 24 Oct 2013 10:07:42 +0900
Subject: slab: overloading the RCU head over the LRU for RCU free

With build-time size checking, we can overload the RCU head over the LRU
of struct page to free pages of a slab in rcu context. This really help to
implement to overload the struct slab over the struct page and this
eventually reduce memory usage and cache footprint of the SLAB.

Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Pekka Enberg <penberg@iki.fi>
---
 include/linux/mm_types.h |  3 +++
 include/linux/slab.h     |  9 ++++++-
 mm/slab.c                | 68 +++++++++++++++++++++---------------------------
 3 files changed, 41 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index faf4b7c1ad12..959cb369b197 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -130,6 +130,9 @@ struct page {
 
 		struct list_head list;	/* slobs list of pages */
 		struct slab *slab_page; /* slab fields */
+		struct rcu_head rcu_head;	/* Used by SLAB
+						 * when destroying via RCU
+						 */
 	};
 
 	/* Remainder is not double word aligned */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 6c5cc0ea8713..caaad51fee1f 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -51,7 +51,14 @@
  *  }
  *  rcu_read_unlock();
  *
- * See also the comment on struct slab_rcu in mm/slab.c.
+ * This is useful if we need to approach a kernel structure obliquely,
+ * from its address obtained without the usual locking. We can lock
+ * the structure to stabilize it and check it's still at the given address,
+ * only if we can be sure that the memory has not been meanwhile reused
+ * for some other kind of object (which our subsystem's lock might corrupt).
+ *
+ * rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
  */
 #define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
diff --git a/mm/slab.c b/mm/slab.c
index 7e1aabe2b5d8..84c4ed62c10d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -188,25 +188,6 @@ typedef unsigned int kmem_bufctl_t;
 #define	BUFCTL_ACTIVE	(((kmem_bufctl_t)(~0U))-2)
 #define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-3)
 
-/*
- * struct slab_rcu
- *
- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
- * arrange for kmem_freepages to be called via RCU.  This is useful if
- * we need to approach a kernel structure obliquely, from its address
- * obtained without the usual locking.  We can lock the structure to
- * stabilize it and check it's still at the given address, only if we
- * can be sure that the memory has not been meanwhile reused for some
- * other kind of object (which our subsystem's lock might corrupt).
- *
- * rcu_read_lock before reading the address, then rcu_read_unlock after
- * taking the spinlock within the structure expected at that address.
- */
-struct slab_rcu {
-	struct rcu_head head;
-	struct page *page;
-};
-
 /*
  * struct slab
  *
@@ -215,14 +196,11 @@ struct slab_rcu {
  * Slabs are chained into three list: fully used, partial, fully free slabs.
  */
 struct slab {
-	union {
-		struct {
-			struct list_head list;
-			void *s_mem;		/* including colour offset */
-			unsigned int inuse;	/* num of objs active in slab */
-			kmem_bufctl_t free;
-		};
-		struct slab_rcu __slab_cover_slab_rcu;
+	struct {
+		struct list_head list;
+		void *s_mem;		/* including colour offset */
+		unsigned int inuse;	/* num of objs active in slab */
+		kmem_bufctl_t free;
 	};
 };
 
@@ -1509,6 +1487,8 @@ void __init kmem_cache_init(void)
 {
 	int i;
 
+	BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
+					sizeof(struct rcu_head));
 	kmem_cache = &kmem_cache_boot;
 	setup_node_pointer(kmem_cache);
 
@@ -1822,12 +1802,13 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
 
 static void kmem_rcu_free(struct rcu_head *head)
 {
-	struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
-	struct kmem_cache *cachep = slab_rcu->page->slab_cache;
+	struct kmem_cache *cachep;
+	struct page *page;
 
-	kmem_freepages(cachep, slab_rcu->page);
-	if (OFF_SLAB(cachep))
-		kmem_cache_free(cachep->slabp_cache, slab_rcu);
+	page = container_of(head, struct page, rcu_head);
+	cachep = page->slab_cache;
+
+	kmem_freepages(cachep, page);
 }
 
 #if DEBUG
@@ -2048,16 +2029,27 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
 
 	slab_destroy_debugcheck(cachep, slabp);
 	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
-		struct slab_rcu *slab_rcu;
+		struct rcu_head *head;
+
+		/*
+		 * RCU free overloads the RCU head over the LRU.
+		 * slab_page has been overloeaded over the LRU,
+		 * however it is not used from now on so that
+		 * we can use it safely.
+		 */
+		head = (void *)&page->rcu_head;
+		call_rcu(head, kmem_rcu_free);
 
-		slab_rcu = (struct slab_rcu *)slabp;
-		slab_rcu->page = page;
-		call_rcu(&slab_rcu->head, kmem_rcu_free);
 	} else {
 		kmem_freepages(cachep, page);
-		if (OFF_SLAB(cachep))
-			kmem_cache_free(cachep->slabp_cache, slabp);
 	}
+
+	/*
+	 * From now on, we don't use slab management
+	 * although actual page can be freed in rcu context
+	 */
+	if (OFF_SLAB(cachep))
+		kmem_cache_free(cachep->slabp_cache, slabp);
 }
 
 /**
-- 
cgit v1.2.3


From 8456a648cf44f14365f1f44de90a3da2526a4776 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 24 Oct 2013 10:07:49 +0900
Subject: slab: use struct page for slab management

Now, there are a few field in struct slab, so we can overload these
over struct page. This will save some memory and reduce cache footprint.

After this change, slabp_cache and slab_size no longer related to
a struct slab, so rename them as freelist_cache and freelist_size.

These changes are just mechanical ones and there is no functional change.

Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Pekka Enberg <penberg@iki.fi>
---
 include/linux/mm_types.h |  21 ++--
 include/linux/slab_def.h |   4 +-
 mm/slab.c                | 306 ++++++++++++++++++++++-------------------------
 3 files changed, 158 insertions(+), 173 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 959cb369b197..95bf0c5a7eb9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -42,18 +42,22 @@ struct page {
 	/* First double word block */
 	unsigned long flags;		/* Atomic flags, some possibly
 					 * updated asynchronously */
-	struct address_space *mapping;	/* If low bit clear, points to
-					 * inode address_space, or NULL.
-					 * If page mapped as anonymous
-					 * memory, low bit is set, and
-					 * it points to anon_vma object:
-					 * see PAGE_MAPPING_ANON below.
-					 */
+	union {
+		struct address_space *mapping;	/* If low bit clear, points to
+						 * inode address_space, or NULL.
+						 * If page mapped as anonymous
+						 * memory, low bit is set, and
+						 * it points to anon_vma object:
+						 * see PAGE_MAPPING_ANON below.
+						 */
+		void *s_mem;			/* slab first object */
+	};
+
 	/* Second double word */
 	struct {
 		union {
 			pgoff_t index;		/* Our offset within mapping. */
-			void *freelist;		/* slub/slob first free object */
+			void *freelist;		/* sl[aou]b first free object */
 			bool pfmemalloc;	/* If set by the page allocator,
 						 * ALLOC_NO_WATERMARKS was set
 						 * and the low watermark was not
@@ -109,6 +113,7 @@ struct page {
 				};
 				atomic_t _count;		/* Usage count, see below. */
 			};
+			unsigned int active;	/* SLAB */
 		};
 	};
 
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index cd401580bdd3..ca82e8ff89fa 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -41,8 +41,8 @@ struct kmem_cache {
 
 	size_t colour;			/* cache colouring range */
 	unsigned int colour_off;	/* colour offset */
-	struct kmem_cache *slabp_cache;
-	unsigned int slab_size;
+	struct kmem_cache *freelist_cache;
+	unsigned int freelist_size;
 
 	/* constructor func */
 	void (*ctor)(void *obj);
diff --git a/mm/slab.c b/mm/slab.c
index 2ec2336a1ffc..0e7f2e73e08e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -163,21 +163,6 @@
  */
 static bool pfmemalloc_active __read_mostly;
 
-/*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-	struct {
-		struct list_head list;
-		void *s_mem;		/* including colour offset */
-		unsigned int active;	/* num of objs active in slab */
-	};
-};
-
 /*
  * struct array_cache
  *
@@ -405,18 +390,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj)
 	return page->slab_cache;
 }
 
-static inline struct slab *virt_to_slab(const void *obj)
-{
-	struct page *page = virt_to_head_page(obj);
-
-	VM_BUG_ON(!PageSlab(page));
-	return page->slab_page;
-}
-
-static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
+static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
 				 unsigned int idx)
 {
-	return slab->s_mem + cache->size * idx;
+	return page->s_mem + cache->size * idx;
 }
 
 /*
@@ -426,9 +403,9 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
  *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
  */
 static inline unsigned int obj_to_index(const struct kmem_cache *cache,
-					const struct slab *slab, void *obj)
+					const struct page *page, void *obj)
 {
-	u32 offset = (obj - slab->s_mem);
+	u32 offset = (obj - page->s_mem);
 	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
@@ -590,7 +567,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 
 static size_t slab_mgmt_size(size_t nr_objs, size_t align)
 {
-	return ALIGN(sizeof(struct slab)+nr_objs*sizeof(unsigned int), align);
+	return ALIGN(nr_objs * sizeof(unsigned int), align);
 }
 
 /*
@@ -609,7 +586,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
 	 * on it. For the latter case, the memory allocated for a
 	 * slab is used for:
 	 *
-	 * - The struct slab
 	 * - One unsigned int for each object
 	 * - Padding to respect alignment of @align
 	 * - @buffer_size bytes for each object
@@ -632,8 +608,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
 		 * into the memory allocation when taking the padding
 		 * into account.
 		 */
-		nr_objs = (slab_size - sizeof(struct slab)) /
-			  (buffer_size + sizeof(unsigned int));
+		nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int));
 
 		/*
 		 * This calculated number will be either the right
@@ -773,11 +748,11 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 	return nc;
 }
 
-static inline bool is_slab_pfmemalloc(struct slab *slabp)
+static inline bool is_slab_pfmemalloc(struct page *page)
 {
-	struct page *page = virt_to_page(slabp->s_mem);
+	struct page *mem_page = virt_to_page(page->s_mem);
 
-	return PageSlabPfmemalloc(page);
+	return PageSlabPfmemalloc(mem_page);
 }
 
 /* Clears pfmemalloc_active if no slabs have pfmalloc set */
@@ -785,23 +760,23 @@ static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
 						struct array_cache *ac)
 {
 	struct kmem_cache_node *n = cachep->node[numa_mem_id()];
-	struct slab *slabp;
+	struct page *page;
 	unsigned long flags;
 
 	if (!pfmemalloc_active)
 		return;
 
 	spin_lock_irqsave(&n->list_lock, flags);
-	list_for_each_entry(slabp, &n->slabs_full, list)
-		if (is_slab_pfmemalloc(slabp))
+	list_for_each_entry(page, &n->slabs_full, lru)
+		if (is_slab_pfmemalloc(page))
 			goto out;
 
-	list_for_each_entry(slabp, &n->slabs_partial, list)
-		if (is_slab_pfmemalloc(slabp))
+	list_for_each_entry(page, &n->slabs_partial, lru)
+		if (is_slab_pfmemalloc(page))
 			goto out;
 
-	list_for_each_entry(slabp, &n->slabs_free, list)
-		if (is_slab_pfmemalloc(slabp))
+	list_for_each_entry(page, &n->slabs_free, lru)
+		if (is_slab_pfmemalloc(page))
 			goto out;
 
 	pfmemalloc_active = false;
@@ -841,8 +816,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
 		 */
 		n = cachep->node[numa_mem_id()];
 		if (!list_empty(&n->slabs_free) && force_refill) {
-			struct slab *slabp = virt_to_slab(objp);
-			ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
+			struct page *page = virt_to_head_page(objp);
+			ClearPageSlabPfmemalloc(virt_to_head_page(page->s_mem));
 			clear_obj_pfmemalloc(&objp);
 			recheck_pfmemalloc_active(cachep, ac);
 			return objp;
@@ -874,9 +849,9 @@ static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
 {
 	if (unlikely(pfmemalloc_active)) {
 		/* Some pfmemalloc slabs exist, check if this is one */
-		struct slab *slabp = virt_to_slab(objp);
-		struct page *page = virt_to_head_page(slabp->s_mem);
-		if (PageSlabPfmemalloc(page))
+		struct page *page = virt_to_head_page(objp);
+		struct page *mem_page = virt_to_head_page(page->s_mem);
+		if (PageSlabPfmemalloc(mem_page))
 			set_obj_pfmemalloc(&objp);
 	}
 
@@ -1633,7 +1608,7 @@ static noinline void
 slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 {
 	struct kmem_cache_node *n;
-	struct slab *slabp;
+	struct page *page;
 	unsigned long flags;
 	int node;
 
@@ -1652,15 +1627,15 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 			continue;
 
 		spin_lock_irqsave(&n->list_lock, flags);
-		list_for_each_entry(slabp, &n->slabs_full, list) {
+		list_for_each_entry(page, &n->slabs_full, lru) {
 			active_objs += cachep->num;
 			active_slabs++;
 		}
-		list_for_each_entry(slabp, &n->slabs_partial, list) {
-			active_objs += slabp->active;
+		list_for_each_entry(page, &n->slabs_partial, lru) {
+			active_objs += page->active;
 			active_slabs++;
 		}
-		list_for_each_entry(slabp, &n->slabs_free, list)
+		list_for_each_entry(page, &n->slabs_free, lru)
 			num_slabs++;
 
 		free_objects += n->free_objects;
@@ -1746,6 +1721,8 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
 	BUG_ON(!PageSlab(page));
 	__ClearPageSlabPfmemalloc(page);
 	__ClearPageSlab(page);
+	page_mapcount_reset(page);
+	page->mapping = NULL;
 
 	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
@@ -1910,19 +1887,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 		/* Print some data about the neighboring objects, if they
 		 * exist:
 		 */
-		struct slab *slabp = virt_to_slab(objp);
+		struct page *page = virt_to_head_page(objp);
 		unsigned int objnr;
 
-		objnr = obj_to_index(cachep, slabp, objp);
+		objnr = obj_to_index(cachep, page, objp);
 		if (objnr) {
-			objp = index_to_obj(cachep, slabp, objnr - 1);
+			objp = index_to_obj(cachep, page, objnr - 1);
 			realobj = (char *)objp + obj_offset(cachep);
 			printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
 			       realobj, size);
 			print_objinfo(cachep, objp, 2);
 		}
 		if (objnr + 1 < cachep->num) {
-			objp = index_to_obj(cachep, slabp, objnr + 1);
+			objp = index_to_obj(cachep, page, objnr + 1);
 			realobj = (char *)objp + obj_offset(cachep);
 			printk(KERN_ERR "Next obj: start=%p, len=%d\n",
 			       realobj, size);
@@ -1933,11 +1910,12 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 #endif
 
 #if DEBUG
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+						struct page *page)
 {
 	int i;
 	for (i = 0; i < cachep->num; i++) {
-		void *objp = index_to_obj(cachep, slabp, i);
+		void *objp = index_to_obj(cachep, page, i);
 
 		if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -1962,7 +1940,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
 	}
 }
 #else
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+						struct page *page)
 {
 }
 #endif
@@ -1976,11 +1955,12 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
  * Before calling the slab must have been unlinked from the cache.  The
  * cache-lock is not held/needed.
  */
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct page *page)
 {
-	struct page *page = virt_to_head_page(slabp->s_mem);
+	struct freelist *freelist;
 
-	slab_destroy_debugcheck(cachep, slabp);
+	freelist = page->freelist;
+	slab_destroy_debugcheck(cachep, page);
 	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
 		struct rcu_head *head;
 
@@ -1998,11 +1978,11 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
 	}
 
 	/*
-	 * From now on, we don't use slab management
+	 * From now on, we don't use freelist
 	 * although actual page can be freed in rcu context
 	 */
 	if (OFF_SLAB(cachep))
-		kmem_cache_free(cachep->slabp_cache, slabp);
+		kmem_cache_free(cachep->freelist_cache, freelist);
 }
 
 /**
@@ -2039,7 +2019,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 			 * use off-slab slabs. Needed to avoid a possible
 			 * looping condition in cache_grow().
 			 */
-			offslab_limit = size - sizeof(struct slab);
+			offslab_limit = size;
 			offslab_limit /= sizeof(unsigned int);
 
  			if (num > offslab_limit)
@@ -2162,7 +2142,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 int
 __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 {
-	size_t left_over, slab_size, ralign;
+	size_t left_over, freelist_size, ralign;
 	gfp_t gfp;
 	int err;
 	size_t size = cachep->size;
@@ -2281,22 +2261,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	if (!cachep->num)
 		return -E2BIG;
 
-	slab_size = ALIGN(cachep->num * sizeof(unsigned int)
-			  + sizeof(struct slab), cachep->align);
+	freelist_size =
+		ALIGN(cachep->num * sizeof(unsigned int), cachep->align);
 
 	/*
 	 * If the slab has been placed off-slab, and we have enough space then
 	 * move it on-slab. This is at the expense of any extra colouring.
 	 */
-	if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
+	if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) {
 		flags &= ~CFLGS_OFF_SLAB;
-		left_over -= slab_size;
+		left_over -= freelist_size;
 	}
 
 	if (flags & CFLGS_OFF_SLAB) {
 		/* really off slab. No need for manual alignment */
-		slab_size =
-		    cachep->num * sizeof(unsigned int) + sizeof(struct slab);
+		freelist_size = cachep->num * sizeof(unsigned int);
 
 #ifdef CONFIG_PAGE_POISONING
 		/* If we're going to use the generic kernel_map_pages()
@@ -2313,7 +2292,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	if (cachep->colour_off < cachep->align)
 		cachep->colour_off = cachep->align;
 	cachep->colour = left_over / cachep->colour_off;
-	cachep->slab_size = slab_size;
+	cachep->freelist_size = freelist_size;
 	cachep->flags = flags;
 	cachep->allocflags = __GFP_COMP;
 	if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
@@ -2322,7 +2301,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	cachep->reciprocal_buffer_size = reciprocal_value(size);
 
 	if (flags & CFLGS_OFF_SLAB) {
-		cachep->slabp_cache = kmalloc_slab(slab_size, 0u);
+		cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
 		/*
 		 * This is a possibility for one of the malloc_sizes caches.
 		 * But since we go off slab only for object size greater than
@@ -2330,7 +2309,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 		 * this should not happen at all.
 		 * But leave a BUG_ON for some lucky dude.
 		 */
-		BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
+		BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache));
 	}
 
 	err = setup_cpu_cache(cachep, gfp);
@@ -2436,7 +2415,7 @@ static int drain_freelist(struct kmem_cache *cache,
 {
 	struct list_head *p;
 	int nr_freed;
-	struct slab *slabp;
+	struct page *page;
 
 	nr_freed = 0;
 	while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
@@ -2448,18 +2427,18 @@ static int drain_freelist(struct kmem_cache *cache,
 			goto out;
 		}
 
-		slabp = list_entry(p, struct slab, list);
+		page = list_entry(p, struct page, lru);
 #if DEBUG
-		BUG_ON(slabp->active);
+		BUG_ON(page->active);
 #endif
-		list_del(&slabp->list);
+		list_del(&page->lru);
 		/*
 		 * Safe to drop the lock. The slab is no longer linked
 		 * to the cache.
 		 */
 		n->free_objects -= cache->num;
 		spin_unlock_irq(&n->list_lock);
-		slab_destroy(cache, slabp);
+		slab_destroy(cache, page);
 		nr_freed++;
 	}
 out:
@@ -2542,18 +2521,18 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
  * descriptors in kmem_cache_create, we search through the malloc_sizes array.
  * If we are creating a malloc_sizes cache here it would not be visible to
  * kmem_find_general_cachep till the initialization is complete.
- * Hence we cannot have slabp_cache same as the original cache.
+ * Hence we cannot have freelist_cache same as the original cache.
  */
-static struct slab *alloc_slabmgmt(struct kmem_cache *cachep,
+static struct freelist *alloc_slabmgmt(struct kmem_cache *cachep,
 				   struct page *page, int colour_off,
 				   gfp_t local_flags, int nodeid)
 {
-	struct slab *slabp;
+	struct freelist *freelist;
 	void *addr = page_address(page);
 
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
-		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+		freelist = kmem_cache_alloc_node(cachep->freelist_cache,
 					      local_flags, nodeid);
 		/*
 		 * If the first object in the slab is leaked (it's allocated
@@ -2561,31 +2540,31 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep,
 		 * kmemleak does not treat the ->s_mem pointer as a reference
 		 * to the object. Otherwise we will not report the leak.
 		 */
-		kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
+		kmemleak_scan_area(&page->lru, sizeof(struct list_head),
 				   local_flags);
-		if (!slabp)
+		if (!freelist)
 			return NULL;
 	} else {
-		slabp = addr + colour_off;
-		colour_off += cachep->slab_size;
+		freelist = addr + colour_off;
+		colour_off += cachep->freelist_size;
 	}
-	slabp->active = 0;
-	slabp->s_mem = addr + colour_off;
-	return slabp;
+	page->active = 0;
+	page->s_mem = addr + colour_off;
+	return freelist;
 }
 
-static inline unsigned int *slab_bufctl(struct slab *slabp)
+static inline unsigned int *slab_bufctl(struct page *page)
 {
-	return (unsigned int *) (slabp + 1);
+	return (unsigned int *)(page->freelist);
 }
 
 static void cache_init_objs(struct kmem_cache *cachep,
-			    struct slab *slabp)
+			    struct page *page)
 {
 	int i;
 
 	for (i = 0; i < cachep->num; i++) {
-		void *objp = index_to_obj(cachep, slabp, i);
+		void *objp = index_to_obj(cachep, page, i);
 #if DEBUG
 		/* need to poison the objs? */
 		if (cachep->flags & SLAB_POISON)
@@ -2621,7 +2600,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
 		if (cachep->ctor)
 			cachep->ctor(objp);
 #endif
-		slab_bufctl(slabp)[i] = i;
+		slab_bufctl(page)[i] = i;
 	}
 }
 
@@ -2635,13 +2614,13 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
 	}
 }
 
-static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
 				int nodeid)
 {
 	void *objp;
 
-	objp = index_to_obj(cachep, slabp, slab_bufctl(slabp)[slabp->active]);
-	slabp->active++;
+	objp = index_to_obj(cachep, page, slab_bufctl(page)[page->active]);
+	page->active++;
 #if DEBUG
 	WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
 #endif
@@ -2649,10 +2628,10 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
 	return objp;
 }
 
-static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
 				void *objp, int nodeid)
 {
-	unsigned int objnr = obj_to_index(cachep, slabp, objp);
+	unsigned int objnr = obj_to_index(cachep, page, objp);
 #if DEBUG
 	unsigned int i;
 
@@ -2660,16 +2639,16 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
 	WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
 
 	/* Verify double free bug */
-	for (i = slabp->active; i < cachep->num; i++) {
-		if (slab_bufctl(slabp)[i] == objnr) {
+	for (i = page->active; i < cachep->num; i++) {
+		if (slab_bufctl(page)[i] == objnr) {
 			printk(KERN_ERR "slab: double free detected in cache "
 					"'%s', objp %p\n", cachep->name, objp);
 			BUG();
 		}
 	}
 #endif
-	slabp->active--;
-	slab_bufctl(slabp)[slabp->active] = objnr;
+	page->active--;
+	slab_bufctl(page)[page->active] = objnr;
 }
 
 /*
@@ -2677,11 +2656,11 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
  * for the slab allocator to be able to lookup the cache and slab of a
  * virtual address for kfree, ksize, and slab debugging.
  */
-static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
-			   struct page *page)
+static void slab_map_pages(struct kmem_cache *cache, struct page *page,
+			   struct freelist *freelist)
 {
 	page->slab_cache = cache;
-	page->slab_page = slab;
+	page->freelist = freelist;
 }
 
 /*
@@ -2691,7 +2670,7 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
 static int cache_grow(struct kmem_cache *cachep,
 		gfp_t flags, int nodeid, struct page *page)
 {
-	struct slab *slabp;
+	struct freelist *freelist;
 	size_t offset;
 	gfp_t local_flags;
 	struct kmem_cache_node *n;
@@ -2738,14 +2717,14 @@ static int cache_grow(struct kmem_cache *cachep,
 		goto failed;
 
 	/* Get slab management. */
-	slabp = alloc_slabmgmt(cachep, page, offset,
+	freelist = alloc_slabmgmt(cachep, page, offset,
 			local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
-	if (!slabp)
+	if (!freelist)
 		goto opps1;
 
-	slab_map_pages(cachep, slabp, page);
+	slab_map_pages(cachep, page, freelist);
 
-	cache_init_objs(cachep, slabp);
+	cache_init_objs(cachep, page);
 
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
@@ -2753,7 +2732,7 @@ static int cache_grow(struct kmem_cache *cachep,
 	spin_lock(&n->list_lock);
 
 	/* Make slab active. */
-	list_add_tail(&slabp->list, &(n->slabs_free));
+	list_add_tail(&page->lru, &(n->slabs_free));
 	STATS_INC_GROWN(cachep);
 	n->free_objects += cachep->num;
 	spin_unlock(&n->list_lock);
@@ -2808,13 +2787,13 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 				   unsigned long caller)
 {
 	unsigned int objnr;
-	struct slab *slabp;
+	struct page *page;
 
 	BUG_ON(virt_to_cache(objp) != cachep);
 
 	objp -= obj_offset(cachep);
 	kfree_debugcheck(objp);
-	slabp = virt_to_slab(objp);
+	page = virt_to_head_page(objp);
 
 	if (cachep->flags & SLAB_RED_ZONE) {
 		verify_redzone_free(cachep, objp);
@@ -2824,10 +2803,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 	if (cachep->flags & SLAB_STORE_USER)
 		*dbg_userword(cachep, objp) = (void *)caller;
 
-	objnr = obj_to_index(cachep, slabp, objp);
+	objnr = obj_to_index(cachep, page, objp);
 
 	BUG_ON(objnr >= cachep->num);
-	BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
+	BUG_ON(objp != index_to_obj(cachep, page, objnr));
 
 	if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -2886,7 +2865,7 @@ retry:
 
 	while (batchcount > 0) {
 		struct list_head *entry;
-		struct slab *slabp;
+		struct page *page;
 		/* Get slab alloc is to come from. */
 		entry = n->slabs_partial.next;
 		if (entry == &n->slabs_partial) {
@@ -2896,7 +2875,7 @@ retry:
 				goto must_grow;
 		}
 
-		slabp = list_entry(entry, struct slab, list);
+		page = list_entry(entry, struct page, lru);
 		check_spinlock_acquired(cachep);
 
 		/*
@@ -2904,23 +2883,23 @@ retry:
 		 * there must be at least one object available for
 		 * allocation.
 		 */
-		BUG_ON(slabp->active >= cachep->num);
+		BUG_ON(page->active >= cachep->num);
 
-		while (slabp->active < cachep->num && batchcount--) {
+		while (page->active < cachep->num && batchcount--) {
 			STATS_INC_ALLOCED(cachep);
 			STATS_INC_ACTIVE(cachep);
 			STATS_SET_HIGH(cachep);
 
-			ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+			ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
 									node));
 		}
 
 		/* move slabp to correct slabp list: */
-		list_del(&slabp->list);
-		if (slabp->active == cachep->num)
-			list_add(&slabp->list, &n->slabs_full);
+		list_del(&page->lru);
+		if (page->active == cachep->num)
+			list_add(&page->list, &n->slabs_full);
 		else
-			list_add(&slabp->list, &n->slabs_partial);
+			list_add(&page->list, &n->slabs_partial);
 	}
 
 must_grow:
@@ -3175,7 +3154,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
 				int nodeid)
 {
 	struct list_head *entry;
-	struct slab *slabp;
+	struct page *page;
 	struct kmem_cache_node *n;
 	void *obj;
 	int x;
@@ -3195,24 +3174,24 @@ retry:
 			goto must_grow;
 	}
 
-	slabp = list_entry(entry, struct slab, list);
+	page = list_entry(entry, struct page, lru);
 	check_spinlock_acquired_node(cachep, nodeid);
 
 	STATS_INC_NODEALLOCS(cachep);
 	STATS_INC_ACTIVE(cachep);
 	STATS_SET_HIGH(cachep);
 
-	BUG_ON(slabp->active == cachep->num);
+	BUG_ON(page->active == cachep->num);
 
-	obj = slab_get_obj(cachep, slabp, nodeid);
+	obj = slab_get_obj(cachep, page, nodeid);
 	n->free_objects--;
 	/* move slabp to correct slabp list: */
-	list_del(&slabp->list);
+	list_del(&page->lru);
 
-	if (slabp->active == cachep->num)
-		list_add(&slabp->list, &n->slabs_full);
+	if (page->active == cachep->num)
+		list_add(&page->lru, &n->slabs_full);
 	else
-		list_add(&slabp->list, &n->slabs_partial);
+		list_add(&page->lru, &n->slabs_partial);
 
 	spin_unlock(&n->list_lock);
 	goto done;
@@ -3362,21 +3341,21 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 
 	for (i = 0; i < nr_objects; i++) {
 		void *objp;
-		struct slab *slabp;
+		struct page *page;
 
 		clear_obj_pfmemalloc(&objpp[i]);
 		objp = objpp[i];
 
-		slabp = virt_to_slab(objp);
+		page = virt_to_head_page(objp);
 		n = cachep->node[node];
-		list_del(&slabp->list);
+		list_del(&page->lru);
 		check_spinlock_acquired_node(cachep, node);
-		slab_put_obj(cachep, slabp, objp, node);
+		slab_put_obj(cachep, page, objp, node);
 		STATS_DEC_ACTIVE(cachep);
 		n->free_objects++;
 
 		/* fixup slab chains */
-		if (slabp->active == 0) {
+		if (page->active == 0) {
 			if (n->free_objects > n->free_limit) {
 				n->free_objects -= cachep->num;
 				/* No need to drop any previously held
@@ -3385,16 +3364,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 				 * a different cache, refer to comments before
 				 * alloc_slabmgmt.
 				 */
-				slab_destroy(cachep, slabp);
+				slab_destroy(cachep, page);
 			} else {
-				list_add(&slabp->list, &n->slabs_free);
+				list_add(&page->lru, &n->slabs_free);
 			}
 		} else {
 			/* Unconditionally move a slab to the end of the
 			 * partial list on free - maximum time for the
 			 * other objects to be freed, too.
 			 */
-			list_add_tail(&slabp->list, &n->slabs_partial);
+			list_add_tail(&page->lru, &n->slabs_partial);
 		}
 	}
 }
@@ -3434,10 +3413,10 @@ free_done:
 
 		p = n->slabs_free.next;
 		while (p != &(n->slabs_free)) {
-			struct slab *slabp;
+			struct page *page;
 
-			slabp = list_entry(p, struct slab, list);
-			BUG_ON(slabp->active);
+			page = list_entry(p, struct page, lru);
+			BUG_ON(page->active);
 
 			i++;
 			p = p->next;
@@ -4041,7 +4020,7 @@ out:
 #ifdef CONFIG_SLABINFO
 void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 {
-	struct slab *slabp;
+	struct page *page;
 	unsigned long active_objs;
 	unsigned long num_objs;
 	unsigned long active_slabs = 0;
@@ -4061,22 +4040,22 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 		check_irq_on();
 		spin_lock_irq(&n->list_lock);
 
-		list_for_each_entry(slabp, &n->slabs_full, list) {
-			if (slabp->active != cachep->num && !error)
+		list_for_each_entry(page, &n->slabs_full, lru) {
+			if (page->active != cachep->num && !error)
 				error = "slabs_full accounting error";
 			active_objs += cachep->num;
 			active_slabs++;
 		}
-		list_for_each_entry(slabp, &n->slabs_partial, list) {
-			if (slabp->active == cachep->num && !error)
+		list_for_each_entry(page, &n->slabs_partial, lru) {
+			if (page->active == cachep->num && !error)
 				error = "slabs_partial accounting error";
-			if (!slabp->active && !error)
+			if (!page->active && !error)
 				error = "slabs_partial accounting error";
-			active_objs += slabp->active;
+			active_objs += page->active;
 			active_slabs++;
 		}
-		list_for_each_entry(slabp, &n->slabs_free, list) {
-			if (slabp->active && !error)
+		list_for_each_entry(page, &n->slabs_free, lru) {
+			if (page->active && !error)
 				error = "slabs_free accounting error";
 			num_slabs++;
 		}
@@ -4229,19 +4208,20 @@ static inline int add_caller(unsigned long *n, unsigned long v)
 	return 1;
 }
 
-static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
+static void handle_slab(unsigned long *n, struct kmem_cache *c,
+						struct page *page)
 {
 	void *p;
 	int i, j;
 
 	if (n[0] == n[1])
 		return;
-	for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) {
+	for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
 		bool active = true;
 
-		for (j = s->active; j < c->num; j++) {
+		for (j = page->active; j < c->num; j++) {
 			/* Skip freed item */
-			if (slab_bufctl(s)[j] == i) {
+			if (slab_bufctl(page)[j] == i) {
 				active = false;
 				break;
 			}
@@ -4273,7 +4253,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
 static int leaks_show(struct seq_file *m, void *p)
 {
 	struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
-	struct slab *slabp;
+	struct page *page;
 	struct kmem_cache_node *n;
 	const char *name;
 	unsigned long *x = m->private;
@@ -4297,10 +4277,10 @@ static int leaks_show(struct seq_file *m, void *p)
 		check_irq_on();
 		spin_lock_irq(&n->list_lock);
 
-		list_for_each_entry(slabp, &n->slabs_full, list)
-			handle_slab(x, cachep, slabp);
-		list_for_each_entry(slabp, &n->slabs_partial, list)
-			handle_slab(x, cachep, slabp);
+		list_for_each_entry(page, &n->slabs_full, lru)
+			handle_slab(x, cachep, page);
+		list_for_each_entry(page, &n->slabs_partial, lru)
+			handle_slab(x, cachep, page);
 		spin_unlock_irq(&n->list_lock);
 	}
 	name = cachep->name;
-- 
cgit v1.2.3


From 87dc800be2499128efb3a6f059d75dc8e1e6d503 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 10:30:04 -0400
Subject: new helper: kfree_put_link()

duplicated to hell and back...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cifs/cifsfs.c    |  2 +-
 fs/cifs/cifsfs.h    |  2 --
 fs/cifs/link.c      |  7 -------
 fs/ecryptfs/inode.c | 12 +-----------
 fs/gfs2/inode.c     |  9 +--------
 fs/libfs.c          | 10 ++++++++++
 fs/proc/self.c      | 10 +---------
 include/linux/fs.h  |  1 +
 8 files changed, 15 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a16b4e58bcc6..801975c34cf9 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -860,7 +860,7 @@ const struct inode_operations cifs_file_inode_ops = {
 const struct inode_operations cifs_symlink_inode_ops = {
 	.readlink = generic_readlink,
 	.follow_link = cifs_follow_link,
-	.put_link = cifs_put_link,
+	.put_link = kfree_put_link,
 	.permission = cifs_permission,
 	/* BB add the following two eventually */
 	/* revalidate: cifs_revalidate,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 6d0b07217ac9..26a754f49ba1 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -115,8 +115,6 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
 
 /* Functions related to symlinks */
 extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
-extern void cifs_put_link(struct dentry *direntry,
-			  struct nameidata *nd, void *);
 extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
 			 int buflen);
 extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 7e36ceba0c7a..cc0234710ddb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -621,10 +621,3 @@ symlink_exit:
 	free_xid(xid);
 	return rc;
 }
-
-void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
-{
-	char *p = nd_get_link(nd);
-	if (!IS_ERR(p))
-		kfree(p);
-}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 2ca62c40c6c2..0f9b66eaa767 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -703,16 +703,6 @@ out:
 	return NULL;
 }
 
-static void
-ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
-{
-	char *buf = nd_get_link(nd);
-	if (!IS_ERR(buf)) {
-		/* Free the char* */
-		kfree(buf);
-	}
-}
-
 /**
  * upper_size_to_lower_size
  * @crypt_stat: Crypt_stat associated with file
@@ -1121,7 +1111,7 @@ out:
 const struct inode_operations ecryptfs_symlink_iops = {
 	.readlink = generic_readlink,
 	.follow_link = ecryptfs_follow_link,
-	.put_link = ecryptfs_put_link,
+	.put_link = kfree_put_link,
 	.permission = ecryptfs_permission,
 	.setattr = ecryptfs_setattr,
 	.getattr = ecryptfs_getattr_link,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index ced3257f06e8..d2384f7c53e1 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1506,13 +1506,6 @@ out:
 	return NULL;
 }
 
-static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
-	char *s = nd_get_link(nd);
-	if (!IS_ERR(s))
-		kfree(s);
-}
-
 /**
  * gfs2_permission -
  * @inode: The inode
@@ -1864,7 +1857,7 @@ const struct inode_operations gfs2_dir_iops = {
 const struct inode_operations gfs2_symlink_iops = {
 	.readlink = generic_readlink,
 	.follow_link = gfs2_follow_link,
-	.put_link = gfs2_put_link,
+	.put_link = kfree_put_link,
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
 	.getattr = gfs2_getattr,
diff --git a/fs/libfs.c b/fs/libfs.c
index 828622a31d30..8c5018493154 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -10,6 +10,7 @@
 #include <linux/vfs.h>
 #include <linux/quotaops.h>
 #include <linux/mutex.h>
+#include <linux/namei.h>
 #include <linux/exportfs.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h> /* sync_mapping_buffers */
@@ -992,3 +993,12 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return 0;
 }
 EXPORT_SYMBOL(noop_fsync);
+
+void kfree_put_link(struct dentry *dentry, struct nameidata *nd,
+				void *cookie)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s))
+		kfree(s);
+}
+EXPORT_SYMBOL(kfree_put_link);
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 6b6a993b5c25..ffeb202ec942 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -36,18 +36,10 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 	return NULL;
 }
 
-static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
-				void *cookie)
-{
-	char *s = nd_get_link(nd);
-	if (!IS_ERR(s))
-		kfree(s);
-}
-
 static const struct inode_operations proc_self_inode_operations = {
 	.readlink	= proc_self_readlink,
 	.follow_link	= proc_self_follow_link,
-	.put_link	= proc_self_put_link,
+	.put_link	= kfree_put_link,
 };
 
 static unsigned self_inum;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f40547ba191..d80c2437d624 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2502,6 +2502,7 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len,
 		int nofs);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
+extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 extern int vfs_getattr(struct path *, struct kstat *);
-- 
cgit v1.2.3


From e84f9e57b90ca89664d733a7cef19aa7ccd832f3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 22 Sep 2013 14:17:15 -0400
Subject: consolidate the reassignments of ->f_op in ->open() instances

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/char/misc.c             | 12 +++---------
 drivers/gpu/drm/drm_fops.c      | 17 ++++++-----------
 drivers/media/dvb-core/dvbdev.c | 19 ++++++-------------
 drivers/usb/core/file.c         | 16 ++++------------
 fs/char_dev.c                   |  6 ++++--
 include/linux/fs.h              | 11 +++++++++++
 sound/core/sound.c              | 22 ++++++----------------
 sound/sound_core.c              | 17 +++--------------
 8 files changed, 43 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 190d4423653f..fd0f7c4b4df9 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -114,7 +114,7 @@ static int misc_open(struct inode * inode, struct file * file)
 	int minor = iminor(inode);
 	struct miscdevice *c;
 	int err = -ENODEV;
-	const struct file_operations *old_fops, *new_fops = NULL;
+	const struct file_operations *new_fops = NULL;
 
 	mutex_lock(&misc_mtx);
 	
@@ -141,17 +141,11 @@ static int misc_open(struct inode * inode, struct file * file)
 	}
 
 	err = 0;
-	old_fops = file->f_op;
-	file->f_op = new_fops;
+	replace_fops(file, new_fops);
 	if (file->f_op->open) {
 		file->private_data = c;
-		err=file->f_op->open(inode,file);
-		if (err) {
-			fops_put(file->f_op);
-			file->f_op = fops_get(old_fops);
-		}
+		err = file->f_op->open(inode,file);
 	}
-	fops_put(old_fops);
 fail:
 	mutex_unlock(&misc_mtx);
 	return err;
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 3f84277d7036..22d14ecbd3ec 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -148,7 +148,7 @@ int drm_stub_open(struct inode *inode, struct file *filp)
 	struct drm_minor *minor;
 	int minor_id = iminor(inode);
 	int err = -ENODEV;
-	const struct file_operations *old_fops;
+	const struct file_operations *new_fops;
 
 	DRM_DEBUG("\n");
 
@@ -163,18 +163,13 @@ int drm_stub_open(struct inode *inode, struct file *filp)
 	if (drm_device_is_unplugged(dev))
 		goto out;
 
-	old_fops = filp->f_op;
-	filp->f_op = fops_get(dev->driver->fops);
-	if (filp->f_op == NULL) {
-		filp->f_op = old_fops;
+	new_fops = fops_get(dev->driver->fops);
+	if (!new_fops)
 		goto out;
-	}
-	if (filp->f_op->open && (err = filp->f_op->open(inode, filp))) {
-		fops_put(filp->f_op);
-		filp->f_op = fops_get(old_fops);
-	}
-	fops_put(old_fops);
 
+	replace_fops(filp, new_fops);
+	if (filp->f_op->open)
+		err = filp->f_op->open(inode, filp);
 out:
 	mutex_unlock(&drm_global_mutex);
 	return err;
diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c
index 401ef64f92c6..983db75de350 100644
--- a/drivers/media/dvb-core/dvbdev.c
+++ b/drivers/media/dvb-core/dvbdev.c
@@ -74,22 +74,15 @@ static int dvb_device_open(struct inode *inode, struct file *file)
 
 	if (dvbdev && dvbdev->fops) {
 		int err = 0;
-		const struct file_operations *old_fops;
+		const struct file_operations *new_fops;
 
-		file->private_data = dvbdev;
-		old_fops = file->f_op;
-		file->f_op = fops_get(dvbdev->fops);
-		if (file->f_op == NULL) {
-			file->f_op = old_fops;
+		new_fops = fops_get(dvbdev->fops);
+		if (!new_fops)
 			goto fail;
-		}
-		if(file->f_op->open)
+		file->private_data = dvbdev;
+		replace_fops(file, new_fops);
+		if (file->f_op->open)
 			err = file->f_op->open(inode,file);
-		if (err) {
-			fops_put(file->f_op);
-			file->f_op = fops_get(old_fops);
-		}
-		fops_put(old_fops);
 		up_read(&minor_rwsem);
 		mutex_unlock(&dvbdev_mutex);
 		return err;
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
index 7421888087a3..fe0d8365411a 100644
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -29,27 +29,19 @@ static DECLARE_RWSEM(minor_rwsem);
 
 static int usb_open(struct inode * inode, struct file * file)
 {
-	int minor = iminor(inode);
-	const struct file_operations *c;
 	int err = -ENODEV;
-	const struct file_operations *old_fops, *new_fops = NULL;
+	const struct file_operations *new_fops;
 
 	down_read(&minor_rwsem);
-	c = usb_minors[minor];
+	new_fops = fops_get(usb_minors[iminor(inode)]);
 
-	if (!c || !(new_fops = fops_get(c)))
+	if (!new_fops)
 		goto done;
 
-	old_fops = file->f_op;
-	file->f_op = new_fops;
+	replace_fops(file, new_fops);
 	/* Curiouser and curiouser... NULL ->open() as "no device" ? */
 	if (file->f_op->open)
 		err = file->f_op->open(inode,file);
-	if (err) {
-		fops_put(file->f_op);
-		file->f_op = fops_get(old_fops);
-	}
-	fops_put(old_fops);
  done:
 	up_read(&minor_rwsem);
 	return err;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index afc2bb691780..94b5f60076da 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -368,6 +368,7 @@ void cdev_put(struct cdev *p)
  */
 static int chrdev_open(struct inode *inode, struct file *filp)
 {
+	const struct file_operations *fops;
 	struct cdev *p;
 	struct cdev *new = NULL;
 	int ret = 0;
@@ -400,10 +401,11 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 		return ret;
 
 	ret = -ENXIO;
-	filp->f_op = fops_get(p->ops);
-	if (!filp->f_op)
+	fops = fops_get(p->ops);
+	if (!fops)
 		goto out_cdev_put;
 
+	replace_fops(filp, fops);
 	if (filp->f_op->open) {
 		ret = filp->f_op->open(inode, filp);
 		if (ret)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d80c2437d624..b09e4e1d747a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1875,6 +1875,17 @@ extern struct dentry *mount_pseudo(struct file_system_type *, char *,
 	(((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
 #define fops_put(fops) \
 	do { if (fops) module_put((fops)->owner); } while(0)
+/*
+ * This one is to be used *ONLY* from ->open() instances.
+ * fops must be non-NULL, pinned down *and* module dependencies
+ * should be sufficient to pin the caller down as well.
+ */
+#define replace_fops(f, fops) \
+	do {	\
+		struct file *__file = (f); \
+		fops_put(__file->f_op); \
+		BUG_ON(!(__file->f_op = (fops))); \
+	} while(0)
 
 extern int register_filesystem(struct file_system_type *);
 extern int unregister_filesystem(struct file_system_type *);
diff --git a/sound/core/sound.c b/sound/core/sound.c
index f002bd911dae..437c25ea6403 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -153,7 +153,7 @@ static int snd_open(struct inode *inode, struct file *file)
 {
 	unsigned int minor = iminor(inode);
 	struct snd_minor *mptr = NULL;
-	const struct file_operations *old_fops;
+	const struct file_operations *new_fops;
 	int err = 0;
 
 	if (minor >= ARRAY_SIZE(snd_minors))
@@ -167,24 +167,14 @@ static int snd_open(struct inode *inode, struct file *file)
 			return -ENODEV;
 		}
 	}
-	old_fops = file->f_op;
-	file->f_op = fops_get(mptr->f_ops);
-	if (file->f_op == NULL) {
-		file->f_op = old_fops;
-		err = -ENODEV;
-	}
+	new_fops = fops_get(mptr->f_ops);
 	mutex_unlock(&sound_mutex);
-	if (err < 0)
-		return err;
+	if (!new_fops)
+		return -ENODEV;
+	replace_fops(file, new_fops);
 
-	if (file->f_op->open) {
+	if (file->f_op->open)
 		err = file->f_op->open(inode, file);
-		if (err) {
-			fops_put(file->f_op);
-			file->f_op = fops_get(old_fops);
-		}
-	}
-	fops_put(old_fops);
 	return err;
 }
 
diff --git a/sound/sound_core.c b/sound/sound_core.c
index 45759f4cca75..11e953a1fa45 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -626,31 +626,20 @@ static int soundcore_open(struct inode *inode, struct file *file)
 		if (s)
 			new_fops = fops_get(s->unit_fops);
 	}
+	spin_unlock(&sound_loader_lock);
 	if (new_fops) {
 		/*
 		 * We rely upon the fact that we can't be unloaded while the
-		 * subdriver is there, so if ->open() is successful we can
-		 * safely drop the reference counter and if it is not we can
-		 * revert to old ->f_op. Ugly, indeed, but that's the cost of
-		 * switching ->f_op in the first place.
+		 * subdriver is there.
 		 */
 		int err = 0;
-		const struct file_operations *old_fops = file->f_op;
-		file->f_op = new_fops;
-		spin_unlock(&sound_loader_lock);
+		replace_fops(file, new_fops);
 
 		if (file->f_op->open)
 			err = file->f_op->open(inode,file);
 
-		if (err) {
-			fops_put(file->f_op);
-			file->f_op = fops_get(old_fops);
-		}
-
-		fops_put(old_fops);
 		return err;
 	}
-	spin_unlock(&sound_loader_lock);
 	return -ENODEV;
 }
 
-- 
cgit v1.2.3


From b70a80e7a133a0c86f2fa078e7c144597c516415 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Oct 2013 16:44:54 +0200
Subject: vfs: introduce d_instantiate_no_diralias()

...which just returns -EBUSY if a directory alias would be created.

This is to be used by fuse mkdir to make sure that a buggy or malicious
userspace filesystem doesn't do anything nasty.  Previously fuse used a
private mutex for this purpose, which can now go away.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/dcache.c            | 27 +++++++++++++++++++++++++++
 fs/fuse/dir.c          | 40 +++++-----------------------------------
 fs/fuse/fuse_i.h       |  3 ---
 fs/fuse/inode.c        |  2 --
 include/linux/dcache.h |  1 +
 5 files changed, 33 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index d888223a5486..c8e83d0d61ac 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1801,6 +1801,33 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 
 EXPORT_SYMBOL(d_instantiate_unique);
 
+/**
+ * d_instantiate_no_diralias - instantiate a non-aliased dentry
+ * @entry: dentry to complete
+ * @inode: inode to attach to this dentry
+ *
+ * Fill in inode information in the entry.  If a directory alias is found, then
+ * return an error (and drop inode).  Together with d_materialise_unique() this
+ * guarantees that a directory inode may never have more than one alias.
+ */
+int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
+{
+	BUG_ON(!hlist_unhashed(&entry->d_alias));
+
+	spin_lock(&inode->i_lock);
+	if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
+		spin_unlock(&inode->i_lock);
+		iput(inode);
+		return -EBUSY;
+	}
+	__d_instantiate(entry, inode);
+	spin_unlock(&inode->i_lock);
+	security_d_instantiate(entry, inode);
+
+	return 0;
+}
+EXPORT_SYMBOL(d_instantiate_no_diralias);
+
 struct dentry *d_make_root(struct inode *root_inode)
 {
 	struct dentry *res = NULL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7989f2ab4c4..c3eb2c46c8f1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -342,24 +342,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
 	return err;
 }
 
-static struct dentry *fuse_materialise_dentry(struct dentry *dentry,
-					      struct inode *inode)
-{
-	struct dentry *newent;
-
-	if (inode && S_ISDIR(inode->i_mode)) {
-		struct fuse_conn *fc = get_fuse_conn(inode);
-
-		mutex_lock(&fc->inst_mutex);
-		newent = d_materialise_unique(dentry, inode);
-		mutex_unlock(&fc->inst_mutex);
-	} else {
-		newent = d_materialise_unique(dentry, inode);
-	}
-
-	return newent;
-}
-
 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 				  unsigned int flags)
 {
@@ -382,7 +364,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
 		goto out_iput;
 
-	newent = fuse_materialise_dentry(entry, inode);
+	newent = d_materialise_unique(entry, inode);
 	err = PTR_ERR(newent);
 	if (IS_ERR(newent))
 		goto out_err;
@@ -601,21 +583,9 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 	}
 	kfree(forget);
 
-	if (S_ISDIR(inode->i_mode)) {
-		struct dentry *alias;
-		mutex_lock(&fc->inst_mutex);
-		alias = d_find_alias(inode);
-		if (alias) {
-			/* New directory must have moved since mkdir */
-			mutex_unlock(&fc->inst_mutex);
-			dput(alias);
-			iput(inode);
-			return -EBUSY;
-		}
-		d_instantiate(entry, inode);
-		mutex_unlock(&fc->inst_mutex);
-	} else
-		d_instantiate(entry, inode);
+	err = d_instantiate_no_diralias(entry, inode);
+	if (err)
+		return err;
 
 	fuse_change_entry_timeout(entry, &outarg);
 	fuse_invalidate_attr(dir);
@@ -1284,7 +1254,7 @@ static int fuse_direntplus_link(struct file *file,
 	if (!inode)
 		goto out;
 
-	alias = fuse_materialise_dentry(dentry, inode);
+	alias = d_materialise_unique(dentry, inode);
 	err = PTR_ERR(alias);
 	if (IS_ERR(alias))
 		goto out;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5b9e6f3b6aef..fe446c222e3f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -374,9 +374,6 @@ struct fuse_conn {
 	/** Lock protecting accessess to  members of this structure */
 	spinlock_t lock;
 
-	/** Mutex protecting against directory alias creation */
-	struct mutex inst_mutex;
-
 	/** Refcount */
 	atomic_t count;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index a8ce6dab60a0..1c15613c64f8 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -565,7 +565,6 @@ void fuse_conn_init(struct fuse_conn *fc)
 {
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
-	mutex_init(&fc->inst_mutex);
 	init_rwsem(&fc->killsb);
 	atomic_set(&fc->count, 1);
 	init_waitqueue_head(&fc->waitq);
@@ -596,7 +595,6 @@ void fuse_conn_put(struct fuse_conn *fc)
 	if (atomic_dec_and_test(&fc->count)) {
 		if (fc->destroy_req)
 			fuse_request_free(fc->destroy_req);
-		mutex_destroy(&fc->inst_mutex);
 		fc->release(fc);
 	}
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 59066e0b4ff1..716c3760ee39 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -224,6 +224,7 @@ static inline int dname_external(const struct dentry *dentry)
 extern void d_instantiate(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
 extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
+extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
 extern void __d_drop(struct dentry *dentry);
 extern void d_drop(struct dentry *dentry);
 extern void d_delete(struct dentry *);
-- 
cgit v1.2.3


From e2fec7c35582e7bb41cccc1761faa2af4dc17627 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Oct 2013 17:06:56 -0400
Subject: make freeing super_block rcu-delayed

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c         | 2 +-
 include/linux/fs.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/super.c b/fs/super.c
index efa6e488a95c..743bb7118053 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -149,7 +149,7 @@ static void destroy_super(struct super_block *s)
 	WARN_ON(!list_empty(&s->s_mounts));
 	kfree(s->s_subtype);
 	kfree(s->s_options);
-	kfree(s);
+	kfree_rcu(s, rcu);
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b09e4e1d747a..2ab8a67ee054 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1330,6 +1330,7 @@ struct super_block {
 	 */
 	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
 	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
+	struct rcu_head		rcu;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
-- 
cgit v1.2.3


From 1adfcb03e31ba0d6be5fddf773da4357d0792cbb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Oct 2013 13:28:06 -0400
Subject: pid_namespace: make freeing struct pid_namespace rcu-delayed

makes procfs ->premission() instances safety in RCU mode independent
from vfsmount_lock.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/pid_namespace.h | 1 +
 kernel/pid_namespace.c        | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index e2772666f004..7246ef3d4455 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -23,6 +23,7 @@ struct bsd_acct_struct;
 struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
+	struct rcu_head rcu;
 	int last_pid;
 	unsigned int nr_hashed;
 	struct task_struct *child_reaper;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 42086551a24a..06c62de9c711 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -132,6 +132,12 @@ out:
 	return ERR_PTR(err);
 }
 
+static void delayed_free_pidns(struct rcu_head *p)
+{
+	kmem_cache_free(pid_ns_cachep,
+			container_of(p, struct pid_namespace, rcu));
+}
+
 static void destroy_pid_namespace(struct pid_namespace *ns)
 {
 	int i;
@@ -140,7 +146,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
 	put_user_ns(ns->user_ns);
-	kmem_cache_free(pid_ns_cachep, ns);
+	call_rcu(&ns->rcu, delayed_free_pidns);
 }
 
 struct pid_namespace *copy_pid_ns(unsigned long flags,
-- 
cgit v1.2.3


From adfedd9a32e4e3490c0060576fd824881572b72a Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Wed, 16 Oct 2013 13:29:02 +0530
Subject: dmaengine: use DMA_COMPLETE for dma completion status

the DMA_SUCCESS is a misnomer as dmaengine indicates the transfer is complete and
gives no guarantee of the transfer success. Hence we should use DMA_COMPLTE
instead of DMA_SUCCESS

Acked-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/dmaengine.c   |  2 +-
 include/linux/dmaengine.h | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 9162ac80c18f..81d876528c70 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1062,7 +1062,7 @@ dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
 
 	if (!tx)
-		return DMA_SUCCESS;
+		return DMA_COMPLETE;
 
 	while (tx->cookie == -EBUSY) {
 		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 0bc727534108..120e64c96478 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -45,13 +45,13 @@ static inline int dma_submit_error(dma_cookie_t cookie)
 
 /**
  * enum dma_status - DMA transaction status
- * @DMA_SUCCESS: transaction completed successfully
+ * @DMA_COMPLETE: transaction completed
  * @DMA_IN_PROGRESS: transaction not yet processed
  * @DMA_PAUSED: transaction is paused
  * @DMA_ERROR: transaction failed
  */
 enum dma_status {
-	DMA_SUCCESS,
+	DMA_SUCCESS = 0, DMA_COMPLETE = 0,
 	DMA_IN_PROGRESS,
 	DMA_PAUSED,
 	DMA_ERROR,
@@ -979,10 +979,10 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
 {
 	if (last_complete <= last_used) {
 		if ((cookie <= last_complete) || (cookie > last_used))
-			return DMA_SUCCESS;
+			return DMA_COMPLETE;
 	} else {
 		if ((cookie <= last_complete) && (cookie > last_used))
-			return DMA_SUCCESS;
+			return DMA_COMPLETE;
 	}
 	return DMA_IN_PROGRESS;
 }
@@ -1013,11 +1013,11 @@ static inline struct dma_chan *dma_find_channel(enum dma_transaction_type tx_typ
 }
 static inline enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
 {
-	return DMA_SUCCESS;
+	return DMA_COMPLETE;
 }
 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
-	return DMA_SUCCESS;
+	return DMA_COMPLETE;
 }
 static inline void dma_issue_pending_all(void)
 {
-- 
cgit v1.2.3


From 7db5f7274a0b065abdc358be2a44b4a911d75707 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Thu, 17 Oct 2013 07:29:57 +0530
Subject: dmaengine: remove unused DMA_SUCCESS

after all the users are converted

Acked-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 120e64c96478..4b460a683968 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -51,7 +51,7 @@ static inline int dma_submit_error(dma_cookie_t cookie)
  * @DMA_ERROR: transaction failed
  */
 enum dma_status {
-	DMA_SUCCESS = 0, DMA_COMPLETE = 0,
+	DMA_COMPLETE,
 	DMA_IN_PROGRESS,
 	DMA_PAUSED,
 	DMA_ERROR,
-- 
cgit v1.2.3


From 5953316dbf90067ebdeca626c34488bc166b73a8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 23 May 2013 12:25:08 +0200
Subject: block: make rq->cmd_flags be 64-bit

We have officially run out of flags in a 32-bit space. Extend it
to 64-bit even on 32-bit archs.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c          |  4 +--
 drivers/block/floppy.c    |  4 +--
 drivers/scsi/sd.c         |  2 +-
 include/linux/blk_types.h | 66 +++++++++++++++++++++++------------------------
 include/linux/blkdev.h    |  4 +--
 5 files changed, 40 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 0a00e4ecf87c..213e9f01c627 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -174,9 +174,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 {
 	int bit;
 
-	printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
+	printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg,
 		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
-		rq->cmd_flags);
+		(unsigned long long) rq->cmd_flags);
 
 	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
 	       (unsigned long long)blk_rq_pos(rq),
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 04ceb7e2fadd..000abe2f105c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2886,9 +2886,9 @@ static void do_fd_request(struct request_queue *q)
 		return;
 
 	if (WARN(atomic_read(&usage_count) == 0,
-		 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n",
+		 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n",
 		 current_req, (long)blk_rq_pos(current_req), current_req->cmd_type,
-		 current_req->cmd_flags))
+		 (unsigned long long) current_req->cmd_flags))
 		return;
 
 	if (test_and_set_bit(0, &fdc_busy)) {
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e62d17d41d4e..dae6133a538d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1002,7 +1002,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 		SCpnt->cmnd[0] = READ_6;
 		SCpnt->sc_data_direction = DMA_FROM_DEVICE;
 	} else {
-		scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags);
+		scmd_printk(KERN_ERR, SCpnt, "Unknown command %llx\n", (unsigned long long) rq->cmd_flags);
 		goto out;
 	}
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fa1abeb45b76..c26801e14788 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -181,16 +181,16 @@ enum rq_flag_bits {
 	__REQ_NR_BITS,		/* stops here */
 };
 
-#define REQ_WRITE		(1 << __REQ_WRITE)
-#define REQ_FAILFAST_DEV	(1 << __REQ_FAILFAST_DEV)
-#define REQ_FAILFAST_TRANSPORT	(1 << __REQ_FAILFAST_TRANSPORT)
-#define REQ_FAILFAST_DRIVER	(1 << __REQ_FAILFAST_DRIVER)
-#define REQ_SYNC		(1 << __REQ_SYNC)
-#define REQ_META		(1 << __REQ_META)
-#define REQ_PRIO		(1 << __REQ_PRIO)
-#define REQ_DISCARD		(1 << __REQ_DISCARD)
-#define REQ_WRITE_SAME		(1 << __REQ_WRITE_SAME)
-#define REQ_NOIDLE		(1 << __REQ_NOIDLE)
+#define REQ_WRITE		(1ULL << __REQ_WRITE)
+#define REQ_FAILFAST_DEV	(1ULL << __REQ_FAILFAST_DEV)
+#define REQ_FAILFAST_TRANSPORT	(1ULL << __REQ_FAILFAST_TRANSPORT)
+#define REQ_FAILFAST_DRIVER	(1ULL << __REQ_FAILFAST_DRIVER)
+#define REQ_SYNC		(1ULL << __REQ_SYNC)
+#define REQ_META		(1ULL << __REQ_META)
+#define REQ_PRIO		(1ULL << __REQ_PRIO)
+#define REQ_DISCARD		(1ULL << __REQ_DISCARD)
+#define REQ_WRITE_SAME		(1ULL << __REQ_WRITE_SAME)
+#define REQ_NOIDLE		(1ULL << __REQ_NOIDLE)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@ -206,28 +206,28 @@ enum rq_flag_bits {
 #define REQ_NOMERGE_FLAGS \
 	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
 
-#define REQ_RAHEAD		(1 << __REQ_RAHEAD)
-#define REQ_THROTTLED		(1 << __REQ_THROTTLED)
-
-#define REQ_SORTED		(1 << __REQ_SORTED)
-#define REQ_SOFTBARRIER		(1 << __REQ_SOFTBARRIER)
-#define REQ_FUA			(1 << __REQ_FUA)
-#define REQ_NOMERGE		(1 << __REQ_NOMERGE)
-#define REQ_STARTED		(1 << __REQ_STARTED)
-#define REQ_DONTPREP		(1 << __REQ_DONTPREP)
-#define REQ_QUEUED		(1 << __REQ_QUEUED)
-#define REQ_ELVPRIV		(1 << __REQ_ELVPRIV)
-#define REQ_FAILED		(1 << __REQ_FAILED)
-#define REQ_QUIET		(1 << __REQ_QUIET)
-#define REQ_PREEMPT		(1 << __REQ_PREEMPT)
-#define REQ_ALLOCED		(1 << __REQ_ALLOCED)
-#define REQ_COPY_USER		(1 << __REQ_COPY_USER)
-#define REQ_FLUSH		(1 << __REQ_FLUSH)
-#define REQ_FLUSH_SEQ		(1 << __REQ_FLUSH_SEQ)
-#define REQ_IO_STAT		(1 << __REQ_IO_STAT)
-#define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
-#define REQ_SECURE		(1 << __REQ_SECURE)
-#define REQ_KERNEL		(1 << __REQ_KERNEL)
-#define REQ_PM			(1 << __REQ_PM)
+#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
+#define REQ_THROTTLED		(1ULL << __REQ_THROTTLED)
+
+#define REQ_SORTED		(1ULL << __REQ_SORTED)
+#define REQ_SOFTBARRIER		(1ULL << __REQ_SOFTBARRIER)
+#define REQ_FUA			(1ULL << __REQ_FUA)
+#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
+#define REQ_STARTED		(1ULL << __REQ_STARTED)
+#define REQ_DONTPREP		(1ULL << __REQ_DONTPREP)
+#define REQ_QUEUED		(1ULL << __REQ_QUEUED)
+#define REQ_ELVPRIV		(1ULL << __REQ_ELVPRIV)
+#define REQ_FAILED		(1ULL << __REQ_FAILED)
+#define REQ_QUIET		(1ULL << __REQ_QUIET)
+#define REQ_PREEMPT		(1ULL << __REQ_PREEMPT)
+#define REQ_ALLOCED		(1ULL << __REQ_ALLOCED)
+#define REQ_COPY_USER		(1ULL << __REQ_COPY_USER)
+#define REQ_FLUSH		(1ULL << __REQ_FLUSH)
+#define REQ_FLUSH_SEQ		(1ULL << __REQ_FLUSH_SEQ)
+#define REQ_IO_STAT		(1ULL << __REQ_IO_STAT)
+#define REQ_MIXED_MERGE		(1ULL << __REQ_MIXED_MERGE)
+#define REQ_SECURE		(1ULL << __REQ_SECURE)
+#define REQ_KERNEL		(1ULL << __REQ_KERNEL)
+#define REQ_PM			(1ULL << __REQ_PM)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0e6f765aa1f5..f5c7596c93dd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -99,7 +99,7 @@ struct request {
 
 	struct request_queue *q;
 
-	unsigned int cmd_flags;
+	u64 cmd_flags;
 	enum rq_cmd_type_bits cmd_type;
 	unsigned long atomic_flags;
 
@@ -570,7 +570,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
-#define rq_data_dir(rq)		((rq)->cmd_flags & 1)
+#define rq_data_dir(rq)		(((rq)->cmd_flags & 1) != 0)
 
 static inline unsigned int blk_queue_cluster(struct request_queue *q)
 {
-- 
cgit v1.2.3


From 71fe07d040626de7b72244bf6de889c2e0f5aea3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 4 Oct 2013 06:49:11 -0700
Subject: block: remove request ref_count

This reference count has been around since before git history, but the only
place where it's used is in blk_execute_rq, and ther it is entirely useless
as it is incremented before submitting the request and decremented in the
end_io handler before waking up the submitter thread.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 3 ---
 block/blk-exec.c       | 7 -------
 include/linux/blkdev.h | 2 --
 3 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 213e9f01c627..18faa7e81d3b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -145,7 +145,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->cmd = rq->__cmd;
 	rq->cmd_len = BLK_MAX_CDB;
 	rq->tag = -1;
-	rq->ref_count = 1;
 	rq->start_time = jiffies;
 	set_start_time_ns(rq);
 	rq->part = NULL;
@@ -1272,8 +1271,6 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 {
 	if (unlikely(!q))
 		return;
-	if (unlikely(--req->ref_count))
-		return;
 
 	blk_pm_put_request(req);
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index ae4f27d7944e..6b18d82d91c5 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -24,7 +24,6 @@ static void blk_end_sync_rq(struct request *rq, int error)
 	struct completion *waiting = rq->end_io_data;
 
 	rq->end_io_data = NULL;
-	__blk_put_request(rq->q, rq);
 
 	/*
 	 * complete last, if this is a stack request the process (and thus
@@ -103,12 +102,6 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
 	int err = 0;
 	unsigned long hang_check;
 
-	/*
-	 * we need an extra reference to the request, so we can look at
-	 * it after io completion
-	 */
-	rq->ref_count++;
-
 	if (!rq->sense) {
 		memset(sense, 0, sizeof(sense));
 		rq->sense = sense;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f5c7596c93dd..0a8da96274c3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -160,8 +160,6 @@ struct request {
 
 	unsigned short ioprio;
 
-	int ref_count;
-
 	void *special;		/* opaque pointer available for LLD use */
 	char *buffer;		/* kaddr of the current segment if available */
 
-- 
cgit v1.2.3


From e26b53d0b287056646a0dffce8bc6b0f053f3823 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Tue, 15 Oct 2013 09:05:01 +0800
Subject: percpu_ida: make percpu_ida percpu size/batch configurable

Make percpu_ida percpu size/batch configurable. The block-mq-tag will
use it.

After block-mq uses percpu_ida to manage tags, performance is improved.
My test is done in a 2 sockets machine, 12 process cross the 2 sockets.
So if there is lock contention or ipi, should be stressed heavily.
Testing is done for null-blk.

hw_queue_depth	nopatch iops	patch iops
64		~800k/s		~1470k/s
2048		~4470k/s	~4340k/s

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/percpu_ida.h | 18 +++++++++++++++++-
 lib/percpu_ida.c           | 28 +++++++++++-----------------
 2 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
index 0b23edbee309..56c14033e7e7 100644
--- a/include/linux/percpu_ida.h
+++ b/include/linux/percpu_ida.h
@@ -16,6 +16,8 @@ struct percpu_ida {
 	 * percpu_ida_init()
 	 */
 	unsigned			nr_tags;
+	unsigned			percpu_max_size;
+	unsigned			percpu_batch_size;
 
 	struct percpu_ida_cpu __percpu	*tag_cpu;
 
@@ -51,10 +53,24 @@ struct percpu_ida {
 	} ____cacheline_aligned_in_smp;
 };
 
+/*
+ * Number of tags we move between the percpu freelist and the global freelist at
+ * a time
+ */
+#define IDA_DEFAULT_PCPU_BATCH_MOVE	32U
+/* Max size of percpu freelist, */
+#define IDA_DEFAULT_PCPU_SIZE	((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2)
+
 int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp);
 void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
 
 void percpu_ida_destroy(struct percpu_ida *pool);
-int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags);
+int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
+	unsigned long max_size, unsigned long batch_size);
+static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
+{
+	return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE,
+		IDA_DEFAULT_PCPU_BATCH_MOVE);
+}
 
 #endif /* __PERCPU_IDA_H__ */
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index bab1ba2a4c71..a601d4259e13 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -30,15 +30,6 @@
 #include <linux/spinlock.h>
 #include <linux/percpu_ida.h>
 
-/*
- * Number of tags we move between the percpu freelist and the global freelist at
- * a time
- */
-#define IDA_PCPU_BATCH_MOVE	32U
-
-/* Max size of percpu freelist, */
-#define IDA_PCPU_SIZE		((IDA_PCPU_BATCH_MOVE * 3) / 2)
-
 struct percpu_ida_cpu {
 	/*
 	 * Even though this is percpu, we need a lock for tag stealing by remote
@@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool,
 	struct percpu_ida_cpu *remote;
 
 	for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
-	     cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2;
+	     cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2;
 	     cpus_have_tags--) {
 		cpu = cpumask_next(cpu, &pool->cpus_have_tags);
 
@@ -123,7 +114,7 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
 {
 	move_tags(tags->freelist, &tags->nr_free,
 		  pool->freelist, &pool->nr_free,
-		  min(pool->nr_free, IDA_PCPU_BATCH_MOVE));
+		  min(pool->nr_free, pool->percpu_batch_size));
 }
 
 static inline unsigned alloc_local_tag(struct percpu_ida *pool,
@@ -245,17 +236,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 		wake_up(&pool->wait);
 	}
 
-	if (nr_free == IDA_PCPU_SIZE) {
+	if (nr_free == pool->percpu_max_size) {
 		spin_lock(&pool->lock);
 
 		/*
 		 * Global lock held and irqs disabled, don't need percpu
 		 * lock
 		 */
-		if (tags->nr_free == IDA_PCPU_SIZE) {
+		if (tags->nr_free == pool->percpu_max_size) {
 			move_tags(pool->freelist, &pool->nr_free,
 				  tags->freelist, &tags->nr_free,
-				  IDA_PCPU_BATCH_MOVE);
+				  pool->percpu_batch_size);
 
 			wake_up(&pool->wait);
 		}
@@ -292,7 +283,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy);
  * Allocation is percpu, but sharding is limited by nr_tags - for best
  * performance, the workload should not span more cpus than nr_tags / 128.
  */
-int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
+int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
+	unsigned long max_size, unsigned long batch_size)
 {
 	unsigned i, cpu, order;
 
@@ -301,6 +293,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
 	init_waitqueue_head(&pool->wait);
 	spin_lock_init(&pool->lock);
 	pool->nr_tags = nr_tags;
+	pool->percpu_max_size = max_size;
+	pool->percpu_batch_size = batch_size;
 
 	/* Guard against overflow */
 	if (nr_tags > (unsigned) INT_MAX + 1) {
@@ -319,7 +313,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
 	pool->nr_free = nr_tags;
 
 	pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
-				       IDA_PCPU_SIZE * sizeof(unsigned),
+				       pool->percpu_max_size * sizeof(unsigned),
 				       sizeof(unsigned));
 	if (!pool->tag_cpu)
 		goto err;
@@ -332,4 +326,4 @@ err:
 	percpu_ida_destroy(pool);
 	return -ENOMEM;
 }
-EXPORT_SYMBOL_GPL(percpu_ida_init);
+EXPORT_SYMBOL_GPL(__percpu_ida_init);
-- 
cgit v1.2.3


From 7fc2ba17e8bf9f218cac10cc2a3de613d9d9086d Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Tue, 15 Oct 2013 09:05:02 +0800
Subject: percpu_ida: add percpu_ida_for_each_free

Add a new API to iterate free ids. blk-mq-tag will use it.

Note, this doesn't guarantee to iterate all free ids restrictly. Caller
should be aware of this. blk-mq uses it to do sanity check for request
timedout, so can tolerate the limitation.

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/percpu_ida.h |  4 ++++
 lib/percpu_ida.c           | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
index 56c14033e7e7..63510ae6f933 100644
--- a/include/linux/percpu_ida.h
+++ b/include/linux/percpu_ida.h
@@ -73,4 +73,8 @@ static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags
 		IDA_DEFAULT_PCPU_BATCH_MOVE);
 }
 
+typedef int (*percpu_ida_cb)(unsigned, void *);
+int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
+	void *data);
+
 #endif /* __PERCPU_IDA_H__ */
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index a601d4259e13..0f51c1b556cf 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -327,3 +327,47 @@ err:
 	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(__percpu_ida_init);
+
+/**
+ * percpu_ida_for_each_free - iterate free ids of a pool
+ * @pool: pool to iterate
+ * @fn: interate callback function
+ * @data: parameter for @fn
+ *
+ * Note, this doesn't guarantee to iterate all free ids restrictly. Some free
+ * ids might be missed, some might be iterated duplicated, and some might
+ * be iterated and not free soon.
+ */
+int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
+	void *data)
+{
+	unsigned long flags;
+	struct percpu_ida_cpu *remote;
+	unsigned cpu, i, err = 0;
+
+	local_irq_save(flags);
+	for_each_possible_cpu(cpu) {
+		remote = per_cpu_ptr(pool->tag_cpu, cpu);
+		spin_lock(&remote->lock);
+		for (i = 0; i < remote->nr_free; i++) {
+			err = fn(remote->freelist[i], data);
+			if (err)
+				break;
+		}
+		spin_unlock(&remote->lock);
+		if (err)
+			goto out;
+	}
+
+	spin_lock(&pool->lock);
+	for (i = 0; i < pool->nr_free; i++) {
+		err = fn(pool->freelist[i], data);
+		if (err)
+			break;
+	}
+	spin_unlock(&pool->lock);
+out:
+	local_irq_restore(flags);
+	return err;
+}
+EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
-- 
cgit v1.2.3


From 1dddc01af0d42b21058e0cb9c1ca9e8d5204d9b0 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Tue, 15 Oct 2013 09:05:03 +0800
Subject: percpu_ida: add an API to return free tags

Add an API to return free tags, blk-mq-tag will use it.

Note, this just returns a snapshot of free tags number. blk-mq-tag has
two usages of it. One is for info output for diagnosis. The other is to
quickly check if there are free tags for request dispatch checking.
Neither requires very precise.

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/percpu_ida.h |  1 +
 lib/percpu_ida.c           | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
index 63510ae6f933..1900bd0fa639 100644
--- a/include/linux/percpu_ida.h
+++ b/include/linux/percpu_ida.h
@@ -77,4 +77,5 @@ typedef int (*percpu_ida_cb)(unsigned, void *);
 int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
 	void *data);
 
+unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu);
 #endif /* __PERCPU_IDA_H__ */
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 0f51c1b556cf..b0698ea972c6 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -371,3 +371,20 @@ out:
 	return err;
 }
 EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
+
+/**
+ * percpu_ida_free_tags - return free tags number of a specific cpu or global pool
+ * @pool: pool related
+ * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids
+ *
+ * Note: this just returns a snapshot of free tags number.
+ */
+unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu)
+{
+	struct percpu_ida_cpu *remote;
+	if (cpu == nr_cpu_ids)
+		return pool->nr_free;
+	remote = per_cpu_ptr(pool->tag_cpu, cpu);
+	return remote->nr_free;
+}
+EXPORT_SYMBOL_GPL(percpu_ida_free_tags);
-- 
cgit v1.2.3


From 320ae51feed5c2f13664aa05a76bec198967e04d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 24 Oct 2013 09:20:05 +0100
Subject: blk-mq: new multi-queue block IO queueing mechanism

Linux currently has two models for block devices:

- The classic request_fn based approach, where drivers use struct
  request units for IO. The block layer provides various helper
  functionalities to let drivers share code, things like tag
  management, timeout handling, queueing, etc.

- The "stacked" approach, where a driver squeezes in between the
  block layer and IO submitter. Since this bypasses the IO stack,
  driver generally have to manage everything themselves.

With drivers being written for new high IOPS devices, the classic
request_fn based driver doesn't work well enough. The design dates
back to when both SMP and high IOPS was rare. It has problems with
scaling to bigger machines, and runs into scaling issues even on
smaller machines when you have IOPS in the hundreds of thousands
per device.

The stacked approach is then most often selected as the model
for the driver. But this means that everybody has to re-invent
everything, and along with that we get all the problems again
that the shared approach solved.

This commit introduces blk-mq, block multi queue support. The
design is centered around per-cpu queues for queueing IO, which
then funnel down into x number of hardware submission queues.
We might have a 1:1 mapping between the two, or it might be
an N:M mapping. That all depends on what the hardware supports.

blk-mq provides various helper functions, which include:

- Scalable support for request tagging. Most devices need to
  be able to uniquely identify a request both in the driver and
  to the hardware. The tagging uses per-cpu caches for freed
  tags, to enable cache hot reuse.

- Timeout handling without tracking request on a per-device
  basis. Basically the driver should be able to get a notification,
  if a request happens to fail.

- Optional support for non 1:1 mappings between issue and
  submission queues. blk-mq can redirect IO completions to the
  desired location.

- Support for per-request payloads. Drivers almost always need
  to associate a request structure with some driver private
  command structure. Drivers can tell blk-mq this at init time,
  and then any request handed to the driver will have the
  required size of memory associated with it.

- Support for merging of IO, and plugging. The stacked model
  gets neither of these. Even for high IOPS devices, merging
  sequential IO reduces per-command overhead and thus
  increases bandwidth.

For now, this is provided as a potential 3rd queueing model, with
the hope being that, as it matures, it can replace both the classic
and stacked model. That would get us back to having just 1 real
model for block devices, leaving the stacked approach to dm/md
devices (as it was originally intended).

Contributions in this patch from the following people:

Shaohua Li <shli@fusionio.com>
Alexander Gordeev <agordeev@redhat.com>
Christoph Hellwig <hch@infradead.org>
Mike Christie <michaelc@cs.wisc.edu>
Matias Bjorling <m@bjorling.me>
Jeff Moyer <jmoyer@redhat.com>

Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Makefile            |    5 +-
 block/blk-core.c          |  142 +++--
 block/blk-exec.c          |    7 +
 block/blk-flush.c         |  154 ++++-
 block/blk-mq-cpu.c        |   93 +++
 block/blk-mq-cpumap.c     |  108 ++++
 block/blk-mq-sysfs.c      |  384 ++++++++++++
 block/blk-mq-tag.c        |  204 +++++++
 block/blk-mq-tag.h        |   27 +
 block/blk-mq.c            | 1480 +++++++++++++++++++++++++++++++++++++++++++++
 block/blk-mq.h            |   52 ++
 block/blk-sysfs.c         |   13 +
 block/blk-timeout.c       |   73 ++-
 block/blk.h               |   17 +
 include/linux/bio.h       |    2 +
 include/linux/blk-mq.h    |  182 ++++++
 include/linux/blk_types.h |    2 +
 include/linux/blkdev.h    |   54 +-
 18 files changed, 2890 insertions(+), 109 deletions(-)
 create mode 100644 block/blk-mq-cpu.c
 create mode 100644 block/blk-mq-cpumap.c
 create mode 100644 block/blk-mq-sysfs.c
 create mode 100644 block/blk-mq-tag.c
 create mode 100644 block/blk-mq-tag.h
 create mode 100644 block/blk-mq.c
 create mode 100644 block/blk-mq.h
 create mode 100644 include/linux/blk-mq.h

(limited to 'include/linux')

diff --git a/block/Makefile b/block/Makefile
index 671a83d063a5..20645e88fb57 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,8 +5,9 @@
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-			blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o \
-			partition-generic.o partitions/
+			blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
+			blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
+			genhd.o scsi_ioctl.o partition-generic.o partitions/
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 18faa7e81d3b..3bb9e9f7f87e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -16,6 +16,7 @@
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
@@ -48,7 +49,7 @@ DEFINE_IDA(blk_queue_ida);
 /*
  * For the allocated request tables
  */
-static struct kmem_cache *request_cachep;
+struct kmem_cache *request_cachep = NULL;
 
 /*
  * For queue allocation
@@ -60,42 +61,6 @@ struct kmem_cache *blk_requestq_cachep;
  */
 static struct workqueue_struct *kblockd_workqueue;
 
-static void drive_stat_acct(struct request *rq, int new_io)
-{
-	struct hd_struct *part;
-	int rw = rq_data_dir(rq);
-	int cpu;
-
-	if (!blk_do_io_stat(rq))
-		return;
-
-	cpu = part_stat_lock();
-
-	if (!new_io) {
-		part = rq->part;
-		part_stat_inc(cpu, part, merges[rw]);
-	} else {
-		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
-		if (!hd_struct_try_get(part)) {
-			/*
-			 * The partition is already being removed,
-			 * the request will be accounted on the disk only
-			 *
-			 * We take a reference on disk->part0 although that
-			 * partition will never be deleted, so we can treat
-			 * it as any other partition.
-			 */
-			part = &rq->rq_disk->part0;
-			hd_struct_get(part);
-		}
-		part_round_stats(cpu, part);
-		part_inc_in_flight(part, rw);
-		rq->part = part;
-	}
-
-	part_stat_unlock();
-}
-
 void blk_queue_congestion_threshold(struct request_queue *q)
 {
 	int nr;
@@ -594,9 +559,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
+	if (percpu_counter_init(&q->mq_usage_counter, 0))
+		goto fail_q;
+
 	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
 	if (q->id < 0)
-		goto fail_q;
+		goto fail_c;
 
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@ -643,6 +611,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->bypass_depth = 1;
 	__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
 
+	init_waitqueue_head(&q->mq_freeze_wq);
+
 	if (blkcg_init_queue(q))
 		goto fail_id;
 
@@ -650,6 +620,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
 fail_id:
 	ida_simple_remove(&blk_queue_ida, q->id);
+fail_c:
+	percpu_counter_destroy(&q->mq_usage_counter);
 fail_q:
 	kmem_cache_free(blk_requestq_cachep, q);
 	return NULL;
@@ -1108,7 +1080,8 @@ retry:
 	goto retry;
 }
 
-struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+static struct request *blk_old_get_request(struct request_queue *q, int rw,
+		gfp_t gfp_mask)
 {
 	struct request *rq;
 
@@ -1125,6 +1098,14 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 
 	return rq;
 }
+
+struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+{
+	if (q->mq_ops)
+		return blk_mq_alloc_request(q, rw, gfp_mask);
+	else
+		return blk_old_get_request(q, rw, gfp_mask);
+}
 EXPORT_SYMBOL(blk_get_request);
 
 /**
@@ -1210,7 +1191,7 @@ EXPORT_SYMBOL(blk_requeue_request);
 static void add_acct_request(struct request_queue *q, struct request *rq,
 			     int where)
 {
-	drive_stat_acct(rq, 1);
+	blk_account_io_start(rq, true);
 	__elv_add_request(q, rq, where);
 }
 
@@ -1299,12 +1280,17 @@ EXPORT_SYMBOL_GPL(__blk_put_request);
 
 void blk_put_request(struct request *req)
 {
-	unsigned long flags;
 	struct request_queue *q = req->q;
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_put_request(q, req);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	if (q->mq_ops)
+		blk_mq_free_request(req);
+	else {
+		unsigned long flags;
+
+		spin_lock_irqsave(q->queue_lock, flags);
+		__blk_put_request(q, req);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
 }
 EXPORT_SYMBOL(blk_put_request);
 
@@ -1340,8 +1326,8 @@ void blk_add_request_payload(struct request *rq, struct page *page,
 }
 EXPORT_SYMBOL_GPL(blk_add_request_payload);
 
-static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
-				   struct bio *bio)
+bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
+			    struct bio *bio)
 {
 	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 
@@ -1358,12 +1344,12 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 	req->__data_len += bio->bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
-	drive_stat_acct(req, 0);
+	blk_account_io_start(req, false);
 	return true;
 }
 
-static bool bio_attempt_front_merge(struct request_queue *q,
-				    struct request *req, struct bio *bio)
+bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
+			     struct bio *bio)
 {
 	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 
@@ -1388,12 +1374,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
 	req->__data_len += bio->bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
-	drive_stat_acct(req, 0);
+	blk_account_io_start(req, false);
 	return true;
 }
 
 /**
- * attempt_plug_merge - try to merge with %current's plugged list
+ * blk_attempt_plug_merge - try to merge with %current's plugged list
  * @q: request_queue new bio is being queued at
  * @bio: new bio being queued
  * @request_count: out parameter for number of traversed plugged requests
@@ -1409,8 +1395,8 @@ static bool bio_attempt_front_merge(struct request_queue *q,
  * reliable access to the elevator outside queue lock.  Only check basic
  * merging parameters without querying the elevator.
  */
-static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
-			       unsigned int *request_count)
+bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
+			    unsigned int *request_count)
 {
 	struct blk_plug *plug;
 	struct request *rq;
@@ -1489,7 +1475,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 * Check if we can merge with the plugged list before grabbing
 	 * any locks.
 	 */
-	if (attempt_plug_merge(q, bio, &request_count))
+	if (blk_attempt_plug_merge(q, bio, &request_count))
 		return;
 
 	spin_lock_irq(q->queue_lock);
@@ -1557,7 +1543,7 @@ get_rq:
 			}
 		}
 		list_add_tail(&req->queuelist, &plug->list);
-		drive_stat_acct(req, 1);
+		blk_account_io_start(req, true);
 	} else {
 		spin_lock_irq(q->queue_lock);
 		add_acct_request(q, req, where);
@@ -2011,7 +1997,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 
-static void blk_account_io_completion(struct request *req, unsigned int bytes)
+void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (blk_do_io_stat(req)) {
 		const int rw = rq_data_dir(req);
@@ -2025,7 +2011,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
 	}
 }
 
-static void blk_account_io_done(struct request *req)
+void blk_account_io_done(struct request *req)
 {
 	/*
 	 * Account IO completion.  flush_rq isn't accounted as a
@@ -2073,6 +2059,42 @@ static inline struct request *blk_pm_peek_request(struct request_queue *q,
 }
 #endif
 
+void blk_account_io_start(struct request *rq, bool new_io)
+{
+	struct hd_struct *part;
+	int rw = rq_data_dir(rq);
+	int cpu;
+
+	if (!blk_do_io_stat(rq))
+		return;
+
+	cpu = part_stat_lock();
+
+	if (!new_io) {
+		part = rq->part;
+		part_stat_inc(cpu, part, merges[rw]);
+	} else {
+		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+		if (!hd_struct_try_get(part)) {
+			/*
+			 * The partition is already being removed,
+			 * the request will be accounted on the disk only
+			 *
+			 * We take a reference on disk->part0 although that
+			 * partition will never be deleted, so we can treat
+			 * it as any other partition.
+			 */
+			part = &rq->rq_disk->part0;
+			hd_struct_get(part);
+		}
+		part_round_stats(cpu, part);
+		part_inc_in_flight(part, rw);
+		rq->part = part;
+	}
+
+	part_stat_unlock();
+}
+
 /**
  * blk_peek_request - peek at the top of a request queue
  * @q: request queue to peek at
@@ -2448,7 +2470,6 @@ static void blk_finish_request(struct request *req, int error)
 	if (req->cmd_flags & REQ_DONTPREP)
 		blk_unprep_request(req);
 
-
 	blk_account_io_done(req);
 
 	if (req->end_io)
@@ -2870,6 +2891,7 @@ void blk_start_plug(struct blk_plug *plug)
 
 	plug->magic = PLUG_MAGIC;
 	INIT_LIST_HEAD(&plug->list);
+	INIT_LIST_HEAD(&plug->mq_list);
 	INIT_LIST_HEAD(&plug->cb_list);
 
 	/*
@@ -2967,6 +2989,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	BUG_ON(plug->magic != PLUG_MAGIC);
 
 	flush_plug_callbacks(plug, from_schedule);
+
+	if (!list_empty(&plug->mq_list))
+		blk_mq_flush_plug_list(plug, from_schedule);
+
 	if (list_empty(&plug->list))
 		return;
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 6b18d82d91c5..c3edf9dff566 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/sched/sysctl.h>
 
 #include "blk.h"
@@ -58,6 +59,12 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
 	rq->rq_disk = bd_disk;
 	rq->end_io = done;
+
+	if (q->mq_ops) {
+		blk_mq_insert_request(q, rq, true);
+		return;
+	}
+
 	/*
 	 * need to check this before __blk_run_queue(), because rq can
 	 * be freed before that returns.
diff --git a/block/blk-flush.c b/block/blk-flush.c
index cc2b827a853c..3e4cc9c7890a 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -69,8 +69,10 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/gfp.h>
+#include <linux/blk-mq.h>
 
 #include "blk.h"
+#include "blk-mq.h"
 
 /* FLUSH/FUA sequences */
 enum {
@@ -124,6 +126,24 @@ static void blk_flush_restore_request(struct request *rq)
 	/* make @rq a normal request */
 	rq->cmd_flags &= ~REQ_FLUSH_SEQ;
 	rq->end_io = rq->flush.saved_end_io;
+
+	blk_clear_rq_complete(rq);
+}
+
+static void mq_flush_data_run(struct work_struct *work)
+{
+	struct request *rq;
+
+	rq = container_of(work, struct request, mq_flush_data);
+
+	memset(&rq->csd, 0, sizeof(rq->csd));
+	blk_mq_run_request(rq, true, false);
+}
+
+static void blk_mq_flush_data_insert(struct request *rq)
+{
+	INIT_WORK(&rq->mq_flush_data, mq_flush_data_run);
+	kblockd_schedule_work(rq->q, &rq->mq_flush_data);
 }
 
 /**
@@ -136,7 +156,7 @@ static void blk_flush_restore_request(struct request *rq)
  * completion and trigger the next step.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock)
+ * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
  *
  * RETURNS:
  * %true if requests were added to the dispatch queue, %false otherwise.
@@ -146,7 +166,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 {
 	struct request_queue *q = rq->q;
 	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
-	bool queued = false;
+	bool queued = false, kicked;
 
 	BUG_ON(rq->flush.seq & seq);
 	rq->flush.seq |= seq;
@@ -167,8 +187,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 
 	case REQ_FSEQ_DATA:
 		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
-		list_add(&rq->queuelist, &q->queue_head);
-		queued = true;
+		if (q->mq_ops)
+			blk_mq_flush_data_insert(rq);
+		else {
+			list_add(&rq->queuelist, &q->queue_head);
+			queued = true;
+		}
 		break;
 
 	case REQ_FSEQ_DONE:
@@ -181,28 +205,43 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 		BUG_ON(!list_empty(&rq->queuelist));
 		list_del_init(&rq->flush.list);
 		blk_flush_restore_request(rq);
-		__blk_end_request_all(rq, error);
+		if (q->mq_ops)
+			blk_mq_end_io(rq, error);
+		else
+			__blk_end_request_all(rq, error);
 		break;
 
 	default:
 		BUG();
 	}
 
-	return blk_kick_flush(q) | queued;
+	kicked = blk_kick_flush(q);
+	/* blk_mq_run_flush will run queue */
+	if (q->mq_ops)
+		return queued;
+	return kicked | queued;
 }
 
 static void flush_end_io(struct request *flush_rq, int error)
 {
 	struct request_queue *q = flush_rq->q;
-	struct list_head *running = &q->flush_queue[q->flush_running_idx];
+	struct list_head *running;
 	bool queued = false;
 	struct request *rq, *n;
+	unsigned long flags = 0;
 
+	if (q->mq_ops) {
+		blk_mq_free_request(flush_rq);
+		spin_lock_irqsave(&q->mq_flush_lock, flags);
+	}
+	running = &q->flush_queue[q->flush_running_idx];
 	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
 
 	/* account completion of the flush request */
 	q->flush_running_idx ^= 1;
-	elv_completed_request(q, flush_rq);
+
+	if (!q->mq_ops)
+		elv_completed_request(q, flush_rq);
 
 	/* and push the waiting requests to the next stage */
 	list_for_each_entry_safe(rq, n, running, flush.list) {
@@ -223,9 +262,48 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * directly into request_fn may confuse the driver.  Always use
 	 * kblockd.
 	 */
-	if (queued || q->flush_queue_delayed)
-		blk_run_queue_async(q);
+	if (queued || q->flush_queue_delayed) {
+		if (!q->mq_ops)
+			blk_run_queue_async(q);
+		else
+		/*
+		 * This can be optimized to only run queues with requests
+		 * queued if necessary.
+		 */
+			blk_mq_run_queues(q, true);
+	}
 	q->flush_queue_delayed = 0;
+	if (q->mq_ops)
+		spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+}
+
+static void mq_flush_work(struct work_struct *work)
+{
+	struct request_queue *q;
+	struct request *rq;
+
+	q = container_of(work, struct request_queue, mq_flush_work);
+
+	/* We don't need set REQ_FLUSH_SEQ, it's for consistency */
+	rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
+		__GFP_WAIT|GFP_ATOMIC);
+	rq->cmd_type = REQ_TYPE_FS;
+	rq->end_io = flush_end_io;
+
+	blk_mq_run_request(rq, true, false);
+}
+
+/*
+ * We can't directly use q->flush_rq, because it doesn't have tag and is not in
+ * hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
+ * so offload the work to workqueue.
+ *
+ * Note: we assume a flush request finished in any hardware queue will flush
+ * the whole disk cache.
+ */
+static void mq_run_flush(struct request_queue *q)
+{
+	kblockd_schedule_work(q, &q->mq_flush_work);
 }
 
 /**
@@ -236,7 +314,7 @@ static void flush_end_io(struct request *flush_rq, int error)
  * Please read the comment at the top of this file for more info.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock)
+ * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
  *
  * RETURNS:
  * %true if flush was issued, %false otherwise.
@@ -261,13 +339,18 @@ static bool blk_kick_flush(struct request_queue *q)
 	 * Issue flush and toggle pending_idx.  This makes pending_idx
 	 * different from running_idx, which means flush is in flight.
 	 */
+	q->flush_pending_idx ^= 1;
+	if (q->mq_ops) {
+		mq_run_flush(q);
+		return true;
+	}
+
 	blk_rq_init(q, &q->flush_rq);
 	q->flush_rq.cmd_type = REQ_TYPE_FS;
 	q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
 	q->flush_rq.rq_disk = first_rq->rq_disk;
 	q->flush_rq.end_io = flush_end_io;
 
-	q->flush_pending_idx ^= 1;
 	list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
 	return true;
 }
@@ -284,16 +367,37 @@ static void flush_data_end_io(struct request *rq, int error)
 		blk_run_queue_async(q);
 }
 
+static void mq_flush_data_end_io(struct request *rq, int error)
+{
+	struct request_queue *q = rq->q;
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	unsigned long flags;
+
+	ctx = rq->mq_ctx;
+	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+	/*
+	 * After populating an empty queue, kick it to avoid stall.  Read
+	 * the comment in flush_end_io().
+	 */
+	spin_lock_irqsave(&q->mq_flush_lock, flags);
+	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
+		blk_mq_run_hw_queue(hctx, true);
+	spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+}
+
 /**
  * blk_insert_flush - insert a new FLUSH/FUA request
  * @rq: request to insert
  *
  * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions.
+ * or __blk_mq_run_hw_queue() to dispatch request.
  * @rq is being submitted.  Analyze what needs to be done and put it on the
  * right queue.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock)
+ * spin_lock_irq(q->queue_lock) in !mq case
  */
 void blk_insert_flush(struct request *rq)
 {
@@ -316,7 +420,10 @@ void blk_insert_flush(struct request *rq)
 	 * complete the request.
 	 */
 	if (!policy) {
-		__blk_end_bidi_request(rq, 0, 0, 0);
+		if (q->mq_ops)
+			blk_mq_end_io(rq, 0);
+		else
+			__blk_end_bidi_request(rq, 0, 0, 0);
 		return;
 	}
 
@@ -329,7 +436,10 @@ void blk_insert_flush(struct request *rq)
 	 */
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-		list_add_tail(&rq->queuelist, &q->queue_head);
+		if (q->mq_ops) {
+			blk_mq_run_request(rq, false, true);
+		} else
+			list_add_tail(&rq->queuelist, &q->queue_head);
 		return;
 	}
 
@@ -341,6 +451,14 @@ void blk_insert_flush(struct request *rq)
 	INIT_LIST_HEAD(&rq->flush.list);
 	rq->cmd_flags |= REQ_FLUSH_SEQ;
 	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
+	if (q->mq_ops) {
+		rq->end_io = mq_flush_data_end_io;
+
+		spin_lock_irq(&q->mq_flush_lock);
+		blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
+		spin_unlock_irq(&q->mq_flush_lock);
+		return;
+	}
 	rq->end_io = flush_data_end_io;
 
 	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
@@ -453,3 +571,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
+
+void blk_mq_init_flush(struct request_queue *q)
+{
+	spin_lock_init(&q->mq_flush_lock);
+	INIT_WORK(&q->mq_flush_work, mq_flush_work);
+}
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
new file mode 100644
index 000000000000..f8ea39d7ae54
--- /dev/null
+++ b/block/blk-mq-cpu.c
@@ -0,0 +1,93 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/list.h>
+#include <linux/llist.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+
+#include <linux/blk-mq.h>
+#include "blk-mq.h"
+
+static LIST_HEAD(blk_mq_cpu_notify_list);
+static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
+
+static int __cpuinit blk_mq_main_cpu_notify(struct notifier_block *self,
+					    unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long) hcpu;
+	struct blk_mq_cpu_notifier *notify;
+
+	spin_lock(&blk_mq_cpu_notify_lock);
+
+	list_for_each_entry(notify, &blk_mq_cpu_notify_list, list)
+		notify->notify(notify->data, action, cpu);
+
+	spin_unlock(&blk_mq_cpu_notify_lock);
+	return NOTIFY_OK;
+}
+
+static void __cpuinit blk_mq_cpu_notify(void *data, unsigned long action,
+					unsigned int cpu)
+{
+	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		/*
+		 * If the CPU goes away, ensure that we run any pending
+		 * completions.
+		 */
+		struct llist_node *node;
+		struct request *rq;
+
+		local_irq_disable();
+
+		node = llist_del_all(&per_cpu(ipi_lists, cpu));
+		while (node) {
+			struct llist_node *next = node->next;
+
+			rq = llist_entry(node, struct request, ll_list);
+			__blk_mq_end_io(rq, rq->errors);
+			node = next;
+		}
+
+		local_irq_enable();
+	}
+}
+
+static struct notifier_block __cpuinitdata blk_mq_main_cpu_notifier = {
+	.notifier_call	= blk_mq_main_cpu_notify,
+};
+
+void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
+{
+	BUG_ON(!notifier->notify);
+
+	spin_lock(&blk_mq_cpu_notify_lock);
+	list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
+	spin_unlock(&blk_mq_cpu_notify_lock);
+}
+
+void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
+{
+	spin_lock(&blk_mq_cpu_notify_lock);
+	list_del(&notifier->list);
+	spin_unlock(&blk_mq_cpu_notify_lock);
+}
+
+void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
+			      void (*fn)(void *, unsigned long, unsigned int),
+			      void *data)
+{
+	notifier->notify = fn;
+	notifier->data = data;
+}
+
+static struct blk_mq_cpu_notifier __cpuinitdata cpu_notifier = {
+	.notify = blk_mq_cpu_notify,
+};
+
+void __init blk_mq_cpu_init(void)
+{
+	register_hotcpu_notifier(&blk_mq_main_cpu_notifier);
+	blk_mq_register_cpu_notifier(&cpu_notifier);
+}
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
new file mode 100644
index 000000000000..f8721278601c
--- /dev/null
+++ b/block/blk-mq-cpumap.c
@@ -0,0 +1,108 @@
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+
+#include <linux/blk-mq.h>
+#include "blk.h"
+#include "blk-mq.h"
+
+static void show_map(unsigned int *map, unsigned int nr)
+{
+	int i;
+
+	pr_info("blk-mq: CPU -> queue map\n");
+	for_each_online_cpu(i)
+		pr_info("  CPU%2u -> Queue %u\n", i, map[i]);
+}
+
+static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
+			      const int cpu)
+{
+	return cpu / ((nr_cpus + nr_queues - 1) / nr_queues);
+}
+
+static int get_first_sibling(unsigned int cpu)
+{
+	unsigned int ret;
+
+	ret = cpumask_first(topology_thread_cpumask(cpu));
+	if (ret < nr_cpu_ids)
+		return ret;
+
+	return cpu;
+}
+
+int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
+{
+	unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
+	cpumask_var_t cpus;
+
+	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
+		return 1;
+
+	cpumask_clear(cpus);
+	nr_cpus = nr_uniq_cpus = 0;
+	for_each_online_cpu(i) {
+		nr_cpus++;
+		first_sibling = get_first_sibling(i);
+		if (!cpumask_test_cpu(first_sibling, cpus))
+			nr_uniq_cpus++;
+		cpumask_set_cpu(i, cpus);
+	}
+
+	queue = 0;
+	for_each_possible_cpu(i) {
+		if (!cpu_online(i)) {
+			map[i] = 0;
+			continue;
+		}
+
+		/*
+		 * Easy case - we have equal or more hardware queues. Or
+		 * there are no thread siblings to take into account. Do
+		 * 1:1 if enough, or sequential mapping if less.
+		 */
+		if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
+			map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
+			queue++;
+			continue;
+		}
+
+		/*
+		 * Less then nr_cpus queues, and we have some number of
+		 * threads per cores. Map sibling threads to the same
+		 * queue.
+		 */
+		first_sibling = get_first_sibling(i);
+		if (first_sibling == i) {
+			map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
+							queue);
+			queue++;
+		} else
+			map[i] = map[first_sibling];
+	}
+
+	show_map(map, nr_cpus);
+	free_cpumask_var(cpus);
+	return 0;
+}
+
+unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg)
+{
+	unsigned int *map;
+
+	/* If cpus are offline, map them to first hctx */
+	map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
+				reg->numa_node);
+	if (!map)
+		return NULL;
+
+	if (!blk_mq_update_queue_map(map, reg->nr_hw_queues))
+		return map;
+
+	kfree(map);
+	return NULL;
+}
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
new file mode 100644
index 000000000000..ba6cf8e9aa0a
--- /dev/null
+++ b/block/blk-mq-sysfs.c
@@ -0,0 +1,384 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/backing-dev.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/smp.h>
+
+#include <linux/blk-mq.h>
+#include "blk-mq.h"
+#include "blk-mq-tag.h"
+
+static void blk_mq_sysfs_release(struct kobject *kobj)
+{
+}
+
+struct blk_mq_ctx_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct blk_mq_ctx *, char *);
+	ssize_t (*store)(struct blk_mq_ctx *, const char *, size_t);
+};
+
+struct blk_mq_hw_ctx_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct blk_mq_hw_ctx *, char *);
+	ssize_t (*store)(struct blk_mq_hw_ctx *, const char *, size_t);
+};
+
+static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
+				 char *page)
+{
+	struct blk_mq_ctx_sysfs_entry *entry;
+	struct blk_mq_ctx *ctx;
+	struct request_queue *q;
+	ssize_t res;
+
+	entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
+	ctx = container_of(kobj, struct blk_mq_ctx, kobj);
+	q = ctx->queue;
+
+	if (!entry->show)
+		return -EIO;
+
+	res = -ENOENT;
+	mutex_lock(&q->sysfs_lock);
+	if (!blk_queue_dying(q))
+		res = entry->show(ctx, page);
+	mutex_unlock(&q->sysfs_lock);
+	return res;
+}
+
+static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
+				  const char *page, size_t length)
+{
+	struct blk_mq_ctx_sysfs_entry *entry;
+	struct blk_mq_ctx *ctx;
+	struct request_queue *q;
+	ssize_t res;
+
+	entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
+	ctx = container_of(kobj, struct blk_mq_ctx, kobj);
+	q = ctx->queue;
+
+	if (!entry->store)
+		return -EIO;
+
+	res = -ENOENT;
+	mutex_lock(&q->sysfs_lock);
+	if (!blk_queue_dying(q))
+		res = entry->store(ctx, page, length);
+	mutex_unlock(&q->sysfs_lock);
+	return res;
+}
+
+static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
+				    struct attribute *attr, char *page)
+{
+	struct blk_mq_hw_ctx_sysfs_entry *entry;
+	struct blk_mq_hw_ctx *hctx;
+	struct request_queue *q;
+	ssize_t res;
+
+	entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
+	hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
+	q = hctx->queue;
+
+	if (!entry->show)
+		return -EIO;
+
+	res = -ENOENT;
+	mutex_lock(&q->sysfs_lock);
+	if (!blk_queue_dying(q))
+		res = entry->show(hctx, page);
+	mutex_unlock(&q->sysfs_lock);
+	return res;
+}
+
+static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
+				     struct attribute *attr, const char *page,
+				     size_t length)
+{
+	struct blk_mq_hw_ctx_sysfs_entry *entry;
+	struct blk_mq_hw_ctx *hctx;
+	struct request_queue *q;
+	ssize_t res;
+
+	entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
+	hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
+	q = hctx->queue;
+
+	if (!entry->store)
+		return -EIO;
+
+	res = -ENOENT;
+	mutex_lock(&q->sysfs_lock);
+	if (!blk_queue_dying(q))
+		res = entry->store(hctx, page, length);
+	mutex_unlock(&q->sysfs_lock);
+	return res;
+}
+
+static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page)
+{
+	return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1],
+				ctx->rq_dispatched[0]);
+}
+
+static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page)
+{
+	return sprintf(page, "%lu\n", ctx->rq_merged);
+}
+
+static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page)
+{
+	return sprintf(page, "%lu %lu\n", ctx->rq_completed[1],
+				ctx->rq_completed[0]);
+}
+
+static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg)
+{
+	char *start_page = page;
+	struct request *rq;
+
+	page += sprintf(page, "%s:\n", msg);
+
+	list_for_each_entry(rq, list, queuelist)
+		page += sprintf(page, "\t%p\n", rq);
+
+	return page - start_page;
+}
+
+static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
+{
+	ssize_t ret;
+
+	spin_lock(&ctx->lock);
+	ret = sysfs_list_show(page, &ctx->rq_list, "CTX pending");
+	spin_unlock(&ctx->lock);
+
+	return ret;
+}
+
+static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
+					   char *page)
+{
+	return sprintf(page, "%lu\n", hctx->queued);
+}
+
+static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+	return sprintf(page, "%lu\n", hctx->run);
+}
+
+static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx,
+					       char *page)
+{
+	char *start_page = page;
+	int i;
+
+	page += sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
+
+	for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER; i++) {
+		unsigned long d = 1U << (i - 1);
+
+		page += sprintf(page, "%8lu\t%lu\n", d, hctx->dispatched[i]);
+	}
+
+	return page - start_page;
+}
+
+static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
+					    char *page)
+{
+	ssize_t ret;
+
+	spin_lock(&hctx->lock);
+	ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending");
+	spin_unlock(&hctx->lock);
+
+	return ret;
+}
+
+static ssize_t blk_mq_hw_sysfs_ipi_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+	ssize_t ret;
+
+	spin_lock(&hctx->lock);
+	ret = sprintf(page, "%u\n", !!(hctx->flags & BLK_MQ_F_SHOULD_IPI));
+	spin_unlock(&hctx->lock);
+
+	return ret;
+}
+
+static ssize_t blk_mq_hw_sysfs_ipi_store(struct blk_mq_hw_ctx *hctx,
+					 const char *page, size_t len)
+{
+	struct blk_mq_ctx *ctx;
+	unsigned long ret;
+	unsigned int i;
+
+	if (kstrtoul(page, 10, &ret)) {
+		pr_err("blk-mq-sysfs: invalid input '%s'\n", page);
+		return -EINVAL;
+	}
+
+	spin_lock(&hctx->lock);
+	if (ret)
+		hctx->flags |= BLK_MQ_F_SHOULD_IPI;
+	else
+		hctx->flags &= ~BLK_MQ_F_SHOULD_IPI;
+	spin_unlock(&hctx->lock);
+
+	hctx_for_each_ctx(hctx, ctx, i)
+		ctx->ipi_redirect = !!ret;
+
+	return len;
+}
+
+static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+	return blk_mq_tag_sysfs_show(hctx->tags, page);
+}
+
+static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
+	.attr = {.name = "dispatched", .mode = S_IRUGO },
+	.show = blk_mq_sysfs_dispatched_show,
+};
+static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = {
+	.attr = {.name = "merged", .mode = S_IRUGO },
+	.show = blk_mq_sysfs_merged_show,
+};
+static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = {
+	.attr = {.name = "completed", .mode = S_IRUGO },
+	.show = blk_mq_sysfs_completed_show,
+};
+static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = {
+	.attr = {.name = "rq_list", .mode = S_IRUGO },
+	.show = blk_mq_sysfs_rq_list_show,
+};
+
+static struct attribute *default_ctx_attrs[] = {
+	&blk_mq_sysfs_dispatched.attr,
+	&blk_mq_sysfs_merged.attr,
+	&blk_mq_sysfs_completed.attr,
+	&blk_mq_sysfs_rq_list.attr,
+	NULL,
+};
+
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = {
+	.attr = {.name = "queued", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_queued_show,
+};
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = {
+	.attr = {.name = "run", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_run_show,
+};
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
+	.attr = {.name = "dispatched", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_dispatched_show,
+};
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
+	.attr = {.name = "pending", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_rq_list_show,
+};
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_ipi = {
+	.attr = {.name = "ipi_redirect", .mode = S_IRUGO | S_IWUSR},
+	.show = blk_mq_hw_sysfs_ipi_show,
+	.store = blk_mq_hw_sysfs_ipi_store,
+};
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
+	.attr = {.name = "tags", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_tags_show,
+};
+
+static struct attribute *default_hw_ctx_attrs[] = {
+	&blk_mq_hw_sysfs_queued.attr,
+	&blk_mq_hw_sysfs_run.attr,
+	&blk_mq_hw_sysfs_dispatched.attr,
+	&blk_mq_hw_sysfs_pending.attr,
+	&blk_mq_hw_sysfs_ipi.attr,
+	&blk_mq_hw_sysfs_tags.attr,
+	NULL,
+};
+
+static const struct sysfs_ops blk_mq_sysfs_ops = {
+	.show	= blk_mq_sysfs_show,
+	.store	= blk_mq_sysfs_store,
+};
+
+static const struct sysfs_ops blk_mq_hw_sysfs_ops = {
+	.show	= blk_mq_hw_sysfs_show,
+	.store	= blk_mq_hw_sysfs_store,
+};
+
+static struct kobj_type blk_mq_ktype = {
+	.sysfs_ops	= &blk_mq_sysfs_ops,
+	.release	= blk_mq_sysfs_release,
+};
+
+static struct kobj_type blk_mq_ctx_ktype = {
+	.sysfs_ops	= &blk_mq_sysfs_ops,
+	.default_attrs	= default_ctx_attrs,
+	.release	= blk_mq_sysfs_release,
+};
+
+static struct kobj_type blk_mq_hw_ktype = {
+	.sysfs_ops	= &blk_mq_hw_sysfs_ops,
+	.default_attrs	= default_hw_ctx_attrs,
+	.release	= blk_mq_sysfs_release,
+};
+
+void blk_mq_unregister_disk(struct gendisk *disk)
+{
+	struct request_queue *q = disk->queue;
+
+	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
+	kobject_del(&q->mq_kobj);
+
+	kobject_put(&disk_to_dev(disk)->kobj);
+}
+
+int blk_mq_register_disk(struct gendisk *disk)
+{
+	struct device *dev = disk_to_dev(disk);
+	struct request_queue *q = disk->queue;
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	int ret, i, j;
+
+	kobject_init(&q->mq_kobj, &blk_mq_ktype);
+
+	ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
+	if (ret < 0)
+		return ret;
+
+	kobject_uevent(&q->mq_kobj, KOBJ_ADD);
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
+		ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", i);
+		if (ret)
+			break;
+
+		if (!hctx->nr_ctx)
+			continue;
+
+		hctx_for_each_ctx(hctx, ctx, j) {
+			kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
+			ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
+			if (ret)
+				break;
+		}
+	}
+
+	if (ret) {
+		blk_mq_unregister_disk(disk);
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
new file mode 100644
index 000000000000..d64a02fb1f73
--- /dev/null
+++ b/block/blk-mq-tag.c
@@ -0,0 +1,204 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu_ida.h>
+
+#include <linux/blk-mq.h>
+#include "blk.h"
+#include "blk-mq.h"
+#include "blk-mq-tag.h"
+
+/*
+ * Per tagged queue (tag address space) map
+ */
+struct blk_mq_tags {
+	unsigned int nr_tags;
+	unsigned int nr_reserved_tags;
+	unsigned int nr_batch_move;
+	unsigned int nr_max_cache;
+
+	struct percpu_ida free_tags;
+	struct percpu_ida reserved_tags;
+};
+
+void blk_mq_wait_for_tags(struct blk_mq_tags *tags)
+{
+	int tag = blk_mq_get_tag(tags, __GFP_WAIT, false);
+	blk_mq_put_tag(tags, tag);
+}
+
+bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
+{
+	return !tags ||
+		percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids) != 0;
+}
+
+static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp)
+{
+	int tag;
+
+	tag = percpu_ida_alloc(&tags->free_tags, gfp);
+	if (tag < 0)
+		return BLK_MQ_TAG_FAIL;
+	return tag + tags->nr_reserved_tags;
+}
+
+static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
+					      gfp_t gfp)
+{
+	int tag;
+
+	if (unlikely(!tags->nr_reserved_tags)) {
+		WARN_ON_ONCE(1);
+		return BLK_MQ_TAG_FAIL;
+	}
+
+	tag = percpu_ida_alloc(&tags->reserved_tags, gfp);
+	if (tag < 0)
+		return BLK_MQ_TAG_FAIL;
+	return tag;
+}
+
+unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved)
+{
+	if (!reserved)
+		return __blk_mq_get_tag(tags, gfp);
+
+	return __blk_mq_get_reserved_tag(tags, gfp);
+}
+
+static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
+{
+	BUG_ON(tag >= tags->nr_tags);
+
+	percpu_ida_free(&tags->free_tags, tag - tags->nr_reserved_tags);
+}
+
+static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
+				      unsigned int tag)
+{
+	BUG_ON(tag >= tags->nr_reserved_tags);
+
+	percpu_ida_free(&tags->reserved_tags, tag);
+}
+
+void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
+{
+	if (tag >= tags->nr_reserved_tags)
+		__blk_mq_put_tag(tags, tag);
+	else
+		__blk_mq_put_reserved_tag(tags, tag);
+}
+
+static int __blk_mq_tag_iter(unsigned id, void *data)
+{
+	unsigned long *tag_map = data;
+	__set_bit(id, tag_map);
+	return 0;
+}
+
+void blk_mq_tag_busy_iter(struct blk_mq_tags *tags,
+			  void (*fn)(void *, unsigned long *), void *data)
+{
+	unsigned long *tag_map;
+	size_t map_size;
+
+	map_size = ALIGN(tags->nr_tags, BITS_PER_LONG) / BITS_PER_LONG;
+	tag_map = kzalloc(map_size * sizeof(unsigned long), GFP_ATOMIC);
+	if (!tag_map)
+		return;
+
+	percpu_ida_for_each_free(&tags->free_tags, __blk_mq_tag_iter, tag_map);
+	if (tags->nr_reserved_tags)
+		percpu_ida_for_each_free(&tags->reserved_tags, __blk_mq_tag_iter,
+			tag_map);
+
+	fn(data, tag_map);
+	kfree(tag_map);
+}
+
+struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
+				     unsigned int reserved_tags, int node)
+{
+	unsigned int nr_tags, nr_cache;
+	struct blk_mq_tags *tags;
+	int ret;
+
+	if (total_tags > BLK_MQ_TAG_MAX) {
+		pr_err("blk-mq: tag depth too large\n");
+		return NULL;
+	}
+
+	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
+	if (!tags)
+		return NULL;
+
+	nr_tags = total_tags - reserved_tags;
+	nr_cache = nr_tags / num_possible_cpus();
+
+	if (nr_cache < BLK_MQ_TAG_CACHE_MIN)
+		nr_cache = BLK_MQ_TAG_CACHE_MIN;
+	else if (nr_cache > BLK_MQ_TAG_CACHE_MAX)
+		nr_cache = BLK_MQ_TAG_CACHE_MAX;
+
+	tags->nr_tags = total_tags;
+	tags->nr_reserved_tags = reserved_tags;
+	tags->nr_max_cache = nr_cache;
+	tags->nr_batch_move = max(1u, nr_cache / 2);
+
+	ret = __percpu_ida_init(&tags->free_tags, tags->nr_tags -
+				tags->nr_reserved_tags,
+				tags->nr_max_cache,
+				tags->nr_batch_move);
+	if (ret)
+		goto err_free_tags;
+
+	if (reserved_tags) {
+		/*
+		 * With max_cahe and batch set to 1, the allocator fallbacks to
+		 * no cached. It's fine reserved tags allocation is slow.
+		 */
+		ret = __percpu_ida_init(&tags->reserved_tags, reserved_tags,
+				1, 1);
+		if (ret)
+			goto err_reserved_tags;
+	}
+
+	return tags;
+
+err_reserved_tags:
+	percpu_ida_destroy(&tags->free_tags);
+err_free_tags:
+	kfree(tags);
+	return NULL;
+}
+
+void blk_mq_free_tags(struct blk_mq_tags *tags)
+{
+	percpu_ida_destroy(&tags->free_tags);
+	percpu_ida_destroy(&tags->reserved_tags);
+	kfree(tags);
+}
+
+ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
+{
+	char *orig_page = page;
+	int cpu;
+
+	if (!tags)
+		return 0;
+
+	page += sprintf(page, "nr_tags=%u, reserved_tags=%u, batch_move=%u,"
+			" max_cache=%u\n", tags->nr_tags, tags->nr_reserved_tags,
+			tags->nr_batch_move, tags->nr_max_cache);
+
+	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n",
+			percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids),
+			percpu_ida_free_tags(&tags->reserved_tags, nr_cpu_ids));
+
+	for_each_possible_cpu(cpu) {
+		page += sprintf(page, "  cpu%02u: nr_free=%u\n", cpu,
+				percpu_ida_free_tags(&tags->free_tags, cpu));
+	}
+
+	return page - orig_page;
+}
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
new file mode 100644
index 000000000000..947ba2c6148e
--- /dev/null
+++ b/block/blk-mq-tag.h
@@ -0,0 +1,27 @@
+#ifndef INT_BLK_MQ_TAG_H
+#define INT_BLK_MQ_TAG_H
+
+struct blk_mq_tags;
+
+extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
+extern void blk_mq_free_tags(struct blk_mq_tags *tags);
+
+extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved);
+extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags);
+extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag);
+extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
+extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
+extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
+
+enum {
+	BLK_MQ_TAG_CACHE_MIN	= 1,
+	BLK_MQ_TAG_CACHE_MAX	= 64,
+};
+
+enum {
+	BLK_MQ_TAG_FAIL		= -1U,
+	BLK_MQ_TAG_MIN		= BLK_MQ_TAG_CACHE_MIN,
+	BLK_MQ_TAG_MAX		= BLK_MQ_TAG_FAIL - 1,
+};
+
+#endif
diff --git a/block/blk-mq.c b/block/blk-mq.c
new file mode 100644
index 000000000000..f21ec964e411
--- /dev/null
+++ b/block/blk-mq.c
@@ -0,0 +1,1480 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/backing-dev.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/smp.h>
+#include <linux/llist.h>
+#include <linux/list_sort.h>
+#include <linux/cpu.h>
+#include <linux/cache.h>
+#include <linux/sched/sysctl.h>
+#include <linux/delay.h>
+
+#include <trace/events/block.h>
+
+#include <linux/blk-mq.h>
+#include "blk.h"
+#include "blk-mq.h"
+#include "blk-mq-tag.h"
+
+static DEFINE_MUTEX(all_q_mutex);
+static LIST_HEAD(all_q_list);
+
+static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
+
+DEFINE_PER_CPU(struct llist_head, ipi_lists);
+
+static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
+					   unsigned int cpu)
+{
+	return per_cpu_ptr(q->queue_ctx, cpu);
+}
+
+/*
+ * This assumes per-cpu software queueing queues. They could be per-node
+ * as well, for instance. For now this is hardcoded as-is. Note that we don't
+ * care about preemption, since we know the ctx's are persistent. This does
+ * mean that we can't rely on ctx always matching the currently running CPU.
+ */
+static struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
+{
+	return __blk_mq_get_ctx(q, get_cpu());
+}
+
+static void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
+{
+	put_cpu();
+}
+
+/*
+ * Check if any of the ctx's have pending work in this hardware queue
+ */
+static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
+{
+	unsigned int i;
+
+	for (i = 0; i < hctx->nr_ctx_map; i++)
+		if (hctx->ctx_map[i])
+			return true;
+
+	return false;
+}
+
+/*
+ * Mark this ctx as having pending work in this hardware queue
+ */
+static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
+				     struct blk_mq_ctx *ctx)
+{
+	if (!test_bit(ctx->index_hw, hctx->ctx_map))
+		set_bit(ctx->index_hw, hctx->ctx_map);
+}
+
+static struct request *blk_mq_alloc_rq(struct blk_mq_hw_ctx *hctx, gfp_t gfp,
+				       bool reserved)
+{
+	struct request *rq;
+	unsigned int tag;
+
+	tag = blk_mq_get_tag(hctx->tags, gfp, reserved);
+	if (tag != BLK_MQ_TAG_FAIL) {
+		rq = hctx->rqs[tag];
+		rq->tag = tag;
+
+		return rq;
+	}
+
+	return NULL;
+}
+
+static int blk_mq_queue_enter(struct request_queue *q)
+{
+	int ret;
+
+	__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
+	smp_wmb();
+	/* we have problems to freeze the queue if it's initializing */
+	if (!blk_queue_bypass(q) || !blk_queue_init_done(q))
+		return 0;
+
+	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+
+	spin_lock_irq(q->queue_lock);
+	ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
+		!blk_queue_bypass(q), *q->queue_lock);
+	/* inc usage with lock hold to avoid freeze_queue runs here */
+	if (!ret)
+		__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
+static void blk_mq_queue_exit(struct request_queue *q)
+{
+	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+}
+
+/*
+ * Guarantee no request is in use, so we can change any data structure of
+ * the queue afterward.
+ */
+static void blk_mq_freeze_queue(struct request_queue *q)
+{
+	bool drain;
+
+	spin_lock_irq(q->queue_lock);
+	drain = !q->bypass_depth++;
+	queue_flag_set(QUEUE_FLAG_BYPASS, q);
+	spin_unlock_irq(q->queue_lock);
+
+	if (!drain)
+		return;
+
+	while (true) {
+		s64 count;
+
+		spin_lock_irq(q->queue_lock);
+		count = percpu_counter_sum(&q->mq_usage_counter);
+		spin_unlock_irq(q->queue_lock);
+
+		if (count == 0)
+			break;
+		blk_mq_run_queues(q, false);
+		msleep(10);
+	}
+}
+
+static void blk_mq_unfreeze_queue(struct request_queue *q)
+{
+	bool wake = false;
+
+	spin_lock_irq(q->queue_lock);
+	if (!--q->bypass_depth) {
+		queue_flag_clear(QUEUE_FLAG_BYPASS, q);
+		wake = true;
+	}
+	WARN_ON_ONCE(q->bypass_depth < 0);
+	spin_unlock_irq(q->queue_lock);
+	if (wake)
+		wake_up_all(&q->mq_freeze_wq);
+}
+
+bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
+{
+	return blk_mq_has_free_tags(hctx->tags);
+}
+EXPORT_SYMBOL(blk_mq_can_queue);
+
+static void blk_mq_rq_ctx_init(struct blk_mq_ctx *ctx, struct request *rq,
+			       unsigned int rw_flags)
+{
+	rq->mq_ctx = ctx;
+	rq->cmd_flags = rw_flags;
+	ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
+}
+
+static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
+					      gfp_t gfp, bool reserved)
+{
+	return blk_mq_alloc_rq(hctx, gfp, reserved);
+}
+
+static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
+						   int rw, gfp_t gfp,
+						   bool reserved)
+{
+	struct request *rq;
+
+	do {
+		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+		struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+		rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
+		if (rq) {
+			blk_mq_rq_ctx_init(ctx, rq, rw);
+			break;
+		} else if (!(gfp & __GFP_WAIT))
+			break;
+
+		blk_mq_put_ctx(ctx);
+		__blk_mq_run_hw_queue(hctx);
+		blk_mq_wait_for_tags(hctx->tags);
+	} while (1);
+
+	return rq;
+}
+
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
+{
+	struct request *rq;
+
+	if (blk_mq_queue_enter(q))
+		return NULL;
+
+	rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
+	blk_mq_put_ctx(rq->mq_ctx);
+	return rq;
+}
+
+struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw,
+					      gfp_t gfp)
+{
+	struct request *rq;
+
+	if (blk_mq_queue_enter(q))
+		return NULL;
+
+	rq = blk_mq_alloc_request_pinned(q, rw, gfp, true);
+	blk_mq_put_ctx(rq->mq_ctx);
+	return rq;
+}
+EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
+
+/*
+ * Re-init and set pdu, if we have it
+ */
+static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
+{
+	blk_rq_init(hctx->queue, rq);
+
+	if (hctx->cmd_size)
+		rq->special = blk_mq_rq_to_pdu(rq);
+}
+
+static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
+				  struct blk_mq_ctx *ctx, struct request *rq)
+{
+	const int tag = rq->tag;
+	struct request_queue *q = rq->q;
+
+	blk_mq_rq_init(hctx, rq);
+	blk_mq_put_tag(hctx->tags, tag);
+
+	blk_mq_queue_exit(q);
+}
+
+void blk_mq_free_request(struct request *rq)
+{
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx;
+	struct request_queue *q = rq->q;
+
+	ctx->rq_completed[rq_is_sync(rq)]++;
+
+	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+	__blk_mq_free_request(hctx, ctx, rq);
+}
+
+static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
+{
+	if (error)
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		error = -EIO;
+
+	if (unlikely(rq->cmd_flags & REQ_QUIET))
+		set_bit(BIO_QUIET, &bio->bi_flags);
+
+	/* don't actually finish bio if it's part of flush sequence */
+	if (!(rq->cmd_flags & REQ_FLUSH_SEQ))
+		bio_endio(bio, error);
+}
+
+void blk_mq_complete_request(struct request *rq, int error)
+{
+	struct bio *bio = rq->bio;
+	unsigned int bytes = 0;
+
+	trace_block_rq_complete(rq->q, rq);
+
+	while (bio) {
+		struct bio *next = bio->bi_next;
+
+		bio->bi_next = NULL;
+		bytes += bio->bi_size;
+		blk_mq_bio_endio(rq, bio, error);
+		bio = next;
+	}
+
+	blk_account_io_completion(rq, bytes);
+
+	if (rq->end_io)
+		rq->end_io(rq, error);
+	else
+		blk_mq_free_request(rq);
+
+	blk_account_io_done(rq);
+}
+
+void __blk_mq_end_io(struct request *rq, int error)
+{
+	if (!blk_mark_rq_complete(rq))
+		blk_mq_complete_request(rq, error);
+}
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+
+/*
+ * Called with interrupts disabled.
+ */
+static void ipi_end_io(void *data)
+{
+	struct llist_head *list = &per_cpu(ipi_lists, smp_processor_id());
+	struct llist_node *entry, *next;
+	struct request *rq;
+
+	entry = llist_del_all(list);
+
+	while (entry) {
+		next = entry->next;
+		rq = llist_entry(entry, struct request, ll_list);
+		__blk_mq_end_io(rq, rq->errors);
+		entry = next;
+	}
+}
+
+static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
+			  struct request *rq, const int error)
+{
+	struct call_single_data *data = &rq->csd;
+
+	rq->errors = error;
+	rq->ll_list.next = NULL;
+
+	/*
+	 * If the list is non-empty, an existing IPI must already
+	 * be "in flight". If that is the case, we need not schedule
+	 * a new one.
+	 */
+	if (llist_add(&rq->ll_list, &per_cpu(ipi_lists, ctx->cpu))) {
+		data->func = ipi_end_io;
+		data->flags = 0;
+		__smp_call_function_single(ctx->cpu, data, 0);
+	}
+
+	return true;
+}
+#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
+			  struct request *rq, const int error)
+{
+	return false;
+}
+#endif
+
+/*
+ * End IO on this request on a multiqueue enabled driver. We'll either do
+ * it directly inline, or punt to a local IPI handler on the matching
+ * remote CPU.
+ */
+void blk_mq_end_io(struct request *rq, int error)
+{
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	int cpu;
+
+	if (!ctx->ipi_redirect)
+		return __blk_mq_end_io(rq, error);
+
+	cpu = get_cpu();
+
+	if (cpu == ctx->cpu || !cpu_online(ctx->cpu) ||
+	    !ipi_remote_cpu(ctx, cpu, rq, error))
+		__blk_mq_end_io(rq, error);
+
+	put_cpu();
+}
+EXPORT_SYMBOL(blk_mq_end_io);
+
+static void blk_mq_start_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+
+	trace_block_rq_issue(q, rq);
+
+	/*
+	 * Just mark start time and set the started bit. Due to memory
+	 * ordering, we know we'll see the correct deadline as long as
+	 * REQ_ATOMIC_STARTED is seen.
+	 */
+	rq->deadline = jiffies + q->rq_timeout;
+	set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+}
+
+static void blk_mq_requeue_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+
+	trace_block_rq_requeue(q, rq);
+	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+}
+
+struct blk_mq_timeout_data {
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long *next;
+	unsigned int *next_set;
+};
+
+static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
+{
+	struct blk_mq_timeout_data *data = __data;
+	struct blk_mq_hw_ctx *hctx = data->hctx;
+	unsigned int tag;
+
+	 /* It may not be in flight yet (this is where
+	 * the REQ_ATOMIC_STARTED flag comes in). The requests are
+	 * statically allocated, so we know it's always safe to access the
+	 * memory associated with a bit offset into ->rqs[].
+	 */
+	tag = 0;
+	do {
+		struct request *rq;
+
+		tag = find_next_zero_bit(free_tags, hctx->queue_depth, tag);
+		if (tag >= hctx->queue_depth)
+			break;
+
+		rq = hctx->rqs[tag++];
+
+		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+			continue;
+
+		blk_rq_check_expired(rq, data->next, data->next_set);
+	} while (1);
+}
+
+static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx,
+					unsigned long *next,
+					unsigned int *next_set)
+{
+	struct blk_mq_timeout_data data = {
+		.hctx		= hctx,
+		.next		= next,
+		.next_set	= next_set,
+	};
+
+	/*
+	 * Ask the tagging code to iterate busy requests, so we can
+	 * check them for timeout.
+	 */
+	blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data);
+}
+
+static void blk_mq_rq_timer(unsigned long data)
+{
+	struct request_queue *q = (struct request_queue *) data;
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long next = 0;
+	int i, next_set = 0;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set);
+
+	if (next_set)
+		mod_timer(&q->timeout, round_jiffies_up(next));
+}
+
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+				 struct blk_mq_ctx *ctx, struct bio *bio)
+{
+	struct request *rq;
+	int checked = 8;
+
+	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+		int el_ret;
+
+		if (!checked--)
+			break;
+
+		if (!blk_rq_merge_ok(rq, bio))
+			continue;
+
+		el_ret = blk_try_merge(rq, bio);
+		if (el_ret == ELEVATOR_BACK_MERGE) {
+			if (bio_attempt_back_merge(q, rq, bio)) {
+				ctx->rq_merged++;
+				return true;
+			}
+			break;
+		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
+			if (bio_attempt_front_merge(q, rq, bio)) {
+				ctx->rq_merged++;
+				return true;
+			}
+			break;
+		}
+	}
+
+	return false;
+}
+
+void blk_mq_add_timer(struct request *rq)
+{
+	__blk_add_timer(rq, NULL);
+}
+
+/*
+ * Run this hardware queue, pulling any software queues mapped to it in.
+ * Note that this function currently has various problems around ordering
+ * of IO. In particular, we'd like FIFO behaviour on handling existing
+ * items on the hctx->dispatch list. Ignore that for now.
+ */
+static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+	struct blk_mq_ctx *ctx;
+	struct request *rq;
+	LIST_HEAD(rq_list);
+	int bit, queued;
+
+	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->flags)))
+		return;
+
+	hctx->run++;
+
+	/*
+	 * Touch any software queue that has pending entries.
+	 */
+	for_each_set_bit(bit, hctx->ctx_map, hctx->nr_ctx) {
+		clear_bit(bit, hctx->ctx_map);
+		ctx = hctx->ctxs[bit];
+		BUG_ON(bit != ctx->index_hw);
+
+		spin_lock(&ctx->lock);
+		list_splice_tail_init(&ctx->rq_list, &rq_list);
+		spin_unlock(&ctx->lock);
+	}
+
+	/*
+	 * If we have previous entries on our dispatch list, grab them
+	 * and stuff them at the front for more fair dispatch.
+	 */
+	if (!list_empty_careful(&hctx->dispatch)) {
+		spin_lock(&hctx->lock);
+		if (!list_empty(&hctx->dispatch))
+			list_splice_init(&hctx->dispatch, &rq_list);
+		spin_unlock(&hctx->lock);
+	}
+
+	/*
+	 * Delete and return all entries from our dispatch list
+	 */
+	queued = 0;
+
+	/*
+	 * Now process all the entries, sending them to the driver.
+	 */
+	while (!list_empty(&rq_list)) {
+		int ret;
+
+		rq = list_first_entry(&rq_list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		blk_mq_start_request(rq);
+
+		/*
+		 * Last request in the series. Flag it as such, this
+		 * enables drivers to know when IO should be kicked off,
+		 * if they don't do it on a per-request basis.
+		 *
+		 * Note: the flag isn't the only condition drivers
+		 * should do kick off. If drive is busy, the last
+		 * request might not have the bit set.
+		 */
+		if (list_empty(&rq_list))
+			rq->cmd_flags |= REQ_END;
+
+		ret = q->mq_ops->queue_rq(hctx, rq);
+		switch (ret) {
+		case BLK_MQ_RQ_QUEUE_OK:
+			queued++;
+			continue;
+		case BLK_MQ_RQ_QUEUE_BUSY:
+			/*
+			 * FIXME: we should have a mechanism to stop the queue
+			 * like blk_stop_queue, otherwise we will waste cpu
+			 * time
+			 */
+			list_add(&rq->queuelist, &rq_list);
+			blk_mq_requeue_request(rq);
+			break;
+		default:
+			pr_err("blk-mq: bad return on queue: %d\n", ret);
+			rq->errors = -EIO;
+		case BLK_MQ_RQ_QUEUE_ERROR:
+			blk_mq_end_io(rq, rq->errors);
+			break;
+		}
+
+		if (ret == BLK_MQ_RQ_QUEUE_BUSY)
+			break;
+	}
+
+	if (!queued)
+		hctx->dispatched[0]++;
+	else if (queued < (1 << (BLK_MQ_MAX_DISPATCH_ORDER - 1)))
+		hctx->dispatched[ilog2(queued) + 1]++;
+
+	/*
+	 * Any items that need requeuing? Stuff them into hctx->dispatch,
+	 * that is where we will continue on next queue run.
+	 */
+	if (!list_empty(&rq_list)) {
+		spin_lock(&hctx->lock);
+		list_splice(&rq_list, &hctx->dispatch);
+		spin_unlock(&hctx->lock);
+	}
+}
+
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
+{
+	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->flags)))
+		return;
+
+	if (!async)
+		__blk_mq_run_hw_queue(hctx);
+	else {
+		struct request_queue *q = hctx->queue;
+
+		kblockd_schedule_delayed_work(q, &hctx->delayed_work, 0);
+	}
+}
+
+void blk_mq_run_queues(struct request_queue *q, bool async)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if ((!blk_mq_hctx_has_pending(hctx) &&
+		    list_empty_careful(&hctx->dispatch)) ||
+		    test_bit(BLK_MQ_S_STOPPED, &hctx->flags))
+			continue;
+
+		blk_mq_run_hw_queue(hctx, async);
+	}
+}
+EXPORT_SYMBOL(blk_mq_run_queues);
+
+void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
+{
+	cancel_delayed_work(&hctx->delayed_work);
+	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
+}
+EXPORT_SYMBOL(blk_mq_stop_hw_queue);
+
+void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
+{
+	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+	__blk_mq_run_hw_queue(hctx);
+}
+EXPORT_SYMBOL(blk_mq_start_hw_queue);
+
+void blk_mq_start_stopped_hw_queues(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (!test_bit(BLK_MQ_S_STOPPED, &hctx->state))
+			continue;
+
+		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+		blk_mq_run_hw_queue(hctx, true);
+	}
+}
+EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
+
+static void blk_mq_work_fn(struct work_struct *work)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_work.work);
+	__blk_mq_run_hw_queue(hctx);
+}
+
+static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
+				    struct request *rq)
+{
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+
+	list_add_tail(&rq->queuelist, &ctx->rq_list);
+	blk_mq_hctx_mark_pending(hctx, ctx);
+
+	/*
+	 * We do this early, to ensure we are on the right CPU.
+	 */
+	blk_mq_add_timer(rq);
+}
+
+void blk_mq_insert_request(struct request_queue *q, struct request *rq,
+			   bool run_queue)
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx, *current_ctx;
+
+	ctx = rq->mq_ctx;
+	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+	if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+		blk_insert_flush(rq);
+	} else {
+		current_ctx = blk_mq_get_ctx(q);
+
+		if (!cpu_online(ctx->cpu)) {
+			ctx = current_ctx;
+			hctx = q->mq_ops->map_queue(q, ctx->cpu);
+			rq->mq_ctx = ctx;
+		}
+		spin_lock(&ctx->lock);
+		__blk_mq_insert_request(hctx, rq);
+		spin_unlock(&ctx->lock);
+
+		blk_mq_put_ctx(current_ctx);
+	}
+
+	if (run_queue)
+		__blk_mq_run_hw_queue(hctx);
+}
+EXPORT_SYMBOL(blk_mq_insert_request);
+
+/*
+ * This is a special version of blk_mq_insert_request to bypass FLUSH request
+ * check. Should only be used internally.
+ */
+void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
+{
+	struct request_queue *q = rq->q;
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx, *current_ctx;
+
+	current_ctx = blk_mq_get_ctx(q);
+
+	ctx = rq->mq_ctx;
+	if (!cpu_online(ctx->cpu)) {
+		ctx = current_ctx;
+		rq->mq_ctx = ctx;
+	}
+	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+	/* ctx->cpu might be offline */
+	spin_lock(&ctx->lock);
+	__blk_mq_insert_request(hctx, rq);
+	spin_unlock(&ctx->lock);
+
+	blk_mq_put_ctx(current_ctx);
+
+	if (run_queue)
+		blk_mq_run_hw_queue(hctx, async);
+}
+
+static void blk_mq_insert_requests(struct request_queue *q,
+				     struct blk_mq_ctx *ctx,
+				     struct list_head *list,
+				     int depth,
+				     bool from_schedule)
+
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *current_ctx;
+
+	trace_block_unplug(q, depth, !from_schedule);
+
+	current_ctx = blk_mq_get_ctx(q);
+
+	if (!cpu_online(ctx->cpu))
+		ctx = current_ctx;
+	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+	/*
+	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
+	 * offline now
+	 */
+	spin_lock(&ctx->lock);
+	while (!list_empty(list)) {
+		struct request *rq;
+
+		rq = list_first_entry(list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		rq->mq_ctx = ctx;
+		__blk_mq_insert_request(hctx, rq);
+	}
+	spin_unlock(&ctx->lock);
+
+	blk_mq_put_ctx(current_ctx);
+
+	blk_mq_run_hw_queue(hctx, from_schedule);
+}
+
+static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct request *rqa = container_of(a, struct request, queuelist);
+	struct request *rqb = container_of(b, struct request, queuelist);
+
+	return !(rqa->mq_ctx < rqb->mq_ctx ||
+		 (rqa->mq_ctx == rqb->mq_ctx &&
+		  blk_rq_pos(rqa) < blk_rq_pos(rqb)));
+}
+
+void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
+{
+	struct blk_mq_ctx *this_ctx;
+	struct request_queue *this_q;
+	struct request *rq;
+	LIST_HEAD(list);
+	LIST_HEAD(ctx_list);
+	unsigned int depth;
+
+	list_splice_init(&plug->mq_list, &list);
+
+	list_sort(NULL, &list, plug_ctx_cmp);
+
+	this_q = NULL;
+	this_ctx = NULL;
+	depth = 0;
+
+	while (!list_empty(&list)) {
+		rq = list_entry_rq(list.next);
+		list_del_init(&rq->queuelist);
+		BUG_ON(!rq->q);
+		if (rq->mq_ctx != this_ctx) {
+			if (this_ctx) {
+				blk_mq_insert_requests(this_q, this_ctx,
+							&ctx_list, depth,
+							from_schedule);
+			}
+
+			this_ctx = rq->mq_ctx;
+			this_q = rq->q;
+			depth = 0;
+		}
+
+		depth++;
+		list_add_tail(&rq->queuelist, &ctx_list);
+	}
+
+	/*
+	 * If 'this_ctx' is set, we know we have entries to complete
+	 * on 'ctx_list'. Do those.
+	 */
+	if (this_ctx) {
+		blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth,
+				       from_schedule);
+	}
+}
+
+static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
+{
+	init_request_from_bio(rq, bio);
+	blk_account_io_start(rq, 1);
+}
+
+static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	const int is_sync = rw_is_sync(bio->bi_rw);
+	const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+	int rw = bio_data_dir(bio);
+	struct request *rq;
+	unsigned int use_plug, request_count = 0;
+
+	/*
+	 * If we have multiple hardware queues, just go directly to
+	 * one of those for sync IO.
+	 */
+	use_plug = !is_flush_fua && ((q->nr_hw_queues == 1) || !is_sync);
+
+	blk_queue_bounce(q, &bio);
+
+	if (use_plug && blk_attempt_plug_merge(q, bio, &request_count))
+		return;
+
+	if (blk_mq_queue_enter(q)) {
+		bio_endio(bio, -EIO);
+		return;
+	}
+
+	ctx = blk_mq_get_ctx(q);
+	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+	trace_block_getrq(q, bio, rw);
+	rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
+	if (likely(rq))
+		blk_mq_rq_ctx_init(ctx, rq, rw);
+	else {
+		blk_mq_put_ctx(ctx);
+		trace_block_sleeprq(q, bio, rw);
+		rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC,
+							false);
+		ctx = rq->mq_ctx;
+		hctx = q->mq_ops->map_queue(q, ctx->cpu);
+	}
+
+	hctx->queued++;
+
+	if (unlikely(is_flush_fua)) {
+		blk_mq_bio_to_request(rq, bio);
+		blk_mq_put_ctx(ctx);
+		blk_insert_flush(rq);
+		goto run_queue;
+	}
+
+	/*
+	 * A task plug currently exists. Since this is completely lockless,
+	 * utilize that to temporarily store requests until the task is
+	 * either done or scheduled away.
+	 */
+	if (use_plug) {
+		struct blk_plug *plug = current->plug;
+
+		if (plug) {
+			blk_mq_bio_to_request(rq, bio);
+			if (list_empty(&plug->list))
+				trace_block_plug(q);
+			else if (request_count >= BLK_MAX_REQUEST_COUNT) {
+				blk_flush_plug_list(plug, false);
+				trace_block_plug(q);
+			}
+			list_add_tail(&rq->queuelist, &plug->mq_list);
+			blk_mq_put_ctx(ctx);
+			return;
+		}
+	}
+
+	spin_lock(&ctx->lock);
+
+	if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
+	    blk_mq_attempt_merge(q, ctx, bio))
+		__blk_mq_free_request(hctx, ctx, rq);
+	else {
+		blk_mq_bio_to_request(rq, bio);
+		__blk_mq_insert_request(hctx, rq);
+	}
+
+	spin_unlock(&ctx->lock);
+	blk_mq_put_ctx(ctx);
+
+	/*
+	 * For a SYNC request, send it to the hardware immediately. For an
+	 * ASYNC request, just ensure that we run it later on. The latter
+	 * allows for merging opportunities and more efficient dispatching.
+	 */
+run_queue:
+	blk_mq_run_hw_queue(hctx, !is_sync || is_flush_fua);
+}
+
+/*
+ * Default mapping to a software queue, since we use one per CPU.
+ */
+struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
+{
+	return q->queue_hw_ctx[q->mq_map[cpu]];
+}
+EXPORT_SYMBOL(blk_mq_map_queue);
+
+struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *reg,
+						   unsigned int hctx_index)
+{
+	return kmalloc_node(sizeof(struct blk_mq_hw_ctx),
+				GFP_KERNEL | __GFP_ZERO, reg->numa_node);
+}
+EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue);
+
+void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *hctx,
+				 unsigned int hctx_index)
+{
+	kfree(hctx);
+}
+EXPORT_SYMBOL(blk_mq_free_single_hw_queue);
+
+static void blk_mq_hctx_notify(void *data, unsigned long action,
+			       unsigned int cpu)
+{
+	struct blk_mq_hw_ctx *hctx = data;
+	struct blk_mq_ctx *ctx;
+	LIST_HEAD(tmp);
+
+	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
+		return;
+
+	/*
+	 * Move ctx entries to new CPU, if this one is going away.
+	 */
+	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
+
+	spin_lock(&ctx->lock);
+	if (!list_empty(&ctx->rq_list)) {
+		list_splice_init(&ctx->rq_list, &tmp);
+		clear_bit(ctx->index_hw, hctx->ctx_map);
+	}
+	spin_unlock(&ctx->lock);
+
+	if (list_empty(&tmp))
+		return;
+
+	ctx = blk_mq_get_ctx(hctx->queue);
+	spin_lock(&ctx->lock);
+
+	while (!list_empty(&tmp)) {
+		struct request *rq;
+
+		rq = list_first_entry(&tmp, struct request, queuelist);
+		rq->mq_ctx = ctx;
+		list_move_tail(&rq->queuelist, &ctx->rq_list);
+	}
+
+	blk_mq_hctx_mark_pending(hctx, ctx);
+
+	spin_unlock(&ctx->lock);
+	blk_mq_put_ctx(ctx);
+}
+
+static void blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
+				    void (*init)(void *, struct blk_mq_hw_ctx *,
+					struct request *, unsigned int),
+				    void *data)
+{
+	unsigned int i;
+
+	for (i = 0; i < hctx->queue_depth; i++) {
+		struct request *rq = hctx->rqs[i];
+
+		init(data, hctx, rq, i);
+	}
+}
+
+void blk_mq_init_commands(struct request_queue *q,
+			  void (*init)(void *, struct blk_mq_hw_ctx *,
+					struct request *, unsigned int),
+			  void *data)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		blk_mq_init_hw_commands(hctx, init, data);
+}
+EXPORT_SYMBOL(blk_mq_init_commands);
+
+static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx)
+{
+	struct page *page;
+
+	while (!list_empty(&hctx->page_list)) {
+		page = list_first_entry(&hctx->page_list, struct page, list);
+		list_del_init(&page->list);
+		__free_pages(page, page->private);
+	}
+
+	kfree(hctx->rqs);
+
+	if (hctx->tags)
+		blk_mq_free_tags(hctx->tags);
+}
+
+static size_t order_to_size(unsigned int order)
+{
+	size_t ret = PAGE_SIZE;
+
+	while (order--)
+		ret *= 2;
+
+	return ret;
+}
+
+static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
+			      unsigned int reserved_tags, int node)
+{
+	unsigned int i, j, entries_per_page, max_order = 4;
+	size_t rq_size, left;
+
+	INIT_LIST_HEAD(&hctx->page_list);
+
+	hctx->rqs = kmalloc_node(hctx->queue_depth * sizeof(struct request *),
+					GFP_KERNEL, node);
+	if (!hctx->rqs)
+		return -ENOMEM;
+
+	/*
+	 * rq_size is the size of the request plus driver payload, rounded
+	 * to the cacheline size
+	 */
+	rq_size = round_up(sizeof(struct request) + hctx->cmd_size,
+				cache_line_size());
+	left = rq_size * hctx->queue_depth;
+
+	for (i = 0; i < hctx->queue_depth;) {
+		int this_order = max_order;
+		struct page *page;
+		int to_do;
+		void *p;
+
+		while (left < order_to_size(this_order - 1) && this_order)
+			this_order--;
+
+		do {
+			page = alloc_pages_node(node, GFP_KERNEL, this_order);
+			if (page)
+				break;
+			if (!this_order--)
+				break;
+			if (order_to_size(this_order) < rq_size)
+				break;
+		} while (1);
+
+		if (!page)
+			break;
+
+		page->private = this_order;
+		list_add_tail(&page->list, &hctx->page_list);
+
+		p = page_address(page);
+		entries_per_page = order_to_size(this_order) / rq_size;
+		to_do = min(entries_per_page, hctx->queue_depth - i);
+		left -= to_do * rq_size;
+		for (j = 0; j < to_do; j++) {
+			hctx->rqs[i] = p;
+			blk_mq_rq_init(hctx, hctx->rqs[i]);
+			p += rq_size;
+			i++;
+		}
+	}
+
+	if (i < (reserved_tags + BLK_MQ_TAG_MIN))
+		goto err_rq_map;
+	else if (i != hctx->queue_depth) {
+		hctx->queue_depth = i;
+		pr_warn("%s: queue depth set to %u because of low memory\n",
+					__func__, i);
+	}
+
+	hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node);
+	if (!hctx->tags) {
+err_rq_map:
+		blk_mq_free_rq_map(hctx);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int blk_mq_init_hw_queues(struct request_queue *q,
+				 struct blk_mq_reg *reg, void *driver_data)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i, j;
+
+	/*
+	 * Initialize hardware queues
+	 */
+	queue_for_each_hw_ctx(q, hctx, i) {
+		unsigned int num_maps;
+		int node;
+
+		node = hctx->numa_node;
+		if (node == NUMA_NO_NODE)
+			node = hctx->numa_node = reg->numa_node;
+
+		INIT_DELAYED_WORK(&hctx->delayed_work, blk_mq_work_fn);
+		spin_lock_init(&hctx->lock);
+		INIT_LIST_HEAD(&hctx->dispatch);
+		hctx->queue = q;
+		hctx->queue_num = i;
+		hctx->flags = reg->flags;
+		hctx->queue_depth = reg->queue_depth;
+		hctx->cmd_size = reg->cmd_size;
+
+		blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
+						blk_mq_hctx_notify, hctx);
+		blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
+
+		if (blk_mq_init_rq_map(hctx, reg->reserved_tags, node))
+			break;
+
+		/*
+		 * Allocate space for all possible cpus to avoid allocation in
+		 * runtime
+		 */
+		hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
+						GFP_KERNEL, node);
+		if (!hctx->ctxs)
+			break;
+
+		num_maps = ALIGN(nr_cpu_ids, BITS_PER_LONG) / BITS_PER_LONG;
+		hctx->ctx_map = kzalloc_node(num_maps * sizeof(unsigned long),
+						GFP_KERNEL, node);
+		if (!hctx->ctx_map)
+			break;
+
+		hctx->nr_ctx_map = num_maps;
+		hctx->nr_ctx = 0;
+
+		if (reg->ops->init_hctx &&
+		    reg->ops->init_hctx(hctx, driver_data, i))
+			break;
+	}
+
+	if (i == q->nr_hw_queues)
+		return 0;
+
+	/*
+	 * Init failed
+	 */
+	queue_for_each_hw_ctx(q, hctx, j) {
+		if (i == j)
+			break;
+
+		if (reg->ops->exit_hctx)
+			reg->ops->exit_hctx(hctx, j);
+
+		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
+		blk_mq_free_rq_map(hctx);
+		kfree(hctx->ctxs);
+	}
+
+	return 1;
+}
+
+static void blk_mq_init_cpu_queues(struct request_queue *q,
+				   unsigned int nr_hw_queues)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i) {
+		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
+		struct blk_mq_hw_ctx *hctx;
+
+		memset(__ctx, 0, sizeof(*__ctx));
+		__ctx->cpu = i;
+		spin_lock_init(&__ctx->lock);
+		INIT_LIST_HEAD(&__ctx->rq_list);
+		__ctx->queue = q;
+
+		/* If the cpu isn't online, the cpu is mapped to first hctx */
+		hctx = q->mq_ops->map_queue(q, i);
+		hctx->nr_ctx++;
+
+		if (!cpu_online(i))
+			continue;
+
+		/*
+		 * Set local node, IFF we have more than one hw queue. If
+		 * not, we remain on the home node of the device
+		 */
+		if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
+			hctx->numa_node = cpu_to_node(i);
+	}
+}
+
+static void blk_mq_map_swqueue(struct request_queue *q)
+{
+	unsigned int i;
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		hctx->nr_ctx = 0;
+	}
+
+	/*
+	 * Map software to hardware queues
+	 */
+	queue_for_each_ctx(q, ctx, i) {
+		/* If the cpu isn't online, the cpu is mapped to first hctx */
+		hctx = q->mq_ops->map_queue(q, i);
+		ctx->index_hw = hctx->nr_ctx;
+		hctx->ctxs[hctx->nr_ctx++] = ctx;
+	}
+}
+
+struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
+					void *driver_data)
+{
+	struct blk_mq_hw_ctx **hctxs;
+	struct blk_mq_ctx *ctx;
+	struct request_queue *q;
+	int i;
+
+	if (!reg->nr_hw_queues ||
+	    !reg->ops->queue_rq || !reg->ops->map_queue ||
+	    !reg->ops->alloc_hctx || !reg->ops->free_hctx)
+		return ERR_PTR(-EINVAL);
+
+	if (!reg->queue_depth)
+		reg->queue_depth = BLK_MQ_MAX_DEPTH;
+	else if (reg->queue_depth > BLK_MQ_MAX_DEPTH) {
+		pr_err("blk-mq: queuedepth too large (%u)\n", reg->queue_depth);
+		reg->queue_depth = BLK_MQ_MAX_DEPTH;
+	}
+
+	if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
+		return ERR_PTR(-EINVAL);
+
+	ctx = alloc_percpu(struct blk_mq_ctx);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	hctxs = kmalloc_node(reg->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
+			reg->numa_node);
+
+	if (!hctxs)
+		goto err_percpu;
+
+	for (i = 0; i < reg->nr_hw_queues; i++) {
+		hctxs[i] = reg->ops->alloc_hctx(reg, i);
+		if (!hctxs[i])
+			goto err_hctxs;
+
+		hctxs[i]->numa_node = NUMA_NO_NODE;
+		hctxs[i]->queue_num = i;
+	}
+
+	q = blk_alloc_queue_node(GFP_KERNEL, reg->numa_node);
+	if (!q)
+		goto err_hctxs;
+
+	q->mq_map = blk_mq_make_queue_map(reg);
+	if (!q->mq_map)
+		goto err_map;
+
+	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
+	blk_queue_rq_timeout(q, 30000);
+
+	q->nr_queues = nr_cpu_ids;
+	q->nr_hw_queues = reg->nr_hw_queues;
+
+	q->queue_ctx = ctx;
+	q->queue_hw_ctx = hctxs;
+
+	q->mq_ops = reg->ops;
+
+	blk_queue_make_request(q, blk_mq_make_request);
+	blk_queue_rq_timed_out(q, reg->ops->timeout);
+	if (reg->timeout)
+		blk_queue_rq_timeout(q, reg->timeout);
+
+	blk_mq_init_flush(q);
+	blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
+
+	if (blk_mq_init_hw_queues(q, reg, driver_data))
+		goto err_hw;
+
+	blk_mq_map_swqueue(q);
+
+	mutex_lock(&all_q_mutex);
+	list_add_tail(&q->all_q_node, &all_q_list);
+	mutex_unlock(&all_q_mutex);
+
+	return q;
+err_hw:
+	kfree(q->mq_map);
+err_map:
+	blk_cleanup_queue(q);
+err_hctxs:
+	for (i = 0; i < reg->nr_hw_queues; i++) {
+		if (!hctxs[i])
+			break;
+		reg->ops->free_hctx(hctxs[i], i);
+	}
+	kfree(hctxs);
+err_percpu:
+	free_percpu(ctx);
+	return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL(blk_mq_init_queue);
+
+void blk_mq_free_queue(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		cancel_delayed_work_sync(&hctx->delayed_work);
+		kfree(hctx->ctx_map);
+		kfree(hctx->ctxs);
+		blk_mq_free_rq_map(hctx);
+		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
+		if (q->mq_ops->exit_hctx)
+			q->mq_ops->exit_hctx(hctx, i);
+		q->mq_ops->free_hctx(hctx, i);
+	}
+
+	free_percpu(q->queue_ctx);
+	kfree(q->queue_hw_ctx);
+	kfree(q->mq_map);
+
+	q->queue_ctx = NULL;
+	q->queue_hw_ctx = NULL;
+	q->mq_map = NULL;
+
+	mutex_lock(&all_q_mutex);
+	list_del_init(&q->all_q_node);
+	mutex_unlock(&all_q_mutex);
+}
+EXPORT_SYMBOL(blk_mq_free_queue);
+
+/* Basically redo blk_mq_init_queue with queue frozen */
+static void __cpuinit blk_mq_queue_reinit(struct request_queue *q)
+{
+	blk_mq_freeze_queue(q);
+
+	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
+
+	/*
+	 * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
+	 * we should change hctx numa_node according to new topology (this
+	 * involves free and re-allocate memory, worthy doing?)
+	 */
+
+	blk_mq_map_swqueue(q);
+
+	blk_mq_unfreeze_queue(q);
+}
+
+static int __cpuinit blk_mq_queue_reinit_notify(struct notifier_block *nb,
+		unsigned long action, void *hcpu)
+{
+	struct request_queue *q;
+
+	/*
+	 * Before new mapping is established, hotadded cpu might already start
+	 * handling requests. This doesn't break anything as we map offline
+	 * CPUs to first hardware queue. We will re-init queue below to get
+	 * optimal settings.
+	 */
+	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
+	    action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
+		return NOTIFY_OK;
+
+	mutex_lock(&all_q_mutex);
+	list_for_each_entry(q, &all_q_list, all_q_node)
+		blk_mq_queue_reinit(q);
+	mutex_unlock(&all_q_mutex);
+	return NOTIFY_OK;
+}
+
+static int __init blk_mq_init(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i)
+		init_llist_head(&per_cpu(ipi_lists, i));
+
+	blk_mq_cpu_init();
+
+	/* Must be called after percpu_counter_hotcpu_callback() */
+	hotcpu_notifier(blk_mq_queue_reinit_notify, -10);
+
+	return 0;
+}
+subsys_initcall(blk_mq_init);
diff --git a/block/blk-mq.h b/block/blk-mq.h
new file mode 100644
index 000000000000..52bf1f96a2c2
--- /dev/null
+++ b/block/blk-mq.h
@@ -0,0 +1,52 @@
+#ifndef INT_BLK_MQ_H
+#define INT_BLK_MQ_H
+
+struct blk_mq_ctx {
+	struct {
+		spinlock_t		lock;
+		struct list_head	rq_list;
+	}  ____cacheline_aligned_in_smp;
+
+	unsigned int		cpu;
+	unsigned int		index_hw;
+	unsigned int		ipi_redirect;
+
+	/* incremented at dispatch time */
+	unsigned long		rq_dispatched[2];
+	unsigned long		rq_merged;
+
+	/* incremented at completion time */
+	unsigned long		____cacheline_aligned_in_smp rq_completed[2];
+
+	struct request_queue	*queue;
+	struct kobject		kobj;
+};
+
+void __blk_mq_end_io(struct request *rq, int error);
+void blk_mq_complete_request(struct request *rq, int error);
+void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
+void blk_mq_init_flush(struct request_queue *q);
+
+/*
+ * CPU hotplug helpers
+ */
+struct blk_mq_cpu_notifier;
+void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
+			      void (*fn)(void *, unsigned long, unsigned int),
+			      void *data);
+void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
+void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
+void blk_mq_cpu_init(void);
+DECLARE_PER_CPU(struct llist_head, ipi_lists);
+
+/*
+ * CPU -> queue mappings
+ */
+struct blk_mq_reg;
+extern unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg);
+extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
+
+void blk_mq_add_timer(struct request *rq);
+
+#endif
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3aa5b195f4dd..4f8c4d90ec73 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -7,6 +7,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/blktrace_api.h>
+#include <linux/blk-mq.h>
 
 #include "blk.h"
 #include "blk-cgroup.h"
@@ -542,6 +543,11 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
+	percpu_counter_destroy(&q->mq_usage_counter);
+
+	if (q->mq_ops)
+		blk_mq_free_queue(q);
+
 	blk_trace_shutdown(q);
 
 	bdi_destroy(&q->backing_dev_info);
@@ -575,6 +581,7 @@ int blk_register_queue(struct gendisk *disk)
 	 * bypass from queue allocation.
 	 */
 	blk_queue_bypass_end(q);
+	queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
 
 	ret = blk_trace_init_sysfs(dev);
 	if (ret)
@@ -588,6 +595,9 @@ int blk_register_queue(struct gendisk *disk)
 
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
+	if (q->mq_ops)
+		blk_mq_register_disk(disk);
+
 	if (!q->request_fn)
 		return 0;
 
@@ -610,6 +620,9 @@ void blk_unregister_queue(struct gendisk *disk)
 	if (WARN_ON(!q))
 		return;
 
+	if (q->mq_ops)
+		blk_mq_unregister_disk(disk);
+
 	if (q->request_fn)
 		elv_unregister_queue(q);
 
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 65f103563969..22846cf3595a 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -7,6 +7,7 @@
 #include <linux/fault-inject.h>
 
 #include "blk.h"
+#include "blk-mq.h"
 
 #ifdef CONFIG_FAIL_IO_TIMEOUT
 
@@ -88,11 +89,18 @@ static void blk_rq_timed_out(struct request *req)
 		ret = q->rq_timed_out_fn(req);
 	switch (ret) {
 	case BLK_EH_HANDLED:
-		__blk_complete_request(req);
+		/* Can we use req->errors here? */
+		if (q->mq_ops)
+			blk_mq_complete_request(req, req->errors);
+		else
+			__blk_complete_request(req);
 		break;
 	case BLK_EH_RESET_TIMER:
 		blk_clear_rq_complete(req);
-		blk_add_timer(req);
+		if (q->mq_ops)
+			blk_mq_add_timer(req);
+		else
+			blk_add_timer(req);
 		break;
 	case BLK_EH_NOT_HANDLED:
 		/*
@@ -108,6 +116,23 @@ static void blk_rq_timed_out(struct request *req)
 	}
 }
 
+void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
+			  unsigned int *next_set)
+{
+	if (time_after_eq(jiffies, rq->deadline)) {
+		list_del_init(&rq->timeout_list);
+
+		/*
+		 * Check if we raced with end io completion
+		 */
+		if (!blk_mark_rq_complete(rq))
+			blk_rq_timed_out(rq);
+	} else if (!*next_set || time_after(*next_timeout, rq->deadline)) {
+		*next_timeout = rq->deadline;
+		*next_set = 1;
+	}
+}
+
 void blk_rq_timed_out_timer(unsigned long data)
 {
 	struct request_queue *q = (struct request_queue *) data;
@@ -117,21 +142,8 @@ void blk_rq_timed_out_timer(unsigned long data)
 
 	spin_lock_irqsave(q->queue_lock, flags);
 
-	list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) {
-		if (time_after_eq(jiffies, rq->deadline)) {
-			list_del_init(&rq->timeout_list);
-
-			/*
-			 * Check if we raced with end io completion
-			 */
-			if (blk_mark_rq_complete(rq))
-				continue;
-			blk_rq_timed_out(rq);
-		} else if (!next_set || time_after(next, rq->deadline)) {
-			next = rq->deadline;
-			next_set = 1;
-		}
-	}
+	list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
+		blk_rq_check_expired(rq, &next, &next_set);
 
 	if (next_set)
 		mod_timer(&q->timeout, round_jiffies_up(next));
@@ -157,15 +169,7 @@ void blk_abort_request(struct request *req)
 }
 EXPORT_SYMBOL_GPL(blk_abort_request);
 
-/**
- * blk_add_timer - Start timeout timer for a single request
- * @req:	request that is about to start running.
- *
- * Notes:
- *    Each request has its own timer, and as it is added to the queue, we
- *    set up the timer. When the request completes, we cancel the timer.
- */
-void blk_add_timer(struct request *req)
+void __blk_add_timer(struct request *req, struct list_head *timeout_list)
 {
 	struct request_queue *q = req->q;
 	unsigned long expiry;
@@ -184,7 +188,8 @@ void blk_add_timer(struct request *req)
 		req->timeout = q->rq_timeout;
 
 	req->deadline = jiffies + req->timeout;
-	list_add_tail(&req->timeout_list, &q->timeout_list);
+	if (timeout_list)
+		list_add_tail(&req->timeout_list, timeout_list);
 
 	/*
 	 * If the timer isn't already pending or this timeout is earlier
@@ -196,5 +201,19 @@ void blk_add_timer(struct request *req)
 	if (!timer_pending(&q->timeout) ||
 	    time_before(expiry, q->timeout.expires))
 		mod_timer(&q->timeout, expiry);
+
+}
+
+/**
+ * blk_add_timer - Start timeout timer for a single request
+ * @req:	request that is about to start running.
+ *
+ * Notes:
+ *    Each request has its own timer, and as it is added to the queue, we
+ *    set up the timer. When the request completes, we cancel the timer.
+ */
+void blk_add_timer(struct request *req)
+{
+	__blk_add_timer(req, &req->q->timeout_list);
 }
 
diff --git a/block/blk.h b/block/blk.h
index e837b8f619b7..c90e1d8f7a2b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -10,6 +10,7 @@
 #define BLK_BATCH_REQ	32
 
 extern struct kmem_cache *blk_requestq_cachep;
+extern struct kmem_cache *request_cachep;
 extern struct kobj_type blk_queue_ktype;
 extern struct ida blk_queue_ida;
 
@@ -34,14 +35,30 @@ bool __blk_end_bidi_request(struct request *rq, int error,
 			    unsigned int nr_bytes, unsigned int bidi_bytes);
 
 void blk_rq_timed_out_timer(unsigned long data);
+void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
+			  unsigned int *next_set);
+void __blk_add_timer(struct request *req, struct list_head *timeout_list);
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
 
+
+bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
+			     struct bio *bio);
+bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
+			    struct bio *bio);
+bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
+			    unsigned int *request_count);
+
+void blk_account_io_start(struct request *req, bool new_io);
+void blk_account_io_completion(struct request *req, unsigned int bytes);
+void blk_account_io_done(struct request *req);
+
 /*
  * Internal atomic flags for request handling
  */
 enum rq_atomic_flags {
 	REQ_ATOM_COMPLETE = 0,
+	REQ_ATOM_STARTED,
 };
 
 /*
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ec48bac5b039..4c2775443dcf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -419,6 +419,8 @@ static inline void bio_list_init(struct bio_list *bl)
 	bl->head = bl->tail = NULL;
 }
 
+#define BIO_EMPTY_LIST	{ NULL, NULL }
+
 #define bio_list_for_each(bio, bl) \
 	for (bio = (bl)->head; bio; bio = bio->bi_next)
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
new file mode 100644
index 000000000000..746042ff321a
--- /dev/null
+++ b/include/linux/blk-mq.h
@@ -0,0 +1,182 @@
+#ifndef BLK_MQ_H
+#define BLK_MQ_H
+
+#include <linux/blkdev.h>
+
+struct blk_mq_tags;
+
+struct blk_mq_cpu_notifier {
+	struct list_head list;
+	void *data;
+	void (*notify)(void *data, unsigned long action, unsigned int cpu);
+};
+
+struct blk_mq_hw_ctx {
+	struct {
+		spinlock_t		lock;
+		struct list_head	dispatch;
+	} ____cacheline_aligned_in_smp;
+
+	unsigned long		state;		/* BLK_MQ_S_* flags */
+	struct delayed_work	delayed_work;
+
+	unsigned long		flags;		/* BLK_MQ_F_* flags */
+
+	struct request_queue	*queue;
+	unsigned int		queue_num;
+
+	void			*driver_data;
+
+	unsigned int		nr_ctx;
+	struct blk_mq_ctx	**ctxs;
+	unsigned int 		nr_ctx_map;
+	unsigned long		*ctx_map;
+
+	struct request		**rqs;
+	struct list_head	page_list;
+	struct blk_mq_tags	*tags;
+
+	unsigned long		queued;
+	unsigned long		run;
+#define BLK_MQ_MAX_DISPATCH_ORDER	10
+	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
+
+	unsigned int		queue_depth;
+	unsigned int		numa_node;
+	unsigned int		cmd_size;	/* per-request extra data */
+
+	struct blk_mq_cpu_notifier	cpu_notifier;
+	struct kobject		kobj;
+};
+
+struct blk_mq_reg {
+	struct blk_mq_ops	*ops;
+	unsigned int		nr_hw_queues;
+	unsigned int		queue_depth;
+	unsigned int		reserved_tags;
+	unsigned int		cmd_size;	/* per-request extra data */
+	int			numa_node;
+	unsigned int		timeout;
+	unsigned int		flags;		/* BLK_MQ_F_* */
+};
+
+typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
+typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
+typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int);
+typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
+typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
+typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
+
+struct blk_mq_ops {
+	/*
+	 * Queue request
+	 */
+	queue_rq_fn		*queue_rq;
+
+	/*
+	 * Map to specific hardware queue
+	 */
+	map_queue_fn		*map_queue;
+
+	/*
+	 * Called on request timeout
+	 */
+	rq_timed_out_fn		*timeout;
+
+	/*
+	 * Override for hctx allocations (should probably go)
+	 */
+	alloc_hctx_fn		*alloc_hctx;
+	free_hctx_fn		*free_hctx;
+
+	/*
+	 * Called when the block layer side of a hardware queue has been
+	 * set up, allowing the driver to allocate/init matching structures.
+	 * Ditto for exit/teardown.
+	 */
+	init_hctx_fn		*init_hctx;
+	exit_hctx_fn		*exit_hctx;
+};
+
+enum {
+	BLK_MQ_RQ_QUEUE_OK	= 0,	/* queued fine */
+	BLK_MQ_RQ_QUEUE_BUSY	= 1,	/* requeue IO for later */
+	BLK_MQ_RQ_QUEUE_ERROR	= 2,	/* end IO with error */
+
+	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
+	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
+	BLK_MQ_F_SHOULD_IPI	= 1 << 2,
+
+	BLK_MQ_S_STOPPED	= 1 << 0,
+
+	BLK_MQ_MAX_DEPTH	= 2048,
+};
+
+struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *);
+void blk_mq_free_queue(struct request_queue *);
+int blk_mq_register_disk(struct gendisk *);
+void blk_mq_unregister_disk(struct gendisk *);
+void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
+
+void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
+
+void blk_mq_insert_request(struct request_queue *, struct request *, bool);
+void blk_mq_run_queues(struct request_queue *q, bool async);
+void blk_mq_free_request(struct request *rq);
+bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp);
+struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
+struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag);
+
+struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
+struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int);
+void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
+
+void blk_mq_end_io(struct request *rq, int error);
+
+void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
+void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
+void blk_mq_start_stopped_hw_queues(struct request_queue *q);
+
+/*
+ * Driver command data is immediately after the request. So subtract request
+ * size to get back to the original request.
+ */
+static inline struct request *blk_mq_rq_from_pdu(void *pdu)
+{
+	return pdu - sizeof(struct request);
+}
+static inline void *blk_mq_rq_to_pdu(struct request *rq)
+{
+	return (void *) rq + sizeof(*rq);
+}
+
+static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx,
+					       unsigned int tag)
+{
+	return hctx->rqs[tag];
+}
+
+#define queue_for_each_hw_ctx(q, hctx, i)				\
+	for ((i) = 0, hctx = (q)->queue_hw_ctx[0];			\
+	     (i) < (q)->nr_hw_queues; (i)++, hctx = (q)->queue_hw_ctx[i])
+
+#define queue_for_each_ctx(q, ctx, i)					\
+	for ((i) = 0, ctx = per_cpu_ptr((q)->queue_ctx, 0);		\
+	     (i) < (q)->nr_queues; (i)++, ctx = per_cpu_ptr(q->queue_ctx, (i)))
+
+#define hctx_for_each_ctx(hctx, ctx, i)					\
+	for ((i) = 0, ctx = (hctx)->ctxs[0];				\
+	     (i) < (hctx)->nr_ctx; (i)++, ctx = (hctx)->ctxs[(i)])
+
+#define blk_ctx_sum(q, sum)						\
+({									\
+	struct blk_mq_ctx *__x;						\
+	unsigned int __ret = 0, __i;					\
+									\
+	queue_for_each_ctx((q), __x, __i)				\
+		__ret += sum;						\
+	__ret;								\
+})
+
+#endif
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index c26801e14788..238ef0ed62f8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,6 +178,7 @@ enum rq_flag_bits {
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_KERNEL, 		/* direct IO to kernel pages */
 	__REQ_PM,		/* runtime pm request */
+	__REQ_END,		/* last of chain of requests */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -229,5 +230,6 @@ enum rq_flag_bits {
 #define REQ_SECURE		(1ULL << __REQ_SECURE)
 #define REQ_KERNEL		(1ULL << __REQ_KERNEL)
 #define REQ_PM			(1ULL << __REQ_PM)
+#define REQ_END			(1ULL << __REQ_END)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0a8da96274c3..f26ec20f6354 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -8,6 +8,7 @@
 #include <linux/major.h>
 #include <linux/genhd.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/pagemap.h>
@@ -94,10 +95,17 @@ enum rq_cmd_type_bits {
  * as well!
  */
 struct request {
-	struct list_head queuelist;
-	struct call_single_data csd;
+	union {
+		struct list_head queuelist;
+		struct llist_node ll_list;
+	};
+	union {
+		struct call_single_data csd;
+		struct work_struct mq_flush_data;
+	};
 
 	struct request_queue *q;
+	struct blk_mq_ctx *mq_ctx;
 
 	u64 cmd_flags;
 	enum rq_cmd_type_bits cmd_type;
@@ -213,6 +221,8 @@ struct request_pm_state
 
 #include <linux/elevator.h>
 
+struct blk_queue_ctx;
+
 typedef void (request_fn_proc) (struct request_queue *q);
 typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
@@ -311,6 +321,18 @@ struct request_queue {
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
 
+	struct blk_mq_ops	*mq_ops;
+
+	unsigned int		*mq_map;
+
+	/* sw queues */
+	struct blk_mq_ctx	*queue_ctx;
+	unsigned int		nr_queues;
+
+	/* hw dispatch queues */
+	struct blk_mq_hw_ctx	**queue_hw_ctx;
+	unsigned int		nr_hw_queues;
+
 	/*
 	 * Dispatch queue sorting
 	 */
@@ -359,6 +381,11 @@ struct request_queue {
 	 */
 	struct kobject kobj;
 
+	/*
+	 * mq queue kobject
+	 */
+	struct kobject mq_kobj;
+
 #ifdef CONFIG_PM_RUNTIME
 	struct device		*dev;
 	int			rpm_status;
@@ -423,7 +450,13 @@ struct request_queue {
 	unsigned long		flush_pending_since;
 	struct list_head	flush_queue[2];
 	struct list_head	flush_data_in_flight;
-	struct request		flush_rq;
+	union {
+		struct request	flush_rq;
+		struct {
+			spinlock_t mq_flush_lock;
+			struct work_struct mq_flush_work;
+		};
+	};
 
 	struct mutex		sysfs_lock;
 
@@ -435,14 +468,14 @@ struct request_queue {
 	struct bsg_class_device bsg_dev;
 #endif
 
-#ifdef CONFIG_BLK_CGROUP
-	struct list_head	all_q_node;
-#endif
 #ifdef CONFIG_BLK_DEV_THROTTLING
 	/* Throttle data */
 	struct throtl_data *td;
 #endif
 	struct rcu_head		rcu_head;
+	wait_queue_head_t	mq_freeze_wq;
+	struct percpu_counter	mq_usage_counter;
+	struct list_head	all_q_node;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
@@ -465,6 +498,7 @@ struct request_queue {
 #define QUEUE_FLAG_SECDISCARD  17	/* supports SECDISCARD */
 #define QUEUE_FLAG_SAME_FORCE  18	/* force complete on same CPU */
 #define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
+#define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -537,6 +571,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
 #define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_bypass(q)	test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
+#define blk_queue_init_done(q)	test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
@@ -1011,6 +1046,7 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
 struct blk_plug {
 	unsigned long magic; /* detect uninitialized use-cases */
 	struct list_head list; /* requests */
+	struct list_head mq_list; /* blk-mq requests */
 	struct list_head cb_list; /* md requires an unplug callback */
 };
 #define BLK_MAX_REQUEST_COUNT 16
@@ -1048,7 +1084,10 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 {
 	struct blk_plug *plug = tsk->plug;
 
-	return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
+	return plug &&
+		(!list_empty(&plug->list) ||
+		 !list_empty(&plug->mq_list) ||
+		 !list_empty(&plug->cb_list));
 }
 
 /*
@@ -1323,6 +1362,7 @@ static inline void put_dev_sector(Sector p)
 
 struct work_struct;
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
+int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
 
 #ifdef CONFIG_BLK_CGROUP
 /*
-- 
cgit v1.2.3


From 280d45f6c35d8d7a0fe20c36caf426e3ac139cf9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 25 Oct 2013 14:45:58 +0100
Subject: blk-mq: add blk_mq_stop_hw_queues

Add a helper to iterate over all hw queues and stop them.  This is useful
for driver that implement PM suspend functionality.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Modified to just call blk_mq_stop_hw_queue() by Jens.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 10 ++++++++++
 include/linux/blk-mq.h |  1 +
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f21ec964e411..ac804c635040 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -672,6 +672,16 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
 
+void blk_mq_stop_hw_queues(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		blk_mq_stop_hw_queue(hctx);
+}
+EXPORT_SYMBOL(blk_mq_stop_hw_queues);
+
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 746042ff321a..3368b97bee73 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -136,6 +136,7 @@ void blk_mq_end_io(struct request *rq, int error);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
+void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q);
 
 /*
-- 
cgit v1.2.3


From 954e04b9491adea99e4590bc73937fdd8774ab3c Mon Sep 17 00:00:00 2001
From: Bryan Wu <cooloney@gmail.com>
Date: Tue, 24 Sep 2013 10:38:26 -0700
Subject: of: introduce of_get_available_child_count

Some drivers keep counting available child by themselves. So
introduce a new simple API like of_get_child_count() but for
available childs.

Cc: Josh Wu <josh.wu@atmel.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
Acked-by: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/of.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..54c25606a997 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -226,6 +226,17 @@ static inline int of_get_child_count(const struct device_node *np)
 	return num;
 }
 
+static inline int of_get_available_child_count(const struct device_node *np)
+{
+	struct device_node *child;
+	int num = 0;
+
+	for_each_available_child_of_node(np, child)
+		num++;
+
+	return num;
+}
+
 extern struct device_node *of_find_node_with_property(
 	struct device_node *from, const char *prop_name);
 #define for_each_node_with_property(dn, prop_name) \
@@ -376,6 +387,11 @@ static inline int of_get_child_count(const struct device_node *np)
 	return 0;
 }
 
+static inline int of_get_available_child_count(const struct device_node *np)
+{
+	return 0;
+}
+
 static inline int of_device_is_compatible(const struct device_node *device,
 					  const char *name)
 {
-- 
cgit v1.2.3


From 30dae2f98612d7c8cd855861b9de205ebd9ef4fa Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@debian.org>
Date: Tue, 22 Oct 2013 11:02:56 -0700
Subject: leds: lp55xx: handle enable pin in driver

This patch moves the handling of the chip's enable pin from the board
code into the driver. It also updates all board-code files using the
driver to incorporate this change.

This is needed for device tree support of the enable pin.

Signed-off-by: Sebastian Reichel <sre@debian.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 .../devicetree/bindings/leds/leds-lp55xx.txt       |  1 +
 arch/arm/mach-omap2/board-rx51-peripherals.c       | 20 +----------------
 arch/arm/mach-ux500/board-mop500.c                 |  2 ++
 drivers/leds/leds-lp55xx-common.c                  | 25 +++++++++++-----------
 include/linux/platform_data/leds-lp55xx.h          |  6 ++----
 5 files changed, 19 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
index d221e75d90fa..c55b8c016a9e 100644
--- a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
+++ b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
@@ -10,6 +10,7 @@ Each child has own specific current settings
 - max-cur: Maximun current at each led channel.
 
 Optional properties:
+- enable-gpio: GPIO attached to the chip's enable pin
 - label: Used for naming LEDs
 - pwr-sel: LP8501 specific property. Power selection for output channels.
          0: D1~9 are connected to VDD
diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index f6fe388af989..68dc998fa34b 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -211,29 +211,11 @@ static struct lp55xx_led_config rx51_lp5523_led_config[] = {
 	}
 };
 
-static int rx51_lp5523_setup(void)
-{
-	return gpio_request_one(RX51_LP5523_CHIP_EN_GPIO, GPIOF_DIR_OUT,
-			"lp5523_enable");
-}
-
-static void rx51_lp5523_release(void)
-{
-	gpio_free(RX51_LP5523_CHIP_EN_GPIO);
-}
-
-static void rx51_lp5523_enable(bool state)
-{
-	gpio_set_value(RX51_LP5523_CHIP_EN_GPIO, !!state);
-}
-
 static struct lp55xx_platform_data rx51_lp5523_platform_data = {
 	.led_config		= rx51_lp5523_led_config,
 	.num_channels		= ARRAY_SIZE(rx51_lp5523_led_config),
 	.clock_mode		= LP55XX_CLOCK_AUTO,
-	.setup_resources	= rx51_lp5523_setup,
-	.release_resources	= rx51_lp5523_release,
-	.enable			= rx51_lp5523_enable,
+	.enable_gpio		= RX51_LP5523_CHIP_EN_GPIO,
 };
 #endif
 
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index ad0806eff762..703dec2b7d8d 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -297,6 +297,7 @@ static struct lp55xx_platform_data __initdata lp5521_pri_data = {
        .led_config     = &lp5521_pri_led[0],
        .num_channels   = 3,
        .clock_mode     = LP55XX_CLOCK_EXT,
+       .enable_gpio    = -1,
 };
 
 static struct lp55xx_led_config lp5521_sec_led[] = {
@@ -322,6 +323,7 @@ static struct lp55xx_platform_data __initdata lp5521_sec_data = {
        .led_config     = &lp5521_sec_led[0],
        .num_channels   = 3,
        .clock_mode     = LP55XX_CLOCK_EXT,
+       .enable_gpio    = -1,
 };
 
 /* I2C0 devices only available on the first HREF/MOP500 */
diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c
index 075acf5b9fab..9acc6bb7deef 100644
--- a/drivers/leds/leds-lp55xx-common.c
+++ b/drivers/leds/leds-lp55xx-common.c
@@ -20,6 +20,8 @@
 #include <linux/module.h>
 #include <linux/platform_data/leds-lp55xx.h>
 #include <linux/slab.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
 
 #include "leds-lp55xx-common.h"
 
@@ -407,18 +409,18 @@ int lp55xx_init_device(struct lp55xx_chip *chip)
 	if (!pdata || !cfg)
 		return -EINVAL;
 
-	if (pdata->setup_resources) {
-		ret = pdata->setup_resources();
+	if (gpio_is_valid(pdata->enable_gpio)) {
+		ret = devm_gpio_request_one(dev, pdata->enable_gpio,
+					    GPIOF_DIR_OUT, "lp5523_enable");
 		if (ret < 0) {
-			dev_err(dev, "setup resoure err: %d\n", ret);
+			dev_err(dev, "could not acquire enable gpio (err=%d)\n",
+				ret);
 			goto err;
 		}
-	}
 
-	if (pdata->enable) {
-		pdata->enable(0);
+		gpio_set_value(pdata->enable_gpio, 0);
 		usleep_range(1000, 2000); /* Keep enable down at least 1ms */
-		pdata->enable(1);
+		gpio_set_value(pdata->enable_gpio, 1);
 		usleep_range(1000, 2000); /* 500us abs min. */
 	}
 
@@ -459,11 +461,8 @@ void lp55xx_deinit_device(struct lp55xx_chip *chip)
 	if (chip->clk)
 		clk_disable_unprepare(chip->clk);
 
-	if (pdata->enable)
-		pdata->enable(0);
-
-	if (pdata->release_resources)
-		pdata->release_resources();
+	if (gpio_is_valid(pdata->enable_gpio))
+		gpio_set_value(pdata->enable_gpio, 0);
 }
 EXPORT_SYMBOL_GPL(lp55xx_deinit_device);
 
@@ -596,6 +595,8 @@ int lp55xx_of_populate_pdata(struct device *dev, struct device_node *np)
 	of_property_read_string(np, "label", &pdata->label);
 	of_property_read_u8(np, "clock-mode", &pdata->clock_mode);
 
+	pdata->enable_gpio = of_get_named_gpio(np, "enable-gpio", 0);
+
 	/* LP8501 specific */
 	of_property_read_u8(np, "pwr-sel", (u8 *)&pdata->pwr_sel);
 
diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h
index c32de4dcec54..624ff9edad6f 100644
--- a/include/linux/platform_data/leds-lp55xx.h
+++ b/include/linux/platform_data/leds-lp55xx.h
@@ -67,10 +67,8 @@ struct lp55xx_platform_data {
 	/* Clock configuration */
 	u8 clock_mode;
 
-	/* Platform specific functions */
-	int (*setup_resources)(void);
-	void (*release_resources)(void);
-	void (*enable)(bool state);
+	/* optional enable GPIO */
+	int enable_gpio;
 
 	/* Predefined pattern data */
 	struct lp55xx_predef_pattern *patterns;
-- 
cgit v1.2.3


From 5d4879cda67b09f086807821cf594ee079d6dfbe Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Thu, 19 Sep 2013 16:03:50 -0500
Subject: PM / OPP: rename functions to dev_pm_opp*

Since Operating Performance Points (OPP) functions are specific to
device specific power management, be specific and rename opp_*
accessors in OPP library with dev_pm_opp_* equivalent.

Affected functions are:
 opp_get_voltage
 opp_get_freq
 opp_get_opp_count
 opp_find_freq_exact
 opp_find_freq_floor
 opp_find_freq_ceil
 opp_add
 opp_enable
 opp_disable
 opp_get_notifier
 opp_init_cpufreq_table
 opp_free_cpufreq_table

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Nishanth Menon <nm@ti.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/opp.txt             | 102 ++++++++++++++++----------------
 arch/arm/mach-imx/mach-imx6q.c          |   2 +-
 arch/arm/mach-omap2/board-omap3beagle.c |   8 +--
 arch/arm/mach-omap2/opp.c               |   4 +-
 arch/arm/mach-omap2/pm.c                |   4 +-
 drivers/base/power/opp.c                |  82 ++++++++++++-------------
 drivers/cpufreq/arm_big_little.c        |   6 +-
 drivers/cpufreq/cpufreq-cpu0.c          |  18 +++---
 drivers/cpufreq/exynos5440-cpufreq.c    |  13 ++--
 drivers/cpufreq/imx6q-cpufreq.c         |  20 +++----
 drivers/cpufreq/omap-cpufreq.c          |   8 +--
 drivers/devfreq/devfreq.c               |  14 ++---
 drivers/devfreq/exynos/exynos4_bus.c    |  21 +++----
 drivers/devfreq/exynos/exynos5_bus.c    |  18 +++---
 include/linux/opp.h                     |  50 ++++++++--------
 15 files changed, 187 insertions(+), 183 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt
index 425c51d56aef..185367b1848f 100644
--- a/Documentation/power/opp.txt
+++ b/Documentation/power/opp.txt
@@ -71,14 +71,14 @@ operations until that OPP could be re-enabled if possible.
 
 OPP library facilitates this concept in it's implementation. The following
 operational functions operate only on available opps:
-opp_find_freq_{ceil, floor}, opp_get_voltage, opp_get_freq, opp_get_opp_count
-and opp_init_cpufreq_table
+opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count
+and dev_pm_opp_init_cpufreq_table
 
-opp_find_freq_exact is meant to be used to find the opp pointer which can then
-be used for opp_enable/disable functions to make an opp available as required.
+dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then
+be used for dev_pm_opp_enable/disable functions to make an opp available as required.
 
 WARNING: Users of OPP library should refresh their availability count using
-get_opp_count if opp_enable/disable functions are invoked for a device, the
+get_opp_count if dev_pm_opp_enable/disable functions are invoked for a device, the
 exact mechanism to trigger these or the notification mechanism to other
 dependent subsystems such as cpufreq are left to the discretion of the SoC
 specific framework which uses the OPP library. Similar care needs to be taken
@@ -96,24 +96,24 @@ using RCU read locks. The opp_find_freq_{exact,ceil,floor},
 opp_get_{voltage, freq, opp_count} fall into this category.
 
 opp_{add,enable,disable} are updaters which use mutex and implement it's own
-RCU locking mechanisms. opp_init_cpufreq_table acts as an updater and uses
+RCU locking mechanisms. dev_pm_opp_init_cpufreq_table acts as an updater and uses
 mutex to implment RCU updater strategy. These functions should *NOT* be called
 under RCU locks and other contexts that prevent blocking functions in RCU or
 mutex operations from working.
 
 2. Initial OPP List Registration
 ================================
-The SoC implementation calls opp_add function iteratively to add OPPs per
+The SoC implementation calls dev_pm_opp_add function iteratively to add OPPs per
 device. It is expected that the SoC framework will register the OPP entries
 optimally- typical numbers range to be less than 5. The list generated by
 registering the OPPs is maintained by OPP library throughout the device
 operation. The SoC framework can subsequently control the availability of the
-OPPs dynamically using the opp_enable / disable functions.
+OPPs dynamically using the dev_pm_opp_enable / disable functions.
 
-opp_add - Add a new OPP for a specific domain represented by the device pointer.
+dev_pm_opp_add - Add a new OPP for a specific domain represented by the device pointer.
 	The OPP is defined using the frequency and voltage. Once added, the OPP
 	is assumed to be available and control of it's availability can be done
-	with the opp_enable/disable functions. OPP library internally stores
+	with the dev_pm_opp_enable/disable functions. OPP library internally stores
 	and manages this information in the opp struct. This function may be
 	used by SoC framework to define a optimal list as per the demands of
 	SoC usage environment.
@@ -124,7 +124,7 @@ opp_add - Add a new OPP for a specific domain represented by the device pointer.
 	 soc_pm_init()
 	 {
 		/* Do things */
-		r = opp_add(mpu_dev, 1000000, 900000);
+		r = dev_pm_opp_add(mpu_dev, 1000000, 900000);
 		if (!r) {
 			pr_err("%s: unable to register mpu opp(%d)\n", r);
 			goto no_cpufreq;
@@ -143,44 +143,44 @@ functions return the matching pointer representing the opp if a match is
 found, else returns error. These errors are expected to be handled by standard
 error checks such as IS_ERR() and appropriate actions taken by the caller.
 
-opp_find_freq_exact - Search for an OPP based on an *exact* frequency and
+dev_pm_opp_find_freq_exact - Search for an OPP based on an *exact* frequency and
 	availability. This function is especially useful to enable an OPP which
 	is not available by default.
 	Example: In a case when SoC framework detects a situation where a
 	higher frequency could be made available, it can use this function to
-	find the OPP prior to call the opp_enable to actually make it available.
+	find the OPP prior to call the dev_pm_opp_enable to actually make it available.
 	 rcu_read_lock();
-	 opp = opp_find_freq_exact(dev, 1000000000, false);
+	 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
 	 rcu_read_unlock();
 	 /* dont operate on the pointer.. just do a sanity check.. */
 	 if (IS_ERR(opp)) {
 		pr_err("frequency not disabled!\n");
 		/* trigger appropriate actions.. */
 	 } else {
-		opp_enable(dev,1000000000);
+		dev_pm_opp_enable(dev,1000000000);
 	 }
 
 	NOTE: This is the only search function that operates on OPPs which are
 	not available.
 
-opp_find_freq_floor - Search for an available OPP which is *at most* the
+dev_pm_opp_find_freq_floor - Search for an available OPP which is *at most* the
 	provided frequency. This function is useful while searching for a lesser
 	match OR operating on OPP information in the order of decreasing
 	frequency.
 	Example: To find the highest opp for a device:
 	 freq = ULONG_MAX;
 	 rcu_read_lock();
-	 opp_find_freq_floor(dev, &freq);
+	 dev_pm_opp_find_freq_floor(dev, &freq);
 	 rcu_read_unlock();
 
-opp_find_freq_ceil - Search for an available OPP which is *at least* the
+dev_pm_opp_find_freq_ceil - Search for an available OPP which is *at least* the
 	provided frequency. This function is useful while searching for a
 	higher match OR operating on OPP information in the order of increasing
 	frequency.
 	Example 1: To find the lowest opp for a device:
 	 freq = 0;
 	 rcu_read_lock();
-	 opp_find_freq_ceil(dev, &freq);
+	 dev_pm_opp_find_freq_ceil(dev, &freq);
 	 rcu_read_unlock();
 	Example 2: A simplified implementation of a SoC cpufreq_driver->target:
 	 soc_cpufreq_target(..)
@@ -188,7 +188,7 @@ opp_find_freq_ceil - Search for an available OPP which is *at least* the
 		/* Do stuff like policy checks etc. */
 		/* Find the best frequency match for the req */
 		rcu_read_lock();
-		opp = opp_find_freq_ceil(dev, &freq);
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
 		rcu_read_unlock();
 		if (!IS_ERR(opp))
 			soc_switch_to_freq_voltage(freq);
@@ -208,34 +208,34 @@ as thermal considerations (e.g. don't use OPPx until the temperature drops).
 
 WARNING: Do not use these functions in interrupt context.
 
-opp_enable - Make a OPP available for operation.
+dev_pm_opp_enable - Make a OPP available for operation.
 	Example: Lets say that 1GHz OPP is to be made available only if the
 	SoC temperature is lower than a certain threshold. The SoC framework
 	implementation might choose to do something as follows:
 	 if (cur_temp < temp_low_thresh) {
 		/* Enable 1GHz if it was disabled */
 		rcu_read_lock();
-		opp = opp_find_freq_exact(dev, 1000000000, false);
+		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
 		rcu_read_unlock();
 		/* just error check */
 		if (!IS_ERR(opp))
-			ret = opp_enable(dev, 1000000000);
+			ret = dev_pm_opp_enable(dev, 1000000000);
 		else
 			goto try_something_else;
 	 }
 
-opp_disable - Make an OPP to be not available for operation
+dev_pm_opp_disable - Make an OPP to be not available for operation
 	Example: Lets say that 1GHz OPP is to be disabled if the temperature
 	exceeds a threshold value. The SoC framework implementation might
 	choose to do something as follows:
 	 if (cur_temp > temp_high_thresh) {
 		/* Disable 1GHz if it was enabled */
 		rcu_read_lock();
-		opp = opp_find_freq_exact(dev, 1000000000, true);
+		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true);
 		rcu_read_unlock();
 		/* just error check */
 		if (!IS_ERR(opp))
-			ret = opp_disable(dev, 1000000000);
+			ret = dev_pm_opp_disable(dev, 1000000000);
 		else
 			goto try_something_else;
 	 }
@@ -247,7 +247,7 @@ information from the OPP structure is necessary. Once an OPP pointer is
 retrieved using the search functions, the following functions can be used by SoC
 framework to retrieve the information represented inside the OPP layer.
 
-opp_get_voltage - Retrieve the voltage represented by the opp pointer.
+dev_pm_opp_get_voltage - Retrieve the voltage represented by the opp pointer.
 	Example: At a cpufreq transition to a different frequency, SoC
 	framework requires to set the voltage represented by the OPP using
 	the regulator framework to the Power Management chip providing the
@@ -256,15 +256,15 @@ opp_get_voltage - Retrieve the voltage represented by the opp pointer.
 	 {
 		/* do things */
 		rcu_read_lock();
-		opp = opp_find_freq_ceil(dev, &freq);
-		v = opp_get_voltage(opp);
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		v = dev_pm_opp_get_voltage(opp);
 		rcu_read_unlock();
 		if (v)
 			regulator_set_voltage(.., v);
 		/* do other things */
 	 }
 
-opp_get_freq - Retrieve the freq represented by the opp pointer.
+dev_pm_opp_get_freq - Retrieve the freq represented by the opp pointer.
 	Example: Lets say the SoC framework uses a couple of helper functions
 	we could pass opp pointers instead of doing additional parameters to
 	handle quiet a bit of data parameters.
@@ -273,8 +273,8 @@ opp_get_freq - Retrieve the freq represented by the opp pointer.
 		/* do things.. */
 		 max_freq = ULONG_MAX;
 		 rcu_read_lock();
-		 max_opp = opp_find_freq_floor(dev,&max_freq);
-		 requested_opp = opp_find_freq_ceil(dev,&freq);
+		 max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq);
+		 requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq);
 		 if (!IS_ERR(max_opp) && !IS_ERR(requested_opp))
 			r = soc_test_validity(max_opp, requested_opp);
 		 rcu_read_unlock();
@@ -282,25 +282,25 @@ opp_get_freq - Retrieve the freq represented by the opp pointer.
 	 }
 	 soc_test_validity(..)
 	 {
-		 if(opp_get_voltage(max_opp) < opp_get_voltage(requested_opp))
+		 if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp))
 			 return -EINVAL;
-		 if(opp_get_freq(max_opp) < opp_get_freq(requested_opp))
+		 if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp))
 			 return -EINVAL;
 		/* do things.. */
 	 }
 
-opp_get_opp_count - Retrieve the number of available opps for a device
+dev_pm_opp_get_opp_count - Retrieve the number of available opps for a device
 	Example: Lets say a co-processor in the SoC needs to know the available
 	frequencies in a table, the main processor can notify as following:
 	 soc_notify_coproc_available_frequencies()
 	 {
 		/* Do things */
 		rcu_read_lock();
-		num_available = opp_get_opp_count(dev);
+		num_available = dev_pm_opp_get_opp_count(dev);
 		speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
 		/* populate the table in increasing order */
 		freq = 0;
-		while (!IS_ERR(opp = opp_find_freq_ceil(dev, &freq))) {
+		while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
 			speeds[i] = freq;
 			freq++;
 			i++;
@@ -313,7 +313,7 @@ opp_get_opp_count - Retrieve the number of available opps for a device
 
 6. Cpufreq Table Generation
 ===========================
-opp_init_cpufreq_table - cpufreq framework typically is initialized with
+dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
 	cpufreq_frequency_table_cpuinfo which is provided with the list of
 	frequencies that are available for operation. This function provides
 	a ready to use conversion routine to translate the OPP layer's internal
@@ -326,7 +326,7 @@ opp_init_cpufreq_table - cpufreq framework typically is initialized with
 	 soc_pm_init()
 	 {
 		/* Do things */
-		r = opp_init_cpufreq_table(dev, &freq_table);
+		r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
 		if (!r)
 			cpufreq_frequency_table_cpuinfo(policy, freq_table);
 		/* Do other things */
@@ -336,7 +336,7 @@ opp_init_cpufreq_table - cpufreq framework typically is initialized with
 	addition to CONFIG_PM as power management feature is required to
 	dynamically scale voltage and frequency in a system.
 
-opp_free_cpufreq_table - Free up the table allocated by opp_init_cpufreq_table
+dev_pm_opp_free_cpufreq_table - Free up the table allocated by dev_pm_opp_init_cpufreq_table
 
 7. Data Structures
 ==================
@@ -366,8 +366,8 @@ struct opp - The internal data structure of OPP library which is used to
 	identifier for OPP in the interactions with OPP layer.
 
 	WARNING: The struct opp pointer should not be parsed or modified by the
-	users. The defaults of for an instance is populated by opp_add, but the
-	availability of the OPP can be modified by opp_enable/disable functions.
+	users. The defaults of for an instance is populated by dev_pm_opp_add, but the
+	availability of the OPP can be modified by dev_pm_opp_enable/disable functions.
 
 struct device - This is used to identify a domain to the OPP layer. The
 	nature of the device and it's implementation is left to the user of
@@ -377,19 +377,19 @@ Overall, in a simplistic view, the data structure operations is represented as
 following:
 
 Initialization / modification:
-            +-----+        /- opp_enable
-opp_add --> | opp | <-------
-  |         +-----+        \- opp_disable
+            +-----+        /- dev_pm_opp_enable
+dev_pm_opp_add --> | opp | <-------
+  |         +-----+        \- dev_pm_opp_disable
   \-------> domain_info(device)
 
 Search functions:
-             /-- opp_find_freq_ceil  ---\   +-----+
-domain_info<---- opp_find_freq_exact -----> | opp |
-             \-- opp_find_freq_floor ---/   +-----+
+             /-- dev_pm_opp_find_freq_ceil  ---\   +-----+
+domain_info<---- dev_pm_opp_find_freq_exact -----> | opp |
+             \-- dev_pm_opp_find_freq_floor ---/   +-----+
 
 Retrieval functions:
-+-----+     /- opp_get_voltage
++-----+     /- dev_pm_opp_get_voltage
 | opp | <---
-+-----+     \- opp_get_freq
++-----+     \- dev_pm_opp_get_freq
 
-domain_info <- opp_get_opp_count
+domain_info <- dev_pm_opp_get_opp_count
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index 90372a21087f..d7ecc90e6bba 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -226,7 +226,7 @@ static void __init imx6q_opp_check_1p2ghz(struct device *cpu_dev)
 	val = readl_relaxed(base + OCOTP_CFG3);
 	val >>= OCOTP_CFG3_SPEED_SHIFT;
 	if ((val & 0x3) != OCOTP_CFG3_SPEED_1P2GHZ)
-		if (opp_disable(cpu_dev, 1200000000))
+		if (dev_pm_opp_disable(cpu_dev, 1200000000))
 			pr_warn("failed to disable 1.2 GHz OPP\n");
 
 put_node:
diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index f26918467efc..33969e5a149f 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -522,11 +522,11 @@ static int __init beagle_opp_init(void)
 			return -ENODEV;
 		}
 		/* Enable MPU 1GHz and lower opps */
-		r = opp_enable(mpu_dev, 800000000);
+		r = dev_pm_opp_enable(mpu_dev, 800000000);
 		/* TODO: MPU 1GHz needs SR and ABB */
 
 		/* Enable IVA 800MHz and lower opps */
-		r |= opp_enable(iva_dev, 660000000);
+		r |= dev_pm_opp_enable(iva_dev, 660000000);
 		/* TODO: DSP 800MHz needs SR and ABB */
 		if (r) {
 			pr_err("%s: failed to enable higher opp %d\n",
@@ -535,8 +535,8 @@ static int __init beagle_opp_init(void)
 			 * Cleanup - disable the higher freqs - we dont care
 			 * about the results
 			 */
-			opp_disable(mpu_dev, 800000000);
-			opp_disable(iva_dev, 660000000);
+			dev_pm_opp_disable(mpu_dev, 800000000);
+			dev_pm_opp_disable(iva_dev, 660000000);
 		}
 	}
 	return 0;
diff --git a/arch/arm/mach-omap2/opp.c b/arch/arm/mach-omap2/opp.c
index bd41d59a7cab..7b04637b13cb 100644
--- a/arch/arm/mach-omap2/opp.c
+++ b/arch/arm/mach-omap2/opp.c
@@ -81,14 +81,14 @@ int __init omap_init_opp_table(struct omap_opp_def *opp_def,
 			dev = &oh->od->pdev->dev;
 		}
 
-		r = opp_add(dev, opp_def->freq, opp_def->u_volt);
+		r = dev_pm_opp_add(dev, opp_def->freq, opp_def->u_volt);
 		if (r) {
 			dev_err(dev, "%s: add OPP %ld failed for %s [%d] result=%d\n",
 				__func__, opp_def->freq,
 				opp_def->hwmod_name, i, r);
 		} else {
 			if (!opp_def->default_available)
-				r = opp_disable(dev, opp_def->freq);
+				r = dev_pm_opp_disable(dev, opp_def->freq);
 			if (r)
 				dev_err(dev, "%s: disable %ld failed for %s [%d] result=%d\n",
 					__func__, opp_def->freq,
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index e742118fcfd2..937744c3679d 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -172,7 +172,7 @@ static int __init omap2_set_init_voltage(char *vdd_name, char *clk_name,
 	clk_put(clk);
 
 	rcu_read_lock();
-	opp = opp_find_freq_ceil(dev, &freq);
+	opp = dev_pm_opp_find_freq_ceil(dev, &freq);
 	if (IS_ERR(opp)) {
 		rcu_read_unlock();
 		pr_err("%s: unable to find boot up OPP for vdd_%s\n",
@@ -180,7 +180,7 @@ static int __init omap2_set_init_voltage(char *vdd_name, char *clk_name,
 		goto exit;
 	}
 
-	bootup_volt = opp_get_voltage(opp);
+	bootup_volt = dev_pm_opp_get_voltage(opp);
 	rcu_read_unlock();
 	if (!bootup_volt) {
 		pr_err("%s: unable to find voltage corresponding to the bootup OPP for vdd_%s\n",
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index ef89897c6043..00c6a449cdeb 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -136,7 +136,7 @@ static struct device_opp *find_device_opp(struct device *dev)
 }
 
 /**
- * opp_get_voltage() - Gets the voltage corresponding to an available opp
+ * dev_pm_opp_get_voltage() - Gets the voltage corresponding to an available opp
  * @opp:	opp for which voltage has to be returned for
  *
  * Return voltage in micro volt corresponding to the opp, else
@@ -150,7 +150,7 @@ static struct device_opp *find_device_opp(struct device *dev)
  * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
  * pointer.
  */
-unsigned long opp_get_voltage(struct opp *opp)
+unsigned long dev_pm_opp_get_voltage(struct opp *opp)
 {
 	struct opp *tmp_opp;
 	unsigned long v = 0;
@@ -163,10 +163,10 @@ unsigned long opp_get_voltage(struct opp *opp)
 
 	return v;
 }
-EXPORT_SYMBOL_GPL(opp_get_voltage);
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_voltage);
 
 /**
- * opp_get_freq() - Gets the frequency corresponding to an available opp
+ * dev_pm_opp_get_freq() - Gets the frequency corresponding to an available opp
  * @opp:	opp for which frequency has to be returned for
  *
  * Return frequency in hertz corresponding to the opp, else
@@ -180,7 +180,7 @@ EXPORT_SYMBOL_GPL(opp_get_voltage);
  * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
  * pointer.
  */
-unsigned long opp_get_freq(struct opp *opp)
+unsigned long dev_pm_opp_get_freq(struct opp *opp)
 {
 	struct opp *tmp_opp;
 	unsigned long f = 0;
@@ -193,10 +193,10 @@ unsigned long opp_get_freq(struct opp *opp)
 
 	return f;
 }
-EXPORT_SYMBOL_GPL(opp_get_freq);
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq);
 
 /**
- * opp_get_opp_count() - Get number of opps available in the opp list
+ * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list
  * @dev:	device for which we do this operation
  *
  * This function returns the number of available opps if there are any,
@@ -206,7 +206,7 @@ EXPORT_SYMBOL_GPL(opp_get_freq);
  * internally references two RCU protected structures: device_opp and opp which
  * are safe as long as we are under a common RCU locked section.
  */
-int opp_get_opp_count(struct device *dev)
+int dev_pm_opp_get_opp_count(struct device *dev)
 {
 	struct device_opp *dev_opp;
 	struct opp *temp_opp;
@@ -226,10 +226,10 @@ int opp_get_opp_count(struct device *dev)
 
 	return count;
 }
-EXPORT_SYMBOL_GPL(opp_get_opp_count);
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
 
 /**
- * opp_find_freq_exact() - search for an exact frequency
+ * dev_pm_opp_find_freq_exact() - search for an exact frequency
  * @dev:		device for which we do this operation
  * @freq:		frequency to search for
  * @available:		true/false - match for available opp
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(opp_get_opp_count);
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
+struct opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq,
 				bool available)
 {
 	struct device_opp *dev_opp;
@@ -277,10 +277,10 @@ struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
 
 	return opp;
 }
-EXPORT_SYMBOL_GPL(opp_find_freq_exact);
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
 
 /**
- * opp_find_freq_ceil() - Search for an rounded ceil freq
+ * dev_pm_opp_find_freq_ceil() - Search for an rounded ceil freq
  * @dev:	device for which we do this operation
  * @freq:	Start frequency
  *
@@ -300,7 +300,7 @@ EXPORT_SYMBOL_GPL(opp_find_freq_exact);
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq)
+struct opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq)
 {
 	struct device_opp *dev_opp;
 	struct opp *temp_opp, *opp = ERR_PTR(-ERANGE);
@@ -324,10 +324,10 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq)
 
 	return opp;
 }
-EXPORT_SYMBOL_GPL(opp_find_freq_ceil);
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
 
 /**
- * opp_find_freq_floor() - Search for a rounded floor freq
+ * dev_pm_opp_find_freq_floor() - Search for a rounded floor freq
  * @dev:	device for which we do this operation
  * @freq:	Start frequency
  *
@@ -347,7 +347,7 @@ EXPORT_SYMBOL_GPL(opp_find_freq_ceil);
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq)
+struct opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq)
 {
 	struct device_opp *dev_opp;
 	struct opp *temp_opp, *opp = ERR_PTR(-ERANGE);
@@ -375,17 +375,17 @@ struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq)
 
 	return opp;
 }
-EXPORT_SYMBOL_GPL(opp_find_freq_floor);
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
 /**
- * opp_add()  - Add an OPP table from a table definitions
+ * dev_pm_opp_add()  - Add an OPP table from a table definitions
  * @dev:	device for which we do this operation
  * @freq:	Frequency in Hz for this OPP
  * @u_volt:	Voltage in uVolts for this OPP
  *
  * This function adds an opp definition to the opp list and returns status.
  * The opp is made available by default and it can be controlled using
- * opp_enable/disable functions.
+ * dev_pm_opp_enable/disable functions.
  *
  * Locking: The internal device_opp and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
@@ -393,7 +393,7 @@ EXPORT_SYMBOL_GPL(opp_find_freq_floor);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
+int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 {
 	struct device_opp *dev_opp = NULL;
 	struct opp *opp, *new_opp;
@@ -460,7 +460,7 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 	srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ADD, new_opp);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(opp_add);
+EXPORT_SYMBOL_GPL(dev_pm_opp_add);
 
 /**
  * opp_set_availability() - helper to set the availability of an opp
@@ -552,13 +552,13 @@ unlock:
 }
 
 /**
- * opp_enable() - Enable a specific OPP
+ * dev_pm_opp_enable() - Enable a specific OPP
  * @dev:	device for which we do this operation
  * @freq:	OPP frequency to enable
  *
  * Enables a provided opp. If the operation is valid, this returns 0, else the
  * corresponding error value. It is meant to be used for users an OPP available
- * after being temporarily made unavailable with opp_disable.
+ * after being temporarily made unavailable with dev_pm_opp_disable.
  *
  * Locking: The internal device_opp and opp structures are RCU protected.
  * Hence this function indirectly uses RCU and mutex locks to keep the
@@ -566,21 +566,21 @@ unlock:
  * this function is *NOT* called under RCU protection or in contexts where
  * mutex locking or synchronize_rcu() blocking calls cannot be used.
  */
-int opp_enable(struct device *dev, unsigned long freq)
+int dev_pm_opp_enable(struct device *dev, unsigned long freq)
 {
 	return opp_set_availability(dev, freq, true);
 }
-EXPORT_SYMBOL_GPL(opp_enable);
+EXPORT_SYMBOL_GPL(dev_pm_opp_enable);
 
 /**
- * opp_disable() - Disable a specific OPP
+ * dev_pm_opp_disable() - Disable a specific OPP
  * @dev:	device for which we do this operation
  * @freq:	OPP frequency to disable
  *
  * Disables a provided opp. If the operation is valid, this returns
  * 0, else the corresponding error value. It is meant to be a temporary
  * control by users to make this OPP not available until the circumstances are
- * right to make it available again (with a call to opp_enable).
+ * right to make it available again (with a call to dev_pm_opp_enable).
  *
  * Locking: The internal device_opp and opp structures are RCU protected.
  * Hence this function indirectly uses RCU and mutex locks to keep the
@@ -588,15 +588,15 @@ EXPORT_SYMBOL_GPL(opp_enable);
  * this function is *NOT* called under RCU protection or in contexts where
  * mutex locking or synchronize_rcu() blocking calls cannot be used.
  */
-int opp_disable(struct device *dev, unsigned long freq)
+int dev_pm_opp_disable(struct device *dev, unsigned long freq)
 {
 	return opp_set_availability(dev, freq, false);
 }
-EXPORT_SYMBOL_GPL(opp_disable);
+EXPORT_SYMBOL_GPL(dev_pm_opp_disable);
 
 #ifdef CONFIG_CPU_FREQ
 /**
- * opp_init_cpufreq_table() - create a cpufreq table for a device
+ * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device
  * @dev:	device for which we do this operation
  * @table:	Cpufreq table returned back to caller
  *
@@ -619,7 +619,7 @@ EXPORT_SYMBOL_GPL(opp_disable);
  * Callers should ensure that this function is *NOT* called under RCU protection
  * or in contexts where mutex locking cannot be used.
  */
-int opp_init_cpufreq_table(struct device *dev,
+int dev_pm_opp_init_cpufreq_table(struct device *dev,
 			    struct cpufreq_frequency_table **table)
 {
 	struct device_opp *dev_opp;
@@ -639,7 +639,7 @@ int opp_init_cpufreq_table(struct device *dev,
 	}
 
 	freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) *
-			     (opp_get_opp_count(dev) + 1), GFP_KERNEL);
+			     (dev_pm_opp_get_opp_count(dev) + 1), GFP_KERNEL);
 	if (!freq_table) {
 		mutex_unlock(&dev_opp_list_lock);
 		dev_warn(dev, "%s: Unable to allocate frequency table\n",
@@ -663,16 +663,16 @@ int opp_init_cpufreq_table(struct device *dev,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(opp_init_cpufreq_table);
+EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table);
 
 /**
- * opp_free_cpufreq_table() - free the cpufreq table
+ * dev_pm_opp_free_cpufreq_table() - free the cpufreq table
  * @dev:	device for which we do this operation
  * @table:	table to free
  *
- * Free up the table allocated by opp_init_cpufreq_table
+ * Free up the table allocated by dev_pm_opp_init_cpufreq_table
  */
-void opp_free_cpufreq_table(struct device *dev,
+void dev_pm_opp_free_cpufreq_table(struct device *dev,
 				struct cpufreq_frequency_table **table)
 {
 	if (!table)
@@ -681,14 +681,14 @@ void opp_free_cpufreq_table(struct device *dev,
 	kfree(*table);
 	*table = NULL;
 }
-EXPORT_SYMBOL_GPL(opp_free_cpufreq_table);
+EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
 #endif		/* CONFIG_CPU_FREQ */
 
 /**
- * opp_get_notifier() - find notifier_head of the device with opp
+ * dev_pm_opp_get_notifier() - find notifier_head of the device with opp
  * @dev:	device pointer used to lookup device OPPs.
  */
-struct srcu_notifier_head *opp_get_notifier(struct device *dev)
+struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev)
 {
 	struct device_opp *dev_opp = find_device_opp(dev);
 
@@ -732,7 +732,7 @@ int of_init_opp_table(struct device *dev)
 		unsigned long freq = be32_to_cpup(val++) * 1000;
 		unsigned long volt = be32_to_cpup(val++);
 
-		if (opp_add(dev, freq, volt)) {
+		if (dev_pm_opp_add(dev, freq, volt)) {
 			dev_warn(dev, "%s: Failed to add OPP %ld\n",
 				 __func__, freq);
 			continue;
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 3549f0784af1..9e82a9d5df77 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -98,7 +98,7 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
 
 	if (!atomic_dec_return(&cluster_usage[cluster])) {
 		clk_put(clk[cluster]);
-		opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
+		dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
 		dev_dbg(cpu_dev, "%s: cluster: %d\n", __func__, cluster);
 	}
 }
@@ -119,7 +119,7 @@ static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
 		goto atomic_dec;
 	}
 
-	ret = opp_init_cpufreq_table(cpu_dev, &freq_table[cluster]);
+	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table[cluster]);
 	if (ret) {
 		dev_err(cpu_dev, "%s: failed to init cpufreq table, cpu: %d, err: %d\n",
 				__func__, cpu_dev->id, ret);
@@ -138,7 +138,7 @@ static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
 	dev_err(cpu_dev, "%s: Failed to get clk for cpu: %d, cluster: %d\n",
 			__func__, cpu_dev->id, cluster);
 	ret = PTR_ERR(clk[cluster]);
-	opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
+	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
 
 atomic_dec:
 	atomic_dec(&cluster_usage[cluster]);
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index c522a95c0e16..af951a9cea78 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -72,7 +72,7 @@ static int cpu0_set_target(struct cpufreq_policy *policy,
 
 	if (!IS_ERR(cpu_reg)) {
 		rcu_read_lock();
-		opp = opp_find_freq_ceil(cpu_dev, &freq_Hz);
+		opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz);
 		if (IS_ERR(opp)) {
 			rcu_read_unlock();
 			pr_err("failed to find OPP for %ld\n", freq_Hz);
@@ -80,7 +80,7 @@ static int cpu0_set_target(struct cpufreq_policy *policy,
 			ret = PTR_ERR(opp);
 			goto post_notify;
 		}
-		volt = opp_get_voltage(opp);
+		volt = dev_pm_opp_get_voltage(opp);
 		rcu_read_unlock();
 		tol = volt * voltage_tolerance / 100;
 		volt_old = regulator_get_voltage(cpu_reg);
@@ -218,7 +218,7 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
 		goto out_put_node;
 	}
 
-	ret = opp_init_cpufreq_table(cpu_dev, &freq_table);
+	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		pr_err("failed to init cpufreq table: %d\n", ret);
 		goto out_put_node;
@@ -242,12 +242,12 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
 		for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++)
 			;
 		rcu_read_lock();
-		opp = opp_find_freq_exact(cpu_dev,
+		opp = dev_pm_opp_find_freq_exact(cpu_dev,
 				freq_table[0].frequency * 1000, true);
-		min_uV = opp_get_voltage(opp);
-		opp = opp_find_freq_exact(cpu_dev,
+		min_uV = dev_pm_opp_get_voltage(opp);
+		opp = dev_pm_opp_find_freq_exact(cpu_dev,
 				freq_table[i-1].frequency * 1000, true);
-		max_uV = opp_get_voltage(opp);
+		max_uV = dev_pm_opp_get_voltage(opp);
 		rcu_read_unlock();
 		ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV);
 		if (ret > 0)
@@ -264,7 +264,7 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
 	return 0;
 
 out_free_table:
-	opp_free_cpufreq_table(cpu_dev, &freq_table);
+	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
 out_put_node:
 	of_node_put(np);
 	return ret;
@@ -273,7 +273,7 @@ out_put_node:
 static int cpu0_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&cpu0_cpufreq_driver);
-	opp_free_cpufreq_table(cpu_dev, &freq_table);
+	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
 
 	return 0;
 }
diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c
index be5380ecdcd4..444eca74d3c2 100644
--- a/drivers/cpufreq/exynos5440-cpufreq.c
+++ b/drivers/cpufreq/exynos5440-cpufreq.c
@@ -123,7 +123,7 @@ static int init_div_table(void)
 	rcu_read_lock();
 	for (i = 0; freq_tbl[i].frequency != CPUFREQ_TABLE_END; i++) {
 
-		opp = opp_find_freq_exact(dvfs_info->dev,
+		opp = dev_pm_opp_find_freq_exact(dvfs_info->dev,
 					freq_tbl[i].frequency * 1000, true);
 		if (IS_ERR(opp)) {
 			rcu_read_unlock();
@@ -142,7 +142,7 @@ static int init_div_table(void)
 					<< P0_7_CSCLKDEV_SHIFT;
 
 		/* Calculate EMA */
-		volt_id = opp_get_voltage(opp);
+		volt_id = dev_pm_opp_get_voltage(opp);
 		volt_id = (MAX_VOLTAGE - volt_id) / VOLTAGE_STEP;
 		if (volt_id < PMIC_HIGH_VOLT) {
 			ema_div = (CPUEMA_HIGH << P0_7_CPUEMA_SHIFT) |
@@ -399,13 +399,14 @@ static int exynos_cpufreq_probe(struct platform_device *pdev)
 		goto err_put_node;
 	}
 
-	ret = opp_init_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table);
+	ret = dev_pm_opp_init_cpufreq_table(dvfs_info->dev,
+					    &dvfs_info->freq_table);
 	if (ret) {
 		dev_err(dvfs_info->dev,
 			"failed to init cpufreq table: %d\n", ret);
 		goto err_put_node;
 	}
-	dvfs_info->freq_count = opp_get_opp_count(dvfs_info->dev);
+	dvfs_info->freq_count = dev_pm_opp_get_opp_count(dvfs_info->dev);
 	exynos_sort_descend_freq_table();
 
 	if (of_property_read_u32(np, "clock-latency", &dvfs_info->latency))
@@ -454,7 +455,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev)
 	return 0;
 
 err_free_table:
-	opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table);
+	dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table);
 err_put_node:
 	of_node_put(np);
 	dev_err(&pdev->dev, "%s: failed initialization\n", __func__);
@@ -464,7 +465,7 @@ err_put_node:
 static int exynos_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&exynos_driver);
-	opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table);
+	dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table);
 	return 0;
 }
 
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index c3fd2a101ca0..b360549e7715 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -70,14 +70,14 @@ static int imx6q_set_target(struct cpufreq_policy *policy,
 		return 0;
 
 	rcu_read_lock();
-	opp = opp_find_freq_ceil(cpu_dev, &freq_hz);
+	opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz);
 	if (IS_ERR(opp)) {
 		rcu_read_unlock();
 		dev_err(cpu_dev, "failed to find OPP for %ld\n", freq_hz);
 		return PTR_ERR(opp);
 	}
 
-	volt = opp_get_voltage(opp);
+	volt = dev_pm_opp_get_voltage(opp);
 	rcu_read_unlock();
 	volt_old = regulator_get_voltage(arm_reg);
 
@@ -237,14 +237,14 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	}
 
 	/* We expect an OPP table supplied by platform */
-	num = opp_get_opp_count(cpu_dev);
+	num = dev_pm_opp_get_opp_count(cpu_dev);
 	if (num < 0) {
 		ret = num;
 		dev_err(cpu_dev, "no OPP table is found: %d\n", ret);
 		goto put_node;
 	}
 
-	ret = opp_init_cpufreq_table(cpu_dev, &freq_table);
+	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
 		goto put_node;
@@ -259,12 +259,12 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	 * same order.
 	 */
 	rcu_read_lock();
-	opp = opp_find_freq_exact(cpu_dev,
+	opp = dev_pm_opp_find_freq_exact(cpu_dev,
 				  freq_table[0].frequency * 1000, true);
-	min_volt = opp_get_voltage(opp);
-	opp = opp_find_freq_exact(cpu_dev,
+	min_volt = dev_pm_opp_get_voltage(opp);
+	opp = dev_pm_opp_find_freq_exact(cpu_dev,
 				  freq_table[--num].frequency * 1000, true);
-	max_volt = opp_get_voltage(opp);
+	max_volt = dev_pm_opp_get_voltage(opp);
 	rcu_read_unlock();
 	ret = regulator_set_voltage_time(arm_reg, min_volt, max_volt);
 	if (ret > 0)
@@ -292,7 +292,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	return 0;
 
 free_freq_table:
-	opp_free_cpufreq_table(cpu_dev, &freq_table);
+	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
 put_node:
 	of_node_put(np);
 	return ret;
@@ -301,7 +301,7 @@ put_node:
 static int imx6q_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&imx6q_cpufreq_driver);
-	opp_free_cpufreq_table(cpu_dev, &freq_table);
+	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
 
 	return 0;
 }
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index f31fcfcad514..c5e31d296607 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -105,14 +105,14 @@ static int omap_target(struct cpufreq_policy *policy,
 
 	if (mpu_reg) {
 		rcu_read_lock();
-		opp = opp_find_freq_ceil(mpu_dev, &freq);
+		opp = dev_pm_opp_find_freq_ceil(mpu_dev, &freq);
 		if (IS_ERR(opp)) {
 			rcu_read_unlock();
 			dev_err(mpu_dev, "%s: unable to find MPU OPP for %d\n",
 				__func__, freqs.new);
 			return -EINVAL;
 		}
-		volt = opp_get_voltage(opp);
+		volt = dev_pm_opp_get_voltage(opp);
 		rcu_read_unlock();
 		tol = volt * OPP_TOLERANCE / 100;
 		volt_old = regulator_get_voltage(mpu_reg);
@@ -162,7 +162,7 @@ done:
 static inline void freq_table_free(void)
 {
 	if (atomic_dec_and_test(&freq_table_users))
-		opp_free_cpufreq_table(mpu_dev, &freq_table);
+		dev_pm_opp_free_cpufreq_table(mpu_dev, &freq_table);
 }
 
 static int omap_cpu_init(struct cpufreq_policy *policy)
@@ -181,7 +181,7 @@ static int omap_cpu_init(struct cpufreq_policy *policy)
 	policy->cur = omap_getspeed(policy->cpu);
 
 	if (!freq_table)
-		result = opp_init_cpufreq_table(mpu_dev, &freq_table);
+		result = dev_pm_opp_init_cpufreq_table(mpu_dev, &freq_table);
 
 	if (result) {
 		dev_err(mpu_dev, "%s: cpu%d: failed creating freq table[%d]\n",
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index c99c00d35d34..f798edcb910d 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -908,7 +908,7 @@ static ssize_t available_frequencies_show(struct device *d,
 
 	rcu_read_lock();
 	do {
-		opp = opp_find_freq_ceil(dev, &freq);
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
 		if (IS_ERR(opp))
 			break;
 
@@ -1036,18 +1036,18 @@ struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq,
 
 	if (flags & DEVFREQ_FLAG_LEAST_UPPER_BOUND) {
 		/* The freq is an upper bound. opp should be lower */
-		opp = opp_find_freq_floor(dev, freq);
+		opp = dev_pm_opp_find_freq_floor(dev, freq);
 
 		/* If not available, use the closest opp */
 		if (opp == ERR_PTR(-ERANGE))
-			opp = opp_find_freq_ceil(dev, freq);
+			opp = dev_pm_opp_find_freq_ceil(dev, freq);
 	} else {
 		/* The freq is an lower bound. opp should be higher */
-		opp = opp_find_freq_ceil(dev, freq);
+		opp = dev_pm_opp_find_freq_ceil(dev, freq);
 
 		/* If not available, use the closest opp */
 		if (opp == ERR_PTR(-ERANGE))
-			opp = opp_find_freq_floor(dev, freq);
+			opp = dev_pm_opp_find_freq_floor(dev, freq);
 	}
 
 	return opp;
@@ -1066,7 +1066,7 @@ int devfreq_register_opp_notifier(struct device *dev, struct devfreq *devfreq)
 	int ret = 0;
 
 	rcu_read_lock();
-	nh = opp_get_notifier(dev);
+	nh = dev_pm_opp_get_notifier(dev);
 	if (IS_ERR(nh))
 		ret = PTR_ERR(nh);
 	rcu_read_unlock();
@@ -1092,7 +1092,7 @@ int devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq)
 	int ret = 0;
 
 	rcu_read_lock();
-	nh = opp_get_notifier(dev);
+	nh = dev_pm_opp_get_notifier(dev);
 	if (IS_ERR(nh))
 		ret = PTR_ERR(nh);
 	rcu_read_unlock();
diff --git a/drivers/devfreq/exynos/exynos4_bus.c b/drivers/devfreq/exynos/exynos4_bus.c
index c5f86d8caca3..33c0e05a5ff1 100644
--- a/drivers/devfreq/exynos/exynos4_bus.c
+++ b/drivers/devfreq/exynos/exynos4_bus.c
@@ -650,8 +650,8 @@ static int exynos4_bus_target(struct device *dev, unsigned long *_freq,
 		rcu_read_unlock();
 		return PTR_ERR(opp);
 	}
-	new_oppinfo.rate = opp_get_freq(opp);
-	new_oppinfo.volt = opp_get_voltage(opp);
+	new_oppinfo.rate = dev_pm_opp_get_freq(opp);
+	new_oppinfo.volt = dev_pm_opp_get_voltage(opp);
 	rcu_read_unlock();
 	freq = new_oppinfo.rate;
 
@@ -873,7 +873,7 @@ static int exynos4210_init_tables(struct busfreq_data *data)
 		exynos4210_busclk_table[i].volt = exynos4210_asv_volt[mgrp][i];
 
 	for (i = LV_0; i < EX4210_LV_NUM; i++) {
-		err = opp_add(data->dev, exynos4210_busclk_table[i].clk,
+		err = dev_pm_opp_add(data->dev, exynos4210_busclk_table[i].clk,
 			      exynos4210_busclk_table[i].volt);
 		if (err) {
 			dev_err(data->dev, "Cannot add opp entries.\n");
@@ -940,7 +940,7 @@ static int exynos4x12_init_tables(struct busfreq_data *data)
 	}
 
 	for (i = 0; i < EX4x12_LV_NUM; i++) {
-		ret = opp_add(data->dev, exynos4x12_mifclk_table[i].clk,
+		ret = dev_pm_opp_add(data->dev, exynos4x12_mifclk_table[i].clk,
 			      exynos4x12_mifclk_table[i].volt);
 		if (ret) {
 			dev_err(data->dev, "Fail to add opp entries.\n");
@@ -969,7 +969,7 @@ static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this,
 		data->disabled = true;
 
 		rcu_read_lock();
-		opp = opp_find_freq_floor(data->dev, &maxfreq);
+		opp = dev_pm_opp_find_freq_floor(data->dev, &maxfreq);
 		if (IS_ERR(opp)) {
 			rcu_read_unlock();
 			dev_err(data->dev, "%s: unable to find a min freq\n",
@@ -977,8 +977,8 @@ static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this,
 			mutex_unlock(&data->lock);
 			return PTR_ERR(opp);
 		}
-		new_oppinfo.rate = opp_get_freq(opp);
-		new_oppinfo.volt = opp_get_voltage(opp);
+		new_oppinfo.rate = dev_pm_opp_get_freq(opp);
+		new_oppinfo.volt = dev_pm_opp_get_voltage(opp);
 		rcu_read_unlock();
 
 		err = exynos4_bus_setvolt(data, &new_oppinfo,
@@ -1065,15 +1065,16 @@ static int exynos4_busfreq_probe(struct platform_device *pdev)
 	}
 
 	rcu_read_lock();
-	opp = opp_find_freq_floor(dev, &exynos4_devfreq_profile.initial_freq);
+	opp = dev_pm_opp_find_freq_floor(dev,
+					 &exynos4_devfreq_profile.initial_freq);
 	if (IS_ERR(opp)) {
 		rcu_read_unlock();
 		dev_err(dev, "Invalid initial frequency %lu kHz.\n",
 			exynos4_devfreq_profile.initial_freq);
 		return PTR_ERR(opp);
 	}
-	data->curr_oppinfo.rate = opp_get_freq(opp);
-	data->curr_oppinfo.volt = opp_get_voltage(opp);
+	data->curr_oppinfo.rate = dev_pm_opp_get_freq(opp);
+	data->curr_oppinfo.volt = dev_pm_opp_get_voltage(opp);
 	rcu_read_unlock();
 
 	platform_set_drvdata(pdev, data);
diff --git a/drivers/devfreq/exynos/exynos5_bus.c b/drivers/devfreq/exynos/exynos5_bus.c
index 574b16b59be5..b0e45925505b 100644
--- a/drivers/devfreq/exynos/exynos5_bus.c
+++ b/drivers/devfreq/exynos/exynos5_bus.c
@@ -144,8 +144,8 @@ static int exynos5_busfreq_int_target(struct device *dev, unsigned long *_freq,
 		return PTR_ERR(opp);
 	}
 
-	freq = opp_get_freq(opp);
-	volt = opp_get_voltage(opp);
+	freq = dev_pm_opp_get_freq(opp);
+	volt = dev_pm_opp_get_voltage(opp);
 	rcu_read_unlock();
 
 	old_freq = data->curr_freq;
@@ -246,7 +246,7 @@ static int exynos5250_init_int_tables(struct busfreq_data_int *data)
 	int i, err = 0;
 
 	for (i = LV_0; i < _LV_END; i++) {
-		err = opp_add(data->dev, exynos5_int_opp_table[i].clk,
+		err = dev_pm_opp_add(data->dev, exynos5_int_opp_table[i].clk,
 				exynos5_int_opp_table[i].volt);
 		if (err) {
 			dev_err(data->dev, "Cannot add opp entries.\n");
@@ -276,14 +276,14 @@ static int exynos5_busfreq_int_pm_notifier_event(struct notifier_block *this,
 		data->disabled = true;
 
 		rcu_read_lock();
-		opp = opp_find_freq_floor(data->dev, &maxfreq);
+		opp = dev_pm_opp_find_freq_floor(data->dev, &maxfreq);
 		if (IS_ERR(opp)) {
 			rcu_read_unlock();
 			err = PTR_ERR(opp);
 			goto unlock;
 		}
-		freq = opp_get_freq(opp);
-		volt = opp_get_voltage(opp);
+		freq = dev_pm_opp_get_freq(opp);
+		volt = dev_pm_opp_get_voltage(opp);
 		rcu_read_unlock();
 
 		err = exynos5_int_setvolt(data, volt);
@@ -368,7 +368,7 @@ static int exynos5_busfreq_int_probe(struct platform_device *pdev)
 	}
 
 	rcu_read_lock();
-	opp = opp_find_freq_floor(dev,
+	opp = dev_pm_opp_find_freq_floor(dev,
 			&exynos5_devfreq_int_profile.initial_freq);
 	if (IS_ERR(opp)) {
 		rcu_read_unlock();
@@ -377,8 +377,8 @@ static int exynos5_busfreq_int_probe(struct platform_device *pdev)
 		err = PTR_ERR(opp);
 		goto err_opp_add;
 	}
-	initial_freq = opp_get_freq(opp);
-	initial_volt = opp_get_voltage(opp);
+	initial_freq = dev_pm_opp_get_freq(opp);
+	initial_volt = dev_pm_opp_get_voltage(opp);
 	rcu_read_unlock();
 	data->curr_freq = initial_freq;
 
diff --git a/include/linux/opp.h b/include/linux/opp.h
index 3aca2b8def33..2066ae5880c6 100644
--- a/include/linux/opp.h
+++ b/include/linux/opp.h
@@ -27,77 +27,79 @@ enum opp_event {
 
 #if defined(CONFIG_PM_OPP)
 
-unsigned long opp_get_voltage(struct opp *opp);
+unsigned long dev_pm_opp_get_voltage(struct opp *opp);
 
-unsigned long opp_get_freq(struct opp *opp);
+unsigned long dev_pm_opp_get_freq(struct opp *opp);
 
-int opp_get_opp_count(struct device *dev);
+int dev_pm_opp_get_opp_count(struct device *dev);
 
-struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
+struct opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq,
 				bool available);
 
-struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq);
+struct opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq);
 
-struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq);
+struct opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq);
 
-int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt);
+int dev_pm_opp_add(struct device *dev, unsigned long freq,
+		   unsigned long u_volt);
 
-int opp_enable(struct device *dev, unsigned long freq);
+int dev_pm_opp_enable(struct device *dev, unsigned long freq);
 
-int opp_disable(struct device *dev, unsigned long freq);
+int dev_pm_opp_disable(struct device *dev, unsigned long freq);
 
-struct srcu_notifier_head *opp_get_notifier(struct device *dev);
+struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
 #else
-static inline unsigned long opp_get_voltage(struct opp *opp)
+static inline unsigned long dev_pm_opp_get_voltage(struct opp *opp)
 {
 	return 0;
 }
 
-static inline unsigned long opp_get_freq(struct opp *opp)
+static inline unsigned long dev_pm_opp_get_freq(struct opp *opp)
 {
 	return 0;
 }
 
-static inline int opp_get_opp_count(struct device *dev)
+static inline int dev_pm_opp_get_opp_count(struct device *dev)
 {
 	return 0;
 }
 
-static inline struct opp *opp_find_freq_exact(struct device *dev,
+static inline struct opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					unsigned long freq, bool available)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct opp *opp_find_freq_floor(struct device *dev,
+static inline struct opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					unsigned long *freq)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct opp *opp_find_freq_ceil(struct device *dev,
+static inline struct opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 					unsigned long *freq)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline int opp_add(struct device *dev, unsigned long freq,
+static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
 					unsigned long u_volt)
 {
 	return -EINVAL;
 }
 
-static inline int opp_enable(struct device *dev, unsigned long freq)
+static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq)
 {
 	return 0;
 }
 
-static inline int opp_disable(struct device *dev, unsigned long freq)
+static inline int dev_pm_opp_disable(struct device *dev, unsigned long freq)
 {
 	return 0;
 }
 
-static inline struct srcu_notifier_head *opp_get_notifier(struct device *dev)
+static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
+							struct device *dev)
 {
 	return ERR_PTR(-EINVAL);
 }
@@ -113,19 +115,19 @@ static inline int of_init_opp_table(struct device *dev)
 #endif
 
 #if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
-int opp_init_cpufreq_table(struct device *dev,
+int dev_pm_opp_init_cpufreq_table(struct device *dev,
 			    struct cpufreq_frequency_table **table);
-void opp_free_cpufreq_table(struct device *dev,
+void dev_pm_opp_free_cpufreq_table(struct device *dev,
 				struct cpufreq_frequency_table **table);
 #else
-static inline int opp_init_cpufreq_table(struct device *dev,
+static inline int dev_pm_opp_init_cpufreq_table(struct device *dev,
 			    struct cpufreq_frequency_table **table)
 {
 	return -EINVAL;
 }
 
 static inline
-void opp_free_cpufreq_table(struct device *dev,
+void dev_pm_opp_free_cpufreq_table(struct device *dev,
 				struct cpufreq_frequency_table **table)
 {
 }
-- 
cgit v1.2.3


From 47d43ba73eb98d8ba731208735c899129d9849e1 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Thu, 19 Sep 2013 16:03:51 -0500
Subject: PM / OPP: rename data structures to dev_pm equivalents

Since Operating Performance Points (OPP) data structures are specific
to device specific power management, be specific and rename opp_* data
structures in OPP library with dev_pm_opp_* equivalent.

Affected structures are:
 struct opp
 enum opp_event

Minor checkpatch warning resulting of this change was fixed as well.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/opp.txt          |  4 ++--
 arch/arm/mach-omap2/pm.c             |  2 +-
 drivers/base/power/opp.c             | 41 +++++++++++++++++++-----------------
 drivers/cpufreq/cpufreq-cpu0.c       |  4 ++--
 drivers/cpufreq/exynos5440-cpufreq.c |  2 +-
 drivers/cpufreq/imx6q-cpufreq.c      |  4 ++--
 drivers/cpufreq/omap-cpufreq.c       |  2 +-
 drivers/devfreq/devfreq.c            |  9 ++++----
 drivers/devfreq/exynos/exynos4_bus.c |  6 +++---
 drivers/devfreq/exynos/exynos5_bus.c |  6 +++---
 include/linux/devfreq.h              |  4 ++--
 include/linux/opp.h                  | 29 +++++++++++++------------
 12 files changed, 60 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt
index 185367b1848f..7f67e3d2c530 100644
--- a/Documentation/power/opp.txt
+++ b/Documentation/power/opp.txt
@@ -358,14 +358,14 @@ accessed by various functions as described above. However, the structures
 representing the actual OPPs and domains are internal to the OPP library itself
 to allow for suitable abstraction reusable across systems.
 
-struct opp - The internal data structure of OPP library which is used to
+struct dev_pm_opp - The internal data structure of OPP library which is used to
 	represent an OPP. In addition to the freq, voltage, availability
 	information, it also contains internal book keeping information required
 	for the OPP library to operate on.  Pointer to this structure is
 	provided back to the users such as SoC framework to be used as a
 	identifier for OPP in the interactions with OPP layer.
 
-	WARNING: The struct opp pointer should not be parsed or modified by the
+	WARNING: The struct dev_pm_opp pointer should not be parsed or modified by the
 	users. The defaults of for an instance is populated by dev_pm_opp_add, but the
 	availability of the OPP can be modified by dev_pm_opp_enable/disable functions.
 
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 937744c3679d..92901bd41490 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -131,7 +131,7 @@ static int __init omap2_set_init_voltage(char *vdd_name, char *clk_name,
 {
 	struct voltagedomain *voltdm;
 	struct clk *clk;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	unsigned long freq, bootup_volt;
 	struct device *dev;
 
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 00c6a449cdeb..693e14a24914 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -42,7 +42,7 @@
  */
 
 /**
- * struct opp - Generic OPP description structure
+ * struct dev_pm_opp - Generic OPP description structure
  * @node:	opp list node. The nodes are maintained throughout the lifetime
  *		of boot. It is expected only an optimal set of OPPs are
  *		added to the library by the SoC framework.
@@ -59,7 +59,7 @@
  *
  * This structure stores the OPP information for a given device.
  */
-struct opp {
+struct dev_pm_opp {
 	struct list_head node;
 
 	bool available;
@@ -150,9 +150,9 @@ static struct device_opp *find_device_opp(struct device *dev)
  * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
  * pointer.
  */
-unsigned long dev_pm_opp_get_voltage(struct opp *opp)
+unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
-	struct opp *tmp_opp;
+	struct dev_pm_opp *tmp_opp;
 	unsigned long v = 0;
 
 	tmp_opp = rcu_dereference(opp);
@@ -180,9 +180,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_voltage);
  * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
  * pointer.
  */
-unsigned long dev_pm_opp_get_freq(struct opp *opp)
+unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
 {
-	struct opp *tmp_opp;
+	struct dev_pm_opp *tmp_opp;
 	unsigned long f = 0;
 
 	tmp_opp = rcu_dereference(opp);
@@ -209,7 +209,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq);
 int dev_pm_opp_get_opp_count(struct device *dev)
 {
 	struct device_opp *dev_opp;
-	struct opp *temp_opp;
+	struct dev_pm_opp *temp_opp;
 	int count = 0;
 
 	dev_opp = find_device_opp(dev);
@@ -254,11 +254,12 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq,
-				bool available)
+struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
+					      unsigned long freq,
+					      bool available)
 {
 	struct device_opp *dev_opp;
-	struct opp *temp_opp, *opp = ERR_PTR(-ERANGE);
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
@@ -300,10 +301,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq)
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
+					     unsigned long *freq)
 {
 	struct device_opp *dev_opp;
-	struct opp *temp_opp, *opp = ERR_PTR(-ERANGE);
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
 	if (!dev || !freq) {
 		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
@@ -347,10 +349,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq)
+struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
+					      unsigned long *freq)
 {
 	struct device_opp *dev_opp;
-	struct opp *temp_opp, *opp = ERR_PTR(-ERANGE);
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
 	if (!dev || !freq) {
 		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
@@ -396,11 +399,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 {
 	struct device_opp *dev_opp = NULL;
-	struct opp *opp, *new_opp;
+	struct dev_pm_opp *opp, *new_opp;
 	struct list_head *head;
 
 	/* allocate new OPP node */
-	new_opp = kzalloc(sizeof(struct opp), GFP_KERNEL);
+	new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL);
 	if (!new_opp) {
 		dev_warn(dev, "%s: Unable to create new OPP node\n", __func__);
 		return -ENOMEM;
@@ -485,11 +488,11 @@ static int opp_set_availability(struct device *dev, unsigned long freq,
 		bool availability_req)
 {
 	struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
-	struct opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
+	struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
 	int r = 0;
 
 	/* keep the node allocated */
-	new_opp = kmalloc(sizeof(struct opp), GFP_KERNEL);
+	new_opp = kmalloc(sizeof(*new_opp), GFP_KERNEL);
 	if (!new_opp) {
 		dev_warn(dev, "%s: Unable to create OPP\n", __func__);
 		return -ENOMEM;
@@ -623,7 +626,7 @@ int dev_pm_opp_init_cpufreq_table(struct device *dev,
 			    struct cpufreq_frequency_table **table)
 {
 	struct device_opp *dev_opp;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	struct cpufreq_frequency_table *freq_table;
 	int i = 0;
 
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index af951a9cea78..d73107894fee 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -44,7 +44,7 @@ static int cpu0_set_target(struct cpufreq_policy *policy,
 			   unsigned int target_freq, unsigned int relation)
 {
 	struct cpufreq_freqs freqs;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	unsigned long volt = 0, volt_old = 0, tol = 0;
 	long freq_Hz, freq_exact;
 	unsigned int index;
@@ -230,7 +230,7 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
 		transition_latency = CPUFREQ_ETERNAL;
 
 	if (!IS_ERR(cpu_reg)) {
-		struct opp *opp;
+		struct dev_pm_opp *opp;
 		unsigned long min_uV, max_uV;
 		int i;
 
diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c
index 444eca74d3c2..c8b3d3950f9e 100644
--- a/drivers/cpufreq/exynos5440-cpufreq.c
+++ b/drivers/cpufreq/exynos5440-cpufreq.c
@@ -118,7 +118,7 @@ static int init_div_table(void)
 	struct cpufreq_frequency_table *freq_tbl = dvfs_info->freq_table;
 	unsigned int tmp, clk_div, ema_div, freq, volt_id;
 	int i = 0;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 
 	rcu_read_lock();
 	for (i = 0; freq_tbl[i].frequency != CPUFREQ_TABLE_END; i++) {
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index b360549e7715..84e993dfb305 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -49,7 +49,7 @@ static int imx6q_set_target(struct cpufreq_policy *policy,
 			    unsigned int target_freq, unsigned int relation)
 {
 	struct cpufreq_freqs freqs;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	unsigned long freq_hz, volt, volt_old;
 	unsigned int index;
 	int ret;
@@ -199,7 +199,7 @@ static struct cpufreq_driver imx6q_cpufreq_driver = {
 static int imx6q_cpufreq_probe(struct platform_device *pdev)
 {
 	struct device_node *np;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	unsigned long min_volt, max_volt;
 	int num, ret;
 
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index c5e31d296607..f057d423020c 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -65,7 +65,7 @@ static int omap_target(struct cpufreq_policy *policy,
 	unsigned int i;
 	int r, ret = 0;
 	struct cpufreq_freqs freqs;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	unsigned long freq, volt = 0, volt_old = 0, tol = 0;
 
 	if (!freq_table) {
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index f798edcb910d..03530a0a8042 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -902,7 +902,7 @@ static ssize_t available_frequencies_show(struct device *d,
 {
 	struct devfreq *df = to_devfreq(d);
 	struct device *dev = df->dev.parent;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	ssize_t count = 0;
 	unsigned long freq = 0;
 
@@ -1029,10 +1029,11 @@ module_exit(devfreq_exit);
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq,
-				    u32 flags)
+struct dev_pm_opp *devfreq_recommended_opp(struct device *dev,
+					   unsigned long *freq,
+					   u32 flags)
 {
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 
 	if (flags & DEVFREQ_FLAG_LEAST_UPPER_BOUND) {
 		/* The freq is an upper bound. opp should be lower */
diff --git a/drivers/devfreq/exynos/exynos4_bus.c b/drivers/devfreq/exynos/exynos4_bus.c
index 33c0e05a5ff1..891becd46513 100644
--- a/drivers/devfreq/exynos/exynos4_bus.c
+++ b/drivers/devfreq/exynos/exynos4_bus.c
@@ -639,7 +639,7 @@ static int exynos4_bus_target(struct device *dev, unsigned long *_freq,
 	struct platform_device *pdev = container_of(dev, struct platform_device,
 						    dev);
 	struct busfreq_data *data = platform_get_drvdata(pdev);
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	unsigned long freq;
 	unsigned long old_freq = data->curr_oppinfo.rate;
 	struct busfreq_opp_info	new_oppinfo;
@@ -956,7 +956,7 @@ static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this,
 {
 	struct busfreq_data *data = container_of(this, struct busfreq_data,
 						 pm_notifier);
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	struct busfreq_opp_info	new_oppinfo;
 	unsigned long maxfreq = ULONG_MAX;
 	int err = 0;
@@ -1020,7 +1020,7 @@ unlock:
 static int exynos4_busfreq_probe(struct platform_device *pdev)
 {
 	struct busfreq_data *data;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	struct device *dev = &pdev->dev;
 	int err = 0;
 
diff --git a/drivers/devfreq/exynos/exynos5_bus.c b/drivers/devfreq/exynos/exynos5_bus.c
index b0e45925505b..d06232aa0738 100644
--- a/drivers/devfreq/exynos/exynos5_bus.c
+++ b/drivers/devfreq/exynos/exynos5_bus.c
@@ -132,7 +132,7 @@ static int exynos5_busfreq_int_target(struct device *dev, unsigned long *_freq,
 	struct platform_device *pdev = container_of(dev, struct platform_device,
 						    dev);
 	struct busfreq_data_int *data = platform_get_drvdata(pdev);
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	unsigned long old_freq, freq;
 	unsigned long volt;
 
@@ -262,7 +262,7 @@ static int exynos5_busfreq_int_pm_notifier_event(struct notifier_block *this,
 {
 	struct busfreq_data_int *data = container_of(this,
 					struct busfreq_data_int, pm_notifier);
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	unsigned long maxfreq = ULONG_MAX;
 	unsigned long freq;
 	unsigned long volt;
@@ -316,7 +316,7 @@ unlock:
 static int exynos5_busfreq_int_probe(struct platform_device *pdev)
 {
 	struct busfreq_data_int *data;
-	struct opp *opp;
+	struct dev_pm_opp *opp;
 	struct device *dev = &pdev->dev;
 	struct device_node *np;
 	unsigned long initial_freq;
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 5f1ab92107e6..c8c995325ca8 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -187,7 +187,7 @@ extern int devfreq_suspend_device(struct devfreq *devfreq);
 extern int devfreq_resume_device(struct devfreq *devfreq);
 
 /* Helper functions for devfreq user device driver with OPP. */
-extern struct opp *devfreq_recommended_opp(struct device *dev,
+extern struct dev_pm_opp *devfreq_recommended_opp(struct device *dev,
 					   unsigned long *freq, u32 flags);
 extern int devfreq_register_opp_notifier(struct device *dev,
 					 struct devfreq *devfreq);
@@ -238,7 +238,7 @@ static inline int devfreq_resume_device(struct devfreq *devfreq)
 	return 0;
 }
 
-static inline struct opp *devfreq_recommended_opp(struct device *dev,
+static inline struct dev_pm_opp *devfreq_recommended_opp(struct device *dev,
 					   unsigned long *freq, u32 flags)
 {
 	return ERR_PTR(-EINVAL);
diff --git a/include/linux/opp.h b/include/linux/opp.h
index 2066ae5880c6..5151b0059585 100644
--- a/include/linux/opp.h
+++ b/include/linux/opp.h
@@ -18,27 +18,30 @@
 #include <linux/cpufreq.h>
 #include <linux/notifier.h>
 
-struct opp;
+struct dev_pm_opp;
 struct device;
 
-enum opp_event {
+enum dev_pm_opp_event {
 	OPP_EVENT_ADD, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
 };
 
 #if defined(CONFIG_PM_OPP)
 
-unsigned long dev_pm_opp_get_voltage(struct opp *opp);
+unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
 
-unsigned long dev_pm_opp_get_freq(struct opp *opp);
+unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp);
 
 int dev_pm_opp_get_opp_count(struct device *dev);
 
-struct opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq,
-				bool available);
+struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
+					      unsigned long freq,
+					      bool available);
 
-struct opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq);
+struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
+					      unsigned long *freq);
 
-struct opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq);
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
+					     unsigned long *freq);
 
 int dev_pm_opp_add(struct device *dev, unsigned long freq,
 		   unsigned long u_volt);
@@ -49,12 +52,12 @@ int dev_pm_opp_disable(struct device *dev, unsigned long freq);
 
 struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
 #else
-static inline unsigned long dev_pm_opp_get_voltage(struct opp *opp)
+static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
 	return 0;
 }
 
-static inline unsigned long dev_pm_opp_get_freq(struct opp *opp)
+static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
 {
 	return 0;
 }
@@ -64,19 +67,19 @@ static inline int dev_pm_opp_get_opp_count(struct device *dev)
 	return 0;
 }
 
-static inline struct opp *dev_pm_opp_find_freq_exact(struct device *dev,
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					unsigned long freq, bool available)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct opp *dev_pm_opp_find_freq_floor(struct device *dev,
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					unsigned long *freq)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct opp *dev_pm_opp_find_freq_ceil(struct device *dev,
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 					unsigned long *freq)
 {
 	return ERR_PTR(-EINVAL);
-- 
cgit v1.2.3


From e4db1c7439b31993a4886b273bb9235a8eea82bf Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Thu, 19 Sep 2013 16:03:52 -0500
Subject: PM / OPP: rename header to linux/pm_opp.h

Since Operating Performance Points (OPP) functions are specific
to device specific power management, be specific and rename opp.h
to pm_opp.h

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/opp.txt             |   2 +-
 arch/arm/mach-imx/mach-imx6q.c          |   2 +-
 arch/arm/mach-omap2/board-omap3beagle.c |   2 +-
 arch/arm/mach-omap2/omap-pm.h           |   2 +-
 arch/arm/mach-omap2/opp.c               |   2 +-
 arch/arm/mach-omap2/pm.c                |   2 +-
 drivers/base/power/opp.c                |   2 +-
 drivers/cpufreq/arm_big_little.c        |   2 +-
 drivers/cpufreq/arm_big_little_dt.c     |   2 +-
 drivers/cpufreq/cpufreq-cpu0.c          |   2 +-
 drivers/cpufreq/exynos5440-cpufreq.c    |   2 +-
 drivers/cpufreq/imx6q-cpufreq.c         |   2 +-
 drivers/cpufreq/omap-cpufreq.c          |   2 +-
 drivers/devfreq/devfreq.c               |   2 +-
 drivers/devfreq/exynos/exynos4_bus.c    |   2 +-
 drivers/devfreq/exynos/exynos5_bus.c    |   4 +-
 include/linux/devfreq.h                 |   2 +-
 include/linux/opp.h                     | 139 --------------------------------
 include/linux/pm_opp.h                  | 139 ++++++++++++++++++++++++++++++++
 19 files changed, 157 insertions(+), 157 deletions(-)
 delete mode 100644 include/linux/opp.h
 create mode 100644 include/linux/pm_opp.h

(limited to 'include/linux')

diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt
index 7f67e3d2c530..b8a907dc0169 100644
--- a/Documentation/power/opp.txt
+++ b/Documentation/power/opp.txt
@@ -42,7 +42,7 @@ We can represent these as three OPPs as the following {Hz, uV} tuples:
 
 OPP library provides a set of helper functions to organize and query the OPP
 information. The library is located in drivers/base/power/opp.c and the header
-is located in include/linux/opp.h. OPP library can be enabled by enabling
+is located in include/linux/pm_opp.h. OPP library can be enabled by enabling
 CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on
 CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to
 optionally boot at a certain OPP without needing cpufreq.
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index d7ecc90e6bba..699aabe296e1 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -25,7 +25,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/phy.h>
 #include <linux/reboot.h>
 #include <linux/regmap.h>
diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index 33969e5a149f..6432ab8d9207 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -25,7 +25,7 @@
 #include <linux/gpio.h>
 #include <linux/input.h>
 #include <linux/gpio_keys.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/cpu.h>
 
 #include <linux/mtd/mtd.h>
diff --git a/arch/arm/mach-omap2/omap-pm.h b/arch/arm/mach-omap2/omap-pm.h
index 67faa7b8fe92..1d777e63e05c 100644
--- a/arch/arm/mach-omap2/omap-pm.h
+++ b/arch/arm/mach-omap2/omap-pm.h
@@ -17,7 +17,7 @@
 #include <linux/device.h>
 #include <linux/cpufreq.h>
 #include <linux/clk.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 
 /*
  * agent_id values for use with omap_pm_set_min_bus_tput():
diff --git a/arch/arm/mach-omap2/opp.c b/arch/arm/mach-omap2/opp.c
index 7b04637b13cb..ec21e6eb03e1 100644
--- a/arch/arm/mach-omap2/opp.c
+++ b/arch/arm/mach-omap2/opp.c
@@ -17,7 +17,7 @@
  * GNU General Public License for more details.
  */
 #include <linux/module.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/cpu.h>
 
 #include "omap_device.h"
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 92901bd41490..2f569b3c3092 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/err.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/export.h>
 #include <linux/suspend.h>
 #include <linux/cpu.h>
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 693e14a24914..fa4187418440 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -21,7 +21,7 @@
 #include <linux/list.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/of.h>
 #include <linux/export.h>
 
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 9e82a9d5df77..e010fb722c6b 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -25,7 +25,7 @@
 #include <linux/cpumask.h>
 #include <linux/export.h>
 #include <linux/of_platform.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/slab.h>
 #include <linux/topology.h>
 #include <linux/types.h>
diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c
index 480c0bd0468d..8d9d59108906 100644
--- a/drivers/cpufreq/arm_big_little_dt.c
+++ b/drivers/cpufreq/arm_big_little_dt.c
@@ -24,7 +24,7 @@
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/types.h>
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index d73107894fee..242d4900de47 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -17,7 +17,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c
index c8b3d3950f9e..d41e77769d1a 100644
--- a/drivers/cpufreq/exynos5440-cpufreq.c
+++ b/drivers/cpufreq/exynos5440-cpufreq.c
@@ -20,7 +20,7 @@
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 84e993dfb305..e4c914066601 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -13,7 +13,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index f057d423020c..20190f56594f 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -22,7 +22,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 03530a0a8042..2e23b12c350b 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -18,7 +18,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/devfreq.h>
 #include <linux/workqueue.h>
 #include <linux/platform_device.h>
diff --git a/drivers/devfreq/exynos/exynos4_bus.c b/drivers/devfreq/exynos/exynos4_bus.c
index 891becd46513..cede6f71cd63 100644
--- a/drivers/devfreq/exynos/exynos4_bus.c
+++ b/drivers/devfreq/exynos/exynos4_bus.c
@@ -19,7 +19,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/suspend.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/devfreq.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
diff --git a/drivers/devfreq/exynos/exynos5_bus.c b/drivers/devfreq/exynos/exynos5_bus.c
index d06232aa0738..52835f12ac98 100644
--- a/drivers/devfreq/exynos/exynos5_bus.c
+++ b/drivers/devfreq/exynos/exynos5_bus.c
@@ -15,10 +15,10 @@
 #include <linux/module.h>
 #include <linux/devfreq.h>
 #include <linux/io.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/slab.h>
 #include <linux/suspend.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index c8c995325ca8..7a7cc74d7f27 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -15,7 +15,7 @@
 
 #include <linux/device.h>
 #include <linux/notifier.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
 
 #define DEVFREQ_NAME_LEN 16
 
diff --git a/include/linux/opp.h b/include/linux/opp.h
deleted file mode 100644
index 5151b0059585..000000000000
--- a/include/linux/opp.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Generic OPP Interface
- *
- * Copyright (C) 2009-2010 Texas Instruments Incorporated.
- *	Nishanth Menon
- *	Romit Dasgupta
- *	Kevin Hilman
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __LINUX_OPP_H__
-#define __LINUX_OPP_H__
-
-#include <linux/err.h>
-#include <linux/cpufreq.h>
-#include <linux/notifier.h>
-
-struct dev_pm_opp;
-struct device;
-
-enum dev_pm_opp_event {
-	OPP_EVENT_ADD, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
-};
-
-#if defined(CONFIG_PM_OPP)
-
-unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
-
-unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp);
-
-int dev_pm_opp_get_opp_count(struct device *dev);
-
-struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
-					      unsigned long freq,
-					      bool available);
-
-struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
-					      unsigned long *freq);
-
-struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
-					     unsigned long *freq);
-
-int dev_pm_opp_add(struct device *dev, unsigned long freq,
-		   unsigned long u_volt);
-
-int dev_pm_opp_enable(struct device *dev, unsigned long freq);
-
-int dev_pm_opp_disable(struct device *dev, unsigned long freq);
-
-struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
-#else
-static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
-{
-	return 0;
-}
-
-static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
-{
-	return 0;
-}
-
-static inline int dev_pm_opp_get_opp_count(struct device *dev)
-{
-	return 0;
-}
-
-static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
-					unsigned long freq, bool available)
-{
-	return ERR_PTR(-EINVAL);
-}
-
-static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
-					unsigned long *freq)
-{
-	return ERR_PTR(-EINVAL);
-}
-
-static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
-					unsigned long *freq)
-{
-	return ERR_PTR(-EINVAL);
-}
-
-static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
-					unsigned long u_volt)
-{
-	return -EINVAL;
-}
-
-static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq)
-{
-	return 0;
-}
-
-static inline int dev_pm_opp_disable(struct device *dev, unsigned long freq)
-{
-	return 0;
-}
-
-static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
-							struct device *dev)
-{
-	return ERR_PTR(-EINVAL);
-}
-#endif		/* CONFIG_PM_OPP */
-
-#if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
-int of_init_opp_table(struct device *dev);
-#else
-static inline int of_init_opp_table(struct device *dev)
-{
-	return -EINVAL;
-}
-#endif
-
-#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
-int dev_pm_opp_init_cpufreq_table(struct device *dev,
-			    struct cpufreq_frequency_table **table);
-void dev_pm_opp_free_cpufreq_table(struct device *dev,
-				struct cpufreq_frequency_table **table);
-#else
-static inline int dev_pm_opp_init_cpufreq_table(struct device *dev,
-			    struct cpufreq_frequency_table **table)
-{
-	return -EINVAL;
-}
-
-static inline
-void dev_pm_opp_free_cpufreq_table(struct device *dev,
-				struct cpufreq_frequency_table **table)
-{
-}
-#endif		/* CONFIG_CPU_FREQ */
-
-#endif		/* __LINUX_OPP_H__ */
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
new file mode 100644
index 000000000000..5151b0059585
--- /dev/null
+++ b/include/linux/pm_opp.h
@@ -0,0 +1,139 @@
+/*
+ * Generic OPP Interface
+ *
+ * Copyright (C) 2009-2010 Texas Instruments Incorporated.
+ *	Nishanth Menon
+ *	Romit Dasgupta
+ *	Kevin Hilman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_OPP_H__
+#define __LINUX_OPP_H__
+
+#include <linux/err.h>
+#include <linux/cpufreq.h>
+#include <linux/notifier.h>
+
+struct dev_pm_opp;
+struct device;
+
+enum dev_pm_opp_event {
+	OPP_EVENT_ADD, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
+};
+
+#if defined(CONFIG_PM_OPP)
+
+unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
+
+unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp);
+
+int dev_pm_opp_get_opp_count(struct device *dev);
+
+struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
+					      unsigned long freq,
+					      bool available);
+
+struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
+					      unsigned long *freq);
+
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
+					     unsigned long *freq);
+
+int dev_pm_opp_add(struct device *dev, unsigned long freq,
+		   unsigned long u_volt);
+
+int dev_pm_opp_enable(struct device *dev, unsigned long freq);
+
+int dev_pm_opp_disable(struct device *dev, unsigned long freq);
+
+struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
+#else
+static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
+{
+	return 0;
+}
+
+static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
+{
+	return 0;
+}
+
+static inline int dev_pm_opp_get_opp_count(struct device *dev)
+{
+	return 0;
+}
+
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
+					unsigned long freq, bool available)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
+					unsigned long *freq)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
+					unsigned long *freq)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
+					unsigned long u_volt)
+{
+	return -EINVAL;
+}
+
+static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq)
+{
+	return 0;
+}
+
+static inline int dev_pm_opp_disable(struct device *dev, unsigned long freq)
+{
+	return 0;
+}
+
+static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
+							struct device *dev)
+{
+	return ERR_PTR(-EINVAL);
+}
+#endif		/* CONFIG_PM_OPP */
+
+#if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
+int of_init_opp_table(struct device *dev);
+#else
+static inline int of_init_opp_table(struct device *dev)
+{
+	return -EINVAL;
+}
+#endif
+
+#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
+int dev_pm_opp_init_cpufreq_table(struct device *dev,
+			    struct cpufreq_frequency_table **table);
+void dev_pm_opp_free_cpufreq_table(struct device *dev,
+				struct cpufreq_frequency_table **table);
+#else
+static inline int dev_pm_opp_init_cpufreq_table(struct device *dev,
+			    struct cpufreq_frequency_table **table)
+{
+	return -EINVAL;
+}
+
+static inline
+void dev_pm_opp_free_cpufreq_table(struct device *dev,
+				struct cpufreq_frequency_table **table)
+{
+}
+#endif		/* CONFIG_CPU_FREQ */
+
+#endif		/* __LINUX_OPP_H__ */
-- 
cgit v1.2.3


From 9c0ebcf78fde0ffa348a95a544c6d3f2dac5af65 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 25 Oct 2013 19:45:48 +0530
Subject: cpufreq: Implement light weight ->target_index() routine

Currently, the prototype of cpufreq_drivers target routines is:

int target(struct cpufreq_policy *policy, unsigned int target_freq,
		unsigned int relation);

And most of the drivers call cpufreq_frequency_table_target() to get a valid
index of their frequency table which is closest to the target_freq. And they
don't use target_freq and relation after that.

So, it makes sense to just do this work in cpufreq core before calling
cpufreq_frequency_table_target() and simply pass index instead. But this can be
done only with drivers which expose their frequency table with cpufreq core. For
others we need to stick with the old prototype of target() until those drivers
are converted to expose frequency tables.

This patch implements the new light weight prototype for target_index() routine.
It looks like this:

int target_index(struct cpufreq_policy *policy, unsigned int index);

CPUFreq core will call cpufreq_frequency_table_target() before calling this
routine and pass index to it. Because CPUFreq core now requires to call routines
present in freq_table.c CONFIG_CPU_FREQ_TABLE must be enabled all the time.

This also marks target() interface as deprecated. So, that new drivers avoid
using it. And Documentation is updated accordingly.

It also converts existing .target() to newly defined light weight
.target_index() routine for many driver.

Acked-by: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Russell King <linux@arm.linux.org.uk>
Acked-by: David S. Miller <davem@davemloft.net>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rjw@rjwysocki.net>
---
 Documentation/cpu-freq/cpu-drivers.txt | 27 ++++++++++------
 Documentation/cpu-freq/governors.txt   |  4 +--
 arch/arm/mach-sa1100/generic.c         | 20 ------------
 arch/arm/mach-sa1100/generic.h         |  2 --
 drivers/cpufreq/acpi-cpufreq.c         | 21 ++++---------
 drivers/cpufreq/arm_big_little.c       | 17 +++-------
 drivers/cpufreq/at32ap-cpufreq.c       | 23 +++-----------
 drivers/cpufreq/blackfin-cpufreq.c     | 17 +++-------
 drivers/cpufreq/cpufreq-cpu0.c         | 17 ++--------
 drivers/cpufreq/cpufreq.c              | 57 +++++++++++++++++++++++++++-------
 drivers/cpufreq/cris-artpec3-cpufreq.c | 18 ++---------
 drivers/cpufreq/cris-etraxfs-cpufreq.c | 17 ++--------
 drivers/cpufreq/davinci-cpufreq.c      | 16 ++--------
 drivers/cpufreq/dbx500-cpufreq.c       | 16 ++--------
 drivers/cpufreq/e_powersaver.c         | 17 ++--------
 drivers/cpufreq/elanfreq.c             | 34 ++------------------
 drivers/cpufreq/exynos-cpufreq.c       | 21 ++-----------
 drivers/cpufreq/exynos5440-cpufreq.c   | 21 +++----------
 drivers/cpufreq/ia64-acpi-cpufreq.c    | 21 ++-----------
 drivers/cpufreq/imx6q-cpufreq.c        | 17 ++--------
 drivers/cpufreq/kirkwood-cpufreq.c     | 19 ++----------
 drivers/cpufreq/longhaul.c             | 13 ++------
 drivers/cpufreq/loongson2_cpufreq.c    | 21 +++----------
 drivers/cpufreq/maple-cpufreq.c        | 16 +++-------
 drivers/cpufreq/omap-cpufreq.c         | 31 ++----------------
 drivers/cpufreq/p4-clockmod.c          | 18 +++--------
 drivers/cpufreq/pasemi-cpufreq.c       | 12 ++-----
 drivers/cpufreq/pmac32-cpufreq.c       | 12 ++-----
 drivers/cpufreq/pmac64-cpufreq.c       | 17 +++-------
 drivers/cpufreq/powernow-k6.c          | 35 +++------------------
 drivers/cpufreq/powernow-k7.c          | 22 +++----------
 drivers/cpufreq/powernow-k8.c          | 24 +++++---------
 drivers/cpufreq/ppc-corenet-cpufreq.c  | 15 +++------
 drivers/cpufreq/ppc_cbe_cpufreq.c      | 12 ++-----
 drivers/cpufreq/pxa2xx-cpufreq.c       | 13 ++------
 drivers/cpufreq/pxa3xx-cpufreq.c       | 17 ++--------
 drivers/cpufreq/s3c2416-cpufreq.c      | 17 +++-------
 drivers/cpufreq/s3c64xx-cpufreq.c      | 18 +++--------
 drivers/cpufreq/s5pv210-cpufreq.c      | 54 +++++++++-----------------------
 drivers/cpufreq/sa1100-cpufreq.c       | 24 +++-----------
 drivers/cpufreq/sa1110-cpufreq.c       | 26 +++-------------
 drivers/cpufreq/sc520_freq.c           | 19 ++----------
 drivers/cpufreq/sparc-us2e-cpufreq.c   | 21 ++-----------
 drivers/cpufreq/sparc-us3-cpufreq.c    | 23 ++------------
 drivers/cpufreq/spear-cpufreq.c        | 12 +++----
 drivers/cpufreq/speedstep-centrino.c   | 27 +++++-----------
 drivers/cpufreq/speedstep-ich.c        | 24 ++++----------
 drivers/cpufreq/speedstep-smi.c        | 20 +++---------
 drivers/cpufreq/tegra-cpufreq.c        | 12 ++-----
 include/linux/cpufreq.h                |  4 ++-
 50 files changed, 242 insertions(+), 759 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index 40282e617913..8b1a4451422e 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -23,8 +23,8 @@ Contents:
 1.1  Initialization
 1.2  Per-CPU Initialization
 1.3  verify
-1.4  target or setpolicy?
-1.5  target
+1.4  target/target_index or setpolicy?
+1.5  target/target_index
 1.6  setpolicy
 2.   Frequency Table Helpers
 
@@ -56,7 +56,8 @@ cpufreq_driver.init -		A pointer to the per-CPU initialization
 cpufreq_driver.verify -		A pointer to a "verification" function.
 
 cpufreq_driver.setpolicy _or_ 
-cpufreq_driver.target -		See below on the differences.
+cpufreq_driver.target/
+target_index		-	See below on the differences.
 
 And optionally
 
@@ -66,7 +67,7 @@ cpufreq_driver.resume -		A pointer to a per-CPU resume function
 				which is called with interrupts disabled
 				and _before_ the pre-suspend frequency
 				and/or policy is restored by a call to
-				->target or ->setpolicy.
+				->target/target_index or ->setpolicy.
 
 cpufreq_driver.attr -		A pointer to a NULL-terminated list of
 				"struct freq_attr" which allow to
@@ -103,8 +104,8 @@ policy->governor		must contain the "default policy" for
 				this CPU. A few moments later,
 				cpufreq_driver.verify and either
 				cpufreq_driver.setpolicy or
-				cpufreq_driver.target is called with
-				these values.
+				cpufreq_driver.target/target_index is called
+				with these values.
 
 For setting some of these values (cpuinfo.min[max]_freq, policy->min[max]), the
 frequency table helpers might be helpful. See the section 2 for more information
@@ -133,20 +134,28 @@ range) is within policy->min and policy->max. If necessary, increase
 policy->max first, and only if this is no solution, decrease policy->min.
 
 
-1.4 target or setpolicy?
+1.4 target/target_index or setpolicy?
 ----------------------------
 
 Most cpufreq drivers or even most cpu frequency scaling algorithms 
 only allow the CPU to be set to one frequency. For these, you use the
-->target call.
+->target/target_index call.
 
 Some cpufreq-capable processors switch the frequency between certain
 limits on their own. These shall use the ->setpolicy call
 
 
-1.4. target
+1.4. target/target_index
 -------------
 
+The target_index call has two arguments: struct cpufreq_policy *policy,
+and unsigned int index (into the exposed frequency table).
+
+The CPUfreq driver must set the new frequency when called here. The
+actual frequency must be determined by freq_table[index].frequency.
+
+Deprecated:
+----------
 The target call has three arguments: struct cpufreq_policy *policy,
 unsigned int target_frequency, unsigned int relation.
 
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index 219970ba54b7..77ec21574fb1 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -40,7 +40,7 @@ Most cpufreq drivers (in fact, all except one, longrun) or even most
 cpu frequency scaling algorithms only offer the CPU to be set to one
 frequency. In order to offer dynamic frequency scaling, the cpufreq
 core must be able to tell these drivers of a "target frequency". So
-these specific drivers will be transformed to offer a "->target"
+these specific drivers will be transformed to offer a "->target/target_index"
 call instead of the existing "->setpolicy" call. For "longrun", all
 stays the same, though.
 
@@ -71,7 +71,7 @@ CPU can be set to switch independently	 |	   CPU can only be set
 		    /			       the limits of policy->{min,max}
 		   /			            \
 		  /				     \
-	Using the ->setpolicy call,		 Using the ->target call,
+	Using the ->setpolicy call,		 Using the ->target/target_index call,
 	    the limits and the			  the frequency closest
 	     "policy" is set.			  to target_freq is set.
 						  It is assured that it
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index cb4b2ca3cf6b..d4ea142c4edd 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -62,26 +62,6 @@ struct cpufreq_frequency_table sa11x0_freq_table[NR_FREQS+1] = {
 	{ .frequency = CPUFREQ_TABLE_END, },
 };
 
-/* rounds up(!)  */
-unsigned int sa11x0_freq_to_ppcr(unsigned int khz)
-{
-	int i;
-
-	for (i = 0; i < NR_FREQS; i++)
-		if (sa11x0_freq_table[i].frequency >= khz)
-			break;
-
-	return i;
-}
-
-unsigned int sa11x0_ppcr_to_freq(unsigned int idx)
-{
-	unsigned int freq = 0;
-	if (idx < NR_FREQS)
-		freq = sa11x0_freq_table[idx].frequency;
-	return freq;
-}
-
 unsigned int sa11x0_getspeed(unsigned int cpu)
 {
 	if (cpu)
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
index cbdfae744dc5..0d92e119b36b 100644
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -21,9 +21,7 @@ extern void sa1110_mb_enable(void);
 extern void sa1110_mb_disable(void);
 
 extern struct cpufreq_frequency_table sa11x0_freq_table[];
-extern unsigned int sa11x0_freq_to_ppcr(unsigned int khz);
 extern unsigned int sa11x0_getspeed(unsigned int cpu);
-extern unsigned int sa11x0_ppcr_to_freq(unsigned int idx);
 
 struct flash_platform_data;
 struct resource;
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index a1717d7367c1..889cec0a9022 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -424,17 +424,17 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
 }
 
 static int acpi_cpufreq_target(struct cpufreq_policy *policy,
-			       unsigned int target_freq, unsigned int relation)
+			       unsigned int index)
 {
 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 	struct acpi_processor_performance *perf;
 	struct cpufreq_freqs freqs;
 	struct drv_cmd cmd;
-	unsigned int next_state = 0; /* Index into freq_table */
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	int result = 0;
 
-	pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
+	pr_debug("acpi_cpufreq_target %d (%d)\n",
+			data->freq_table[index].frequency, policy->cpu);
 
 	if (unlikely(data == NULL ||
 	     data->acpi_data == NULL || data->freq_table == NULL)) {
@@ -442,16 +442,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	}
 
 	perf = data->acpi_data;
-	result = cpufreq_frequency_table_target(policy,
-						data->freq_table,
-						target_freq,
-						relation, &next_state);
-	if (unlikely(result)) {
-		result = -ENODEV;
-		goto out;
-	}
-
-	next_perf_state = data->freq_table[next_state].driver_data;
+	next_perf_state = data->freq_table[index].driver_data;
 	if (perf->state == next_perf_state) {
 		if (unlikely(data->resume)) {
 			pr_debug("Called after resume, resetting to P%d\n",
@@ -493,7 +484,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		cmd.mask = cpumask_of(policy->cpu);
 
 	freqs.old = perf->states[perf->state].core_frequency * 1000;
-	freqs.new = data->freq_table[next_state].frequency;
+	freqs.new = data->freq_table[index].frequency;
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
 	drv_write(&cmd);
@@ -923,7 +914,7 @@ static struct freq_attr *acpi_cpufreq_attr[] = {
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= acpi_cpufreq_target,
+	.target_index	= acpi_cpufreq_target,
 	.bios_limit	= acpi_processor_get_bios_limit,
 	.init		= acpi_cpufreq_cpu_init,
 	.exit		= acpi_cpufreq_cpu_exit,
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 086f7c17ff58..163e3378fe17 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -49,28 +49,21 @@ static unsigned int bL_cpufreq_get(unsigned int cpu)
 
 /* Set clock frequency */
 static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
-		unsigned int target_freq, unsigned int relation)
+		unsigned int index)
 {
 	struct cpufreq_freqs freqs;
-	u32 cpu = policy->cpu, freq_tab_idx, cur_cluster;
+	u32 cpu = policy->cpu, cur_cluster;
 	int ret = 0;
 
 	cur_cluster = cpu_to_cluster(policy->cpu);
 
 	freqs.old = bL_cpufreq_get(policy->cpu);
-
-	/* Determine valid target frequency using freq_table */
-	cpufreq_frequency_table_target(policy, freq_table[cur_cluster],
-			target_freq, relation, &freq_tab_idx);
-	freqs.new = freq_table[cur_cluster][freq_tab_idx].frequency;
+	freqs.new = freq_table[cur_cluster][index].frequency;
 
 	pr_debug("%s: cpu: %d, cluster: %d, oldfreq: %d, target freq: %d, new freq: %d\n",
-			__func__, cpu, cur_cluster, freqs.old, target_freq,
+			__func__, cpu, cur_cluster, freqs.old, freqs.new,
 			freqs.new);
 
-	if (freqs.old == freqs.new)
-		return 0;
-
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
 	ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000);
@@ -200,7 +193,7 @@ static struct cpufreq_driver bL_cpufreq_driver = {
 	.flags			= CPUFREQ_STICKY |
 					CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
 	.verify			= cpufreq_generic_frequency_table_verify,
-	.target			= bL_cpufreq_set_target,
+	.target_index		= bL_cpufreq_set_target,
 	.get			= bL_cpufreq_get,
 	.init			= bL_cpufreq_init,
 	.exit			= bL_cpufreq_exit,
diff --git a/drivers/cpufreq/at32ap-cpufreq.c b/drivers/cpufreq/at32ap-cpufreq.c
index 7439deddd5cf..81d07527bde6 100644
--- a/drivers/cpufreq/at32ap-cpufreq.c
+++ b/drivers/cpufreq/at32ap-cpufreq.c
@@ -35,25 +35,12 @@ static unsigned int at32_get_speed(unsigned int cpu)
 static unsigned int	ref_freq;
 static unsigned long	loops_per_jiffy_ref;
 
-static int at32_set_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq,
-			  unsigned int relation)
+static int at32_set_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	struct cpufreq_freqs freqs;
-	long freq;
-
-	/* Convert target_freq from kHz to Hz */
-	freq = clk_round_rate(cpuclk, target_freq * 1000);
-
-	/* Check if policy->min <= new_freq <= policy->max */
-	if(freq < (policy->min * 1000) || freq > (policy->max * 1000))
-		return -EINVAL;
-
-	pr_debug("cpufreq: requested frequency %u Hz\n", target_freq * 1000);
 
 	freqs.old = at32_get_speed(0);
-	freqs.new = (freq + 500) / 1000;
-	freqs.flags = 0;
+	freqs.new = freq_table[index].frequency;
 
 	if (!ref_freq) {
 		ref_freq = freqs.old;
@@ -64,13 +51,13 @@ static int at32_set_target(struct cpufreq_policy *policy,
 	if (freqs.old < freqs.new)
 		boot_cpu_data.loops_per_jiffy = cpufreq_scale(
 				loops_per_jiffy_ref, ref_freq, freqs.new);
-	clk_set_rate(cpuclk, freq);
+	clk_set_rate(cpuclk, freqs.new * 1000);
 	if (freqs.new < freqs.old)
 		boot_cpu_data.loops_per_jiffy = cpufreq_scale(
 				loops_per_jiffy_ref, ref_freq, freqs.new);
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
 
-	pr_debug("cpufreq: set frequency %lu Hz\n", freq);
+	pr_debug("cpufreq: set frequency %u Hz\n", freqs.new * 1000);
 
 	return 0;
 }
@@ -139,7 +126,7 @@ static struct cpufreq_driver at32_driver = {
 	.name		= "at32ap",
 	.init		= at32_cpufreq_driver_init,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= at32_set_target,
+	.target_index	= at32_set_target,
 	.get		= at32_get_speed,
 	.flags		= CPUFREQ_STICKY,
 };
diff --git a/drivers/cpufreq/blackfin-cpufreq.c b/drivers/cpufreq/blackfin-cpufreq.c
index 0bc9e8c2c79b..12528b28d45d 100644
--- a/drivers/cpufreq/blackfin-cpufreq.c
+++ b/drivers/cpufreq/blackfin-cpufreq.c
@@ -127,14 +127,11 @@ unsigned long cpu_set_cclk(int cpu, unsigned long new)
 }
 #endif
 
-static int bfin_target(struct cpufreq_policy *policy,
-			unsigned int target_freq, unsigned int relation)
+static int bfin_target(struct cpufreq_policy *policy, unsigned int index)
 {
 #ifndef CONFIG_BF60x
 	unsigned int plldiv;
 #endif
-	unsigned int index;
-	unsigned long cclk_hz;
 	struct cpufreq_freqs freqs;
 	static unsigned long lpj_ref;
 	static unsigned int  lpj_ref_freq;
@@ -144,17 +141,11 @@ static int bfin_target(struct cpufreq_policy *policy,
 	cycles_t cycles;
 #endif
 
-	if (cpufreq_frequency_table_target(policy, bfin_freq_table, target_freq,
-				relation, &index))
-		return -EINVAL;
-
-	cclk_hz = bfin_freq_table[index].frequency;
-
 	freqs.old = bfin_getfreq_khz(0);
-	freqs.new = cclk_hz;
+	freqs.new = bfin_freq_table[index].frequency;
 
 	pr_debug("cpufreq: changing cclk to %lu; target = %u, oldfreq = %u\n",
-			cclk_hz, target_freq, freqs.old);
+			freqs.new, freqs.new, freqs.old);
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 #ifndef CONFIG_BF60x
@@ -209,7 +200,7 @@ static int __bfin_cpu_init(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver bfin_driver = {
 	.verify = cpufreq_generic_frequency_table_verify,
-	.target = bfin_target,
+	.target_index = bfin_target,
 	.get = bfin_getfreq_khz,
 	.init = __bfin_cpu_init,
 	.exit = cpufreq_generic_exit,
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index 33ab6504c447..4dbe7efd86e5 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -35,24 +35,14 @@ static unsigned int cpu0_get_speed(unsigned int cpu)
 	return clk_get_rate(cpu_clk) / 1000;
 }
 
-static int cpu0_set_target(struct cpufreq_policy *policy,
-			   unsigned int target_freq, unsigned int relation)
+static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	struct cpufreq_freqs freqs;
 	struct dev_pm_opp *opp;
 	unsigned long volt = 0, volt_old = 0, tol = 0;
 	long freq_Hz, freq_exact;
-	unsigned int index;
 	int ret;
 
-	ret = cpufreq_frequency_table_target(policy, freq_table, target_freq,
-					     relation, &index);
-	if (ret) {
-		pr_err("failed to match target freqency %d: %d\n",
-		       target_freq, ret);
-		return ret;
-	}
-
 	freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
 	if (freq_Hz < 0)
 		freq_Hz = freq_table[index].frequency * 1000;
@@ -60,9 +50,6 @@ static int cpu0_set_target(struct cpufreq_policy *policy,
 	freqs.new = freq_Hz / 1000;
 	freqs.old = clk_get_rate(cpu_clk) / 1000;
 
-	if (freqs.old == freqs.new)
-		return 0;
-
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
 	if (!IS_ERR(cpu_reg)) {
@@ -128,7 +115,7 @@ static int cpu0_cpufreq_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver cpu0_cpufreq_driver = {
 	.flags = CPUFREQ_STICKY,
 	.verify = cpufreq_generic_frequency_table_verify,
-	.target = cpu0_set_target,
+	.target_index = cpu0_set_target,
 	.get = cpu0_get_speed,
 	.init = cpu0_cpufreq_init,
 	.exit = cpufreq_generic_exit,
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ec391d7f010b..87ed83a6c894 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -47,6 +47,11 @@ static LIST_HEAD(cpufreq_policy_list);
 static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
 #endif
 
+static inline bool has_target(void)
+{
+	return cpufreq_driver->target_index || cpufreq_driver->target;
+}
+
 /*
  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
  * all cpufreq/hotplug/workqueue/etc related lock issues.
@@ -392,7 +397,7 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
 			*policy = CPUFREQ_POLICY_POWERSAVE;
 			err = 0;
 		}
-	} else if (cpufreq_driver->target) {
+	} else if (has_target()) {
 		struct cpufreq_governor *t;
 
 		mutex_lock(&cpufreq_governor_mutex);
@@ -550,7 +555,7 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
 	ssize_t i = 0;
 	struct cpufreq_governor *t;
 
-	if (!cpufreq_driver->target) {
+	if (!has_target()) {
 		i += sprintf(buf, "performance powersave");
 		goto out;
 	}
@@ -835,7 +840,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
 		if (ret)
 			goto err_out_kobj_put;
 	}
-	if (cpufreq_driver->target) {
+	if (has_target()) {
 		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
 		if (ret)
 			goto err_out_kobj_put;
@@ -884,10 +889,10 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 				  unsigned int cpu, struct device *dev,
 				  bool frozen)
 {
-	int ret = 0, has_target = !!cpufreq_driver->target;
+	int ret = 0;
 	unsigned long flags;
 
-	if (has_target) {
+	if (has_target()) {
 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret) {
 			pr_err("%s: Failed to stop governor\n", __func__);
@@ -905,7 +910,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 
 	unlock_policy_rwsem_write(policy->cpu);
 
-	if (has_target) {
+	if (has_target()) {
 		if ((ret = __cpufreq_governor(policy, CPUFREQ_GOV_START)) ||
 			(ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))) {
 			pr_err("%s: Failed to start governor\n", __func__);
@@ -1215,7 +1220,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
 		return -EINVAL;
 	}
 
-	if (cpufreq_driver->target) {
+	if (has_target()) {
 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret) {
 			pr_err("%s: Failed to stop governor\n", __func__);
@@ -1280,7 +1285,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 
 	/* If cpu is last user of policy, free policy */
 	if (cpus == 1) {
-		if (cpufreq_driver->target) {
+		if (has_target()) {
 			ret = __cpufreq_governor(policy,
 					CPUFREQ_GOV_POLICY_EXIT);
 			if (ret) {
@@ -1323,7 +1328,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 		if (!frozen)
 			cpufreq_policy_free(policy);
 	} else {
-		if (cpufreq_driver->target) {
+		if (has_target()) {
 			if ((ret = __cpufreq_governor(policy, CPUFREQ_GOV_START)) ||
 					(ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))) {
 				pr_err("%s: Failed to start governor\n",
@@ -1694,12 +1699,41 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 	pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
 			policy->cpu, target_freq, relation, old_target_freq);
 
+	/*
+	 * This might look like a redundant call as we are checking it again
+	 * after finding index. But it is left intentionally for cases where
+	 * exactly same freq is called again and so we can save on few function
+	 * calls.
+	 */
 	if (target_freq == policy->cur)
 		return 0;
 
 	if (cpufreq_driver->target)
 		retval = cpufreq_driver->target(policy, target_freq, relation);
+	else if (cpufreq_driver->target_index) {
+		struct cpufreq_frequency_table *freq_table;
+		int index;
 
+		freq_table = cpufreq_frequency_get_table(policy->cpu);
+		if (unlikely(!freq_table)) {
+			pr_err("%s: Unable to find freq_table\n", __func__);
+			goto out;
+		}
+
+		retval = cpufreq_frequency_table_target(policy, freq_table,
+				target_freq, relation, &index);
+		if (unlikely(retval)) {
+			pr_err("%s: Unable to find matching freq\n", __func__);
+			goto out;
+		}
+
+		if (freq_table[index].frequency == policy->cur)
+			retval = 0;
+		else
+			retval = cpufreq_driver->target_index(policy, index);
+	}
+
+out:
 	return retval;
 }
 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
@@ -2025,7 +2059,7 @@ int cpufreq_update_policy(unsigned int cpu)
 			pr_debug("Driver did not initialize current freq");
 			policy->cur = new_policy.cur;
 		} else {
-			if (policy->cur != new_policy.cur && cpufreq_driver->target)
+			if (policy->cur != new_policy.cur && has_target())
 				cpufreq_out_of_sync(cpu, policy->cur,
 								new_policy.cur);
 		}
@@ -2103,7 +2137,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 		return -ENODEV;
 
 	if (!driver_data || !driver_data->verify || !driver_data->init ||
-	    ((!driver_data->setpolicy) && (!driver_data->target)))
+	    !(driver_data->setpolicy || driver_data->target_index ||
+		    driver_data->target))
 		return -EINVAL;
 
 	pr_debug("trying to register driver %s\n", driver_data->name);
diff --git a/drivers/cpufreq/cris-artpec3-cpufreq.c b/drivers/cpufreq/cris-artpec3-cpufreq.c
index 05fdc7e40257..841857cf1562 100644
--- a/drivers/cpufreq/cris-artpec3-cpufreq.c
+++ b/drivers/cpufreq/cris-artpec3-cpufreq.c
@@ -27,8 +27,7 @@ static unsigned int cris_freq_get_cpu_frequency(unsigned int cpu)
 	return clk_ctrl.pll ? 200000 : 6000;
 }
 
-static void cris_freq_set_cpu_state(struct cpufreq_policy *policy,
-		unsigned int state)
+static int cris_freq_target(struct cpufreq_policy *policy, unsigned int state)
 {
 	struct cpufreq_freqs freqs;
 	reg_clkgen_rw_clk_ctrl clk_ctrl;
@@ -52,19 +51,6 @@ static void cris_freq_set_cpu_state(struct cpufreq_policy *policy,
 	local_irq_enable();
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-};
-
-static int cris_freq_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, cris_freq_table,
-			target_freq, relation, &newstate))
-		return -EINVAL;
-
-	cris_freq_set_cpu_state(policy, newstate);
 
 	return 0;
 }
@@ -77,7 +63,7 @@ static int cris_freq_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver cris_freq_driver = {
 	.get	= cris_freq_get_cpu_frequency,
 	.verify	= cpufreq_generic_frequency_table_verify,
-	.target	= cris_freq_target,
+	.target_index = cris_freq_target,
 	.init	= cris_freq_cpu_init,
 	.exit	= cpufreq_generic_exit,
 	.name	= "cris_freq",
diff --git a/drivers/cpufreq/cris-etraxfs-cpufreq.c b/drivers/cpufreq/cris-etraxfs-cpufreq.c
index fac2b26932dd..c58811abd961 100644
--- a/drivers/cpufreq/cris-etraxfs-cpufreq.c
+++ b/drivers/cpufreq/cris-etraxfs-cpufreq.c
@@ -27,8 +27,7 @@ static unsigned int cris_freq_get_cpu_frequency(unsigned int cpu)
 	return clk_ctrl.pll ? 200000 : 6000;
 }
 
-static void cris_freq_set_cpu_state(struct cpufreq_policy *policy,
-		unsigned int state)
+static int cris_freq_target(struct cpufreq_policy *policy, unsigned int state)
 {
 	struct cpufreq_freqs freqs;
 	reg_config_rw_clk_ctrl clk_ctrl;
@@ -52,18 +51,6 @@ static void cris_freq_set_cpu_state(struct cpufreq_policy *policy,
 	local_irq_enable();
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-};
-
-static int cris_freq_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq, unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target
-	    (policy, cris_freq_table, target_freq, relation, &newstate))
-		return -EINVAL;
-
-	cris_freq_set_cpu_state(policy, newstate);
 
 	return 0;
 }
@@ -76,7 +63,7 @@ static int cris_freq_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver cris_freq_driver = {
 	.get = cris_freq_get_cpu_frequency,
 	.verify = cpufreq_generic_frequency_table_verify,
-	.target = cris_freq_target,
+	.target_index = cris_freq_target,
 	.init = cris_freq_cpu_init,
 	.exit = cpufreq_generic_exit,
 	.name = "cris_freq",
diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c
index 972583baf9e8..1f5d8a569c77 100644
--- a/drivers/cpufreq/davinci-cpufreq.c
+++ b/drivers/cpufreq/davinci-cpufreq.c
@@ -66,28 +66,18 @@ static unsigned int davinci_getspeed(unsigned int cpu)
 	return clk_get_rate(cpufreq.armclk) / 1000;
 }
 
-static int davinci_target(struct cpufreq_policy *policy,
-				unsigned int target_freq, unsigned int relation)
+static int davinci_target(struct cpufreq_policy *policy, unsigned int idx)
 {
 	int ret = 0;
-	unsigned int idx;
 	struct cpufreq_freqs freqs;
 	struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data;
 	struct clk *armclk = cpufreq.armclk;
 
 	freqs.old = davinci_getspeed(0);
-	freqs.new = clk_round_rate(armclk, target_freq * 1000) / 1000;
-
-	if (freqs.old == freqs.new)
-		return ret;
+	freqs.new = pdata->freq_table[idx].frequency;
 
 	dev_dbg(cpufreq.dev, "transition: %u --> %u\n", freqs.old, freqs.new);
 
-	ret = cpufreq_frequency_table_target(policy, pdata->freq_table,
-						freqs.new, relation, &idx);
-	if (ret)
-		return -EINVAL;
-
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
 	/* if moving to higher frequency, up the voltage beforehand */
@@ -148,7 +138,7 @@ static int davinci_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver davinci_driver = {
 	.flags		= CPUFREQ_STICKY,
 	.verify		= davinci_verify_speed,
-	.target		= davinci_target,
+	.target_index	= davinci_target,
 	.get		= davinci_getspeed,
 	.init		= davinci_cpu_init,
 	.exit		= cpufreq_generic_exit,
diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c
index a60f7693c18e..238b16976be1 100644
--- a/drivers/cpufreq/dbx500-cpufreq.c
+++ b/drivers/cpufreq/dbx500-cpufreq.c
@@ -20,23 +20,13 @@ static struct cpufreq_frequency_table *freq_table;
 static struct clk *armss_clk;
 
 static int dbx500_cpufreq_target(struct cpufreq_policy *policy,
-				unsigned int target_freq,
-				unsigned int relation)
+				unsigned int index)
 {
 	struct cpufreq_freqs freqs;
-	unsigned int idx;
 	int ret;
 
-	/* Lookup the next frequency */
-	if (cpufreq_frequency_table_target(policy, freq_table, target_freq,
-					relation, &idx))
-		return -EINVAL;
-
 	freqs.old = policy->cur;
-	freqs.new = freq_table[idx].frequency;
-
-	if (freqs.old == freqs.new)
-		return 0;
+	freqs.new = freq_table[index].frequency;
 
 	/* pre-change notification */
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
@@ -80,7 +70,7 @@ static int dbx500_cpufreq_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver dbx500_cpufreq_driver = {
 	.flags  = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS,
 	.verify = cpufreq_generic_frequency_table_verify,
-	.target = dbx500_cpufreq_target,
+	.target_index = dbx500_cpufreq_target,
 	.get    = dbx500_cpufreq_getspeed,
 	.init   = dbx500_cpufreq_init,
 	.name   = "DBX500",
diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index 2c11ce3c67bd..b39c4ef60a7a 100644
--- a/drivers/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
@@ -168,12 +168,9 @@ postchange:
 	return err;
 }
 
-static int eps_target(struct cpufreq_policy *policy,
-			       unsigned int target_freq,
-			       unsigned int relation)
+static int eps_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	struct eps_cpu_data *centaur;
-	unsigned int newstate = 0;
 	unsigned int cpu = policy->cpu;
 	unsigned int dest_state;
 	int ret;
@@ -182,16 +179,8 @@ static int eps_target(struct cpufreq_policy *policy,
 		return -ENODEV;
 	centaur = eps_cpu[cpu];
 
-	if (unlikely(cpufreq_frequency_table_target(policy,
-			&eps_cpu[cpu]->freq_table[0],
-			target_freq,
-			relation,
-			&newstate))) {
-		return -EINVAL;
-	}
-
 	/* Make frequency transition */
-	dest_state = centaur->freq_table[newstate].driver_data & 0xffff;
+	dest_state = centaur->freq_table[index].driver_data & 0xffff;
 	ret = eps_set_state(centaur, policy, dest_state);
 	if (ret)
 		printk(KERN_ERR "eps: Timeout!\n");
@@ -418,7 +407,7 @@ static int eps_cpu_exit(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver eps_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= eps_target,
+	.target_index	= eps_target,
 	.init		= eps_cpu_init,
 	.exit		= eps_cpu_exit,
 	.get		= eps_get,
diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
index d91a645a27ae..4ab41539514f 100644
--- a/drivers/cpufreq/elanfreq.c
+++ b/drivers/cpufreq/elanfreq.c
@@ -105,20 +105,8 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
 }
 
 
-/**
- *	elanfreq_set_cpu_frequency: Change the CPU core frequency
- *	@cpu: cpu number
- *	@freq: frequency in kHz
- *
- *	This function takes a frequency value and changes the CPU frequency
- *	according to this. Note that the frequency has to be checked by
- *	elanfreq_validatespeed() for correctness!
- *
- *	There is no return value.
- */
-
-static void elanfreq_set_cpu_state(struct cpufreq_policy *policy,
-		unsigned int state)
+static int elanfreq_target(struct cpufreq_policy *policy,
+			    unsigned int state)
 {
 	struct cpufreq_freqs    freqs;
 
@@ -162,25 +150,9 @@ static void elanfreq_set_cpu_state(struct cpufreq_policy *policy,
 	local_irq_enable();
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-};
-
-
-static int elanfreq_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	elanfreq_set_cpu_state(policy, newstate);
 
 	return 0;
 }
-
-
 /*
  *	Module init and exit code
  */
@@ -237,7 +209,7 @@ __setup("elanfreq=", elanfreq_setup);
 static struct cpufreq_driver elanfreq_driver = {
 	.get		= elanfreq_get_cpu_frequency,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= elanfreq_target,
+	.target_index	= elanfreq_target,
 	.init		= elanfreq_cpu_init,
 	.exit		= cpufreq_generic_exit,
 	.name		= "elanfreq",
diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c
index 3e4af676f43d..9982fcb82257 100644
--- a/drivers/cpufreq/exynos-cpufreq.c
+++ b/drivers/cpufreq/exynos-cpufreq.c
@@ -65,9 +65,6 @@ static int exynos_cpufreq_scale(unsigned int target_freq)
 	freqs.old = policy->cur;
 	freqs.new = target_freq;
 
-	if (freqs.new == freqs.old)
-		goto out;
-
 	/*
 	 * The policy max have been changed so that we cannot get proper
 	 * old_index with cpufreq_frequency_table_target(). Thus, ignore
@@ -151,13 +148,9 @@ out:
 	return ret;
 }
 
-static int exynos_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq,
-			  unsigned int relation)
+static int exynos_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	struct cpufreq_frequency_table *freq_table = exynos_info->freq_table;
-	unsigned int index;
-	unsigned int new_freq;
 	int ret = 0;
 
 	mutex_lock(&cpufreq_lock);
@@ -165,15 +158,7 @@ static int exynos_target(struct cpufreq_policy *policy,
 	if (frequency_locked)
 		goto out;
 
-	if (cpufreq_frequency_table_target(policy, freq_table,
-					   target_freq, relation, &index)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	new_freq = freq_table[index].frequency;
-
-	ret = exynos_cpufreq_scale(new_freq);
+	ret = exynos_cpufreq_scale(freq_table[index].frequency);
 
 out:
 	mutex_unlock(&cpufreq_lock);
@@ -247,7 +232,7 @@ static int exynos_cpufreq_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver exynos_driver = {
 	.flags		= CPUFREQ_STICKY,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= exynos_target,
+	.target_index	= exynos_target,
 	.get		= exynos_getspeed,
 	.init		= exynos_cpufreq_cpu_init,
 	.exit		= cpufreq_generic_exit,
diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c
index 8ae5e2925bf1..1bf9b060d522 100644
--- a/drivers/cpufreq/exynos5440-cpufreq.c
+++ b/drivers/cpufreq/exynos5440-cpufreq.c
@@ -214,27 +214,17 @@ static unsigned int exynos_getspeed(unsigned int cpu)
 	return dvfs_info->cur_frequency;
 }
 
-static int exynos_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq,
-			  unsigned int relation)
+static int exynos_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	unsigned int index, tmp;
-	int ret = 0, i;
+	unsigned int tmp;
+	int i;
 	struct cpufreq_frequency_table *freq_table = dvfs_info->freq_table;
 
 	mutex_lock(&cpufreq_lock);
 
-	ret = cpufreq_frequency_table_target(policy, freq_table,
-					   target_freq, relation, &index);
-	if (ret)
-		goto out;
-
 	freqs.old = dvfs_info->cur_frequency;
 	freqs.new = freq_table[index].frequency;
 
-	if (freqs.old == freqs.new)
-		goto out;
-
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
 	/* Set the target frequency in all C0_3_PSTATE register */
@@ -245,9 +235,8 @@ static int exynos_target(struct cpufreq_policy *policy,
 
 		__raw_writel(tmp, dvfs_info->base + XMU_C0_3_PSTATE + i * 4);
 	}
-out:
 	mutex_unlock(&cpufreq_lock);
-	return ret;
+	return 0;
 }
 
 static void exynos_cpufreq_work(struct work_struct *work)
@@ -325,7 +314,7 @@ static int exynos_cpufreq_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver exynos_driver = {
 	.flags		= CPUFREQ_STICKY,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= exynos_target,
+	.target_index	= exynos_target,
 	.get		= exynos_getspeed,
 	.init		= exynos_cpufreq_cpu_init,
 	.exit		= cpufreq_generic_exit,
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index 90c6598415fd..4695fa22406a 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -227,26 +227,11 @@ acpi_cpufreq_get (
 static int
 acpi_cpufreq_target (
 	struct cpufreq_policy   *policy,
-	unsigned int target_freq,
-	unsigned int relation)
+	unsigned int index)
 {
-	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-	unsigned int next_state = 0;
-	unsigned int result = 0;
-
-	pr_debug("acpi_cpufreq_setpolicy\n");
-
-	result = cpufreq_frequency_table_target(policy,
-			data->freq_table, target_freq, relation, &next_state);
-	if (result)
-		return (result);
-
-	result = processor_set_freq(data, policy, next_state);
-
-	return (result);
+	return processor_set_freq(acpi_io_data[policy->cpu], policy, index);
 }
 
-
 static int
 acpi_cpufreq_cpu_init (
 	struct cpufreq_policy   *policy)
@@ -379,7 +364,7 @@ acpi_cpufreq_cpu_exit (
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
 	.verify 	= cpufreq_generic_frequency_table_verify,
-	.target 	= acpi_cpufreq_target,
+	.target_index	= acpi_cpufreq_target,
 	.get 		= acpi_cpufreq_get,
 	.init		= acpi_cpufreq_cpu_init,
 	.exit		= acpi_cpufreq_cpu_exit,
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index be23892282e3..07af3b0de069 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -40,30 +40,17 @@ static unsigned int imx6q_get_speed(unsigned int cpu)
 	return clk_get_rate(arm_clk) / 1000;
 }
 
-static int imx6q_set_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq, unsigned int relation)
+static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	struct cpufreq_freqs freqs;
 	struct dev_pm_opp *opp;
 	unsigned long freq_hz, volt, volt_old;
-	unsigned int index;
 	int ret;
 
-	ret = cpufreq_frequency_table_target(policy, freq_table, target_freq,
-					     relation, &index);
-	if (ret) {
-		dev_err(cpu_dev, "failed to match target frequency %d: %d\n",
-			target_freq, ret);
-		return ret;
-	}
-
 	freqs.new = freq_table[index].frequency;
 	freq_hz = freqs.new * 1000;
 	freqs.old = clk_get_rate(arm_clk) / 1000;
 
-	if (freqs.old == freqs.new)
-		return 0;
-
 	rcu_read_lock();
 	opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz);
 	if (IS_ERR(opp)) {
@@ -159,7 +146,7 @@ static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver imx6q_cpufreq_driver = {
 	.verify = cpufreq_generic_frequency_table_verify,
-	.target = imx6q_set_target,
+	.target_index = imx6q_set_target,
 	.get = imx6q_get_speed,
 	.init = imx6q_cpufreq_init,
 	.exit = cpufreq_generic_exit,
diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index fff8653c8e9b..0ae4dd7e1f2d 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -55,8 +55,8 @@ static unsigned int kirkwood_cpufreq_get_cpu_frequency(unsigned int cpu)
 	return kirkwood_freq_table[0].frequency;
 }
 
-static void kirkwood_cpufreq_set_cpu_state(struct cpufreq_policy *policy,
-		unsigned int index)
+static int kirkwood_cpufreq_target(struct cpufreq_policy *policy,
+			    unsigned int index)
 {
 	struct cpufreq_freqs freqs;
 	unsigned int state = kirkwood_freq_table[index].driver_data;
@@ -100,19 +100,6 @@ static void kirkwood_cpufreq_set_cpu_state(struct cpufreq_policy *policy,
 		local_irq_enable();
 	}
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-};
-
-static int kirkwood_cpufreq_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int index = 0;
-
-	if (cpufreq_frequency_table_target(policy, kirkwood_freq_table,
-				target_freq, relation, &index))
-		return -EINVAL;
-
-	kirkwood_cpufreq_set_cpu_state(policy, index);
 
 	return 0;
 }
@@ -126,7 +113,7 @@ static int kirkwood_cpufreq_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver kirkwood_cpufreq_driver = {
 	.get	= kirkwood_cpufreq_get_cpu_frequency,
 	.verify	= cpufreq_generic_frequency_table_verify,
-	.target	= kirkwood_cpufreq_target,
+	.target_index = kirkwood_cpufreq_target,
 	.init	= kirkwood_cpufreq_cpu_init,
 	.exit	= cpufreq_generic_exit,
 	.name	= "kirkwood-cpufreq",
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 14df4974fb45..45bafddfd8ea 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -626,21 +626,12 @@ static void longhaul_setup_voltagescaling(void)
 
 
 static int longhaul_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq, unsigned int relation)
+			    unsigned int table_index)
 {
-	unsigned int table_index = 0;
 	unsigned int i;
 	unsigned int dir = 0;
 	u8 vid, current_vid;
 
-	if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
-				relation, &table_index))
-		return -EINVAL;
-
-	/* Don't set same frequency again */
-	if (longhaul_index == table_index)
-		return 0;
-
 	if (!can_scale_voltage)
 		longhaul_setstate(policy, table_index);
 	else {
@@ -919,7 +910,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver longhaul_driver = {
 	.verify	= cpufreq_generic_frequency_table_verify,
-	.target	= longhaul_target,
+	.target_index = longhaul_target,
 	.get	= longhaul_get,
 	.init	= longhaul_cpu_init,
 	.exit	= cpufreq_generic_exit,
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index 2c8ec8e06449..41a8e2cdf940 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -53,11 +53,9 @@ static unsigned int loongson2_cpufreq_get(unsigned int cpu)
  * Here we notify other drivers of the proposed change and the final change.
  */
 static int loongson2_cpufreq_target(struct cpufreq_policy *policy,
-				     unsigned int target_freq,
-				     unsigned int relation)
+				     unsigned int index)
 {
 	unsigned int cpu = policy->cpu;
-	unsigned int newstate = 0;
 	cpumask_t cpus_allowed;
 	struct cpufreq_freqs freqs;
 	unsigned int freq;
@@ -65,26 +63,17 @@ static int loongson2_cpufreq_target(struct cpufreq_policy *policy,
 	cpus_allowed = current->cpus_allowed;
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
 
-	if (cpufreq_frequency_table_target
-	    (policy, &loongson2_clockmod_table[0], target_freq, relation,
-	     &newstate))
-		return -EINVAL;
-
 	freq =
 	    ((cpu_clock_freq / 1000) *
-	     loongson2_clockmod_table[newstate].driver_data) / 8;
-	if (freq < policy->min || freq > policy->max)
-		return -EINVAL;
+	     loongson2_clockmod_table[index].driver_data) / 8;
 
-	pr_debug("cpufreq: requested frequency %u Hz\n", target_freq * 1000);
+	pr_debug("cpufreq: requested frequency %u Hz\n",
+			loongson2_clockmod_table[index].frequency * 1000);
 
 	freqs.old = loongson2_cpufreq_get(cpu);
 	freqs.new = freq;
 	freqs.flags = 0;
 
-	if (freqs.new == freqs.old)
-		return 0;
-
 	/* notifiers */
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
@@ -145,7 +134,7 @@ static struct cpufreq_driver loongson2_cpufreq_driver = {
 	.name = "loongson2",
 	.init = loongson2_cpufreq_cpu_init,
 	.verify = cpufreq_generic_frequency_table_verify,
-	.target = loongson2_cpufreq_target,
+	.target_index = loongson2_cpufreq_target,
 	.get = loongson2_cpufreq_get,
 	.exit = loongson2_cpufreq_exit,
 	.attr = cpufreq_generic_attr,
diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c
index eb1e1766baed..4e2da0874bfb 100644
--- a/drivers/cpufreq/maple-cpufreq.c
+++ b/drivers/cpufreq/maple-cpufreq.c
@@ -131,26 +131,18 @@ static int maple_scom_query_freq(void)
  */
 
 static int maple_cpufreq_target(struct cpufreq_policy *policy,
-	unsigned int target_freq, unsigned int relation)
+	unsigned int index)
 {
-	unsigned int newstate = 0;
 	struct cpufreq_freqs freqs;
 	int rc;
 
-	if (cpufreq_frequency_table_target(policy, maple_cpu_freqs,
-			target_freq, relation, &newstate))
-		return -EINVAL;
-
-	if (maple_pmode_cur == newstate)
-		return 0;
-
 	mutex_lock(&maple_switch_mutex);
 
 	freqs.old = maple_cpu_freqs[maple_pmode_cur].frequency;
-	freqs.new = maple_cpu_freqs[newstate].frequency;
+	freqs.new = maple_cpu_freqs[index].frequency;
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-	rc = maple_scom_switch_freq(newstate);
+	rc = maple_scom_switch_freq(index);
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
 
 	mutex_unlock(&maple_switch_mutex);
@@ -173,7 +165,7 @@ static struct cpufreq_driver maple_cpufreq_driver = {
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.init		= maple_cpufreq_cpu_init,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= maple_cpufreq_target,
+	.target_index	= maple_cpufreq_target,
 	.get		= maple_cpufreq_get_speed,
 	.attr		= cpufreq_generic_attr,
 };
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index ac552d090463..b5512712298f 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -51,40 +51,15 @@ static unsigned int omap_getspeed(unsigned int cpu)
 	return rate;
 }
 
-static int omap_target(struct cpufreq_policy *policy,
-		       unsigned int target_freq,
-		       unsigned int relation)
+static int omap_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	unsigned int i;
 	int r, ret = 0;
 	struct cpufreq_freqs freqs;
 	struct dev_pm_opp *opp;
 	unsigned long freq, volt = 0, volt_old = 0, tol = 0;
 
-	if (!freq_table) {
-		dev_err(mpu_dev, "%s: cpu%d: no freq table!\n", __func__,
-				policy->cpu);
-		return -EINVAL;
-	}
-
-	ret = cpufreq_frequency_table_target(policy, freq_table, target_freq,
-			relation, &i);
-	if (ret) {
-		dev_dbg(mpu_dev, "%s: cpu%d: no freq match for %d(ret=%d)\n",
-			__func__, policy->cpu, target_freq, ret);
-		return ret;
-	}
-	freqs.new = freq_table[i].frequency;
-	if (!freqs.new) {
-		dev_err(mpu_dev, "%s: cpu%d: no match for freq %d\n", __func__,
-			policy->cpu, target_freq);
-		return -EINVAL;
-	}
-
 	freqs.old = omap_getspeed(policy->cpu);
-
-	if (freqs.old == freqs.new && policy->cur == freqs.new)
-		return ret;
+	freqs.new = freq_table[index].frequency;
 
 	freq = freqs.new * 1000;
 	ret = clk_round_rate(mpu_clk, freq);
@@ -200,7 +175,7 @@ static int omap_cpu_exit(struct cpufreq_policy *policy)
 static struct cpufreq_driver omap_driver = {
 	.flags		= CPUFREQ_STICKY,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= omap_target,
+	.target_index	= omap_target,
 	.get		= omap_getspeed,
 	.init		= omap_cpu_init,
 	.exit		= omap_cpu_exit,
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
index 6164c1cca504..3c23053afdfd 100644
--- a/drivers/cpufreq/p4-clockmod.c
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -105,23 +105,13 @@ static struct cpufreq_frequency_table p4clockmod_table[] = {
 };
 
 
-static int cpufreq_p4_target(struct cpufreq_policy *policy,
-			     unsigned int target_freq,
-			     unsigned int relation)
+static int cpufreq_p4_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	unsigned int    newstate = DC_RESV;
 	struct cpufreq_freqs freqs;
 	int i;
 
-	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
 	freqs.old = cpufreq_p4_get(policy->cpu);
-	freqs.new = stock_freq * p4clockmod_table[newstate].driver_data / 8;
-
-	if (freqs.new == freqs.old)
-		return 0;
+	freqs.new = stock_freq * p4clockmod_table[index].driver_data / 8;
 
 	/* notifiers */
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
@@ -131,7 +121,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
 	 * Developer's Manual, Volume 3
 	 */
 	for_each_cpu(i, policy->cpus)
-		cpufreq_p4_setdc(i, p4clockmod_table[newstate].driver_data);
+		cpufreq_p4_setdc(i, p4clockmod_table[index].driver_data);
 
 	/* notifiers */
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
@@ -255,7 +245,7 @@ static unsigned int cpufreq_p4_get(unsigned int cpu)
 
 static struct cpufreq_driver p4clockmod_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= cpufreq_p4_target,
+	.target_index	= cpufreq_p4_target,
 	.init		= cpufreq_p4_cpu_init,
 	.exit		= cpufreq_generic_exit,
 	.get		= cpufreq_p4_get,
diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index 1cca332728c3..17424ddc7f67 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -240,19 +240,11 @@ static int pas_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 }
 
 static int pas_cpufreq_target(struct cpufreq_policy *policy,
-			      unsigned int target_freq,
-			      unsigned int relation)
+			      unsigned int pas_astate_new)
 {
 	struct cpufreq_freqs freqs;
-	int pas_astate_new;
 	int i;
 
-	cpufreq_frequency_table_target(policy,
-				       pas_freqs,
-				       target_freq,
-				       relation,
-				       &pas_astate_new);
-
 	freqs.old = policy->cur;
 	freqs.new = pas_freqs[pas_astate_new].frequency;
 
@@ -282,7 +274,7 @@ static struct cpufreq_driver pas_cpufreq_driver = {
 	.init		= pas_cpufreq_cpu_init,
 	.exit		= pas_cpufreq_cpu_exit,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= pas_cpufreq_target,
+	.target_index	= pas_cpufreq_target,
 	.attr		= cpufreq_generic_attr,
 };
 
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 6eac1e230078..05f705e1b7a2 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -374,17 +374,11 @@ static unsigned int pmac_cpufreq_get_speed(unsigned int cpu)
 }
 
 static int pmac_cpufreq_target(	struct cpufreq_policy *policy,
-					unsigned int target_freq,
-					unsigned int relation)
+					unsigned int index)
 {
-	unsigned int    newstate = 0;
 	int		rc;
 
-	if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs,
-			target_freq, relation, &newstate))
-		return -EINVAL;
-
-	rc = do_set_cpu_speed(policy, newstate, 1);
+	rc = do_set_cpu_speed(policy, index, 1);
 
 	ppc_proc_freq = cur_freq * 1000ul;
 	return rc;
@@ -453,7 +447,7 @@ static int pmac_cpufreq_resume(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver pmac_cpufreq_driver = {
 	.verify 	= cpufreq_generic_frequency_table_verify,
-	.target 	= pmac_cpufreq_target,
+	.target_index	= pmac_cpufreq_target,
 	.get		= pmac_cpufreq_get_speed,
 	.init		= pmac_cpufreq_cpu_init,
 	.suspend	= pmac_cpufreq_suspend,
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 5261b92d768b..234b598ce416 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -312,27 +312,18 @@ static int g5_pfunc_query_freq(void)
  * Common interface to the cpufreq core
  */
 
-static int g5_cpufreq_target(struct cpufreq_policy *policy,
-	unsigned int target_freq, unsigned int relation)
+static int g5_cpufreq_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	unsigned int newstate = 0;
 	struct cpufreq_freqs freqs;
 	int rc;
 
-	if (cpufreq_frequency_table_target(policy, g5_cpu_freqs,
-			target_freq, relation, &newstate))
-		return -EINVAL;
-
-	if (g5_pmode_cur == newstate)
-		return 0;
-
 	mutex_lock(&g5_switch_mutex);
 
 	freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency;
-	freqs.new = g5_cpu_freqs[newstate].frequency;
+	freqs.new = g5_cpu_freqs[index].frequency;
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-	rc = g5_switch_freq(newstate);
+	rc = g5_switch_freq(index);
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
 
 	mutex_unlock(&g5_switch_mutex);
@@ -355,7 +346,7 @@ static struct cpufreq_driver g5_cpufreq_driver = {
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.init		= g5_cpufreq_cpu_init,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= g5_cpufreq_target,
+	.target_index	= g5_cpufreq_target,
 	.get		= g5_cpufreq_get_speed,
 	.attr 		= cpufreq_generic_attr,
 };
diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
index eda17024a34a..643e7952cad3 100644
--- a/drivers/cpufreq/powernow-k6.c
+++ b/drivers/cpufreq/powernow-k6.c
@@ -63,12 +63,12 @@ static int powernow_k6_get_cpu_multiplier(void)
 
 
 /**
- * powernow_k6_set_state - set the PowerNow! multiplier
+ * powernow_k6_target - set the PowerNow! multiplier
  * @best_i: clock_ratio[best_i] is the target multiplier
  *
  *   Tries to change the PowerNow! multiplier
  */
-static void powernow_k6_set_state(struct cpufreq_policy *policy,
+static int powernow_k6_target(struct cpufreq_policy *policy,
 		unsigned int best_i)
 {
 	unsigned long outvalue = 0, invalue = 0;
@@ -77,7 +77,7 @@ static void powernow_k6_set_state(struct cpufreq_policy *policy,
 
 	if (clock_ratio[best_i].driver_data > max_multiplier) {
 		printk(KERN_ERR PFX "invalid target frequency\n");
-		return;
+		return -EINVAL;
 	}
 
 	freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
@@ -100,31 +100,6 @@ static void powernow_k6_set_state(struct cpufreq_policy *policy,
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
 
-	return;
-}
-
-
-/**
- * powernow_k6_setpolicy - sets a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * sets a new CPUFreq policy
- */
-static int powernow_k6_target(struct cpufreq_policy *policy,
-			       unsigned int target_freq,
-			       unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	powernow_k6_set_state(policy, newstate);
-
 	return 0;
 }
 
@@ -161,7 +136,7 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
 	unsigned int i;
 	for (i = 0; i < 8; i++) {
 		if (i == max_multiplier)
-			powernow_k6_set_state(policy, i);
+			powernow_k6_target(policy, i);
 	}
 	cpufreq_frequency_table_put_attr(policy->cpu);
 	return 0;
@@ -176,7 +151,7 @@ static unsigned int powernow_k6_get(unsigned int cpu)
 
 static struct cpufreq_driver powernow_k6_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= powernow_k6_target,
+	.target_index	= powernow_k6_target,
 	.init		= powernow_k6_cpu_init,
 	.exit		= powernow_k6_cpu_exit,
 	.get		= powernow_k6_get,
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index 44d345bad6fb..946708a1d745 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -248,7 +248,7 @@ static void change_VID(int vid)
 }
 
 
-static void change_speed(struct cpufreq_policy *policy, unsigned int index)
+static int powernow_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	u8 fid, vid;
 	struct cpufreq_freqs freqs;
@@ -291,6 +291,8 @@ static void change_speed(struct cpufreq_policy *policy, unsigned int index)
 		local_irq_enable();
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
+
+	return 0;
 }
 
 
@@ -533,22 +535,6 @@ static int powernow_decode_bios(int maxfid, int startvid)
 }
 
 
-static int powernow_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate;
-
-	if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
-				relation, &newstate))
-		return -EINVAL;
-
-	change_speed(policy, newstate);
-
-	return 0;
-}
-
-
 /*
  * We use the fact that the bus frequency is somehow
  * a multiple of 100000/3 khz, then we compute sgtc according
@@ -694,7 +680,7 @@ static int powernow_cpu_exit(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver powernow_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= powernow_target,
+	.target_index	= powernow_target,
 	.get		= powernow_get,
 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
 	.bios_limit	= acpi_processor_get_bios_limit,
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 298beb742ebb..62a1ce47d3df 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -977,20 +977,17 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 
 struct powernowk8_target_arg {
 	struct cpufreq_policy		*pol;
-	unsigned			targfreq;
-	unsigned			relation;
+	unsigned			newstate;
 };
 
 static long powernowk8_target_fn(void *arg)
 {
 	struct powernowk8_target_arg *pta = arg;
 	struct cpufreq_policy *pol = pta->pol;
-	unsigned targfreq = pta->targfreq;
-	unsigned relation = pta->relation;
+	unsigned newstate = pta->newstate;
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 	u32 checkfid;
 	u32 checkvid;
-	unsigned int newstate;
 	int ret;
 
 	if (!data)
@@ -1004,8 +1001,9 @@ static long powernowk8_target_fn(void *arg)
 		return -EIO;
 	}
 
-	pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
-		pol->cpu, targfreq, pol->min, pol->max, relation);
+	pr_debug("targ: cpu %d, %d kHz, min %d, max %d\n",
+		pol->cpu, data->powernow_table[newstate].frequency, pol->min,
+		pol->max);
 
 	if (query_current_values_with_pending_wait(data))
 		return -EIO;
@@ -1021,10 +1019,6 @@ static long powernowk8_target_fn(void *arg)
 		       checkvid, data->currvid);
 	}
 
-	if (cpufreq_frequency_table_target(pol, data->powernow_table,
-				targfreq, relation, &newstate))
-		return -EIO;
-
 	mutex_lock(&fidvid_mutex);
 
 	powernow_k8_acpi_pst_values(data, newstate);
@@ -1044,11 +1038,9 @@ static long powernowk8_target_fn(void *arg)
 }
 
 /* Driver entry point to switch to the target frequency */
-static int powernowk8_target(struct cpufreq_policy *pol,
-		unsigned targfreq, unsigned relation)
+static int powernowk8_target(struct cpufreq_policy *pol, unsigned index)
 {
-	struct powernowk8_target_arg pta = { .pol = pol, .targfreq = targfreq,
-					     .relation = relation };
+	struct powernowk8_target_arg pta = { .pol = pol, .newstate = index };
 
 	return work_on_cpu(pol->cpu, powernowk8_target_fn, &pta);
 }
@@ -1213,7 +1205,7 @@ out:
 
 static struct cpufreq_driver cpufreq_amd64_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= powernowk8_target,
+	.target_index	= powernowk8_target,
 	.bios_limit	= acpi_processor_get_bios_limit,
 	.init		= powernowk8_cpu_init,
 	.exit		= powernowk8_cpu_exit,
diff --git a/drivers/cpufreq/ppc-corenet-cpufreq.c b/drivers/cpufreq/ppc-corenet-cpufreq.c
index a0f562ca292d..79d8e9c46b6d 100644
--- a/drivers/cpufreq/ppc-corenet-cpufreq.c
+++ b/drivers/cpufreq/ppc-corenet-cpufreq.c
@@ -251,27 +251,20 @@ static int __exit corenet_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 }
 
 static int corenet_cpufreq_target(struct cpufreq_policy *policy,
-		unsigned int target_freq, unsigned int relation)
+		unsigned int index)
 {
 	struct cpufreq_freqs freqs;
-	unsigned int new;
 	struct clk *parent;
 	int ret;
 	struct cpu_data *data = per_cpu(cpu_data, policy->cpu);
 
-	cpufreq_frequency_table_target(policy, data->table,
-			target_freq, relation, &new);
-
-	if (policy->cur == data->table[new].frequency)
-		return 0;
-
 	freqs.old = policy->cur;
-	freqs.new = data->table[new].frequency;
+	freqs.new = data->table[index].frequency;
 
 	mutex_lock(&cpufreq_lock);
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
-	parent = of_clk_get(data->parent, data->table[new].driver_data);
+	parent = of_clk_get(data->parent, data->table[index].driver_data);
 	ret = clk_set_parent(data->clk, parent);
 	if (ret)
 		freqs.new = freqs.old;
@@ -288,7 +281,7 @@ static struct cpufreq_driver ppc_corenet_cpufreq_driver = {
 	.init		= corenet_cpufreq_cpu_init,
 	.exit		= __exit_p(corenet_cpufreq_cpu_exit),
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= corenet_cpufreq_target,
+	.target_index	= corenet_cpufreq_target,
 	.get		= corenet_cpufreq_get_speed,
 	.attr		= cpufreq_generic_attr,
 };
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c
index 38540d1f5939..52f707d5f458 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.c
@@ -129,18 +129,10 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
 }
 
 static int cbe_cpufreq_target(struct cpufreq_policy *policy,
-			      unsigned int target_freq,
-			      unsigned int relation)
+			      unsigned int cbe_pmode_new)
 {
 	int rc;
 	struct cpufreq_freqs freqs;
-	unsigned int cbe_pmode_new;
-
-	cpufreq_frequency_table_target(policy,
-				       cbe_freqs,
-				       target_freq,
-				       relation,
-				       &cbe_pmode_new);
 
 	freqs.old = policy->cur;
 	freqs.new = cbe_freqs[cbe_pmode_new].frequency;
@@ -164,7 +156,7 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy,
 
 static struct cpufreq_driver cbe_cpufreq_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= cbe_cpufreq_target,
+	.target_index	= cbe_cpufreq_target,
 	.init		= cbe_cpufreq_cpu_init,
 	.exit		= cpufreq_generic_exit,
 	.name		= "cbe-cpufreq",
diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c
index 29aca574317b..183bc13f13e5 100644
--- a/drivers/cpufreq/pxa2xx-cpufreq.c
+++ b/drivers/cpufreq/pxa2xx-cpufreq.c
@@ -267,14 +267,11 @@ static unsigned int pxa_cpufreq_get(unsigned int cpu)
 	return get_clk_frequency_khz(0);
 }
 
-static int pxa_set_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq,
-			  unsigned int relation)
+static int pxa_set_target(struct cpufreq_policy *policy, unsigned int idx)
 {
 	struct cpufreq_frequency_table *pxa_freqs_table;
 	pxa_freqs_t *pxa_freq_settings;
 	struct cpufreq_freqs freqs;
-	unsigned int idx;
 	unsigned long flags;
 	unsigned int new_freq_cpu, new_freq_mem;
 	unsigned int unused, preset_mdrefr, postset_mdrefr, cclkcfg;
@@ -283,12 +280,6 @@ static int pxa_set_target(struct cpufreq_policy *policy,
 	/* Get the current policy */
 	find_freq_tables(&pxa_freqs_table, &pxa_freq_settings);
 
-	/* Lookup the next frequency */
-	if (cpufreq_frequency_table_target(policy, pxa_freqs_table,
-					   target_freq, relation, &idx)) {
-		return -EINVAL;
-	}
-
 	new_freq_cpu = pxa_freq_settings[idx].khz;
 	new_freq_mem = pxa_freq_settings[idx].membus;
 	freqs.old = policy->cur;
@@ -448,7 +439,7 @@ static int pxa_cpufreq_init(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver pxa_cpufreq_driver = {
 	.verify	= cpufreq_generic_frequency_table_verify,
-	.target	= pxa_set_target,
+	.target_index = pxa_set_target,
 	.init	= pxa_cpufreq_init,
 	.exit	= cpufreq_generic_exit,
 	.get	= pxa_cpufreq_get,
diff --git a/drivers/cpufreq/pxa3xx-cpufreq.c b/drivers/cpufreq/pxa3xx-cpufreq.c
index 47fbee49d6e5..132e37d578c2 100644
--- a/drivers/cpufreq/pxa3xx-cpufreq.c
+++ b/drivers/cpufreq/pxa3xx-cpufreq.c
@@ -155,24 +155,16 @@ static unsigned int pxa3xx_cpufreq_get(unsigned int cpu)
 	return pxa3xx_get_clk_frequency_khz(0);
 }
 
-static int pxa3xx_cpufreq_set(struct cpufreq_policy *policy,
-			      unsigned int target_freq,
-			      unsigned int relation)
+static int pxa3xx_cpufreq_set(struct cpufreq_policy *policy, unsigned int index)
 {
 	struct pxa3xx_freq_info *next;
 	struct cpufreq_freqs freqs;
 	unsigned long flags;
-	int idx;
 
 	if (policy->cpu != 0)
 		return -EINVAL;
 
-	/* Lookup the next frequency */
-	if (cpufreq_frequency_table_target(policy, pxa3xx_freqs_table,
-				target_freq, relation, &idx))
-		return -EINVAL;
-
-	next = &pxa3xx_freqs[idx];
+	next = &pxa3xx_freqs[index];
 
 	freqs.old = policy->cur;
 	freqs.new = next->cpufreq_mhz * 1000;
@@ -181,9 +173,6 @@ static int pxa3xx_cpufreq_set(struct cpufreq_policy *policy,
 			freqs.old / 1000, freqs.new / 1000,
 			(freqs.old == freqs.new) ? " (skipped)" : "");
 
-	if (freqs.old == target_freq)
-		return 0;
-
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
 	local_irq_save(flags);
@@ -225,7 +214,7 @@ static int pxa3xx_cpufreq_init(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver pxa3xx_cpufreq_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= pxa3xx_cpufreq_set,
+	.target_index	= pxa3xx_cpufreq_set,
 	.init		= pxa3xx_cpufreq_init,
 	.exit		= cpufreq_generic_exit,
 	.get		= pxa3xx_cpufreq_get,
diff --git a/drivers/cpufreq/s3c2416-cpufreq.c b/drivers/cpufreq/s3c2416-cpufreq.c
index 26a35d137157..4188accd34ab 100644
--- a/drivers/cpufreq/s3c2416-cpufreq.c
+++ b/drivers/cpufreq/s3c2416-cpufreq.c
@@ -217,24 +217,15 @@ static int s3c2416_cpufreq_leave_dvs(struct s3c2416_data *s3c_freq, int idx)
 }
 
 static int s3c2416_cpufreq_set_target(struct cpufreq_policy *policy,
-				      unsigned int target_freq,
-				      unsigned int relation)
+				      unsigned int index)
 {
 	struct s3c2416_data *s3c_freq = &s3c2416_cpufreq;
 	struct cpufreq_freqs freqs;
 	int idx, ret, to_dvs = 0;
-	unsigned int i;
 
 	mutex_lock(&cpufreq_lock);
 
-	pr_debug("cpufreq: to %dKHz, relation %d\n", target_freq, relation);
-
-	ret = cpufreq_frequency_table_target(policy, s3c_freq->freq_table,
-					     target_freq, relation, &i);
-	if (ret != 0)
-		goto out;
-
-	idx = s3c_freq->freq_table[i].driver_data;
+	idx = s3c_freq->freq_table[index].driver_data;
 
 	if (idx == SOURCE_HCLK)
 		to_dvs = 1;
@@ -256,7 +247,7 @@ static int s3c2416_cpufreq_set_target(struct cpufreq_policy *policy,
 	 */
 	freqs.new = (s3c_freq->is_dvs && !to_dvs)
 				? clk_get_rate(s3c_freq->hclk) / 1000
-				: s3c_freq->freq_table[i].frequency;
+				: s3c_freq->freq_table[index].frequency;
 
 	pr_debug("cpufreq: Transition %d-%dkHz\n", freqs.old, freqs.new);
 
@@ -505,7 +496,7 @@ err_hclk:
 static struct cpufreq_driver s3c2416_cpufreq_driver = {
 	.flags          = 0,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= s3c2416_cpufreq_set_target,
+	.target_index	= s3c2416_cpufreq_set_target,
 	.get		= s3c2416_cpufreq_get_speed,
 	.init		= s3c2416_cpufreq_driver_init,
 	.name		= "s3c2416",
diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c
index 461617332033..8bdcf32a4418 100644
--- a/drivers/cpufreq/s3c64xx-cpufreq.c
+++ b/drivers/cpufreq/s3c64xx-cpufreq.c
@@ -63,26 +63,16 @@ static unsigned int s3c64xx_cpufreq_get_speed(unsigned int cpu)
 }
 
 static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy,
-				      unsigned int target_freq,
-				      unsigned int relation)
+				      unsigned int index)
 {
 	int ret;
-	unsigned int i;
 	struct cpufreq_freqs freqs;
 	struct s3c64xx_dvfs *dvfs;
 
-	ret = cpufreq_frequency_table_target(policy, s3c64xx_freq_table,
-					     target_freq, relation, &i);
-	if (ret != 0)
-		return ret;
-
 	freqs.old = clk_get_rate(armclk) / 1000;
-	freqs.new = s3c64xx_freq_table[i].frequency;
+	freqs.new = s3c64xx_freq_table[index].frequency;
 	freqs.flags = 0;
-	dvfs = &s3c64xx_dvfs_table[s3c64xx_freq_table[i].driver_data];
-
-	if (freqs.old == freqs.new)
-		return 0;
+	dvfs = &s3c64xx_dvfs_table[s3c64xx_freq_table[index].driver_data];
 
 	pr_debug("Transition %d-%dkHz\n", freqs.old, freqs.new);
 
@@ -254,7 +244,7 @@ static int s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver s3c64xx_cpufreq_driver = {
 	.flags          = 0,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= s3c64xx_cpufreq_set_target,
+	.target_index	= s3c64xx_cpufreq_set_target,
 	.get		= s3c64xx_cpufreq_get_speed,
 	.init		= s3c64xx_cpufreq_driver_init,
 	.name		= "s3c",
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index 600b4f472e28..5978b94e0340 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -36,16 +36,7 @@ static DEFINE_MUTEX(set_freq_lock);
 /* Use 800MHz when entering sleep mode */
 #define SLEEP_FREQ	(800 * 1000)
 
-/*
- * relation has an additional symantics other than the standard of cpufreq
- * DISALBE_FURTHER_CPUFREQ: disable further access to target
- * ENABLE_FURTUER_CPUFREQ: enable access to target
- */
-enum cpufreq_access {
-	DISABLE_FURTHER_CPUFREQ = 0x10,
-	ENABLE_FURTHER_CPUFREQ = 0x20,
-};
-
+/* Tracks if cpu freqency can be updated anymore */
 static bool no_cpufreq_access;
 
 /*
@@ -182,12 +173,10 @@ static unsigned int s5pv210_getspeed(unsigned int cpu)
 	return clk_get_rate(cpu_clk) / 1000;
 }
 
-static int s5pv210_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq,
-			  unsigned int relation)
+static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	unsigned long reg;
-	unsigned int index, priv_index;
+	unsigned int priv_index;
 	unsigned int pll_changing = 0;
 	unsigned int bus_speed_changing = 0;
 	int arm_volt, int_volt;
@@ -195,9 +184,6 @@ static int s5pv210_target(struct cpufreq_policy *policy,
 
 	mutex_lock(&set_freq_lock);
 
-	if (relation & ENABLE_FURTHER_CPUFREQ)
-		no_cpufreq_access = false;
-
 	if (no_cpufreq_access) {
 #ifdef CONFIG_PM_VERBOSE
 		pr_err("%s:%d denied access to %s as it is disabled"
@@ -207,27 +193,13 @@ static int s5pv210_target(struct cpufreq_policy *policy,
 		goto exit;
 	}
 
-	if (relation & DISABLE_FURTHER_CPUFREQ)
-		no_cpufreq_access = true;
-
-	relation &= ~(ENABLE_FURTHER_CPUFREQ | DISABLE_FURTHER_CPUFREQ);
-
 	freqs.old = s5pv210_getspeed(0);
-
-	if (cpufreq_frequency_table_target(policy, s5pv210_freq_table,
-					   target_freq, relation, &index)) {
-		ret = -EINVAL;
-		goto exit;
-	}
-
 	freqs.new = s5pv210_freq_table[index].frequency;
 
-	if (freqs.new == freqs.old)
-		goto exit;
-
 	/* Finding current running level index */
 	if (cpufreq_frequency_table_target(policy, s5pv210_freq_table,
-					   freqs.old, relation, &priv_index)) {
+					   freqs.old, CPUFREQ_RELATION_H,
+					   &priv_index)) {
 		ret = -EINVAL;
 		goto exit;
 	}
@@ -559,16 +531,18 @@ static int s5pv210_cpufreq_notifier_event(struct notifier_block *this,
 
 	switch (event) {
 	case PM_SUSPEND_PREPARE:
-		ret = cpufreq_driver_target(cpufreq_cpu_get(0), SLEEP_FREQ,
-					    DISABLE_FURTHER_CPUFREQ);
+		ret = cpufreq_driver_target(cpufreq_cpu_get(0), SLEEP_FREQ, 0);
 		if (ret < 0)
 			return NOTIFY_BAD;
 
+		/* Disable updation of cpu frequency */
+		no_cpufreq_access = true;
 		return NOTIFY_OK;
 	case PM_POST_RESTORE:
 	case PM_POST_SUSPEND:
-		cpufreq_driver_target(cpufreq_cpu_get(0), SLEEP_FREQ,
-				      ENABLE_FURTHER_CPUFREQ);
+		/* Enable updation of cpu frequency */
+		no_cpufreq_access = false;
+		cpufreq_driver_target(cpufreq_cpu_get(0), SLEEP_FREQ, 0);
 
 		return NOTIFY_OK;
 	}
@@ -581,18 +555,18 @@ static int s5pv210_cpufreq_reboot_notifier_event(struct notifier_block *this,
 {
 	int ret;
 
-	ret = cpufreq_driver_target(cpufreq_cpu_get(0), SLEEP_FREQ,
-				    DISABLE_FURTHER_CPUFREQ);
+	ret = cpufreq_driver_target(cpufreq_cpu_get(0), SLEEP_FREQ, 0);
 	if (ret < 0)
 		return NOTIFY_BAD;
 
+	no_cpufreq_access = true;
 	return NOTIFY_DONE;
 }
 
 static struct cpufreq_driver s5pv210_driver = {
 	.flags		= CPUFREQ_STICKY,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= s5pv210_target,
+	.target_index	= s5pv210_target,
 	.get		= s5pv210_getspeed,
 	.init		= s5pv210_cpu_init,
 	.name		= "s5pv210",
diff --git a/drivers/cpufreq/sa1100-cpufreq.c b/drivers/cpufreq/sa1100-cpufreq.c
index b282cea47e62..b0da1fe40b1d 100644
--- a/drivers/cpufreq/sa1100-cpufreq.c
+++ b/drivers/cpufreq/sa1100-cpufreq.c
@@ -177,36 +177,20 @@ static void sa1100_update_dram_timings(int current_speed, int new_speed)
 	}
 }
 
-static int sa1100_target(struct cpufreq_policy *policy,
-			 unsigned int target_freq,
-			 unsigned int relation)
+static int sa1100_target(struct cpufreq_policy *policy, unsigned int ppcr)
 {
 	unsigned int cur = sa11x0_getspeed(0);
-	unsigned int new_ppcr;
 	struct cpufreq_freqs freqs;
 
-	new_ppcr = sa11x0_freq_to_ppcr(target_freq);
-	switch (relation) {
-	case CPUFREQ_RELATION_L:
-		if (sa11x0_ppcr_to_freq(new_ppcr) > policy->max)
-			new_ppcr--;
-		break;
-	case CPUFREQ_RELATION_H:
-		if ((sa11x0_ppcr_to_freq(new_ppcr) > target_freq) &&
-		    (sa11x0_ppcr_to_freq(new_ppcr - 1) >= policy->min))
-			new_ppcr--;
-		break;
-	}
-
 	freqs.old = cur;
-	freqs.new = sa11x0_ppcr_to_freq(new_ppcr);
+	freqs.new = sa11x0_freq_table[ppcr].frequency;
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
 	if (freqs.new > cur)
 		sa1100_update_dram_timings(cur, freqs.new);
 
-	PPCR = new_ppcr;
+	PPCR = ppcr;
 
 	if (freqs.new < cur)
 		sa1100_update_dram_timings(cur, freqs.new);
@@ -224,7 +208,7 @@ static int __init sa1100_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver sa1100_driver __refdata = {
 	.flags		= CPUFREQ_STICKY,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= sa1100_target,
+	.target_index	= sa1100_target,
 	.get		= sa11x0_getspeed,
 	.init		= sa1100_cpu_init,
 	.name		= "sa1100",
diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c
index bca04c0b4a73..55b1818c3e49 100644
--- a/drivers/cpufreq/sa1110-cpufreq.c
+++ b/drivers/cpufreq/sa1110-cpufreq.c
@@ -229,34 +229,16 @@ sdram_update_refresh(u_int cpu_khz, struct sdram_params *sdram)
 /*
  * Ok, set the CPU frequency.
  */
-static int sa1110_target(struct cpufreq_policy *policy,
-			 unsigned int target_freq,
-			 unsigned int relation)
+static int sa1110_target(struct cpufreq_policy *policy, unsigned int ppcr)
 {
 	struct sdram_params *sdram = &sdram_params;
 	struct cpufreq_freqs freqs;
 	struct sdram_info sd;
 	unsigned long flags;
-	unsigned int ppcr, unused;
-
-	switch (relation) {
-	case CPUFREQ_RELATION_L:
-		ppcr = sa11x0_freq_to_ppcr(target_freq);
-		if (sa11x0_ppcr_to_freq(ppcr) > policy->max)
-			ppcr--;
-		break;
-	case CPUFREQ_RELATION_H:
-		ppcr = sa11x0_freq_to_ppcr(target_freq);
-		if (ppcr && (sa11x0_ppcr_to_freq(ppcr) > target_freq) &&
-		    (sa11x0_ppcr_to_freq(ppcr-1) >= policy->min))
-			ppcr--;
-		break;
-	default:
-		return -EINVAL;
-	}
+	unsigned int unused;
 
 	freqs.old = sa11x0_getspeed(0);
-	freqs.new = sa11x0_ppcr_to_freq(ppcr);
+	freqs.new = sa11x0_freq_table[ppcr].frequency;
 
 	sdram_calculate_timing(&sd, freqs.new, sdram);
 
@@ -340,7 +322,7 @@ static int __init sa1110_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver sa1110_driver __refdata = {
 	.flags		= CPUFREQ_STICKY,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= sa1110_target,
+	.target_index	= sa1110_target,
 	.get		= sa11x0_getspeed,
 	.init		= sa1110_cpu_init,
 	.name		= "sa1110",
diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
index 9047ab1ca014..6c86452e1737 100644
--- a/drivers/cpufreq/sc520_freq.c
+++ b/drivers/cpufreq/sc520_freq.c
@@ -53,8 +53,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
 	}
 }
 
-static void sc520_freq_set_cpu_state(struct cpufreq_policy *policy,
-		unsigned int state)
+static int sc520_freq_target(struct cpufreq_policy *policy, unsigned int state)
 {
 
 	struct cpufreq_freqs	freqs;
@@ -76,24 +75,10 @@ static void sc520_freq_set_cpu_state(struct cpufreq_policy *policy,
 	local_irq_enable();
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-};
-
-static int sc520_freq_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, sc520_freq_table,
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	sc520_freq_set_cpu_state(policy, newstate);
 
 	return 0;
 }
 
-
 /*
  *	Module init and exit code
  */
@@ -117,7 +102,7 @@ static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver sc520_freq_driver = {
 	.get	= sc520_freq_get_cpu_frequency,
 	.verify	= cpufreq_generic_frequency_table_verify,
-	.target	= sc520_freq_target,
+	.target_index = sc520_freq_target,
 	.init	= sc520_freq_cpu_init,
 	.exit	= cpufreq_generic_exit,
 	.name	= "sc520_freq",
diff --git a/drivers/cpufreq/sparc-us2e-cpufreq.c b/drivers/cpufreq/sparc-us2e-cpufreq.c
index 291688c1da9a..3bf5b8f03661 100644
--- a/drivers/cpufreq/sparc-us2e-cpufreq.c
+++ b/drivers/cpufreq/sparc-us2e-cpufreq.c
@@ -245,8 +245,7 @@ static unsigned int us2e_freq_get(unsigned int cpu)
 	return clock_tick / estar_to_divisor(estar);
 }
 
-static void us2e_set_cpu_divider_index(struct cpufreq_policy *policy,
-		unsigned int index)
+static int us2e_freq_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	unsigned int cpu = policy->cpu;
 	unsigned long new_bits, new_freq;
@@ -277,20 +276,6 @@ static void us2e_set_cpu_divider_index(struct cpufreq_policy *policy,
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
 
 	set_cpus_allowed_ptr(current, &cpus_allowed);
-}
-
-static int us2e_freq_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq,
-			  unsigned int relation)
-{
-	unsigned int new_index = 0;
-
-	if (cpufreq_frequency_table_target(policy,
-					   &us2e_freq_table[policy->cpu].table[0],
-					   target_freq, relation, &new_index))
-		return -EINVAL;
-
-	us2e_set_cpu_divider_index(policy, new_index);
 
 	return 0;
 }
@@ -325,7 +310,7 @@ static int us2e_freq_cpu_exit(struct cpufreq_policy *policy)
 {
 	if (cpufreq_us2e_driver) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
-		us2e_set_cpu_divider_index(policy, 0);
+		us2e_freq_target(policy, 0);
 	}
 
 	return 0;
@@ -358,7 +343,7 @@ static int __init us2e_freq_init(void)
 
 		driver->init = us2e_freq_cpu_init;
 		driver->verify = cpufreq_generic_frequency_table_verify;
-		driver->target = us2e_freq_target;
+		driver->target_index = us2e_freq_target;
 		driver->get = us2e_freq_get;
 		driver->exit = us2e_freq_cpu_exit;
 		strcpy(driver->name, "UltraSPARC-IIe");
diff --git a/drivers/cpufreq/sparc-us3-cpufreq.c b/drivers/cpufreq/sparc-us3-cpufreq.c
index 9b3dbd31362e..2e54d55915df 100644
--- a/drivers/cpufreq/sparc-us3-cpufreq.c
+++ b/drivers/cpufreq/sparc-us3-cpufreq.c
@@ -93,8 +93,7 @@ static unsigned int us3_freq_get(unsigned int cpu)
 	return ret;
 }
 
-static void us3_set_cpu_divider_index(struct cpufreq_policy *policy,
-		unsigned int index)
+static int us3_freq_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	unsigned int cpu = policy->cpu;
 	unsigned long new_bits, new_freq, reg;
@@ -136,22 +135,6 @@ static void us3_set_cpu_divider_index(struct cpufreq_policy *policy,
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
 
 	set_cpus_allowed_ptr(current, &cpus_allowed);
-}
-
-static int us3_freq_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq,
-			  unsigned int relation)
-{
-	unsigned int new_index = 0;
-
-	if (cpufreq_frequency_table_target(policy,
-					   &us3_freq_table[policy->cpu].table[0],
-					   target_freq,
-					   relation,
-					   &new_index))
-		return -EINVAL;
-
-	us3_set_cpu_divider_index(policy, new_index);
 
 	return 0;
 }
@@ -182,7 +165,7 @@ static int us3_freq_cpu_exit(struct cpufreq_policy *policy)
 {
 	if (cpufreq_us3_driver) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
-		us3_set_cpu_divider_index(policy, 0);
+		us3_freq_target(policy, 0);
 	}
 
 	return 0;
@@ -219,7 +202,7 @@ static int __init us3_freq_init(void)
 
 		driver->init = us3_freq_cpu_init;
 		driver->verify = cpufreq_generic_frequency_table_verify;
-		driver->target = us3_freq_target;
+		driver->target_index = us3_freq_target;
 		driver->get = us3_freq_get;
 		driver->exit = us3_freq_cpu_exit;
 		strcpy(driver->name, "UltraSPARC-III");
diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c
index 8841366a2068..11a65be3fd76 100644
--- a/drivers/cpufreq/spear-cpufreq.c
+++ b/drivers/cpufreq/spear-cpufreq.c
@@ -105,20 +105,16 @@ static int spear1340_set_cpu_rate(struct clk *sys_pclk, unsigned long newfreq)
 }
 
 static int spear_cpufreq_target(struct cpufreq_policy *policy,
-		unsigned int target_freq, unsigned int relation)
+		unsigned int index)
 {
 	struct cpufreq_freqs freqs;
 	long newfreq;
 	struct clk *srcclk;
-	int index, ret, mult = 1;
-
-	if (cpufreq_frequency_table_target(policy, spear_cpufreq.freq_tbl,
-				target_freq, relation, &index))
-		return -EINVAL;
+	int ret, mult = 1;
 
 	freqs.old = spear_cpufreq_get(0);
-
 	newfreq = spear_cpufreq.freq_tbl[index].frequency * 1000;
+
 	if (of_machine_is_compatible("st,spear1340")) {
 		/*
 		 * SPEAr1340 is special in the sense that due to the possibility
@@ -179,7 +175,7 @@ static struct cpufreq_driver spear_cpufreq_driver = {
 	.name		= "cpufreq-spear",
 	.flags		= CPUFREQ_STICKY,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= spear_cpufreq_target,
+	.target_index	= spear_cpufreq_target,
 	.get		= spear_cpufreq_get,
 	.init		= spear_cpufreq_init,
 	.exit		= cpufreq_generic_exit,
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index 25e45f89acac..c51ec8c0e3a8 100644
--- a/drivers/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -416,21 +416,17 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
 /**
  * centrino_setpolicy - set a new CPUFreq policy
  * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @index: index of target frequency
  *
  * Sets a new CPUFreq policy.
  */
-static int centrino_target (struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
+static int centrino_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	unsigned int    newstate = 0;
 	unsigned int	msr, oldmsr = 0, h = 0, cpu = policy->cpu;
 	struct cpufreq_freqs	freqs;
 	int			retval = 0;
 	unsigned int		j, first_cpu, tmp;
+	struct cpufreq_frequency_table *op_points;
 	cpumask_var_t covered_cpus;
 
 	if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)))
@@ -441,16 +437,8 @@ static int centrino_target (struct cpufreq_policy *policy,
 		goto out;
 	}
 
-	if (unlikely(cpufreq_frequency_table_target(policy,
-			per_cpu(centrino_model, cpu)->op_points,
-			target_freq,
-			relation,
-			&newstate))) {
-		retval = -EINVAL;
-		goto out;
-	}
-
 	first_cpu = 1;
+	op_points = &per_cpu(centrino_model, cpu)->op_points[index];
 	for_each_cpu(j, policy->cpus) {
 		int good_cpu;
 
@@ -474,7 +462,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 			break;
 		}
 
-		msr = per_cpu(centrino_model, cpu)->op_points[newstate].driver_data;
+		msr = op_points->driver_data;
 
 		if (first_cpu) {
 			rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
@@ -489,7 +477,8 @@ static int centrino_target (struct cpufreq_policy *policy,
 			freqs.new = extract_clock(msr, cpu, 0);
 
 			pr_debug("target=%dkHz old=%d new=%d msr=%04x\n",
-				target_freq, freqs.old, freqs.new, msr);
+				op_points->frequency, freqs.old, freqs.new,
+				msr);
 
 			cpufreq_notify_transition(policy, &freqs,
 					CPUFREQ_PRECHANGE);
@@ -540,7 +529,7 @@ static struct cpufreq_driver centrino_driver = {
 	.init		= centrino_cpu_init,
 	.exit		= centrino_cpu_exit,
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= centrino_target,
+	.target_index	= centrino_target,
 	.get		= get_cur_freq,
 	.attr		= cpufreq_generic_attr,
 };
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index 1a8b01bd0fec..707721ebb853 100644
--- a/drivers/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -251,36 +251,24 @@ static unsigned int speedstep_get(unsigned int cpu)
 /**
  * speedstep_target - set a new CPUFreq policy
  * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @index: index of target frequency
  *
  * Sets a new CPUFreq policy.
  */
-static int speedstep_target(struct cpufreq_policy *policy,
-			     unsigned int target_freq,
-			     unsigned int relation)
+static int speedstep_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	unsigned int newstate = 0, policy_cpu;
+	unsigned int policy_cpu;
 	struct cpufreq_freqs freqs;
 
-	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
 	policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
 	freqs.old = speedstep_get(policy_cpu);
-	freqs.new = speedstep_freqs[newstate].frequency;
+	freqs.new = speedstep_freqs[index].frequency;
 
 	pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new);
 
-	/* no transition necessary */
-	if (freqs.old == freqs.new)
-		return 0;
-
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
-	smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
+	smp_call_function_single(policy_cpu, _speedstep_set_state, &index,
 				 true);
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
@@ -330,7 +318,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 static struct cpufreq_driver speedstep_driver = {
 	.name	= "speedstep-ich",
 	.verify	= cpufreq_generic_frequency_table_verify,
-	.target	= speedstep_target,
+	.target_index = speedstep_target,
 	.init	= speedstep_cpu_init,
 	.exit	= cpufreq_generic_exit,
 	.get	= speedstep_get,
diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
index a02b649c9647..19446e479ccc 100644
--- a/drivers/cpufreq/speedstep-smi.c
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -235,29 +235,19 @@ static void speedstep_set_state(unsigned int state)
 /**
  * speedstep_target - set a new CPUFreq policy
  * @policy: new policy
- * @target_freq: new freq
- * @relation:
+ * @index: index of new freq
  *
  * Sets a new CPUFreq policy/freq.
  */
-static int speedstep_target(struct cpufreq_policy *policy,
-			unsigned int target_freq, unsigned int relation)
+static int speedstep_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	unsigned int newstate = 0;
 	struct cpufreq_freqs freqs;
 
-	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
 	freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
-	freqs.new = speedstep_freqs[newstate].frequency;
-
-	if (freqs.old == freqs.new)
-		return 0;
+	freqs.new = speedstep_freqs[index].frequency;
 
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-	speedstep_set_state(newstate);
+	speedstep_set_state(index);
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
 
 	return 0;
@@ -327,7 +317,7 @@ static int speedstep_resume(struct cpufreq_policy *policy)
 static struct cpufreq_driver speedstep_driver = {
 	.name		= "speedstep-smi",
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= speedstep_target,
+	.target_index	= speedstep_target,
 	.init		= speedstep_cpu_init,
 	.exit		= cpufreq_generic_exit,
 	.get		= speedstep_get,
diff --git a/drivers/cpufreq/tegra-cpufreq.c b/drivers/cpufreq/tegra-cpufreq.c
index 32483ef63d53..bd7d89c013a5 100644
--- a/drivers/cpufreq/tegra-cpufreq.c
+++ b/drivers/cpufreq/tegra-cpufreq.c
@@ -150,11 +150,8 @@ static unsigned long tegra_cpu_highest_speed(void)
 	return rate;
 }
 
-static int tegra_target(struct cpufreq_policy *policy,
-		       unsigned int target_freq,
-		       unsigned int relation)
+static int tegra_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	unsigned int idx;
 	unsigned int freq;
 	int ret = 0;
 
@@ -165,10 +162,7 @@ static int tegra_target(struct cpufreq_policy *policy,
 		goto out;
 	}
 
-	cpufreq_frequency_table_target(policy, freq_table, target_freq,
-		relation, &idx);
-
-	freq = freq_table[idx].frequency;
+	freq = freq_table[index].frequency;
 
 	target_cpu_speed[policy->cpu] = freq;
 
@@ -238,7 +232,7 @@ static int tegra_cpu_exit(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver tegra_cpufreq_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
-	.target		= tegra_target,
+	.target_index	= tegra_target,
 	.get		= tegra_getspeed,
 	.init		= tegra_cpu_init,
 	.exit		= tegra_cpu_exit,
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 0aba2a6cadaf..e8c77d330479 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -187,9 +187,11 @@ struct cpufreq_driver {
 
 	/* define one out of two */
 	int	(*setpolicy)	(struct cpufreq_policy *policy);
-	int	(*target)	(struct cpufreq_policy *policy,
+	int	(*target)	(struct cpufreq_policy *policy,	/* Deprecated */
 				 unsigned int target_freq,
 				 unsigned int relation);
+	int	(*target_index)	(struct cpufreq_policy *policy,
+				 unsigned int index);
 
 	/* should be defined, if possible */
 	unsigned int	(*get)	(unsigned int cpu);
-- 
cgit v1.2.3


From ad7722dab7292dbc1c4586d701ac226b68122d39 Mon Sep 17 00:00:00 2001
From: viresh kumar <viresh.kumar@linaro.org>
Date: Fri, 18 Oct 2013 19:10:15 +0530
Subject: cpufreq: create per policy rwsem instead of per CPU cpu_policy_rwsem

We have per-CPU cpu_policy_rwsem for cpufreq core, but we never use
all of them. We always use rwsem of policy->cpu and so we can
actually make this rwsem per policy instead.

This patch does this change. With this change other tricky situations
are also avoided now, like which lock to take while we are changing
policy->cpu, etc.

Suggested-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 110 +++++++++++++---------------------------------
 include/linux/cpufreq.h   |  14 ++++++
 2 files changed, 45 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 87ed83a6c894..6c9cbb9ebd1f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -52,47 +52,6 @@ static inline bool has_target(void)
 	return cpufreq_driver->target_index || cpufreq_driver->target;
 }
 
-/*
- * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
- * all cpufreq/hotplug/workqueue/etc related lock issues.
- *
- * The rules for this semaphore:
- * - Any routine that wants to read from the policy structure will
- *   do a down_read on this semaphore.
- * - Any routine that will write to the policy structure and/or may take away
- *   the policy altogether (eg. CPU hotplug), will hold this lock in write
- *   mode before doing so.
- *
- * Additional rules:
- * - Governor routines that can be called in cpufreq hotplug path should not
- *   take this sem as top level hotplug notifier handler takes this.
- * - Lock should not be held across
- *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
- */
-static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
-
-#define lock_policy_rwsem(mode, cpu)					\
-static void lock_policy_rwsem_##mode(int cpu)				\
-{									\
-	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);	\
-	BUG_ON(!policy);						\
-	down_##mode(&per_cpu(cpu_policy_rwsem, policy->cpu));		\
-}
-
-lock_policy_rwsem(read, cpu);
-lock_policy_rwsem(write, cpu);
-
-#define unlock_policy_rwsem(mode, cpu)					\
-static void unlock_policy_rwsem_##mode(int cpu)				\
-{									\
-	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);	\
-	BUG_ON(!policy);						\
-	up_##mode(&per_cpu(cpu_policy_rwsem, policy->cpu));		\
-}
-
-unlock_policy_rwsem(read, cpu);
-unlock_policy_rwsem(write, cpu);
-
 /*
  * rwsem to guarantee that cpufreq driver module doesn't unload during critical
  * sections
@@ -688,14 +647,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 	if (!down_read_trylock(&cpufreq_rwsem))
 		return -EINVAL;
 
-	lock_policy_rwsem_read(policy->cpu);
+	down_read(&policy->rwsem);
 
 	if (fattr->show)
 		ret = fattr->show(policy, buf);
 	else
 		ret = -EIO;
 
-	unlock_policy_rwsem_read(policy->cpu);
+	up_read(&policy->rwsem);
 	up_read(&cpufreq_rwsem);
 
 	return ret;
@@ -716,14 +675,14 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	if (!down_read_trylock(&cpufreq_rwsem))
 		goto unlock;
 
-	lock_policy_rwsem_write(policy->cpu);
+	down_write(&policy->rwsem);
 
 	if (fattr->store)
 		ret = fattr->store(policy, buf, count);
 	else
 		ret = -EIO;
 
-	unlock_policy_rwsem_write(policy->cpu);
+	up_write(&policy->rwsem);
 
 	up_read(&cpufreq_rwsem);
 unlock:
@@ -900,7 +859,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 		}
 	}
 
-	lock_policy_rwsem_write(policy->cpu);
+	down_write(&policy->rwsem);
 
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 
@@ -908,7 +867,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 	per_cpu(cpufreq_cpu_data, cpu) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	unlock_policy_rwsem_write(policy->cpu);
+	up_write(&policy->rwsem);
 
 	if (has_target()) {
 		if ((ret = __cpufreq_governor(policy, CPUFREQ_GOV_START)) ||
@@ -955,6 +914,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(void)
 		goto err_free_cpumask;
 
 	INIT_LIST_HEAD(&policy->policy_list);
+	init_rwsem(&policy->rwsem);
+
 	return policy;
 
 err_free_cpumask:
@@ -977,19 +938,12 @@ static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
 	if (WARN_ON(cpu == policy->cpu))
 		return;
 
-	/*
-	 * Take direct locks as lock_policy_rwsem_write wouldn't work here.
-	 * Also lock for last cpu is enough here as contention will happen only
-	 * after policy->cpu is changed and after it is changed, other threads
-	 * will try to acquire lock for new cpu. And policy is already updated
-	 * by then.
-	 */
-	down_write(&per_cpu(cpu_policy_rwsem, policy->cpu));
+	down_write(&policy->rwsem);
 
 	policy->last_cpu = policy->cpu;
 	policy->cpu = cpu;
 
-	up_write(&per_cpu(cpu_policy_rwsem, policy->last_cpu));
+	up_write(&policy->rwsem);
 
 	cpufreq_frequency_table_update_policy_cpu(policy);
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
@@ -1181,9 +1135,9 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
 	if (ret) {
 		pr_err("%s: Failed to move kobj: %d", __func__, ret);
 
-		lock_policy_rwsem_write(old_cpu);
+		down_write(&policy->rwsem);
 		cpumask_set_cpu(old_cpu, policy->cpus);
-		unlock_policy_rwsem_write(old_cpu);
+		up_write(&policy->rwsem);
 
 		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
 					"cpufreq");
@@ -1234,9 +1188,9 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
 			policy->governor->name, CPUFREQ_NAME_LEN);
 #endif
 
-	lock_policy_rwsem_read(cpu);
+	down_read(&policy->rwsem);
 	cpus = cpumask_weight(policy->cpus);
-	unlock_policy_rwsem_read(cpu);
+	up_read(&policy->rwsem);
 
 	if (cpu != policy->cpu) {
 		if (!frozen)
@@ -1276,12 +1230,12 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 		return -EINVAL;
 	}
 
-	lock_policy_rwsem_write(cpu);
+	down_write(&policy->rwsem);
 	cpus = cpumask_weight(policy->cpus);
 
 	if (cpus > 1)
 		cpumask_clear_cpu(cpu, policy->cpus);
-	unlock_policy_rwsem_write(cpu);
+	up_write(&policy->rwsem);
 
 	/* If cpu is last user of policy, free policy */
 	if (cpus == 1) {
@@ -1296,10 +1250,10 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 		}
 
 		if (!frozen) {
-			lock_policy_rwsem_read(cpu);
+			down_read(&policy->rwsem);
 			kobj = &policy->kobj;
 			cmp = &policy->kobj_unregister;
-			unlock_policy_rwsem_read(cpu);
+			up_read(&policy->rwsem);
 			kobject_put(kobj);
 
 			/*
@@ -1479,19 +1433,22 @@ static unsigned int __cpufreq_get(unsigned int cpu)
  */
 unsigned int cpufreq_get(unsigned int cpu)
 {
+	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
 	unsigned int ret_freq = 0;
 
 	if (cpufreq_disabled() || !cpufreq_driver)
 		return -ENOENT;
 
+	BUG_ON(!policy);
+
 	if (!down_read_trylock(&cpufreq_rwsem))
 		return 0;
 
-	lock_policy_rwsem_read(cpu);
+	down_read(&policy->rwsem);
 
 	ret_freq = __cpufreq_get(cpu);
 
-	unlock_policy_rwsem_read(cpu);
+	up_read(&policy->rwsem);
 	up_read(&cpufreq_rwsem);
 
 	return ret_freq;
@@ -1744,11 +1701,11 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
 {
 	int ret = -EINVAL;
 
-	lock_policy_rwsem_write(policy->cpu);
+	down_write(&policy->rwsem);
 
 	ret = __cpufreq_driver_target(policy, target_freq, relation);
 
-	unlock_policy_rwsem_write(policy->cpu);
+	up_write(&policy->rwsem);
 
 	return ret;
 }
@@ -1979,10 +1936,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 			/* end old governor */
 			if (policy->governor) {
 				__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
-				unlock_policy_rwsem_write(new_policy->cpu);
+				up_write(&policy->rwsem);
 				__cpufreq_governor(policy,
 						CPUFREQ_GOV_POLICY_EXIT);
-				lock_policy_rwsem_write(new_policy->cpu);
+				down_write(&policy->rwsem);
 			}
 
 			/* start new governor */
@@ -1991,10 +1948,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 				if (!__cpufreq_governor(policy, CPUFREQ_GOV_START)) {
 					failed = 0;
 				} else {
-					unlock_policy_rwsem_write(new_policy->cpu);
+					up_write(&policy->rwsem);
 					__cpufreq_governor(policy,
 							CPUFREQ_GOV_POLICY_EXIT);
-					lock_policy_rwsem_write(new_policy->cpu);
+					down_write(&policy->rwsem);
 				}
 			}
 
@@ -2040,7 +1997,7 @@ int cpufreq_update_policy(unsigned int cpu)
 		goto no_policy;
 	}
 
-	lock_policy_rwsem_write(cpu);
+	down_write(&policy->rwsem);
 
 	pr_debug("updating policy for CPU %u\n", cpu);
 	memcpy(&new_policy, policy, sizeof(*policy));
@@ -2067,7 +2024,7 @@ int cpufreq_update_policy(unsigned int cpu)
 
 	ret = cpufreq_set_policy(policy, &new_policy);
 
-	unlock_policy_rwsem_write(cpu);
+	up_write(&policy->rwsem);
 
 	cpufreq_cpu_put(policy);
 no_policy:
@@ -2225,14 +2182,9 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
 
 static int __init cpufreq_core_init(void)
 {
-	int cpu;
-
 	if (cpufreq_disabled())
 		return -ENODEV;
 
-	for_each_possible_cpu(cpu)
-		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
-
 	cpufreq_global_kobject = kobject_create();
 	BUG_ON(!cpufreq_global_kobject);
 	register_syscore_ops(&cpufreq_syscore_ops);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index e8c77d330479..93a8c34d6c7f 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -85,6 +85,20 @@ struct cpufreq_policy {
 	struct list_head        policy_list;
 	struct kobject		kobj;
 	struct completion	kobj_unregister;
+
+	/*
+	 * The rules for this semaphore:
+	 * - Any routine that wants to read from the policy structure will
+	 *   do a down_read on this semaphore.
+	 * - Any routine that will write to the policy structure and/or may take away
+	 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
+	 *   mode before doing so.
+	 *
+	 * Additional rules:
+	 * - Lock should not be held across
+	 *     __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
+	 */
+	struct rw_semaphore	rwsem;
 };
 
 /* Only for ACPI */
-- 
cgit v1.2.3


From ce77399226313a72578b5b0d67e289d3f165b8ba Mon Sep 17 00:00:00 2001
From: Darbha Sriharsha <dsriharsha@nvidia.com>
Date: Fri, 11 Oct 2013 17:15:43 -0400
Subject: power_supply: Add support for bq24735 charger

Adds support for the bq24735 charger chipset. The bq24735 is a
high-efficiency, synchronous battery charger.

It allows control of the charging current, input current, and the charger
voltage DAC's through SMBus.

Signed-off-by: Darbha Sriharsha <dsriharsha@nvidia.com>
Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Thanks-to: Stephen Warren <swarren@wwwdotorg.org>
Thanks-to: Thierry Reding <thierry.reding@gmail.com>
Thanks-to: Manish Badarkhe <badarkhe.manish@gmail.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 .../bindings/power_supply/ti,bq24735.txt           |  32 ++
 drivers/power/Kconfig                              |   6 +
 drivers/power/Makefile                             |   1 +
 drivers/power/bq24735-charger.c                    | 419 +++++++++++++++++++++
 include/linux/power/bq24735-charger.h              |  39 ++
 5 files changed, 497 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/power_supply/ti,bq24735.txt
 create mode 100644 drivers/power/bq24735-charger.c
 create mode 100644 include/linux/power/bq24735-charger.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/power_supply/ti,bq24735.txt b/Documentation/devicetree/bindings/power_supply/ti,bq24735.txt
new file mode 100644
index 000000000000..4f6a550184d0
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ti,bq24735.txt
@@ -0,0 +1,32 @@
+TI BQ24735 Charge Controller
+~~~~~~~~~~
+
+Required properties :
+ - compatible : "ti,bq24735"
+
+Optional properties :
+ - interrupts : Specify the interrupt to be used to trigger when the AC
+   adapter is either plugged in or removed.
+ - ti,ac-detect-gpios : This GPIO is optionally used to read the AC adapter
+   presence. This is a Host GPIO that is configured as an input and
+   connected to the bq24735.
+ - ti,charge-current : Used to control and set the charging current. This value
+   must be between 128mA and 8.128A with a 64mA step resolution. The POR value
+   is 0x0000h. This number is in mA (e.g. 8192), see spec for more information
+   about the ChargeCurrent (0x14h) register.
+ - ti,charge-voltage : Used to control and set the charging voltage. This value
+   must be between 1.024V and 19.2V with a 16mV step resolution. The POR value
+   is 0x0000h. This number is in mV (e.g. 19200), see spec for more information
+   about the ChargeVoltage (0x15h) register.
+ - ti,input-current : Used to control and set the charger input current. This
+   value must be between 128mA and 8.064A with a 128mA step resolution. The
+   POR value is 0x1000h. This number is in mA (e.g. 8064), see the spec for
+   more information about the InputCurrent (0x3fh) register.
+
+Example:
+
+	bq24735@9 {
+		compatible = "ti,bq24735";
+		reg = <0x9>;
+		ti,ac-detect-gpios = <&gpio 72 0x1>;
+	}
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index e6f92b450913..5e2054afe840 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -346,6 +346,12 @@ config CHARGER_BQ24190
 	help
 	  Say Y to enable support for the TI BQ24190 battery charger.
 
+config CHARGER_BQ24735
+	tristate "TI BQ24735 battery charger support"
+	depends on I2C && GPIOLIB
+	help
+	  Say Y to enable support for the TI BQ24735 battery charger.
+
 config CHARGER_SMB347
 	tristate "Summit Microelectronics SMB347 Battery Charger"
 	depends on I2C
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index a4b74177706f..372b4e8ab598 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_CHARGER_MAX8997)	+= max8997_charger.o
 obj-$(CONFIG_CHARGER_MAX8998)	+= max8998_charger.o
 obj-$(CONFIG_CHARGER_BQ2415X)	+= bq2415x_charger.o
 obj-$(CONFIG_CHARGER_BQ24190)	+= bq24190_charger.o
+obj-$(CONFIG_CHARGER_BQ24735)	+= bq24735-charger.o
 obj-$(CONFIG_POWER_AVS)		+= avs/
 obj-$(CONFIG_CHARGER_SMB347)	+= smb347-charger.o
 obj-$(CONFIG_CHARGER_TPS65090)	+= tps65090-charger.o
diff --git a/drivers/power/bq24735-charger.c b/drivers/power/bq24735-charger.c
new file mode 100644
index 000000000000..d022b823305b
--- /dev/null
+++ b/drivers/power/bq24735-charger.c
@@ -0,0 +1,419 @@
+/*
+ * Battery charger driver for TI BQ24735
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/power_supply.h>
+#include <linux/slab.h>
+
+#include <linux/power/bq24735-charger.h>
+
+#define BQ24735_CHG_OPT			0x12
+#define BQ24735_CHG_OPT_CHARGE_DISABLE	(1 << 0)
+#define BQ24735_CHG_OPT_AC_PRESENT	(1 << 4)
+#define BQ24735_CHARGE_CURRENT		0x14
+#define BQ24735_CHARGE_CURRENT_MASK	0x1fc0
+#define BQ24735_CHARGE_VOLTAGE		0x15
+#define BQ24735_CHARGE_VOLTAGE_MASK	0x7ff0
+#define BQ24735_INPUT_CURRENT		0x3f
+#define BQ24735_INPUT_CURRENT_MASK	0x1f80
+#define BQ24735_MANUFACTURER_ID		0xfe
+#define BQ24735_DEVICE_ID		0xff
+
+struct bq24735 {
+	struct power_supply	charger;
+	struct i2c_client	*client;
+	struct bq24735_platform	*pdata;
+};
+
+static inline struct bq24735 *to_bq24735(struct power_supply *psy)
+{
+	return container_of(psy, struct bq24735, charger);
+}
+
+static enum power_supply_property bq24735_charger_properties[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+};
+
+static inline int bq24735_write_word(struct i2c_client *client, u8 reg,
+				     u16 value)
+{
+	return i2c_smbus_write_word_data(client, reg, le16_to_cpu(value));
+}
+
+static inline int bq24735_read_word(struct i2c_client *client, u8 reg)
+{
+	s32 ret = i2c_smbus_read_word_data(client, reg);
+
+	return ret < 0 ? ret : le16_to_cpu(ret);
+}
+
+static int bq24735_update_word(struct i2c_client *client, u8 reg,
+			       u16 mask, u16 value)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = bq24735_read_word(client, reg);
+	if (ret < 0)
+		return ret;
+
+	tmp = ret & ~mask;
+	tmp |= value & mask;
+
+	return bq24735_write_word(client, reg, tmp);
+}
+
+static inline int bq24735_enable_charging(struct bq24735 *charger)
+{
+	return bq24735_update_word(charger->client, BQ24735_CHG_OPT,
+				   BQ24735_CHG_OPT_CHARGE_DISABLE,
+				   ~BQ24735_CHG_OPT_CHARGE_DISABLE);
+}
+
+static inline int bq24735_disable_charging(struct bq24735 *charger)
+{
+	return bq24735_update_word(charger->client, BQ24735_CHG_OPT,
+				   BQ24735_CHG_OPT_CHARGE_DISABLE,
+				   BQ24735_CHG_OPT_CHARGE_DISABLE);
+}
+
+static int bq24735_config_charger(struct bq24735 *charger)
+{
+	struct bq24735_platform *pdata = charger->pdata;
+	int ret;
+	u16 value;
+
+	if (pdata->charge_current) {
+		value = pdata->charge_current & BQ24735_CHARGE_CURRENT_MASK;
+
+		ret = bq24735_write_word(charger->client,
+					 BQ24735_CHARGE_CURRENT, value);
+		if (ret < 0) {
+			dev_err(&charger->client->dev,
+				"Failed to write charger current : %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	if (pdata->charge_voltage) {
+		value = pdata->charge_voltage & BQ24735_CHARGE_VOLTAGE_MASK;
+
+		ret = bq24735_write_word(charger->client,
+					 BQ24735_CHARGE_VOLTAGE, value);
+		if (ret < 0) {
+			dev_err(&charger->client->dev,
+				"Failed to write charger voltage : %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	if (pdata->input_current) {
+		value = pdata->input_current & BQ24735_INPUT_CURRENT_MASK;
+
+		ret = bq24735_write_word(charger->client,
+					 BQ24735_INPUT_CURRENT, value);
+		if (ret < 0) {
+			dev_err(&charger->client->dev,
+				"Failed to write input current : %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static bool bq24735_charger_is_present(struct bq24735 *charger)
+{
+	struct bq24735_platform *pdata = charger->pdata;
+	int ret;
+
+	if (pdata->status_gpio_valid) {
+		ret = gpio_get_value_cansleep(pdata->status_gpio);
+		return ret ^= pdata->status_gpio_active_low == 0;
+	} else {
+		int ac = 0;
+
+		ac = bq24735_read_word(charger->client, BQ24735_CHG_OPT);
+		if (ac < 0) {
+			dev_err(&charger->client->dev,
+				"Failed to read charger options : %d\n",
+				ac);
+			return false;
+		}
+		return (ac & BQ24735_CHG_OPT_AC_PRESENT) ? true : false;
+	}
+
+	return false;
+}
+
+static irqreturn_t bq24735_charger_isr(int irq, void *devid)
+{
+	struct power_supply *psy = devid;
+	struct bq24735 *charger = to_bq24735(psy);
+
+	if (bq24735_charger_is_present(charger))
+		bq24735_enable_charging(charger);
+	else
+		bq24735_disable_charging(charger);
+
+	power_supply_changed(psy);
+
+	return IRQ_HANDLED;
+}
+
+static int bq24735_charger_get_property(struct power_supply *psy,
+					enum power_supply_property psp,
+					union power_supply_propval *val)
+{
+	struct bq24735 *charger;
+
+	charger = container_of(psy, struct bq24735, charger);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = bq24735_charger_is_present(charger) ? 1 : 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct bq24735_platform *bq24735_parse_dt_data(struct i2c_client *client)
+{
+	struct bq24735_platform *pdata;
+	struct device_node *np = client->dev.of_node;
+	u32 val;
+	int ret;
+	enum of_gpio_flags flags;
+
+	pdata = devm_kzalloc(&client->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		dev_err(&client->dev,
+			"Memory alloc for bq24735 pdata failed\n");
+		return NULL;
+	}
+
+	pdata->status_gpio = of_get_named_gpio_flags(np, "ti,ac-detect-gpios",
+						     0, &flags);
+
+	if (flags & OF_GPIO_ACTIVE_LOW)
+		pdata->status_gpio_active_low = 1;
+
+	ret = of_property_read_u32(np, "ti,charge-current", &val);
+	if (!ret)
+		pdata->charge_current = val;
+
+	ret = of_property_read_u32(np, "ti,charge-voltage", &val);
+	if (!ret)
+		pdata->charge_voltage = val;
+
+	ret = of_property_read_u32(np, "ti,input-current", &val);
+	if (!ret)
+		pdata->input_current = val;
+
+	return pdata;
+}
+
+static int bq24735_charger_probe(struct i2c_client *client,
+				 const struct i2c_device_id *id)
+{
+	int ret;
+	struct bq24735 *charger;
+	struct power_supply *supply;
+	char *name;
+
+	charger = devm_kzalloc(&client->dev, sizeof(*charger), GFP_KERNEL);
+	if (!charger)
+		return -ENOMEM;
+
+	charger->pdata = client->dev.platform_data;
+
+	if (IS_ENABLED(CONFIG_OF) && !charger->pdata && client->dev.of_node)
+		charger->pdata = bq24735_parse_dt_data(client);
+
+	if (!charger->pdata) {
+		dev_err(&client->dev, "no platform data provided\n");
+		return -EINVAL;
+	}
+
+	name = (char *)charger->pdata->name;
+	if (!name) {
+		name = kasprintf(GFP_KERNEL, "bq24735@%s",
+				 dev_name(&client->dev));
+		if (!name) {
+			dev_err(&client->dev, "Failed to alloc device name\n");
+			return -ENOMEM;
+		}
+	}
+
+	charger->client = client;
+
+	supply = &charger->charger;
+
+	supply->name = name;
+	supply->type = POWER_SUPPLY_TYPE_MAINS;
+	supply->properties = bq24735_charger_properties;
+	supply->num_properties = ARRAY_SIZE(bq24735_charger_properties);
+	supply->get_property = bq24735_charger_get_property;
+	supply->supplied_to = charger->pdata->supplied_to;
+	supply->num_supplicants = charger->pdata->num_supplicants;
+	supply->of_node = client->dev.of_node;
+
+	i2c_set_clientdata(client, charger);
+
+	ret = bq24735_read_word(client, BQ24735_MANUFACTURER_ID);
+	if (ret < 0) {
+		dev_err(&client->dev, "Failed to read manufacturer id : %d\n",
+			ret);
+		goto err_free_name;
+	} else if (ret != 0x0040) {
+		dev_err(&client->dev,
+			"manufacturer id mismatch. 0x0040 != 0x%04x\n", ret);
+		ret = -ENODEV;
+		goto err_free_name;
+	}
+
+	ret = bq24735_read_word(client, BQ24735_DEVICE_ID);
+	if (ret < 0) {
+		dev_err(&client->dev, "Failed to read device id : %d\n", ret);
+		goto err_free_name;
+	} else if (ret != 0x000B) {
+		dev_err(&client->dev,
+			"device id mismatch. 0x000b != 0x%04x\n", ret);
+		ret = -ENODEV;
+		goto err_free_name;
+	}
+
+	if (gpio_is_valid(charger->pdata->status_gpio)) {
+		ret = devm_gpio_request(&client->dev,
+					charger->pdata->status_gpio,
+					name);
+		if (ret) {
+			dev_err(&client->dev,
+				"Failed GPIO request for GPIO %d: %d\n",
+				charger->pdata->status_gpio, ret);
+		}
+
+		charger->pdata->status_gpio_valid = !ret;
+	}
+
+	ret = bq24735_config_charger(charger);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed in configuring charger");
+		goto err_free_name;
+	}
+
+	/* check for AC adapter presence */
+	if (bq24735_charger_is_present(charger)) {
+		ret = bq24735_enable_charging(charger);
+		if (ret < 0) {
+			dev_err(&client->dev, "Failed to enable charging\n");
+			goto err_free_name;
+		}
+	}
+
+	ret = power_supply_register(&client->dev, supply);
+	if (ret < 0) {
+		dev_err(&client->dev, "Failed to register power supply: %d\n",
+			ret);
+		goto err_free_name;
+	}
+
+	if (client->irq) {
+		ret = devm_request_threaded_irq(&client->dev, client->irq,
+						NULL, bq24735_charger_isr,
+						IRQF_TRIGGER_RISING |
+						IRQF_TRIGGER_FALLING |
+						IRQF_ONESHOT,
+						supply->name, supply);
+		if (ret) {
+			dev_err(&client->dev,
+				"Unable to register IRQ %d err %d\n",
+				client->irq, ret);
+			goto err_unregister_supply;
+		}
+	}
+
+	return 0;
+err_unregister_supply:
+	power_supply_unregister(supply);
+err_free_name:
+	if (name != charger->pdata->name)
+		kfree(name);
+
+	return ret;
+}
+
+static int bq24735_charger_remove(struct i2c_client *client)
+{
+	struct bq24735 *charger = i2c_get_clientdata(client);
+
+	if (charger->client->irq)
+		devm_free_irq(&charger->client->dev, charger->client->irq,
+			      &charger->charger);
+
+	power_supply_unregister(&charger->charger);
+
+	if (charger->charger.name != charger->pdata->name)
+		kfree(charger->charger.name);
+
+	return 0;
+}
+
+static const struct i2c_device_id bq24735_charger_id[] = {
+	{ "bq24735-charger", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, bq24735_charger_id);
+
+static const struct of_device_id bq24735_match_ids[] = {
+	{ .compatible = "ti,bq24735", },
+	{ /* end */ }
+};
+MODULE_DEVICE_TABLE(of, bq24735_match_ids);
+
+static struct i2c_driver bq24735_charger_driver = {
+	.driver = {
+		.name = "bq24735-charger",
+		.owner = THIS_MODULE,
+		.of_match_table = bq24735_match_ids,
+	},
+	.probe = bq24735_charger_probe,
+	.remove = bq24735_charger_remove,
+	.id_table = bq24735_charger_id,
+};
+
+module_i2c_driver(bq24735_charger_driver);
+
+MODULE_DESCRIPTION("bq24735 battery charging driver");
+MODULE_AUTHOR("Darbha Sriharsha <dsriharsha@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/power/bq24735-charger.h b/include/linux/power/bq24735-charger.h
new file mode 100644
index 000000000000..f536164a6069
--- /dev/null
+++ b/include/linux/power/bq24735-charger.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __CHARGER_BQ24735_H_
+#define __CHARGER_BQ24735_H_
+
+#include <linux/types.h>
+#include <linux/power_supply.h>
+
+struct bq24735_platform {
+	uint32_t charge_current;
+	uint32_t charge_voltage;
+	uint32_t input_current;
+
+	const char *name;
+
+	int status_gpio;
+	int status_gpio_active_low;
+	bool status_gpio_valid;
+
+	char **supplied_to;
+	size_t num_supplicants;
+};
+
+#endif /* __CHARGER_BQ24735_H_ */
-- 
cgit v1.2.3


From f84be2bd96a108b09c8440263fa3adb3fb225fa3 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 23 Oct 2013 20:05:27 +0200
Subject: net: make net_get_random_once irq safe

I initial build non irq safe version of net_get_random_once because I
would liked to have the freedom to defer even the extraction process of
get_random_bytes until the nonblocking pool is fully seeded.

I don't think this is a good idea anymore and thus this patch makes
net_get_random_once irq safe. Now someone using net_get_random_once does
not need to care from where it is called.

Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 1 -
 net/core/utils.c    | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index aca446b46754..b292a0435571 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -250,7 +250,6 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
 #define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
 #endif /* HAVE_JUMP_LABEL */
 
-/* BE CAREFUL: this function is not interrupt safe */
 #define net_get_random_once(buf, nbytes)				\
 	({								\
 		bool ___ret = false;					\
diff --git a/net/core/utils.c b/net/core/utils.c
index bf09371e19b1..2f737bf90b3f 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -370,16 +370,17 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
 			   struct static_key *done_key)
 {
 	static DEFINE_SPINLOCK(lock);
+	unsigned long flags;
 
-	spin_lock_bh(&lock);
+	spin_lock_irqsave(&lock, flags);
 	if (*done) {
-		spin_unlock_bh(&lock);
+		spin_unlock_irqrestore(&lock, flags);
 		return false;
 	}
 
 	get_random_bytes(buf, nbytes);
 	*done = true;
-	spin_unlock_bh(&lock);
+	spin_unlock_irqrestore(&lock, flags);
 
 	__net_random_once_disable_jump(done_key);
 
-- 
cgit v1.2.3


From 7f29405403d7c17f539c099987972b862e7e5255 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 23 Oct 2013 16:02:42 -0700
Subject: net: fix rtnl notification in atomic context

commit 991fb3f74c "dev: always advertise rx_flags changes via netlink"
introduced rtnl notification from __dev_set_promiscuity(),
which can be called in atomic context.

Steps to reproduce:
ip tuntap add dev tap1 mode tap
ifconfig tap1 up
tcpdump -nei tap1 &
ip tuntap del dev tap1 mode tap

[  271.627994] device tap1 left promiscuous mode
[  271.639897] BUG: sleeping function called from invalid context at mm/slub.c:940
[  271.664491] in_atomic(): 1, irqs_disabled(): 0, pid: 3394, name: ip
[  271.677525] INFO: lockdep is turned off.
[  271.690503] CPU: 0 PID: 3394 Comm: ip Tainted: G        W    3.12.0-rc3+ #73
[  271.703996] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[  271.731254]  ffffffff81a58506 ffff8807f0d57a58 ffffffff817544e5 ffff88082fa0f428
[  271.760261]  ffff8808071f5f40 ffff8807f0d57a88 ffffffff8108bad1 ffffffff81110ff8
[  271.790683]  0000000000000010 00000000000000d0 00000000000000d0 ffff8807f0d57af8
[  271.822332] Call Trace:
[  271.838234]  [<ffffffff817544e5>] dump_stack+0x55/0x76
[  271.854446]  [<ffffffff8108bad1>] __might_sleep+0x181/0x240
[  271.870836]  [<ffffffff81110ff8>] ? rcu_irq_exit+0x68/0xb0
[  271.887076]  [<ffffffff811a80be>] kmem_cache_alloc_node+0x4e/0x2a0
[  271.903368]  [<ffffffff810b4ddc>] ? vprintk_emit+0x1dc/0x5a0
[  271.919716]  [<ffffffff81614d67>] ? __alloc_skb+0x57/0x2a0
[  271.936088]  [<ffffffff810b4de0>] ? vprintk_emit+0x1e0/0x5a0
[  271.952504]  [<ffffffff81614d67>] __alloc_skb+0x57/0x2a0
[  271.968902]  [<ffffffff8163a0b2>] rtmsg_ifinfo+0x52/0x100
[  271.985302]  [<ffffffff8162ac6d>] __dev_notify_flags+0xad/0xc0
[  272.001642]  [<ffffffff8162ad0c>] __dev_set_promiscuity+0x8c/0x1c0
[  272.017917]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.033961]  [<ffffffff8162b109>] dev_set_promiscuity+0x29/0x50
[  272.049855]  [<ffffffff8172e937>] packet_dev_mc+0x87/0xc0
[  272.065494]  [<ffffffff81732052>] packet_notifier+0x1b2/0x380
[  272.080915]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.096009]  [<ffffffff81761c66>] notifier_call_chain+0x66/0x150
[  272.110803]  [<ffffffff8108503e>] __raw_notifier_call_chain+0xe/0x10
[  272.125468]  [<ffffffff81085056>] raw_notifier_call_chain+0x16/0x20
[  272.139984]  [<ffffffff81620190>] call_netdevice_notifiers_info+0x40/0x70
[  272.154523]  [<ffffffff816201d6>] call_netdevice_notifiers+0x16/0x20
[  272.168552]  [<ffffffff816224c5>] rollback_registered_many+0x145/0x240
[  272.182263]  [<ffffffff81622641>] rollback_registered+0x31/0x40
[  272.195369]  [<ffffffff816229c8>] unregister_netdevice_queue+0x58/0x90
[  272.208230]  [<ffffffff81547ca0>] __tun_detach+0x140/0x340
[  272.220686]  [<ffffffff81547ed6>] tun_chr_close+0x36/0x60

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  4 ++--
 include/linux/rtnetlink.h       |  2 +-
 net/core/dev.c                  | 16 ++++++++--------
 net/core/rtnetlink.c            |  9 +++++----
 4 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2daa066c6cdd..a141f406cb98 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1213,7 +1213,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev,
 	if (err)
 		return err;
 	slave_dev->flags |= IFF_SLAVE;
-	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
+	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
 	return 0;
 }
 
@@ -1222,7 +1222,7 @@ static void bond_upper_dev_unlink(struct net_device *bond_dev,
 {
 	netdev_upper_dev_unlink(slave_dev, bond_dev);
 	slave_dev->flags &= ~IFF_SLAVE;
-	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
+	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
 }
 
 /* enslave device <slave> to bond device <master> */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index f28544b2f9af..939428ad25ac 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -15,7 +15,7 @@ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
 			      u32 id, long expires, u32 error);
 
-extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
 
 /* RTNL is used as a global lock for all changes to network configuration  */
 extern void rtnl_lock(void);
diff --git a/net/core/dev.c b/net/core/dev.c
index bdffd654edc4..0054c8c75f50 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1203,7 +1203,7 @@ void netdev_state_change(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 	}
 }
 EXPORT_SYMBOL(netdev_state_change);
@@ -1293,7 +1293,7 @@ int dev_open(struct net_device *dev)
 	if (ret < 0)
 		return ret;
 
-	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
 	call_netdevice_notifiers(NETDEV_UP, dev);
 
 	return ret;
@@ -1371,7 +1371,7 @@ static int dev_close_many(struct list_head *head)
 	__dev_close_many(head);
 
 	list_for_each_entry_safe(dev, tmp, head, close_list) {
-		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
 		call_netdevice_notifiers(NETDEV_DOWN, dev);
 		list_del_init(&dev->close_list);
 	}
@@ -5258,7 +5258,7 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
 	unsigned int changes = dev->flags ^ old_flags;
 
 	if (gchanges)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
 
 	if (changes & IFF_UP) {
 		if (dev->flags & IFF_UP)
@@ -5490,7 +5490,7 @@ static void rollback_registered_many(struct list_head *head)
 
 		if (!dev->rtnl_link_ops ||
 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
 
 		/*
 		 *	Flush the unicast and multicast chains
@@ -5889,7 +5889,7 @@ int register_netdevice(struct net_device *dev)
 	 */
 	if (!dev->rtnl_link_ops ||
 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
 
 out:
 	return ret;
@@ -6501,7 +6501,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	rcu_barrier();
 	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
-	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
 
 	/*
 	 *	Flush the unicast and multicast chains
@@ -6540,7 +6540,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 *	Prevent userspace races by waiting until the network
 	 *	device is fully setup before sending notifications.
 	 */
-	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
 
 	synchronize_net();
 	err = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4aedf03da052..cf67144d3e3c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1984,14 +1984,15 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+		  gfp_t flags)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 	size_t if_info_size;
 
-	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
 	if (skb == NULL)
 		goto errout;
 
@@ -2002,7 +2003,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
 	return;
 errout:
 	if (err < 0)
@@ -2716,7 +2717,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_JOIN:
 		break;
 	default:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 		break;
 	}
 	return NOTIFY_DONE;
-- 
cgit v1.2.3


From 8fb479a47c869820966e7298f38038aa334d889c Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Wed, 23 Oct 2013 23:36:30 +0200
Subject: netpoll: fix rx_hook() interface by passing the skb

Right now skb->data is passed to rx_hook() even if the skb
has not been linearised and without giving rx_hook() a way
to linearise it.

Change the rx_hook() interface and make it accept the skb
and the offset to the UDP payload as arguments. rx_hook() is
also renamed to rx_skb_hook() to ensure that out of the tree
users notice the API change.

In this way any rx_skb_hook() implementation can perform all
the needed operations to properly (and safely) access the
skb data.

Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |  5 +++--
 net/core/netpoll.c      | 31 ++++++++++++++++++-------------
 2 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index f3c7c24bec1c..fbfdb9d8d3a7 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -24,7 +24,8 @@ struct netpoll {
 	struct net_device *dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
-	void (*rx_hook)(struct netpoll *, int, char *, int);
+	void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb,
+			    int offset, int len);
 
 	union inet_addr local_ip, remote_ip;
 	bool ipv6;
@@ -41,7 +42,7 @@ struct netpoll_info {
 	unsigned long rx_flags;
 	spinlock_t rx_lock;
 	struct semaphore dev_lock;
-	struct list_head rx_np; /* netpolls that registered an rx_hook */
+	struct list_head rx_np; /* netpolls that registered an rx_skb_hook */
 
 	struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */
 	struct sk_buff_head txq;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fc75c9e461b8..8f971990677c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -636,8 +636,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
 
 			netpoll_send_skb(np, send_skb);
 
-			/* If there are several rx_hooks for the same address,
-			   we're fine by sending a single reply */
+			/* If there are several rx_skb_hooks for the same
+			 * address we're fine by sending a single reply
+			 */
 			break;
 		}
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -719,8 +720,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
 
 			netpoll_send_skb(np, send_skb);
 
-			/* If there are several rx_hooks for the same address,
-			   we're fine by sending a single reply */
+			/* If there are several rx_skb_hooks for the same
+			 * address, we're fine by sending a single reply
+			 */
 			break;
 		}
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -756,11 +758,12 @@ static bool pkt_is_ns(struct sk_buff *skb)
 
 int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
-	int proto, len, ulen;
-	int hits = 0;
+	int proto, len, ulen, data_len;
+	int hits = 0, offset;
 	const struct iphdr *iph;
 	struct udphdr *uh;
 	struct netpoll *np, *tmp;
+	uint16_t source;
 
 	if (list_empty(&npinfo->rx_np))
 		goto out;
@@ -820,7 +823,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 
 		len -= iph->ihl*4;
 		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+		offset = (unsigned char *)(uh + 1) - skb->data;
 		ulen = ntohs(uh->len);
+		data_len = skb->len - offset;
+		source = ntohs(uh->source);
 
 		if (ulen != len)
 			goto out;
@@ -834,9 +840,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 			if (np->local_port && np->local_port != ntohs(uh->dest))
 				continue;
 
-			np->rx_hook(np, ntohs(uh->source),
-				       (char *)(uh+1),
-				       ulen - sizeof(struct udphdr));
+			np->rx_skb_hook(np, source, skb, offset, data_len);
 			hits++;
 		}
 	} else {
@@ -859,7 +863,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 			goto out;
 		uh = udp_hdr(skb);
+		offset = (unsigned char *)(uh + 1) - skb->data;
 		ulen = ntohs(uh->len);
+		data_len = skb->len - offset;
+		source = ntohs(uh->source);
 		if (ulen != skb->len)
 			goto out;
 		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
@@ -872,9 +879,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 			if (np->local_port && np->local_port != ntohs(uh->dest))
 				continue;
 
-			np->rx_hook(np, ntohs(uh->source),
-				       (char *)(uh+1),
-				       ulen - sizeof(struct udphdr));
+			np->rx_skb_hook(np, source, skb, offset, data_len);
 			hits++;
 		}
 #endif
@@ -1062,7 +1067,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 
 	npinfo->netpoll = np;
 
-	if (np->rx_hook) {
+	if (np->rx_skb_hook) {
 		spin_lock_irqsave(&npinfo->rx_lock, flags);
 		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
 		list_add_tail(&np->rx, &npinfo->rx_np);
-- 
cgit v1.2.3


From 3915c3b5b10cd127198c1f9e2aefff7808500d92 Mon Sep 17 00:00:00 2001
From: Robert Hancock <hancockrwd@gmail.com>
Date: Mon, 21 Oct 2013 19:26:30 -0600
Subject: libata: Add some missing command descriptions

Add some missing command enumerations from the ATA-8 ACS-3 spec into
include/linux/ata.h, and add the corresponding human-readable command
descriptions in libata-eh.c.

Signed-off-by: Robert Hancock <hancockrwd@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-eh.c | 8 ++++++++
 include/linux/ata.h     | 7 +++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index c69fcce505c0..ca88c48d08b8 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2293,6 +2293,7 @@ const char *ata_get_cmd_descript(u8 command)
 		{ ATA_CMD_IDLE, 		"IDLE" },
 		{ ATA_CMD_EDD, 			"EXECUTE DEVICE DIAGNOSTIC" },
 		{ ATA_CMD_DOWNLOAD_MICRO,   	"DOWNLOAD MICROCODE" },
+		{ ATA_CMD_DOWNLOAD_MICRO_DMA,	"DOWNLOAD MICROCODE DMA" },
 		{ ATA_CMD_NOP,			"NOP" },
 		{ ATA_CMD_FLUSH, 		"FLUSH CACHE" },
 		{ ATA_CMD_FLUSH_EXT, 		"FLUSH CACHE EXT" },
@@ -2313,6 +2314,8 @@ const char *ata_get_cmd_descript(u8 command)
 		{ ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
 		{ ATA_CMD_FPDMA_READ,		"READ FPDMA QUEUED" },
 		{ ATA_CMD_FPDMA_WRITE,		"WRITE FPDMA QUEUED" },
+		{ ATA_CMD_FPDMA_SEND,		"SEND FPDMA QUEUED" },
+		{ ATA_CMD_FPDMA_RECV,		"RECEIVE FPDMA QUEUED" },
 		{ ATA_CMD_PIO_READ,		"READ SECTOR(S)" },
 		{ ATA_CMD_PIO_READ_EXT,		"READ SECTOR(S) EXT" },
 		{ ATA_CMD_PIO_WRITE,		"WRITE SECTOR(S)" },
@@ -2339,12 +2342,15 @@ const char *ata_get_cmd_descript(u8 command)
 		{ ATA_CMD_WRITE_LOG_EXT,	"WRITE LOG EXT" },
 		{ ATA_CMD_READ_LOG_DMA_EXT,	"READ LOG DMA EXT" },
 		{ ATA_CMD_WRITE_LOG_DMA_EXT, 	"WRITE LOG DMA EXT" },
+		{ ATA_CMD_TRUSTED_NONDATA,	"TRUSTED NON-DATA" },
 		{ ATA_CMD_TRUSTED_RCV,		"TRUSTED RECEIVE" },
 		{ ATA_CMD_TRUSTED_RCV_DMA, 	"TRUSTED RECEIVE DMA" },
 		{ ATA_CMD_TRUSTED_SND,		"TRUSTED SEND" },
 		{ ATA_CMD_TRUSTED_SND_DMA, 	"TRUSTED SEND DMA" },
 		{ ATA_CMD_PMP_READ,		"READ BUFFER" },
+		{ ATA_CMD_PMP_READ_DMA,		"READ BUFFER DMA" },
 		{ ATA_CMD_PMP_WRITE,		"WRITE BUFFER" },
+		{ ATA_CMD_PMP_WRITE_DMA,	"WRITE BUFFER DMA" },
 		{ ATA_CMD_CONF_OVERLAY,		"DEVICE CONFIGURATION OVERLAY" },
 		{ ATA_CMD_SEC_SET_PASS,		"SECURITY SET PASSWORD" },
 		{ ATA_CMD_SEC_UNLOCK,		"SECURITY UNLOCK" },
@@ -2363,6 +2369,8 @@ const char *ata_get_cmd_descript(u8 command)
 		{ ATA_CMD_CFA_TRANS_SECT,	"CFA TRANSLATE SECTOR" },
 		{ ATA_CMD_CFA_ERASE,		"CFA ERASE SECTORS" },
 		{ ATA_CMD_CFA_WRITE_MULT_NE, 	"CFA WRITE MULTIPLE WITHOUT ERASE" },
+		{ ATA_CMD_REQ_SENSE_DATA,	"REQUEST SENSE DATA EXT" },
+		{ ATA_CMD_SANITIZE_DEVICE,	"SANITIZE DEVICE" },
 		{ ATA_CMD_READ_LONG,		"READ LONG (with retries)" },
 		{ ATA_CMD_READ_LONG_ONCE,	"READ LONG (without retries)" },
 		{ ATA_CMD_WRITE_LONG,		"WRITE LONG (with retries)" },
diff --git a/include/linux/ata.h b/include/linux/ata.h
index bf4c69ca76df..f2f4d8da97c0 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -219,6 +219,7 @@ enum {
 	ATA_CMD_IDLE		= 0xE3, /* place in idle power mode */
 	ATA_CMD_EDD		= 0x90,	/* execute device diagnostic */
 	ATA_CMD_DOWNLOAD_MICRO  = 0x92,
+	ATA_CMD_DOWNLOAD_MICRO_DMA = 0x93,
 	ATA_CMD_NOP		= 0x00,
 	ATA_CMD_FLUSH		= 0xE7,
 	ATA_CMD_FLUSH_EXT	= 0xEA,
@@ -268,12 +269,15 @@ enum {
 	ATA_CMD_WRITE_LOG_EXT	= 0x3F,
 	ATA_CMD_READ_LOG_DMA_EXT = 0x47,
 	ATA_CMD_WRITE_LOG_DMA_EXT = 0x57,
+	ATA_CMD_TRUSTED_NONDATA	= 0x5B,
 	ATA_CMD_TRUSTED_RCV	= 0x5C,
 	ATA_CMD_TRUSTED_RCV_DMA = 0x5D,
 	ATA_CMD_TRUSTED_SND	= 0x5E,
 	ATA_CMD_TRUSTED_SND_DMA = 0x5F,
 	ATA_CMD_PMP_READ	= 0xE4,
+	ATA_CMD_PMP_READ_DMA	= 0xE9,
 	ATA_CMD_PMP_WRITE	= 0xE8,
+	ATA_CMD_PMP_WRITE_DMA	= 0xEB,
 	ATA_CMD_CONF_OVERLAY	= 0xB1,
 	ATA_CMD_SEC_SET_PASS	= 0xF1,
 	ATA_CMD_SEC_UNLOCK	= 0xF2,
@@ -292,6 +296,9 @@ enum {
 	ATA_CMD_CFA_TRANS_SECT	= 0x87,
 	ATA_CMD_CFA_ERASE	= 0xC0,
 	ATA_CMD_CFA_WRITE_MULT_NE = 0xCD,
+	ATA_CMD_REQ_SENSE_DATA  = 0x0B,
+	ATA_CMD_SANITIZE_DEVICE = 0xB4,
+
 	/* marked obsolete in the ATA/ATAPI-7 spec */
 	ATA_CMD_RESTORE		= 0x10,
 
-- 
cgit v1.2.3


From 84292b29475bc7bb6b04ae5360595bd816ca4efb Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 21 Oct 2013 13:25:36 +0200
Subject: percpu: remove this_cpu_xor() implementation

There is not a single user in the whole kernel.
Besides that this_cpu_xor() is broken anyway since it gets
translated to this_cpu_or() (see __pcpu_size_call() line).

So instead of fixing an unused definition just remove it.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 32 --------------------------------
 1 file changed, 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cc88172c7d9a..fd6ffe459aae 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -375,22 +375,6 @@ do {									\
 # define this_cpu_or(pcp, val)		__pcpu_size_call(this_cpu_or_, (pcp), (val))
 #endif
 
-#ifndef this_cpu_xor
-# ifndef this_cpu_xor_1
-#  define this_cpu_xor_1(pcp, val)	_this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef this_cpu_xor_2
-#  define this_cpu_xor_2(pcp, val)	_this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef this_cpu_xor_4
-#  define this_cpu_xor_4(pcp, val)	_this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef this_cpu_xor_8
-#  define this_cpu_xor_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# define this_cpu_xor(pcp, val)		__pcpu_size_call(this_cpu_or_, (pcp), (val))
-#endif
-
 #define _this_cpu_generic_add_return(pcp, val)				\
 ({									\
 	typeof(pcp) ret__;						\
@@ -629,22 +613,6 @@ do {									\
 # define __this_cpu_or(pcp, val)	__pcpu_size_call(__this_cpu_or_, (pcp), (val))
 #endif
 
-#ifndef __this_cpu_xor
-# ifndef __this_cpu_xor_1
-#  define __this_cpu_xor_1(pcp, val)	__this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef __this_cpu_xor_2
-#  define __this_cpu_xor_2(pcp, val)	__this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef __this_cpu_xor_4
-#  define __this_cpu_xor_4(pcp, val)	__this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef __this_cpu_xor_8
-#  define __this_cpu_xor_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# define __this_cpu_xor(pcp, val)	__pcpu_size_call(__this_cpu_xor_, (pcp), (val))
-#endif
-
 #define __this_cpu_generic_add_return(pcp, val)				\
 ({									\
 	__this_cpu_add(pcp, val);					\
-- 
cgit v1.2.3


From a6eaa2ae7820f0cca742917f85f34ef1eed2c95d Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Mon, 2 Sep 2013 23:37:53 -0300
Subject: of_mtd: Add no-op stubs to support CONFIG_OF=n

Just like the rest of the subsystems, let's add the required no-op
functions to implement stubs when CONFIG_OF=n.

This prevents MTD drivers from having ugly ifdefs in their code,
and instead hide the ifdef monster in the header closet (far away
from people's sight).

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/of_mtd.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_mtd.h b/include/linux/of_mtd.h
index ed7f267e6389..6f10e938ff7e 100644
--- a/include/linux/of_mtd.h
+++ b/include/linux/of_mtd.h
@@ -10,10 +10,29 @@
 #define __LINUX_OF_NET_H
 
 #ifdef CONFIG_OF_MTD
+
 #include <linux/of.h>
 int of_get_nand_ecc_mode(struct device_node *np);
 int of_get_nand_bus_width(struct device_node *np);
 bool of_get_nand_on_flash_bbt(struct device_node *np);
-#endif
+
+#else /* CONFIG_OF_MTD */
+
+static inline int of_get_nand_ecc_mode(struct device_node *np)
+{
+	return -ENOSYS;
+}
+
+static inline int of_get_nand_bus_width(struct device_node *np)
+{
+	return -ENOSYS;
+}
+
+static inline bool of_get_nand_on_flash_bbt(struct device_node *np)
+{
+	return false;
+}
+
+#endif /* CONFIG_OF_MTD */
 
 #endif /* __LINUX_OF_MTD_H */
-- 
cgit v1.2.3


From aaadd9819a128fb7ad988b6808581e3175859e2f Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Fri, 23 Aug 2013 23:24:47 -0700
Subject: mtd: nand: remove obsolete 'ecclayout' field

This field is never used, except to print it out.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/fsl_elbc_nand.c | 2 --
 drivers/mtd/nand/fsl_ifc_nand.c  | 2 --
 include/linux/mtd/nand.h         | 2 --
 3 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 20657209a472..c6ef9f1c7a82 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -650,8 +650,6 @@ static int fsl_elbc_chip_init_tail(struct mtd_info *mtd)
 	        chip->page_shift);
 	dev_dbg(priv->dev, "fsl_elbc_init: nand->phys_erase_shift = %d\n",
 	        chip->phys_erase_shift);
-	dev_dbg(priv->dev, "fsl_elbc_init: nand->ecclayout = %p\n",
-	        chip->ecclayout);
 	dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.mode = %d\n",
 	        chip->ecc.mode);
 	dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.steps = %d\n",
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index 317a771f1587..47acda6c4e92 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -718,8 +718,6 @@ static int fsl_ifc_chip_init_tail(struct mtd_info *mtd)
 							chip->page_shift);
 	dev_dbg(priv->dev, "%s: nand->phys_erase_shift = %d\n", __func__,
 							chip->phys_erase_shift);
-	dev_dbg(priv->dev, "%s: nand->ecclayout = %p\n", __func__,
-							chip->ecclayout);
 	dev_dbg(priv->dev, "%s: nand->ecc.mode = %d\n", __func__,
 							chip->ecc.mode);
 	dev_dbg(priv->dev, "%s: nand->ecc.steps = %d\n", __func__,
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index ac8e89d5a792..129548169400 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -498,7 +498,6 @@ struct nand_buffers {
  *			supported, 0 otherwise.
  * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
  * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
- * @ecclayout:		[REPLACEABLE] the default ECC placement scheme
  * @bbt:		[INTERN] bad block table pointer
  * @bbt_td:		[REPLACEABLE] bad block table descriptor for flash
  *			lookup.
@@ -572,7 +571,6 @@ struct nand_chip {
 
 	uint8_t *oob_poi;
 	struct nand_hw_control *controller;
-	struct nand_ecclayout *ecclayout;
 
 	struct nand_ecc_ctrl ecc;
 	struct nand_buffers *buffers;
-- 
cgit v1.2.3


From 1d0ed69ddd714b6e2a974f42896463366923ded6 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Wed, 25 Sep 2013 14:58:10 +0800
Subject: mtd: nand: add a helper to check the SLC/MLC nand chip

Add a helper to check if a nand chip is SLC or MLC.
This helper makes the code more readable.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/denali.c    |  2 +-
 drivers/mtd/nand/nand_base.c | 14 ++++++--------
 include/linux/mtd/nand.h     |  9 +++++++++
 3 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 154b033b4e8e..370b9dd7a278 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1520,7 +1520,7 @@ int denali_init(struct denali_nand_info *denali)
 	 * so just let controller do 15bit ECC for MLC and 8bit ECC for
 	 * SLC if possible.
 	 * */
-	if (denali->nand.cellinfo & NAND_CI_CELLTYPE_MSK &&
+	if (!nand_is_slc(&denali->nand) &&
 			(denali->mtd.oobsize > (denali->bbtskipbytes +
 			ECC_15BITS * (denali->mtd.writesize /
 			ECC_SECTOR_SIZE)))) {
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 0b39d0c03f1e..6ea66270dbf5 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3108,8 +3108,7 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip,
 	 * ID to decide what to do.
 	 */
 	if (id_len == 6 && id_data[0] == NAND_MFR_SAMSUNG &&
-			(chip->cellinfo & NAND_CI_CELLTYPE_MSK) &&
-			id_data[5] != 0x00) {
+			!nand_is_slc(chip) && id_data[5] != 0x00) {
 		/* Calc pagesize */
 		mtd->writesize = 2048 << (extid & 0x03);
 		extid >>= 2;
@@ -3141,7 +3140,7 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip,
 			(((extid >> 1) & 0x04) | (extid & 0x03));
 		*busw = 0;
 	} else if (id_len == 6 && id_data[0] == NAND_MFR_HYNIX &&
-			(chip->cellinfo & NAND_CI_CELLTYPE_MSK)) {
+			!nand_is_slc(chip)) {
 		unsigned int tmp;
 
 		/* Calc pagesize */
@@ -3204,7 +3203,7 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip,
 		 * - ID byte 5, bit[7]:    1 -> BENAND, 0 -> raw SLC
 		 */
 		if (id_len >= 6 && id_data[0] == NAND_MFR_TOSHIBA &&
-				!(chip->cellinfo & NAND_CI_CELLTYPE_MSK) &&
+				nand_is_slc(chip) &&
 				(id_data[5] & 0x7) == 0x6 /* 24nm */ &&
 				!(id_data[4] & 0x80) /* !BENAND */) {
 			mtd->oobsize = 32 * mtd->writesize >> 9;
@@ -3265,11 +3264,11 @@ static void nand_decode_bbm_options(struct mtd_info *mtd,
 	 * Micron devices with 2KiB pages and on SLC Samsung, Hynix, Toshiba,
 	 * AMD/Spansion, and Macronix.  All others scan only the first page.
 	 */
-	if ((chip->cellinfo & NAND_CI_CELLTYPE_MSK) &&
+	if (!nand_is_slc(chip) &&
 			(maf_id == NAND_MFR_SAMSUNG ||
 			 maf_id == NAND_MFR_HYNIX))
 		chip->bbt_options |= NAND_BBT_SCANLASTPAGE;
-	else if ((!(chip->cellinfo & NAND_CI_CELLTYPE_MSK) &&
+	else if ((nand_is_slc(chip) &&
 				(maf_id == NAND_MFR_SAMSUNG ||
 				 maf_id == NAND_MFR_HYNIX ||
 				 maf_id == NAND_MFR_TOSHIBA ||
@@ -3745,8 +3744,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 	chip->ecc.total = chip->ecc.steps * chip->ecc.bytes;
 
 	/* Allow subpage writes up to ecc.steps. Not possible for MLC flash */
-	if (!(chip->options & NAND_NO_SUBPAGE_WRITE) &&
-	    !(chip->cellinfo & NAND_CI_CELLTYPE_MSK)) {
+	if (!(chip->options & NAND_NO_SUBPAGE_WRITE) && nand_is_slc(chip)) {
 		switch (chip->ecc.steps) {
 		case 2:
 			mtd->subpage_sft = 1;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 129548169400..5c05bab0ad89 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -795,4 +795,13 @@ static inline int onfi_get_sync_timing_mode(struct nand_chip *chip)
 	return le16_to_cpu(chip->onfi_params.src_sync_timing_mode);
 }
 
+/*
+ * Check if it is a SLC nand.
+ * The !nand_is_slc() can be used to check the MLC/TLC nand chips.
+ * We do not distinguish the MLC and TLC now.
+ */
+static inline bool nand_is_slc(struct nand_chip *chip)
+{
+	return !(chip->cellinfo & NAND_CI_CELLTYPE_MSK);
+}
 #endif /* __LINUX_MTD_NAND_H */
-- 
cgit v1.2.3


From 7db906b79f69b6ed936a1ef1d788f02e3ad42462 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Wed, 25 Sep 2013 14:58:11 +0800
Subject: mtd: nand: rename the cellinfo to bits_per_cell

The @cellinfo fields contains unused information, such as write caching,
internal chip numbering, etc. But we only use it to check the SLC or MLC.

This patch tries to make it more clear and simple, renames the @cellinfo
to @bits_per_cell.

In order to avoiding the bisect issue, this patch also does the following
changes:
  (0) add a macro NAND_CI_CELLTYPE_SHIFT to avoid the hardcode.

  (1) add a helper to parse out the cell type : nand_get_bits_per_cell()

  (2) parse out the cell type for extended-ID chips and the full-id nand chips.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/nand_base.c | 14 ++++++++++++--
 include/linux/mtd/nand.h     |  7 ++++---
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 6ea66270dbf5..5fb00957fd25 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3082,6 +3082,16 @@ static int nand_id_len(u8 *id_data, int arrlen)
 	return arrlen;
 }
 
+/* Extract the bits of per cell from the 3rd byte of the extended ID */
+static int nand_get_bits_per_cell(u8 cellinfo)
+{
+	int bits;
+
+	bits = cellinfo & NAND_CI_CELLTYPE_MSK;
+	bits >>= NAND_CI_CELLTYPE_SHIFT;
+	return bits + 1;
+}
+
 /*
  * Many new NAND share similar device ID codes, which represent the size of the
  * chip. The rest of the parameters must be decoded according to generic or
@@ -3092,7 +3102,7 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip,
 {
 	int extid, id_len;
 	/* The 3rd id byte holds MLC / multichip data */
-	chip->cellinfo = id_data[2];
+	chip->bits_per_cell = nand_get_bits_per_cell(id_data[2]);
 	/* The 4th id byte is the important one */
 	extid = id_data[3];
 
@@ -3292,7 +3302,7 @@ static bool find_full_id_nand(struct mtd_info *mtd, struct nand_chip *chip,
 		mtd->erasesize = type->erasesize;
 		mtd->oobsize = type->oobsize;
 
-		chip->cellinfo = id_data[2];
+		chip->bits_per_cell = nand_get_bits_per_cell(id_data[2]);
 		chip->chipsize = (uint64_t)type->chipsize << 20;
 		chip->options |= type->options;
 		chip->ecc_strength_ds = NAND_ECC_STRENGTH(type);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 5c05bab0ad89..9e6c8f9f306e 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -198,6 +198,7 @@ typedef enum {
 /* Cell info constants */
 #define NAND_CI_CHIPNR_MSK	0x03
 #define NAND_CI_CELLTYPE_MSK	0x0C
+#define NAND_CI_CELLTYPE_SHIFT	2
 
 /* Keep gcc happy */
 struct nand_chip;
@@ -477,7 +478,7 @@ struct nand_buffers {
  * @badblockbits:	[INTERN] minimum number of set bits in a good block's
  *			bad block marker position; i.e., BBM == 11110111b is
  *			not bad when badblockbits == 7
- * @cellinfo:		[INTERN] MLC/multichip data from chip ident
+ * @bits_per_cell:	[INTERN] number of bits per cell. i.e., 1 means SLC.
  * @ecc_strength_ds:	[INTERN] ECC correctability from the datasheet.
  *			Minimum amount of bit errors per @ecc_step_ds guaranteed
  *			to be correctable. If unknown, set to zero.
@@ -558,7 +559,7 @@ struct nand_chip {
 	int pagebuf;
 	unsigned int pagebuf_bitflips;
 	int subpagesize;
-	uint8_t cellinfo;
+	uint8_t bits_per_cell;
 	uint16_t ecc_strength_ds;
 	uint16_t ecc_step_ds;
 	int badblockpos;
@@ -802,6 +803,6 @@ static inline int onfi_get_sync_timing_mode(struct nand_chip *chip)
  */
 static inline bool nand_is_slc(struct nand_chip *chip)
 {
-	return !(chip->cellinfo & NAND_CI_CELLTYPE_MSK);
+	return chip->bits_per_cell == 1;
 }
 #endif /* __LINUX_MTD_NAND_H */
-- 
cgit v1.2.3


From 818b97392932ac4cecc36ab839957258367004a9 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Wed, 25 Sep 2013 14:58:17 +0800
Subject: mtd: nand: add a helper to detect the nand type

This helper detects that whether the mtd's type is nand type.

Now, it's clear that the MTD_NANDFLASH stands for SLC nand only.
So use the mtd_type_is_nand() to replace the old check method
to do the nand type (include the SLC and MLC) check.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/inftlcore.c         | 2 +-
 drivers/mtd/nftlcore.c          | 2 +-
 drivers/mtd/ssfdc.c             | 2 +-
 drivers/mtd/tests/nandbiterrs.c | 2 +-
 drivers/mtd/tests/oobtest.c     | 2 +-
 drivers/mtd/tests/pagetest.c    | 2 +-
 drivers/mtd/tests/subpagetest.c | 2 +-
 include/linux/mtd/mtd.h         | 5 +++++
 8 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index 3af351484098..b66b541877f0 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -50,7 +50,7 @@ static void inftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	struct INFTLrecord *inftl;
 	unsigned long temp;
 
-	if (mtd->type != MTD_NANDFLASH || mtd->size > UINT_MAX)
+	if (!mtd_type_is_nand(mtd) || mtd->size > UINT_MAX)
 		return;
 	/* OK, this is moderately ugly.  But probably safe.  Alternatives? */
 	if (memcmp(mtd->name, "DiskOnChip", 10))
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index c5f4ebf4b384..46f27de018c3 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -50,7 +50,7 @@ static void nftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	struct NFTLrecord *nftl;
 	unsigned long temp;
 
-	if (mtd->type != MTD_NANDFLASH || mtd->size > UINT_MAX)
+	if (!mtd_type_is_nand(mtd) || mtd->size > UINT_MAX)
 		return;
 	/* OK, this is moderately ugly.  But probably safe.  Alternatives? */
 	if (memcmp(mtd->name, "DiskOnChip", 10))
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index ab2a52a039c3..daf82ba7aba0 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -290,7 +290,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	int cis_sector;
 
 	/* Check for small page NAND flash */
-	if (mtd->type != MTD_NANDFLASH || mtd->oobsize != OOB_SIZE ||
+	if (!mtd_type_is_nand(mtd) || mtd->oobsize != OOB_SIZE ||
 	    mtd->size > UINT_MAX)
 		return;
 
diff --git a/drivers/mtd/tests/nandbiterrs.c b/drivers/mtd/tests/nandbiterrs.c
index 3cd3aabbe1cd..6f976159611f 100644
--- a/drivers/mtd/tests/nandbiterrs.c
+++ b/drivers/mtd/tests/nandbiterrs.c
@@ -349,7 +349,7 @@ static int __init mtd_nandbiterrs_init(void)
 		goto exit_mtddev;
 	}
 
-	if (mtd->type != MTD_NANDFLASH) {
+	if (!mtd_type_is_nand(mtd)) {
 		pr_info("this test requires NAND flash\n");
 		err = -ENODEV;
 		goto exit_nand;
diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c
index ff35c465bfee..2e9e2d11f204 100644
--- a/drivers/mtd/tests/oobtest.c
+++ b/drivers/mtd/tests/oobtest.c
@@ -289,7 +289,7 @@ static int __init mtd_oobtest_init(void)
 		return err;
 	}
 
-	if (mtd->type != MTD_NANDFLASH) {
+	if (!mtd_type_is_nand(mtd)) {
 		pr_info("this test requires NAND flash\n");
 		goto out;
 	}
diff --git a/drivers/mtd/tests/pagetest.c b/drivers/mtd/tests/pagetest.c
index 44b96e999ad4..ed2d3f656fd2 100644
--- a/drivers/mtd/tests/pagetest.c
+++ b/drivers/mtd/tests/pagetest.c
@@ -353,7 +353,7 @@ static int __init mtd_pagetest_init(void)
 		return err;
 	}
 
-	if (mtd->type != MTD_NANDFLASH) {
+	if (!mtd_type_is_nand(mtd)) {
 		pr_info("this test requires NAND flash\n");
 		goto out;
 	}
diff --git a/drivers/mtd/tests/subpagetest.c b/drivers/mtd/tests/subpagetest.c
index e2c0adf24cfc..a876371ad410 100644
--- a/drivers/mtd/tests/subpagetest.c
+++ b/drivers/mtd/tests/subpagetest.c
@@ -299,7 +299,7 @@ static int __init mtd_subpagetest_init(void)
 		return err;
 	}
 
-	if (mtd->type != MTD_NANDFLASH) {
+	if (!mtd_type_is_nand(mtd)) {
 		pr_info("this test requires NAND flash\n");
 		goto out;
 	}
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index f9bfe526d310..88409b813418 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -354,6 +354,11 @@ static inline int mtd_has_oob(const struct mtd_info *mtd)
 	return mtd->_read_oob && mtd->_write_oob;
 }
 
+static inline int mtd_type_is_nand(const struct mtd_info *mtd)
+{
+	return mtd->type == MTD_NANDFLASH || mtd->type == MTD_MLCNANDFLASH;
+}
+
 static inline int mtd_can_have_bb(const struct mtd_info *mtd)
 {
 	return !!mtd->_block_isbad;
-- 
cgit v1.2.3


From 8f2535b92d685c68db4bc699dd78462a646f6ef9 Mon Sep 17 00:00:00 2001
From: Chun-Yeow Yeoh <yeohchunyeow@cozybit.com>
Date: Mon, 14 Oct 2013 19:08:27 -0700
Subject: mac80211: process the CSA frame for mesh accordingly

Process the CSA frame according to the procedures define in IEEE Std
802.11-2012 section 10.9.8.4.3 as follow:
* The mesh channel switch parameters element (MCSP) must be availabe.
* If the MCSP's TTL is 1, drop the frame but still process the CSA.
* If the MCSP's precedence value is less than or equal to the current
  precedence value, drop the frame and do not process the CSA.
* The CSA frame is forwarded after TTL is decremented by 1 and the
  initiator field is set to 0. Transmit restrict field and others
  are maintained as is.
* No beacon or probe response frame are handled here.

Also, introduce the debug message used for mesh CSA purpose.

Signed-off-by: Chun-Yeow Yeoh <yeohchunyeow@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  | 20 +++++++++++
 net/mac80211/Kconfig       | 11 ++++++
 net/mac80211/debug.h       | 10 ++++++
 net/mac80211/ieee80211_i.h |  4 +++
 net/mac80211/mesh.c        | 83 ++++++++++++++++++++++++++++++++++++++++++++--
 net/mac80211/util.c        |  9 +++++
 6 files changed, 134 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7c1e1ebc0e23..8c3b26a21574 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -696,6 +696,18 @@ struct ieee80211_sec_chan_offs_ie {
 	u8 sec_chan_offs;
 } __packed;
 
+/**
+ * struct ieee80211_mesh_chansw_params_ie - mesh channel switch parameters IE
+ *
+ * This structure represents the "Mesh Channel Switch Paramters element"
+ */
+struct ieee80211_mesh_chansw_params_ie {
+	u8 mesh_ttl;
+	u8 mesh_flags;
+	__le16 mesh_reason;
+	__le16 mesh_pre_value;
+} __packed;
+
 /**
  * struct ieee80211_wide_bw_chansw_ie - wide bandwidth channel switch IE
  */
@@ -750,6 +762,14 @@ enum mesh_config_capab_flags {
 	IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL	= 0x40,
 };
 
+/**
+ * mesh channel switch parameters element's flag indicator
+ *
+ */
+#define WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT BIT(0)
+#define WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR BIT(1)
+#define WLAN_EID_CHAN_SWITCH_PARAM_REASON BIT(2)
+
 /**
  * struct ieee80211_rann_ie
  *
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index dc31ec3db404..97b5dcad5025 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -259,6 +259,17 @@ config MAC80211_MESH_SYNC_DEBUG
 
 	  Do not select this option.
 
+config MAC80211_MESH_CSA_DEBUG
+	bool "Verbose mesh channel switch debugging"
+	depends on MAC80211_DEBUG_MENU
+	depends on MAC80211_MESH
+	---help---
+	  Selecting this option causes mac80211 to print out very verbose mesh
+	  channel switch debugging messages (when mac80211 is taking part in a
+	  mesh network).
+
+	  Do not select this option.
+
 config MAC80211_MESH_PS_DEBUG
 	bool "Verbose mesh powersave debugging"
 	depends on MAC80211_DEBUG_MENU
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 4ccc5ed6237d..493d68061f0c 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -44,6 +44,12 @@
 #define MAC80211_MESH_SYNC_DEBUG 0
 #endif
 
+#ifdef CONFIG_MAC80211_MESH_CSA_DEBUG
+#define MAC80211_MESH_CSA_DEBUG 1
+#else
+#define MAC80211_MESH_CSA_DEBUG 0
+#endif
+
 #ifdef CONFIG_MAC80211_MESH_PS_DEBUG
 #define MAC80211_MESH_PS_DEBUG 1
 #else
@@ -157,6 +163,10 @@ do {									\
 	_sdata_dbg(MAC80211_MESH_SYNC_DEBUG,				\
 		   sdata, fmt, ##__VA_ARGS__)
 
+#define mcsa_dbg(sdata, fmt, ...)					\
+	_sdata_dbg(MAC80211_MESH_CSA_DEBUG,				\
+		   sdata, fmt, ##__VA_ARGS__)
+
 #define mps_dbg(sdata, fmt, ...)					\
 	_sdata_dbg(MAC80211_MESH_PS_DEBUG,				\
 		   sdata, fmt, ##__VA_ARGS__)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index cbaea32bccf1..4ebbcc6f67e0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -603,6 +603,9 @@ struct ieee80211_if_mesh {
 	int ps_peers_light_sleep;
 	int ps_peers_deep_sleep;
 	struct ps_data ps;
+	/* Channel Switching Support */
+	bool chsw_init;
+	u16 pre_value;
 };
 
 #ifdef CONFIG_MAC80211_MESH
@@ -1252,6 +1255,7 @@ struct ieee802_11_elems {
 	const struct ieee80211_timeout_interval_ie *timeout_int;
 	const u8 *opmode_notif;
 	const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
+	const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
 
 	/* length of them, respectively */
 	u8 ssid_len;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 707ac61d63e5..0a3ccaa275f9 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -920,6 +920,82 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
 			stype, mgmt, &elems, rx_status);
 }
 
+static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
+			       struct ieee80211_mgmt *mgmt, size_t len)
+{
+	struct ieee80211_mgmt *mgmt_fwd;
+	struct sk_buff *skb;
+	struct ieee80211_local *local = sdata->local;
+	u8 *pos = mgmt->u.action.u.chan_switch.variable;
+	size_t offset_ttl;
+
+	skb = dev_alloc_skb(local->tx_headroom + len);
+	if (!skb)
+		return -ENOMEM;
+	skb_reserve(skb, local->tx_headroom);
+	mgmt_fwd = (struct ieee80211_mgmt *) skb_put(skb, len);
+
+	/* offset_ttl is based on whether the secondary channel
+	 * offset is available or not. Substract 1 from the mesh TTL
+	 * and disable the initiator flag before forwarding.
+	 */
+	offset_ttl = (len < 42) ? 7 : 10;
+	*(pos + offset_ttl) -= 1;
+	*(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR;
+
+	memcpy(mgmt_fwd, mgmt, len);
+	eth_broadcast_addr(mgmt_fwd->da);
+	memcpy(mgmt_fwd->sa, sdata->vif.addr, ETH_ALEN);
+	memcpy(mgmt_fwd->bssid, sdata->vif.addr, ETH_ALEN);
+
+	ieee80211_tx_skb(sdata, skb);
+	return 0;
+}
+
+static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
+			      struct ieee80211_mgmt *mgmt, size_t len)
+{
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	struct ieee802_11_elems elems;
+	u16 pre_value;
+	bool block_tx, fwd_csa = true;
+	size_t baselen;
+	u8 *pos, ttl;
+
+	if (mgmt->u.action.u.measurement.action_code !=
+	    WLAN_ACTION_SPCT_CHL_SWITCH)
+		return;
+
+	pos = mgmt->u.action.u.chan_switch.variable;
+	baselen = offsetof(struct ieee80211_mgmt,
+			   u.action.u.chan_switch.variable);
+	ieee802_11_parse_elems(pos, len - baselen, false, &elems);
+
+	ttl = elems.mesh_chansw_params_ie->mesh_ttl;
+	if (!--ttl)
+		fwd_csa = false;
+
+	pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value);
+	if (ifmsh->pre_value >= pre_value)
+		return;
+
+	ifmsh->pre_value = pre_value;
+
+	/* forward or re-broadcast the CSA frame */
+	if (fwd_csa) {
+		if (mesh_fwd_csa_frame(sdata, mgmt, len) < 0)
+			mcsa_dbg(sdata, "Failed to forward the CSA frame");
+	}
+
+	/* block the Tx only after forwarding the CSA frame if required */
+	block_tx = elems.mesh_chansw_params_ie->mesh_flags &
+		   WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT;
+	if (block_tx)
+		ieee80211_stop_queues_by_reason(&sdata->local->hw,
+				IEEE80211_MAX_QUEUE_MAP,
+				IEEE80211_QUEUE_STOP_REASON_CSA);
+}
+
 static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 					  struct ieee80211_mgmt *mgmt,
 					  size_t len,
@@ -939,6 +1015,9 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 		if (mesh_action_is_path_sel(mgmt))
 			mesh_rx_path_sel_frame(sdata, mgmt, len);
 		break;
+	case WLAN_CATEGORY_SPECTRUM_MGMT:
+		mesh_rx_csa_frame(sdata, mgmt, len);
+		break;
 	}
 }
 
@@ -1056,13 +1135,11 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 		    (unsigned long) sdata);
 
 	ifmsh->accepting_plinks = true;
-	ifmsh->preq_id = 0;
-	ifmsh->sn = 0;
-	ifmsh->num_gates = 0;
 	atomic_set(&ifmsh->mpaths, 0);
 	mesh_rmc_init(sdata);
 	ifmsh->last_preq = jiffies;
 	ifmsh->next_perr = jiffies;
+	ifmsh->chsw_init = false;
 	/* Allocate all mesh structures when creating the first mesh interface. */
 	if (!mesh_allocated)
 		ieee80211s_init();
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 65ebe0c5e835..523783cedf6e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -740,6 +740,7 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 		case WLAN_EID_TIMEOUT_INTERVAL:
 		case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
 		case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+		case WLAN_EID_CHAN_SWITCH_PARAM:
 		/*
 		 * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible
 		 * that if the content gets bigger it might be needed more than once
@@ -905,6 +906,14 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			}
 			elems->sec_chan_offs = (void *)pos;
 			break;
+		case WLAN_EID_CHAN_SWITCH_PARAM:
+			if (elen !=
+			    sizeof(*elems->mesh_chansw_params_ie)) {
+				elem_parse_failed = true;
+				break;
+			}
+			elems->mesh_chansw_params_ie = (void *)pos;
+			break;
 		case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
 			if (!action ||
 			    elen != sizeof(*elems->wide_bw_chansw_ie)) {
-- 
cgit v1.2.3


From 40b00b6b17c412ff9ff28631250d32ee29ff0006 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 17 Oct 2013 14:12:23 -0400
Subject: SUNRPC: Add a helper to switch the transport of an rpc_clnt

Add an RPC client API to redirect an rpc_clnt's transport from a
source server to a destination server during a migration event.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
[ cel: forward ported to 3.12 ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |   4 ++
 net/sunrpc/clnt.c           | 107 +++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 104 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 943ee895f2d1..8af2804bab16 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -136,6 +136,10 @@ void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 struct rpc_clnt *rpc_clone_client_set_auth(struct rpc_clnt *,
 				rpc_authflavor_t);
+int		rpc_switch_client_transport(struct rpc_clnt *,
+				struct xprt_create *,
+				const struct rpc_timeout *);
+
 void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
 void		rpc_task_release_client(struct rpc_task *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 587f31e0577e..f167d9c8d7dd 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -25,6 +25,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/slab.h>
+#include <linux/rcupdate.h>
 #include <linux/utsname.h>
 #include <linux/workqueue.h>
 #include <linux/in.h>
@@ -264,6 +265,25 @@ void rpc_clients_notifier_unregister(void)
 	return rpc_pipefs_notifier_unregister(&rpc_clients_block);
 }
 
+static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
+		struct rpc_xprt *xprt,
+		const struct rpc_timeout *timeout)
+{
+	struct rpc_xprt *old;
+
+	spin_lock(&clnt->cl_lock);
+	old = clnt->cl_xprt;
+
+	if (!xprt_bound(xprt))
+		clnt->cl_autobind = 1;
+
+	clnt->cl_timeout = timeout;
+	rcu_assign_pointer(clnt->cl_xprt, xprt);
+	spin_unlock(&clnt->cl_lock);
+
+	return old;
+}
+
 static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
 {
 	clnt->cl_nodelen = strlen(nodename);
@@ -338,7 +358,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 {
 	const struct rpc_program *program = args->program;
 	const struct rpc_version *version;
-	struct rpc_clnt		*clnt = NULL;
+	struct rpc_clnt *clnt = NULL;
+	const struct rpc_timeout *timeout;
 	int err;
 
 	/* sanity check the name before trying to print it */
@@ -366,7 +387,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 	if (err)
 		goto out_no_clid;
 
-	rcu_assign_pointer(clnt->cl_xprt, xprt);
 	clnt->cl_procinfo = version->procs;
 	clnt->cl_maxproc  = version->nrprocs;
 	clnt->cl_prog     = args->prognumber ? : program->number;
@@ -381,16 +401,15 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 	INIT_LIST_HEAD(&clnt->cl_tasks);
 	spin_lock_init(&clnt->cl_lock);
 
-	if (!xprt_bound(xprt))
-		clnt->cl_autobind = 1;
-
-	clnt->cl_timeout = xprt->timeout;
+	timeout = xprt->timeout;
 	if (args->timeout != NULL) {
 		memcpy(&clnt->cl_timeout_default, args->timeout,
 				sizeof(clnt->cl_timeout_default));
-		clnt->cl_timeout = &clnt->cl_timeout_default;
+		timeout = &clnt->cl_timeout_default;
 	}
 
+	rpc_clnt_set_transport(clnt, xprt, timeout);
+
 	clnt->cl_rtt = &clnt->cl_rtt_default;
 	rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
 
@@ -601,6 +620,80 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
 }
 EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth);
 
+/**
+ * rpc_switch_client_transport: switch the RPC transport on the fly
+ * @clnt: pointer to a struct rpc_clnt
+ * @args: pointer to the new transport arguments
+ * @timeout: pointer to the new timeout parameters
+ *
+ * This function allows the caller to switch the RPC transport for the
+ * rpc_clnt structure 'clnt' to allow it to connect to a mirrored NFS
+ * server, for instance.  It assumes that the caller has ensured that
+ * there are no active RPC tasks by using some form of locking.
+ *
+ * Returns zero if "clnt" is now using the new xprt.  Otherwise a
+ * negative errno is returned, and "clnt" continues to use the old
+ * xprt.
+ */
+int rpc_switch_client_transport(struct rpc_clnt *clnt,
+		struct xprt_create *args,
+		const struct rpc_timeout *timeout)
+{
+	const struct rpc_timeout *old_timeo;
+	rpc_authflavor_t pseudoflavor;
+	struct rpc_xprt *xprt, *old;
+	struct rpc_clnt *parent;
+	int err;
+
+	xprt = xprt_create_transport(args);
+	if (IS_ERR(xprt)) {
+		dprintk("RPC:       failed to create new xprt for clnt %p\n",
+			clnt);
+		return PTR_ERR(xprt);
+	}
+
+	pseudoflavor = clnt->cl_auth->au_flavor;
+
+	old_timeo = clnt->cl_timeout;
+	old = rpc_clnt_set_transport(clnt, xprt, timeout);
+
+	rpc_unregister_client(clnt);
+	__rpc_clnt_remove_pipedir(clnt);
+
+	/*
+	 * A new transport was created.  "clnt" therefore
+	 * becomes the root of a new cl_parent tree.  clnt's
+	 * children, if it has any, still point to the old xprt.
+	 */
+	parent = clnt->cl_parent;
+	clnt->cl_parent = clnt;
+
+	/*
+	 * The old rpc_auth cache cannot be re-used.  GSS
+	 * contexts in particular are between a single
+	 * client and server.
+	 */
+	err = rpc_client_register(clnt, pseudoflavor, NULL);
+	if (err)
+		goto out_revert;
+
+	synchronize_rcu();
+	if (parent != clnt)
+		rpc_release_client(parent);
+	xprt_put(old);
+	dprintk("RPC:       replaced xprt for clnt %p\n", clnt);
+	return 0;
+
+out_revert:
+	rpc_clnt_set_transport(clnt, old, old_timeo);
+	clnt->cl_parent = parent;
+	rpc_client_register(clnt, pseudoflavor, NULL);
+	xprt_put(xprt);
+	dprintk("RPC:       failed to switch xprt for clnt %p\n", clnt);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
+
 /*
  * Kill all tasks for the given client.
  * XXX: kill their descendants as well?
-- 
cgit v1.2.3


From b03d735b4ca2375d2251195cd848713bc55e7d79 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 17 Oct 2013 14:12:50 -0400
Subject: NFS: Add method to retrieve fs_locations during migration recovery

The nfs4_proc_fs_locations() function is invoked during referral
processing to perform a GETATTR(fs_locations) on an object's parent
directory in order to discover the target of the referral.  It
performs a LOOKUP in the compound, so the client needs to know the
parent's file handle a priori.

Unfortunately this function is not adequate for handling migration
recovery.  We need to probe fs_locations information on an FSID, but
there's no parent directory available for many operations that
can return NFS4ERR_MOVED.

Another subtlety: recovering from NFS4ERR_LEASE_MOVED is a process
of walking over a list of known FSIDs that reside on the server, and
probing whether they have migrated.  Once the server has detected
that the client has probed all migrated file systems, it stops
returning NFS4ERR_LEASE_MOVED.

A minor version zero server needs to know what client ID is
requesting fs_locations information so it can clear the flag that
forces it to continue returning NFS4ERR_LEASE_MOVED.  This flag is
set per client ID and per FSID.  However, the client ID is not an
argument of either the PUTFH or GETATTR operations.  Later minor
versions have client ID information embedded in the compound's
SEQUENCE operation.

Therefore, by convention, minor version zero clients send a RENEW
operation in the same compound as the GETATTR(fs_locations), since
RENEW's one argument is a clientid4.  This allows a minor version
zero server to identify correctly the client that is probing for a
migration.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        |   4 ++
 fs/nfs/nfs4proc.c       | 153 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4xdr.c        |  46 +++++++++++----
 include/linux/nfs_xdr.h |   4 ++
 4 files changed, 196 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 4f48000b2b97..e59d3b4c7944 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -205,6 +205,8 @@ struct nfs4_state_maintenance_ops {
 };
 
 struct nfs4_mig_recovery_ops {
+	int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
+		struct page *, struct rpc_cred *);
 };
 
 extern const struct dentry_operations nfs4_dentry_operations;
@@ -237,6 +239,8 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
 extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
 				  struct nfs4_fs_locations *, struct page *);
+extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
+		struct page *page, struct rpc_cred *);
 extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *,
 			    struct nfs_fh *, struct nfs_fattr *);
 extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2aacd1357dc5..c71c16e0ac22 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5984,6 +5984,157 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
 	return err;
 }
 
+/*
+ * This operation also signals the server that this client is
+ * performing migration recovery.  The server can stop returning
+ * NFS4ERR_LEASE_MOVED to this client.  A RENEW operation is
+ * appended to this compound to identify the client ID which is
+ * performing recovery.
+ */
+static int _nfs40_proc_get_locations(struct inode *inode,
+				     struct nfs4_fs_locations *locations,
+				     struct page *page, struct rpc_cred *cred)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct rpc_clnt *clnt = server->client;
+	u32 bitmask[2] = {
+		[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
+	};
+	struct nfs4_fs_locations_arg args = {
+		.clientid	= server->nfs_client->cl_clientid,
+		.fh		= NFS_FH(inode),
+		.page		= page,
+		.bitmask	= bitmask,
+		.migration	= 1,		/* skip LOOKUP */
+		.renew		= 1,		/* append RENEW */
+	};
+	struct nfs4_fs_locations_res res = {
+		.fs_locations	= locations,
+		.migration	= 1,
+		.renew		= 1,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
+		.rpc_argp	= &args,
+		.rpc_resp	= &res,
+		.rpc_cred	= cred,
+	};
+	unsigned long now = jiffies;
+	int status;
+
+	nfs_fattr_init(&locations->fattr);
+	locations->server = server;
+	locations->nlocations = 0;
+
+	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+	nfs4_set_sequence_privileged(&args.seq_args);
+	status = nfs4_call_sync_sequence(clnt, server, &msg,
+					&args.seq_args, &res.seq_res);
+	if (status)
+		return status;
+
+	renew_lease(server, now);
+	return 0;
+}
+
+#ifdef CONFIG_NFS_V4_1
+
+/*
+ * This operation also signals the server that this client is
+ * performing migration recovery.  The server can stop asserting
+ * SEQ4_STATUS_LEASE_MOVED for this client.  The client ID
+ * performing this operation is identified in the SEQUENCE
+ * operation in this compound.
+ *
+ * When the client supports GETATTR(fs_locations_info), it can
+ * be plumbed in here.
+ */
+static int _nfs41_proc_get_locations(struct inode *inode,
+				     struct nfs4_fs_locations *locations,
+				     struct page *page, struct rpc_cred *cred)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct rpc_clnt *clnt = server->client;
+	u32 bitmask[2] = {
+		[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
+	};
+	struct nfs4_fs_locations_arg args = {
+		.fh		= NFS_FH(inode),
+		.page		= page,
+		.bitmask	= bitmask,
+		.migration	= 1,		/* skip LOOKUP */
+	};
+	struct nfs4_fs_locations_res res = {
+		.fs_locations	= locations,
+		.migration	= 1,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
+		.rpc_argp	= &args,
+		.rpc_resp	= &res,
+		.rpc_cred	= cred,
+	};
+	int status;
+
+	nfs_fattr_init(&locations->fattr);
+	locations->server = server;
+	locations->nlocations = 0;
+
+	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+	nfs4_set_sequence_privileged(&args.seq_args);
+	status = nfs4_call_sync_sequence(clnt, server, &msg,
+					&args.seq_args, &res.seq_res);
+	if (status == NFS4_OK &&
+	    res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED)
+		status = -NFS4ERR_LEASE_MOVED;
+	return status;
+}
+
+#endif	/* CONFIG_NFS_V4_1 */
+
+/**
+ * nfs4_proc_get_locations - discover locations for a migrated FSID
+ * @inode: inode on FSID that is migrating
+ * @locations: result of query
+ * @page: buffer
+ * @cred: credential to use for this operation
+ *
+ * Returns NFS4_OK on success, a negative NFS4ERR status code if the
+ * operation failed, or a negative errno if a local error occurred.
+ *
+ * On success, "locations" is filled in, but if the server has
+ * no locations information, NFS_ATTR_FATTR_V4_LOCATIONS is not
+ * asserted.
+ *
+ * -NFS4ERR_LEASE_MOVED is returned if the server still has leases
+ * from this client that require migration recovery.
+ */
+int nfs4_proc_get_locations(struct inode *inode,
+			    struct nfs4_fs_locations *locations,
+			    struct page *page, struct rpc_cred *cred)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_client *clp = server->nfs_client;
+	const struct nfs4_mig_recovery_ops *ops =
+					clp->cl_mvops->mig_recovery_ops;
+	struct nfs4_exception exception = { };
+	int status;
+
+	dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
+		(unsigned long long)server->fsid.major,
+		(unsigned long long)server->fsid.minor,
+		clp->cl_hostname);
+	nfs_display_fhandle(NFS_FH(inode), __func__);
+
+	do {
+		status = ops->get_locations(inode, locations, page, cred);
+		if (status != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, status, &exception);
+	} while (exception.retry);
+	return status;
+}
+
 /**
  * If 'use_integrity' is true and the state managment nfs_client
  * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient
@@ -7882,10 +8033,12 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
 #endif
 
 static const struct nfs4_mig_recovery_ops nfs40_mig_recovery_ops = {
+	.get_locations = _nfs40_proc_get_locations,
 };
 
 #if defined(CONFIG_NFS_V4_1)
 static const struct nfs4_mig_recovery_ops nfs41_mig_recovery_ops = {
+	.get_locations = _nfs41_proc_get_locations,
 };
 #endif	/* CONFIG_NFS_V4_1 */
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 79210d23f607..1854b04f828f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -736,13 +736,15 @@ static int nfs4_stat_to_errno(int);
 				 encode_sequence_maxsz + \
 				 encode_putfh_maxsz + \
 				 encode_lookup_maxsz + \
-				 encode_fs_locations_maxsz)
+				 encode_fs_locations_maxsz + \
+				 encode_renew_maxsz)
 #define NFS4_dec_fs_locations_sz \
 				(compound_decode_hdr_maxsz + \
 				 decode_sequence_maxsz + \
 				 decode_putfh_maxsz + \
 				 decode_lookup_maxsz + \
-				 decode_fs_locations_maxsz)
+				 decode_fs_locations_maxsz + \
+				 decode_renew_maxsz)
 #define NFS4_enc_secinfo_sz 	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
@@ -2687,11 +2689,20 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
 
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
-	encode_putfh(xdr, args->dir_fh, &hdr);
-	encode_lookup(xdr, args->name, &hdr);
-	replen = hdr.replen;	/* get the attribute into args->page */
-	encode_fs_locations(xdr, args->bitmask, &hdr);
+	if (args->migration) {
+		encode_putfh(xdr, args->fh, &hdr);
+		replen = hdr.replen;
+		encode_fs_locations(xdr, args->bitmask, &hdr);
+		if (args->renew)
+			encode_renew(xdr, args->clientid, &hdr);
+	} else {
+		encode_putfh(xdr, args->dir_fh, &hdr);
+		encode_lookup(xdr, args->name, &hdr);
+		replen = hdr.replen;
+		encode_fs_locations(xdr, args->bitmask, &hdr);
+	}
 
+	/* Set up reply kvec to capture returned fs_locations array. */
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
 			0, PAGE_SIZE);
 	encode_nops(&hdr);
@@ -6824,13 +6835,26 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
 	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_lookup(xdr);
-	if (status)
-		goto out;
-	xdr_enter_page(xdr, PAGE_SIZE);
-	status = decode_getfattr_generic(xdr, &res->fs_locations->fattr,
+	if (res->migration) {
+		xdr_enter_page(xdr, PAGE_SIZE);
+		status = decode_getfattr_generic(xdr,
+					&res->fs_locations->fattr,
 					 NULL, res->fs_locations,
 					 NULL, res->fs_locations->server);
+		if (status)
+			goto out;
+		if (res->renew)
+			status = decode_renew(xdr);
+	} else {
+		status = decode_lookup(xdr);
+		if (status)
+			goto out;
+		xdr_enter_page(xdr, PAGE_SIZE);
+		status = decode_getfattr_generic(xdr,
+					&res->fs_locations->fattr,
+					 NULL, res->fs_locations,
+					 NULL, res->fs_locations->server);
+	}
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 49f52c8f4422..405dfadcbc3b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1053,14 +1053,18 @@ struct nfs4_fs_locations {
 struct nfs4_fs_locations_arg {
 	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *dir_fh;
+	const struct nfs_fh *fh;
 	const struct qstr *name;
 	struct page *page;
 	const u32 *bitmask;
+	clientid4 clientid;
+	unsigned char migration:1, renew:1;
 };
 
 struct nfs4_fs_locations_res {
 	struct nfs4_sequence_res	seq_res;
 	struct nfs4_fs_locations       *fs_locations;
+	unsigned char			migration:1, renew:1;
 };
 
 struct nfs4_secinfo4 {
-- 
cgit v1.2.3


From ce6cda1845cf2332d2c411a5c72cd166256b21da Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 17 Oct 2013 14:12:56 -0400
Subject: NFS: Add a super_block backpointer to the nfs_server struct

NFS_SB() returns the pointer to an nfs_server struct, given a
pointer to a super_block.  But we have no way to go back the other
way.

Add a super_block backpointer field so that, given an nfs_server
struct, it is easy to get to the filesystem's root dentry.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c            | 1 +
 include/linux/nfs_fs_sb.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3f5a7a85c9c2..e26647be69ce 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2534,6 +2534,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
 			mntroot = ERR_PTR(error);
 			goto error_splat_bdi;
 		}
+		server->super = s;
 	}
 
 	if (!s->s_root) {
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index f9c0a6cb41e9..46b0cb4bee09 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -148,6 +148,7 @@ struct nfs_server {
 	__u64			maxfilesize;	/* maximum file size */
 	struct timespec		time_delta;	/* smallest time granularity */
 	unsigned long		mount_time;	/* when this fs was mounted */
+	struct super_block	*super;		/* VFS super block */
 	dev_t			s_dev;		/* superblock dev numbers */
 
 #ifdef CONFIG_NFS_FSCACHE
-- 
cgit v1.2.3


From c9fdeb280b8cc511c6730db9ab3973331ea344e0 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 17 Oct 2013 14:13:02 -0400
Subject: NFS: Add basic migration support to state manager thread

Migration recovery and state recovery must be serialized, so handle
both in the state manager thread.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h          |   2 +
 fs/nfs/nfs4client.c       |   1 +
 fs/nfs/nfs4state.c        | 161 +++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/nfs_fs_sb.h |   7 ++
 4 files changed, 168 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e59d3b4c7944..94e783ffcf08 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -29,6 +29,7 @@ enum nfs4_client_state {
 	NFS4CLNT_SERVER_SCOPE_MISMATCH,
 	NFS4CLNT_PURGE_STATE,
 	NFS4CLNT_BIND_CONN_TO_SESSION,
+	NFS4CLNT_MOVED,
 };
 
 #define NFS4_RENEW_TIMEOUT		0x01
@@ -421,6 +422,7 @@ extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
 extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
+extern int nfs4_schedule_migration_recovery(const struct nfs_server *);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs41_handle_server_scope(struct nfs_client *,
 				      struct nfs41_server_scope **);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index a2cba66f8345..0cde95e9bd6a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -197,6 +197,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
 	clp->cl_minorversion = cl_init->minorversion;
 	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+	clp->cl_mig_gen = 1;
 	return clp;
 
 error:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index fa2706a4225a..ba0db18b5f67 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -239,8 +239,6 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
 	}
 }
 
-#if defined(CONFIG_NFS_V4_1)
-
 static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
 {
 	set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
@@ -270,6 +268,8 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
 	return nfs4_drain_slot_tbl(&ses->fc_slot_table);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+
 static int nfs41_setup_state_renewal(struct nfs_client *clp)
 {
 	int status;
@@ -1197,6 +1197,42 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
 }
 EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
 
+/**
+ * nfs4_schedule_migration_recovery - trigger migration recovery
+ *
+ * @server: FSID that is migrating
+ *
+ * Returns zero if recovery has started, otherwise a negative NFS4ERR
+ * value is returned.
+ */
+int nfs4_schedule_migration_recovery(const struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+
+	if (server->fh_expire_type != NFS4_FH_PERSISTENT) {
+		pr_err("NFS: volatile file handles not supported (server %s)\n",
+				clp->cl_hostname);
+		return -NFS4ERR_IO;
+	}
+
+	if (test_bit(NFS_MIG_FAILED, &server->mig_status))
+		return -NFS4ERR_IO;
+
+	dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n",
+			__func__,
+			(unsigned long long)server->fsid.major,
+			(unsigned long long)server->fsid.minor,
+			clp->cl_hostname);
+
+	set_bit(NFS_MIG_IN_TRANSITION,
+			&((struct nfs_server *)server)->mig_status);
+	set_bit(NFS4CLNT_MOVED, &clp->cl_state);
+
+	nfs4_schedule_state_manager(clp);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery);
+
 int nfs4_wait_clnt_recover(struct nfs_client *clp)
 {
 	int res;
@@ -1826,6 +1862,119 @@ static int nfs4_purge_lease(struct nfs_client *clp)
 	return 0;
 }
 
+/*
+ * Try remote migration of one FSID from a source server to a
+ * destination server.  The source server provides a list of
+ * potential destinations.
+ *
+ * Returns zero or a negative NFS4ERR status code.
+ */
+static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_fs_locations *locations = NULL;
+	struct inode *inode;
+	struct page *page;
+	int status, result;
+
+	dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__,
+			(unsigned long long)server->fsid.major,
+			(unsigned long long)server->fsid.minor,
+			clp->cl_hostname);
+
+	result = 0;
+	page = alloc_page(GFP_KERNEL);
+	locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+	if (page == NULL || locations == NULL) {
+		dprintk("<-- %s: no memory\n", __func__);
+		goto out;
+	}
+
+	inode = server->super->s_root->d_inode;
+	result = nfs4_proc_get_locations(inode, locations, page, cred);
+	if (result) {
+		dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
+			__func__, result);
+		goto out;
+	}
+
+	result = -NFS4ERR_NXIO;
+	if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
+		dprintk("<-- %s: No fs_locations data, migration skipped\n",
+			__func__);
+		goto out;
+	}
+
+	nfs4_begin_drain_session(clp);
+
+	status = nfs4_replace_transport(server, locations);
+	if (status != 0) {
+		dprintk("<-- %s: failed to replace transport: %d\n",
+			__func__, status);
+		goto out;
+	}
+
+	result = 0;
+	dprintk("<-- %s: migration succeeded\n", __func__);
+
+out:
+	if (page != NULL)
+		__free_page(page);
+	kfree(locations);
+	if (result) {
+		pr_err("NFS: migration recovery failed (server %s)\n",
+				clp->cl_hostname);
+		set_bit(NFS_MIG_FAILED, &server->mig_status);
+	}
+	return result;
+}
+
+/*
+ * Returns zero or a negative NFS4ERR status code.
+ */
+static int nfs4_handle_migration(struct nfs_client *clp)
+{
+	const struct nfs4_state_maintenance_ops *ops =
+				clp->cl_mvops->state_renewal_ops;
+	struct nfs_server *server;
+	struct rpc_cred *cred;
+
+	dprintk("%s: migration reported on \"%s\"\n", __func__,
+			clp->cl_hostname);
+
+	spin_lock(&clp->cl_lock);
+	cred = ops->get_state_renewal_cred_locked(clp);
+	spin_unlock(&clp->cl_lock);
+	if (cred == NULL)
+		return -NFS4ERR_NOENT;
+
+	clp->cl_mig_gen++;
+restart:
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		int status;
+
+		if (server->mig_gen == clp->cl_mig_gen)
+			continue;
+		server->mig_gen = clp->cl_mig_gen;
+
+		if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION,
+						&server->mig_status))
+			continue;
+
+		rcu_read_unlock();
+		status = nfs4_try_migration(server, cred);
+		if (status < 0) {
+			put_rpccred(cred);
+			return status;
+		}
+		goto restart;
+	}
+	rcu_read_unlock();
+	put_rpccred(cred);
+	return 0;
+}
+
 /**
  * nfs4_discover_server_trunking - Detect server IP address trunking
  *
@@ -2154,7 +2303,13 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			status = nfs4_check_lease(clp);
 			if (status < 0)
 				goto out_error;
-			continue;
+		}
+
+		if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) {
+			section = "migration";
+			status = nfs4_handle_migration(clp);
+			if (status < 0)
+				goto out_error;
 		}
 
 		/* First recover reboot state... */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 46b0cb4bee09..186ec4969313 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -79,6 +79,7 @@ struct nfs_client {
 	char			cl_ipaddr[48];
 	u32			cl_cb_ident;	/* v4.0 callback identifier */
 	const struct nfs4_minor_version_ops *cl_mvops;
+	unsigned long		cl_mig_gen;
 
 	/* NFSv4.0 transport blocking */
 	struct nfs4_slot_table	*cl_slot_tbl;
@@ -189,6 +190,12 @@ struct nfs_server {
 	struct list_head	state_owners_lru;
 	struct list_head	layouts;
 	struct list_head	delegations;
+
+	unsigned long		mig_gen;
+	unsigned long		mig_status;
+#define NFS_MIG_IN_TRANSITION		(1)
+#define NFS_MIG_FAILED			(2)
+
 	void (*destroy)(struct nfs_server *);
 
 	atomic_t active; /* Keep trace of any activity to this server */
-- 
cgit v1.2.3


From 44c9993384e9311cd56acf6ead3baffab616ae50 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 17 Oct 2013 14:13:30 -0400
Subject: NFS: Add method to detect whether an FSID is still on the server

Introduce a mechanism for probing a server to determine if an FSID
is present or absent.

The on-the-wire compound is different between minor version 0 and 1.
Minor version 0 appends a RENEW operation to identify which client
ID is probing.  Minor version 1 has a SEQUENCE operation in the
compound which effectively carries the same information.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        |   2 +
 fs/nfs/nfs4proc.c       | 128 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4xdr.c        |  67 ++++++++++++++++++++++++-
 include/linux/nfs4.h    |   1 +
 include/linux/nfs_xdr.h |  13 +++++
 5 files changed, 209 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 94e783ffcf08..2f0f8c216441 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -208,6 +208,7 @@ struct nfs4_state_maintenance_ops {
 struct nfs4_mig_recovery_ops {
 	int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
 		struct page *, struct rpc_cred *);
+	int (*fsid_present)(struct inode *, struct rpc_cred *);
 };
 
 extern const struct dentry_operations nfs4_dentry_operations;
@@ -242,6 +243,7 @@ extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struc
 				  struct nfs4_fs_locations *, struct page *);
 extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
 		struct page *page, struct rpc_cred *);
+extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *);
 extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *,
 			    struct nfs_fh *, struct nfs_fattr *);
 extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 552e4f7a8225..01b90bd341bb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6153,6 +6153,132 @@ int nfs4_proc_get_locations(struct inode *inode,
 	return status;
 }
 
+/*
+ * This operation also signals the server that this client is
+ * performing "lease moved" recovery.  The server can stop
+ * returning NFS4ERR_LEASE_MOVED to this client.  A RENEW operation
+ * is appended to this compound to identify the client ID which is
+ * performing recovery.
+ */
+static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct rpc_clnt *clnt = server->client;
+	struct nfs4_fsid_present_arg args = {
+		.fh		= NFS_FH(inode),
+		.clientid	= clp->cl_clientid,
+		.renew		= 1,		/* append RENEW */
+	};
+	struct nfs4_fsid_present_res res = {
+		.renew		= 1,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT],
+		.rpc_argp	= &args,
+		.rpc_resp	= &res,
+		.rpc_cred	= cred,
+	};
+	unsigned long now = jiffies;
+	int status;
+
+	res.fh = nfs_alloc_fhandle();
+	if (res.fh == NULL)
+		return -ENOMEM;
+
+	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+	nfs4_set_sequence_privileged(&args.seq_args);
+	status = nfs4_call_sync_sequence(clnt, server, &msg,
+						&args.seq_args, &res.seq_res);
+	nfs_free_fhandle(res.fh);
+	if (status)
+		return status;
+
+	do_renew_lease(clp, now);
+	return 0;
+}
+
+#ifdef CONFIG_NFS_V4_1
+
+/*
+ * This operation also signals the server that this client is
+ * performing "lease moved" recovery.  The server can stop asserting
+ * SEQ4_STATUS_LEASE_MOVED for this client.  The client ID performing
+ * this operation is identified in the SEQUENCE operation in this
+ * compound.
+ */
+static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct rpc_clnt *clnt = server->client;
+	struct nfs4_fsid_present_arg args = {
+		.fh		= NFS_FH(inode),
+	};
+	struct nfs4_fsid_present_res res = {
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT],
+		.rpc_argp	= &args,
+		.rpc_resp	= &res,
+		.rpc_cred	= cred,
+	};
+	int status;
+
+	res.fh = nfs_alloc_fhandle();
+	if (res.fh == NULL)
+		return -ENOMEM;
+
+	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+	nfs4_set_sequence_privileged(&args.seq_args);
+	status = nfs4_call_sync_sequence(clnt, server, &msg,
+						&args.seq_args, &res.seq_res);
+	nfs_free_fhandle(res.fh);
+	if (status == NFS4_OK &&
+	    res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED)
+		status = -NFS4ERR_LEASE_MOVED;
+	return status;
+}
+
+#endif	/* CONFIG_NFS_V4_1 */
+
+/**
+ * nfs4_proc_fsid_present - Is this FSID present or absent on server?
+ * @inode: inode on FSID to check
+ * @cred: credential to use for this operation
+ *
+ * Server indicates whether the FSID is present, moved, or not
+ * recognized.  This operation is necessary to clear a LEASE_MOVED
+ * condition for this client ID.
+ *
+ * Returns NFS4_OK if the FSID is present on this server,
+ * -NFS4ERR_MOVED if the FSID is no longer present, a negative
+ *  NFS4ERR code if some error occurred on the server, or a
+ *  negative errno if a local failure occurred.
+ */
+int nfs4_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_client *clp = server->nfs_client;
+	const struct nfs4_mig_recovery_ops *ops =
+					clp->cl_mvops->mig_recovery_ops;
+	struct nfs4_exception exception = { };
+	int status;
+
+	dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
+		(unsigned long long)server->fsid.major,
+		(unsigned long long)server->fsid.minor,
+		clp->cl_hostname);
+	nfs_display_fhandle(NFS_FH(inode), __func__);
+
+	do {
+		status = ops->fsid_present(inode, cred);
+		if (status != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, status, &exception);
+	} while (exception.retry);
+	return status;
+}
+
 /**
  * If 'use_integrity' is true and the state managment nfs_client
  * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient
@@ -8052,11 +8178,13 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
 
 static const struct nfs4_mig_recovery_ops nfs40_mig_recovery_ops = {
 	.get_locations = _nfs40_proc_get_locations,
+	.fsid_present = _nfs40_proc_fsid_present,
 };
 
 #if defined(CONFIG_NFS_V4_1)
 static const struct nfs4_mig_recovery_ops nfs41_mig_recovery_ops = {
 	.get_locations = _nfs41_proc_get_locations,
+	.fsid_present = _nfs41_proc_fsid_present,
 };
 #endif	/* CONFIG_NFS_V4_1 */
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1854b04f828f..f903389d90f1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -595,11 +595,13 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_getattr_sz	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
-				encode_getattr_maxsz)
+				encode_getattr_maxsz + \
+				encode_renew_maxsz)
 #define NFS4_dec_getattr_sz	(compound_decode_hdr_maxsz + \
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
-				decode_getattr_maxsz)
+				decode_getattr_maxsz + \
+				decode_renew_maxsz)
 #define NFS4_enc_lookup_sz	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
@@ -753,6 +755,18 @@ static int nfs4_stat_to_errno(int);
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_secinfo_maxsz)
+#define NFS4_enc_fsid_present_sz \
+				(compound_encode_hdr_maxsz + \
+				 encode_sequence_maxsz + \
+				 encode_putfh_maxsz + \
+				 encode_getfh_maxsz + \
+				 encode_renew_maxsz)
+#define NFS4_dec_fsid_present_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_sequence_maxsz + \
+				 decode_putfh_maxsz + \
+				 decode_getfh_maxsz + \
+				 decode_renew_maxsz)
 #if defined(CONFIG_NFS_V4_1)
 #define NFS4_enc_bind_conn_to_session_sz \
 				(compound_encode_hdr_maxsz + \
@@ -2726,6 +2740,26 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
+/*
+ * Encode FSID_PRESENT request
+ */
+static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs4_fsid_present_arg *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getfh(xdr, &hdr);
+	if (args->renew)
+		encode_renew(xdr, args->clientid, &hdr);
+	encode_nops(&hdr);
+}
+
 #if defined(CONFIG_NFS_V4_1)
 /*
  * BIND_CONN_TO_SESSION request
@@ -6883,6 +6917,34 @@ out:
 	return status;
 }
 
+/*
+ * Decode FSID_PRESENT response
+ */
+static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs4_fsid_present_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_getfh(xdr, res->fh);
+	if (status)
+		goto out;
+	if (res->renew)
+		status = decode_renew(xdr);
+out:
+	return status;
+}
+
 #if defined(CONFIG_NFS_V4_1)
 /*
  * Decode BIND_CONN_TO_SESSION response
@@ -7397,6 +7459,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
 	PROC(FS_LOCATIONS,	enc_fs_locations,	dec_fs_locations),
 	PROC(RELEASE_LOCKOWNER,	enc_release_lockowner,	dec_release_lockowner),
 	PROC(SECINFO,		enc_secinfo,		dec_secinfo),
+	PROC(FSID_PRESENT,	enc_fsid_present,	dec_fsid_present),
 #if defined(CONFIG_NFS_V4_1)
 	PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
 	PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index e36dee52f224..c56fa8fedce9 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -460,6 +460,7 @@ enum {
 	NFSPROC4_CLNT_FS_LOCATIONS,
 	NFSPROC4_CLNT_RELEASE_LOCKOWNER,
 	NFSPROC4_CLNT_SECINFO,
+	NFSPROC4_CLNT_FSID_PRESENT,
 
 	/* nfs41 */
 	NFSPROC4_CLNT_EXCHANGE_ID,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 405dfadcbc3b..8fe5b940c5f2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1088,6 +1088,19 @@ struct nfs4_secinfo_res {
 	struct nfs4_secinfo_flavors	*flavors;
 };
 
+struct nfs4_fsid_present_arg {
+	struct nfs4_sequence_args	seq_args;
+	const struct nfs_fh		*fh;
+	clientid4			clientid;
+	unsigned char			renew:1;
+};
+
+struct nfs4_fsid_present_res {
+	struct nfs4_sequence_res	seq_res;
+	struct nfs_fh			*fh;
+	unsigned char			renew:1;
+};
+
 #endif /* CONFIG_NFS_V4 */
 
 struct nfstime4 {
-- 
cgit v1.2.3


From 3228f48be2d19b2dd90db96ec16a40187a2946f3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Oct 2013 13:33:58 -0600
Subject: blk-mq: fix for flush deadlock

The flush state machine takes in a struct request, which then is
submitted multiple times to the underling driver.  The old block code
requeses the same request for each of those, so it does not have an
issue with tapping into the request pool.  The new one on the other hand
allocates a new request for each of the actualy steps of the flush
sequence. If have already allocated all of the tags for IO, we will
fail allocating the flush request.

Set aside a reserved request just for flushes.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |  2 +-
 block/blk-flush.c      |  2 +-
 block/blk-mq.c         | 14 ++++++++++++--
 include/linux/blk-mq.h |  2 +-
 4 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 3bb9e9f7f87e..9677c6525ed8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1102,7 +1102,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
 	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw, gfp_mask);
+		return blk_mq_alloc_request(q, rw, gfp_mask, false);
 	else
 		return blk_old_get_request(q, rw, gfp_mask);
 }
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3e4cc9c7890a..331e627301ea 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -286,7 +286,7 @@ static void mq_flush_work(struct work_struct *work)
 
 	/* We don't need set REQ_FLUSH_SEQ, it's for consistency */
 	rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
-		__GFP_WAIT|GFP_ATOMIC);
+		__GFP_WAIT|GFP_ATOMIC, true);
 	rq->cmd_type = REQ_TYPE_FS;
 	rq->end_io = flush_end_io;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ac804c635040..2dc8de86d0d2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -210,14 +210,15 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 	return rq;
 }
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+		gfp_t gfp, bool reserved)
 {
 	struct request *rq;
 
 	if (blk_mq_queue_enter(q))
 		return NULL;
 
-	rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
+	rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved);
 	blk_mq_put_ctx(rq->mq_ctx);
 	return rq;
 }
@@ -1327,6 +1328,15 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 		reg->queue_depth = BLK_MQ_MAX_DEPTH;
 	}
 
+	/*
+	 * Set aside a tag for flush requests.  It will only be used while
+	 * another flush request is in progress but outside the driver.
+	 *
+	 * TODO: only allocate if flushes are supported
+	 */
+	reg->queue_depth++;
+	reg->reserved_tags++;
+
 	if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
 		return ERR_PTR(-EINVAL);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 3368b97bee73..ab0e9b2025b3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -124,7 +124,7 @@ void blk_mq_insert_request(struct request_queue *, struct request *, bool);
 void blk_mq_run_queues(struct request_queue *q, bool async);
 void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp);
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved);
 struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
 struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag);
 
-- 
cgit v1.2.3


From a3f73c27afff9590a4432879b7145289cb89cf0a Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@netapp.com>
Date: Fri, 18 Oct 2013 15:15:16 -0400
Subject: NFS: separate passed security flavs from selected

When filling parsed_mount_data, store the parsed sec= mount option in
the new struct nfs_auth_info and the chosen flavor in selected_flavor.

This patch lays the groundwork for supporting multiple sec= options.

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c         |  3 ++-
 fs/nfs/internal.h       |  4 ++--
 fs/nfs/nfs4client.c     | 16 +++++++++-------
 fs/nfs/super.c          | 47 ++++++++++++++++++++++-------------------------
 include/linux/nfs_xdr.h |  6 ++++++
 5 files changed, 41 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 692fd0e9362f..f5a7f7f9cd59 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -787,7 +787,8 @@ static int nfs_init_server(struct nfs_server *server,
 
 	server->port = data->nfs_server.port;
 
-	error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
+	error = nfs_init_server_rpcclient(server, &timeparms,
+					  data->selected_flavor);
 	if (error < 0)
 		goto error;
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e5a6bd12562a..c8cd044f0982 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -88,8 +88,8 @@ struct nfs_parsed_mount_data {
 	unsigned int		namlen;
 	unsigned int		options;
 	unsigned int		bsize;
-	unsigned int		auth_flavor_len;
-	rpc_authflavor_t	auth_flavors[1];
+	struct nfs_auth_info	auth_info;
+	rpc_authflavor_t	selected_flavor;
 	char			*client_address;
 	unsigned int		version;
 	unsigned int		minorversion;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 0cde95e9bd6a..d65090e5d00f 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -949,9 +949,8 @@ out:
  * Create a version 4 volume record
  */
 static int nfs4_init_server(struct nfs_server *server,
-		const struct nfs_parsed_mount_data *data)
+		struct nfs_parsed_mount_data *data)
 {
-	rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
 	struct rpc_timeout timeparms;
 	int error;
 
@@ -964,8 +963,10 @@ static int nfs4_init_server(struct nfs_server *server,
 	server->flags = data->flags;
 	server->options = data->options;
 
-	if (data->auth_flavor_len >= 1)
-		pseudoflavor = data->auth_flavors[0];
+	if (data->auth_info.flavor_len >= 1)
+		data->selected_flavor = data->auth_info.flavors[0];
+	else
+		data->selected_flavor = RPC_AUTH_UNIX;
 
 	/* Get a client record */
 	error = nfs4_set_client(server,
@@ -973,7 +974,7 @@ static int nfs4_init_server(struct nfs_server *server,
 			(const struct sockaddr *)&data->nfs_server.address,
 			data->nfs_server.addrlen,
 			data->client_address,
-			pseudoflavor,
+			data->selected_flavor,
 			data->nfs_server.protocol,
 			&timeparms,
 			data->minorversion,
@@ -993,7 +994,8 @@ static int nfs4_init_server(struct nfs_server *server,
 
 	server->port = data->nfs_server.port;
 
-	error = nfs_init_server_rpcclient(server, &timeparms, pseudoflavor);
+	error = nfs_init_server_rpcclient(server, &timeparms,
+					  data->selected_flavor);
 
 error:
 	/* Done */
@@ -1020,7 +1022,7 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
-	auth_probe = mount_info->parsed->auth_flavor_len < 1;
+	auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
 
 	/* set up the general RPC client */
 	error = nfs4_init_server(server, mount_info->parsed);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e26647be69ce..b87744fb9dcd 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -923,8 +923,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
 		data->mount_server.port	= NFS_UNSPEC_PORT;
 		data->nfs_server.port	= NFS_UNSPEC_PORT;
 		data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-		data->auth_flavors[0]	= RPC_AUTH_MAXFLAVOR;
-		data->auth_flavor_len	= 0;
+		data->selected_flavor	= RPC_AUTH_MAXFLAVOR;
 		data->minorversion	= 0;
 		data->need_mount	= true;
 		data->net		= current->nsproxy->net_ns;
@@ -1019,13 +1018,6 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
 	}
 }
 
-static void nfs_set_auth_parsed_mount_data(struct nfs_parsed_mount_data *data,
-		rpc_authflavor_t pseudoflavor)
-{
-	data->auth_flavors[0] = pseudoflavor;
-	data->auth_flavor_len = 1;
-}
-
 /*
  * Parse the value of the 'sec=' option.
  */
@@ -1076,7 +1068,8 @@ static int nfs_parse_security_flavors(char *value,
 	}
 
 	mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-	nfs_set_auth_parsed_mount_data(mnt, pseudoflavor);
+	mnt->auth_info.flavors[0] = pseudoflavor;
+	mnt->auth_info.flavor_len = 1;
 	return 1;
 }
 
@@ -1623,7 +1616,7 @@ out_security_failure:
 }
 
 /*
- * Ensure that the specified authtype in args->auth_flavors[0] is supported by
+ * Ensure that the specified authtype in args->auth_info is supported by
  * the server. Returns 0 if it's ok, and -EACCES if not.
  */
 static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args,
@@ -1640,17 +1633,18 @@ static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args,
 	 * can be used.
 	 */
 	for (i = 0; i < count; i++) {
-		if (args->auth_flavors[0] == server_authlist[i] ||
+		if (args->auth_info.flavors[0] == server_authlist[i] ||
 		    server_authlist[i] == RPC_AUTH_NULL)
 			goto out;
 	}
 
 	dfprintk(MOUNT, "NFS: auth flavor %u not supported by server\n",
-		args->auth_flavors[0]);
+		args->auth_info.flavors[0]);
 	return -EACCES;
 
 out:
-	dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]);
+	args->selected_flavor = args->auth_info.flavors[0];
+	dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor);
 	return 0;
 }
 
@@ -1738,9 +1732,10 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
 	 * Was a sec= authflavor specified in the options? First, verify
 	 * whether the server supports it, and then just try to use it if so.
 	 */
-	if (args->auth_flavor_len > 0) {
+	if (args->auth_info.flavor_len > 0) {
 		status = nfs_verify_authflavor(args, authlist, authlist_len);
-		dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]);
+		dfprintk(MOUNT, "NFS: using auth flavor %u\n",
+			 args->selected_flavor);
 		if (status)
 			return ERR_PTR(status);
 		return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
@@ -1769,7 +1764,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
 			/* Fallthrough */
 		}
 		dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor);
-		nfs_set_auth_parsed_mount_data(args, flavor);
+		args->selected_flavor = flavor;
 		server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
 		if (!IS_ERR(server))
 			return server;
@@ -1785,7 +1780,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
 
 	/* Last chance! Try AUTH_UNIX */
 	dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX);
-	nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
+	args->selected_flavor = RPC_AUTH_UNIX;
 	return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
 }
 
@@ -1972,9 +1967,9 @@ static int nfs23_validate_mount_data(void *options,
 		args->bsize		= data->bsize;
 
 		if (data->flags & NFS_MOUNT_SECFLAVOUR)
-			nfs_set_auth_parsed_mount_data(args, data->pseudoflavor);
+			args->selected_flavor = data->pseudoflavor;
 		else
-			nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
+			args->selected_flavor = RPC_AUTH_UNIX;
 		if (!args->nfs_server.hostname)
 			goto out_nomem;
 
@@ -2108,7 +2103,7 @@ static int nfs_validate_text_mount_data(void *options,
 
 	nfs_set_port(sap, &args->nfs_server.port, port);
 
-	if (args->auth_flavor_len > 1)
+	if (args->auth_info.flavor_len > 1)
 		goto out_bad_auth;
 
 	return nfs_parse_devname(dev_name,
@@ -2146,7 +2141,7 @@ nfs_compare_remount_data(struct nfs_server *nfss,
 	    data->version != nfss->nfs_client->rpc_ops->version ||
 	    data->minorversion != nfss->nfs_client->cl_minorversion ||
 	    data->retrans != nfss->client->cl_timeout->to_retries ||
-	    data->auth_flavors[0] != nfss->client->cl_auth->au_flavor ||
+	    data->selected_flavor != nfss->client->cl_auth->au_flavor ||
 	    data->acregmin != nfss->acregmin / HZ ||
 	    data->acregmax != nfss->acregmax / HZ ||
 	    data->acdirmin != nfss->acdirmin / HZ ||
@@ -2191,7 +2186,9 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	data->rsize = nfss->rsize;
 	data->wsize = nfss->wsize;
 	data->retrans = nfss->client->cl_timeout->to_retries;
-	nfs_set_auth_parsed_mount_data(data, nfss->client->cl_auth->au_flavor);
+	data->selected_flavor = nfss->client->cl_auth->au_flavor;
+	data->auth_info.flavors[0] = nfss->client->cl_auth->au_flavor;
+	data->auth_info.flavor_len = 1;
 	data->acregmin = nfss->acregmin / HZ;
 	data->acregmax = nfss->acregmax / HZ;
 	data->acdirmin = nfss->acdirmin / HZ;
@@ -2718,9 +2715,9 @@ static int nfs4_validate_mount_data(void *options,
 					   data->auth_flavours,
 					   sizeof(pseudoflavor)))
 				return -EFAULT;
-			nfs_set_auth_parsed_mount_data(args, pseudoflavor);
+			args->selected_flavor = pseudoflavor;
 		} else
-			nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
+			args->selected_flavor = RPC_AUTH_UNIX;
 
 		c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
 		if (IS_ERR(c))
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8fe5b940c5f2..658104acf13b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -591,6 +591,12 @@ struct nfs_renameres {
 	struct nfs_fattr		*new_fattr;
 };
 
+/* parsed sec= options */
+struct nfs_auth_info {
+	unsigned int            flavor_len;
+	rpc_authflavor_t        flavors[1];
+};
+
 /*
  * Argument struct for decode_entry function
  */
-- 
cgit v1.2.3


From 0f5f49b8b3593309fd3c3a2080a5fd465afdbe16 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@netapp.com>
Date: Fri, 18 Oct 2013 15:15:17 -0400
Subject: NFS: cache parsed auth_info in nfs_server

Cache the auth_info structure in nfs_server and pass these values to submounts.

This lays the groundwork for supporting multiple sec= options.

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 2 ++
 fs/nfs/nfs4client.c       | 1 +
 fs/nfs/super.c            | 3 +--
 include/linux/nfs_fs_sb.h | 1 +
 4 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f5a7f7f9cd59..1d09289c8f0e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -786,6 +786,7 @@ static int nfs_init_server(struct nfs_server *server,
 		goto error;
 
 	server->port = data->nfs_server.port;
+	server->auth_info = data->auth_info;
 
 	error = nfs_init_server_rpcclient(server, &timeparms,
 					  data->selected_flavor);
@@ -929,6 +930,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour
 	target->acdirmax = source->acdirmax;
 	target->caps = source->caps;
 	target->options = source->options;
+	target->auth_info = source->auth_info;
 }
 EXPORT_SYMBOL_GPL(nfs_server_copy_userdata);
 
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d65090e5d00f..04131c837f27 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -962,6 +962,7 @@ static int nfs4_init_server(struct nfs_server *server,
 	/* Initialise the client representation from the mount data */
 	server->flags = data->flags;
 	server->options = data->options;
+	server->auth_info = data->auth_info;
 
 	if (data->auth_info.flavor_len >= 1)
 		data->selected_flavor = data->auth_info.flavors[0];
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b87744fb9dcd..de1e5e89c93d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2187,8 +2187,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	data->wsize = nfss->wsize;
 	data->retrans = nfss->client->cl_timeout->to_retries;
 	data->selected_flavor = nfss->client->cl_auth->au_flavor;
-	data->auth_info.flavors[0] = nfss->client->cl_auth->au_flavor;
-	data->auth_info.flavor_len = 1;
+	data->auth_info = nfss->auth_info;
 	data->acregmin = nfss->acregmin / HZ;
 	data->acregmax = nfss->acregmax / HZ;
 	data->acdirmin = nfss->acdirmin / HZ;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 186ec4969313..1150ea41b626 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -151,6 +151,7 @@ struct nfs_server {
 	unsigned long		mount_time;	/* when this fs was mounted */
 	struct super_block	*super;		/* VFS super block */
 	dev_t			s_dev;		/* superblock dev numbers */
+	struct nfs_auth_info	auth_info;	/* parsed auth flavors */
 
 #ifdef CONFIG_NFS_FSCACHE
 	struct nfs_fscache_key	*fscache_key;	/* unique key for superblock */
-- 
cgit v1.2.3


From 4d4b69dd847a098cdca341c45326f6c6f61b8691 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@netapp.com>
Date: Fri, 18 Oct 2013 15:15:19 -0400
Subject: NFS: add support for multiple sec= mount options

This patch adds support for multiple security options which can be
specified using a colon-delimited list of security flavors (the same
syntax as nfsd's exports file).

This is useful, for instance, when NFSv4.x mounts cross SECINFO
boundaries. With this patch a user can use "sec=krb5i,krb5p"
to mount a remote filesystem using krb5i, but can still cross
into krb5p-only exports.

New mounts will try all security options before failing.  NFSv4.x
SECINFO results will be compared against the sec= flavors to
find the first flavor in both lists or if no match is found will
return -EPERM.

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h       |   1 +
 fs/nfs/nfs4client.c     |   3 +
 fs/nfs/nfs4namespace.c  |  17 +++--
 fs/nfs/nfs4proc.c       |  32 ++++++----
 fs/nfs/super.c          | 160 +++++++++++++++++++++++++++++++-----------------
 include/linux/nfs_xdr.h |   3 +-
 6 files changed, 145 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c8cd044f0982..bca6a3e3c49c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -326,6 +326,7 @@ extern struct file_system_type nfs_xdev_fs_type;
 extern struct file_system_type nfs4_xdev_fs_type;
 extern struct file_system_type nfs4_referral_fs_type;
 #endif
+bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
 struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
 			struct nfs_subversion *);
 void nfs_initialise_sb(struct super_block *);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index f6cc77c7d802..b4a160a405ce 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -964,6 +964,9 @@ static int nfs4_init_server(struct nfs_server *server,
 	server->options = data->options;
 	server->auth_info = data->auth_info;
 
+	/* Use the first specified auth flavor. If this flavor isn't
+	 * allowed by the server, use the SECINFO path to try the
+	 * other specified flavors */
 	if (data->auth_info.flavor_len >= 1)
 		data->selected_flavor = data->auth_info.flavors[0];
 	else
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index b947054f0ca1..c08cbf40c59e 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -137,6 +137,7 @@ static size_t nfs_parse_server_name(char *string, size_t len,
 
 /**
  * nfs_find_best_sec - Find a security mechanism supported locally
+ * @server: NFS server struct
  * @flavors: List of security tuples returned by SECINFO procedure
  *
  * Return the pseudoflavor of the first security mechanism in
@@ -145,7 +146,8 @@ static size_t nfs_parse_server_name(char *string, size_t len,
  * is searched in the order returned from the server, per RFC 3530
  * recommendation.
  */
-static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
+					  struct nfs4_secinfo_flavors *flavors)
 {
 	rpc_authflavor_t pseudoflavor;
 	struct nfs4_secinfo4 *secinfo;
@@ -160,12 +162,19 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
 		case RPC_AUTH_GSS:
 			pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
 							&secinfo->flavor_info);
-			if (pseudoflavor != RPC_AUTH_MAXFLAVOR)
+			/* make sure pseudoflavor matches sec= mount opt */
+			if (pseudoflavor != RPC_AUTH_MAXFLAVOR &&
+			    nfs_auth_info_match(&server->auth_info,
+						pseudoflavor))
 				return pseudoflavor;
 			break;
 		}
 	}
 
+	/* if there were any sec= options then nothing matched */
+	if (server->auth_info.flavor_len > 0)
+		return -EPERM;
+
 	return RPC_AUTH_UNIX;
 }
 
@@ -187,7 +196,7 @@ static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr
 		goto out;
 	}
 
-	flavor = nfs_find_best_sec(flavors);
+	flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors);
 
 out:
 	put_page(page);
@@ -390,7 +399,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
 
 	if (client->cl_auth->au_flavor != flavor)
 		flavor = client->cl_auth->au_flavor;
-	else if (server->auth_info.flavor_len == 0) {
+	else {
 		rpc_authflavor_t new = nfs4_negotiate_security(dir, name);
 		if ((int)new >= 0)
 			flavor = new;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0eb8dc5792da..b02c4cc7b0a9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2873,11 +2873,24 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
 	int status = -EPERM;
 	size_t i;
 
-	for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
-		status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
-		if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
-			continue;
-		break;
+	if (server->auth_info.flavor_len > 0) {
+		/* try each flavor specified by user */
+		for (i = 0; i < server->auth_info.flavor_len; i++) {
+			status = nfs4_lookup_root_sec(server, fhandle, info,
+						server->auth_info.flavors[i]);
+			if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
+				continue;
+			break;
+		}
+	} else {
+		/* no flavors specified by user, try default list */
+		for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
+			status = nfs4_lookup_root_sec(server, fhandle, info,
+						      flav_array[i]);
+			if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
+				continue;
+			break;
+		}
 	}
 
 	/*
@@ -2919,9 +2932,6 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
 		status = nfs4_lookup_root(server, fhandle, info);
 		if (status != -NFS4ERR_WRONGSEC)
 			break;
-		/* Did user force a 'sec=' mount option? */
-		if (server->auth_info.flavor_len > 0)
-			break;
 	default:
 		status = nfs4_do_find_root_sec(server, fhandle, info);
 	}
@@ -3179,9 +3189,6 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
 			err = -EPERM;
 			if (client != *clnt)
 				goto out;
-			/* No security negotiation if the user specified 'sec=' */
-			if (NFS_SERVER(dir)->auth_info.flavor_len > 0)
-				goto out;
 			client = nfs4_create_sec_client(client, dir, name);
 			if (IS_ERR(client))
 				return PTR_ERR(client);
@@ -7942,6 +7949,9 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
 			break;
 		}
 
+		if (!nfs_auth_info_match(&server->auth_info, flavor))
+			flavor = RPC_AUTH_MAXFLAVOR;
+
 		if (flavor != RPC_AUTH_MAXFLAVOR) {
 			err = nfs4_lookup_root_sec(server, fhandle,
 						   info, flavor);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3a4f8bf5e5a5..317d6fc2160e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -497,7 +497,8 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
 	static const struct {
 		rpc_authflavor_t flavour;
 		const char *str;
-	} sec_flavours[] = {
+	} sec_flavours[NFS_AUTH_INFO_MAX_FLAVORS] = {
+		/* update NFS_AUTH_INFO_MAX_FLAVORS when this list changes! */
 		{ RPC_AUTH_NULL, "null" },
 		{ RPC_AUTH_UNIX, "sys" },
 		{ RPC_AUTH_GSS_KRB5, "krb5" },
@@ -1018,6 +1019,52 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
 	}
 }
 
+/*
+ * Add 'flavor' to 'auth_info' if not already present.
+ * Returns true if 'flavor' ends up in the list, false otherwise
+ */
+static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
+			      rpc_authflavor_t flavor)
+{
+	unsigned int i;
+	unsigned int max_flavor_len = (sizeof(auth_info->flavors) /
+				       sizeof(auth_info->flavors[0]));
+
+	/* make sure this flavor isn't already in the list */
+	for (i = 0; i < auth_info->flavor_len; i++) {
+		if (flavor == auth_info->flavors[i])
+			return true;
+	}
+
+	if (auth_info->flavor_len + 1 >= max_flavor_len) {
+		dfprintk(MOUNT, "NFS: too many sec= flavors\n");
+		return false;
+	}
+
+	auth_info->flavors[auth_info->flavor_len++] = flavor;
+	return true;
+}
+
+/*
+ * Return true if 'match' is in auth_info or auth_info is empty.
+ * Return false otherwise.
+ */
+bool nfs_auth_info_match(const struct nfs_auth_info *auth_info,
+			 rpc_authflavor_t match)
+{
+	int i;
+
+	if (!auth_info->flavor_len)
+		return true;
+
+	for (i = 0; i < auth_info->flavor_len; i++) {
+		if (auth_info->flavors[i] == match)
+			return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(nfs_auth_info_match);
+
 /*
  * Parse the value of the 'sec=' option.
  */
@@ -1026,49 +1073,55 @@ static int nfs_parse_security_flavors(char *value,
 {
 	substring_t args[MAX_OPT_ARGS];
 	rpc_authflavor_t pseudoflavor;
+	char *p;
 
 	dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value);
 
-	switch (match_token(value, nfs_secflavor_tokens, args)) {
-	case Opt_sec_none:
-		pseudoflavor = RPC_AUTH_NULL;
-		break;
-	case Opt_sec_sys:
-		pseudoflavor = RPC_AUTH_UNIX;
-		break;
-	case Opt_sec_krb5:
-		pseudoflavor = RPC_AUTH_GSS_KRB5;
-		break;
-	case Opt_sec_krb5i:
-		pseudoflavor = RPC_AUTH_GSS_KRB5I;
-		break;
-	case Opt_sec_krb5p:
-		pseudoflavor = RPC_AUTH_GSS_KRB5P;
-		break;
-	case Opt_sec_lkey:
-		pseudoflavor = RPC_AUTH_GSS_LKEY;
-		break;
-	case Opt_sec_lkeyi:
-		pseudoflavor = RPC_AUTH_GSS_LKEYI;
-		break;
-	case Opt_sec_lkeyp:
-		pseudoflavor = RPC_AUTH_GSS_LKEYP;
-		break;
-	case Opt_sec_spkm:
-		pseudoflavor = RPC_AUTH_GSS_SPKM;
-		break;
-	case Opt_sec_spkmi:
-		pseudoflavor = RPC_AUTH_GSS_SPKMI;
-		break;
-	case Opt_sec_spkmp:
-		pseudoflavor = RPC_AUTH_GSS_SPKMP;
-		break;
-	default:
-		return 0;
+	while ((p = strsep(&value, ":")) != NULL) {
+		switch (match_token(p, nfs_secflavor_tokens, args)) {
+		case Opt_sec_none:
+			pseudoflavor = RPC_AUTH_NULL;
+			break;
+		case Opt_sec_sys:
+			pseudoflavor = RPC_AUTH_UNIX;
+			break;
+		case Opt_sec_krb5:
+			pseudoflavor = RPC_AUTH_GSS_KRB5;
+			break;
+		case Opt_sec_krb5i:
+			pseudoflavor = RPC_AUTH_GSS_KRB5I;
+			break;
+		case Opt_sec_krb5p:
+			pseudoflavor = RPC_AUTH_GSS_KRB5P;
+			break;
+		case Opt_sec_lkey:
+			pseudoflavor = RPC_AUTH_GSS_LKEY;
+			break;
+		case Opt_sec_lkeyi:
+			pseudoflavor = RPC_AUTH_GSS_LKEYI;
+			break;
+		case Opt_sec_lkeyp:
+			pseudoflavor = RPC_AUTH_GSS_LKEYP;
+			break;
+		case Opt_sec_spkm:
+			pseudoflavor = RPC_AUTH_GSS_SPKM;
+			break;
+		case Opt_sec_spkmi:
+			pseudoflavor = RPC_AUTH_GSS_SPKMI;
+			break;
+		case Opt_sec_spkmp:
+			pseudoflavor = RPC_AUTH_GSS_SPKMP;
+			break;
+		default:
+			dfprintk(MOUNT,
+				 "NFS: sec= option '%s' not recognized\n", p);
+			return 0;
+		}
+
+		if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor))
+			return 0;
 	}
 
-	mnt->auth_info.flavors[0] = pseudoflavor;
-	mnt->auth_info.flavor_len = 1;
 	return 1;
 }
 
@@ -1615,12 +1668,14 @@ out_security_failure:
 }
 
 /*
- * Ensure that the specified authtype in args->auth_info is supported by
- * the server. Returns 0 if it's ok, and -EACCES if not.
+ * Ensure that a specified authtype in args->auth_info is supported by
+ * the server. Returns 0 and sets args->selected_flavor if it's ok, and
+ * -EACCES if not.
  */
-static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args,
+static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
 			rpc_authflavor_t *server_authlist, unsigned int count)
 {
+	rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
 	unsigned int i;
 
 	/*
@@ -1632,17 +1687,19 @@ static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args,
 	 * can be used.
 	 */
 	for (i = 0; i < count; i++) {
-		if (args->auth_info.flavors[0] == server_authlist[i] ||
-		    server_authlist[i] == RPC_AUTH_NULL)
+		flavor = server_authlist[i];
+
+		if (nfs_auth_info_match(&args->auth_info, flavor) ||
+		    flavor == RPC_AUTH_NULL)
 			goto out;
 	}
 
-	dfprintk(MOUNT, "NFS: auth flavor %u not supported by server\n",
-		args->auth_info.flavors[0]);
+	dfprintk(MOUNT,
+		 "NFS: specified auth flavors not supported by server\n");
 	return -EACCES;
 
 out:
-	args->selected_flavor = args->auth_info.flavors[0];
+	args->selected_flavor = flavor;
 	dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor);
 	return 0;
 }
@@ -1732,7 +1789,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
 	 * whether the server supports it, and then just try to use it if so.
 	 */
 	if (args->auth_info.flavor_len > 0) {
-		status = nfs_verify_authflavor(args, authlist, authlist_len);
+		status = nfs_verify_authflavors(args, authlist, authlist_len);
 		dfprintk(MOUNT, "NFS: using auth flavor %u\n",
 			 args->selected_flavor);
 		if (status)
@@ -2102,9 +2159,6 @@ static int nfs_validate_text_mount_data(void *options,
 
 	nfs_set_port(sap, &args->nfs_server.port, port);
 
-	if (args->auth_info.flavor_len > 1)
-		goto out_bad_auth;
-
 	return nfs_parse_devname(dev_name,
 				   &args->nfs_server.hostname,
 				   max_namelen,
@@ -2124,10 +2178,6 @@ out_invalid_transport_udp:
 out_no_address:
 	dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
 	return -EINVAL;
-
-out_bad_auth:
-	dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
-	return -EINVAL;
 }
 
 static int
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 658104acf13b..3ccfcecf8999 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -592,9 +592,10 @@ struct nfs_renameres {
 };
 
 /* parsed sec= options */
+#define NFS_AUTH_INFO_MAX_FLAVORS 12 /* see fs/nfs/super.c */
 struct nfs_auth_info {
 	unsigned int            flavor_len;
-	rpc_authflavor_t        flavors[1];
+	rpc_authflavor_t        flavors[NFS_AUTH_INFO_MAX_FLAVORS];
 };
 
 /*
-- 
cgit v1.2.3


From a01779f89fc8a2225cb82dca0fc7b8451851cb7b Mon Sep 17 00:00:00 2001
From: Josh Cartwright <joshc@codeaurora.org>
Date: Mon, 28 Oct 2013 13:12:35 -0500
Subject: regmap: add SPMI support

Add basic support for the System Power Management Interface (SPMI) bus.
This is a simple implementation which only implements register accesses
via the Extended Register Read/Write Long commands.

Signed-off-by: Josh Cartwright <joshc@codeaurora.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/base/regmap/Kconfig       |  5 ++-
 drivers/base/regmap/Makefile      |  1 +
 drivers/base/regmap/regmap-spmi.c | 90 +++++++++++++++++++++++++++++++++++++++
 include/linux/regmap.h            |  5 +++
 4 files changed, 100 insertions(+), 1 deletion(-)
 create mode 100644 drivers/base/regmap/regmap-spmi.c

(limited to 'include/linux')

diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index f0d30543fcce..4251570610c9 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -3,7 +3,7 @@
 # subsystems should select the appropriate symbols.
 
 config REGMAP
-	default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_MMIO || REGMAP_IRQ)
+	default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_MMIO || REGMAP_IRQ)
 	select LZO_COMPRESS
 	select LZO_DECOMPRESS
 	select IRQ_DOMAIN if REGMAP_IRQ
@@ -15,6 +15,9 @@ config REGMAP_I2C
 config REGMAP_SPI
 	tristate
 
+config REGMAP_SPMI
+	tristate
+
 config REGMAP_MMIO
 	tristate
 
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index cf129980abd0..a7c670b4123a 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -3,5 +3,6 @@ obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-lzo.o regcache-flat.o
 obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
 obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
 obj-$(CONFIG_REGMAP_SPI) += regmap-spi.o
+obj-$(CONFIG_REGMAP_SPMI) += regmap-spmi.o
 obj-$(CONFIG_REGMAP_MMIO) += regmap-mmio.o
 obj-$(CONFIG_REGMAP_IRQ) += regmap-irq.o
diff --git a/drivers/base/regmap/regmap-spmi.c b/drivers/base/regmap/regmap-spmi.c
new file mode 100644
index 000000000000..ac2391013db1
--- /dev/null
+++ b/drivers/base/regmap/regmap-spmi.c
@@ -0,0 +1,90 @@
+/*
+ * Register map access API - SPMI support
+ *
+ * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+ *
+ * Based on regmap-i2c.c:
+ * Copyright 2011 Wolfson Microelectronics plc
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/regmap.h>
+#include <linux/spmi.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+static int regmap_spmi_read(void *context,
+			    const void *reg, size_t reg_size,
+			    void *val, size_t val_size)
+{
+	BUG_ON(reg_size != 2);
+	return spmi_ext_register_readl(context, *(u16 *)reg,
+				       val, val_size);
+}
+
+static int regmap_spmi_gather_write(void *context,
+				    const void *reg, size_t reg_size,
+				    const void *val, size_t val_size)
+{
+	BUG_ON(reg_size != 2);
+	return spmi_ext_register_writel(context, *(u16 *)reg, val, val_size);
+}
+
+static int regmap_spmi_write(void *context, const void *data,
+			     size_t count)
+{
+	BUG_ON(count < 2);
+	return regmap_spmi_gather_write(context, data, 2, data + 2, count - 2);
+}
+
+static struct regmap_bus regmap_spmi = {
+	.read				= regmap_spmi_read,
+	.write				= regmap_spmi_write,
+	.gather_write			= regmap_spmi_gather_write,
+	.reg_format_endian_default	= REGMAP_ENDIAN_NATIVE,
+	.val_format_endian_default	= REGMAP_ENDIAN_NATIVE,
+};
+
+/**
+ * regmap_init_spmi(): Initialize register map
+ *
+ * @sdev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+struct regmap *regmap_init_spmi(struct spmi_device *sdev,
+				const struct regmap_config *config)
+{
+	return regmap_init(&sdev->dev, &regmap_spmi, sdev, config);
+}
+EXPORT_SYMBOL_GPL(regmap_init_spmi);
+
+/**
+ * devm_regmap_init_spmi(): Initialise managed register map
+ *
+ * @sdev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+struct regmap *devm_regmap_init_spmi(struct spmi_device *sdev,
+				     const struct regmap_config *config)
+{
+	return devm_regmap_init(&sdev->dev, &regmap_spmi, sdev, config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_spmi);
+
+MODULE_LICENSE("GPL");
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..3f5abc86b6b5 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -23,6 +23,7 @@ struct device;
 struct i2c_client;
 struct irq_domain;
 struct spi_device;
+struct spmi_device;
 struct regmap;
 struct regmap_range_cfg;
 struct regmap_field;
@@ -318,6 +319,8 @@ struct regmap *regmap_init_i2c(struct i2c_client *i2c,
 			       const struct regmap_config *config);
 struct regmap *regmap_init_spi(struct spi_device *dev,
 			       const struct regmap_config *config);
+struct regmap *regmap_init_spmi(struct spmi_device *dev,
+			       const struct regmap_config *config);
 struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id,
 				    void __iomem *regs,
 				    const struct regmap_config *config);
@@ -330,6 +333,8 @@ struct regmap *devm_regmap_init_i2c(struct i2c_client *i2c,
 				    const struct regmap_config *config);
 struct regmap *devm_regmap_init_spi(struct spi_device *dev,
 				    const struct regmap_config *config);
+struct regmap *devm_regmap_init_spmi(struct spmi_device *dev,
+				     const struct regmap_config *config);
 struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id,
 					 void __iomem *regs,
 					 const struct regmap_config *config);
-- 
cgit v1.2.3


From e0f244c63fc9d192dfd399cc2677bbdca61994b1 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 23 Oct 2013 10:57:58 +1030
Subject: asmlinkage, module: Make ksymtab and kcrctab symbols and
 __this_module __visible

Make the ksymtab symbols for EXPORT_SYMBOL visible.
This prevents the LTO compiler from adding a .NUMBER prefix,
which avoids various problems in later export processing.

Cc: rusty@rustcorp.com.au
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/export.h | 4 ++--
 scripts/mod/modpost.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/export.h b/include/linux/export.h
index 412cd509effe..3f2793d51899 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -43,7 +43,7 @@ extern struct module __this_module;
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
 #define __CRC_SYMBOL(sym, sec)					\
-	extern void *__crc_##sym __attribute__((weak));		\
+	extern __visible void *__crc_##sym __attribute__((weak));		\
 	static const unsigned long __kcrctab_##sym		\
 	__used							\
 	__attribute__((section("___kcrctab" sec "+" #sym), unused))	\
@@ -59,7 +59,7 @@ extern struct module __this_module;
 	static const char __kstrtab_##sym[]			\
 	__attribute__((section("__ksymtab_strings"), aligned(1))) \
 	= VMLINUX_SYMBOL_STR(sym);				\
-	static const struct kernel_symbol __ksymtab_##sym	\
+	__visible const struct kernel_symbol __ksymtab_##sym	\
 	__used							\
 	__attribute__((section("___ksymtab" sec "+" #sym), unused))	\
 	= { (unsigned long)&sym, __kstrtab_##sym }
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 393706b37774..9b873ac6ed7b 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1861,7 +1861,7 @@ static void add_header(struct buffer *b, struct module *mod)
 	buf_printf(b, "\n");
 	buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
 	buf_printf(b, "\n");
-	buf_printf(b, "struct module __this_module\n");
+	buf_printf(b, "__visible struct module __this_module\n");
 	buf_printf(b, "__attribute__((section(\".gnu.linkonce.this_module\"))) = {\n");
 	buf_printf(b, "\t.name = KBUILD_MODNAME,\n");
 	if (mod->has_init)
-- 
cgit v1.2.3


From 5d9efa7ee99eed58388f186c13cf2e2a87e9ceb4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 28 Oct 2013 20:07:50 -0400
Subject: ipv6: Remove privacy config option.

The code for privacy extentions is very mature, and making it
configurable only gives marginal memory/code savings in exchange
for obfuscation and hard to read code via CPP ifdef'ery.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h   |  2 --
 include/net/if_inet6.h |  5 +----
 net/ipv6/Kconfig       | 18 ------------------
 net/ipv6/addrconf.c    | 41 +++--------------------------------------
 4 files changed, 4 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index a80a63cfb70c..5d89d1b808a6 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -21,13 +21,11 @@ struct ipv6_devconf {
 	__s32		force_mld_version;
 	__s32		mldv1_unsolicited_report_interval;
 	__s32		mldv2_unsolicited_report_interval;
-#ifdef CONFIG_IPV6_PRIVACY
 	__s32		use_tempaddr;
 	__s32		temp_valid_lft;
 	__s32		temp_prefered_lft;
 	__s32		regen_max_retry;
 	__s32		max_desync_factor;
-#endif
 	__s32		max_addresses;
 	__s32		accept_ra_defrtr;
 	__s32		accept_ra_pinfo;
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 02ef7727bb55..76d54270f2e2 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -66,11 +66,10 @@ struct inet6_ifaddr {
 	struct hlist_node	addr_lst;
 	struct list_head	if_list;
 
-#ifdef CONFIG_IPV6_PRIVACY
 	struct list_head	tmp_list;
 	struct inet6_ifaddr	*ifpub;
 	int			regen_count;
-#endif
+
 	bool			tokenized;
 
 	struct rcu_head		rcu;
@@ -192,11 +191,9 @@ struct inet6_dev {
 	__u32			if_flags;
 	int			dead;
 
-#ifdef CONFIG_IPV6_PRIVACY
 	u8			rndid[8];
 	struct timer_list	regen_timer;
 	struct list_head	tempaddr_list;
-#endif
 
 	struct in6_addr		token;
 
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e1a8d903e366..d92e5586783e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -21,24 +21,6 @@ menuconfig IPV6
 
 if IPV6
 
-config IPV6_PRIVACY
-	bool "IPv6: Privacy Extensions (RFC 3041) support"
-	---help---
-	  Privacy Extensions for Stateless Address Autoconfiguration in IPv6
-	  support.  With this option, additional periodically-altered
-	  pseudo-random global-scope unicast address(es) will be assigned to
-	  your interface(s).
-	
-	  We use our standard pseudo-random algorithm to generate the
-          randomized interface identifier, instead of one described in RFC 3041.
-
-	  By default the kernel does not generate temporary addresses.
-	  To use temporary addresses, do
-	
-	        echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr 
-
-	  See <file:Documentation/networking/ip-sysctl.txt> for details.
-
 config IPV6_ROUTER_PREF
 	bool "IPv6: Router Preference (RFC 4191) support"
 	---help---
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cd3fb301da38..542d09561ed6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -83,11 +83,7 @@
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
 #include <linux/netconf.h>
-
-#ifdef CONFIG_IPV6_PRIVACY
 #include <linux/random.h>
-#endif
-
 #include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
@@ -124,11 +120,9 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
 }
 #endif
 
-#ifdef CONFIG_IPV6_PRIVACY
 static void __ipv6_regen_rndid(struct inet6_dev *idev);
 static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
 static void ipv6_regen_rndid(unsigned long data);
-#endif
 
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static int ipv6_count_addresses(struct inet6_dev *idev);
@@ -183,13 +177,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
 	.use_tempaddr 		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
 	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
 	.regen_max_retry	= REGEN_MAX_RETRY,
 	.max_desync_factor	= MAX_DESYNC_FACTOR,
-#endif
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 	.accept_ra_defrtr	= 1,
 	.accept_ra_pinfo	= 1,
@@ -221,13 +213,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
 	.use_tempaddr		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
 	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
 	.regen_max_retry	= REGEN_MAX_RETRY,
 	.max_desync_factor	= MAX_DESYNC_FACTOR,
-#endif
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 	.accept_ra_defrtr	= 1,
 	.accept_ra_pinfo	= 1,
@@ -371,7 +361,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	}
 #endif
 
-#ifdef CONFIG_IPV6_PRIVACY
 	INIT_LIST_HEAD(&ndev->tempaddr_list);
 	setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
 	if ((dev->flags&IFF_LOOPBACK) ||
@@ -384,7 +373,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 		in6_dev_hold(ndev);
 		ipv6_regen_rndid((unsigned long) ndev);
 	}
-#endif
+
 	ndev->token = in6addr_any;
 
 	if (netif_running(dev) && addrconf_qdisc_ok(dev))
@@ -865,12 +854,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	/* Add to inet6_dev unicast addr list. */
 	ipv6_link_dev_addr(idev, ifa);
 
-#ifdef CONFIG_IPV6_PRIVACY
 	if (ifa->flags&IFA_F_TEMPORARY) {
 		list_add(&ifa->tmp_list, &idev->tempaddr_list);
 		in6_ifa_hold(ifa);
 	}
-#endif
 
 	in6_ifa_hold(ifa);
 	write_unlock(&idev->lock);
@@ -913,7 +900,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	spin_unlock_bh(&addrconf_hash_lock);
 
 	write_lock_bh(&idev->lock);
-#ifdef CONFIG_IPV6_PRIVACY
+
 	if (ifp->flags&IFA_F_TEMPORARY) {
 		list_del(&ifp->tmp_list);
 		if (ifp->ifpub) {
@@ -922,7 +909,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 		}
 		__in6_ifa_put(ifp);
 	}
-#endif
 
 	list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
 		if (ifa == ifp) {
@@ -1013,7 +999,6 @@ out:
 	in6_ifa_put(ifp);
 }
 
-#ifdef CONFIG_IPV6_PRIVACY
 static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
 {
 	struct inet6_dev *idev = ifp->idev;
@@ -1116,7 +1101,6 @@ retry:
 out:
 	return ret;
 }
-#endif
 
 /*
  *	Choose an appropriate source address (RFC3484)
@@ -1131,9 +1115,7 @@ enum {
 #endif
 	IPV6_SADDR_RULE_OIF,
 	IPV6_SADDR_RULE_LABEL,
-#ifdef CONFIG_IPV6_PRIVACY
 	IPV6_SADDR_RULE_PRIVACY,
-#endif
 	IPV6_SADDR_RULE_ORCHID,
 	IPV6_SADDR_RULE_PREFIX,
 	IPV6_SADDR_RULE_MAX
@@ -1247,7 +1229,6 @@ static int ipv6_get_saddr_eval(struct net *net,
 				      &score->ifa->addr, score->addr_type,
 				      score->ifa->idev->dev->ifindex) == dst->label;
 		break;
-#ifdef CONFIG_IPV6_PRIVACY
 	case IPV6_SADDR_RULE_PRIVACY:
 	    {
 		/* Rule 7: Prefer public address
@@ -1259,7 +1240,6 @@ static int ipv6_get_saddr_eval(struct net *net,
 		ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
 		break;
 	    }
-#endif
 	case IPV6_SADDR_RULE_ORCHID:
 		/* Rule 8-: Prefer ORCHID vs ORCHID or
 		 *	    non-ORCHID vs non-ORCHID
@@ -1588,7 +1568,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 		if (dad_failed)
 			ipv6_ifa_notify(0, ifp);
 		in6_ifa_put(ifp);
-#ifdef CONFIG_IPV6_PRIVACY
 	} else if (ifp->flags&IFA_F_TEMPORARY) {
 		struct inet6_ifaddr *ifpub;
 		spin_lock_bh(&ifp->lock);
@@ -1602,7 +1581,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 			spin_unlock_bh(&ifp->lock);
 		}
 		ipv6_del_addr(ifp);
-#endif
 	} else
 		ipv6_del_addr(ifp);
 }
@@ -1851,7 +1829,6 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
 	return err;
 }
 
-#ifdef CONFIG_IPV6_PRIVACY
 /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
 static void __ipv6_regen_rndid(struct inet6_dev *idev)
 {
@@ -1919,7 +1896,6 @@ static void  __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp
 	if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
 		__ipv6_regen_rndid(idev);
 }
-#endif
 
 /*
  *	Add prefix route.
@@ -2207,9 +2183,7 @@ ok:
 		if (ifp) {
 			int flags;
 			unsigned long now;
-#ifdef CONFIG_IPV6_PRIVACY
 			struct inet6_ifaddr *ift;
-#endif
 			u32 stored_lft;
 
 			/* update lifetime (RFC2462 5.5.3 e) */
@@ -2250,7 +2224,6 @@ ok:
 			} else
 				spin_unlock(&ifp->lock);
 
-#ifdef CONFIG_IPV6_PRIVACY
 			read_lock_bh(&in6_dev->lock);
 			/* update all temporary addresses in the list */
 			list_for_each_entry(ift, &in6_dev->tempaddr_list,
@@ -2315,7 +2288,7 @@ ok:
 			} else {
 				read_unlock_bh(&in6_dev->lock);
 			}
-#endif
+
 			in6_ifa_put(ifp);
 			addrconf_verify(0);
 		}
@@ -2995,7 +2968,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	if (!how)
 		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
 
-#ifdef CONFIG_IPV6_PRIVACY
 	if (how && del_timer(&idev->regen_timer))
 		in6_dev_put(idev);
 
@@ -3015,7 +2987,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		in6_ifa_put(ifa);
 		write_lock_bh(&idev->lock);
 	}
-#endif
 
 	while (!list_empty(&idev->addr_list)) {
 		ifa = list_first_entry(&idev->addr_list,
@@ -3528,7 +3499,6 @@ restart:
 					in6_ifa_put(ifp);
 					goto restart;
 				}
-#ifdef CONFIG_IPV6_PRIVACY
 			} else if ((ifp->flags&IFA_F_TEMPORARY) &&
 				   !(ifp->flags&IFA_F_TENTATIVE)) {
 				unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
@@ -3556,7 +3526,6 @@ restart:
 				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
 					next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
 				spin_unlock(&ifp->lock);
-#endif
 			} else {
 				/* ifp->prefered_lft <= ifp->valid_lft */
 				if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -4128,13 +4097,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 		jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
 	array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
 		jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
-#ifdef CONFIG_IPV6_PRIVACY
 	array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
 	array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
 	array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
 	array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
 	array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
-#endif
 	array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
 	array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
 	array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
@@ -4828,7 +4795,6 @@ static struct addrconf_sysctl_table
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_ms_jiffies,
 		},
-#ifdef CONFIG_IPV6_PRIVACY
 		{
 			.procname	= "use_tempaddr",
 			.data		= &ipv6_devconf.use_tempaddr,
@@ -4864,7 +4830,6 @@ static struct addrconf_sysctl_table
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-#endif
 		{
 			.procname	= "max_addresses",
 			.data		= &ipv6_devconf.max_addresses,
-- 
cgit v1.2.3


From 46f9c2b925ac12e5ad8b8b7c90c71dacc9d5db37 Mon Sep 17 00:00:00 2001
From: Heinz Graalfs <graalfs@linux.vnet.ibm.com>
Date: Tue, 29 Oct 2013 09:38:50 +1030
Subject: virtio_ring: change host notification API

Currently a host kick error is silently ignored and not reflected in
the virtqueue of a particular virtio device.

Changing the notify API for guest->host notification seems to be one
prerequisite in order to be able to handle such errors in the context
where the kick is triggered.

This patch changes the notify API. The notify function must return a
bool return value. It returns false if the host notification failed.

Signed-off-by: Heinz Graalfs <graalfs@linux.vnet.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c         |  3 ++-
 drivers/remoteproc/remoteproc_virtio.c |  3 ++-
 drivers/s390/kvm/kvm_virtio.c          |  8 ++++++--
 drivers/s390/kvm/virtio_ccw.c          |  5 ++++-
 drivers/virtio/virtio_mmio.c           |  3 ++-
 drivers/virtio/virtio_pci.c            |  3 ++-
 drivers/virtio/virtio_ring.c           |  4 ++--
 include/linux/virtio_ring.h            |  2 +-
 tools/virtio/virtio_test.c             |  3 ++-
 tools/virtio/vringh_test.c             | 13 +++++++++----
 10 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index b3256ff0d426..d0a1d8a45c81 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -229,7 +229,7 @@ struct lguest_vq_info {
  * make a hypercall.  We hand the physical address of the virtqueue so the Host
  * knows which virtqueue we're talking about.
  */
-static void lg_notify(struct virtqueue *vq)
+static bool lg_notify(struct virtqueue *vq)
 {
 	/*
 	 * We store our virtqueue information in the "priv" pointer of the
@@ -238,6 +238,7 @@ static void lg_notify(struct virtqueue *vq)
 	struct lguest_vq_info *lvq = vq->priv;
 
 	hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0, 0);
+	return true;
 }
 
 /* An extern declaration inside a C file is bad form.  Don't do it. */
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index b09c75c21b60..a34b50690b4e 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -30,7 +30,7 @@
 #include "remoteproc_internal.h"
 
 /* kick the remote processor, and let it know which virtqueue to poke at */
-static void rproc_virtio_notify(struct virtqueue *vq)
+static bool rproc_virtio_notify(struct virtqueue *vq)
 {
 	struct rproc_vring *rvring = vq->priv;
 	struct rproc *rproc = rvring->rvdev->rproc;
@@ -39,6 +39,7 @@ static void rproc_virtio_notify(struct virtqueue *vq)
 	dev_dbg(&rproc->dev, "kicking vq index: %d\n", notifyid);
 
 	rproc->ops->kick(rproc, notifyid);
+	return true;
 }
 
 /**
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index af2166fa5159..1abd0db29915 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -166,11 +166,15 @@ static void kvm_reset(struct virtio_device *vdev)
  * make a hypercall.  We hand the address  of the virtqueue so the Host
  * knows which virtqueue we're talking about.
  */
-static void kvm_notify(struct virtqueue *vq)
+static bool kvm_notify(struct virtqueue *vq)
 {
+	long rc;
 	struct kvm_vqconfig *config = vq->priv;
 
-	kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, config->address);
+	rc = kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, config->address);
+	if (rc < 0)
+		return false;
+	return true;
 }
 
 /*
diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c
index 779dc5136291..d6297176ab85 100644
--- a/drivers/s390/kvm/virtio_ccw.c
+++ b/drivers/s390/kvm/virtio_ccw.c
@@ -162,7 +162,7 @@ static inline long do_kvm_notify(struct subchannel_id schid,
 	return __rc;
 }
 
-static void virtio_ccw_kvm_notify(struct virtqueue *vq)
+static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
 {
 	struct virtio_ccw_vq_info *info = vq->priv;
 	struct virtio_ccw_device *vcdev;
@@ -171,6 +171,9 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq)
 	vcdev = to_vc_device(info->vq->vdev);
 	ccw_device_get_schid(vcdev->cdev, &schid);
 	info->cookie = do_kvm_notify(schid, vq->index, info->cookie);
+	if (info->cookie < 0)
+		return false;
+	return true;
 }
 
 static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 1ba0d6831015..e9fdeb861992 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -219,13 +219,14 @@ static void vm_reset(struct virtio_device *vdev)
 /* Transport interface */
 
 /* the notify function used when creating a virt queue */
-static void vm_notify(struct virtqueue *vq)
+static bool vm_notify(struct virtqueue *vq)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
 
 	/* We write the queue's selector into the notification register to
 	 * signal the other end */
 	writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
+	return true;
 }
 
 /* Notify all virtqueues on an interrupt. */
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 98917fc872a4..a37c69941d30 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -197,13 +197,14 @@ static void vp_reset(struct virtio_device *vdev)
 }
 
 /* the notify function used when creating a virt queue */
-static void vp_notify(struct virtqueue *vq)
+static bool vp_notify(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 
 	/* we write the queue's selector into the notification register to
 	 * signal the other end */
 	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+	return true;
 }
 
 /* Handle a configuration change: Tell driver if it wants to know. */
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6547d46171b3..97dd51619b55 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -81,7 +81,7 @@ struct vring_virtqueue
 	u16 last_used_idx;
 
 	/* How to notify other side. FIXME: commonalize hcalls! */
-	void (*notify)(struct virtqueue *vq);
+	bool (*notify)(struct virtqueue *vq);
 
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
@@ -744,7 +744,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
 				      void *pages,
-				      void (*notify)(struct virtqueue *),
+				      bool (*notify)(struct virtqueue *),
 				      void (*callback)(struct virtqueue *),
 				      const char *name)
 {
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index b300787af8e0..67e06fe18c03 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -71,7 +71,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
 				      void *pages,
-				      void (*notify)(struct virtqueue *vq),
+				      bool (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
 				      const char *name);
 void vring_del_virtqueue(struct virtqueue *vq);
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index da7a19558281..059cb723f6a7 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -41,13 +41,14 @@ struct vdev_info {
 	struct vhost_memory *mem;
 };
 
-void vq_notify(struct virtqueue *vq)
+bool vq_notify(struct virtqueue *vq)
 {
 	struct vq_info *info = vq->priv;
 	unsigned long long v = 1;
 	int r;
 	r = write(info->kick, &v, sizeof v);
 	assert(r == sizeof v);
+	return true;
 }
 
 void vq_callback(struct virtqueue *vq)
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
index d053ea40c001..14a4f4cab5b9 100644
--- a/tools/virtio/vringh_test.c
+++ b/tools/virtio/vringh_test.c
@@ -22,7 +22,7 @@ static u64 user_addr_offset;
 #define RINGSIZE 256
 #define ALIGN 4096
 
-static void never_notify_host(struct virtqueue *vq)
+static bool never_notify_host(struct virtqueue *vq)
 {
 	abort();
 }
@@ -65,17 +65,22 @@ struct guest_virtio_device {
 	unsigned long notifies;
 };
 
-static void parallel_notify_host(struct virtqueue *vq)
+static bool parallel_notify_host(struct virtqueue *vq)
 {
+	int rc;
 	struct guest_virtio_device *gvdev;
 
 	gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev);
-	write(gvdev->to_host_fd, "", 1);
+	rc = write(gvdev->to_host_fd, "", 1);
+	if (rc < 0)
+		return false;
 	gvdev->notifies++;
+	return true;
 }
 
-static void no_notify_host(struct virtqueue *vq)
+static bool no_notify_host(struct virtqueue *vq)
 {
+	return true;
 }
 
 #define NUM_XFERS (10000000)
-- 
cgit v1.2.3


From 5b1bf7cb673ade0ab5c75f200dce911d9fb91c21 Mon Sep 17 00:00:00 2001
From: Heinz Graalfs <graalfs@linux.vnet.ibm.com>
Date: Tue, 29 Oct 2013 09:39:48 +1030
Subject: virtio_ring: let virtqueue_{kick()/notify()} return a bool

virtqueue_{kick()/notify()} should exploit the new host notification API.
If the notify call returned with a negative value the host kick failed
(e.g. a kick triggered after a device was hot-unplugged). In this case
the virtqueue is set to 'broken' and false is returned, otherwise true.

Signed-off-by: Heinz Graalfs <graalfs@linux.vnet.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 20 ++++++++++++++++----
 include/linux/virtio.h       |  4 ++--
 2 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 97dd51619b55..b47142723119 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -430,13 +430,22 @@ EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
  * @vq: the struct virtqueue
  *
  * This does not need to be serialized.
+ *
+ * Returns false if host notify failed or queue is broken, otherwise true.
  */
-void virtqueue_notify(struct virtqueue *_vq)
+bool virtqueue_notify(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
+	if (unlikely(vq->broken))
+		return false;
+
 	/* Prod other side to tell it about changes. */
-	vq->notify(_vq);
+	if (vq->notify(_vq) < 0) {
+		vq->broken = true;
+		return false;
+	}
+	return true;
 }
 EXPORT_SYMBOL_GPL(virtqueue_notify);
 
@@ -449,11 +458,14 @@ EXPORT_SYMBOL_GPL(virtqueue_notify);
  *
  * Caller must ensure we don't call this with other virtqueue
  * operations at the same time (except where noted).
+ *
+ * Returns false if kick failed, otherwise true.
  */
-void virtqueue_kick(struct virtqueue *vq)
+bool virtqueue_kick(struct virtqueue *vq)
 {
 	if (virtqueue_kick_prepare(vq))
-		virtqueue_notify(vq);
+		return virtqueue_notify(vq);
+	return true;
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 36d36cc89329..9b4de15fcb2f 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -51,11 +51,11 @@ int virtqueue_add_sgs(struct virtqueue *vq,
 		      void *data,
 		      gfp_t gfp);
 
-void virtqueue_kick(struct virtqueue *vq);
+bool virtqueue_kick(struct virtqueue *vq);
 
 bool virtqueue_kick_prepare(struct virtqueue *vq);
 
-void virtqueue_notify(struct virtqueue *vq);
+bool virtqueue_notify(struct virtqueue *vq);
 
 void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 
-- 
cgit v1.2.3


From b3b32c94133621c9ba7e4c8f29ec7533f2f4d8ec Mon Sep 17 00:00:00 2001
From: Heinz Graalfs <graalfs@linux.vnet.ibm.com>
Date: Tue, 29 Oct 2013 09:40:19 +1030
Subject: virtio_ring: add new function virtqueue_is_broken()

Add new function virtqueue_is_broken(). Callers of virtqueue_get_buf()
should check for a broken queue.

Signed-off-by: Heinz Graalfs <graalfs@linux.vnet.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 8 ++++++++
 include/linux/virtio.h       | 2 ++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b47142723119..f47777582ce5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -851,4 +851,12 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
 
+bool virtqueue_is_broken(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->broken;
+}
+EXPORT_SYMBOL_GPL(virtqueue_is_broken);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 9b4de15fcb2f..e4abb84199be 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -73,6 +73,8 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 
 unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
 
+bool virtqueue_is_broken(struct virtqueue *vq);
+
 /**
  * virtio_device - representation of a device using virtio
  * @index: unique position on the virtio bus
-- 
cgit v1.2.3


From 80c33ddd31d0e801953e02b7b003f395c1920e4e Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Sat, 21 Sep 2013 05:05:39 +0000
Subject: net: add might_sleep() call to napi_disable

napi_disable uses an msleep() call to wait for outstanding napi work to be
finished after setting the disable bit. It does not always sleep incase there
was no outstanding work. This resulted in a rare bug in ixgbe_down operation
where a napi_disable call took place inside of a local_bh_disable()d context.
In order to enable easier detection of future sleep while atomic BUGs, this
patch adds a might_sleep() call, so that every use of napi_disable during
atomic context will be visible.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Cc: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Cc: Alexander Duyck <alexander.duyck@intel.com>
Cc: Hyong-Youb Kim <hykim@myri.com>
Cc: Amir Vadai <amirv@mellanox.com>
Cc: Dmitry Kravkov <dmitry@broadcom.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 27f62f746621..cb1d918ecdf1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -483,6 +483,7 @@ void napi_hash_del(struct napi_struct *napi);
  */
 static inline void napi_disable(struct napi_struct *n)
 {
+	might_sleep();
 	set_bit(NAPI_STATE_DISABLE, &n->state);
 	while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
 		msleep(1);
-- 
cgit v1.2.3


From f51a07d05c5142e73f781d878f411d63d3548a49 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 19 Sep 2013 18:35:54 -0700
Subject: framebuffer: Add fb_<level> convenience logging macros

Add fb_<level> convenience macros for emitting the
"fb%d: ", struct fb_info->node value.

Neatens and shortens the code a bit.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 include/linux/fb.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index ffac70aab3e9..70c4836e4a9f 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -792,4 +792,16 @@ extern int fb_find_mode(struct fb_var_screeninfo *var,
 			const struct fb_videomode *default_mode,
 			unsigned int default_bpp);
 
+/* Convenience logging macros */
+#define fb_err(fb_info, fmt, ...)					\
+	pr_err("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+#define fb_notice(info, fmt, ...)					\
+	pr_notice("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+#define fb_warn(fb_info, fmt, ...)					\
+	pr_warn("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+#define fb_info(fb_info, fmt, ...)					\
+	pr_info("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+#define fb_dbg(fb_info, fmt, ...)					\
+	pr_debug("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+
 #endif /* _LINUX_FB_H */
-- 
cgit v1.2.3


From c2d3f25dda016d9697c5416810d4528770f0a281 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 9 Oct 2013 14:08:09 +0200
Subject: uprobes: Remove the wrong __weak attribute

linux/uprobes.h declares arch_uprobe_skip_sstep() as a weak function.
But as there is no definition of generic version so when trying to build
uprobes for an architecture that doesn't yet have a arch_uprobe_skip_sstep()
implementation, the vmlinux will try to call arch_uprobe_skip_sstep()
somehwere in Stupidhistan leading to a system crash.  We rather want a
proper link error so remove arch_uprobe_skip_sstep().

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 06f28beed7c2..e6fba627ea45 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -123,7 +123,7 @@ extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
 extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
-extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void uprobe_clear_state(struct mm_struct *mm);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
-- 
cgit v1.2.3


From 3ab679661721b1ec2aaad99a801870ed59ab1110 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 16 Oct 2013 19:39:37 +0200
Subject: uprobes: Teach uprobe_copy_process() to handle CLONE_VFORK

uprobe_copy_process() does nothing if the child shares ->mm with
the forking process, but there is a special case: CLONE_VFORK.
In this case it would be more correct to do dup_utask() but avoid
dup_xol(). This is not that important, the child should not unwind
its stack too much, this can corrupt the parent's stack, but at
least we need this to allow to ret-probe __vfork() itself.

Note: in theory, it would be better to check task_pt_regs(p)->sp
instead of CLONE_VFORK, we need to dup_utask() if and only if the
child can return from the function called by the parent. But this
needs the arch-dependant helper, and I think that nobody actually
does clone(same_stack, CLONE_VM).

Reported-by: Martin Cermak <mcermak@redhat.com>
Reported-by: David Smith <dsmith@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h |  4 ++--
 kernel/events/uprobes.c | 10 ++++++++--
 kernel/fork.c           |  2 +-
 3 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index e6fba627ea45..9e0d5a6fe7a8 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -117,7 +117,7 @@ extern void uprobe_start_dup_mmap(void);
 extern void uprobe_end_dup_mmap(void);
 extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
 extern void uprobe_free_utask(struct task_struct *t);
-extern void uprobe_copy_process(struct task_struct *t);
+extern void uprobe_copy_process(struct task_struct *t, unsigned long flags);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
 extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
 extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
@@ -174,7 +174,7 @@ static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
 static inline void uprobe_free_utask(struct task_struct *t)
 {
 }
-static inline void uprobe_copy_process(struct task_struct *t)
+static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags)
 {
 }
 static inline void uprobe_clear_state(struct mm_struct *mm)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 9f282e14925d..ae9e1d2ef256 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1415,7 +1415,7 @@ static void dup_xol_work(struct callback_head *work)
 /*
  * Called in context of a new clone/fork from copy_process.
  */
-void uprobe_copy_process(struct task_struct *t)
+void uprobe_copy_process(struct task_struct *t, unsigned long flags)
 {
 	struct uprobe_task *utask = current->utask;
 	struct mm_struct *mm = current->mm;
@@ -1424,7 +1424,10 @@ void uprobe_copy_process(struct task_struct *t)
 
 	t->utask = NULL;
 
-	if (mm == t->mm || !utask || !utask->return_instances)
+	if (!utask || !utask->return_instances)
+		return;
+
+	if (mm == t->mm && !(flags & CLONE_VFORK))
 		return;
 
 	if (dup_utask(t, utask))
@@ -1435,6 +1438,9 @@ void uprobe_copy_process(struct task_struct *t)
 	if (!area)
 		return uprobe_warn(t, "dup xol area");
 
+	if (mm == t->mm)
+		return;
+
 	/* TODO: move it into the union in uprobe_task */
 	work = kmalloc(sizeof(*work), GFP_KERNEL);
 	if (!work)
diff --git a/kernel/fork.c b/kernel/fork.c
index d3603b81246b..8531609b6a82 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1489,7 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	perf_event_fork(p);
 
 	trace_task_newtask(p, clone_flags);
-	uprobe_copy_process(p);
+	uprobe_copy_process(p, clone_flags);
 
 	return p;
 
-- 
cgit v1.2.3


From 3eae13671716492f3bcde270115407185e9c69fd Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 24 Oct 2013 15:42:33 -0600
Subject: device: Make dev_WARN/dev_WARN_ONCE print device as well as driver
 name

dev_WARN() and dev_WARN_ONCE() are annoying because (1) they include
only the driver name, not the device name, and (2) they print a spurious
newline in the middle.  This results in messages like this that are less
useful than they should be:

  [   40.094995] Device pcieport
  disabling already-disabled device

This patch makes them work more like dev_printk().

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 5e44cff5bced..b025925df7f7 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1170,16 +1170,15 @@ do {									\
 #endif
 
 /*
- * dev_WARN*() acts like dev_printk(), but with the key difference
- * of using a WARN/WARN_ON to get the message out, including the
- * file/line information and a backtrace.
+ * dev_WARN*() acts like dev_printk(), but with the key difference of
+ * using WARN/WARN_ONCE to include file/line information and a backtrace.
  */
 #define dev_WARN(dev, format, arg...) \
-	WARN(1, "Device: %s\n" format, dev_driver_string(dev), ## arg);
+	WARN(1, "%s %s: " format, dev_driver_string(dev), dev_name(dev), ## arg);
 
 #define dev_WARN_ONCE(dev, condition, format, arg...) \
-	WARN_ONCE(condition, "Device %s\n" format, \
-			dev_driver_string(dev), ## arg)
+	WARN_ONCE(condition, "%s %s: " format, \
+			dev_driver_string(dev), dev_name(dev), ## arg)
 
 /* Create alias, so I can be autoloaded. */
 #define MODULE_ALIAS_CHARDEV(major,minor) \
-- 
cgit v1.2.3


From 29fc2bc75393864bbc9b90a7a13a0d0e11c6f41e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 26 Oct 2013 20:41:53 -0700
Subject: printk: pr_debug_ratelimited: check state first to reduce "callbacks
 suppressed" messages

pr_debug_ratelimited should be coded similarly to dev_dbg_ratelimited
to reduce the "callbacks suppressed" messages.

Add #include <linux/dynamic_debug.h> to printk.h. Unfortunately, this
new #include must be after the prototype/declaration of function printk.

It may be better to split out these _ratelimited declarations into
a separate file one day.

Any use of these pr_<foo>_ratelimited functions must also have another
specific #include <ratelimited.h>.  Most users have this done indirectly
via #include <linux/kernel.h>

printk.h may not #include <linux/ratelimit.h> as it causes circular
dependencies and compilation failures.

Signed-off-by: Joe Perches <joe@perches.com>
Tested-by: Krzysztof Mazur <krzysiek@podlesie.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/printk.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index e6131a782481..694925837a16 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -233,6 +233,8 @@ extern asmlinkage void dump_stack(void) __cold;
 	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 #endif
 
+#include <linux/dynamic_debug.h>
+
 /* If you are writing a driver, please use dev_dbg instead */
 #if defined(CONFIG_DYNAMIC_DEBUG)
 /* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */
@@ -343,7 +345,19 @@ extern asmlinkage void dump_stack(void) __cold;
 #endif
 
 /* If you are writing a driver, please use dev_dbg instead */
-#if defined(DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define pr_debug_ratelimited(fmt, ...)					\
+do {									\
+	static DEFINE_RATELIMIT_STATE(_rs,				\
+				      DEFAULT_RATELIMIT_INTERVAL,	\
+				      DEFAULT_RATELIMIT_BURST);		\
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
+	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
+	    __ratelimit(&_rs))						\
+		__dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__);	\
+} while (0)
+#elif defined(DEBUG)
 #define pr_debug_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 #else
-- 
cgit v1.2.3


From 6587fca23001c33813d56309faf41188b5fbdb70 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 3 Oct 2013 21:26:40 +0530
Subject: cpuidle: fix indentation of cpumask

Use tabs for cpumask indentation in struct cpuidle_driver.

[rjw: Changelog]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpuidle.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 781addc66f03..c082425757f4 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -114,7 +114,7 @@ struct cpuidle_driver {
 	int			safe_state_index;
 
 	/* the driver handles the cpus in cpumask */
-	struct cpumask       *cpumask;
+	struct cpumask		*cpumask;
 };
 
 #ifdef CONFIG_CPU_IDLE
-- 
cgit v1.2.3


From f60e230f6be5672241e48434a6c2a417d9674d42 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 3 Oct 2013 21:26:55 +0530
Subject: cpuidle: remove cpuidle_unregister_governor()

cpuidle_unregister_governor() and cpuidle_replace_governor() aren't
used anymore and can be removed. They were used by cpufreq governors
earlier, but since the governors can't be compiled as modules any
more, these two functions aren't necessary.

Suggested-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpuidle/governor.txt |  1 -
 drivers/cpuidle/governor.c         | 43 --------------------------------------
 include/linux/cpuidle.h            |  6 ------
 3 files changed, 50 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cpuidle/governor.txt b/Documentation/cpuidle/governor.txt
index 12c6bd50c9f6..d9020f5e847b 100644
--- a/Documentation/cpuidle/governor.txt
+++ b/Documentation/cpuidle/governor.txt
@@ -25,5 +25,4 @@ kernel configuration and platform will be selected by cpuidle.
 
 Interfaces:
 extern int cpuidle_register_governor(struct cpuidle_governor *gov);
-extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
 struct cpuidle_governor
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index ea2f8e7aa24a..ca89412f5122 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -96,46 +96,3 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
 
 	return ret;
 }
-
-/**
- * cpuidle_replace_governor - find a replacement governor
- * @exclude_rating: the rating that will be skipped while looking for
- * new governor.
- */
-static struct cpuidle_governor *cpuidle_replace_governor(int exclude_rating)
-{
-	struct cpuidle_governor *gov;
-	struct cpuidle_governor *ret_gov = NULL;
-	unsigned int max_rating = 0;
-
-	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
-		if (gov->rating == exclude_rating)
-			continue;
-		if (gov->rating > max_rating) {
-			max_rating = gov->rating;
-			ret_gov = gov;
-		}
-	}
-
-	return ret_gov;
-}
-
-/**
- * cpuidle_unregister_governor - unregisters a governor
- * @gov: the governor
- */
-void cpuidle_unregister_governor(struct cpuidle_governor *gov)
-{
-	if (!gov)
-		return;
-
-	mutex_lock(&cpuidle_lock);
-	if (gov == cpuidle_curr_governor) {
-		struct cpuidle_governor *new_gov;
-		new_gov = cpuidle_replace_governor(gov->rating);
-		cpuidle_switch_governor(new_gov);
-	}
-	list_del(&gov->governor_list);
-	mutex_unlock(&cpuidle_lock);
-}
-
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index c082425757f4..50fcbb0ac4e7 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -195,16 +195,10 @@ struct cpuidle_governor {
 };
 
 #ifdef CONFIG_CPU_IDLE
-
 extern int cpuidle_register_governor(struct cpuidle_governor *gov);
-extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
-
 #else
-
 static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
 {return 0;}
-static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { }
-
 #endif
 
 #ifdef CONFIG_ARCH_HAS_CPU_RELAX
-- 
cgit v1.2.3


From 403c1d0be5ccbd750d25c59d8358843a81e52e3b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 25 Oct 2013 12:59:05 +0200
Subject: gpio: provide stubs for devres gpio functions

commit 6b3d8145dcfdbbb43f13544e16f44f4574f941dd
"gpiolib: make GPIO_DEVRES depend on GPIOLIB"
breaks builds when device drivers are using devm_gpio*
devres functions without enabling GPIOLIB, relying on
the devres code to be compiled anyway.

Provide stubs so that we get these if we're using the
devres functions without GPIOLIB.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio.h | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index c691df044458..0c56b9e9c209 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -77,6 +77,15 @@ static inline int irq_to_gpio(unsigned int irq)
 
 #endif /* ! CONFIG_ARCH_HAVE_CUSTOM_GPIO_H */
 
+/* CONFIG_GPIOLIB: bindings for managed devices that want to request gpios */
+
+struct device;
+
+int devm_gpio_request(struct device *dev, unsigned gpio, const char *label);
+int devm_gpio_request_one(struct device *dev, unsigned gpio,
+			  unsigned long flags, const char *label);
+void devm_gpio_free(struct device *dev, unsigned int gpio);
+
 #else /* ! CONFIG_GPIOLIB */
 
 #include <linux/kernel.h>
@@ -241,14 +250,25 @@ gpiochip_remove_pin_ranges(struct gpio_chip *chip)
 	WARN_ON(1);
 }
 
-#endif /* ! CONFIG_GPIOLIB */
+static inline int devm_gpio_request(struct device *dev, unsigned gpio,
+				    const char *label)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
 
-struct device;
+static inline int devm_gpio_request_one(struct device *dev, unsigned gpio,
+					unsigned long flags, const char *label)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
 
-/* bindings for managed devices that want to request gpios */
-int devm_gpio_request(struct device *dev, unsigned gpio, const char *label);
-int devm_gpio_request_one(struct device *dev, unsigned gpio,
-			  unsigned long flags, const char *label);
-void devm_gpio_free(struct device *dev, unsigned int gpio);
+static inline void devm_gpio_free(struct device *dev, unsigned int gpio)
+{
+	WARN_ON(1);
+}
+
+#endif /* ! CONFIG_GPIOLIB */
 
 #endif /* __LINUX_GPIO_H */
-- 
cgit v1.2.3


From 335d7a7d63aa3a6da4d4903ef6e64de4a88f27da Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 29 Oct 2013 20:10:13 +1100
Subject: gpiolib: include gpio/consumer.h in of_gpio.h for desc_to_gpio()

Fixes this build error on sparc:

In file included from drivers/spi/spi.c:33:0:
include/linux/of_gpio.h: In function 'of_get_named_gpio_flags':
include/linux/of_gpio.h:93:3: error: implicit declaration of function 'desc_to_gpio' [-Werror=implicit-function-declaration]

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/of_gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index d71f2cc141ae..f14123a5a9df 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -19,9 +19,9 @@
 #include <linux/errno.h>
 #include <linux/gpio.h>
 #include <linux/of.h>
+#include <linux/gpio/consumer.h>
 
 struct device_node;
-struct gpio_desc;
 
 /*
  * This is Linux-specific flags. By default controllers' and Linux' mapping
-- 
cgit v1.2.3


From f3ed0b66482fa2a0403280174a998487e9054867 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 29 Oct 2013 01:06:23 +1100
Subject: gpiolib: provide a declaration of seq_file in gpio/driver.h

Fixes this build error:

In file included from include/asm-generic/gpio.h:13:0,
                 from include/linux/gpio.h:51,
                 from include/linux/of_gpio.h:20,
                 from arch/powerpc/sysdev/ppc4xx_gpio.c:29:
include/linux/gpio/driver.h:85:14: error: 'struct seq_file' declared inside=
 parameter list [-Werror]
include/linux/gpio/driver.h:85:14: error: its scope is only this definition=
 or declaration, which is probably not what you want [-Werror]

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index cd9da3885d79..656a27efb2c8 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -5,6 +5,7 @@
 
 struct device;
 struct gpio_desc;
+struct seq_file;
 
 /**
  * struct gpio_chip - abstract a GPIO controller
-- 
cgit v1.2.3


From 0b6b098efcddac2bf4e2a895c9b655560bbfcee4 Mon Sep 17 00:00:00 2001
From: Mathias Krause <mathias.krause@secunet.com>
Date: Fri, 25 Oct 2013 12:14:15 +0200
Subject: padata: make the sequence counter an atomic_t

Using a spinlock to atomically increase a counter sounds wrong -- we've
atomic_t for this!

Also move 'seq_nr' to a different cache line than 'lock' to reduce cache
line trashing. This has the nice side effect of decreasing the size of
struct parallel_data from 192 to 128 bytes for a x86-64 build, e.g.
occupying only two instead of three cache lines.

Those changes results in a 5% performance increase on an IPsec test run
using pcrypt.

Btw. the seq_lock spinlock was never explicitly initialized -- one more
reason to get rid of it.

Signed-off-by: Mathias Krause <mathias.krause@secunet.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h | 3 +--
 kernel/padata.c        | 9 ++++-----
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 86292beebfe2..438694650471 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -129,10 +129,9 @@ struct parallel_data {
 	struct padata_serial_queue	__percpu *squeue;
 	atomic_t			reorder_objects;
 	atomic_t			refcnt;
+	atomic_t			seq_nr;
 	struct padata_cpumask		cpumask;
 	spinlock_t                      lock ____cacheline_aligned;
-	spinlock_t                      seq_lock;
-	unsigned int			seq_nr;
 	unsigned int			processed;
 	struct timer_list		timer;
 };
diff --git a/kernel/padata.c b/kernel/padata.c
index 07af2c95dcfe..2abd25d79cc8 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -46,6 +46,7 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 
 static int padata_cpu_hash(struct parallel_data *pd)
 {
+	unsigned int seq_nr;
 	int cpu_index;
 
 	/*
@@ -53,10 +54,8 @@ static int padata_cpu_hash(struct parallel_data *pd)
 	 * seq_nr mod. number of cpus in use.
 	 */
 
-	spin_lock(&pd->seq_lock);
-	cpu_index =  pd->seq_nr % cpumask_weight(pd->cpumask.pcpu);
-	pd->seq_nr++;
-	spin_unlock(&pd->seq_lock);
+	seq_nr = atomic_inc_return(&pd->seq_nr);
+	cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
 
 	return padata_index_to_cpu(pd, cpu_index);
 }
@@ -429,7 +428,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 	padata_init_pqueues(pd);
 	padata_init_squeues(pd);
 	setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
-	pd->seq_nr = 0;
+	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
 	atomic_set(&pd->refcnt, 0);
 	pd->pinst = pinst;
-- 
cgit v1.2.3


From db60d8da8f643586c95f8fc3e383954f8c57f1f3 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Wed, 30 Oct 2013 18:22:30 +0530
Subject: dmanengine: fix edma driver to not define DMA_COMPLETE

edma header defines DMA_COMPLETE, this causes issues as commit adfedd9a32e4 move
DMA_SUCCESS to DMA_COMPLETE. edma should properly namespace its defines and
needs a future fix

Reported-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/common/edma.c             | 4 ++--
 drivers/dma/edma.c                 | 4 ++--
 include/linux/platform_data/edma.h | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 8e1a0245907f..41bca32409fc 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -404,7 +404,7 @@ static irqreturn_t dma_irq_handler(int irq, void *data)
 					BIT(slot));
 			if (edma_cc[ctlr]->intr_data[channel].callback)
 				edma_cc[ctlr]->intr_data[channel].callback(
-					channel, DMA_COMPLETE,
+					channel, EDMA_DMA_COMPLETE,
 					edma_cc[ctlr]->intr_data[channel].data);
 		}
 	} while (sh_ipr);
@@ -459,7 +459,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
 								callback) {
 						edma_cc[ctlr]->intr_data[k].
 						callback(k,
-						DMA_CC_ERROR,
+						EDMA_DMA_CC_ERROR,
 						edma_cc[ctlr]->intr_data
 						[k].data);
 					}
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 29fa35807f09..cb69f3a674e0 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -407,7 +407,7 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 	edma_pause(echan->ch_num);
 
 	switch (ch_status) {
-	case DMA_COMPLETE:
+	case EDMA_DMA_COMPLETE:
 		spin_lock_irqsave(&echan->vchan.lock, flags);
 
 		edesc = echan->edesc;
@@ -426,7 +426,7 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 		spin_unlock_irqrestore(&echan->vchan.lock, flags);
 
 		break;
-	case DMA_CC_ERROR:
+	case EDMA_DMA_CC_ERROR:
 		spin_lock_irqsave(&echan->vchan.lock, flags);
 
 		edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 179fb91bb5f2..f50821cb64be 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -67,10 +67,10 @@ struct edmacc_param {
 #define ITCCHEN		BIT(23)
 
 /*ch_status paramater of callback function possible values*/
-#define DMA_COMPLETE 1
-#define DMA_CC_ERROR 2
-#define DMA_TC1_ERROR 3
-#define DMA_TC2_ERROR 4
+#define EDMA_DMA_COMPLETE 1
+#define EDMA_DMA_CC_ERROR 2
+#define EDMA_DMA_TC1_ERROR 3
+#define EDMA_DMA_TC2_ERROR 4
 
 enum address_mode {
 	INCR = 0,
-- 
cgit v1.2.3


From ec53500fae421e07c5d035918ca454a429732ef4 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 30 Oct 2013 11:02:17 -0600
Subject: kvm: Add VFIO device

So far we've succeeded at making KVM and VFIO mostly unaware of each
other, but areas are cropping up where a connection beyond eventfds
and irqfds needs to be made.  This patch introduces a KVM-VFIO device
that is meant to be a gateway for such interaction.  The user creates
the device and can add and remove VFIO groups to it via file
descriptors.  When a group is added, KVM verifies the group is valid
and gets a reference to it via the VFIO external user interface.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/devices/vfio.txt |  22 +++
 arch/x86/kvm/Kconfig                       |   1 +
 arch/x86/kvm/Makefile                      |   2 +-
 include/linux/kvm_host.h                   |   1 +
 include/uapi/linux/kvm.h                   |   4 +
 virt/kvm/Kconfig                           |   3 +
 virt/kvm/kvm_main.c                        |   5 +
 virt/kvm/vfio.c                            | 220 +++++++++++++++++++++++++++++
 8 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/virtual/kvm/devices/vfio.txt
 create mode 100644 virt/kvm/vfio.c

(limited to 'include/linux')

diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
new file mode 100644
index 000000000000..ef51740c67ca
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -0,0 +1,22 @@
+VFIO virtual device
+===================
+
+Device types supported:
+  KVM_DEV_TYPE_VFIO
+
+Only one VFIO instance may be created per VM.  The created device
+tracks VFIO groups in use by the VM and features of those groups
+important to the correctness and acceleration of the VM.  As groups
+are enabled and disabled for use by the VM, KVM should be updated
+about their presence.  When registered with KVM, a reference to the
+VFIO-group is held by KVM.
+
+Groups:
+  KVM_DEV_VFIO_GROUP
+
+KVM_DEV_VFIO_GROUP attributes:
+  KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+  KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+
+For each, kvm_device_attr.addr points to an int32_t file descriptor
+for the VFIO group.
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a47a3e54b964..b89c5db2b832 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -38,6 +38,7 @@ config KVM
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select KVM_VFIO
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index bf4fb04d0112..25d22b2d6509 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,7 +9,7 @@ KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
 				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
-				$(KVM)/eventfd.o $(KVM)/irqchip.o
+				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c9d4236ab442..7beddbd38ac7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1058,6 +1058,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
+extern struct kvm_device_ops kvm_vfio_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 32c60b98ddf8..509cfbfe9658 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -845,6 +845,10 @@ struct kvm_device_attr {
 #define KVM_DEV_TYPE_FSL_MPIC_20	1
 #define KVM_DEV_TYPE_FSL_MPIC_42	2
 #define KVM_DEV_TYPE_XICS		3
+#define KVM_DEV_TYPE_VFIO		4
+#define  KVM_DEV_VFIO_GROUP			1
+#define   KVM_DEV_VFIO_GROUP_ADD			1
+#define   KVM_DEV_VFIO_GROUP_DEL			2
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 779262f59e25..fbe1a48bd629 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -27,3 +27,6 @@ config HAVE_KVM_MSI
 
 config HAVE_KVM_CPU_RELAX_INTERCEPT
        bool
+
+config KVM_VFIO
+       bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9ca014da134d..82c4047aa0e3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2270,6 +2270,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	case KVM_DEV_TYPE_XICS:
 		ops = &kvm_xics_ops;
 		break;
+#endif
+#ifdef CONFIG_KVM_VFIO
+	case KVM_DEV_TYPE_VFIO:
+		ops = &kvm_vfio_ops;
+		break;
 #endif
 	default:
 		return -ENODEV;
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
new file mode 100644
index 000000000000..597c258245ea
--- /dev/null
+++ b/virt/kvm/vfio.c
@@ -0,0 +1,220 @@
+/*
+ * VFIO-KVM bridge pseudo device
+ *
+ * Copyright (C) 2013 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+struct kvm_vfio_group {
+	struct list_head node;
+	struct vfio_group *vfio_group;
+};
+
+struct kvm_vfio {
+	struct list_head group_list;
+	struct mutex lock;
+};
+
+static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
+{
+	struct vfio_group *vfio_group;
+	struct vfio_group *(*fn)(struct file *);
+
+	fn = symbol_get(vfio_group_get_external_user);
+	if (!fn)
+		return ERR_PTR(-EINVAL);
+
+	vfio_group = fn(filep);
+
+	symbol_put(vfio_group_get_external_user);
+
+	return vfio_group;
+}
+
+static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
+{
+	void (*fn)(struct vfio_group *);
+
+	fn = symbol_get(vfio_group_put_external_user);
+	if (!fn)
+		return;
+
+	fn(vfio_group);
+
+	symbol_put(vfio_group_put_external_user);
+}
+
+static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct vfio_group *vfio_group;
+	struct kvm_vfio_group *kvg;
+	void __user *argp = (void __user *)arg;
+	struct fd f;
+	int32_t fd;
+	int ret;
+
+	switch (attr) {
+	case KVM_DEV_VFIO_GROUP_ADD:
+		if (get_user(fd, (int32_t __user *)argp))
+			return -EFAULT;
+
+		f = fdget(fd);
+		if (!f.file)
+			return -EBADF;
+
+		vfio_group = kvm_vfio_group_get_external_user(f.file);
+		fdput(f);
+
+		if (IS_ERR(vfio_group))
+			return PTR_ERR(vfio_group);
+
+		mutex_lock(&kv->lock);
+
+		list_for_each_entry(kvg, &kv->group_list, node) {
+			if (kvg->vfio_group == vfio_group) {
+				mutex_unlock(&kv->lock);
+				kvm_vfio_group_put_external_user(vfio_group);
+				return -EEXIST;
+			}
+		}
+
+		kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
+		if (!kvg) {
+			mutex_unlock(&kv->lock);
+			kvm_vfio_group_put_external_user(vfio_group);
+			return -ENOMEM;
+		}
+
+		list_add_tail(&kvg->node, &kv->group_list);
+		kvg->vfio_group = vfio_group;
+
+		mutex_unlock(&kv->lock);
+
+		return 0;
+
+	case KVM_DEV_VFIO_GROUP_DEL:
+		if (get_user(fd, (int32_t __user *)argp))
+			return -EFAULT;
+
+		f = fdget(fd);
+		if (!f.file)
+			return -EBADF;
+
+		vfio_group = kvm_vfio_group_get_external_user(f.file);
+		fdput(f);
+
+		if (IS_ERR(vfio_group))
+			return PTR_ERR(vfio_group);
+
+		ret = -ENOENT;
+
+		mutex_lock(&kv->lock);
+
+		list_for_each_entry(kvg, &kv->group_list, node) {
+			if (kvg->vfio_group != vfio_group)
+				continue;
+
+			list_del(&kvg->node);
+			kvm_vfio_group_put_external_user(kvg->vfio_group);
+			kfree(kvg);
+			ret = 0;
+			break;
+		}
+
+		mutex_unlock(&kv->lock);
+
+		kvm_vfio_group_put_external_user(vfio_group);
+
+		return ret;
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_vfio_set_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_VFIO_GROUP:
+		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_vfio_has_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_VFIO_GROUP:
+		switch (attr->attr) {
+		case KVM_DEV_VFIO_GROUP_ADD:
+		case KVM_DEV_VFIO_GROUP_DEL:
+			return 0;
+		}
+
+		break;
+	}
+
+	return -ENXIO;
+}
+
+static void kvm_vfio_destroy(struct kvm_device *dev)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct kvm_vfio_group *kvg, *tmp;
+
+	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+		kvm_vfio_group_put_external_user(kvg->vfio_group);
+		list_del(&kvg->node);
+		kfree(kvg);
+	}
+
+	kfree(kv);
+	kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
+}
+
+static int kvm_vfio_create(struct kvm_device *dev, u32 type)
+{
+	struct kvm_device *tmp;
+	struct kvm_vfio *kv;
+
+	/* Only one VFIO "device" per VM */
+	list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
+		if (tmp->ops == &kvm_vfio_ops)
+			return -EBUSY;
+
+	kv = kzalloc(sizeof(*kv), GFP_KERNEL);
+	if (!kv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&kv->group_list);
+	mutex_init(&kv->lock);
+
+	dev->private = kv;
+
+	return 0;
+}
+
+struct kvm_device_ops kvm_vfio_ops = {
+	.name = "kvm-vfio",
+	.create = kvm_vfio_create,
+	.destroy = kvm_vfio_destroy,
+	.set_attr = kvm_vfio_set_attr,
+	.has_attr = kvm_vfio_has_attr,
+};
-- 
cgit v1.2.3


From d96eb2c6f480769bff32054e78b964860dae4d56 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 30 Oct 2013 11:02:23 -0600
Subject: kvm/x86: Convert iommu_flags to iommu_noncoherent

Default to operating in coherent mode.  This simplifies the logic when
we switch to a model of registering and unregistering noncoherent I/O
with KVM.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/ia64/include/asm/kvm_host.h |  2 +-
 arch/x86/include/asm/kvm_host.h  |  2 +-
 arch/x86/kvm/vmx.c               |  2 +-
 arch/x86/kvm/x86.c               |  2 +-
 include/linux/kvm_host.h         |  3 ---
 virt/kvm/iommu.c                 | 16 ++++++++--------
 6 files changed, 12 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 95a3ff93777c..db95f570705f 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -476,7 +476,7 @@ struct kvm_arch {
 
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
-	int iommu_flags;
+	bool iommu_noncoherent;
 
 	unsigned long irq_sources_bitmap;
 	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5cbf3166257c..91b35e4005d3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -564,7 +564,7 @@ struct kvm_arch {
 
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
-	int iommu_flags;
+	bool iommu_noncoherent;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6850b0f1d52b..727a5e980c43 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7446,7 +7446,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	if (is_mmio)
 		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
 	else if (vcpu->kvm->arch.iommu_domain &&
-		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
+		 vcpu->kvm->arch.iommu_noncoherent)
 		ret = kvm_get_guest_memory_type(vcpu, gfn) <<
 		      VMX_EPT_MT_EPTE_SHIFT;
 	else
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 15f9540a2b1f..92ad83e5b132 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2719,7 +2719,7 @@ static void wbinvd_ipi(void *garbage)
 static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
 {
 	return vcpu->kvm->arch.iommu_domain &&
-		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
+	       vcpu->kvm->arch.iommu_noncoherent;
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7beddbd38ac7..ed64880e4915 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -746,9 +746,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
-/* For vcpu->arch.iommu_flags */
-#define KVM_IOMMU_CACHE_COHERENCY	0x1
-
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index a3b14109049b..d32d156a423a 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -79,7 +79,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	flags = IOMMU_READ;
 	if (!(slot->flags & KVM_MEM_READONLY))
 		flags |= IOMMU_WRITE;
-	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+	if (!kvm->arch.iommu_noncoherent)
 		flags |= IOMMU_CACHE;
 
 
@@ -158,7 +158,8 @@ int kvm_assign_device(struct kvm *kvm,
 {
 	struct pci_dev *pdev = NULL;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
-	int r, last_flags;
+	int r;
+	bool noncoherent;
 
 	/* check if iommu exists and in use */
 	if (!domain)
@@ -174,15 +175,13 @@ int kvm_assign_device(struct kvm *kvm,
 		return r;
 	}
 
-	last_flags = kvm->arch.iommu_flags;
-	if (iommu_domain_has_cap(kvm->arch.iommu_domain,
-				 IOMMU_CAP_CACHE_COHERENCY))
-		kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+	noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain,
+					    IOMMU_CAP_CACHE_COHERENCY);
 
 	/* Check if need to update IOMMU page table for guest memory */
-	if ((last_flags ^ kvm->arch.iommu_flags) ==
-			KVM_IOMMU_CACHE_COHERENCY) {
+	if (noncoherent != kvm->arch.iommu_noncoherent) {
 		kvm_iommu_unmap_memslots(kvm);
+		kvm->arch.iommu_noncoherent = noncoherent;
 		r = kvm_iommu_map_memslots(kvm);
 		if (r)
 			goto out_unmap;
@@ -342,6 +341,7 @@ int kvm_iommu_unmap_guest(struct kvm *kvm)
 	mutex_lock(&kvm->slots_lock);
 	kvm_iommu_unmap_memslots(kvm);
 	kvm->arch.iommu_domain = NULL;
+	kvm->arch.iommu_noncoherent = false;
 	mutex_unlock(&kvm->slots_lock);
 
 	iommu_domain_free(domain);
-- 
cgit v1.2.3


From e0f0bbc527f6e9c0261f1d16b2a0b47612b7f235 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 30 Oct 2013 11:02:30 -0600
Subject: kvm: Create non-coherent DMA registeration

We currently use some ad-hoc arch variables tied to legacy KVM device
assignment to manage emulation of instructions that depend on whether
non-coherent DMA is present.  Create an interface for this, adapting
legacy KVM device assignment and adding VFIO via the KVM-VFIO device.
For now we assume that non-coherent DMA is possible any time we have a
VFIO group.  Eventually an interface can be developed as part of the
VFIO external user interface to query the coherency of a group.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/vmx.c              |  3 +--
 arch/x86/kvm/x86.c              | 22 +++++++++++++++++++--
 include/linux/kvm_host.h        | 19 ++++++++++++++++++
 virt/kvm/iommu.c                |  6 ++++++
 virt/kvm/vfio.c                 | 44 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 92 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 91b35e4005d3..de388c55e7ec 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -565,6 +565,8 @@ struct kvm_arch {
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
 	bool iommu_noncoherent;
+#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
+	atomic_t noncoherent_dma_count;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 727a5e980c43..fabf7421ec18 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7445,8 +7445,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	 */
 	if (is_mmio)
 		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
-	else if (vcpu->kvm->arch.iommu_domain &&
-		 vcpu->kvm->arch.iommu_noncoherent)
+	else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
 		ret = kvm_get_guest_memory_type(vcpu, gfn) <<
 		      VMX_EPT_MT_EPTE_SHIFT;
 	else
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 92ad83e5b132..ec35d09937da 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2718,8 +2718,7 @@ static void wbinvd_ipi(void *garbage)
 
 static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
 {
-	return vcpu->kvm->arch.iommu_domain &&
-	       vcpu->kvm->arch.iommu_noncoherent;
+	return kvm_arch_has_noncoherent_dma(vcpu->kvm);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -6998,6 +6997,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
 	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
@@ -7437,6 +7437,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
+void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
+{
+	atomic_inc(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
+
+void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
+{
+	atomic_dec(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
+
+bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
+{
+	return atomic_read(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ed64880e4915..92aae88756db 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -670,6 +670,25 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
+#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
+void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
+void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
+bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
+#else
+static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
+{
+}
+
+static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
+{
+}
+
+static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
+{
+	return false;
+}
+#endif
+
 static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index d32d156a423a..c7d9ce122529 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -140,6 +140,9 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
 
+	if (kvm->arch.iommu_noncoherent)
+		kvm_arch_register_noncoherent_dma(kvm);
+
 	idx = srcu_read_lock(&kvm->srcu);
 	slots = kvm_memslots(kvm);
 
@@ -327,6 +330,9 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 
 	srcu_read_unlock(&kvm->srcu, idx);
 
+	if (kvm->arch.iommu_noncoherent)
+		kvm_arch_unregister_noncoherent_dma(kvm);
+
 	return 0;
 }
 
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 597c258245ea..ca4260e35037 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -27,6 +27,7 @@ struct kvm_vfio_group {
 struct kvm_vfio {
 	struct list_head group_list;
 	struct mutex lock;
+	bool noncoherent;
 };
 
 static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
@@ -58,6 +59,43 @@ static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
 	symbol_put(vfio_group_put_external_user);
 }
 
+/*
+ * Groups can use the same or different IOMMU domains.  If the same then
+ * adding a new group may change the coherency of groups we've previously
+ * been told about.  We don't want to care about any of that so we retest
+ * each group and bail as soon as we find one that's noncoherent.  This
+ * means we only ever [un]register_noncoherent_dma once for the whole device.
+ */
+static void kvm_vfio_update_coherency(struct kvm_device *dev)
+{
+	struct kvm_vfio *kv = dev->private;
+	bool noncoherent = false;
+	struct kvm_vfio_group *kvg;
+
+	mutex_lock(&kv->lock);
+
+	list_for_each_entry(kvg, &kv->group_list, node) {
+		/*
+		 * TODO: We need an interface to check the coherency of
+		 * the IOMMU domain this group is using.  For now, assume
+		 * it's always noncoherent.
+		 */
+		noncoherent = true;
+		break;
+	}
+
+	if (noncoherent != kv->noncoherent) {
+		kv->noncoherent = noncoherent;
+
+		if (kv->noncoherent)
+			kvm_arch_register_noncoherent_dma(dev->kvm);
+		else
+			kvm_arch_unregister_noncoherent_dma(dev->kvm);
+	}
+
+	mutex_unlock(&kv->lock);
+}
+
 static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 {
 	struct kvm_vfio *kv = dev->private;
@@ -105,6 +143,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 
 		mutex_unlock(&kv->lock);
 
+		kvm_vfio_update_coherency(dev);
+
 		return 0;
 
 	case KVM_DEV_VFIO_GROUP_DEL:
@@ -140,6 +180,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 
 		kvm_vfio_group_put_external_user(vfio_group);
 
+		kvm_vfio_update_coherency(dev);
+
 		return ret;
 	}
 
@@ -185,6 +227,8 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 		kfree(kvg);
 	}
 
+	kvm_vfio_update_coherency(dev);
+
 	kfree(kv);
 	kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
 }
-- 
cgit v1.2.3


From bd09d9a35111b6ffc0c7585d3853d0ec7f9f1eb4 Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Wed, 30 Oct 2013 13:56:20 -0700
Subject: percpu: fix this_cpu_sub() subtrahend casting for unsigneds

this_cpu_sub() is implemented as negation and addition.

This patch casts the adjustment to the counter type before negation to
sign extend the adjustment.  This helps in cases where the counter type
is wider than an unsigned adjustment.  An alternative to this patch is
to declare such operations unsupported, but it seemed useful to avoid
surprises.

This patch specifically helps the following example:
  unsigned int delta = 1
  preempt_disable()
  this_cpu_write(long_counter, 0)
  this_cpu_sub(long_counter, delta)
  preempt_enable()

Before this change long_counter on a 64 bit machine ends with value
0xffffffff, rather than 0xffffffffffffffff.  This is because
this_cpu_sub(pcp, delta) boils down to this_cpu_add(pcp, -delta),
which is basically:
  long_counter = 0 + 0xffffffff

Also apply the same cast to:
  __this_cpu_sub()
  __this_cpu_sub_return()
  this_cpu_sub_return()

All percpu_test.ko passes, especially the following cases which
previously failed:

  l -= ui_one;
  __this_cpu_sub(long_counter, ui_one);
  CHECK(l, long_counter, -1);

  l -= ui_one;
  this_cpu_sub(long_counter, ui_one);
  CHECK(l, long_counter, -1);
  CHECK(l, long_counter, 0xffffffffffffffff);

  ul -= ui_one;
  __this_cpu_sub(ulong_counter, ui_one);
  CHECK(ul, ulong_counter, -1);
  CHECK(ul, ulong_counter, 0xffffffffffffffff);

  ul = this_cpu_sub_return(ulong_counter, ui_one);
  CHECK(ul, ulong_counter, 2);

  ul = __this_cpu_sub_return(ulong_counter, ui_one);
  CHECK(ul, ulong_counter, 1);

Signed-off-by: Greg Thelen <gthelen@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/percpu.h | 3 ++-
 include/linux/percpu.h        | 8 ++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0da5200ee79d..b3e18f800302 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -128,7 +128,8 @@ do {							\
 do {									\
 	typedef typeof(var) pao_T__;					\
 	const int pao_ID__ = (__builtin_constant_p(val) &&		\
-			      ((val) == 1 || (val) == -1)) ? (val) : 0;	\
+			      ((val) == 1 || (val) == -1)) ?		\
+				(int)(val) : 0;				\
 	if (0) {							\
 		pao_T__ pao_tmp__;					\
 		pao_tmp__ = (val);					\
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cc88172c7d9a..c74088ab103b 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -332,7 +332,7 @@ do {									\
 #endif
 
 #ifndef this_cpu_sub
-# define this_cpu_sub(pcp, val)		this_cpu_add((pcp), -(val))
+# define this_cpu_sub(pcp, val)		this_cpu_add((pcp), -(typeof(pcp))(val))
 #endif
 
 #ifndef this_cpu_inc
@@ -418,7 +418,7 @@ do {									\
 # define this_cpu_add_return(pcp, val)	__pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
 #endif
 
-#define this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
+#define this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(typeof(pcp))(val))
 #define this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
 #define this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)
 
@@ -586,7 +586,7 @@ do {									\
 #endif
 
 #ifndef __this_cpu_sub
-# define __this_cpu_sub(pcp, val)	__this_cpu_add((pcp), -(val))
+# define __this_cpu_sub(pcp, val)	__this_cpu_add((pcp), -(typeof(pcp))(val))
 #endif
 
 #ifndef __this_cpu_inc
@@ -668,7 +668,7 @@ do {									\
 	__pcpu_size_call_return2(__this_cpu_add_return_, pcp, val)
 #endif
 
-#define __this_cpu_sub_return(pcp, val)	__this_cpu_add_return(pcp, -(val))
+#define __this_cpu_sub_return(pcp, val)	__this_cpu_add_return(pcp, -(typeof(pcp))(val))
 #define __this_cpu_inc_return(pcp)	__this_cpu_add_return(pcp, 1)
 #define __this_cpu_dec_return(pcp)	__this_cpu_add_return(pcp, -1)
 
-- 
cgit v1.2.3


From 8217d146ab98a1790349d79c436176658e311e3c Mon Sep 17 00:00:00 2001
From: Anna Schumaker <bjschuma@netapp.com>
Date: Wed, 30 Oct 2013 13:38:13 -0400
Subject: NFSD: Add support for NFS v4.2 operation checking

The server does allow NFS over v4.2, even if it doesn't add any new
operations yet.

I also switch to using constants to represent the last operation for
each minor version since this makes the code cleaner and easier to
understand at a quick glance.

Signed-off-by: Anna Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c    | 8 +++++---
 include/linux/nfs4.h | 3 +++
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 99bebea20668..83db5a742aa9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1585,11 +1585,13 @@ static nfsd4_dec nfsd4_dec_ops[] = {
 static inline bool
 nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
 {
-	if (op->opnum < FIRST_NFS4_OP || op->opnum > LAST_NFS4_OP)
+	if (op->opnum < FIRST_NFS4_OP)
 		return false;
-	else if (argp->minorversion == 0 && op->opnum > OP_RELEASE_LOCKOWNER)
+	else if (argp->minorversion == 0 && op->opnum > LAST_NFS40_OP)
 		return false;
-	else if (argp->minorversion == 1 && op->opnum > OP_RECLAIM_COMPLETE)
+	else if (argp->minorversion == 1 && op->opnum > LAST_NFS41_OP)
+		return false;
+	else if (argp->minorversion == 2 && op->opnum > LAST_NFS42_OP)
 		return false;
 	return true;
 }
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index e36dee52f224..737e40e4c554 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -118,6 +118,9 @@ Needs to be updated if more operations are defined in future.*/
 
 #define FIRST_NFS4_OP	OP_ACCESS
 #define LAST_NFS4_OP 	OP_RECLAIM_COMPLETE
+#define LAST_NFS40_OP	OP_RELEASE_LOCKOWNER
+#define LAST_NFS41_OP	OP_RECLAIM_COMPLETE
+#define LAST_NFS42_OP	OP_RECLAIM_COMPLETE
 
 enum nfsstat4 {
 	NFS4_OK = 0,
-- 
cgit v1.2.3


From 7dbf694db6ac7c759599316d50d7050efcbd512a Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 29 Oct 2013 18:56:06 +0530
Subject: cpufreq: distinguish drivers that do asynchronous notifications

There are few special cases like exynos5440 which doesn't send POSTCHANGE
notification from their ->target() routine and call some kind of bottom halves
for doing this work, work/tasklet/etc.. From which they finally send POSTCHANGE
notification.

Its better if we distinguish them from other cpufreq drivers in some way so that
core can handle them specially. So this patch introduces another flag:
CPUFREQ_ASYNC_NOTIFICATION, which will be set by such drivers.

This also changes exynos5440-cpufreq.c and powernow-k8 in order to set this
flag.

Acked-by: Amit Daniel Kachhap <amit.daniel@samsung.com>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/exynos5440-cpufreq.c | 2 +-
 drivers/cpufreq/powernow-k8.c        | 1 +
 include/linux/cpufreq.h              | 7 +++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c
index 1bf9b060d522..76bef8b078cb 100644
--- a/drivers/cpufreq/exynos5440-cpufreq.c
+++ b/drivers/cpufreq/exynos5440-cpufreq.c
@@ -312,7 +312,7 @@ static int exynos_cpufreq_cpu_init(struct cpufreq_policy *policy)
 }
 
 static struct cpufreq_driver exynos_driver = {
-	.flags		= CPUFREQ_STICKY,
+	.flags		= CPUFREQ_STICKY | CPUFREQ_ASYNC_NOTIFICATION,
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= exynos_target,
 	.get		= exynos_getspeed,
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 62a1ce47d3df..0023c7d40a51 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1204,6 +1204,7 @@ out:
 }
 
 static struct cpufreq_driver cpufreq_amd64_driver = {
+	.flags		= CPUFREQ_ASYNC_NOTIFICATION,
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= powernowk8_target,
 	.bios_limit	= acpi_processor_get_bios_limit,
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 93a8c34d6c7f..5bd6ab9b0c27 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -237,6 +237,13 @@ struct cpufreq_driver {
  */
 #define CPUFREQ_HAVE_GOVERNOR_PER_POLICY (1 << 3)
 
+/*
+ * Driver will do POSTCHANGE notifications from outside of their ->target()
+ * routine and so must set cpufreq_driver->flags with this flag, so that core
+ * can handle them specially.
+ */
+#define CPUFREQ_ASYNC_NOTIFICATION  (1 << 4)
+
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
-- 
cgit v1.2.3


From 878e200bbb1fbde9f21582decab95b178e5a3b83 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 9 Sep 2013 11:57:57 +0200
Subject: mmc: core: Do not poll for busy with status cmd for all switch cmds

Some switch operations like poweroff notify, shall according to the
spec not be followed by any other new commands. For these cases and
when the host does'nt support MMC_CAP_WAIT_WHILE_BUSY, we must not
send status commands to poll for busy detection. Instead wait for
the stated timeout from the EXT_CSD before completing the request.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c    |  2 +-
 drivers/mmc/core/mmc.c     |  6 +++---
 drivers/mmc/core/mmc_ops.c | 23 ++++++++++++++++++-----
 include/linux/mmc/core.h   |  3 ++-
 4 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 006ead2fb701..0292ad448b07 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -301,7 +301,7 @@ void mmc_start_bkops(struct mmc_card *card, bool from_exception)
 	}
 
 	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-			EXT_CSD_BKOPS_START, 1, timeout, use_busy_signal);
+			EXT_CSD_BKOPS_START, 1, timeout, use_busy_signal, true);
 	if (err) {
 		pr_warn("%s: Error %d starting bkops\n",
 			mmc_hostname(card->host), err);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 6d02012a1d0b..8f0c51686849 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1404,9 +1404,9 @@ static int mmc_poweroff_notify(struct mmc_card *card, unsigned int notify_type)
 	if (notify_type == EXT_CSD_POWER_OFF_LONG)
 		timeout = card->ext_csd.power_off_longtime;
 
-	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-			 EXT_CSD_POWER_OFF_NOTIFICATION,
-			 notify_type, timeout);
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			EXT_CSD_POWER_OFF_NOTIFICATION,
+			notify_type, timeout, true, false);
 	if (err)
 		pr_err("%s: Power Off Notification timed out, %u\n",
 		       mmc_hostname(card->host), timeout);
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 37f7d7059ab7..aae8d8b45549 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -404,11 +404,12 @@ int mmc_spi_set_crc(struct mmc_host *host, int use_crc)
  *	@timeout_ms: timeout (ms) for operation performed by register write,
  *                   timeout of zero implies maximum possible timeout
  *	@use_busy_signal: use the busy signal as response type
+ *	@send_status: send status cmd to poll for busy
  *
  *	Modifies the EXT_CSD register for selected card.
  */
 int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
-	       unsigned int timeout_ms, bool use_busy_signal)
+		unsigned int timeout_ms, bool use_busy_signal, bool send_status)
 {
 	int err;
 	struct mmc_command cmd = {0};
@@ -454,14 +455,26 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 
 	timeout = jiffies + msecs_to_jiffies(MMC_OPS_TIMEOUT_MS);
 	do {
-		err = __mmc_send_status(card, &status, ignore_crc);
-		if (err)
-			return err;
+		if (send_status) {
+			err = __mmc_send_status(card, &status, ignore_crc);
+			if (err)
+				return err;
+		}
 		if (card->host->caps & MMC_CAP_WAIT_WHILE_BUSY)
 			break;
 		if (mmc_host_is_spi(card->host))
 			break;
 
+		/*
+		 * We are not allowed to issue a status command and the host
+		 * does'nt support MMC_CAP_WAIT_WHILE_BUSY, then we can only
+		 * rely on waiting for the stated timeout to be sufficient.
+		 */
+		if (!send_status) {
+			mmc_delay(timeout_ms);
+			return 0;
+		}
+
 		/* Timeout if the device never leaves the program state. */
 		if (time_after(jiffies, timeout)) {
 			pr_err("%s: Card stuck in programming state! %s\n",
@@ -488,7 +501,7 @@ EXPORT_SYMBOL_GPL(__mmc_switch);
 int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 		unsigned int timeout_ms)
 {
-	return __mmc_switch(card, set, index, value, timeout_ms, true);
+	return __mmc_switch(card, set, index, value, timeout_ms, true, true);
 }
 EXPORT_SYMBOL_GPL(mmc_switch);
 
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index a00fc49c8434..87079fc38011 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -151,7 +151,8 @@ extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *);
 extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *,
 	struct mmc_command *, int);
 extern void mmc_start_bkops(struct mmc_card *card, bool from_exception);
-extern int __mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int, bool);
+extern int __mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int, bool,
+			bool);
 extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int);
 extern int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd);
 
-- 
cgit v1.2.3


From 6904115095ad60ced638eb1e36e0e4e5e7de00b0 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Sep 2013 11:31:33 +0200
Subject: mmc: core: Move cached value of the negotiated ocr mask to card
 struct

The negotiated ocr mask is directly related to the card. Once a card
gets removed, the mask shall be dropped. By moving the cache of the ocr
mask from the host struct to the card struct we have accomplished this.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  |  7 ++++---
 drivers/mmc/core/mmc.c   | 19 ++++++++++---------
 drivers/mmc/core/sd.c    | 22 ++++++++++++----------
 drivers/mmc/core/sdio.c  | 26 ++++++++++++++------------
 include/linux/mmc/card.h |  1 +
 include/linux/mmc/host.h |  1 -
 6 files changed, 41 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 5909ba46b8f2..529d2eff6095 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1574,9 +1574,10 @@ void mmc_power_off(struct mmc_host *host)
 
 	/*
 	 * Reset ocr mask to be the highest possible voltage supported for
-	 * this mmc host. This value will be used at next power up.
+	 * this card. This value will be used at next power up.
 	 */
-	host->ocr = 1 << (fls(host->ocr_avail) - 1);
+	if (host->card)
+		host->card->ocr = 1 << (fls(host->ocr_avail) - 1);
 
 	if (!mmc_host_is_spi(host)) {
 		host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN;
@@ -2550,7 +2551,7 @@ int mmc_power_restore_host(struct mmc_host *host)
 		return -EINVAL;
 	}
 
-	mmc_power_up(host, host->ocr);
+	mmc_power_up(host, host->card->ocr);
 	ret = host->bus_ops->power_restore(host);
 
 	mmc_bus_put(host);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 75c66f975e24..eb7161c11587 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -934,6 +934,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			goto err;
 		}
 
+		card->ocr = ocr;
 		card->type = MMC_TYPE_MMC;
 		card->rca = 1;
 		memcpy(card->raw_cid, cid, sizeof(card->raw_cid));
@@ -1533,9 +1534,9 @@ static int mmc_resume(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
-	mmc_power_up(host, host->ocr);
-	mmc_select_voltage(host, host->ocr);
-	err = mmc_init_card(host, host->ocr, host->card);
+	mmc_power_up(host, host->card->ocr);
+	mmc_select_voltage(host, host->card->ocr);
+	err = mmc_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);
 
 	return err;
@@ -1579,7 +1580,7 @@ static int mmc_runtime_resume(struct mmc_host *host)
 
 	mmc_claim_host(host);
 
-	mmc_power_up(host, host->ocr);
+	mmc_power_up(host, host->card->ocr);
 	err = mmc_resume(host);
 	if (err)
 		pr_err("%s: error %d doing aggessive resume\n",
@@ -1595,7 +1596,7 @@ static int mmc_power_restore(struct mmc_host *host)
 
 	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 	mmc_claim_host(host);
-	ret = mmc_init_card(host, host->ocr, host->card);
+	ret = mmc_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);
 
 	return ret;
@@ -1640,7 +1641,7 @@ static void mmc_attach_bus_ops(struct mmc_host *host)
 int mmc_attach_mmc(struct mmc_host *host)
 {
 	int err;
-	u32 ocr;
+	u32 ocr, rocr;
 
 	BUG_ON(!host);
 	WARN_ON(!host->claimed);
@@ -1677,12 +1678,12 @@ int mmc_attach_mmc(struct mmc_host *host)
 		ocr &= ~0x7F;
 	}
 
-	host->ocr = mmc_select_voltage(host, ocr);
+	rocr = mmc_select_voltage(host, ocr);
 
 	/*
 	 * Can we support the voltage of the card?
 	 */
-	if (!host->ocr) {
+	if (!rocr) {
 		err = -EINVAL;
 		goto err;
 	}
@@ -1690,7 +1691,7 @@ int mmc_attach_mmc(struct mmc_host *host)
 	/*
 	 * Detect and init the card.
 	 */
-	err = mmc_init_card(host, host->ocr, NULL);
+	err = mmc_init_card(host, rocr, NULL);
 	if (err)
 		goto err;
 
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 5818887fffb3..398065c57edf 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -721,6 +721,7 @@ int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid, u32 *rocr)
 	int err;
 	u32 max_current;
 	int retries = 10;
+	u32 pocr = ocr;
 
 try_again:
 	if (!retries) {
@@ -774,7 +775,7 @@ try_again:
 	if (!mmc_host_is_spi(host) && rocr &&
 	   ((*rocr & 0x41000000) == 0x41000000)) {
 		err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180,
-					host->ocr);
+					pocr);
 		if (err == -EAGAIN) {
 			retries--;
 			goto try_again;
@@ -936,6 +937,7 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 		if (IS_ERR(card))
 			return PTR_ERR(card);
 
+		card->ocr = ocr;
 		card->type = MMC_TYPE_SD;
 		memcpy(card->raw_cid, cid, sizeof(card->raw_cid));
 	}
@@ -1100,9 +1102,9 @@ static int mmc_sd_resume(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
-	mmc_power_up(host, host->ocr);
-	mmc_select_voltage(host, host->ocr);
-	err = mmc_sd_init_card(host, host->ocr, host->card);
+	mmc_power_up(host, host->card->ocr);
+	mmc_select_voltage(host, host->card->ocr);
+	err = mmc_sd_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);
 
 	return err;
@@ -1145,7 +1147,7 @@ static int mmc_sd_runtime_resume(struct mmc_host *host)
 
 	mmc_claim_host(host);
 
-	mmc_power_up(host, host->ocr);
+	mmc_power_up(host, host->card->ocr);
 	err = mmc_sd_resume(host);
 	if (err)
 		pr_err("%s: error %d doing aggessive resume\n",
@@ -1161,7 +1163,7 @@ static int mmc_sd_power_restore(struct mmc_host *host)
 
 	host->card->state &= ~MMC_STATE_HIGHSPEED;
 	mmc_claim_host(host);
-	ret = mmc_sd_init_card(host, host->ocr, host->card);
+	ret = mmc_sd_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);
 
 	return ret;
@@ -1206,7 +1208,7 @@ static void mmc_sd_attach_bus_ops(struct mmc_host *host)
 int mmc_attach_sd(struct mmc_host *host)
 {
 	int err;
-	u32 ocr;
+	u32 ocr, rocr;
 
 	BUG_ON(!host);
 	WARN_ON(!host->claimed);
@@ -1249,12 +1251,12 @@ int mmc_attach_sd(struct mmc_host *host)
 		ocr &= ~MMC_VDD_165_195;
 	}
 
-	host->ocr = mmc_select_voltage(host, ocr);
+	rocr = mmc_select_voltage(host, ocr);
 
 	/*
 	 * Can we support the voltage(s) of the card(s)?
 	 */
-	if (!host->ocr) {
+	if (!rocr) {
 		err = -EINVAL;
 		goto err;
 	}
@@ -1262,7 +1264,7 @@ int mmc_attach_sd(struct mmc_host *host)
 	/*
 	 * Detect and init the card.
 	 */
-	err = mmc_sd_init_card(host, host->ocr, NULL);
+	err = mmc_sd_init_card(host, rocr, NULL);
 	if (err)
 		goto err;
 
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 9587d9f8cf1c..e0a135a635f5 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -594,6 +594,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 	int err;
 	int retries = 10;
 	u32 rocr = 0;
+	u32 ocr_card = ocr;
 
 	BUG_ON(!host);
 	WARN_ON(!host->claimed);
@@ -762,6 +763,7 @@ try_again:
 
 		card = oldcard;
 	}
+	card->ocr = ocr_card;
 	mmc_fixup_device(card, NULL);
 
 	if (card->type == MMC_TYPE_SD_COMBO) {
@@ -984,8 +986,8 @@ static int mmc_sdio_resume(struct mmc_host *host)
 
 	/* Restore power if needed */
 	if (!mmc_card_keep_power(host)) {
-		mmc_power_up(host, host->ocr);
-		mmc_select_voltage(host, host->ocr);
+		mmc_power_up(host, host->card->ocr);
+		mmc_select_voltage(host, host->card->ocr);
 		/*
 		 * Tell runtime PM core we just powered up the card,
 		 * since it still believes the card is powered off.
@@ -1003,7 +1005,7 @@ static int mmc_sdio_resume(struct mmc_host *host)
 	if (mmc_card_is_removable(host) || !mmc_card_keep_power(host)) {
 		sdio_reset(host);
 		mmc_go_idle(host);
-		err = mmc_sdio_init_card(host, host->ocr, host->card,
+		err = mmc_sdio_init_card(host, host->card->ocr, host->card,
 					mmc_card_keep_power(host));
 	} else if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
 		/* We may have switched to 1-bit mode during suspend */
@@ -1043,7 +1045,7 @@ static int mmc_sdio_resume(struct mmc_host *host)
 static int mmc_sdio_power_restore(struct mmc_host *host)
 {
 	int ret;
-	u32 ocr;
+	u32 ocr, rocr;
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
@@ -1080,13 +1082,13 @@ static int mmc_sdio_power_restore(struct mmc_host *host)
 	if (host->ocr_avail_sdio)
 		host->ocr_avail = host->ocr_avail_sdio;
 
-	host->ocr = mmc_select_voltage(host, ocr & ~0x7F);
-	if (!host->ocr) {
+	rocr = mmc_select_voltage(host, ocr & ~0x7F);
+	if (!rocr) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	ret = mmc_sdio_init_card(host, host->ocr, host->card,
+	ret = mmc_sdio_init_card(host, rocr, host->card,
 				mmc_card_keep_power(host));
 	if (!ret && host->sdio_irqs)
 		mmc_signal_sdio_irq(host);
@@ -1107,7 +1109,7 @@ static int mmc_sdio_runtime_suspend(struct mmc_host *host)
 static int mmc_sdio_runtime_resume(struct mmc_host *host)
 {
 	/* Restore power and re-initialize. */
-	mmc_power_up(host, host->ocr);
+	mmc_power_up(host, host->card->ocr);
 	return mmc_sdio_power_restore(host);
 }
 
@@ -1130,7 +1132,7 @@ static const struct mmc_bus_ops mmc_sdio_ops = {
 int mmc_attach_sdio(struct mmc_host *host)
 {
 	int err, i, funcs;
-	u32 ocr;
+	u32 ocr, rocr;
 	struct mmc_card *card;
 
 	BUG_ON(!host);
@@ -1155,12 +1157,12 @@ int mmc_attach_sdio(struct mmc_host *host)
 		ocr &= ~0x7F;
 	}
 
-	host->ocr = mmc_select_voltage(host, ocr);
+	rocr = mmc_select_voltage(host, ocr);
 
 	/*
 	 * Can we support the voltage(s) of the card(s)?
 	 */
-	if (!host->ocr) {
+	if (!rocr) {
 		err = -EINVAL;
 		goto err;
 	}
@@ -1168,7 +1170,7 @@ int mmc_attach_sdio(struct mmc_host *host)
 	/*
 	 * Detect and init the card.
 	 */
-	err = mmc_sdio_init_card(host, host->ocr, NULL, 0);
+	err = mmc_sdio_init_card(host, rocr, NULL, 0);
 	if (err)
 		goto err;
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index f42cdbd8ac21..33d9a74f92e6 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -240,6 +240,7 @@ struct mmc_part {
 struct mmc_card {
 	struct mmc_host		*host;		/* the host this device belongs to */
 	struct device		dev;		/* the device */
+	u32			ocr;		/* the current OCR setting */
 	unsigned int		rca;		/* relative card address of device */
 	unsigned int		type;		/* card type */
 #define MMC_TYPE_MMC		0		/* MMC card */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 3b0c33ae13e1..1c91bbbb01c3 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -309,7 +309,6 @@ struct mmc_host {
 	spinlock_t		lock;		/* lock for claim and bus ops */
 
 	struct mmc_ios		ios;		/* current io bus settings */
-	u32			ocr;		/* the current OCR setting */
 
 	/* group bitfields together to minimize padding */
 	unsigned int		use_spi_crc:1;
-- 
cgit v1.2.3


From 3c0d22e8180b98eea412b84aa0f0c42c16159679 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 26 Sep 2013 11:01:18 +0200
Subject: mmc: core: Remove deprecated mmc_suspend|resume_host APIs

The are no more users of the deprecated mmc_suspend|resume_host API,
so let's remove it.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 22 ----------------------
 include/linux/mmc/host.h |  3 ---
 2 files changed, 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 49dfafa99cc1..57a2b403bf8e 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2641,28 +2641,6 @@ EXPORT_SYMBOL(mmc_cache_ctrl);
 
 #ifdef CONFIG_PM
 
-/**
- *	mmc_suspend_host - suspend a host
- *	@host: mmc host
- */
-int mmc_suspend_host(struct mmc_host *host)
-{
-	/* This function is deprecated */
-	return 0;
-}
-EXPORT_SYMBOL(mmc_suspend_host);
-
-/**
- *	mmc_resume_host - resume a previously suspended host
- *	@host: mmc host
- */
-int mmc_resume_host(struct mmc_host *host)
-{
-	/* This function is deprecated */
-	return 0;
-}
-EXPORT_SYMBOL(mmc_resume_host);
-
 /* Do the card removal on suspend if card is assumed removeable
  * Do that in pm notifier while userspace isn't yet frozen, so we will be able
    to sync the card.
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 1c91bbbb01c3..f18669e15d4d 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -381,9 +381,6 @@ static inline void *mmc_priv(struct mmc_host *host)
 #define mmc_classdev(x)	(&(x)->class_dev)
 #define mmc_hostname(x)	(dev_name(&(x)->class_dev))
 
-int mmc_suspend_host(struct mmc_host *);
-int mmc_resume_host(struct mmc_host *);
-
 int mmc_power_save_host(struct mmc_host *host);
 int mmc_power_restore_host(struct mmc_host *host);
 
-- 
cgit v1.2.3


From 9ec775f7efd6d17084b4f361804d2030d50fca0e Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 2 Oct 2013 17:37:09 +0200
Subject: mmc: Don't force card to active state when entering suspend/shutdown

By adding a card state that records if it is suspended or resumed, we
can accept asyncronus suspend/resume requests for the mmc and sd
bus_ops.

MMC_CAP_AGGRESSIVE_PM, will at request inactivity through the runtime
bus_ops callbacks, execute a suspend of the the card. In the state were
this has been done, we can receive a suspend request for the mmc bus,
which for sd and mmc forced the card to active state by a
pm_runtime_get_sync. In other words, the card was resumed and then
immediately suspended again, completely unnecessary.

Since the suspend/resume bus_ops callbacks for sd and mmc are now
capable of handling asynchronous requests, we no longer need to force
the card to active state before executing suspend. Evidently preventing
the above sequence for MMC_CAP_AGGRESSIVE_PM.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/card/block.c |  2 --
 drivers/mmc/core/mmc.c   | 45 ++++++++++++++++++++++++++++++++++-----------
 drivers/mmc/core/sd.c    | 21 +++++++++++++++++----
 include/linux/mmc/card.h |  4 ++++
 4 files changed, 55 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 1a3163f1407e..29d5d988a51c 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -2448,7 +2448,6 @@ static int _mmc_blk_suspend(struct mmc_card *card)
 	struct mmc_blk_data *md = mmc_get_drvdata(card);
 
 	if (md) {
-		pm_runtime_get_sync(&card->dev);
 		mmc_queue_suspend(&md->queue);
 		list_for_each_entry(part_md, &md->part, part) {
 			mmc_queue_suspend(&part_md->queue);
@@ -2483,7 +2482,6 @@ static int mmc_blk_resume(struct mmc_card *card)
 		list_for_each_entry(part_md, &md->part, part) {
 			mmc_queue_resume(&part_md->queue);
 		}
-		pm_runtime_put(&card->dev);
 	}
 	return 0;
 }
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 0d060227cfc1..84d8694cf369 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1478,6 +1478,9 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
 
 	mmc_claim_host(host);
 
+	if (mmc_card_suspended(host->card))
+		goto out;
+
 	if (mmc_card_doing_bkops(host->card)) {
 		err = mmc_stop_bkops(host->card);
 		if (err)
@@ -1497,8 +1500,10 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
 		err = mmc_deselect_cards(host);
 	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 
-	if (!err)
+	if (!err) {
 		mmc_power_off(host);
+		mmc_card_set_suspended(host->card);
+	}
 out:
 	mmc_release_host(host);
 	return err;
@@ -1512,14 +1517,6 @@ static int mmc_suspend(struct mmc_host *host)
 	return _mmc_suspend(host, true);
 }
 
-/*
- * Shutdown callback
- */
-static int mmc_shutdown(struct mmc_host *host)
-{
-	return _mmc_suspend(host, false);
-}
-
 /*
  * Resume callback from host.
  *
@@ -1528,19 +1525,45 @@ static int mmc_shutdown(struct mmc_host *host)
  */
 static int mmc_resume(struct mmc_host *host)
 {
-	int err;
+	int err = 0;
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
+
+	if (!mmc_card_suspended(host->card))
+		goto out;
+
 	mmc_power_up(host, host->card->ocr);
 	err = mmc_init_card(host, host->card->ocr, host->card);
-	mmc_release_host(host);
+	mmc_card_clr_suspended(host->card);
 
+out:
+	mmc_release_host(host);
 	return err;
 }
 
+/*
+ * Shutdown callback
+ */
+static int mmc_shutdown(struct mmc_host *host)
+{
+	int err = 0;
+
+	/*
+	 * In a specific case for poweroff notify, we need to resume the card
+	 * before we can shutdown it properly.
+	 */
+	if (mmc_can_poweroff_notify(host->card) &&
+		!(host->caps2 & MMC_CAP2_FULL_PWR_CYCLE))
+		err = mmc_resume(host);
+
+	if (!err)
+		err = _mmc_suspend(host, false);
+
+	return err;
+}
 
 /*
  * Callback for runtime_suspend.
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 6ef84d0ca178..685796560f8b 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1078,13 +1078,20 @@ static int mmc_sd_suspend(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
+
+	if (mmc_card_suspended(host->card))
+		goto out;
+
 	if (!mmc_host_is_spi(host))
 		err = mmc_deselect_cards(host);
 	host->card->state &= ~MMC_STATE_HIGHSPEED;
-	if (!err)
+	if (!err) {
 		mmc_power_off(host);
-	mmc_release_host(host);
+		mmc_card_set_suspended(host->card);
+	}
 
+out:
+	mmc_release_host(host);
 	return err;
 }
 
@@ -1096,16 +1103,22 @@ static int mmc_sd_suspend(struct mmc_host *host)
  */
 static int mmc_sd_resume(struct mmc_host *host)
 {
-	int err;
+	int err = 0;
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
+
+	if (!mmc_card_suspended(host->card))
+		goto out;
+
 	mmc_power_up(host, host->card->ocr);
 	err = mmc_sd_init_card(host, host->card->ocr, host->card);
-	mmc_release_host(host);
+	mmc_card_clr_suspended(host->card);
 
+out:
+	mmc_release_host(host);
 	return err;
 }
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 33d9a74f92e6..176fdf824b14 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -258,6 +258,7 @@ struct mmc_card {
 #define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
 #define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
 #define MMC_STATE_DOING_BKOPS	(1<<10)		/* card is doing BKOPS */
+#define MMC_STATE_SUSPENDED	(1<<11)		/* card is suspended */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -424,6 +425,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 #define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
 #define mmc_card_doing_bkops(c)	((c)->state & MMC_STATE_DOING_BKOPS)
+#define mmc_card_suspended(c)	((c)->state & MMC_STATE_SUSPENDED)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
@@ -436,6 +438,8 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
 #define mmc_card_set_doing_bkops(c)	((c)->state |= MMC_STATE_DOING_BKOPS)
 #define mmc_card_clr_doing_bkops(c)	((c)->state &= ~MMC_STATE_DOING_BKOPS)
+#define mmc_card_set_suspended(c) ((c)->state |= MMC_STATE_SUSPENDED)
+#define mmc_card_clr_suspended(c) ((c)->state &= ~MMC_STATE_SUSPENDED)
 
 /*
  * Quirk add/remove for MMC products.
-- 
cgit v1.2.3


From 4d22378221bd0ed69c2e99408d31c108d72aeb80 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 10 Oct 2013 17:22:23 +0200
Subject: mmc: core: Add MMC_CAP_RUNTIME_RESUME to resume at runtime_resume

In some environments it is to prefer to postpone the resume of the card
device until runtime_resume is being carried out, since it will mean a
signficant decrease of the total system resume time.

The reason of the decreased resume time is simply because of the actual
re-initalization of the card, which typically takes hundreds of
milliseconds, is performed outside the resume sequence and wont thus
affect it.

For removable card, the detect work tries to re-detect the card to make
sure it is still present, as a part of that sequence the card will also
be runtime_resumed and thus also fully resumed.

For a non-removable card, typically a mmc blk request will trigger a
runtime_resume and thus fully resume the card. This also means the
first request will likely suffer from an inital latency since the
re-initialization of the card needs to be performed.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/mmc.c   | 12 +++++++-----
 drivers/mmc/core/sd.c    | 12 +++++++-----
 include/linux/mmc/host.h |  1 +
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 7f5dc26865de..f631f5a9bf79 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1577,11 +1577,13 @@ static int mmc_shutdown(struct mmc_host *host)
  */
 static int mmc_resume(struct mmc_host *host)
 {
-	int err;
+	int err = 0;
 
-	err = _mmc_resume(host);
-	pm_runtime_set_active(&host->card->dev);
-	pm_runtime_mark_last_busy(&host->card->dev);
+	if (!(host->caps & MMC_CAP_RUNTIME_RESUME)) {
+		err = _mmc_resume(host);
+		pm_runtime_set_active(&host->card->dev);
+		pm_runtime_mark_last_busy(&host->card->dev);
+	}
 	pm_runtime_enable(&host->card->dev);
 
 	return err;
@@ -1612,7 +1614,7 @@ static int mmc_runtime_resume(struct mmc_host *host)
 {
 	int err;
 
-	if (!(host->caps & MMC_CAP_AGGRESSIVE_PM))
+	if (!(host->caps & (MMC_CAP_AGGRESSIVE_PM | MMC_CAP_RUNTIME_RESUME)))
 		return 0;
 
 	err = _mmc_resume(host);
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 447fa8e9f322..6f42050b7ccc 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1139,11 +1139,13 @@ out:
  */
 static int mmc_sd_resume(struct mmc_host *host)
 {
-	int err;
+	int err = 0;
 
-	err = _mmc_sd_resume(host);
-	pm_runtime_set_active(&host->card->dev);
-	pm_runtime_mark_last_busy(&host->card->dev);
+	if (!(host->caps & MMC_CAP_RUNTIME_RESUME)) {
+		err = _mmc_sd_resume(host);
+		pm_runtime_set_active(&host->card->dev);
+		pm_runtime_mark_last_busy(&host->card->dev);
+	}
 	pm_runtime_enable(&host->card->dev);
 
 	return err;
@@ -1174,7 +1176,7 @@ static int mmc_sd_runtime_resume(struct mmc_host *host)
 {
 	int err;
 
-	if (!(host->caps & MMC_CAP_AGGRESSIVE_PM))
+	if (!(host->caps & (MMC_CAP_AGGRESSIVE_PM | MMC_CAP_RUNTIME_RESUME)))
 		return 0;
 
 	err = _mmc_sd_resume(host);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index f18669e15d4d..99f5709ac343 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -254,6 +254,7 @@ struct mmc_host {
 #define MMC_CAP_UHS_SDR50	(1 << 17)	/* Host supports UHS SDR50 mode */
 #define MMC_CAP_UHS_SDR104	(1 << 18)	/* Host supports UHS SDR104 mode */
 #define MMC_CAP_UHS_DDR50	(1 << 19)	/* Host supports UHS DDR50 mode */
+#define MMC_CAP_RUNTIME_RESUME	(1 << 20)	/* Resume at runtime_resume. */
 #define MMC_CAP_DRIVER_TYPE_A	(1 << 23)	/* Host supports Driver Type A */
 #define MMC_CAP_DRIVER_TYPE_C	(1 << 24)	/* Host supports Driver Type C */
 #define MMC_CAP_DRIVER_TYPE_D	(1 << 25)	/* Host supports Driver Type D */
-- 
cgit v1.2.3


From a3e31b4588443f37d82195096c6b30dff1c152c2 Mon Sep 17 00:00:00 2001
From: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Date: Wed, 18 Sep 2013 11:53:05 +0100
Subject: of: Move definition of of_find_next_cache_node into common code.

Since the definition of_find_next_cache_node is architecture independent,
the existing definition in powerpc can be moved to driver/of/base.c

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/prom.h |  3 ---
 arch/powerpc/kernel/prom.c      | 31 -------------------------------
 drivers/of/base.c               | 31 +++++++++++++++++++++++++++++++
 include/linux/of.h              |  2 ++
 4 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 7d0c7f3a7171..bf09e5a065b8 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -44,9 +44,6 @@ void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window,
 
 extern void kdump_move_device_tree(void);
 
-/* cache lookup */
-struct device_node *of_find_next_cache_node(struct device_node *np);
-
 #ifdef CONFIG_NUMA
 extern int of_node_to_nid(struct device_node *device);
 #else
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 09be2759c314..4432fd86a6d2 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -760,37 +760,6 @@ void __init early_init_devtree(void *params)
  *
  *******/
 
-/**
- *	of_find_next_cache_node - Find a node's subsidiary cache
- *	@np:	node of type "cpu" or "cache"
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.  Caller should hold a reference
- *	to np.
- */
-struct device_node *of_find_next_cache_node(struct device_node *np)
-{
-	struct device_node *child;
-	const phandle *handle;
-
-	handle = of_get_property(np, "l2-cache", NULL);
-	if (!handle)
-		handle = of_get_property(np, "next-level-cache", NULL);
-
-	if (handle)
-		return of_find_node_by_phandle(be32_to_cpup(handle));
-
-	/* OF on pmac has nodes instead of properties named "l2-cache"
-	 * beneath CPU nodes.
-	 */
-	if (!strcmp(np->type, "cpu"))
-		for_each_child_of_node(np, child)
-			if (!strcmp(child->type, "cache"))
-				return child;
-
-	return NULL;
-}
-
 /**
  * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device
  * @np: device node of the device
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 865d3f66c86b..b2cee3db5ceb 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1884,3 +1884,34 @@ int of_device_is_stdout_path(struct device_node *dn)
 	return of_stdout == dn;
 }
 EXPORT_SYMBOL_GPL(of_device_is_stdout_path);
+
+/**
+ *	of_find_next_cache_node - Find a node's subsidiary cache
+ *	@np:	node of type "cpu" or "cache"
+ *
+ *	Returns a node pointer with refcount incremented, use
+ *	of_node_put() on it when done.  Caller should hold a reference
+ *	to np.
+ */
+struct device_node *of_find_next_cache_node(const struct device_node *np)
+{
+	struct device_node *child;
+	const phandle *handle;
+
+	handle = of_get_property(np, "l2-cache", NULL);
+	if (!handle)
+		handle = of_get_property(np, "next-level-cache", NULL);
+
+	if (handle)
+		return of_find_node_by_phandle(be32_to_cpup(handle));
+
+	/* OF on pmac has nodes instead of properties named "l2-cache"
+	 * beneath CPU nodes.
+	 */
+	if (!strcmp(np->type, "cpu"))
+		for_each_child_of_node(np, child)
+			if (!strcmp(child->type, "cache"))
+				return child;
+
+	return NULL;
+}
diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..c08c07e249b3 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -226,6 +226,8 @@ static inline int of_get_child_count(const struct device_node *np)
 	return num;
 }
 
+/* cache lookup */
+extern struct device_node *of_find_next_cache_node(const struct device_node *);
 extern struct device_node *of_find_node_with_property(
 	struct device_node *from, const char *prop_name);
 #define for_each_node_with_property(dn, prop_name) \
-- 
cgit v1.2.3


From e1e906448d2fc6f5a69e1967e00868f0cbfbb566 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 24 Sep 2013 13:59:01 +0200
Subject: gpu: host1x: Make host1x header file public

In preparation to support host1x clients other than DRM, move this
header into a public location.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 MAINTAINERS                        |  1 +
 drivers/gpu/host1x/drm/drm.h       |  6 +++---
 drivers/gpu/host1x/drm/gr2d.c      |  1 -
 drivers/gpu/host1x/host1x.h        | 30 ------------------------------
 drivers/gpu/host1x/hw/channel_hw.c |  3 ++-
 include/linux/host1x.h             | 28 ++++++++++++++++++++++++++++
 6 files changed, 34 insertions(+), 35 deletions(-)
 delete mode 100644 drivers/gpu/host1x/host1x.h
 create mode 100644 include/linux/host1x.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 284969fa2896..89f347ae077f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2817,6 +2817,7 @@ L:	linux-tegra@vger.kernel.org
 T:	git git://anongit.freedesktop.org/tegra/linux.git
 S:	Maintained
 F:	drivers/gpu/host1x/
+F:	include/linux/host1x.h
 F:	include/uapi/drm/tegra_drm.h
 F:	Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
 
diff --git a/drivers/gpu/host1x/drm/drm.h b/drivers/gpu/host1x/drm/drm.h
index dd6b98b18640..78754f6a9153 100644
--- a/drivers/gpu/host1x/drm/drm.h
+++ b/drivers/gpu/host1x/drm/drm.h
@@ -10,14 +10,14 @@
 #ifndef HOST1X_DRM_H
 #define HOST1X_DRM_H 1
 
+#include <uapi/drm/tegra_drm.h>
+#include <linux/host1x.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_fixed.h>
-#include <uapi/drm/tegra_drm.h>
-
-#include "host1x.h"
 
 struct tegra_fb {
 	struct drm_framebuffer base;
diff --git a/drivers/gpu/host1x/drm/gr2d.c b/drivers/gpu/host1x/drm/gr2d.c
index f045f7c0a91c..2691e333e0e2 100644
--- a/drivers/gpu/host1x/drm/gr2d.c
+++ b/drivers/gpu/host1x/drm/gr2d.c
@@ -20,7 +20,6 @@
 #include "drm.h"
 #include "gem.h"
 #include "job.h"
-#include "host1x.h"
 #include "host1x_bo.h"
 #include "host1x_client.h"
 #include "syncpt.h"
diff --git a/drivers/gpu/host1x/host1x.h b/drivers/gpu/host1x/host1x.h
deleted file mode 100644
index a2bc1e65e972..000000000000
--- a/drivers/gpu/host1x/host1x.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Tegra host1x driver
- *
- * Copyright (c) 2009-2013, NVIDIA Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef __LINUX_HOST1X_H
-#define __LINUX_HOST1X_H
-
-enum host1x_class {
-	HOST1X_CLASS_HOST1X	= 0x1,
-	HOST1X_CLASS_GR2D	= 0x51,
-	HOST1X_CLASS_GR2D_SB    = 0x52
-};
-
-#endif
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index ee199623e365..c950bc655ade 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -16,10 +16,11 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/host1x.h>
 #include <linux/slab.h>
+
 #include <trace/events/host1x.h>
 
-#include "host1x.h"
 #include "host1x_bo.h"
 #include "channel.h"
 #include "dev.h"
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
new file mode 100644
index 000000000000..fe09939800bc
--- /dev/null
+++ b/include/linux/host1x.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2009-2013, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __LINUX_HOST1X_H
+#define __LINUX_HOST1X_H
+
+enum host1x_class {
+	HOST1X_CLASS_HOST1X = 0x1,
+	HOST1X_CLASS_GR2D = 0x51,
+	HOST1X_CLASS_GR2D_SB = 0x52,
+};
+
+#endif
-- 
cgit v1.2.3


From 53fa7f7204c97dc0c86b99ff8365ad6a7b2ebd78 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 24 Sep 2013 15:35:40 +0200
Subject: drm/tegra: Introduce tegra_drm_client structure

This structure derives from host1x_client. DRM-specific fields are moved
from host1x_client to this structure, so that host1x_client can remain
agnostic of DRM.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/drm/dc.c   | 35 ++++++++++++++++++-----------------
 drivers/gpu/host1x/drm/drm.c  | 30 +++++++++++++++++++-----------
 drivers/gpu/host1x/drm/drm.h  | 37 +++++++++++++++++--------------------
 drivers/gpu/host1x/drm/gr2d.c | 42 +++++++++++++++++++++++-------------------
 drivers/gpu/host1x/drm/hdmi.c | 33 +++++++++++++++++----------------
 include/linux/host1x.h        | 20 ++++++++++++++++++++
 6 files changed, 114 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/host1x/drm/dc.c b/drivers/gpu/host1x/drm/dc.c
index e11aec779a15..5106df08f046 100644
--- a/drivers/gpu/host1x/drm/dc.c
+++ b/drivers/gpu/host1x/drm/dc.c
@@ -1038,30 +1038,30 @@ static int tegra_dc_debugfs_exit(struct tegra_dc *dc)
 	return 0;
 }
 
-static int tegra_dc_drm_init(struct host1x_client *client,
-			     struct drm_device *drm)
+static int tegra_dc_init(struct host1x_client *client)
 {
-	struct tegra_dc *dc = host1x_client_to_dc(client);
+	struct tegra_drm_client *drm = to_tegra_drm_client(client);
+	struct tegra_dc *dc = tegra_drm_client_to_dc(drm);
 	int err;
 
-	dc->pipe = drm->mode_config.num_crtc;
+	dc->pipe = drm->drm->mode_config.num_crtc;
 
-	drm_crtc_init(drm, &dc->base, &tegra_crtc_funcs);
+	drm_crtc_init(drm->drm, &dc->base, &tegra_crtc_funcs);
 	drm_mode_crtc_set_gamma_size(&dc->base, 256);
 	drm_crtc_helper_add(&dc->base, &tegra_crtc_helper_funcs);
 
-	err = tegra_dc_rgb_init(drm, dc);
+	err = tegra_dc_rgb_init(drm->drm, dc);
 	if (err < 0 && err != -ENODEV) {
 		dev_err(dc->dev, "failed to initialize RGB output: %d\n", err);
 		return err;
 	}
 
-	err = tegra_dc_add_planes(drm, dc);
+	err = tegra_dc_add_planes(drm->drm, dc);
 	if (err < 0)
 		return err;
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_dc_debugfs_init(dc, drm->primary);
+		err = tegra_dc_debugfs_init(dc, drm->drm->primary);
 		if (err < 0)
 			dev_err(dc->dev, "debugfs setup failed: %d\n", err);
 	}
@@ -1077,9 +1077,10 @@ static int tegra_dc_drm_init(struct host1x_client *client,
 	return 0;
 }
 
-static int tegra_dc_drm_exit(struct host1x_client *client)
+static int tegra_dc_exit(struct host1x_client *client)
 {
-	struct tegra_dc *dc = host1x_client_to_dc(client);
+	struct tegra_drm_client *drm = to_tegra_drm_client(client);
+	struct tegra_dc *dc = tegra_drm_client_to_dc(drm);
 	int err;
 
 	devm_free_irq(dc->dev, dc->irq, dc);
@@ -1100,8 +1101,8 @@ static int tegra_dc_drm_exit(struct host1x_client *client)
 }
 
 static const struct host1x_client_ops dc_client_ops = {
-	.drm_init = tegra_dc_drm_init,
-	.drm_exit = tegra_dc_drm_exit,
+	.init = tegra_dc_init,
+	.exit = tegra_dc_exit,
 };
 
 static int tegra_dc_probe(struct platform_device *pdev)
@@ -1140,9 +1141,9 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	INIT_LIST_HEAD(&dc->client.list);
-	dc->client.ops = &dc_client_ops;
-	dc->client.dev = &pdev->dev;
+	INIT_LIST_HEAD(&dc->client.base.list);
+	dc->client.base.ops = &dc_client_ops;
+	dc->client.base.dev = &pdev->dev;
 
 	err = tegra_dc_rgb_probe(dc);
 	if (err < 0 && err != -ENODEV) {
@@ -1150,7 +1151,7 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	err = host1x_register_client(tegra, &dc->client);
+	err = host1x_register_client(tegra, &dc->client.base);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
 			err);
@@ -1168,7 +1169,7 @@ static int tegra_dc_remove(struct platform_device *pdev)
 	struct tegra_dc *dc = platform_get_drvdata(pdev);
 	int err;
 
-	err = host1x_unregister_client(tegra, &dc->client);
+	err = host1x_unregister_client(tegra, &dc->client.base);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
 			err);
diff --git a/drivers/gpu/host1x/drm/drm.c b/drivers/gpu/host1x/drm/drm.c
index 4e503613536b..33d966145293 100644
--- a/drivers/gpu/host1x/drm/drm.c
+++ b/drivers/gpu/host1x/drm/drm.c
@@ -131,12 +131,18 @@ int tegra_drm_alloc(struct platform_device *pdev)
 int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm)
 {
 	struct host1x_client *client;
+	int err;
 
 	mutex_lock(&tegra->clients_lock);
 
 	list_for_each_entry(client, &tegra->clients, list) {
-		if (client->ops && client->ops->drm_init) {
-			int err = client->ops->drm_init(client, drm);
+		struct tegra_drm_client *tdc = to_tegra_drm_client(client);
+
+		/* associate client with DRM device */
+		tdc->drm = drm;
+
+		if (client->ops && client->ops->init) {
+			err = client->ops->init(client);
 			if (err < 0) {
 				dev_err(tegra->dev,
 					"DRM setup failed for %s: %d\n",
@@ -154,8 +160,9 @@ int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm)
 
 int tegra_drm_exit(struct tegra_drm *tegra)
 {
-	struct platform_device *pdev = to_platform_device(tegra->dev);
 	struct host1x_client *client;
+	struct platform_device *pdev;
+	int err;
 
 	if (!tegra->drm)
 		return 0;
@@ -163,8 +170,8 @@ int tegra_drm_exit(struct tegra_drm *tegra)
 	mutex_lock(&tegra->clients_lock);
 
 	list_for_each_entry_reverse(client, &tegra->clients, list) {
-		if (client->ops && client->ops->drm_exit) {
-			int err = client->ops->drm_exit(client);
+		if (client->ops && client->ops->exit) {
+			err = client->ops->exit(client);
 			if (err < 0) {
 				dev_err(tegra->dev,
 					"DRM cleanup failed for %s: %d\n",
@@ -177,6 +184,7 @@ int tegra_drm_exit(struct tegra_drm *tegra)
 
 	mutex_unlock(&tegra->clients_lock);
 
+	pdev = to_platform_device(tegra->dev);
 	drm_platform_exit(&tegra_drm_driver, pdev);
 	tegra->drm = NULL;
 
@@ -409,22 +417,22 @@ static int tegra_open_channel(struct drm_device *drm, void *data,
 	struct tegra_drm *tegra = drm->dev_private;
 	struct drm_tegra_open_channel *args = data;
 	struct tegra_drm_context *context;
-	struct host1x_client *client;
+	struct tegra_drm_client *client;
 	int err = -ENODEV;
 
 	context = kzalloc(sizeof(*context), GFP_KERNEL);
 	if (!context)
 		return -ENOMEM;
 
-	list_for_each_entry(client, &tegra->clients, list)
-		if (client->class == args->client) {
+	list_for_each_entry(client, &tegra->clients, base.list)
+		if (client->base.class == args->client) {
 			err = client->ops->open_channel(client, context);
 			if (err)
 				break;
 
-			context->client = client;
 			list_add(&context->list, &fpriv->contexts);
 			args->context = (uintptr_t)context;
+			context->client = client;
 			return 0;
 		}
 
@@ -463,10 +471,10 @@ static int tegra_get_syncpt(struct drm_device *drm, void *data,
 	if (!tegra_drm_file_owns_context(fpriv, context))
 		return -ENODEV;
 
-	if (args->index >= context->client->num_syncpts)
+	if (args->index >= context->client->base.num_syncpts)
 		return -EINVAL;
 
-	syncpt = context->client->syncpts[args->index];
+	syncpt = context->client->base.syncpts[args->index];
 	args->id = host1x_syncpt_id(syncpt);
 
 	return 0;
diff --git a/drivers/gpu/host1x/drm/drm.h b/drivers/gpu/host1x/drm/drm.h
index 78754f6a9153..8c26c6b1f5e1 100644
--- a/drivers/gpu/host1x/drm/drm.h
+++ b/drivers/gpu/host1x/drm/drm.h
@@ -44,18 +44,16 @@ struct tegra_drm {
 	struct tegra_fbdev *fbdev;
 };
 
-struct host1x_client;
+struct tegra_drm_client;
 
 struct tegra_drm_context {
-	struct host1x_client *client;
+	struct tegra_drm_client *client;
 	struct host1x_channel *channel;
 	struct list_head list;
 };
 
-struct host1x_client_ops {
-	int (*drm_init)(struct host1x_client *client, struct drm_device *drm);
-	int (*drm_exit)(struct host1x_client *client);
-	int (*open_channel)(struct host1x_client *client,
+struct tegra_drm_client_ops {
+	int (*open_channel)(struct tegra_drm_client *client,
 			    struct tegra_drm_context *context);
 	void (*close_channel)(struct tegra_drm_context *context);
 	int (*submit)(struct tegra_drm_context *context,
@@ -63,21 +61,19 @@ struct host1x_client_ops {
 		      struct drm_file *file);
 };
 
-struct host1x_client {
-	struct tegra_drm *tegra;
-	struct device *dev;
-
-	const struct host1x_client_ops *ops;
-
-	enum host1x_class class;
-	struct host1x_channel *channel;
-
-	struct host1x_syncpt **syncpts;
-	unsigned int num_syncpts;
+struct tegra_drm_client {
+	struct host1x_client base;
+	struct drm_device *drm;
 
-	struct list_head list;
+	const struct tegra_drm_client_ops *ops;
 };
 
+static inline struct tegra_drm_client *
+to_tegra_drm_client(struct host1x_client *client)
+{
+	return container_of(client, struct tegra_drm_client, base);
+}
+
 extern int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm);
 extern int tegra_drm_exit(struct tegra_drm *tegra);
 
@@ -89,7 +85,7 @@ extern int host1x_unregister_client(struct tegra_drm *tegra,
 struct tegra_output;
 
 struct tegra_dc {
-	struct host1x_client client;
+	struct tegra_drm_client client;
 	struct device *dev;
 	spinlock_t lock;
 
@@ -112,7 +108,8 @@ struct tegra_dc {
 	struct drm_pending_vblank_event *event;
 };
 
-static inline struct tegra_dc *host1x_client_to_dc(struct host1x_client *client)
+static inline struct tegra_dc *
+tegra_drm_client_to_dc(struct tegra_drm_client *client)
 {
 	return container_of(client, struct tegra_dc, client);
 }
diff --git a/drivers/gpu/host1x/drm/gr2d.c b/drivers/gpu/host1x/drm/gr2d.c
index 2691e333e0e2..a7edc794c5ce 100644
--- a/drivers/gpu/host1x/drm/gr2d.c
+++ b/drivers/gpu/host1x/drm/gr2d.c
@@ -27,20 +27,19 @@
 #define GR2D_NUM_REGS 0x4d
 
 struct gr2d {
-	struct host1x_client client;
+	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct clk *clk;
 
 	DECLARE_BITMAP(addr_regs, GR2D_NUM_REGS);
 };
 
-static inline struct gr2d *to_gr2d(struct host1x_client *client)
+static inline struct gr2d *to_gr2d(struct tegra_drm_client *client)
 {
 	return container_of(client, struct gr2d, client);
 }
 
-static int gr2d_client_init(struct host1x_client *client,
-			    struct drm_device *drm)
+static int gr2d_client_init(struct host1x_client *client)
 {
 	return 0;
 }
@@ -50,7 +49,12 @@ static int gr2d_client_exit(struct host1x_client *client)
 	return 0;
 }
 
-static int gr2d_open_channel(struct host1x_client *client,
+static const struct host1x_client_ops gr2d_client_ops = {
+	.init = gr2d_client_init,
+	.exit = gr2d_client_exit,
+};
+
+static int gr2d_open_channel(struct tegra_drm_client *client,
 			     struct tegra_drm_context *context)
 {
 	struct gr2d *gr2d = to_gr2d(client);
@@ -140,7 +144,7 @@ static int gr2d_submit(struct tegra_drm_context *context,
 	job->num_relocs = args->num_relocs;
 	job->num_waitchk = args->num_waitchks;
 	job->client = (u32)args->context;
-	job->class = context->client->class;
+	job->class = context->client->base.class;
 	job->serialize = true;
 
 	while (num_cmdbufs) {
@@ -201,7 +205,7 @@ static int gr2d_submit(struct tegra_drm_context *context,
 	if (args->timeout && args->timeout < 10000)
 		job->timeout = args->timeout;
 
-	err = host1x_job_pin(job, context->client->dev);
+	err = host1x_job_pin(job, context->client->base.dev);
 	if (err)
 		goto fail;
 
@@ -221,9 +225,7 @@ fail:
 	return err;
 }
 
-static struct host1x_client_ops gr2d_client_ops = {
-	.drm_init = gr2d_client_init,
-	.drm_exit = gr2d_client_exit,
+static const struct tegra_drm_client_ops gr2d_ops = {
 	.open_channel = gr2d_open_channel,
 	.close_channel = gr2d_close_channel,
 	.submit = gr2d_submit,
@@ -279,13 +281,15 @@ static int gr2d_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	gr2d->client.ops = &gr2d_client_ops;
-	gr2d->client.dev = dev;
-	gr2d->client.class = HOST1X_CLASS_GR2D;
-	gr2d->client.syncpts = syncpts;
-	gr2d->client.num_syncpts = 1;
+	INIT_LIST_HEAD(&gr2d->client.base.list);
+	gr2d->client.base.ops = &gr2d_client_ops;
+	gr2d->client.base.dev = dev;
+	gr2d->client.base.class = HOST1X_CLASS_GR2D;
+	gr2d->client.base.syncpts = syncpts;
+	gr2d->client.base.num_syncpts = 1;
+	gr2d->client.ops = &gr2d_ops;
 
-	err = host1x_register_client(tegra, &gr2d->client);
+	err = host1x_register_client(tegra, &gr2d->client.base);
 	if (err < 0) {
 		dev_err(dev, "failed to register host1x client: %d\n", err);
 		return err;
@@ -307,15 +311,15 @@ static int gr2d_remove(struct platform_device *pdev)
 	unsigned int i;
 	int err;
 
-	err = host1x_unregister_client(tegra, &gr2d->client);
+	err = host1x_unregister_client(tegra, &gr2d->client.base);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
 			err);
 		return err;
 	}
 
-	for (i = 0; i < gr2d->client.num_syncpts; i++)
-		host1x_syncpt_free(gr2d->client.syncpts[i]);
+	for (i = 0; i < gr2d->client.base.num_syncpts; i++)
+		host1x_syncpt_free(gr2d->client.base.syncpts[i]);
 
 	host1x_channel_free(gr2d->channel);
 	clk_disable_unprepare(gr2d->clk);
diff --git a/drivers/gpu/host1x/drm/hdmi.c b/drivers/gpu/host1x/drm/hdmi.c
index 5d8c41cf4f58..30ac9e872b0d 100644
--- a/drivers/gpu/host1x/drm/hdmi.c
+++ b/drivers/gpu/host1x/drm/hdmi.c
@@ -19,7 +19,7 @@
 #include "host1x_client.h"
 
 struct tegra_hdmi {
-	struct host1x_client client;
+	struct tegra_drm_client client;
 	struct tegra_output output;
 	struct device *dev;
 
@@ -43,7 +43,7 @@ struct tegra_hdmi {
 };
 
 static inline struct tegra_hdmi *
-host1x_client_to_hdmi(struct host1x_client *client)
+tegra_drm_client_to_hdmi(struct tegra_drm_client *client)
 {
 	return container_of(client, struct tegra_hdmi, client);
 }
@@ -1116,24 +1116,24 @@ static int tegra_hdmi_debugfs_exit(struct tegra_hdmi *hdmi)
 	return 0;
 }
 
-static int tegra_hdmi_drm_init(struct host1x_client *client,
-			       struct drm_device *drm)
+static int tegra_hdmi_init(struct host1x_client *client)
 {
-	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	struct tegra_drm_client *drm = to_tegra_drm_client(client);
+	struct tegra_hdmi *hdmi = tegra_drm_client_to_hdmi(drm);
 	int err;
 
 	hdmi->output.type = TEGRA_OUTPUT_HDMI;
 	hdmi->output.dev = client->dev;
 	hdmi->output.ops = &hdmi_ops;
 
-	err = tegra_output_init(drm, &hdmi->output);
+	err = tegra_output_init(drm->drm, &hdmi->output);
 	if (err < 0) {
 		dev_err(client->dev, "output setup failed: %d\n", err);
 		return err;
 	}
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_hdmi_debugfs_init(hdmi, drm->primary);
+		err = tegra_hdmi_debugfs_init(hdmi, drm->drm->primary);
 		if (err < 0)
 			dev_err(client->dev, "debugfs setup failed: %d\n", err);
 	}
@@ -1141,9 +1141,10 @@ static int tegra_hdmi_drm_init(struct host1x_client *client,
 	return 0;
 }
 
-static int tegra_hdmi_drm_exit(struct host1x_client *client)
+static int tegra_hdmi_exit(struct host1x_client *client)
 {
-	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	struct tegra_drm_client *drm = to_tegra_drm_client(client);
+	struct tegra_hdmi *hdmi = tegra_drm_client_to_hdmi(drm);
 	int err;
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
@@ -1169,8 +1170,8 @@ static int tegra_hdmi_drm_exit(struct host1x_client *client)
 }
 
 static const struct host1x_client_ops hdmi_client_ops = {
-	.drm_init = tegra_hdmi_drm_init,
-	.drm_exit = tegra_hdmi_drm_exit,
+	.init = tegra_hdmi_init,
+	.exit = tegra_hdmi_exit,
 };
 
 static int tegra_hdmi_probe(struct platform_device *pdev)
@@ -1246,11 +1247,11 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 
 	hdmi->irq = err;
 
-	hdmi->client.ops = &hdmi_client_ops;
-	INIT_LIST_HEAD(&hdmi->client.list);
-	hdmi->client.dev = &pdev->dev;
+	INIT_LIST_HEAD(&hdmi->client.base.list);
+	hdmi->client.base.ops = &hdmi_client_ops;
+	hdmi->client.base.dev = &pdev->dev;
 
-	err = host1x_register_client(tegra, &hdmi->client);
+	err = host1x_register_client(tegra, &hdmi->client.base);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
 			err);
@@ -1268,7 +1269,7 @@ static int tegra_hdmi_remove(struct platform_device *pdev)
 	struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
 	int err;
 
-	err = host1x_unregister_client(tegra, &hdmi->client);
+	err = host1x_unregister_client(tegra, &hdmi->client.base);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
 			err);
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index fe09939800bc..d429a938ba13 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -25,4 +25,24 @@ enum host1x_class {
 	HOST1X_CLASS_GR2D_SB = 0x52,
 };
 
+struct host1x_client;
+
+struct host1x_client_ops {
+	int (*init)(struct host1x_client *client);
+	int (*exit)(struct host1x_client *client);
+};
+
+struct host1x_client {
+	struct list_head list;
+	struct device *dev;
+
+	const struct host1x_client_ops *ops;
+
+	enum host1x_class class;
+	struct host1x_channel *channel;
+
+	struct host1x_syncpt **syncpts;
+	unsigned int num_syncpts;
+};
+
 #endif
-- 
cgit v1.2.3


From 35d747a81d7eb824bd0c3476cd0c564b52ad5353 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 24 Sep 2013 16:30:32 +0200
Subject: gpu: host1x: Expose syncpt and channel functionality

Expose the buffer objects, syncpoint and channel functionality in the
public public header so that drivers can use them.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/cdma.c          |   2 +-
 drivers/gpu/host1x/channel.h       |   6 --
 drivers/gpu/host1x/drm/drm.c       |   2 -
 drivers/gpu/host1x/drm/gem.h       |   4 +-
 drivers/gpu/host1x/drm/gr2d.c      |   6 +-
 drivers/gpu/host1x/drm/hdmi.c      |   2 +-
 drivers/gpu/host1x/host1x_bo.h     |  87 -----------------
 drivers/gpu/host1x/hw/channel_hw.c |   1 -
 drivers/gpu/host1x/hw/debug_hw.c   |   1 -
 drivers/gpu/host1x/job.c           |   2 +-
 drivers/gpu/host1x/job.h           | 108 ----------------------
 drivers/gpu/host1x/syncpt.c        |  19 ++++
 drivers/gpu/host1x/syncpt.h        |  40 +-------
 include/linux/host1x.h             | 185 +++++++++++++++++++++++++++++++++++++
 14 files changed, 211 insertions(+), 254 deletions(-)
 delete mode 100644 drivers/gpu/host1x/host1x_bo.h

(limited to 'include/linux')

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index de72172d3b5f..3995255b16c7 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -20,6 +20,7 @@
 #include <asm/cacheflush.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/host1x.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/kfifo.h>
@@ -30,7 +31,6 @@
 #include "channel.h"
 #include "dev.h"
 #include "debug.h"
-#include "host1x_bo.h"
 #include "job.h"
 
 /*
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index 48723b8eea42..df767cf90d51 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -40,12 +40,6 @@ struct host1x_channel {
 /* channel list operations */
 int host1x_channel_list_init(struct host1x *host);
 
-struct host1x_channel *host1x_channel_request(struct device *dev);
-void host1x_channel_free(struct host1x_channel *channel);
-struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
-void host1x_channel_put(struct host1x_channel *channel);
-int host1x_job_submit(struct host1x_job *job);
-
 #define host1x_for_each_channel(host, channel)				\
 	list_for_each_entry(channel, &host->chlist.list, list)
 
diff --git a/drivers/gpu/host1x/drm/drm.c b/drivers/gpu/host1x/drm/drm.c
index 33d966145293..1abcdbc7f4cd 100644
--- a/drivers/gpu/host1x/drm/drm.c
+++ b/drivers/gpu/host1x/drm/drm.c
@@ -8,10 +8,8 @@
  */
 
 #include "host1x_client.h"
-#include "dev.h"
 #include "drm.h"
 #include "gem.h"
-#include "syncpt.h"
 
 #define DRIVER_NAME "tegra"
 #define DRIVER_DESC "NVIDIA Tegra graphics"
diff --git a/drivers/gpu/host1x/drm/gem.h b/drivers/gpu/host1x/drm/gem.h
index 492533a2dacb..2b54f1425d5e 100644
--- a/drivers/gpu/host1x/drm/gem.h
+++ b/drivers/gpu/host1x/drm/gem.h
@@ -19,11 +19,11 @@
 #ifndef __HOST1X_GEM_H
 #define __HOST1X_GEM_H
 
+#include <linux/host1x.h>
+
 #include <drm/drm.h>
 #include <drm/drmP.h>
 
-#include "host1x_bo.h"
-
 struct tegra_bo {
 	struct drm_gem_object gem;
 	struct host1x_bo base;
diff --git a/drivers/gpu/host1x/drm/gr2d.c b/drivers/gpu/host1x/drm/gr2d.c
index a7edc794c5ce..dfb822428ca0 100644
--- a/drivers/gpu/host1x/drm/gr2d.c
+++ b/drivers/gpu/host1x/drm/gr2d.c
@@ -16,13 +16,9 @@
 
 #include <linux/clk.h>
 
-#include "channel.h"
+#include "host1x_client.h"
 #include "drm.h"
 #include "gem.h"
-#include "job.h"
-#include "host1x_bo.h"
-#include "host1x_client.h"
-#include "syncpt.h"
 
 #define GR2D_NUM_REGS 0x4d
 
diff --git a/drivers/gpu/host1x/drm/hdmi.c b/drivers/gpu/host1x/drm/hdmi.c
index 30ac9e872b0d..a2370045993c 100644
--- a/drivers/gpu/host1x/drm/hdmi.c
+++ b/drivers/gpu/host1x/drm/hdmi.c
@@ -13,10 +13,10 @@
 #include <linux/hdmi.h>
 #include <linux/regulator/consumer.h>
 
+#include "host1x_client.h"
 #include "hdmi.h"
 #include "drm.h"
 #include "dc.h"
-#include "host1x_client.h"
 
 struct tegra_hdmi {
 	struct tegra_drm_client client;
diff --git a/drivers/gpu/host1x/host1x_bo.h b/drivers/gpu/host1x/host1x_bo.h
deleted file mode 100644
index 4c1f10bd773d..000000000000
--- a/drivers/gpu/host1x/host1x_bo.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Tegra host1x Memory Management Abstraction header
- *
- * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _HOST1X_BO_H
-#define _HOST1X_BO_H
-
-struct host1x_bo;
-
-struct host1x_bo_ops {
-	struct host1x_bo *(*get)(struct host1x_bo *bo);
-	void (*put)(struct host1x_bo *bo);
-	dma_addr_t (*pin)(struct host1x_bo *bo, struct sg_table **sgt);
-	void (*unpin)(struct host1x_bo *bo, struct sg_table *sgt);
-	void *(*mmap)(struct host1x_bo *bo);
-	void (*munmap)(struct host1x_bo *bo, void *addr);
-	void *(*kmap)(struct host1x_bo *bo, unsigned int pagenum);
-	void (*kunmap)(struct host1x_bo *bo, unsigned int pagenum, void *addr);
-};
-
-struct host1x_bo {
-	const struct host1x_bo_ops *ops;
-};
-
-static inline void host1x_bo_init(struct host1x_bo *bo,
-				  const struct host1x_bo_ops *ops)
-{
-	bo->ops = ops;
-}
-
-static inline struct host1x_bo *host1x_bo_get(struct host1x_bo *bo)
-{
-	return bo->ops->get(bo);
-}
-
-static inline void host1x_bo_put(struct host1x_bo *bo)
-{
-	bo->ops->put(bo);
-}
-
-static inline dma_addr_t host1x_bo_pin(struct host1x_bo *bo,
-				       struct sg_table **sgt)
-{
-	return bo->ops->pin(bo, sgt);
-}
-
-static inline void host1x_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
-{
-	bo->ops->unpin(bo, sgt);
-}
-
-static inline void *host1x_bo_mmap(struct host1x_bo *bo)
-{
-	return bo->ops->mmap(bo);
-}
-
-static inline void host1x_bo_munmap(struct host1x_bo *bo, void *addr)
-{
-	bo->ops->munmap(bo, addr);
-}
-
-static inline void *host1x_bo_kmap(struct host1x_bo *bo, unsigned int pagenum)
-{
-	return bo->ops->kmap(bo, pagenum);
-}
-
-static inline void host1x_bo_kunmap(struct host1x_bo *bo,
-				    unsigned int pagenum, void *addr)
-{
-	bo->ops->kunmap(bo, pagenum, addr);
-}
-
-#endif
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index c950bc655ade..aa9bdf331139 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -21,7 +21,6 @@
 
 #include <trace/events/host1x.h>
 
-#include "host1x_bo.h"
 #include "channel.h"
 #include "dev.h"
 #include "intr.h"
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index dfad66312286..788fe7179d8f 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -19,7 +19,6 @@
 #include "debug.h"
 #include "cdma.h"
 #include "channel.h"
-#include "host1x_bo.h"
 
 #define HOST1X_DEBUG_MAX_PAGE_OFFSET 102400
 
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index d1b72f4c744d..de5ec333ce1a 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -18,6 +18,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
+#include <linux/host1x.h>
 #include <linux/kref.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
@@ -27,7 +28,6 @@
 
 #include "channel.h"
 #include "dev.h"
-#include "host1x_bo.h"
 #include "job.h"
 #include "syncpt.h"
 
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index fba45f20458e..33a697d6dcef 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -34,15 +34,6 @@ struct host1x_cmdbuf {
 	u32 pad;
 };
 
-struct host1x_reloc {
-	struct host1x_bo *cmdbuf;
-	u32 cmdbuf_offset;
-	struct host1x_bo *target;
-	u32 target_offset;
-	u32 shift;
-	u32 pad;
-};
-
 struct host1x_waitchk {
 	struct host1x_bo *bo;
 	u32 offset;
@@ -55,105 +46,6 @@ struct host1x_job_unpin_data {
 	struct sg_table *sgt;
 };
 
-/*
- * Each submit is tracked as a host1x_job.
- */
-struct host1x_job {
-	/* When refcount goes to zero, job can be freed */
-	struct kref ref;
-
-	/* List entry */
-	struct list_head list;
-
-	/* Channel where job is submitted to */
-	struct host1x_channel *channel;
-
-	u32 client;
-
-	/* Gathers and their memory */
-	struct host1x_job_gather *gathers;
-	unsigned int num_gathers;
-
-	/* Wait checks to be processed at submit time */
-	struct host1x_waitchk *waitchk;
-	unsigned int num_waitchk;
-	u32 waitchk_mask;
-
-	/* Array of handles to be pinned & unpinned */
-	struct host1x_reloc *relocarray;
-	unsigned int num_relocs;
-	struct host1x_job_unpin_data *unpins;
-	unsigned int num_unpins;
-
-	dma_addr_t *addr_phys;
-	dma_addr_t *gather_addr_phys;
-	dma_addr_t *reloc_addr_phys;
-
-	/* Sync point id, number of increments and end related to the submit */
-	u32 syncpt_id;
-	u32 syncpt_incrs;
-	u32 syncpt_end;
-
-	/* Maximum time to wait for this job */
-	unsigned int timeout;
-
-	/* Index and number of slots used in the push buffer */
-	unsigned int first_get;
-	unsigned int num_slots;
-
-	/* Copy of gathers */
-	size_t gather_copy_size;
-	dma_addr_t gather_copy;
-	u8 *gather_copy_mapped;
-
-	/* Check if register is marked as an address reg */
-	int (*is_addr_reg)(struct device *dev, u32 reg, u32 class);
-
-	/* Request a SETCLASS to this class */
-	u32 class;
-
-	/* Add a channel wait for previous ops to complete */
-	bool serialize;
-};
-/*
- * Allocate memory for a job. Just enough memory will be allocated to
- * accomodate the submit.
- */
-struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-				    u32 num_cmdbufs, u32 num_relocs,
-				    u32 num_waitchks);
-
-/*
- * Add a gather to a job.
- */
-void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *mem_id,
-			   u32 words, u32 offset);
-
-/*
- * Increment reference going to host1x_job.
- */
-struct host1x_job *host1x_job_get(struct host1x_job *job);
-
-/*
- * Decrement reference job, free if goes to zero.
- */
-void host1x_job_put(struct host1x_job *job);
-
-/*
- * Pin memory related to job. This handles relocation of addresses to the
- * host1x address space. Handles both the gather memory and any other memory
- * referred to from the gather buffers.
- *
- * Handles also patching out host waits that would wait for an expired sync
- * point value.
- */
-int host1x_job_pin(struct host1x_job *job, struct device *dev);
-
-/*
- * Unpin memory related to job.
- */
-void host1x_job_unpin(struct host1x_job *job);
-
 /*
  * Dump contents of job to debug output.
  */
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 409745b949db..03cf2922e469 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -354,6 +354,25 @@ void host1x_syncpt_deinit(struct host1x *host)
 		kfree(sp->name);
 }
 
+/*
+ * Read max. It indicates how many operations there are in queue, either in
+ * channel or in a software thread.
+ * */
+u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
+{
+	smp_rmb();
+	return (u32)atomic_read(&sp->max_val);
+}
+
+/*
+ * Read min, which is a shadow of the current sync point value in hardware.
+ */
+u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
+{
+	smp_rmb();
+	return (u32)atomic_read(&sp->min_val);
+}
+
 int host1x_syncpt_nb_pts(struct host1x *host)
 {
 	return host->info->nb_pts;
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 267c0b9d3647..4eb933a497fd 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -20,6 +20,7 @@
 #define __HOST1X_SYNCPT_H
 
 #include <linux/atomic.h>
+#include <linux/host1x.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 
@@ -50,25 +51,6 @@ int host1x_syncpt_init(struct host1x *host);
 /*  Free sync point array */
 void host1x_syncpt_deinit(struct host1x *host);
 
-/*
- * Read max. It indicates how many operations there are in queue, either in
- * channel or in a software thread.
- * */
-static inline u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
-{
-	smp_rmb();
-	return (u32)atomic_read(&sp->max_val);
-}
-
-/*
- * Read min, which is a shadow of the current sync point value in hardware.
- */
-static inline u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
-{
-	smp_rmb();
-	return (u32)atomic_read(&sp->min_val);
-}
-
 /* Return number of sync point supported. */
 int host1x_syncpt_nb_pts(struct host1x *host);
 
@@ -112,9 +94,6 @@ static inline bool host1x_syncpt_idle(struct host1x_syncpt *sp)
 	return (min == max);
 }
 
-/* Return pointer to struct denoting sync point id. */
-struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id);
-
 /* Load current value from hardware to the shadow register. */
 u32 host1x_syncpt_load(struct host1x_syncpt *sp);
 
@@ -130,16 +109,9 @@ void host1x_syncpt_restore(struct host1x *host);
 /* Read current wait base value into shadow register and return it. */
 u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp);
 
-/* Request incrementing a sync point. */
-int host1x_syncpt_incr(struct host1x_syncpt *sp);
-
 /* Indicate future operations by incrementing the sync point max. */
 u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs);
 
-/* Wait until sync point reaches a threshold value, or a timeout. */
-int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh,
-			long timeout, u32 *value);
-
 /* Check if sync point id is valid. */
 static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
 {
@@ -149,14 +121,4 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
 /* Patch a wait by replacing it with a wait for syncpt 0 value 0 */
 int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr);
 
-/* Return id of the sync point */
-u32 host1x_syncpt_id(struct host1x_syncpt *sp);
-
-/* Allocate a sync point for a device. */
-struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
-					    bool client_managed);
-
-/* Free a sync point. */
-void host1x_syncpt_free(struct host1x_syncpt *sp);
-
 #endif
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index d429a938ba13..7442f2a57039 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -19,6 +19,9 @@
 #ifndef __LINUX_HOST1X_H
 #define __LINUX_HOST1X_H
 
+#include <linux/kref.h>
+#include <linux/types.h>
+
 enum host1x_class {
 	HOST1X_CLASS_HOST1X = 0x1,
 	HOST1X_CLASS_GR2D = 0x51,
@@ -45,4 +48,186 @@ struct host1x_client {
 	unsigned int num_syncpts;
 };
 
+/*
+ * host1x buffer objects
+ */
+
+struct host1x_bo;
+struct sg_table;
+
+struct host1x_bo_ops {
+	struct host1x_bo *(*get)(struct host1x_bo *bo);
+	void (*put)(struct host1x_bo *bo);
+	dma_addr_t (*pin)(struct host1x_bo *bo, struct sg_table **sgt);
+	void (*unpin)(struct host1x_bo *bo, struct sg_table *sgt);
+	void *(*mmap)(struct host1x_bo *bo);
+	void (*munmap)(struct host1x_bo *bo, void *addr);
+	void *(*kmap)(struct host1x_bo *bo, unsigned int pagenum);
+	void (*kunmap)(struct host1x_bo *bo, unsigned int pagenum, void *addr);
+};
+
+struct host1x_bo {
+	const struct host1x_bo_ops *ops;
+};
+
+static inline void host1x_bo_init(struct host1x_bo *bo,
+				  const struct host1x_bo_ops *ops)
+{
+	bo->ops = ops;
+}
+
+static inline struct host1x_bo *host1x_bo_get(struct host1x_bo *bo)
+{
+	return bo->ops->get(bo);
+}
+
+static inline void host1x_bo_put(struct host1x_bo *bo)
+{
+	bo->ops->put(bo);
+}
+
+static inline dma_addr_t host1x_bo_pin(struct host1x_bo *bo,
+				       struct sg_table **sgt)
+{
+	return bo->ops->pin(bo, sgt);
+}
+
+static inline void host1x_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
+{
+	bo->ops->unpin(bo, sgt);
+}
+
+static inline void *host1x_bo_mmap(struct host1x_bo *bo)
+{
+	return bo->ops->mmap(bo);
+}
+
+static inline void host1x_bo_munmap(struct host1x_bo *bo, void *addr)
+{
+	bo->ops->munmap(bo, addr);
+}
+
+static inline void *host1x_bo_kmap(struct host1x_bo *bo, unsigned int pagenum)
+{
+	return bo->ops->kmap(bo, pagenum);
+}
+
+static inline void host1x_bo_kunmap(struct host1x_bo *bo,
+				    unsigned int pagenum, void *addr)
+{
+	bo->ops->kunmap(bo, pagenum, addr);
+}
+
+/*
+ * host1x syncpoints
+ */
+
+struct host1x_syncpt;
+struct host1x;
+
+struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id);
+u32 host1x_syncpt_id(struct host1x_syncpt *sp);
+u32 host1x_syncpt_read_min(struct host1x_syncpt *sp);
+u32 host1x_syncpt_read_max(struct host1x_syncpt *sp);
+int host1x_syncpt_incr(struct host1x_syncpt *sp);
+int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
+		       u32 *value);
+struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
+					    bool client_managed);
+void host1x_syncpt_free(struct host1x_syncpt *sp);
+
+/*
+ * host1x channel
+ */
+
+struct host1x_channel;
+struct host1x_job;
+
+struct host1x_channel *host1x_channel_request(struct device *dev);
+void host1x_channel_free(struct host1x_channel *channel);
+struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
+void host1x_channel_put(struct host1x_channel *channel);
+int host1x_job_submit(struct host1x_job *job);
+
+/*
+ * host1x job
+ */
+
+struct host1x_reloc {
+	struct host1x_bo *cmdbuf;
+	u32 cmdbuf_offset;
+	struct host1x_bo *target;
+	u32 target_offset;
+	u32 shift;
+	u32 pad;
+};
+
+struct host1x_job {
+	/* When refcount goes to zero, job can be freed */
+	struct kref ref;
+
+	/* List entry */
+	struct list_head list;
+
+	/* Channel where job is submitted to */
+	struct host1x_channel *channel;
+
+	u32 client;
+
+	/* Gathers and their memory */
+	struct host1x_job_gather *gathers;
+	unsigned int num_gathers;
+
+	/* Wait checks to be processed at submit time */
+	struct host1x_waitchk *waitchk;
+	unsigned int num_waitchk;
+	u32 waitchk_mask;
+
+	/* Array of handles to be pinned & unpinned */
+	struct host1x_reloc *relocarray;
+	unsigned int num_relocs;
+	struct host1x_job_unpin_data *unpins;
+	unsigned int num_unpins;
+
+	dma_addr_t *addr_phys;
+	dma_addr_t *gather_addr_phys;
+	dma_addr_t *reloc_addr_phys;
+
+	/* Sync point id, number of increments and end related to the submit */
+	u32 syncpt_id;
+	u32 syncpt_incrs;
+	u32 syncpt_end;
+
+	/* Maximum time to wait for this job */
+	unsigned int timeout;
+
+	/* Index and number of slots used in the push buffer */
+	unsigned int first_get;
+	unsigned int num_slots;
+
+	/* Copy of gathers */
+	size_t gather_copy_size;
+	dma_addr_t gather_copy;
+	u8 *gather_copy_mapped;
+
+	/* Check if register is marked as an address reg */
+	int (*is_addr_reg)(struct device *dev, u32 reg, u32 class);
+
+	/* Request a SETCLASS to this class */
+	u32 class;
+
+	/* Add a channel wait for previous ops to complete */
+	bool serialize;
+};
+
+struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
+				    u32 num_cmdbufs, u32 num_relocs,
+				    u32 num_waitchks);
+void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *mem_id,
+			   u32 words, u32 offset);
+struct host1x_job *host1x_job_get(struct host1x_job *job);
+void host1x_job_put(struct host1x_job *job);
+int host1x_job_pin(struct host1x_job *job, struct device *dev);
+void host1x_job_unpin(struct host1x_job *job);
+
 #endif
-- 
cgit v1.2.3


From 776dc38403676f499a73d32e2e7c61eb5b42f736 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 14 Oct 2013 14:43:22 +0200
Subject: drm/tegra: Move subdevice infrastructure to host1x

The Tegra DRM driver currently uses some infrastructure to defer the DRM
core initialization until all required devices have registered. The same
infrastructure can potentially be used by any other driver that requires
more than a single sub-device of the host1x module.

Make the infrastructure more generic and keep only the DRM specific code
in the DRM part of the driver. Eventually this will make it easy to move
the DRM driver part back to the DRM subsystem.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/Makefile        |   2 +
 drivers/gpu/host1x/bus.c           | 550 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/host1x/bus.h           |  29 ++
 drivers/gpu/host1x/dev.c           |  60 ++--
 drivers/gpu/host1x/dev.h           |   9 +-
 drivers/gpu/host1x/drm/bus.c       |  76 +++++
 drivers/gpu/host1x/drm/dc.c        |  30 +-
 drivers/gpu/host1x/drm/drm.c       | 347 ++++++++---------------
 drivers/gpu/host1x/drm/drm.h       |  31 ++-
 drivers/gpu/host1x/drm/gr2d.c      |  60 ++--
 drivers/gpu/host1x/drm/hdmi.c      |  28 +-
 drivers/gpu/host1x/host1x_client.h |  35 ---
 include/drm/drmP.h                 |   1 +
 include/linux/host1x.h             |  45 ++-
 14 files changed, 918 insertions(+), 385 deletions(-)
 create mode 100644 drivers/gpu/host1x/bus.c
 create mode 100644 drivers/gpu/host1x/bus.h
 create mode 100644 drivers/gpu/host1x/drm/bus.c
 delete mode 100644 drivers/gpu/host1x/host1x_client.h

(limited to 'include/linux')

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 3b037b6e0298..7b781920c58a 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -1,6 +1,7 @@
 ccflags-y = -Idrivers/gpu/host1x
 
 host1x-y = \
+	bus.o \
 	syncpt.o \
 	dev.o \
 	intr.o \
@@ -17,4 +18,5 @@ host1x-$(CONFIG_DRM_TEGRA) += drm/drm.o drm/fb.o drm/dc.o
 host1x-$(CONFIG_DRM_TEGRA) += drm/output.o drm/rgb.o drm/hdmi.o
 host1x-$(CONFIG_DRM_TEGRA) += drm/gem.o
 host1x-$(CONFIG_DRM_TEGRA) += drm/gr2d.o
+host1x-$(CONFIG_DRM_TEGRA) += drm/bus.o
 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
new file mode 100644
index 000000000000..509383f8be03
--- /dev/null
+++ b/drivers/gpu/host1x/bus.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012-2013, NVIDIA Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/host1x.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include "dev.h"
+
+static DEFINE_MUTEX(clients_lock);
+static LIST_HEAD(clients);
+
+static DEFINE_MUTEX(drivers_lock);
+static LIST_HEAD(drivers);
+
+static DEFINE_MUTEX(devices_lock);
+static LIST_HEAD(devices);
+
+struct host1x_subdev {
+	struct host1x_client *client;
+	struct device_node *np;
+	struct list_head list;
+};
+
+/**
+ * host1x_subdev_add() - add a new subdevice with an associated device node
+ */
+static int host1x_subdev_add(struct host1x_device *device,
+			     struct device_node *np)
+{
+	struct host1x_subdev *subdev;
+
+	subdev = kzalloc(sizeof(*subdev), GFP_KERNEL);
+	if (!subdev)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&subdev->list);
+	subdev->np = of_node_get(np);
+
+	mutex_lock(&device->subdevs_lock);
+	list_add_tail(&subdev->list, &device->subdevs);
+	mutex_unlock(&device->subdevs_lock);
+
+	return 0;
+}
+
+/**
+ * host1x_subdev_del() - remove subdevice
+ */
+static void host1x_subdev_del(struct host1x_subdev *subdev)
+{
+	list_del(&subdev->list);
+	of_node_put(subdev->np);
+	kfree(subdev);
+}
+
+/**
+ * host1x_device_parse_dt() - scan device tree and add matching subdevices
+ */
+static int host1x_device_parse_dt(struct host1x_device *device)
+{
+	struct device_node *np;
+	int err;
+
+	for_each_child_of_node(device->dev.parent->of_node, np) {
+		if (of_match_node(device->driver->subdevs, np) &&
+		    of_device_is_available(np)) {
+			err = host1x_subdev_add(device, np);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static void host1x_subdev_register(struct host1x_device *device,
+				   struct host1x_subdev *subdev,
+				   struct host1x_client *client)
+{
+	int err;
+
+	/*
+	 * Move the subdevice to the list of active (registered) subdevices
+	 * and associate it with a client. At the same time, associate the
+	 * client with its parent device.
+	 */
+	mutex_lock(&device->subdevs_lock);
+	mutex_lock(&device->clients_lock);
+	list_move_tail(&client->list, &device->clients);
+	list_move_tail(&subdev->list, &device->active);
+	client->parent = &device->dev;
+	subdev->client = client;
+	mutex_unlock(&device->clients_lock);
+	mutex_unlock(&device->subdevs_lock);
+
+	/*
+	 * When all subdevices have been registered, the composite device is
+	 * ready to be probed.
+	 */
+	if (list_empty(&device->subdevs)) {
+		err = device->driver->probe(device);
+		if (err < 0)
+			dev_err(&device->dev, "probe failed: %d\n", err);
+	}
+}
+
+static void __host1x_subdev_unregister(struct host1x_device *device,
+				       struct host1x_subdev *subdev)
+{
+	struct host1x_client *client = subdev->client;
+	int err;
+
+	/*
+	 * If all subdevices have been activated, we're about to remove the
+	 * first active subdevice, so unload the driver first.
+	 */
+	if (list_empty(&device->subdevs)) {
+		err = device->driver->remove(device);
+		if (err < 0)
+			dev_err(&device->dev, "remove failed: %d\n", err);
+	}
+
+	/*
+	 * Move the subdevice back to the list of idle subdevices and remove
+	 * it from list of clients.
+	 */
+	mutex_lock(&device->clients_lock);
+	subdev->client = NULL;
+	client->parent = NULL;
+	list_move_tail(&subdev->list, &device->subdevs);
+	/*
+	 * XXX: Perhaps don't do this here, but rather explicitly remove it
+	 * when the device is about to be deleted.
+	 *
+	 * This is somewhat complicated by the fact that this function is
+	 * used to remove the subdevice when a client is unregistered but
+	 * also when the composite device is about to be removed.
+	 */
+	list_del_init(&client->list);
+	mutex_unlock(&device->clients_lock);
+}
+
+static void host1x_subdev_unregister(struct host1x_device *device,
+				     struct host1x_subdev *subdev)
+{
+	mutex_lock(&device->subdevs_lock);
+	__host1x_subdev_unregister(device, subdev);
+	mutex_unlock(&device->subdevs_lock);
+}
+
+int host1x_device_init(struct host1x_device *device)
+{
+	struct host1x_client *client;
+	int err;
+
+	mutex_lock(&device->clients_lock);
+
+	list_for_each_entry(client, &device->clients, list) {
+		if (client->ops && client->ops->init) {
+			err = client->ops->init(client);
+			if (err < 0) {
+				dev_err(&device->dev,
+					"failed to initialize %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&device->clients_lock);
+				return err;
+			}
+		}
+	}
+
+	mutex_unlock(&device->clients_lock);
+
+	return 0;
+}
+
+int host1x_device_exit(struct host1x_device *device)
+{
+	struct host1x_client *client;
+	int err;
+
+	mutex_lock(&device->clients_lock);
+
+	list_for_each_entry_reverse(client, &device->clients, list) {
+		if (client->ops && client->ops->exit) {
+			err = client->ops->exit(client);
+			if (err < 0) {
+				dev_err(&device->dev,
+					"failed to cleanup %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&device->clients_lock);
+				return err;
+			}
+		}
+	}
+
+	mutex_unlock(&device->clients_lock);
+
+	return 0;
+}
+
+static int host1x_register_client(struct host1x *host1x,
+				  struct host1x_client *client)
+{
+	struct host1x_device *device;
+	struct host1x_subdev *subdev;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry(device, &host1x->devices, list) {
+		list_for_each_entry(subdev, &device->subdevs, list) {
+			if (subdev->np == client->dev->of_node) {
+				host1x_subdev_register(device, subdev, client);
+				mutex_unlock(&host1x->devices_lock);
+				return 0;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->devices_lock);
+	return -ENODEV;
+}
+
+static int host1x_unregister_client(struct host1x *host1x,
+				    struct host1x_client *client)
+{
+	struct host1x_device *device, *dt;
+	struct host1x_subdev *subdev;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry_safe(device, dt, &host1x->devices, list) {
+		list_for_each_entry(subdev, &device->active, list) {
+			if (subdev->client == client) {
+				host1x_subdev_unregister(device, subdev);
+				mutex_unlock(&host1x->devices_lock);
+				return 0;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->devices_lock);
+	return -ENODEV;
+}
+
+struct bus_type host1x_bus_type = {
+	.name = "host1x",
+};
+
+int host1x_bus_init(void)
+{
+	return bus_register(&host1x_bus_type);
+}
+
+void host1x_bus_exit(void)
+{
+	bus_unregister(&host1x_bus_type);
+}
+
+static void host1x_device_release(struct device *dev)
+{
+	struct host1x_device *device = to_host1x_device(dev);
+
+	kfree(device);
+}
+
+static int host1x_device_add(struct host1x *host1x,
+			     struct host1x_driver *driver)
+{
+	struct host1x_client *client, *tmp;
+	struct host1x_subdev *subdev;
+	struct host1x_device *device;
+	int err;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device)
+		return -ENOMEM;
+
+	mutex_init(&device->subdevs_lock);
+	INIT_LIST_HEAD(&device->subdevs);
+	INIT_LIST_HEAD(&device->active);
+	mutex_init(&device->clients_lock);
+	INIT_LIST_HEAD(&device->clients);
+	INIT_LIST_HEAD(&device->list);
+	device->driver = driver;
+
+	device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask;
+	device->dev.dma_mask = &device->dev.coherent_dma_mask;
+	device->dev.release = host1x_device_release;
+	dev_set_name(&device->dev, driver->name);
+	device->dev.bus = &host1x_bus_type;
+	device->dev.parent = host1x->dev;
+
+	err = device_register(&device->dev);
+	if (err < 0)
+		return err;
+
+	err = host1x_device_parse_dt(device);
+	if (err < 0) {
+		device_unregister(&device->dev);
+		return err;
+	}
+
+	mutex_lock(&host1x->devices_lock);
+	list_add_tail(&device->list, &host1x->devices);
+	mutex_unlock(&host1x->devices_lock);
+
+	mutex_lock(&clients_lock);
+
+	list_for_each_entry_safe(client, tmp, &clients, list) {
+		list_for_each_entry(subdev, &device->subdevs, list) {
+			if (subdev->np == client->dev->of_node) {
+				host1x_subdev_register(device, subdev, client);
+				break;
+			}
+		}
+	}
+
+	mutex_unlock(&clients_lock);
+
+	return 0;
+}
+
+/*
+ * Removes a device by first unregistering any subdevices and then removing
+ * itself from the list of devices.
+ *
+ * This function must be called with the host1x->devices_lock held.
+ */
+static void host1x_device_del(struct host1x *host1x,
+			      struct host1x_device *device)
+{
+	struct host1x_subdev *subdev, *sd;
+	struct host1x_client *client, *cl;
+
+	mutex_lock(&device->subdevs_lock);
+
+	/* unregister subdevices */
+	list_for_each_entry_safe(subdev, sd, &device->active, list) {
+		/*
+		 * host1x_subdev_unregister() will remove the client from
+		 * any lists, so we'll need to manually add it back to the
+		 * list of idle clients.
+		 *
+		 * XXX: Alternatively, perhaps don't remove the client from
+		 * any lists in host1x_subdev_unregister() and instead do
+		 * that explicitly from host1x_unregister_client()?
+		 */
+		client = subdev->client;
+
+		__host1x_subdev_unregister(device, subdev);
+
+		/* add the client to the list of idle clients */
+		mutex_lock(&clients_lock);
+		list_add_tail(&client->list, &clients);
+		mutex_unlock(&clients_lock);
+	}
+
+	/* remove subdevices */
+	list_for_each_entry_safe(subdev, sd, &device->subdevs, list)
+		host1x_subdev_del(subdev);
+
+	mutex_unlock(&device->subdevs_lock);
+
+	/* move clients to idle list */
+	mutex_lock(&clients_lock);
+	mutex_lock(&device->clients_lock);
+
+	list_for_each_entry_safe(client, cl, &device->clients, list)
+		list_move_tail(&client->list, &clients);
+
+	mutex_unlock(&device->clients_lock);
+	mutex_unlock(&clients_lock);
+
+	/* finally remove the device */
+	list_del_init(&device->list);
+	device_unregister(&device->dev);
+}
+
+static void host1x_attach_driver(struct host1x *host1x,
+				 struct host1x_driver *driver)
+{
+	struct host1x_device *device;
+	int err;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry(device, &host1x->devices, list) {
+		if (device->driver == driver) {
+			mutex_unlock(&host1x->devices_lock);
+			return;
+		}
+	}
+
+	mutex_unlock(&host1x->devices_lock);
+
+	err = host1x_device_add(host1x, driver);
+	if (err < 0)
+		dev_err(host1x->dev, "failed to allocate device: %d\n", err);
+}
+
+static void host1x_detach_driver(struct host1x *host1x,
+				 struct host1x_driver *driver)
+{
+	struct host1x_device *device, *tmp;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry_safe(device, tmp, &host1x->devices, list)
+		if (device->driver == driver)
+			host1x_device_del(host1x, device);
+
+	mutex_unlock(&host1x->devices_lock);
+}
+
+int host1x_register(struct host1x *host1x)
+{
+	struct host1x_driver *driver;
+
+	mutex_lock(&devices_lock);
+	list_add_tail(&host1x->list, &devices);
+	mutex_unlock(&devices_lock);
+
+	mutex_lock(&drivers_lock);
+
+	list_for_each_entry(driver, &drivers, list)
+		host1x_attach_driver(host1x, driver);
+
+	mutex_unlock(&drivers_lock);
+
+	return 0;
+}
+
+int host1x_unregister(struct host1x *host1x)
+{
+	struct host1x_driver *driver;
+
+	mutex_lock(&drivers_lock);
+
+	list_for_each_entry(driver, &drivers, list)
+		host1x_detach_driver(host1x, driver);
+
+	mutex_unlock(&drivers_lock);
+
+	mutex_lock(&devices_lock);
+	list_del_init(&host1x->list);
+	mutex_unlock(&devices_lock);
+
+	return 0;
+}
+
+int host1x_driver_register(struct host1x_driver *driver)
+{
+	struct host1x *host1x;
+
+	INIT_LIST_HEAD(&driver->list);
+
+	mutex_lock(&drivers_lock);
+	list_add_tail(&driver->list, &drivers);
+	mutex_unlock(&drivers_lock);
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list)
+		host1x_attach_driver(host1x, driver);
+
+	mutex_unlock(&devices_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(host1x_driver_register);
+
+void host1x_driver_unregister(struct host1x_driver *driver)
+{
+	mutex_lock(&drivers_lock);
+	list_del_init(&driver->list);
+	mutex_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(host1x_driver_unregister);
+
+int host1x_client_register(struct host1x_client *client)
+{
+	struct host1x *host1x;
+	int err;
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list) {
+		err = host1x_register_client(host1x, client);
+		if (!err) {
+			mutex_unlock(&devices_lock);
+			return 0;
+		}
+	}
+
+	mutex_unlock(&devices_lock);
+
+	mutex_lock(&clients_lock);
+	list_add_tail(&client->list, &clients);
+	mutex_unlock(&clients_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(host1x_client_register);
+
+int host1x_client_unregister(struct host1x_client *client)
+{
+	struct host1x_client *c;
+	struct host1x *host1x;
+	int err;
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list) {
+		err = host1x_unregister_client(host1x, client);
+		if (!err) {
+			mutex_unlock(&devices_lock);
+			return 0;
+		}
+	}
+
+	mutex_unlock(&devices_lock);
+	mutex_lock(&clients_lock);
+
+	list_for_each_entry(c, &clients, list) {
+		if (c == client) {
+			list_del_init(&c->list);
+			break;
+		}
+	}
+
+	mutex_unlock(&clients_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(host1x_client_unregister);
diff --git a/drivers/gpu/host1x/bus.h b/drivers/gpu/host1x/bus.h
new file mode 100644
index 000000000000..4099e99212c8
--- /dev/null
+++ b/drivers/gpu/host1x/bus.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012-2013, NVIDIA Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HOST1X_BUS_H
+#define HOST1X_BUS_H
+
+struct host1x;
+
+int host1x_bus_init(void);
+void host1x_bus_exit(void);
+
+int host1x_register(struct host1x *host1x);
+int host1x_unregister(struct host1x *host1x);
+
+#endif
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 105aa4ed665a..de0fd552710d 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -27,24 +27,12 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/host1x.h>
 
+#include "bus.h"
 #include "dev.h"
 #include "intr.h"
 #include "channel.h"
 #include "debug.h"
 #include "hw/host1x01.h"
-#include "host1x_client.h"
-
-void host1x_set_drm_data(struct device *dev, void *data)
-{
-	struct host1x *host1x = dev_get_drvdata(dev);
-	host1x->drm_data = data;
-}
-
-void *host1x_get_drm_data(struct device *dev)
-{
-	struct host1x *host1x = dev_get_drvdata(dev);
-	return host1x ? host1x->drm_data : NULL;
-}
 
 void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
 {
@@ -114,6 +102,9 @@ static int host1x_probe(struct platform_device *pdev)
 	if (!host)
 		return -ENOMEM;
 
+	mutex_init(&host->devices_lock);
+	INIT_LIST_HEAD(&host->devices);
+	INIT_LIST_HEAD(&host->list);
 	host->dev = &pdev->dev;
 	host->info = id->data;
 
@@ -163,10 +154,14 @@ static int host1x_probe(struct platform_device *pdev)
 
 	host1x_debug_init(host);
 
-	tegra_drm_alloc(pdev);
+	err = host1x_register(host);
+	if (err < 0)
+		goto fail_deinit_intr;
 
 	return 0;
 
+fail_deinit_intr:
+	host1x_intr_deinit(host);
 fail_deinit_syncpt:
 	host1x_syncpt_deinit(host);
 	return err;
@@ -176,6 +171,7 @@ static int host1x_remove(struct platform_device *pdev)
 {
 	struct host1x *host = platform_get_drvdata(pdev);
 
+	host1x_unregister(host);
 	host1x_intr_deinit(host);
 	host1x_syncpt_deinit(host);
 	clk_disable_unprepare(host->clk);
@@ -196,46 +192,24 @@ static int __init tegra_host1x_init(void)
 {
 	int err;
 
-	err = platform_driver_register(&tegra_host1x_driver);
+	err = host1x_bus_init();
 	if (err < 0)
 		return err;
 
-#ifdef CONFIG_DRM_TEGRA
-	err = platform_driver_register(&tegra_dc_driver);
-	if (err < 0)
-		goto unregister_host1x;
-
-	err = platform_driver_register(&tegra_hdmi_driver);
-	if (err < 0)
-		goto unregister_dc;
-
-	err = platform_driver_register(&tegra_gr2d_driver);
-	if (err < 0)
-		goto unregister_hdmi;
-#endif
+	err = platform_driver_register(&tegra_host1x_driver);
+	if (err < 0) {
+		host1x_bus_exit();
+		return err;
+	}
 
 	return 0;
-
-#ifdef CONFIG_DRM_TEGRA
-unregister_hdmi:
-	platform_driver_unregister(&tegra_hdmi_driver);
-unregister_dc:
-	platform_driver_unregister(&tegra_dc_driver);
-unregister_host1x:
-	platform_driver_unregister(&tegra_host1x_driver);
-	return err;
-#endif
 }
 module_init(tegra_host1x_init);
 
 static void __exit tegra_host1x_exit(void)
 {
-#ifdef CONFIG_DRM_TEGRA
-	platform_driver_unregister(&tegra_gr2d_driver);
-	platform_driver_unregister(&tegra_hdmi_driver);
-	platform_driver_unregister(&tegra_dc_driver);
-#endif
 	platform_driver_unregister(&tegra_host1x_driver);
+	host1x_bus_exit();
 }
 module_exit(tegra_host1x_exit);
 
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index bed90a8131be..6cf689b9e17b 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -125,7 +125,10 @@ struct host1x {
 
 	struct dentry *debugfs;
 
-	void *drm_data;
+	struct mutex devices_lock;
+	struct list_head devices;
+
+	struct list_head list;
 };
 
 void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);
@@ -301,8 +304,4 @@ static inline void host1x_hw_show_mlocks(struct host1x *host, struct output *o)
 	host->debug_op->show_mlocks(host, o);
 }
 
-extern struct platform_driver tegra_dc_driver;
-extern struct platform_driver tegra_hdmi_driver;
-extern struct platform_driver tegra_gr2d_driver;
-
 #endif
diff --git a/drivers/gpu/host1x/drm/bus.c b/drivers/gpu/host1x/drm/bus.c
new file mode 100644
index 000000000000..565f8f7b9a47
--- /dev/null
+++ b/drivers/gpu/host1x/drm/bus.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2013 NVIDIA Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "drm.h"
+
+static int drm_host1x_set_busid(struct drm_device *dev,
+				struct drm_master *master)
+{
+	const char *device = dev_name(dev->dev);
+	const char *driver = dev->driver->name;
+	const char *bus = dev->dev->bus->name;
+	int length;
+
+	master->unique_len = strlen(bus) + 1 + strlen(device);
+	master->unique_size = master->unique_len;
+
+	master->unique = kmalloc(master->unique_len + 1, GFP_KERNEL);
+	if (!master->unique)
+		return -ENOMEM;
+
+	snprintf(master->unique, master->unique_len + 1, "%s:%s", bus, device);
+
+	length = strlen(driver) + 1 + master->unique_len;
+
+	dev->devname = kmalloc(length + 1, GFP_KERNEL);
+	if (!dev->devname)
+		return -ENOMEM;
+
+	snprintf(dev->devname, length + 1, "%s@%s", driver, master->unique);
+
+	return 0;
+}
+
+static struct drm_bus drm_host1x_bus = {
+	.bus_type = DRIVER_BUS_HOST1X,
+	.set_busid = drm_host1x_set_busid,
+};
+
+int drm_host1x_init(struct drm_driver *driver, struct host1x_device *device)
+{
+	struct drm_device *drm;
+	int ret;
+
+	INIT_LIST_HEAD(&driver->device_list);
+	driver->bus = &drm_host1x_bus;
+
+	drm = drm_dev_alloc(driver, &device->dev);
+	if (!drm)
+		return -ENOMEM;
+
+	ret = drm_dev_register(drm, 0);
+	if (ret)
+		goto err_free;
+
+	DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n", driver->name,
+		 driver->major, driver->minor, driver->patchlevel,
+		 driver->date, drm->primary->index);
+
+	return 0;
+
+err_free:
+	drm_dev_free(drm);
+	return ret;
+}
+
+void drm_host1x_exit(struct drm_driver *driver, struct host1x_device *device)
+{
+	struct tegra_drm *tegra = dev_get_drvdata(&device->dev);
+
+	drm_put_dev(tegra->drm);
+}
diff --git a/drivers/gpu/host1x/drm/dc.c b/drivers/gpu/host1x/drm/dc.c
index 5106df08f046..588d4ba0d8cf 100644
--- a/drivers/gpu/host1x/drm/dc.c
+++ b/drivers/gpu/host1x/drm/dc.c
@@ -11,7 +11,6 @@
 #include <linux/clk/tegra.h>
 #include <linux/debugfs.h>
 
-#include "host1x_client.h"
 #include "dc.h"
 #include "drm.h"
 #include "gem.h"
@@ -1040,28 +1039,28 @@ static int tegra_dc_debugfs_exit(struct tegra_dc *dc)
 
 static int tegra_dc_init(struct host1x_client *client)
 {
-	struct tegra_drm_client *drm = to_tegra_drm_client(client);
-	struct tegra_dc *dc = tegra_drm_client_to_dc(drm);
+	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct tegra_dc *dc = host1x_client_to_dc(client);
 	int err;
 
-	dc->pipe = drm->drm->mode_config.num_crtc;
+	dc->pipe = tegra->drm->mode_config.num_crtc;
 
-	drm_crtc_init(drm->drm, &dc->base, &tegra_crtc_funcs);
+	drm_crtc_init(tegra->drm, &dc->base, &tegra_crtc_funcs);
 	drm_mode_crtc_set_gamma_size(&dc->base, 256);
 	drm_crtc_helper_add(&dc->base, &tegra_crtc_helper_funcs);
 
-	err = tegra_dc_rgb_init(drm->drm, dc);
+	err = tegra_dc_rgb_init(tegra->drm, dc);
 	if (err < 0 && err != -ENODEV) {
 		dev_err(dc->dev, "failed to initialize RGB output: %d\n", err);
 		return err;
 	}
 
-	err = tegra_dc_add_planes(drm->drm, dc);
+	err = tegra_dc_add_planes(tegra->drm, dc);
 	if (err < 0)
 		return err;
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_dc_debugfs_init(dc, drm->drm->primary);
+		err = tegra_dc_debugfs_init(dc, tegra->drm->primary);
 		if (err < 0)
 			dev_err(dc->dev, "debugfs setup failed: %d\n", err);
 	}
@@ -1079,8 +1078,7 @@ static int tegra_dc_init(struct host1x_client *client)
 
 static int tegra_dc_exit(struct host1x_client *client)
 {
-	struct tegra_drm_client *drm = to_tegra_drm_client(client);
-	struct tegra_dc *dc = tegra_drm_client_to_dc(drm);
+	struct tegra_dc *dc = host1x_client_to_dc(client);
 	int err;
 
 	devm_free_irq(dc->dev, dc->irq, dc);
@@ -1107,7 +1105,6 @@ static const struct host1x_client_ops dc_client_ops = {
 
 static int tegra_dc_probe(struct platform_device *pdev)
 {
-	struct tegra_drm *tegra = host1x_get_drm_data(pdev->dev.parent);
 	struct resource *regs;
 	struct tegra_dc *dc;
 	int err;
@@ -1141,9 +1138,9 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	INIT_LIST_HEAD(&dc->client.base.list);
-	dc->client.base.ops = &dc_client_ops;
-	dc->client.base.dev = &pdev->dev;
+	INIT_LIST_HEAD(&dc->client.list);
+	dc->client.ops = &dc_client_ops;
+	dc->client.dev = &pdev->dev;
 
 	err = tegra_dc_rgb_probe(dc);
 	if (err < 0 && err != -ENODEV) {
@@ -1151,7 +1148,7 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	err = host1x_register_client(tegra, &dc->client.base);
+	err = host1x_client_register(&dc->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
 			err);
@@ -1165,11 +1162,10 @@ static int tegra_dc_probe(struct platform_device *pdev)
 
 static int tegra_dc_remove(struct platform_device *pdev)
 {
-	struct tegra_drm *tegra = host1x_get_drm_data(pdev->dev.parent);
 	struct tegra_dc *dc = platform_get_drvdata(pdev);
 	int err;
 
-	err = host1x_unregister_client(tegra, &dc->client.base);
+	err = host1x_client_unregister(&dc->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
 			err);
diff --git a/drivers/gpu/host1x/drm/drm.c b/drivers/gpu/host1x/drm/drm.c
index 1abcdbc7f4cd..c2db409bbd63 100644
--- a/drivers/gpu/host1x/drm/drm.c
+++ b/drivers/gpu/host1x/drm/drm.c
@@ -7,7 +7,8 @@
  * published by the Free Software Foundation.
  */
 
-#include "host1x_client.h"
+#include <linux/host1x.h>
+
 #include "drm.h"
 #include "gem.h"
 
@@ -22,239 +23,25 @@ struct tegra_drm_file {
 	struct list_head contexts;
 };
 
-struct host1x_subdev {
-	struct host1x_client *client;
-	struct device_node *np;
-	struct list_head list;
-};
-
-static int host1x_subdev_add(struct tegra_drm *tegra, struct device_node *np)
-{
-	struct host1x_subdev *subdev;
-
-	subdev = kzalloc(sizeof(*subdev), GFP_KERNEL);
-	if (!subdev)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&subdev->list);
-	subdev->np = of_node_get(np);
-
-	list_add_tail(&subdev->list, &tegra->subdevs);
-
-	return 0;
-}
-
-static int host1x_subdev_register(struct tegra_drm *tegra,
-				  struct host1x_subdev *subdev,
-				  struct host1x_client *client)
-{
-	mutex_lock(&tegra->subdevs_lock);
-	list_del_init(&subdev->list);
-	list_add_tail(&subdev->list, &tegra->active);
-	subdev->client = client;
-	mutex_unlock(&tegra->subdevs_lock);
-
-	return 0;
-}
-
-static int host1x_subdev_unregister(struct tegra_drm *tegra,
-				    struct host1x_subdev *subdev)
-{
-	mutex_lock(&tegra->subdevs_lock);
-	list_del_init(&subdev->list);
-	mutex_unlock(&tegra->subdevs_lock);
-
-	of_node_put(subdev->np);
-	kfree(subdev);
-
-	return 0;
-}
-
-static int tegra_parse_dt(struct tegra_drm *tegra)
-{
-	static const char * const compat[] = {
-		"nvidia,tegra20-dc",
-		"nvidia,tegra20-hdmi",
-		"nvidia,tegra20-gr2d",
-		"nvidia,tegra30-dc",
-		"nvidia,tegra30-hdmi",
-		"nvidia,tegra30-gr2d",
-	};
-	unsigned int i;
-	int err;
-
-	for (i = 0; i < ARRAY_SIZE(compat); i++) {
-		struct device_node *np;
-
-		for_each_child_of_node(tegra->dev->of_node, np) {
-			if (of_device_is_compatible(np, compat[i]) &&
-			    of_device_is_available(np)) {
-				err = host1x_subdev_add(tegra, np);
-				if (err < 0)
-					return err;
-			}
-		}
-	}
-
-	return 0;
-}
-
-int tegra_drm_alloc(struct platform_device *pdev)
+static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 {
+	struct host1x_device *device = to_host1x_device(drm->dev);
 	struct tegra_drm *tegra;
 	int err;
 
-	tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL);
+	tegra = kzalloc(sizeof(*tegra), GFP_KERNEL);
 	if (!tegra)
 		return -ENOMEM;
 
-	mutex_init(&tegra->subdevs_lock);
-	INIT_LIST_HEAD(&tegra->subdevs);
-	INIT_LIST_HEAD(&tegra->active);
+	dev_set_drvdata(drm->dev, tegra);
 	mutex_init(&tegra->clients_lock);
 	INIT_LIST_HEAD(&tegra->clients);
-	tegra->dev = &pdev->dev;
-
-	err = tegra_parse_dt(tegra);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to parse DT: %d\n", err);
-		return err;
-	}
-
-	host1x_set_drm_data(&pdev->dev, tegra);
-
-	return 0;
-}
-
-int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm)
-{
-	struct host1x_client *client;
-	int err;
-
-	mutex_lock(&tegra->clients_lock);
-
-	list_for_each_entry(client, &tegra->clients, list) {
-		struct tegra_drm_client *tdc = to_tegra_drm_client(client);
-
-		/* associate client with DRM device */
-		tdc->drm = drm;
-
-		if (client->ops && client->ops->init) {
-			err = client->ops->init(client);
-			if (err < 0) {
-				dev_err(tegra->dev,
-					"DRM setup failed for %s: %d\n",
-					dev_name(client->dev), err);
-				mutex_unlock(&tegra->clients_lock);
-				return err;
-			}
-		}
-	}
-
-	mutex_unlock(&tegra->clients_lock);
-
-	return 0;
-}
-
-int tegra_drm_exit(struct tegra_drm *tegra)
-{
-	struct host1x_client *client;
-	struct platform_device *pdev;
-	int err;
-
-	if (!tegra->drm)
-		return 0;
-
-	mutex_lock(&tegra->clients_lock);
-
-	list_for_each_entry_reverse(client, &tegra->clients, list) {
-		if (client->ops && client->ops->exit) {
-			err = client->ops->exit(client);
-			if (err < 0) {
-				dev_err(tegra->dev,
-					"DRM cleanup failed for %s: %d\n",
-					dev_name(client->dev), err);
-				mutex_unlock(&tegra->clients_lock);
-				return err;
-			}
-		}
-	}
-
-	mutex_unlock(&tegra->clients_lock);
-
-	pdev = to_platform_device(tegra->dev);
-	drm_platform_exit(&tegra_drm_driver, pdev);
-	tegra->drm = NULL;
-
-	return 0;
-}
-
-int host1x_register_client(struct tegra_drm *tegra,
-			   struct host1x_client *client)
-{
-	struct host1x_subdev *subdev, *tmp;
-	int err;
-
-	mutex_lock(&tegra->clients_lock);
-	list_add_tail(&client->list, &tegra->clients);
-	mutex_unlock(&tegra->clients_lock);
-
-	list_for_each_entry_safe(subdev, tmp, &tegra->subdevs, list)
-		if (subdev->np == client->dev->of_node)
-			host1x_subdev_register(tegra, subdev, client);
-
-	if (list_empty(&tegra->subdevs)) {
-		struct platform_device *pdev = to_platform_device(tegra->dev);
-
-		err = drm_platform_init(&tegra_drm_driver, pdev);
-		if (err < 0) {
-			dev_err(tegra->dev, "drm_platform_init(): %d\n", err);
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-int host1x_unregister_client(struct tegra_drm *tegra,
-			     struct host1x_client *client)
-{
-	struct host1x_subdev *subdev, *tmp;
-	int err;
-
-	list_for_each_entry_safe(subdev, tmp, &tegra->active, list) {
-		if (subdev->client == client) {
-			err = tegra_drm_exit(tegra);
-			if (err < 0) {
-				dev_err(tegra->dev, "tegra_drm_exit(): %d\n",
-					err);
-				return err;
-			}
-
-			host1x_subdev_unregister(tegra, subdev);
-			break;
-		}
-	}
-
-	mutex_lock(&tegra->clients_lock);
-	list_del_init(&client->list);
-	mutex_unlock(&tegra->clients_lock);
-
-	return 0;
-}
-
-static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
-{
-	struct tegra_drm *tegra;
-	int err;
-
-	tegra = host1x_get_drm_data(drm->dev);
 	drm->dev_private = tegra;
 	tegra->drm = drm;
 
 	drm_mode_config_init(drm);
 
-	err = tegra_drm_init(tegra, drm);
+	err = host1x_device_init(device);
 	if (err < 0)
 		return err;
 
@@ -280,9 +67,16 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 
 static int tegra_drm_unload(struct drm_device *drm)
 {
+	struct host1x_device *device = to_host1x_device(drm->dev);
+	int err;
+
 	drm_kms_helper_poll_fini(drm);
 	tegra_drm_fb_exit(drm);
 
+	err = host1x_device_exit(device);
+	if (err < 0)
+		return err;
+
 	drm_mode_config_cleanup(drm);
 
 	return 0;
@@ -370,10 +164,11 @@ static int tegra_gem_mmap(struct drm_device *drm, void *data,
 static int tegra_syncpt_read(struct drm_device *drm, void *data,
 			     struct drm_file *file)
 {
+	struct host1x *host = dev_get_drvdata(drm->dev->parent);
 	struct drm_tegra_syncpt_read *args = data;
-	struct host1x *host = dev_get_drvdata(drm->dev);
-	struct host1x_syncpt *sp = host1x_syncpt_get(host, args->id);
+	struct host1x_syncpt *sp;
 
+	sp = host1x_syncpt_get(host, args->id);
 	if (!sp)
 		return -EINVAL;
 
@@ -384,10 +179,11 @@ static int tegra_syncpt_read(struct drm_device *drm, void *data,
 static int tegra_syncpt_incr(struct drm_device *drm, void *data,
 			     struct drm_file *file)
 {
+	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
 	struct drm_tegra_syncpt_incr *args = data;
-	struct host1x *host = dev_get_drvdata(drm->dev);
-	struct host1x_syncpt *sp = host1x_syncpt_get(host, args->id);
+	struct host1x_syncpt *sp;
 
+	sp = host1x_syncpt_get(host1x, args->id);
 	if (!sp)
 		return -EINVAL;
 
@@ -397,10 +193,11 @@ static int tegra_syncpt_incr(struct drm_device *drm, void *data,
 static int tegra_syncpt_wait(struct drm_device *drm, void *data,
 			     struct drm_file *file)
 {
+	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
 	struct drm_tegra_syncpt_wait *args = data;
-	struct host1x *host = dev_get_drvdata(drm->dev);
-	struct host1x_syncpt *sp = host1x_syncpt_get(host, args->id);
+	struct host1x_syncpt *sp;
 
+	sp = host1x_syncpt_get(host1x, args->id);
 	if (!sp)
 		return -EINVAL;
 
@@ -422,7 +219,7 @@ static int tegra_open_channel(struct drm_device *drm, void *data,
 	if (!context)
 		return -ENOMEM;
 
-	list_for_each_entry(client, &tegra->clients, base.list)
+	list_for_each_entry(client, &tegra->clients, list)
 		if (client->base.class == args->client) {
 			err = client->ops->open_channel(client, context);
 			if (err)
@@ -441,8 +238,8 @@ static int tegra_open_channel(struct drm_device *drm, void *data,
 static int tegra_close_channel(struct drm_device *drm, void *data,
 			       struct drm_file *file)
 {
-	struct drm_tegra_close_channel *args = data;
 	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct drm_tegra_close_channel *args = data;
 	struct tegra_drm_context *context;
 
 	context = tegra_drm_get_context(args->context);
@@ -652,3 +449,97 @@ struct drm_driver tegra_drm_driver = {
 	.minor = DRIVER_MINOR,
 	.patchlevel = DRIVER_PATCHLEVEL,
 };
+
+int tegra_drm_register_client(struct tegra_drm *tegra,
+			      struct tegra_drm_client *client)
+{
+	mutex_lock(&tegra->clients_lock);
+	list_add_tail(&client->list, &tegra->clients);
+	mutex_unlock(&tegra->clients_lock);
+
+	return 0;
+}
+
+int tegra_drm_unregister_client(struct tegra_drm *tegra,
+				struct tegra_drm_client *client)
+{
+	mutex_lock(&tegra->clients_lock);
+	list_del_init(&client->list);
+	mutex_unlock(&tegra->clients_lock);
+
+	return 0;
+}
+
+static int host1x_drm_probe(struct host1x_device *device)
+{
+	return drm_host1x_init(&tegra_drm_driver, device);
+}
+
+static int host1x_drm_remove(struct host1x_device *device)
+{
+	drm_host1x_exit(&tegra_drm_driver, device);
+
+	return 0;
+}
+
+static const struct of_device_id host1x_drm_subdevs[] = {
+	{ .compatible = "nvidia,tegra20-dc", },
+	{ .compatible = "nvidia,tegra20-hdmi", },
+	{ .compatible = "nvidia,tegra20-gr2d", },
+	{ .compatible = "nvidia,tegra30-dc", },
+	{ .compatible = "nvidia,tegra30-hdmi", },
+	{ .compatible = "nvidia,tegra30-gr2d", },
+	{ /* sentinel */ }
+};
+
+static struct host1x_driver host1x_drm_driver = {
+	.name = "drm",
+	.probe = host1x_drm_probe,
+	.remove = host1x_drm_remove,
+	.subdevs = host1x_drm_subdevs,
+};
+
+static int __init host1x_drm_init(void)
+{
+	int err;
+
+	err = host1x_driver_register(&host1x_drm_driver);
+	if (err < 0)
+		return err;
+
+	err = platform_driver_register(&tegra_dc_driver);
+	if (err < 0)
+		goto unregister_host1x;
+
+	err = platform_driver_register(&tegra_hdmi_driver);
+	if (err < 0)
+		goto unregister_dc;
+
+	err = platform_driver_register(&tegra_gr2d_driver);
+	if (err < 0)
+		goto unregister_hdmi;
+
+	return 0;
+
+unregister_hdmi:
+	platform_driver_unregister(&tegra_hdmi_driver);
+unregister_dc:
+	platform_driver_unregister(&tegra_dc_driver);
+unregister_host1x:
+	host1x_driver_unregister(&host1x_drm_driver);
+	return err;
+}
+module_init(host1x_drm_init);
+
+static void __exit host1x_drm_exit(void)
+{
+	platform_driver_unregister(&tegra_gr2d_driver);
+	platform_driver_unregister(&tegra_hdmi_driver);
+	platform_driver_unregister(&tegra_dc_driver);
+	host1x_driver_unregister(&host1x_drm_driver);
+}
+module_exit(host1x_drm_exit);
+
+MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
+MODULE_DESCRIPTION("NVIDIA Tegra DRM driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/host1x/drm/drm.h b/drivers/gpu/host1x/drm/drm.h
index 8c26c6b1f5e1..25522e23c7b8 100644
--- a/drivers/gpu/host1x/drm/drm.h
+++ b/drivers/gpu/host1x/drm/drm.h
@@ -32,11 +32,6 @@ struct tegra_fbdev {
 
 struct tegra_drm {
 	struct drm_device *drm;
-	struct device *dev;
-
-	struct mutex subdevs_lock;
-	struct list_head subdevs;
-	struct list_head active;
 
 	struct mutex clients_lock;
 	struct list_head clients;
@@ -63,29 +58,29 @@ struct tegra_drm_client_ops {
 
 struct tegra_drm_client {
 	struct host1x_client base;
-	struct drm_device *drm;
+	struct list_head list;
 
 	const struct tegra_drm_client_ops *ops;
 };
 
 static inline struct tegra_drm_client *
-to_tegra_drm_client(struct host1x_client *client)
+host1x_to_drm_client(struct host1x_client *client)
 {
 	return container_of(client, struct tegra_drm_client, base);
 }
 
+extern int tegra_drm_register_client(struct tegra_drm *tegra,
+				     struct tegra_drm_client *client);
+extern int tegra_drm_unregister_client(struct tegra_drm *tegra,
+				       struct tegra_drm_client *client);
+
 extern int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm);
 extern int tegra_drm_exit(struct tegra_drm *tegra);
 
-extern int host1x_register_client(struct tegra_drm *tegra,
-				  struct host1x_client *client);
-extern int host1x_unregister_client(struct tegra_drm *tegra,
-				    struct host1x_client *client);
-
 struct tegra_output;
 
 struct tegra_dc {
-	struct tegra_drm_client client;
+	struct host1x_client client;
 	struct device *dev;
 	spinlock_t lock;
 
@@ -109,7 +104,7 @@ struct tegra_dc {
 };
 
 static inline struct tegra_dc *
-tegra_drm_client_to_dc(struct tegra_drm_client *client)
+host1x_client_to_dc(struct host1x_client *client)
 {
 	return container_of(client, struct tegra_dc, client);
 }
@@ -235,6 +230,10 @@ static inline int tegra_output_check_mode(struct tegra_output *output,
 	return output ? -ENOSYS : -EINVAL;
 }
 
+/* from bus.c */
+int drm_host1x_init(struct drm_driver *driver, struct host1x_device *device);
+void drm_host1x_exit(struct drm_driver *driver, struct host1x_device *device);
+
 /* from rgb.c */
 extern int tegra_dc_rgb_probe(struct tegra_dc *dc);
 extern int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc);
@@ -252,6 +251,8 @@ extern int tegra_drm_fb_init(struct drm_device *drm);
 extern void tegra_drm_fb_exit(struct drm_device *drm);
 extern void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev);
 
-extern struct drm_driver tegra_drm_driver;
+extern struct platform_driver tegra_dc_driver;
+extern struct platform_driver tegra_hdmi_driver;
+extern struct platform_driver tegra_gr2d_driver;
 
 #endif /* HOST1X_DRM_H */
diff --git a/drivers/gpu/host1x/drm/gr2d.c b/drivers/gpu/host1x/drm/gr2d.c
index dfb822428ca0..4e407e30da1c 100644
--- a/drivers/gpu/host1x/drm/gr2d.c
+++ b/drivers/gpu/host1x/drm/gr2d.c
@@ -16,7 +16,6 @@
 
 #include <linux/clk.h>
 
-#include "host1x_client.h"
 #include "drm.h"
 #include "gem.h"
 
@@ -35,19 +34,45 @@ static inline struct gr2d *to_gr2d(struct tegra_drm_client *client)
 	return container_of(client, struct gr2d, client);
 }
 
-static int gr2d_client_init(struct host1x_client *client)
+static int gr2d_init(struct host1x_client *client)
 {
-	return 0;
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct gr2d *gr2d = to_gr2d(drm);
+
+	gr2d->channel = host1x_channel_request(client->dev);
+	if (!gr2d->channel)
+		return -ENOMEM;
+
+	client->syncpts[0] = host1x_syncpt_request(client->dev, false);
+	if (!client->syncpts[0]) {
+		host1x_channel_free(gr2d->channel);
+		return -ENOMEM;
+	}
+
+	return tegra_drm_register_client(tegra, drm);
 }
 
-static int gr2d_client_exit(struct host1x_client *client)
+static int gr2d_exit(struct host1x_client *client)
 {
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct gr2d *gr2d = to_gr2d(drm);
+	int err;
+
+	err = tegra_drm_unregister_client(tegra, drm);
+	if (err < 0)
+		return err;
+
+	host1x_syncpt_free(client->syncpts[0]);
+	host1x_channel_free(gr2d->channel);
+
 	return 0;
 }
 
 static const struct host1x_client_ops gr2d_client_ops = {
-	.init = gr2d_client_init,
-	.exit = gr2d_client_exit,
+	.init = gr2d_init,
+	.exit = gr2d_exit,
 };
 
 static int gr2d_open_channel(struct tegra_drm_client *client,
@@ -240,7 +265,6 @@ static const u32 gr2d_addr_regs[] = {
 
 static int gr2d_probe(struct platform_device *pdev)
 {
-	struct tegra_drm *tegra = host1x_get_drm_data(pdev->dev.parent);
 	struct device *dev = &pdev->dev;
 	struct host1x_syncpt **syncpts;
 	struct gr2d *gr2d;
@@ -267,25 +291,17 @@ static int gr2d_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	gr2d->channel = host1x_channel_request(dev);
-	if (!gr2d->channel)
-		return -ENOMEM;
-
-	*syncpts = host1x_syncpt_request(dev, false);
-	if (!(*syncpts)) {
-		host1x_channel_free(gr2d->channel);
-		return -ENOMEM;
-	}
-
 	INIT_LIST_HEAD(&gr2d->client.base.list);
 	gr2d->client.base.ops = &gr2d_client_ops;
 	gr2d->client.base.dev = dev;
 	gr2d->client.base.class = HOST1X_CLASS_GR2D;
 	gr2d->client.base.syncpts = syncpts;
 	gr2d->client.base.num_syncpts = 1;
+
+	INIT_LIST_HEAD(&gr2d->client.list);
 	gr2d->client.ops = &gr2d_ops;
 
-	err = host1x_register_client(tegra, &gr2d->client.base);
+	err = host1x_client_register(&gr2d->client.base);
 	if (err < 0) {
 		dev_err(dev, "failed to register host1x client: %d\n", err);
 		return err;
@@ -302,22 +318,16 @@ static int gr2d_probe(struct platform_device *pdev)
 
 static int gr2d_remove(struct platform_device *pdev)
 {
-	struct tegra_drm *tegra = host1x_get_drm_data(pdev->dev.parent);
 	struct gr2d *gr2d = platform_get_drvdata(pdev);
-	unsigned int i;
 	int err;
 
-	err = host1x_unregister_client(tegra, &gr2d->client.base);
+	err = host1x_client_unregister(&gr2d->client.base);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
 			err);
 		return err;
 	}
 
-	for (i = 0; i < gr2d->client.base.num_syncpts; i++)
-		host1x_syncpt_free(gr2d->client.base.syncpts[i]);
-
-	host1x_channel_free(gr2d->channel);
 	clk_disable_unprepare(gr2d->clk);
 
 	return 0;
diff --git a/drivers/gpu/host1x/drm/hdmi.c b/drivers/gpu/host1x/drm/hdmi.c
index a2370045993c..f5663d15670b 100644
--- a/drivers/gpu/host1x/drm/hdmi.c
+++ b/drivers/gpu/host1x/drm/hdmi.c
@@ -13,13 +13,12 @@
 #include <linux/hdmi.h>
 #include <linux/regulator/consumer.h>
 
-#include "host1x_client.h"
 #include "hdmi.h"
 #include "drm.h"
 #include "dc.h"
 
 struct tegra_hdmi {
-	struct tegra_drm_client client;
+	struct host1x_client client;
 	struct tegra_output output;
 	struct device *dev;
 
@@ -43,7 +42,7 @@ struct tegra_hdmi {
 };
 
 static inline struct tegra_hdmi *
-tegra_drm_client_to_hdmi(struct tegra_drm_client *client)
+host1x_client_to_hdmi(struct host1x_client *client)
 {
 	return container_of(client, struct tegra_hdmi, client);
 }
@@ -1118,22 +1117,22 @@ static int tegra_hdmi_debugfs_exit(struct tegra_hdmi *hdmi)
 
 static int tegra_hdmi_init(struct host1x_client *client)
 {
-	struct tegra_drm_client *drm = to_tegra_drm_client(client);
-	struct tegra_hdmi *hdmi = tegra_drm_client_to_hdmi(drm);
+	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
 	int err;
 
 	hdmi->output.type = TEGRA_OUTPUT_HDMI;
 	hdmi->output.dev = client->dev;
 	hdmi->output.ops = &hdmi_ops;
 
-	err = tegra_output_init(drm->drm, &hdmi->output);
+	err = tegra_output_init(tegra->drm, &hdmi->output);
 	if (err < 0) {
 		dev_err(client->dev, "output setup failed: %d\n", err);
 		return err;
 	}
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_hdmi_debugfs_init(hdmi, drm->drm->primary);
+		err = tegra_hdmi_debugfs_init(hdmi, tegra->drm->primary);
 		if (err < 0)
 			dev_err(client->dev, "debugfs setup failed: %d\n", err);
 	}
@@ -1143,8 +1142,7 @@ static int tegra_hdmi_init(struct host1x_client *client)
 
 static int tegra_hdmi_exit(struct host1x_client *client)
 {
-	struct tegra_drm_client *drm = to_tegra_drm_client(client);
-	struct tegra_hdmi *hdmi = tegra_drm_client_to_hdmi(drm);
+	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
 	int err;
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
@@ -1176,7 +1174,6 @@ static const struct host1x_client_ops hdmi_client_ops = {
 
 static int tegra_hdmi_probe(struct platform_device *pdev)
 {
-	struct tegra_drm *tegra = host1x_get_drm_data(pdev->dev.parent);
 	struct tegra_hdmi *hdmi;
 	struct resource *regs;
 	int err;
@@ -1247,11 +1244,11 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 
 	hdmi->irq = err;
 
-	INIT_LIST_HEAD(&hdmi->client.base.list);
-	hdmi->client.base.ops = &hdmi_client_ops;
-	hdmi->client.base.dev = &pdev->dev;
+	INIT_LIST_HEAD(&hdmi->client.list);
+	hdmi->client.ops = &hdmi_client_ops;
+	hdmi->client.dev = &pdev->dev;
 
-	err = host1x_register_client(tegra, &hdmi->client.base);
+	err = host1x_client_register(&hdmi->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
 			err);
@@ -1265,11 +1262,10 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 
 static int tegra_hdmi_remove(struct platform_device *pdev)
 {
-	struct tegra_drm *tegra = host1x_get_drm_data(pdev->dev.parent);
 	struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
 	int err;
 
-	err = host1x_unregister_client(tegra, &hdmi->client.base);
+	err = host1x_client_unregister(&hdmi->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
 			err);
diff --git a/drivers/gpu/host1x/host1x_client.h b/drivers/gpu/host1x/host1x_client.h
deleted file mode 100644
index 6a0bd0268042..000000000000
--- a/drivers/gpu/host1x/host1x_client.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef HOST1X_CLIENT_H
-#define HOST1X_CLIENT_H
-
-struct device;
-struct platform_device;
-
-#ifdef CONFIG_DRM_TEGRA
-int tegra_drm_alloc(struct platform_device *pdev);
-#else
-static inline int tegra_drm_alloc(struct platform_device *pdev)
-{
-	return 0;
-}
-#endif
-
-void host1x_set_drm_data(struct device *dev, void *data);
-void *host1x_get_drm_data(struct device *dev);
-
-#endif
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 2b954adf5bd4..ffd8ad92cdf9 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -150,6 +150,7 @@ int drm_err(const char *func, const char *format, ...);
 #define DRIVER_BUS_PCI 0x1
 #define DRIVER_BUS_PLATFORM 0x2
 #define DRIVER_BUS_USB 0x3
+#define DRIVER_BUS_HOST1X 0x4
 
 /***********************************************************************/
 /** \name Begin the DRM... */
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 7442f2a57039..e62c61a4afa9 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -19,7 +19,7 @@
 #ifndef __LINUX_HOST1X_H
 #define __LINUX_HOST1X_H
 
-#include <linux/kref.h>
+#include <linux/device.h>
 #include <linux/types.h>
 
 enum host1x_class {
@@ -37,6 +37,7 @@ struct host1x_client_ops {
 
 struct host1x_client {
 	struct list_head list;
+	struct device *parent;
 	struct device *dev;
 
 	const struct host1x_client_ops *ops;
@@ -230,4 +231,46 @@ void host1x_job_put(struct host1x_job *job);
 int host1x_job_pin(struct host1x_job *job, struct device *dev);
 void host1x_job_unpin(struct host1x_job *job);
 
+/*
+ * subdevice probe infrastructure
+ */
+
+struct host1x_device;
+
+struct host1x_driver {
+	const struct of_device_id *subdevs;
+	struct list_head list;
+	const char *name;
+
+	int (*probe)(struct host1x_device *device);
+	int (*remove)(struct host1x_device *device);
+};
+
+int host1x_driver_register(struct host1x_driver *driver);
+void host1x_driver_unregister(struct host1x_driver *driver);
+
+struct host1x_device {
+	struct host1x_driver *driver;
+	struct list_head list;
+	struct device dev;
+
+	struct mutex subdevs_lock;
+	struct list_head subdevs;
+	struct list_head active;
+
+	struct mutex clients_lock;
+	struct list_head clients;
+};
+
+static inline struct host1x_device *to_host1x_device(struct device *dev)
+{
+	return container_of(dev, struct host1x_device, dev);
+}
+
+int host1x_device_init(struct host1x_device *device);
+int host1x_device_exit(struct host1x_device *device);
+
+int host1x_client_register(struct host1x_client *client);
+int host1x_client_unregister(struct host1x_client *client);
+
 #endif
-- 
cgit v1.2.3


From 5f60ed0d840d53e9d65aa54e1a5365af8ce2769e Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@avionic-design.de>
Date: Thu, 28 Feb 2013 08:08:01 +0100
Subject: drm/tegra: Add 3D support

Initialize and power the 3D unit on Tegra20, Tegra30 and Tegra114 and
register a channel with the Tegra DRM driver so that the unit can be
used from userspace.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/Makefile |   3 +-
 drivers/gpu/drm/tegra/drm.c    |  10 ++
 drivers/gpu/drm/tegra/drm.h    |   1 +
 drivers/gpu/drm/tegra/gr3d.c   | 337 +++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/gr3d.h   |  27 ++++
 include/linux/host1x.h         |   1 +
 6 files changed, 378 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tegra/gr3d.c
 create mode 100644 drivers/gpu/drm/tegra/gr3d.h

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index e1cdf1da2778..edc76abd58bb 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -9,6 +9,7 @@ tegra-drm-y := \
 	output.o \
 	rgb.o \
 	hdmi.o \
-	gr2d.o
+	gr2d.o \
+	gr3d.o
 
 obj-$(CONFIG_DRM_TEGRA) += tegra-drm.o
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 6f42f1692f29..26a2903879d8 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -615,10 +615,13 @@ static const struct of_device_id host1x_drm_subdevs[] = {
 	{ .compatible = "nvidia,tegra20-dc", },
 	{ .compatible = "nvidia,tegra20-hdmi", },
 	{ .compatible = "nvidia,tegra20-gr2d", },
+	{ .compatible = "nvidia,tegra20-gr3d", },
 	{ .compatible = "nvidia,tegra30-dc", },
 	{ .compatible = "nvidia,tegra30-hdmi", },
 	{ .compatible = "nvidia,tegra30-gr2d", },
+	{ .compatible = "nvidia,tegra30-gr3d", },
 	{ .compatible = "nvidia,tegra114-hdmi", },
+	{ .compatible = "nvidia,tegra114-gr3d", },
 	{ /* sentinel */ }
 };
 
@@ -649,8 +652,14 @@ static int __init host1x_drm_init(void)
 	if (err < 0)
 		goto unregister_hdmi;
 
+	err = platform_driver_register(&tegra_gr3d_driver);
+	if (err < 0)
+		goto unregister_gr2d;
+
 	return 0;
 
+unregister_gr2d:
+	platform_driver_unregister(&tegra_gr2d_driver);
 unregister_hdmi:
 	platform_driver_unregister(&tegra_hdmi_driver);
 unregister_dc:
@@ -663,6 +672,7 @@ module_init(host1x_drm_init);
 
 static void __exit host1x_drm_exit(void)
 {
+	platform_driver_unregister(&tegra_gr3d_driver);
 	platform_driver_unregister(&tegra_gr2d_driver);
 	platform_driver_unregister(&tegra_hdmi_driver);
 	platform_driver_unregister(&tegra_dc_driver);
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index e3cd3ed438d5..b599fd4650ad 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -261,5 +261,6 @@ extern void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev);
 extern struct platform_driver tegra_dc_driver;
 extern struct platform_driver tegra_hdmi_driver;
 extern struct platform_driver tegra_gr2d_driver;
+extern struct platform_driver tegra_gr3d_driver;
 
 #endif /* HOST1X_DRM_H */
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
new file mode 100644
index 000000000000..dc0f222d1323
--- /dev/null
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2013 Avionic Design GmbH
+ * Copyright (C) 2013 NVIDIA Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/host1x.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/tegra-powergate.h>
+
+#include "drm.h"
+#include "gem.h"
+#include "gr3d.h"
+
+struct gr3d {
+	struct tegra_drm_client client;
+	struct host1x_channel *channel;
+	struct clk *clk_secondary;
+	struct clk *clk;
+
+	DECLARE_BITMAP(addr_regs, GR3D_NUM_REGS);
+};
+
+static inline struct gr3d *to_gr3d(struct tegra_drm_client *client)
+{
+	return container_of(client, struct gr3d, client);
+}
+
+static int gr3d_init(struct host1x_client *client)
+{
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct gr3d *gr3d = to_gr3d(drm);
+
+	gr3d->channel = host1x_channel_request(client->dev);
+	if (!gr3d->channel)
+		return -ENOMEM;
+
+	client->syncpts[0] = host1x_syncpt_request(client->dev, 0);
+	if (!client->syncpts[0]) {
+		host1x_channel_free(gr3d->channel);
+		return -ENOMEM;
+	}
+
+	return tegra_drm_register_client(tegra, drm);
+}
+
+static int gr3d_exit(struct host1x_client *client)
+{
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct tegra_drm *tegra = dev_get_drvdata(client->parent);
+	struct gr3d *gr3d = to_gr3d(drm);
+	int err;
+
+	err = tegra_drm_unregister_client(tegra, drm);
+	if (err < 0)
+		return err;
+
+	host1x_syncpt_free(client->syncpts[0]);
+	host1x_channel_free(gr3d->channel);
+
+	return 0;
+}
+
+static const struct host1x_client_ops gr3d_client_ops = {
+	.init = gr3d_init,
+	.exit = gr3d_exit,
+};
+
+static int gr3d_open_channel(struct tegra_drm_client *client,
+			     struct tegra_drm_context *context)
+{
+	struct gr3d *gr3d = to_gr3d(client);
+
+	context->channel = host1x_channel_get(gr3d->channel);
+	if (!context->channel)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void gr3d_close_channel(struct tegra_drm_context *context)
+{
+	host1x_channel_put(context->channel);
+}
+
+static int gr3d_is_addr_reg(struct device *dev, u32 class, u32 offset)
+{
+	struct gr3d *gr3d = dev_get_drvdata(dev);
+
+	switch (class) {
+	case HOST1X_CLASS_HOST1X:
+		if (offset == 0x2b)
+			return 1;
+
+		break;
+
+	case HOST1X_CLASS_GR3D:
+		if (offset >= GR3D_NUM_REGS)
+			break;
+
+		if (test_bit(offset, gr3d->addr_regs))
+			return 1;
+
+		break;
+	}
+
+	return 0;
+}
+
+static const struct tegra_drm_client_ops gr3d_ops = {
+	.open_channel = gr3d_open_channel,
+	.close_channel = gr3d_close_channel,
+	.is_addr_reg = gr3d_is_addr_reg,
+	.submit = tegra_drm_submit,
+};
+
+static const struct of_device_id tegra_gr3d_match[] = {
+	{ .compatible = "nvidia,tegra114-gr3d" },
+	{ .compatible = "nvidia,tegra30-gr3d" },
+	{ .compatible = "nvidia,tegra20-gr3d" },
+	{ }
+};
+
+static const u32 gr3d_addr_regs[] = {
+	GR3D_IDX_ATTRIBUTE( 0),
+	GR3D_IDX_ATTRIBUTE( 1),
+	GR3D_IDX_ATTRIBUTE( 2),
+	GR3D_IDX_ATTRIBUTE( 3),
+	GR3D_IDX_ATTRIBUTE( 4),
+	GR3D_IDX_ATTRIBUTE( 5),
+	GR3D_IDX_ATTRIBUTE( 6),
+	GR3D_IDX_ATTRIBUTE( 7),
+	GR3D_IDX_ATTRIBUTE( 8),
+	GR3D_IDX_ATTRIBUTE( 9),
+	GR3D_IDX_ATTRIBUTE(10),
+	GR3D_IDX_ATTRIBUTE(11),
+	GR3D_IDX_ATTRIBUTE(12),
+	GR3D_IDX_ATTRIBUTE(13),
+	GR3D_IDX_ATTRIBUTE(14),
+	GR3D_IDX_ATTRIBUTE(15),
+	GR3D_IDX_INDEX_BASE,
+	GR3D_QR_ZTAG_ADDR,
+	GR3D_QR_CTAG_ADDR,
+	GR3D_QR_CZ_ADDR,
+	GR3D_TEX_TEX_ADDR( 0),
+	GR3D_TEX_TEX_ADDR( 1),
+	GR3D_TEX_TEX_ADDR( 2),
+	GR3D_TEX_TEX_ADDR( 3),
+	GR3D_TEX_TEX_ADDR( 4),
+	GR3D_TEX_TEX_ADDR( 5),
+	GR3D_TEX_TEX_ADDR( 6),
+	GR3D_TEX_TEX_ADDR( 7),
+	GR3D_TEX_TEX_ADDR( 8),
+	GR3D_TEX_TEX_ADDR( 9),
+	GR3D_TEX_TEX_ADDR(10),
+	GR3D_TEX_TEX_ADDR(11),
+	GR3D_TEX_TEX_ADDR(12),
+	GR3D_TEX_TEX_ADDR(13),
+	GR3D_TEX_TEX_ADDR(14),
+	GR3D_TEX_TEX_ADDR(15),
+	GR3D_DW_MEMORY_OUTPUT_ADDRESS,
+	GR3D_GLOBAL_SURFADDR( 0),
+	GR3D_GLOBAL_SURFADDR( 1),
+	GR3D_GLOBAL_SURFADDR( 2),
+	GR3D_GLOBAL_SURFADDR( 3),
+	GR3D_GLOBAL_SURFADDR( 4),
+	GR3D_GLOBAL_SURFADDR( 5),
+	GR3D_GLOBAL_SURFADDR( 6),
+	GR3D_GLOBAL_SURFADDR( 7),
+	GR3D_GLOBAL_SURFADDR( 8),
+	GR3D_GLOBAL_SURFADDR( 9),
+	GR3D_GLOBAL_SURFADDR(10),
+	GR3D_GLOBAL_SURFADDR(11),
+	GR3D_GLOBAL_SURFADDR(12),
+	GR3D_GLOBAL_SURFADDR(13),
+	GR3D_GLOBAL_SURFADDR(14),
+	GR3D_GLOBAL_SURFADDR(15),
+	GR3D_GLOBAL_SPILLSURFADDR,
+	GR3D_GLOBAL_SURFOVERADDR( 0),
+	GR3D_GLOBAL_SURFOVERADDR( 1),
+	GR3D_GLOBAL_SURFOVERADDR( 2),
+	GR3D_GLOBAL_SURFOVERADDR( 3),
+	GR3D_GLOBAL_SURFOVERADDR( 4),
+	GR3D_GLOBAL_SURFOVERADDR( 5),
+	GR3D_GLOBAL_SURFOVERADDR( 6),
+	GR3D_GLOBAL_SURFOVERADDR( 7),
+	GR3D_GLOBAL_SURFOVERADDR( 8),
+	GR3D_GLOBAL_SURFOVERADDR( 9),
+	GR3D_GLOBAL_SURFOVERADDR(10),
+	GR3D_GLOBAL_SURFOVERADDR(11),
+	GR3D_GLOBAL_SURFOVERADDR(12),
+	GR3D_GLOBAL_SURFOVERADDR(13),
+	GR3D_GLOBAL_SURFOVERADDR(14),
+	GR3D_GLOBAL_SURFOVERADDR(15),
+	GR3D_GLOBAL_SAMP01SURFADDR( 0),
+	GR3D_GLOBAL_SAMP01SURFADDR( 1),
+	GR3D_GLOBAL_SAMP01SURFADDR( 2),
+	GR3D_GLOBAL_SAMP01SURFADDR( 3),
+	GR3D_GLOBAL_SAMP01SURFADDR( 4),
+	GR3D_GLOBAL_SAMP01SURFADDR( 5),
+	GR3D_GLOBAL_SAMP01SURFADDR( 6),
+	GR3D_GLOBAL_SAMP01SURFADDR( 7),
+	GR3D_GLOBAL_SAMP01SURFADDR( 8),
+	GR3D_GLOBAL_SAMP01SURFADDR( 9),
+	GR3D_GLOBAL_SAMP01SURFADDR(10),
+	GR3D_GLOBAL_SAMP01SURFADDR(11),
+	GR3D_GLOBAL_SAMP01SURFADDR(12),
+	GR3D_GLOBAL_SAMP01SURFADDR(13),
+	GR3D_GLOBAL_SAMP01SURFADDR(14),
+	GR3D_GLOBAL_SAMP01SURFADDR(15),
+	GR3D_GLOBAL_SAMP23SURFADDR( 0),
+	GR3D_GLOBAL_SAMP23SURFADDR( 1),
+	GR3D_GLOBAL_SAMP23SURFADDR( 2),
+	GR3D_GLOBAL_SAMP23SURFADDR( 3),
+	GR3D_GLOBAL_SAMP23SURFADDR( 4),
+	GR3D_GLOBAL_SAMP23SURFADDR( 5),
+	GR3D_GLOBAL_SAMP23SURFADDR( 6),
+	GR3D_GLOBAL_SAMP23SURFADDR( 7),
+	GR3D_GLOBAL_SAMP23SURFADDR( 8),
+	GR3D_GLOBAL_SAMP23SURFADDR( 9),
+	GR3D_GLOBAL_SAMP23SURFADDR(10),
+	GR3D_GLOBAL_SAMP23SURFADDR(11),
+	GR3D_GLOBAL_SAMP23SURFADDR(12),
+	GR3D_GLOBAL_SAMP23SURFADDR(13),
+	GR3D_GLOBAL_SAMP23SURFADDR(14),
+	GR3D_GLOBAL_SAMP23SURFADDR(15),
+};
+
+static int gr3d_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct host1x_syncpt **syncpts;
+	struct gr3d *gr3d;
+	unsigned int i;
+	int err;
+
+	gr3d = devm_kzalloc(&pdev->dev, sizeof(*gr3d), GFP_KERNEL);
+	if (!gr3d)
+		return -ENOMEM;
+
+	syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL);
+	if (!syncpts)
+		return -ENOMEM;
+
+	gr3d->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(gr3d->clk)) {
+		dev_err(&pdev->dev, "cannot get clock\n");
+		return PTR_ERR(gr3d->clk);
+	}
+
+	if (of_device_is_compatible(np, "nvidia,tegra30-gr3d")) {
+		gr3d->clk_secondary = devm_clk_get(&pdev->dev, "3d2");
+		if (IS_ERR(gr3d->clk)) {
+			dev_err(&pdev->dev, "cannot get secondary clock\n");
+			return PTR_ERR(gr3d->clk);
+		}
+	}
+
+	err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D, gr3d->clk);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to power up 3D unit\n");
+		return err;
+	}
+
+	if (gr3d->clk_secondary) {
+		err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D1,
+							gr3d->clk_secondary);
+		if (err < 0) {
+			dev_err(&pdev->dev,
+				"failed to power up secondary 3D unit\n");
+			return err;
+		}
+	}
+
+	INIT_LIST_HEAD(&gr3d->client.base.list);
+	gr3d->client.base.ops = &gr3d_client_ops;
+	gr3d->client.base.dev = &pdev->dev;
+	gr3d->client.base.class = HOST1X_CLASS_GR3D;
+	gr3d->client.base.syncpts = syncpts;
+	gr3d->client.base.num_syncpts = 1;
+
+	INIT_LIST_HEAD(&gr3d->client.list);
+	gr3d->client.ops = &gr3d_ops;
+
+	err = host1x_client_register(&gr3d->client.base);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
+			err);
+		return err;
+	}
+
+	/* initialize address register map */
+	for (i = 0; i < ARRAY_SIZE(gr3d_addr_regs); i++)
+		set_bit(gr3d_addr_regs[i], gr3d->addr_regs);
+
+	platform_set_drvdata(pdev, gr3d);
+
+	return 0;
+}
+
+static int gr3d_remove(struct platform_device *pdev)
+{
+	struct gr3d *gr3d = platform_get_drvdata(pdev);
+	int err;
+
+	err = host1x_client_unregister(&gr3d->client.base);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
+			err);
+		return err;
+	}
+
+	if (gr3d->clk_secondary) {
+		tegra_powergate_power_off(TEGRA_POWERGATE_3D1);
+		clk_disable_unprepare(gr3d->clk_secondary);
+	}
+
+	tegra_powergate_power_off(TEGRA_POWERGATE_3D);
+	clk_disable_unprepare(gr3d->clk);
+
+	return 0;
+}
+
+struct platform_driver tegra_gr3d_driver = {
+	.driver = {
+		.name = "tegra-gr3d",
+		.of_match_table = tegra_gr3d_match,
+	},
+	.probe = gr3d_probe,
+	.remove = gr3d_remove,
+};
diff --git a/drivers/gpu/drm/tegra/gr3d.h b/drivers/gpu/drm/tegra/gr3d.h
new file mode 100644
index 000000000000..0c30a1351c83
--- /dev/null
+++ b/drivers/gpu/drm/tegra/gr3d.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2013 NVIDIA Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef TEGRA_GR3D_H
+#define TEGRA_GR3D_H
+
+#define GR3D_IDX_ATTRIBUTE(x)		(0x100 + (x) * 2)
+#define GR3D_IDX_INDEX_BASE		0x121
+#define GR3D_QR_ZTAG_ADDR		0x415
+#define GR3D_QR_CTAG_ADDR		0x417
+#define GR3D_QR_CZ_ADDR			0x419
+#define GR3D_TEX_TEX_ADDR(x)		(0x710 + (x))
+#define GR3D_DW_MEMORY_OUTPUT_ADDRESS	0x904
+#define GR3D_GLOBAL_SURFADDR(x)		(0xe00 + (x))
+#define GR3D_GLOBAL_SPILLSURFADDR	0xe2a
+#define GR3D_GLOBAL_SURFOVERADDR(x)	(0xe30 + (x))
+#define GR3D_GLOBAL_SAMP01SURFADDR(x)	(0xe50 + (x))
+#define GR3D_GLOBAL_SAMP23SURFADDR(x)	(0xe60 + (x))
+
+#define GR3D_NUM_REGS			0xe88
+
+#endif
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index e62c61a4afa9..f5dd56fbdc3e 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -26,6 +26,7 @@ enum host1x_class {
 	HOST1X_CLASS_HOST1X = 0x1,
 	HOST1X_CLASS_GR2D = 0x51,
 	HOST1X_CLASS_GR2D_SB = 0x52,
+	HOST1X_CLASS_GR3D = 0x60,
 };
 
 struct host1x_client;
-- 
cgit v1.2.3


From 8736fe81532182ba0086a371fae0708ea42a2cdf Mon Sep 17 00:00:00 2001
From: Arto Merilainen <amerilainen@nvidia.com>
Date: Mon, 14 Oct 2013 15:21:52 +0300
Subject: gpu: host1x: Add 'flags' field to syncpt request

Functions host1x_syncpt_request() and _host1x_syncpt_alloc() have
been taking a separate boolean flag ('client_managed') for indicating
if the syncpoint value should be tracked by the host1x driver.

This patch converts the field into generic 'flags' field so that
we can easily add more information while requesting a syncpoint.
Clients are adapted to use the new interface accordingly.

Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gr2d.c |  2 +-
 drivers/gpu/host1x/syncpt.c  | 18 +++++++++++-------
 include/linux/host1x.h       |  4 +++-
 3 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 7f4eb110aa85..3a04b97b54a2 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -43,7 +43,7 @@ static int gr2d_init(struct host1x_client *client)
 	if (!gr2d->channel)
 		return -ENOMEM;
 
-	client->syncpts[0] = host1x_syncpt_request(client->dev, false);
+	client->syncpts[0] = host1x_syncpt_request(client->dev, 0);
 	if (!client->syncpts[0]) {
 		host1x_channel_free(gr2d->channel);
 		return -ENOMEM;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 03cf2922e469..5b88ba4c974e 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -30,9 +30,9 @@
 #define SYNCPT_CHECK_PERIOD (2 * HZ)
 #define MAX_STUCK_CHECK_COUNT 15
 
-static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host,
-						  struct device *dev,
-						  bool client_managed)
+static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
+						 struct device *dev,
+						 unsigned long flags)
 {
 	int i;
 	struct host1x_syncpt *sp = host->syncpt;
@@ -51,7 +51,11 @@ static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host,
 
 	sp->dev = dev;
 	sp->name = name;
-	sp->client_managed = client_managed;
+
+	if (flags & HOST1X_SYNCPT_CLIENT_MANAGED)
+		sp->client_managed = true;
+	else
+		sp->client_managed = false;
 
 	return sp;
 }
@@ -321,7 +325,7 @@ int host1x_syncpt_init(struct host1x *host)
 	host1x_syncpt_restore(host);
 
 	/* Allocate sync point to use for clearing waits for expired fences */
-	host->nop_sp = _host1x_syncpt_alloc(host, NULL, false);
+	host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
 	if (!host->nop_sp)
 		return -ENOMEM;
 
@@ -329,10 +333,10 @@ int host1x_syncpt_init(struct host1x *host)
 }
 
 struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
-					    bool client_managed)
+					    unsigned long flags)
 {
 	struct host1x *host = dev_get_drvdata(dev->parent);
-	return _host1x_syncpt_alloc(host, dev, client_managed);
+	return host1x_syncpt_alloc(host, dev, flags);
 }
 
 void host1x_syncpt_free(struct host1x_syncpt *sp)
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index f5dd56fbdc3e..eb713dbbae29 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -124,6 +124,8 @@ static inline void host1x_bo_kunmap(struct host1x_bo *bo,
  * host1x syncpoints
  */
 
+#define HOST1X_SYNCPT_CLIENT_MANAGED	(1 << 0)
+
 struct host1x_syncpt;
 struct host1x;
 
@@ -135,7 +137,7 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp);
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 		       u32 *value);
 struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
-					    bool client_managed);
+					    unsigned long flags);
 void host1x_syncpt_free(struct host1x_syncpt *sp);
 
 /*
-- 
cgit v1.2.3


From f5a954fed9b3eb04973ede72c50c66157fa9e15b Mon Sep 17 00:00:00 2001
From: Arto Merilainen <amerilainen@nvidia.com>
Date: Mon, 14 Oct 2013 15:21:53 +0300
Subject: gpu: host1x: Add syncpoint base support

This patch adds support for hardware syncpoint bases. This creates
a simple mechanism to stall the command FIFO until an operation is
completed.

Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/dev.h                   |  2 ++
 drivers/gpu/host1x/hw/channel_hw.c         | 20 +++++++++++
 drivers/gpu/host1x/hw/hw_host1x01_uclass.h |  6 ++++
 drivers/gpu/host1x/syncpt.c                | 55 ++++++++++++++++++++++++++++--
 drivers/gpu/host1x/syncpt.h                |  6 ++++
 include/linux/host1x.h                     |  5 +++
 6 files changed, 92 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 6cf689b9e17b..a61a976e7a42 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -27,6 +27,7 @@
 #include "job.h"
 
 struct host1x_syncpt;
+struct host1x_syncpt_base;
 struct host1x_channel;
 struct host1x_cdma;
 struct host1x_job;
@@ -102,6 +103,7 @@ struct host1x {
 
 	void __iomem *regs;
 	struct host1x_syncpt *syncpt;
+	struct host1x_syncpt_base *bases;
 	struct device *dev;
 	struct clk *clk;
 
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 3be0cd296d3a..4608257ab656 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -67,6 +67,22 @@ static void submit_gathers(struct host1x_job *job)
 	}
 }
 
+static inline void synchronize_syncpt_base(struct host1x_job *job)
+{
+	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
+	struct host1x_syncpt *sp = host->syncpt + job->syncpt_id;
+	u32 id, value;
+
+	value = host1x_syncpt_read_max(sp);
+	id = sp->base->id;
+
+	host1x_cdma_push(&job->channel->cdma,
+			 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+				HOST1X_UCLASS_LOAD_SYNCPT_BASE, 1),
+			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(id) |
+			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value));
+}
+
 static int channel_submit(struct host1x_job *job)
 {
 	struct host1x_channel *ch = job->channel;
@@ -118,6 +134,10 @@ static int channel_submit(struct host1x_job *job)
 					host1x_syncpt_read_max(sp)));
 	}
 
+	/* Synchronize base register to allow using it for relative waiting */
+	if (sp->base)
+		synchronize_syncpt_base(job);
+
 	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
 
 	job->syncpt_end = syncval;
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
index 42f3ce19ca32..f7553599ee27 100644
--- a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
@@ -111,6 +111,12 @@ static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
 }
 #define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
 	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
 static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
 {
 	return (v & 0xff) << 24;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 5b88ba4c974e..159c479829c9 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -30,6 +30,29 @@
 #define SYNCPT_CHECK_PERIOD (2 * HZ)
 #define MAX_STUCK_CHECK_COUNT 15
 
+static struct host1x_syncpt_base *
+host1x_syncpt_base_request(struct host1x *host)
+{
+	struct host1x_syncpt_base *bases = host->bases;
+	unsigned int i;
+
+	for (i = 0; i < host->info->nb_bases; i++)
+		if (!bases[i].requested)
+			break;
+
+	if (i >= host->info->nb_bases)
+		return NULL;
+
+	bases[i].requested = true;
+	return &bases[i];
+}
+
+static void host1x_syncpt_base_free(struct host1x_syncpt_base *base)
+{
+	if (base)
+		base->requested = false;
+}
+
 static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 						 struct device *dev,
 						 unsigned long flags)
@@ -44,6 +67,12 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 	if (i >= host->info->nb_pts)
 		return NULL;
 
+	if (flags & HOST1X_SYNCPT_HAS_BASE) {
+		sp->base = host1x_syncpt_base_request(host);
+		if (!sp->base)
+			return NULL;
+	}
+
 	name = kasprintf(GFP_KERNEL, "%02d-%s", sp->id,
 			dev ? dev_name(dev) : NULL);
 	if (!name)
@@ -307,20 +336,30 @@ int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
 
 int host1x_syncpt_init(struct host1x *host)
 {
+	struct host1x_syncpt_base *bases;
 	struct host1x_syncpt *syncpt;
 	int i;
 
 	syncpt = devm_kzalloc(host->dev, sizeof(*syncpt) * host->info->nb_pts,
-		GFP_KERNEL);
+			      GFP_KERNEL);
 	if (!syncpt)
 		return -ENOMEM;
 
-	for (i = 0; i < host->info->nb_pts; ++i) {
+	bases = devm_kzalloc(host->dev, sizeof(*bases) * host->info->nb_bases,
+			     GFP_KERNEL);
+	if (!bases)
+		return -ENOMEM;
+
+	for (i = 0; i < host->info->nb_pts; i++) {
 		syncpt[i].id = i;
 		syncpt[i].host = host;
 	}
 
+	for (i = 0; i < host->info->nb_bases; i++)
+		bases[i].id = i;
+
 	host->syncpt = syncpt;
+	host->bases = bases;
 
 	host1x_syncpt_restore(host);
 
@@ -344,7 +383,9 @@ void host1x_syncpt_free(struct host1x_syncpt *sp)
 	if (!sp)
 		return;
 
+	host1x_syncpt_base_free(sp->base);
 	kfree(sp->name);
+	sp->base = NULL;
 	sp->dev = NULL;
 	sp->name = NULL;
 	sp->client_managed = false;
@@ -398,3 +439,13 @@ struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id)
 		return NULL;
 	return host->syncpt + id;
 }
+
+struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp)
+{
+	return sp ? sp->base : NULL;
+}
+
+u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base)
+{
+	return base->id;
+}
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 4eb933a497fd..9056465ecd3f 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -31,6 +31,11 @@ struct host1x;
 /* Reserved for replacing an expired wait with a NOP */
 #define HOST1X_SYNCPT_RESERVED			0
 
+struct host1x_syncpt_base {
+	unsigned int id;
+	bool requested;
+};
+
 struct host1x_syncpt {
 	int id;
 	atomic_t min_val;
@@ -40,6 +45,7 @@ struct host1x_syncpt {
 	bool client_managed;
 	struct host1x *host;
 	struct device *dev;
+	struct host1x_syncpt_base *base;
 
 	/* interrupt data */
 	struct host1x_syncpt_intr intr;
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index eb713dbbae29..f5b9b87ac9a9 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -125,7 +125,9 @@ static inline void host1x_bo_kunmap(struct host1x_bo *bo,
  */
 
 #define HOST1X_SYNCPT_CLIENT_MANAGED	(1 << 0)
+#define HOST1X_SYNCPT_HAS_BASE		(1 << 1)
 
+struct host1x_syncpt_base;
 struct host1x_syncpt;
 struct host1x;
 
@@ -140,6 +142,9 @@ struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
 					    unsigned long flags);
 void host1x_syncpt_free(struct host1x_syncpt *sp);
 
+struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp);
+u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base);
+
 /*
  * host1x channel
  */
-- 
cgit v1.2.3


From 446b2a9380b64b9d7410d86ee8226031e03645cf Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 27 Jun 2013 10:25:33 +0100
Subject: DMA-API: amba: get rid of separate dma_mask

AMBA Primecell devices always treat streaming and coherent DMA exactly
the same, so there's no point in having the masks separated.

Acked-by: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/amba/bus.c       | 6 +-----
 drivers/of/platform.c    | 3 ---
 include/linux/amba/bus.h | 2 --
 3 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index c6707278a6bb..c4876ac9151a 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -552,7 +552,6 @@ amba_aphb_device_add(struct device *parent, const char *name,
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
-	dev->dma_mask = dma_mask;
 	dev->dev.coherent_dma_mask = dma_mask;
 	dev->irq[0] = irq1;
 	dev->irq[1] = irq2;
@@ -619,7 +618,7 @@ static void amba_device_initialize(struct amba_device *dev, const char *name)
 		dev_set_name(&dev->dev, "%s", name);
 	dev->dev.release = amba_device_release;
 	dev->dev.bus = &amba_bustype;
-	dev->dev.dma_mask = &dev->dma_mask;
+	dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
 	dev->res.name = dev_name(&dev->dev);
 }
 
@@ -663,9 +662,6 @@ int amba_device_register(struct amba_device *dev, struct resource *parent)
 	amba_device_initialize(dev, dev->dev.init_name);
 	dev->dev.init_name = NULL;
 
-	if (!dev->dev.coherent_dma_mask && dev->dma_mask)
-		dev_warn(&dev->dev, "coherent dma mask is unset\n");
-
 	return amba_device_add(dev, parent);
 }
 
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 9b439ac63d8e..54382ba4a6e9 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -284,9 +284,6 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
 	else
 		of_device_make_bus_id(&dev->dev);
 
-	/* setup amba-specific device info */
-	dev->dma_mask = ~0;
-
 	/* Allow the HW Peripheral ID to be overridden */
 	prop = of_get_property(node, "arm,primecell-periphid", NULL);
 	if (prop)
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 43ec7e247a80..682df0e1954a 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -30,7 +30,6 @@ struct amba_device {
 	struct device		dev;
 	struct resource		res;
 	struct clk		*pclk;
-	u64			dma_mask;
 	unsigned int		periphid;
 	unsigned int		irq[AMBA_NR_IRQS];
 };
@@ -131,7 +130,6 @@ struct amba_device name##_device = {				\
 struct amba_device name##_device = {				\
 	.dev = __AMBA_DEV(busid, data, ~0ULL),			\
 	.res = DEFINE_RES_MEM(base, SZ_4K),			\
-	.dma_mask = ~0ULL,					\
 	.irq = irqs,						\
 	.periphid = id,						\
 }
-- 
cgit v1.2.3


From fa6a8d6d65b19ab44e5244ea499bcd553cc72343 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 27 Jun 2013 12:21:45 +0100
Subject: DMA-API: provide a helper to setup DMA masks

Many drivers contain code such as:

	dev->dma_mask = &dev->coherent_dma_mask;
	dev->coherent_dma_mask = MASK;

Let's move this pattern out of drivers and have the DMA API provide a
helper for it.  This helper uses dma_set_mask_and_coherent() to allow
platform issues to be properly dealt with via dma_set_mask()/
dma_is_supported().

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/dma-mapping.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ec951f98e3d9..27d1421ad42c 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -111,6 +111,16 @@ static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask)
 	return rc;
 }
 
+/*
+ * Similar to the above, except it deals with the case where the device
+ * does not have dev->dma_mask appropriately setup.
+ */
+static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask)
+{
+	dev->dma_mask = &dev->coherent_dma_mask;
+	return dma_set_mask_and_coherent(dev, mask);
+}
+
 extern u64 dma_get_required_mask(struct device *dev);
 
 static inline unsigned int dma_get_max_seg_size(struct device *dev)
-- 
cgit v1.2.3


From 00c8f1623658947a97345ecb86b71232ff540d0d Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Mon, 29 Jul 2013 14:18:48 +0100
Subject: ARM: 7795/1: mm: dma-mapping: Add dma_max_pfn(dev) helper function

Most of the kernel assumes that PFN0 is the start of the physical
memory (RAM). This assumptions is not true on most of the ARM SOCs
and hence and if one try to update the ARM port to follow the assumptions,
we end of breaking the dma bounce limit for few block layer drivers.
One such example is trying to unify the meaning of max*_pfn on ARM
as the bootmem layer expects, breaks few block layer driver dma
bounce limit.

To fix this problem, we introduce dma_max_pfn(dev) generic helper with
a possibility of override from the architecture code. The helper converts
a DMA bitmask of bits to a block PFN number. In all the generic cases,
it is just  "dev->dma_mask >> PAGE_SHIFT" and hence default behavior
is maintained as is.

Subsequent patches will make use of the helper. No functional change.

Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/dma-mapping.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 27d1421ad42c..fd4aee29ad10 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -153,6 +153,13 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
 		return -EIO;
 }
 
+#ifndef dma_max_pfn
+static inline unsigned long dma_max_pfn(struct device *dev)
+{
+	return *dev->dma_mask >> PAGE_SHIFT;
+}
+#endif
+
 static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flag)
 {
-- 
cgit v1.2.3


From 81e87e26796782e014fd1f2bb9cd8fb6ce4021a8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 30 Oct 2013 21:43:01 +0200
Subject: kvm_host: typo fix

fix up typo in comment.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 92aae88756db..9bb1048d010e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -804,7 +804,7 @@ static inline void kvm_guest_enter(void)
 
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
-	 * is very similar to exiting to userspase from rcu point of view. In
+	 * is very similar to exiting to userspace from rcu point of view. In
 	 * addition CPU may stay in a guest mode for quite a long time (up to
 	 * one time slice). Lets treat guest mode as quiescent state, just like
 	 * we do with user-mode execution.
-- 
cgit v1.2.3


From 7d716456a0ee4e9bd63be9234f886d20382ac950 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 31 Oct 2013 12:48:14 +0100
Subject: sched/wait: Fix __wait_event_interruptible_lock_irq_timeout()

__wait_event_interruptible_lock_irq_timeout() needs the timeout
parameter passed instead of "ret".

This magically compiled since the only user has a local ret
variable. Luckily we got a build warning:

  CC      drivers/s390/scsi/zfcp_qdio.o
  drivers/s390/scsi/zfcp_qdio.c: In function 'zfcp_qdio_sbal_get':
  include/linux/wait.h:780:15: warning: 'ret' may be used uninitialized

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20131031114814.GB5551@osiris
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 3b23afa04d6b..61939ba30aa0 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -739,7 +739,7 @@ do {									\
 #define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
 						    lock, timeout)	\
 	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, ret,			\
+		      TASK_INTERRUPTIBLE, 0, timeout,			\
 		      spin_unlock_irq(&lock);				\
 		      __ret = schedule_timeout(__ret);			\
 		      spin_lock_irq(&lock));
-- 
cgit v1.2.3


From f6701d5f73c5c2f4ca37634514631fb056291f89 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:39 +0100
Subject: net: cdc_ncm: add include protection to cdc_ncm.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes it a lot easier to test modified versions

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/cdc_ncm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index cc25b70af33c..89f0bbc2cf83 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -36,6 +36,9 @@
  * SUCH DAMAGE.
  */
 
+#ifndef __LINUX_USB_CDC_NCM_H
+#define __LINUX_USB_CDC_NCM_H
+
 #define CDC_NCM_COMM_ALTSETTING_NCM		0
 #define CDC_NCM_COMM_ALTSETTING_MBIM		1
 
@@ -133,3 +136,5 @@ extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
 extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign);
 extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
 extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
+
+#endif /* __LINUX_USB_CDC_NCM_H */
-- 
cgit v1.2.3


From 3e515665a76ad8f60a1c05968cc6a5b2f2701171 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:40 +0100
Subject: net: cdc_ncm: remove redundant "intf" field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is always a duplicate of the "control" field. It causes
confusion wrt intf_data updates and cleanups.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 4 +---
 include/linux/usb/cdc_ncm.h | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 38566bffca98..bfab8796730f 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -382,7 +382,6 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	len = intf->cur_altsetting->extralen;
 
 	ctx->udev = dev->udev;
-	ctx->intf = intf;
 
 	/* parse through descriptors associated with control interface */
 	while ((len > 0) && (buf[0] > 2) && (buf[0] <= len)) {
@@ -489,7 +488,6 @@ advance:
 
 	usb_set_intfdata(ctx->data, dev);
 	usb_set_intfdata(ctx->control, dev);
-	usb_set_intfdata(ctx->intf, dev);
 
 	if (ctx->ether_desc) {
 		temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress);
@@ -562,7 +560,7 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf)
 		ctx->control = NULL;
 	}
 
-	usb_set_intfdata(ctx->intf, NULL);
+	usb_set_intfdata(intf, NULL);
 	cdc_ncm_free(ctx);
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_unbind);
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 89f0bbc2cf83..c14e00fb1667 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -103,7 +103,6 @@ struct cdc_ncm_ctx {
 	struct usb_host_endpoint *in_ep;
 	struct usb_host_endpoint *out_ep;
 	struct usb_host_endpoint *status_ep;
-	struct usb_interface *intf;
 	struct usb_interface *control;
 	struct usb_interface *data;
 
-- 
cgit v1.2.3


From ff1632aa8581b7103ac2af1ea3cb4a415eb9d6ad Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:41 +0100
Subject: net: cdc_ncm: remove redundant endpoint pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No need to duplicate stuff already in the common usbnet
struct.  We still need to keep our special find_endpoints
function because we need explicit control over the selected
altsetting.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 38 +++++++++++++++++++-------------------
 include/linux/usb/cdc_ncm.h |  3 ---
 2 files changed, 19 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index bfab8796730f..a989bd5290a6 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -292,9 +292,9 @@ max_dgram_err:
 }
 
 static void
-cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf)
+cdc_ncm_find_endpoints(struct usbnet *dev, struct usb_interface *intf)
 {
-	struct usb_host_endpoint *e;
+	struct usb_host_endpoint *e, *in = NULL, *out = NULL;
 	u8 ep;
 
 	for (ep = 0; ep < intf->cur_altsetting->desc.bNumEndpoints; ep++) {
@@ -303,18 +303,18 @@ cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf)
 		switch (e->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) {
 		case USB_ENDPOINT_XFER_INT:
 			if (usb_endpoint_dir_in(&e->desc)) {
-				if (ctx->status_ep == NULL)
-					ctx->status_ep = e;
+				if (!dev->status)
+					dev->status = e;
 			}
 			break;
 
 		case USB_ENDPOINT_XFER_BULK:
 			if (usb_endpoint_dir_in(&e->desc)) {
-				if (ctx->in_ep == NULL)
-					ctx->in_ep = e;
+				if (!in)
+					in = e;
 			} else {
-				if (ctx->out_ep == NULL)
-					ctx->out_ep = e;
+				if (!out)
+					out = e;
 			}
 			break;
 
@@ -322,6 +322,14 @@ cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf)
 			break;
 		}
 	}
+	if (in && !dev->in)
+		dev->in = usb_rcvbulkpipe(dev->udev,
+					  in->desc.bEndpointAddress &
+					  USB_ENDPOINT_NUMBER_MASK);
+	if (out && !dev->out)
+		dev->out = usb_sndbulkpipe(dev->udev,
+					   out->desc.bEndpointAddress &
+					   USB_ENDPOINT_NUMBER_MASK);
 }
 
 static void cdc_ncm_free(struct cdc_ncm_ctx *ctx)
@@ -477,11 +485,9 @@ advance:
 	if (temp)
 		goto error2;
 
-	cdc_ncm_find_endpoints(ctx, ctx->data);
-	cdc_ncm_find_endpoints(ctx, ctx->control);
-
-	if ((ctx->in_ep == NULL) || (ctx->out_ep == NULL) ||
-	    (ctx->status_ep == NULL))
+	cdc_ncm_find_endpoints(dev, ctx->data);
+	cdc_ncm_find_endpoints(dev, ctx->control);
+	if (!dev->in || !dev->out || !dev->status)
 		goto error2;
 
 	dev->net->ethtool_ops = &cdc_ncm_ethtool_ops;
@@ -496,12 +502,6 @@ advance:
 		dev_info(&dev->udev->dev, "MAC-Address: %pM\n", dev->net->dev_addr);
 	}
 
-
-	dev->in = usb_rcvbulkpipe(dev->udev,
-		ctx->in_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
-	dev->out = usb_sndbulkpipe(dev->udev,
-		ctx->out_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
-	dev->status = ctx->status_ep;
 	dev->rx_urb_size = ctx->rx_max;
 
 	/* cdc_ncm_setup will override dwNtbOutMaxSize if it is
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index c14e00fb1667..36e1e153ca2d 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -100,9 +100,6 @@ struct cdc_ncm_ctx {
 
 	struct net_device *netdev;
 	struct usb_device *udev;
-	struct usb_host_endpoint *in_ep;
-	struct usb_host_endpoint *out_ep;
-	struct usb_host_endpoint *status_ep;
 	struct usb_interface *control;
 	struct usb_interface *data;
 
-- 
cgit v1.2.3


From bed6f762123fc53c63efef386531dd877cba2468 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:42 +0100
Subject: net: cdc_ncm: remove redundant netdev field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Too many pointers back and forth are likely to confuse developers,
creating subtle bugs whenever we forget to syncronize them all.

As a usbnet driver, we should stick with the standard struct
usbnet fields as much as possible.  The netdevice is one such
field.

Cc: Greg Suarez <gsuarez@smithmicro.com>
Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_mbim.c  |  2 +-
 drivers/net/usb/cdc_ncm.c   | 73 +++++++++++++++++++++++----------------------
 include/linux/usb/cdc_ncm.h |  3 +-
 3 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 253472a8a983..af76aaf08b6b 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -175,7 +175,7 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb
 	}
 
 	spin_lock_bh(&ctx->mtx);
-	skb_out = cdc_ncm_fill_tx_frame(ctx, skb, sign);
+	skb_out = cdc_ncm_fill_tx_frame(dev, skb, sign);
 	spin_unlock_bh(&ctx->mtx);
 	return skb_out;
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index a989bd5290a6..e39e7678a798 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -80,8 +80,9 @@ cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info)
 	usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info));
 }
 
-static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
+static u8 cdc_ncm_setup(struct usbnet *dev)
 {
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	u32 val;
 	u8 flags;
 	u8 iface_no;
@@ -90,7 +91,6 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
 	u16 ntb_fmt_supported;
 	u32 min_dgram_size;
 	u32 min_hdr_size;
-	struct usbnet *dev = netdev_priv(ctx->netdev);
 
 	iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber;
 
@@ -285,8 +285,8 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
 	}
 
 max_dgram_err:
-	if (ctx->netdev->mtu != (ctx->max_datagram_size - eth_hlen))
-		ctx->netdev->mtu = ctx->max_datagram_size - eth_hlen;
+	if (dev->net->mtu != (ctx->max_datagram_size - eth_hlen))
+		dev->net->mtu = ctx->max_datagram_size - eth_hlen;
 
 	return 0;
 }
@@ -375,11 +375,10 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 
 	hrtimer_init(&ctx->tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	ctx->tx_timer.function = &cdc_ncm_tx_timer_cb;
-	ctx->bh.data = (unsigned long)ctx;
+	ctx->bh.data = (unsigned long)dev;
 	ctx->bh.func = cdc_ncm_txpath_bh;
 	atomic_set(&ctx->stop, 0);
 	spin_lock_init(&ctx->mtx);
-	ctx->netdev = dev->net;
 
 	/* store ctx pointer in device data field */
 	dev->data[0] = (unsigned long)ctx;
@@ -477,7 +476,7 @@ advance:
 		goto error2;
 
 	/* initialize data interface */
-	if (cdc_ncm_setup(ctx))
+	if (cdc_ncm_setup(dev))
 		goto error2;
 
 	/* configure data interface */
@@ -669,9 +668,9 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 }
 
 struct sk_buff *
-cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
+cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 {
-	struct usbnet *dev = netdev_priv(ctx->netdev);
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	struct usb_cdc_ncm_nth16 *nth16;
 	struct usb_cdc_ncm_ndp16 *ndp16;
 	struct sk_buff *skb_out;
@@ -695,7 +694,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 		if (skb_out == NULL) {
 			if (skb != NULL) {
 				dev_kfree_skb_any(skb);
-				ctx->netdev->stats.tx_dropped++;
+				dev->net->stats.tx_dropped++;
 			}
 			goto exit_no_skb;
 		}
@@ -733,12 +732,12 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 				/* won't fit, MTU problem? */
 				dev_kfree_skb_any(skb);
 				skb = NULL;
-				ctx->netdev->stats.tx_dropped++;
+				dev->net->stats.tx_dropped++;
 			} else {
 				/* no room for skb - store for later */
 				if (ctx->tx_rem_skb != NULL) {
 					dev_kfree_skb_any(ctx->tx_rem_skb);
-					ctx->netdev->stats.tx_dropped++;
+					dev->net->stats.tx_dropped++;
 				}
 				ctx->tx_rem_skb = skb;
 				ctx->tx_rem_sign = sign;
@@ -771,7 +770,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 	if (skb != NULL) {
 		dev_kfree_skb_any(skb);
 		skb = NULL;
-		ctx->netdev->stats.tx_dropped++;
+		dev->net->stats.tx_dropped++;
 	}
 
 	ctx->tx_curr_frame_num = n;
@@ -814,7 +813,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 
 	/* return skb */
 	ctx->tx_curr_skb = NULL;
-	ctx->netdev->stats.tx_packets += ctx->tx_curr_frame_num;
+	dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
 	return skb_out;
 
 exit_no_skb:
@@ -846,18 +845,19 @@ static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *timer)
 
 static void cdc_ncm_txpath_bh(unsigned long param)
 {
-	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)param;
+	struct usbnet *dev = (struct usbnet *)param;
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 
 	spin_lock_bh(&ctx->mtx);
 	if (ctx->tx_timer_pending != 0) {
 		ctx->tx_timer_pending--;
 		cdc_ncm_tx_timeout_start(ctx);
 		spin_unlock_bh(&ctx->mtx);
-	} else if (ctx->netdev != NULL) {
+	} else if (dev->net != NULL) {
 		spin_unlock_bh(&ctx->mtx);
-		netif_tx_lock_bh(ctx->netdev);
-		usbnet_start_xmit(NULL, ctx->netdev);
-		netif_tx_unlock_bh(ctx->netdev);
+		netif_tx_lock_bh(dev->net);
+		usbnet_start_xmit(NULL, dev->net);
+		netif_tx_unlock_bh(dev->net);
 	} else {
 		spin_unlock_bh(&ctx->mtx);
 	}
@@ -880,7 +880,7 @@ cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 		goto error;
 
 	spin_lock_bh(&ctx->mtx);
-	skb_out = cdc_ncm_fill_tx_frame(ctx, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN));
+	skb_out = cdc_ncm_fill_tx_frame(dev, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN));
 	spin_unlock_bh(&ctx->mtx);
 	return skb_out;
 
@@ -1047,9 +1047,10 @@ error:
 }
 
 static void
-cdc_ncm_speed_change(struct cdc_ncm_ctx *ctx,
+cdc_ncm_speed_change(struct usbnet *dev,
 		     struct usb_cdc_speed_change *data)
 {
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	uint32_t rx_speed = le32_to_cpu(data->DLBitRRate);
 	uint32_t tx_speed = le32_to_cpu(data->ULBitRate);
 
@@ -1063,18 +1064,18 @@ cdc_ncm_speed_change(struct cdc_ncm_ctx *ctx,
 
 		if ((tx_speed > 1000000) && (rx_speed > 1000000)) {
 			printk(KERN_INFO KBUILD_MODNAME
-				": %s: %u mbit/s downlink "
-				"%u mbit/s uplink\n",
-				ctx->netdev->name,
-				(unsigned int)(rx_speed / 1000000U),
-				(unsigned int)(tx_speed / 1000000U));
+			       ": %s: %u mbit/s downlink "
+			       "%u mbit/s uplink\n",
+			       dev->net->name,
+			       (unsigned int)(rx_speed / 1000000U),
+			       (unsigned int)(tx_speed / 1000000U));
 		} else {
 			printk(KERN_INFO KBUILD_MODNAME
-				": %s: %u kbit/s downlink "
-				"%u kbit/s uplink\n",
-				ctx->netdev->name,
-				(unsigned int)(rx_speed / 1000U),
-				(unsigned int)(tx_speed / 1000U));
+			       ": %s: %u kbit/s downlink "
+			       "%u kbit/s uplink\n",
+			       dev->net->name,
+			       (unsigned int)(rx_speed / 1000U),
+			       (unsigned int)(tx_speed / 1000U));
 		}
 	}
 }
@@ -1091,7 +1092,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 
 	/* test for split data in 8-byte chunks */
 	if (test_and_clear_bit(EVENT_STS_SPLIT, &dev->flags)) {
-		cdc_ncm_speed_change(ctx,
+		cdc_ncm_speed_change(dev,
 		      (struct usb_cdc_speed_change *)urb->transfer_buffer);
 		return;
 	}
@@ -1108,8 +1109,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 		ctx->connected = le16_to_cpu(event->wValue);
 
 		printk(KERN_INFO KBUILD_MODNAME ": %s: network connection:"
-			" %sconnected\n",
-			ctx->netdev->name, ctx->connected ? "" : "dis");
+		       " %sconnected\n",
+		       dev->net->name, ctx->connected ? "" : "dis");
 
 		usbnet_link_change(dev, ctx->connected, 0);
 		if (!ctx->connected)
@@ -1121,8 +1122,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 					sizeof(struct usb_cdc_speed_change)))
 			set_bit(EVENT_STS_SPLIT, &dev->flags);
 		else
-			cdc_ncm_speed_change(ctx,
-				(struct usb_cdc_speed_change *) &event[1]);
+			cdc_ncm_speed_change(dev,
+					     (struct usb_cdc_speed_change *)&event[1]);
 		break;
 
 	default:
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 36e1e153ca2d..5c47bd9620d5 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -98,7 +98,6 @@ struct cdc_ncm_ctx {
 	const struct usb_cdc_union_desc *union_desc;
 	const struct usb_cdc_ether_desc *ether_desc;
 
-	struct net_device *netdev;
 	struct usb_device *udev;
 	struct usb_interface *control;
 	struct usb_interface *data;
@@ -129,7 +128,7 @@ struct cdc_ncm_ctx {
 extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf);
 extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
 extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
-extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign);
+extern struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
 extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
 extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
 
-- 
cgit v1.2.3


From de5bee2720776989060b9686e6a89e938a346345 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:43 +0100
Subject: net: cdc_ncm: remove unused udev field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We already use the usbnet udev field everywhere this could have
been used.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 2 --
 include/linux/usb/cdc_ncm.h | 1 -
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index e39e7678a798..9cdd762916dc 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -388,8 +388,6 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	buf = intf->cur_altsetting->extra;
 	len = intf->cur_altsetting->extralen;
 
-	ctx->udev = dev->udev;
-
 	/* parse through descriptors associated with control interface */
 	while ((len > 0) && (buf[0] > 2) && (buf[0] <= len)) {
 
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 5c47bd9620d5..059dcc93c4d8 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -98,7 +98,6 @@ struct cdc_ncm_ctx {
 	const struct usb_cdc_union_desc *union_desc;
 	const struct usb_cdc_ether_desc *ether_desc;
 
-	struct usb_device *udev;
 	struct usb_interface *control;
 	struct usb_interface *data;
 
-- 
cgit v1.2.3


From f3028c524a7cd4d97b034fc1f35dcaecb5d6f9d6 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:44 +0100
Subject: net: cdc_ncm: remove tx_speed and rx_speed fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These fields are only used to prevent printing the same speeds
multiple times if we receive multiple identical speed notifications.

The value of these printk's is questionable, and even more so when
we filter out some of the notifications sent us by the firmware. If
we are going to print any of these, then we should print them all.

Removing little used fields is a bonus.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 37 ++++++++++++++-----------------------
 include/linux/usb/cdc_ncm.h |  2 --
 2 files changed, 14 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9cdd762916dc..fc36a99f8183 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -510,7 +510,6 @@ advance:
 	    ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0)
 		ctx->tx_max++;
 
-	ctx->tx_speed = ctx->rx_speed = 0;
 	return 0;
 
 error2:
@@ -1048,7 +1047,6 @@ static void
 cdc_ncm_speed_change(struct usbnet *dev,
 		     struct usb_cdc_speed_change *data)
 {
-	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	uint32_t rx_speed = le32_to_cpu(data->DLBitRRate);
 	uint32_t tx_speed = le32_to_cpu(data->ULBitRate);
 
@@ -1056,25 +1054,20 @@ cdc_ncm_speed_change(struct usbnet *dev,
 	 * Currently the USB-NET API does not support reporting the actual
 	 * device speed. Do print it instead.
 	 */
-	if ((tx_speed != ctx->tx_speed) || (rx_speed != ctx->rx_speed)) {
-		ctx->tx_speed = tx_speed;
-		ctx->rx_speed = rx_speed;
-
-		if ((tx_speed > 1000000) && (rx_speed > 1000000)) {
-			printk(KERN_INFO KBUILD_MODNAME
-			       ": %s: %u mbit/s downlink "
-			       "%u mbit/s uplink\n",
-			       dev->net->name,
-			       (unsigned int)(rx_speed / 1000000U),
-			       (unsigned int)(tx_speed / 1000000U));
-		} else {
-			printk(KERN_INFO KBUILD_MODNAME
-			       ": %s: %u kbit/s downlink "
-			       "%u kbit/s uplink\n",
-			       dev->net->name,
-			       (unsigned int)(rx_speed / 1000U),
-			       (unsigned int)(tx_speed / 1000U));
-		}
+	if ((tx_speed > 1000000) && (rx_speed > 1000000)) {
+		printk(KERN_INFO KBUILD_MODNAME
+		       ": %s: %u mbit/s downlink "
+		       "%u mbit/s uplink\n",
+		       dev->net->name,
+		       (unsigned int)(rx_speed / 1000000U),
+		       (unsigned int)(tx_speed / 1000000U));
+	} else {
+		printk(KERN_INFO KBUILD_MODNAME
+		       ": %s: %u kbit/s downlink "
+		       "%u kbit/s uplink\n",
+		       dev->net->name,
+		       (unsigned int)(rx_speed / 1000U),
+		       (unsigned int)(tx_speed / 1000U));
 	}
 }
 
@@ -1111,8 +1104,6 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 		       dev->net->name, ctx->connected ? "" : "dis");
 
 		usbnet_link_change(dev, ctx->connected, 0);
-		if (!ctx->connected)
-			ctx->tx_speed = ctx->rx_speed = 0;
 		break;
 
 	case USB_CDC_NOTIFY_SPEED_CHANGE:
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 059dcc93c4d8..f14af3dd0cce 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -110,8 +110,6 @@ struct cdc_ncm_ctx {
 
 	u32 tx_timer_pending;
 	u32 tx_curr_frame_num;
-	u32 rx_speed;
-	u32 tx_speed;
 	u32 rx_max;
 	u32 tx_max;
 	u32 max_datagram_size;
-- 
cgit v1.2.3


From 6a9612e2cb22b3fd6a7304dcbf2b4ee1cf2104b2 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:45 +0100
Subject: net: cdc_ncm: remove ncm_parm field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moving the call to cdc_ncm_setup() after the endpoint
setup removes the last remaining reference to ncm_parm
outside cdc_ncm_setup.

Collecting all the ncm_parm based calculations in
cdc_ncm_setup improves readability.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 46 ++++++++++++++++++++++-----------------------
 include/linux/usb/cdc_ncm.h |  1 -
 2 files changed, 23 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index fc36a99f8183..4de3a5423e87 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -83,6 +83,7 @@ cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info)
 static u8 cdc_ncm_setup(struct usbnet *dev)
 {
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	struct usb_cdc_ncm_ntb_parameters ncm_parm;
 	u32 val;
 	u8 flags;
 	u8 iface_no;
@@ -97,22 +98,22 @@ static u8 cdc_ncm_setup(struct usbnet *dev)
 	err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_PARAMETERS,
 			      USB_TYPE_CLASS | USB_DIR_IN
 			      |USB_RECIP_INTERFACE,
-			      0, iface_no, &ctx->ncm_parm,
-			      sizeof(ctx->ncm_parm));
+			      0, iface_no, &ncm_parm,
+			      sizeof(ncm_parm));
 	if (err < 0) {
 		pr_debug("failed GET_NTB_PARAMETERS\n");
 		return 1;
 	}
 
 	/* read correct set of parameters according to device mode */
-	ctx->rx_max = le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize);
-	ctx->tx_max = le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize);
-	ctx->tx_remainder = le16_to_cpu(ctx->ncm_parm.wNdpOutPayloadRemainder);
-	ctx->tx_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutDivisor);
-	ctx->tx_ndp_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutAlignment);
+	ctx->rx_max = le32_to_cpu(ncm_parm.dwNtbInMaxSize);
+	ctx->tx_max = le32_to_cpu(ncm_parm.dwNtbOutMaxSize);
+	ctx->tx_remainder = le16_to_cpu(ncm_parm.wNdpOutPayloadRemainder);
+	ctx->tx_modulus = le16_to_cpu(ncm_parm.wNdpOutDivisor);
+	ctx->tx_ndp_modulus = le16_to_cpu(ncm_parm.wNdpOutAlignment);
 	/* devices prior to NCM Errata shall set this field to zero */
-	ctx->tx_max_datagrams = le16_to_cpu(ctx->ncm_parm.wNtbOutMaxDatagrams);
-	ntb_fmt_supported = le16_to_cpu(ctx->ncm_parm.bmNtbFormatsSupported);
+	ctx->tx_max_datagrams = le16_to_cpu(ncm_parm.wNtbOutMaxDatagrams);
+	ntb_fmt_supported = le16_to_cpu(ncm_parm.bmNtbFormatsSupported);
 
 	eth_hlen = ETH_HLEN;
 	min_dgram_size = CDC_NCM_MIN_DATAGRAM_SIZE;
@@ -153,7 +154,7 @@ static u8 cdc_ncm_setup(struct usbnet *dev)
 	}
 
 	/* inform device about NTB input size changes */
-	if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) {
+	if (ctx->rx_max != le32_to_cpu(ncm_parm.dwNtbInMaxSize)) {
 		__le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
 
 		err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_INPUT_SIZE,
@@ -171,6 +172,14 @@ static u8 cdc_ncm_setup(struct usbnet *dev)
 		pr_debug("Using default maximum transmit length=%d\n",
 						CDC_NCM_NTB_MAX_SIZE_TX);
 		ctx->tx_max = CDC_NCM_NTB_MAX_SIZE_TX;
+
+		/* Adding a pad byte here simplifies the handling in
+		 * cdc_ncm_fill_tx_frame, by making tx_max always
+		 * represent the real skb max size.
+		 */
+		if (ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0)
+			ctx->tx_max++;
+
 	}
 
 	/*
@@ -473,10 +482,6 @@ advance:
 	if (temp)
 		goto error2;
 
-	/* initialize data interface */
-	if (cdc_ncm_setup(dev))
-		goto error2;
-
 	/* configure data interface */
 	temp = usb_set_interface(dev->udev, iface_no, data_altsetting);
 	if (temp)
@@ -487,6 +492,10 @@ advance:
 	if (!dev->in || !dev->out || !dev->status)
 		goto error2;
 
+	/* initialize data interface */
+	if (cdc_ncm_setup(dev))
+		goto error2;
+
 	dev->net->ethtool_ops = &cdc_ncm_ethtool_ops;
 
 	usb_set_intfdata(ctx->data, dev);
@@ -501,15 +510,6 @@ advance:
 
 	dev->rx_urb_size = ctx->rx_max;
 
-	/* cdc_ncm_setup will override dwNtbOutMaxSize if it is
-	 * outside the sane range. Adding a pad byte here if necessary
-	 * simplifies the handling in cdc_ncm_fill_tx_frame, making
-	 * tx_max always represent the real skb max size.
-	 */
-	if (ctx->tx_max != le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) &&
-	    ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0)
-		ctx->tx_max++;
-
 	return 0;
 
 error2:
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index f14af3dd0cce..89b52a0fe4b9 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -88,7 +88,6 @@
 #define cdc_ncm_data_intf_is_mbim(x)  ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB)
 
 struct cdc_ncm_ctx {
-	struct usb_cdc_ncm_ntb_parameters ncm_parm;
 	struct hrtimer tx_timer;
 	struct tasklet_struct bh;
 
-- 
cgit v1.2.3


From 832922362e1308aaef95a43383577d56f51fbc3c Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:47 +0100
Subject: net: cdc_ncm: remove descriptor pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

header_desc was completely unused and union_desc was never used
outside cdc_ncm_bind_common.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 12 ++++++------
 include/linux/usb/cdc_ncm.h |  4 +---
 2 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index b8de8ddd6d98..435fcc75d706 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -372,6 +372,7 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = {
 
 int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting)
 {
+	const struct usb_cdc_union_desc *union_desc = NULL;
 	struct cdc_ncm_ctx *ctx;
 	struct usb_driver *driver;
 	u8 *buf;
@@ -406,16 +407,15 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 
 		switch (buf[2]) {
 		case USB_CDC_UNION_TYPE:
-			if (buf[0] < sizeof(*(ctx->union_desc)))
+			if (buf[0] < sizeof(*union_desc))
 				break;
 
-			ctx->union_desc =
-					(const struct usb_cdc_union_desc *)buf;
+			union_desc = (const struct usb_cdc_union_desc *)buf;
 
 			ctx->control = usb_ifnum_to_if(dev->udev,
-					ctx->union_desc->bMasterInterface0);
+						       union_desc->bMasterInterface0);
 			ctx->data = usb_ifnum_to_if(dev->udev,
-					ctx->union_desc->bSlaveInterface0);
+						    union_desc->bSlaveInterface0);
 			break;
 
 		case USB_CDC_ETHERNET_TYPE:
@@ -458,7 +458,7 @@ advance:
 	}
 
 	/* some buggy devices have an IAD but no CDC Union */
-	if (!ctx->union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) {
+	if (!union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) {
 		ctx->control = intf;
 		ctx->data = usb_ifnum_to_if(dev->udev, intf->cur_altsetting->desc.bInterfaceNumber + 1);
 		dev_dbg(&intf->dev, "CDC Union missing - got slave from IAD\n");
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 89b52a0fe4b9..cad54ad4ad12 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -92,9 +92,7 @@ struct cdc_ncm_ctx {
 	struct tasklet_struct bh;
 
 	const struct usb_cdc_ncm_desc *func_desc;
-	const struct usb_cdc_mbim_desc   *mbim_desc;
-	const struct usb_cdc_header_desc *header_desc;
-	const struct usb_cdc_union_desc *union_desc;
+	const struct usb_cdc_mbim_desc *mbim_desc;
 	const struct usb_cdc_ether_desc *ether_desc;
 
 	struct usb_interface *control;
-- 
cgit v1.2.3


From 6dd13e83ce37f716e36085cb8b58779da1e98f6d Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:57 +0100
Subject: net: cdc_ncm: drop "extern" from header declarations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/cdc_ncm.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index cad54ad4ad12..2300f7492927 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -119,11 +119,11 @@ struct cdc_ncm_ctx {
 	u16 connected;
 };
 
-extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf);
-extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
-extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
-extern struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
-extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
-extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
+u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf);
+int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
+void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
+struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
+int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
+int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
 
 #endif /* __LINUX_USB_CDC_NCM_H */
-- 
cgit v1.2.3


From ec7f68e07bf10198717b7824c78201b46bbf1956 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 30 Oct 2013 22:48:00 +0000
Subject: iio: hid_Sensors: fix crash during trigger unregister

We can't store the trigger instance created by iio_trigger_alloc, in
trig field of iio_device structure. This needs to be stored in the
driver private data. Othewise it can result in crash during module
unload. Hence created a trig_ptr in the common data structure
for each HID sensor IIO driver and storing here.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/accel/hid-sensor-accel-3d.c             | 5 +++--
 drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 9 ++++-----
 drivers/iio/common/hid-sensors/hid-sensor-trigger.h | 2 +-
 drivers/iio/gyro/hid-sensor-gyro-3d.c               | 5 +++--
 drivers/iio/light/hid-sensor-als.c                  | 5 +++--
 drivers/iio/magnetometer/hid-sensor-magn-3d.c       | 5 +++--
 include/linux/hid-sensor-hub.h                      | 3 +++
 7 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index dcda17395c4e..1cae4e920c9b 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -350,7 +350,7 @@ static int hid_accel_3d_probe(struct platform_device *pdev)
 error_iio_unreg:
 	iio_device_unregister(indio_dev);
 error_remove_trigger:
-	hid_sensor_remove_trigger(indio_dev);
+	hid_sensor_remove_trigger(&accel_state->common_attributes);
 error_unreg_buffer_funcs:
 	iio_triggered_buffer_cleanup(indio_dev);
 error_free_dev_mem:
@@ -363,10 +363,11 @@ static int hid_accel_3d_remove(struct platform_device *pdev)
 {
 	struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
 	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+	struct accel_3d_state *accel_state = iio_priv(indio_dev);
 
 	sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_ACCEL_3D);
 	iio_device_unregister(indio_dev);
-	hid_sensor_remove_trigger(indio_dev);
+	hid_sensor_remove_trigger(&accel_state->common_attributes);
 	iio_triggered_buffer_cleanup(indio_dev);
 	kfree(indio_dev->channels);
 
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index b6e77e0fc420..bbd6426c9726 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -55,11 +55,10 @@ static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig,
 	return 0;
 }
 
-void hid_sensor_remove_trigger(struct iio_dev *indio_dev)
+void hid_sensor_remove_trigger(struct hid_sensor_common *attrb)
 {
-	iio_trigger_unregister(indio_dev->trig);
-	iio_trigger_free(indio_dev->trig);
-	indio_dev->trig = NULL;
+	iio_trigger_unregister(attrb->trigger);
+	iio_trigger_free(attrb->trigger);
 }
 EXPORT_SYMBOL(hid_sensor_remove_trigger);
 
@@ -90,7 +89,7 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name,
 		dev_err(&indio_dev->dev, "Trigger Register Failed\n");
 		goto error_free_trig;
 	}
-	indio_dev->trig = trig;
+	indio_dev->trig = attrb->trigger = trig;
 
 	return ret;
 
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.h b/drivers/iio/common/hid-sensors/hid-sensor-trigger.h
index 9a8731478eda..ca02f7811aa8 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.h
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.h
@@ -21,6 +21,6 @@
 
 int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name,
 				struct hid_sensor_common *attrb);
-void hid_sensor_remove_trigger(struct iio_dev *indio_dev);
+void hid_sensor_remove_trigger(struct hid_sensor_common *attrb);
 
 #endif
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index ea01c6bcfb56..e54f0f4959d3 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -348,7 +348,7 @@ static int hid_gyro_3d_probe(struct platform_device *pdev)
 error_iio_unreg:
 	iio_device_unregister(indio_dev);
 error_remove_trigger:
-	hid_sensor_remove_trigger(indio_dev);
+	hid_sensor_remove_trigger(&gyro_state->common_attributes);
 error_unreg_buffer_funcs:
 	iio_triggered_buffer_cleanup(indio_dev);
 error_free_dev_mem:
@@ -361,10 +361,11 @@ static int hid_gyro_3d_remove(struct platform_device *pdev)
 {
 	struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
 	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+	struct gyro_3d_state *gyro_state = iio_priv(indio_dev);
 
 	sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_GYRO_3D);
 	iio_device_unregister(indio_dev);
-	hid_sensor_remove_trigger(indio_dev);
+	hid_sensor_remove_trigger(&gyro_state->common_attributes);
 	iio_triggered_buffer_cleanup(indio_dev);
 	kfree(indio_dev->channels);
 
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index fa6ae8cf89ea..8e8b9d722853 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -314,7 +314,7 @@ static int hid_als_probe(struct platform_device *pdev)
 error_iio_unreg:
 	iio_device_unregister(indio_dev);
 error_remove_trigger:
-	hid_sensor_remove_trigger(indio_dev);
+	hid_sensor_remove_trigger(&als_state->common_attributes);
 error_unreg_buffer_funcs:
 	iio_triggered_buffer_cleanup(indio_dev);
 error_free_dev_mem:
@@ -327,10 +327,11 @@ static int hid_als_remove(struct platform_device *pdev)
 {
 	struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
 	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+	struct als_state *als_state = iio_priv(indio_dev);
 
 	sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_ALS);
 	iio_device_unregister(indio_dev);
-	hid_sensor_remove_trigger(indio_dev);
+	hid_sensor_remove_trigger(&als_state->common_attributes);
 	iio_triggered_buffer_cleanup(indio_dev);
 	kfree(indio_dev->channels);
 
diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
index 2634920562fb..b26e1028a0a0 100644
--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c
+++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
@@ -351,7 +351,7 @@ static int hid_magn_3d_probe(struct platform_device *pdev)
 error_iio_unreg:
 	iio_device_unregister(indio_dev);
 error_remove_trigger:
-	hid_sensor_remove_trigger(indio_dev);
+	hid_sensor_remove_trigger(&magn_state->common_attributes);
 error_unreg_buffer_funcs:
 	iio_triggered_buffer_cleanup(indio_dev);
 error_free_dev_mem:
@@ -364,10 +364,11 @@ static int hid_magn_3d_remove(struct platform_device *pdev)
 {
 	struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
 	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+	struct magn_3d_state *magn_state = iio_priv(indio_dev);
 
 	sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_COMPASS_3D);
 	iio_device_unregister(indio_dev);
-	hid_sensor_remove_trigger(indio_dev);
+	hid_sensor_remove_trigger(&magn_state->common_attributes);
 	iio_triggered_buffer_cleanup(indio_dev);
 	kfree(indio_dev->channels);
 
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index a265af294ea4..206a2af6b62b 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -21,6 +21,8 @@
 
 #include <linux/hid.h>
 #include <linux/hid-sensor-ids.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/trigger.h>
 
 /**
  * struct hid_sensor_hub_attribute_info - Attribute info
@@ -184,6 +186,7 @@ struct hid_sensor_common {
 	struct platform_device *pdev;
 	unsigned usage_id;
 	bool data_ready;
+	struct iio_trigger *trigger;
 	struct hid_sensor_hub_attribute_info poll;
 	struct hid_sensor_hub_attribute_info report_state;
 	struct hid_sensor_hub_attribute_info power_state;
-- 
cgit v1.2.3


From 9bf76ca325d5e9208eb343f7bd4cc666f703ed30 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 3 Nov 2013 12:36:28 +0100
Subject: ipc, msg: forbid negative values for "msg{max,mnb,mni}"

Negative message lengths make no sense -- so don't do negative queue
lenghts or identifier counts. Prevent them from getting negative.

Also change the underlying data types to be unsigned to avoid hairy
surprises with sign extensions in cases where those variables get
evaluated in unsigned expressions with bigger data types, e.g size_t.

In case a user still wants to have "unlimited" sizes she could just use
INT_MAX instead.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  6 +++---
 ipc/ipc_sysctl.c              | 20 ++++++++++++--------
 2 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 19c19a5eee29..f6c82de12541 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -34,9 +34,9 @@ struct ipc_namespace {
 	int		sem_ctls[4];
 	int		used_sems;
 
-	int		msg_ctlmax;
-	int		msg_ctlmnb;
-	int		msg_ctlmni;
+	unsigned int	msg_ctlmax;
+	unsigned int	msg_ctlmnb;
+	unsigned int	msg_ctlmni;
 	atomic_t	msg_bytes;
 	atomic_t	msg_hdrs;
 	int		auto_msgmni;
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 130dfece27ac..b0e99deb6d05 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -62,7 +62,7 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
 	return err;
 }
 
-static int proc_ipc_callback_dointvec(ctl_table *table, int write,
+static int proc_ipc_callback_dointvec_minmax(ctl_table *table, int write,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table ipc_table;
@@ -72,7 +72,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
 	memcpy(&ipc_table, table, sizeof(ipc_table));
 	ipc_table.data = get_ipc(table);
 
-	rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
+	rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
 
 	if (write && !rc && lenp_bef == *lenp)
 		/*
@@ -152,15 +152,13 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 #define proc_ipc_dointvec	   NULL
 #define proc_ipc_dointvec_minmax   NULL
 #define proc_ipc_dointvec_minmax_orphans   NULL
-#define proc_ipc_callback_dointvec NULL
+#define proc_ipc_callback_dointvec_minmax  NULL
 #define proc_ipcauto_dointvec_minmax NULL
 #endif
 
 static int zero;
 static int one = 1;
-#ifdef CONFIG_CHECKPOINT_RESTORE
 static int int_max = INT_MAX;
-#endif
 
 static struct ctl_table ipc_kern_table[] = {
 	{
@@ -198,21 +196,27 @@ static struct ctl_table ipc_kern_table[] = {
 		.data		= &init_ipc_ns.msg_ctlmax,
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmax),
 		.mode		= 0644,
-		.proc_handler	= proc_ipc_dointvec,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
 	},
 	{
 		.procname	= "msgmni",
 		.data		= &init_ipc_ns.msg_ctlmni,
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmni),
 		.mode		= 0644,
-		.proc_handler	= proc_ipc_callback_dointvec,
+		.proc_handler	= proc_ipc_callback_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
 	},
 	{
 		.procname	=  "msgmnb",
 		.data		= &init_ipc_ns.msg_ctlmnb,
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmnb),
 		.mode		= 0644,
-		.proc_handler	= proc_ipc_dointvec,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
 	},
 	{
 		.procname	= "sem",
-- 
cgit v1.2.3


From f6f0747e5bc69401d7f90313aa1b46709d27840a Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Mon, 28 Oct 2013 13:12:35 -0500
Subject: of: Add empty for_each_available_child_of_node() macro definition

Add this empty macro definition so users can be compiled without
excluding this macro call with preprocessor directives when CONFIG_OF
is disabled.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/of.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 54017b83650b..b97f685c941f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -366,6 +366,9 @@ static inline bool of_have_populated_dt(void)
 #define for_each_child_of_node(parent, child) \
 	while (0)
 
+#define for_each_available_child_of_node(parent, child) \
+	while (0)
+
 static inline struct device_node *of_get_child_by_name(
 					const struct device_node *node,
 					const char *name)
-- 
cgit v1.2.3


From 6e95fcaa42e5078ac265964deebed597f9eae07a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 30 Oct 2013 11:50:49 +0100
Subject: lib: crc32: add functionality to combine two crc32{, c}s in GF(2)

This patch adds a combinator to merge two or more crc32{,c}s
into a new one. This is useful for checksum computations of
fragmented skbs that use crc32/crc32c as checksums.

The arithmetics for combining both in the GF(2) was taken and
slightly modified from zlib. Only passing two crcs is insufficient
as two crcs and the length of the second piece is needed for
merging. The code is made generic, so that only polynomials
need to be passed for crc32_le resp. crc32c_le.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/crc32.h | 40 +++++++++++++++++++++++++
 lib/crc32.c           | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 68267b64bb98..7d275c4fc011 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -11,8 +11,48 @@
 extern u32  crc32_le(u32 crc, unsigned char const *p, size_t len);
 extern u32  crc32_be(u32 crc, unsigned char const *p, size_t len);
 
+/**
+ * crc32_le_combine - Combine two crc32 check values into one. For two
+ * 		      sequences of bytes, seq1 and seq2 with lengths len1
+ * 		      and len2, crc32_le() check values were calculated
+ * 		      for each, crc1 and crc2.
+ *
+ * @crc1: crc32 of the first block
+ * @crc2: crc32 of the second block
+ * @len2: length of the second block
+ *
+ * Return: The crc32_le() check value of seq1 and seq2 concatenated,
+ * 	   requiring only crc1, crc2, and len2. Note: If seq_full denotes
+ * 	   the concatenated memory area of seq1 with seq2, and crc_full
+ * 	   the crc32_le() value of seq_full, then crc_full ==
+ * 	   crc32_le_combine(crc1, crc2, len2) when crc_full was seeded
+ * 	   with the same initializer as crc1, and crc2 seed was 0. See
+ * 	   also crc32_combine_test().
+ */
+extern u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2);
+
 extern u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len);
 
+/**
+ * __crc32c_le_combine - Combine two crc32c check values into one. For two
+ * 			 sequences of bytes, seq1 and seq2 with lengths len1
+ * 			 and len2, __crc32c_le() check values were calculated
+ * 			 for each, crc1 and crc2.
+ *
+ * @crc1: crc32c of the first block
+ * @crc2: crc32c of the second block
+ * @len2: length of the second block
+ *
+ * Return: The __crc32c_le() check value of seq1 and seq2 concatenated,
+ * 	   requiring only crc1, crc2, and len2. Note: If seq_full denotes
+ * 	   the concatenated memory area of seq1 with seq2, and crc_full
+ * 	   the __crc32c_le() value of seq_full, then crc_full ==
+ * 	   __crc32c_le_combine(crc1, crc2, len2) when crc_full was
+ * 	   seeded with the same initializer as crc1, and crc2 seed
+ * 	   was 0. See also crc32c_combine_test().
+ */
+extern u32  __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2);
+
 #define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)(data), length)
 
 /*
diff --git a/lib/crc32.c b/lib/crc32.c
index 429d61ce6aa0..595205cddc30 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -49,6 +49,30 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
 MODULE_DESCRIPTION("Various CRC32 calculations");
 MODULE_LICENSE("GPL");
 
+#define GF2_DIM		32
+
+static u32 gf2_matrix_times(u32 *mat, u32 vec)
+{
+	u32 sum = 0;
+
+	while (vec) {
+		if (vec & 1)
+			sum ^= *mat;
+		vec >>= 1;
+		mat++;
+	}
+
+	return sum;
+}
+
+static void gf2_matrix_square(u32 *square, u32 *mat)
+{
+	int i;
+
+	for (i = 0; i < GF2_DIM; i++)
+		square[i] = gf2_matrix_times(mat, mat[i]);
+}
+
 #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
 
 /* implements slicing-by-4 or slicing-by-8 algorithm */
@@ -130,6 +154,52 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 }
 #endif
 
+/* For conditions of distribution and use, see copyright notice in zlib.h */
+static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2,
+				 u32 polynomial)
+{
+	u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */
+	u32 odd[GF2_DIM];  /* Odd-power-of-two zeros operator  */
+	u32 row;
+	int i;
+
+	if (len2 <= 0)
+		return crc1;
+
+	/* Put operator for one zero bit in odd */
+	odd[0] = polynomial;
+	row = 1;
+	for (i = 1; i < GF2_DIM; i++) {
+		odd[i] = row;
+		row <<= 1;
+	}
+
+	gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */
+	gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */
+
+	/* Apply len2 zeros to crc1 (first square will put the operator for one
+	 * zero byte, eight zero bits, in even).
+	 */
+	do {
+		/* Apply zeros operator for this bit of len2 */
+		gf2_matrix_square(even, odd);
+		if (len2 & 1)
+			crc1 = gf2_matrix_times(even, crc1);
+		len2 >>= 1;
+		/* If no more bits set, then done */
+		if (len2 == 0)
+			break;
+		/* Another iteration of the loop with odd and even swapped */
+		gf2_matrix_square(odd, even);
+		if (len2 & 1)
+			crc1 = gf2_matrix_times(odd, crc1);
+		len2 >>= 1;
+	} while (len2 != 0);
+
+	crc1 ^= crc2;
+	return crc1;
+}
+
 /**
  * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
  *			CRC32/CRC32C
@@ -200,8 +270,19 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 			(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
 }
 #endif
+u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
+{
+	return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE);
+}
+
+u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
+{
+	return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);
+}
 EXPORT_SYMBOL(crc32_le);
+EXPORT_SYMBOL(crc32_le_combine);
 EXPORT_SYMBOL(__crc32c_le);
+EXPORT_SYMBOL(__crc32c_le_combine);
 
 /**
  * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
-- 
cgit v1.2.3


From 2817a336d4d533fb8b68719723cd60ea7dd7c09e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 30 Oct 2013 11:50:51 +0100
Subject: net: skb_checksum: allow custom update/combine for walking skb

Currently, skb_checksum walks over 1) linearized, 2) frags[], and
3) frag_list data and calculats the one's complement, a 32 bit
result suitable for feeding into itself or csum_tcpudp_magic(),
but unsuitable for SCTP as we're calculating CRC32c there.

Hence, in order to not re-implement the very same function in
SCTP (and maybe other protocols) over and over again, use an
update() + combine() callback internally to allow for walking
over the skb with different algorithms.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 13 ++++++++++---
 include/net/checksum.h |  6 ++++++
 net/core/skbuff.c      | 31 +++++++++++++++++++++----------
 3 files changed, 37 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2c154976394b..44727b5d4981 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2360,8 +2360,6 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset,
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
-__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
-		    __wsum csum);
 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
@@ -2373,9 +2371,18 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
 void skb_scrub_packet(struct sk_buff *skb, bool xnet);
-
 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 
+struct skb_checksum_ops {
+	__wsum (*update)(const void *mem, int len, __wsum wsum);
+	__wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len);
+};
+
+__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
+		      __wsum csum, const struct skb_checksum_ops *ops);
+__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
+		    __wsum csum);
+
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
 {
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 8f59ca50477c..15f33fde826e 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -78,6 +78,12 @@ csum_block_add(__wsum csum, __wsum csum2, int offset)
 	return csum_add(csum, (__force __wsum)sum);
 }
 
+static inline __wsum
+csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
+{
+	return csum_block_add(csum, csum2, offset);
+}
+
 static inline __wsum
 csum_block_sub(__wsum csum, __wsum csum2, int offset)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0ab32faa520f..31aab5376cb4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1928,9 +1928,8 @@ fault:
 EXPORT_SYMBOL(skb_store_bits);
 
 /* Checksum skb data. */
-
-__wsum skb_checksum(const struct sk_buff *skb, int offset,
-			  int len, __wsum csum)
+__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
+		      __wsum csum, const struct skb_checksum_ops *ops)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
@@ -1941,7 +1940,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		csum = csum_partial(skb->data + offset, copy, csum);
+		csum = ops->update(skb->data + offset, copy, csum);
 		if ((len -= copy) == 0)
 			return csum;
 		offset += copy;
@@ -1962,10 +1961,10 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			vaddr = kmap_atomic(skb_frag_page(frag));
-			csum2 = csum_partial(vaddr + frag->page_offset +
-					     offset - start, copy, 0);
+			csum2 = ops->update(vaddr + frag->page_offset +
+					    offset - start, copy, 0);
 			kunmap_atomic(vaddr);
-			csum = csum_block_add(csum, csum2, pos);
+			csum = ops->combine(csum, csum2, pos, copy);
 			if (!(len -= copy))
 				return csum;
 			offset += copy;
@@ -1984,9 +1983,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 			__wsum csum2;
 			if (copy > len)
 				copy = len;
-			csum2 = skb_checksum(frag_iter, offset - start,
-					     copy, 0);
-			csum = csum_block_add(csum, csum2, pos);
+			csum2 = __skb_checksum(frag_iter, offset - start,
+					       copy, 0, ops);
+			csum = ops->combine(csum, csum2, pos, copy);
 			if ((len -= copy) == 0)
 				return csum;
 			offset += copy;
@@ -1998,6 +1997,18 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 
 	return csum;
 }
+EXPORT_SYMBOL(__skb_checksum);
+
+__wsum skb_checksum(const struct sk_buff *skb, int offset,
+		    int len, __wsum csum)
+{
+	const struct skb_checksum_ops ops = {
+		.update  = csum_partial,
+		.combine = csum_block_add_ext,
+	};
+
+	return __skb_checksum(skb, offset, len, csum, &ops);
+}
 EXPORT_SYMBOL(skb_checksum);
 
 /* Both of above in one bottle. */
-- 
cgit v1.2.3


From 74d332c13b2148ae934ea94dac1745ae92efe8e5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 30 Oct 2013 13:10:44 -0700
Subject: net: extend net_device allocation to vmalloc()

Joby Poriyath provided a xen-netback patch to reduce the size of
xenvif structure as some netdev allocation could fail under
memory pressure/fragmentation.

This patch is handling the problem at the core level, allowing
any netdev structures to use vmalloc() if kmalloc() failed.

As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT
to kzalloc() flags to do this fallback only when really needed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Joby Poriyath <joby.poriyath@citrix.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/netdevices.txt | 10 +++++-----
 include/linux/netdevice.h               |  1 +
 net/core/dev.c                          | 22 +++++++++++++++++-----
 net/core/net-sysfs.c                    |  2 +-
 4 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index c7ecc7080494..0b1cf6b2a592 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -10,12 +10,12 @@ network devices.
 struct net_device allocation rules
 ==================================
 Network device structures need to persist even after module is unloaded and
-must be allocated with kmalloc.  If device has registered successfully,
-it will be freed on last use by free_netdev.  This is required to handle the
-pathologic case cleanly (example: rmmod mydriver </sys/class/net/myeth/mtu )
+must be allocated with alloc_netdev_mqs() and friends.
+If device has registered successfully, it will be freed on last use
+by free_netdev(). This is required to handle the pathologic case cleanly
+(example: rmmod mydriver </sys/class/net/myeth/mtu )
 
-There are routines in net_init.c to handle the common cases of
-alloc_etherdev, alloc_netdev.  These reserve extra space for driver
+alloc_netdev_mqs()/alloc_netdev() reserve extra space for driver
 private data which gets freed when the network device is freed. If
 separately allocated data is attached to the network device
 (netdev_priv(dev)) then it is up to the module exit handler to free that.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cb1d918ecdf1..e6353cafbf05 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1800,6 +1800,7 @@ static inline void unregister_netdevice(struct net_device *dev)
 
 int netdev_refcnt_read(const struct net_device *dev);
 void free_netdev(struct net_device *dev);
+void netdev_freemem(struct net_device *dev);
 void synchronize_net(void);
 int init_dummy_netdev(struct net_device *dev);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 0054c8c75f50..0e6136546a8c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6196,6 +6196,16 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
 
+void netdev_freemem(struct net_device *dev)
+{
+	char *addr = (char *)dev - dev->padded;
+
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
 /**
  *	alloc_netdev_mqs - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
@@ -6239,7 +6249,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	/* ensure 32-byte alignment of whole construct */
 	alloc_size += NETDEV_ALIGN - 1;
 
-	p = kzalloc(alloc_size, GFP_KERNEL);
+	p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!p)
+		p = vzalloc(alloc_size);
 	if (!p)
 		return NULL;
 
@@ -6248,7 +6260,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev->pcpu_refcnt = alloc_percpu(int);
 	if (!dev->pcpu_refcnt)
-		goto free_p;
+		goto free_dev;
 
 	if (dev_addr_init(dev))
 		goto free_pcpu;
@@ -6301,8 +6313,8 @@ free_pcpu:
 	kfree(dev->_rx);
 #endif
 
-free_p:
-	kfree(p);
+free_dev:
+	netdev_freemem(dev);
 	return NULL;
 }
 EXPORT_SYMBOL(alloc_netdev_mqs);
@@ -6339,7 +6351,7 @@ void free_netdev(struct net_device *dev)
 
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
-		kfree((char *)dev - dev->padded);
+		netdev_freemem(dev);
 		return;
 	}
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d954b56b4e47..d03f2c9750fa 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1263,7 +1263,7 @@ static void netdev_release(struct device *d)
 	BUG_ON(dev->reg_state != NETREG_RELEASED);
 
 	kfree(dev->ifalias);
-	kfree((char *)dev - dev->padded);
+	netdev_freemem(dev);
 }
 
 static const void *net_namespace(struct device *d)
-- 
cgit v1.2.3


From f3f5a0f8cc40b942f4c0ae117df82eeb65f07d4d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 4 Nov 2013 14:38:05 -0500
Subject: NFSv4.2: Fix a mismatch between Linux labeled NFS and the NFSv4.2
 spec

In the spec, the security label attribute id is '80', which means that
it should be bit number 80-64 == 16 in the 3rd word of the bitmap.

Fixes: 4488cc96c581: NFS: Add NFSv4.2 protocol constants
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Steve Dickson <steved@redhat.com>
Cc: stable@vger.kernel.org # 3.11+
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index c56fa8fedce9..bfe6c379a24e 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -395,7 +395,7 @@ enum lock_type4 {
 #define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
-#define FATTR4_WORD2_SECURITY_LABEL     (1UL << 17)
+#define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
 
 /* MDS threshold bitmap bits */
 #define THRESHOLD_RD                    (1UL << 0)
-- 
cgit v1.2.3


From acddd5dd44d4fd9b45dd5ee69cd8b183052b1cdc Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 3 Nov 2013 10:03:18 +0200
Subject: net/mlx4_core: Fix reg/unreg vlan/mac to conform to the firmware spec

The functions mlx4_register_vlan, mlx4_unregister_vlan, mlx4_register_mac,
mlx4_unregister_mac all made illegal use of the out_param in multifunc mode
to pass the port number. The firmware spec specifies that the port number
should be passed in bits 8..15 of the input-modifier field for ALLOC_RES and
FREE_RES (sections 20.15.1 and 20.15.2).

For MAC register/unregister, this patch contains workarounds so that guests
running previous kernels continue to work on a new Hypervisor, and guests
running the new kernel will continue to work on old hypervisors.

Vlan registeration capability is still not operational in multifunction mode,
since the vlan wrapper functions are not implemented in this patch.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/port.c          | 45 +++++++++++++++-------
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 28 ++++++++------
 include/linux/mlx4/device.h                        |  1 +
 3 files changed, 49 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index d3d3106f588f..9433c1f6b0d4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -178,13 +178,24 @@ EXPORT_SYMBOL_GPL(__mlx4_register_mac);
 int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
 	u64 out_param = 0;
-	int err;
+	int err = -EINVAL;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&out_param, port);
-		err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
-				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
-				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) {
+			err = mlx4_cmd_imm(dev, mac, &out_param,
+					   ((u32) port) << 8 | (u32) RES_MAC,
+					   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+					   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		}
+		if (err && err == -EINVAL && mlx4_is_slave(dev)) {
+			/* retry using old REG_MAC format */
+			set_param_l(&out_param, port);
+			err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+					   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+					   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+			if (!err)
+				dev->flags |= MLX4_FLAG_OLD_REG_MAC;
+		}
 		if (err)
 			return err;
 
@@ -231,10 +242,18 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 	u64 out_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&out_param, port);
-		(void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
-				    RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
-				    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) {
+			(void) mlx4_cmd_imm(dev, mac, &out_param,
+					    ((u32) port) << 8 | (u32) RES_MAC,
+					    RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+					    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		} else {
+			/* use old unregister mac format */
+			set_param_l(&out_param, port);
+			(void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+					    RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+					    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		}
 		return;
 	}
 	__mlx4_unregister_mac(dev, port, mac);
@@ -374,8 +393,8 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
 		return -EINVAL;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&out_param, port);
-		err = mlx4_cmd_imm(dev, vlan, &out_param, RES_VLAN,
+		err = mlx4_cmd_imm(dev, vlan, &out_param,
+				   ((u32) port) << 8 | (u32) RES_VLAN,
 				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
 				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 		if (!err)
@@ -418,8 +437,8 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
 	u64 out_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&out_param, port);
-		(void) mlx4_cmd_imm(dev, index, &out_param, RES_VLAN,
+		(void) mlx4_cmd_imm(dev, index, &out_param,
+				    ((u32) port) << 8 | (u32) RES_VLAN,
 				    RES_OP_RESERVE_AND_MAP,
 				    MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
 				    MLX4_CMD_WRAPPED);
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index dd6876321116..a5aa3be554fe 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1443,7 +1443,7 @@ static void rem_slave_macs(struct mlx4_dev *dev, int slave)
 }
 
 static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
-			 u64 in_param, u64 *out_param)
+			 u64 in_param, u64 *out_param, int in_port)
 {
 	int err = -EINVAL;
 	int port;
@@ -1452,7 +1452,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 	if (op != RES_OP_RESERVE_AND_MAP)
 		return err;
 
-	port = get_param_l(out_param);
+	port = !in_port ? get_param_l(out_param) : in_port;
 	mac = in_param;
 
 	err = __mlx4_register_mac(dev, port, mac);
@@ -1470,7 +1470,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 }
 
 static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
-			 u64 in_param, u64 *out_param)
+			 u64 in_param, u64 *out_param, int port)
 {
 	return 0;
 }
@@ -1528,7 +1528,7 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
 	int err;
 	int alop = vhcr->op_modifier;
 
-	switch (vhcr->in_modifier) {
+	switch (vhcr->in_modifier & 0xFF) {
 	case RES_QP:
 		err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop,
 				   vhcr->in_param, &vhcr->out_param);
@@ -1556,12 +1556,14 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
 
 	case RES_MAC:
 		err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop,
-				    vhcr->in_param, &vhcr->out_param);
+				    vhcr->in_param, &vhcr->out_param,
+				    (vhcr->in_modifier >> 8) & 0xFF);
 		break;
 
 	case RES_VLAN:
 		err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop,
-				    vhcr->in_param, &vhcr->out_param);
+				     vhcr->in_param, &vhcr->out_param,
+				     (vhcr->in_modifier >> 8) & 0xFF);
 		break;
 
 	case RES_COUNTER:
@@ -1730,14 +1732,14 @@ static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 }
 
 static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
-			    u64 in_param, u64 *out_param)
+			    u64 in_param, u64 *out_param, int in_port)
 {
 	int port;
 	int err = 0;
 
 	switch (op) {
 	case RES_OP_RESERVE_AND_MAP:
-		port = get_param_l(out_param);
+		port = !in_port ? get_param_l(out_param) : in_port;
 		mac_del_from_slave(dev, slave, in_param, port);
 		__mlx4_unregister_mac(dev, port, in_param);
 		break;
@@ -1751,7 +1753,7 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 }
 
 static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
-			    u64 in_param, u64 *out_param)
+			    u64 in_param, u64 *out_param, int port)
 {
 	return 0;
 }
@@ -1803,7 +1805,7 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
 	int err = -EINVAL;
 	int alop = vhcr->op_modifier;
 
-	switch (vhcr->in_modifier) {
+	switch (vhcr->in_modifier & 0xFF) {
 	case RES_QP:
 		err = qp_free_res(dev, slave, vhcr->op_modifier, alop,
 				  vhcr->in_param);
@@ -1831,12 +1833,14 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
 
 	case RES_MAC:
 		err = mac_free_res(dev, slave, vhcr->op_modifier, alop,
-				   vhcr->in_param, &vhcr->out_param);
+				   vhcr->in_param, &vhcr->out_param,
+				   (vhcr->in_modifier >> 8) & 0xFF);
 		break;
 
 	case RES_VLAN:
 		err = vlan_free_res(dev, slave, vhcr->op_modifier, alop,
-				   vhcr->in_param, &vhcr->out_param);
+				    vhcr->in_param, &vhcr->out_param,
+				    (vhcr->in_modifier >> 8) & 0xFF);
 		break;
 
 	case RES_COUNTER:
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 9ad0c18495ad..297a16309f00 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -54,6 +54,7 @@ enum {
 	MLX4_FLAG_MASTER	= 1 << 2,
 	MLX4_FLAG_SLAVE		= 1 << 3,
 	MLX4_FLAG_SRIOV		= 1 << 4,
+	MLX4_FLAG_OLD_REG_MAC	= 1 << 6,
 };
 
 enum {
-- 
cgit v1.2.3


From 2009d0059c084288f060b1ffe3d14229588acb67 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 3 Nov 2013 10:03:19 +0200
Subject: net/mlx4_en: Use vlan id instead of vlan index for unregistration

Use of vlan_index created problems unregistering vlans on guests.

In addition, tools delete vlan by tag, not by index, lets follow that.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c           |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c     |  6 +----
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  2 +-
 drivers/net/ethernet/mellanox/mlx4/port.c          | 27 ++++++++++++----------
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  2 +-
 include/linux/mlx4/device.h                        |  2 +-
 6 files changed, 20 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index ae8eb4c4fb6c..887d62576f54 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1687,7 +1687,7 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave
 		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 		if (NO_INDX != vp_oper->vlan_idx) {
 			__mlx4_unregister_vlan(&priv->dev,
-					       port, vp_oper->vlan_idx);
+					       port, vp_oper->state.default_vlan);
 			vp_oper->vlan_idx = NO_INDX;
 		}
 		if (NO_INDX != vp_oper->mac_idx) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 85d91665d400..b5554121aca4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -417,7 +417,6 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
 	int err;
-	int idx;
 
 	en_dbg(HW, priv, "Killing VID:%d\n", vid);
 
@@ -425,10 +424,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 
 	/* Remove VID from port VLAN filter */
 	mutex_lock(&mdev->state_lock);
-	if (!mlx4_find_cached_vlan(mdev->dev, priv->port, vid, &idx))
-		mlx4_unregister_vlan(mdev->dev, priv->port, idx);
-	else
-		en_dbg(HW, priv, "could not find vid %d in cache\n", vid);
+	mlx4_unregister_vlan(mdev->dev, priv->port, vid);
 
 	if (mdev->device_up && priv->port_up) {
 		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 348bb8c7d9a7..f2ad4f61f58c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1111,7 +1111,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev,
 
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
 void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
-void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
 int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
 
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz);
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 9433c1f6b0d4..caaa15470395 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -406,23 +406,26 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
 }
 EXPORT_SYMBOL_GPL(mlx4_register_vlan);
 
-void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
 {
 	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+	int index;
 
-	if (index < MLX4_VLAN_REGULAR) {
-		mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
-		return;
+	mutex_lock(&table->mutex);
+	if (mlx4_find_cached_vlan(dev, port, vlan, &index)) {
+		mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan);
+		goto out;
 	}
 
-	mutex_lock(&table->mutex);
-	if (!table->refs[index]) {
-		mlx4_warn(dev, "No vlan entry for index %d\n", index);
+	if (index < MLX4_VLAN_REGULAR) {
+		mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
 		goto out;
 	}
+
 	if (--table->refs[index]) {
-		mlx4_dbg(dev, "Have more references for index %d,"
-			 "no need to modify vlan table\n", index);
+		mlx4_dbg(dev, "Have %d more references for index %d,"
+			 "no need to modify vlan table\n", table->refs[index],
+			 index);
 		goto out;
 	}
 	table->entries[index] = 0;
@@ -432,19 +435,19 @@ out:
 	mutex_unlock(&table->mutex);
 }
 
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
 {
 	u64 out_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
-		(void) mlx4_cmd_imm(dev, index, &out_param,
+		(void) mlx4_cmd_imm(dev, vlan, &out_param,
 				    ((u32) port) << 8 | (u32) RES_VLAN,
 				    RES_OP_RESERVE_AND_MAP,
 				    MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
 				    MLX4_CMD_WRAPPED);
 		return;
 	}
-	__mlx4_unregister_vlan(dev, port, index);
+	__mlx4_unregister_vlan(dev, port, vlan);
 }
 EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index a5aa3be554fe..993a2ef13866 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4085,7 +4085,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
 	if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors &&
 	    NO_INDX != work->orig_vlan_ix)
 		__mlx4_unregister_vlan(&work->priv->dev, work->port,
-				       work->orig_vlan_ix);
+				       work->orig_vlan_id);
 out:
 	kfree(work);
 	return;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 297a16309f00..e2e92885bdc1 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1079,7 +1079,7 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
 		u8 *pg, u16 *ratelimit);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
 
 int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
 		      int npages, u64 iova, u32 *lkey, u32 *rkey);
-- 
cgit v1.2.3


From 5a0d0a6161aecbbc76e4c1d2b82e4c7cef88bb29 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 3 Nov 2013 10:03:23 +0200
Subject: mlx4: Structures and init/teardown for VF resource quotas

This is step #1 for implementing SRIOV resource quotas for VFs.

Quotas are implemented per resource type for VFs and the PF, to prevent
any entity from simply grabbing all the resources for itself and leaving
the other entities unable to obtain such resources.

Resources which are allocated using quotas:  QPs, CQs, SRQs, MPTs, MTTs, MAC,
                                             VLAN, and Counters.

The quota system works as follows:
Each entity (VF or PF) is given a max number of a given resource (its quota),
and a guaranteed minimum number for each resource (starvation prevention).

For QPs, CQs, SRQs, MPTs and MTTs:
50% of the available quantity for the resource is divided equally among
the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module
parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool.
The other 50% is the "free pool", allocated on a first-come-first-serve basis.
For each VF/PF, resources are first allocated from its "guaranteed-minimum"
pool. When that pool is exhausted, the driver attempts to allocate from
the resource "free-pool".

The quota (i.e., max) for the VFs and the PF is:
  The free-pool amount (50% of the real max) + the guaranteed minimum

For MACs:
  Guarantee 2 MACs per VF/PF per port. As a result, since we have only
  128 MACs per port, reduce the allowable number of VFs from 64 to 63.
  Any remaining MACs are put into a free pool.

For VLANs:
  For the PF, the per-port quota is 128 and guarantee is 64
     (to allow the PF to register at least a VLAN per VF in VST mode).
  For the VFs, the per-port quota is 64 and the guarantee is 0.
      We assume that VGT VFs are trusted not to abuse the VLAN resource.

For Counters:
  For all functions (PF and VFs), the quota is 128 and the guarantee is 0.

In this patch, we define the needed structures, which are added to the
resource-tracker struct.  In addition, we do initialization
for the resource quota, and adjust the query_device response to use quotas
rather than resource maxima.

As part of the implementation, we introduce a new field in
mlx4_dev: quotas.  This field holds the resource quotas used
to report maxima to the upper layers (ib_core, via query_device).

The HCA maxima of these values are passed to the VFs (via
QUERY_HCA) so that they may continue to use these in handling
QPs, CQs, SRQs and MPTs.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c                  |   8 +-
 drivers/net/ethernet/mellanox/mlx4/fw.c            |  11 +-
 drivers/net/ethernet/mellanox/mlx4/main.c          |  32 +++--
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  17 +++
 drivers/net/ethernet/mellanox/mlx4/qp.c            |   3 +-
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 157 ++++++++++++++++++++-
 include/linux/mlx4/device.h                        |  17 +++
 7 files changed, 222 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f0612645de99..7567437dbd34 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -177,18 +177,18 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 
 	props->max_mr_size	   = ~0ull;
 	props->page_size_cap	   = dev->dev->caps.page_size_cap;
-	props->max_qp		   = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
+	props->max_qp		   = dev->dev->quotas.qp;
 	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
 	props->max_sge		   = min(dev->dev->caps.max_sq_sg,
 					 dev->dev->caps.max_rq_sg);
-	props->max_cq		   = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
+	props->max_cq		   = dev->dev->quotas.cq;
 	props->max_cqe		   = dev->dev->caps.max_cqes;
-	props->max_mr		   = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
+	props->max_mr		   = dev->dev->quotas.mpt;
 	props->max_pd		   = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
 	props->max_qp_rd_atom	   = dev->dev->caps.max_qp_dest_rdma;
 	props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
 	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
-	props->max_srq		   = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
+	props->max_srq		   = dev->dev->quotas.srq;
 	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
 	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
 	props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index c151e7a6710a..f8c88c3ad9fc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -177,6 +177,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_cmd_mailbox *outbox,
 				struct mlx4_cmd_info *cmd)
 {
+	struct mlx4_priv *priv = mlx4_priv(dev);
 	u8	field;
 	u32	size;
 	int	err = 0;
@@ -250,13 +251,13 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		field = 0; /* protected FMR support not available as yet */
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET);
 
-		size = dev->caps.num_qps;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
 
-		size = dev->caps.num_srqs;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
 
-		size = dev->caps.num_cqs;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
 
 		size = dev->caps.num_eqs;
@@ -265,10 +266,10 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		size = dev->caps.reserved_eqs;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
 
-		size = dev->caps.num_mpts;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
 
-		size = dev->caps.num_mtts;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
 
 		size = dev->caps.num_mgms + dev->caps.num_amgms;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 179d26709c94..7d2628dfdc29 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -562,13 +562,17 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 	}
 
 	dev->caps.num_ports		= func_cap.num_ports;
-	dev->caps.num_qps		= func_cap.qp_quota;
-	dev->caps.num_srqs		= func_cap.srq_quota;
-	dev->caps.num_cqs		= func_cap.cq_quota;
-	dev->caps.num_eqs               = func_cap.max_eq;
-	dev->caps.reserved_eqs          = func_cap.reserved_eq;
-	dev->caps.num_mpts		= func_cap.mpt_quota;
-	dev->caps.num_mtts		= func_cap.mtt_quota;
+	dev->quotas.qp			= func_cap.qp_quota;
+	dev->quotas.srq			= func_cap.srq_quota;
+	dev->quotas.cq			= func_cap.cq_quota;
+	dev->quotas.mpt			= func_cap.mpt_quota;
+	dev->quotas.mtt			= func_cap.mtt_quota;
+	dev->caps.num_qps		= 1 << hca_param.log_num_qps;
+	dev->caps.num_srqs		= 1 << hca_param.log_num_srqs;
+	dev->caps.num_cqs		= 1 << hca_param.log_num_cqs;
+	dev->caps.num_mpts		= 1 << hca_param.log_mpt_sz;
+	dev->caps.num_eqs		= func_cap.max_eq;
+	dev->caps.reserved_eqs		= func_cap.reserved_eq;
 	dev->caps.num_pds               = MLX4_NUM_PDS;
 	dev->caps.num_mgms              = 0;
 	dev->caps.num_amgms             = 0;
@@ -2102,9 +2106,15 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
 			"aborting.\n");
 		return err;
 	}
-	if (num_vfs > MLX4_MAX_NUM_VF) {
-		printk(KERN_ERR "There are more VF's (%d) than allowed(%d)\n",
-		       num_vfs, MLX4_MAX_NUM_VF);
+
+	/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
+	 * per port, we must limit the number of VFs to 63 (since their are
+	 * 128 MACs)
+	 */
+	if (num_vfs >= MLX4_MAX_NUM_VF) {
+		dev_err(&pdev->dev,
+			"Requested more VF's (%d) than allowed (%d)\n",
+			num_vfs, MLX4_MAX_NUM_VF - 1);
 		return -EINVAL;
 	}
 
@@ -2322,6 +2332,8 @@ slave_start:
 	if (err)
 		goto err_steer;
 
+	mlx4_init_quotas(dev);
+
 	for (port = 1; port <= dev->caps.num_ports; port++) {
 		err = mlx4_init_port_info(dev, port);
 		if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 97941269bc14..e7eb86ecc6ea 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -504,12 +504,27 @@ struct slave_list {
 	struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE];
 };
 
+struct resource_allocator {
+	union {
+		int res_reserved;
+		int res_port_rsvd[MLX4_MAX_PORTS];
+	};
+	union {
+		int res_free;
+		int res_port_free[MLX4_MAX_PORTS];
+	};
+	int *quota;
+	int *allocated;
+	int *guaranteed;
+};
+
 struct mlx4_resource_tracker {
 	spinlock_t lock;
 	/* tree for each resources */
 	struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
 	/* num_of_slave's lists, one per slave */
 	struct slave_list *slave_list;
+	struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE];
 };
 
 #define SLAVE_EVENT_EQ_SIZE	128
@@ -1253,4 +1268,6 @@ static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev)
 
 void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work);
 
+void mlx4_init_quotas(struct mlx4_dev *dev);
+
 #endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index e891b058c1be..2715e61dbb74 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -480,8 +480,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
 	*/
 
 	err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
-			       (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 +
-			       16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev),
+			       (1 << 23) - 1, mlx4_num_reserved_sqps(dev),
 			       reserved_from_top);
 	if (err)
 		return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 35863889bec0..cc5d6d0aad16 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -284,10 +284,59 @@ static const char *ResourceType(enum mlx4_resource rt)
 }
 
 static void rem_slave_vlans(struct mlx4_dev *dev, int slave);
+static inline void initialize_res_quotas(struct mlx4_dev *dev,
+					 struct resource_allocator *res_alloc,
+					 enum mlx4_resource res_type,
+					 int vf, int num_instances)
+{
+	res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1));
+	res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf];
+	if (vf == mlx4_master_func_num(dev)) {
+		res_alloc->res_free = num_instances;
+		if (res_type == RES_MTT) {
+			/* reserved mtts will be taken out of the PF allocation */
+			res_alloc->res_free += dev->caps.reserved_mtts;
+			res_alloc->guaranteed[vf] += dev->caps.reserved_mtts;
+			res_alloc->quota[vf] += dev->caps.reserved_mtts;
+		}
+	}
+}
+
+void mlx4_init_quotas(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int pf;
+
+	/* quotas for VFs are initialized in mlx4_slave_cap */
+	if (mlx4_is_slave(dev))
+		return;
+
+	if (!mlx4_is_mfunc(dev)) {
+		dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps -
+			mlx4_num_reserved_sqps(dev);
+		dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs;
+		dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs;
+		dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts;
+		dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws;
+		return;
+	}
+
+	pf = mlx4_master_func_num(dev);
+	dev->quotas.qp =
+		priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf];
+	dev->quotas.cq =
+		priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf];
+	dev->quotas.srq =
+		priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf];
+	dev->quotas.mtt =
+		priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf];
+	dev->quotas.mpt =
+		priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+}
 int mlx4_init_resource_tracker(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	int i;
+	int i, j;
 	int t;
 
 	priv->mfunc.master.res_tracker.slave_list =
@@ -308,8 +357,104 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
 	for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++)
 		priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT;
 
+	for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+		struct resource_allocator *res_alloc =
+			&priv->mfunc.master.res_tracker.res_alloc[i];
+		res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+		res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+		if (i == RES_MAC || i == RES_VLAN)
+			res_alloc->allocated = kzalloc(MLX4_MAX_PORTS *
+						       (dev->num_vfs + 1) * sizeof(int),
+							GFP_KERNEL);
+		else
+			res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+
+		if (!res_alloc->quota || !res_alloc->guaranteed ||
+		    !res_alloc->allocated)
+			goto no_mem_err;
+
+		for (t = 0; t < dev->num_vfs + 1; t++) {
+			switch (i) {
+			case RES_QP:
+				initialize_res_quotas(dev, res_alloc, RES_QP,
+						      t, dev->caps.num_qps -
+						      dev->caps.reserved_qps -
+						      mlx4_num_reserved_sqps(dev));
+				break;
+			case RES_CQ:
+				initialize_res_quotas(dev, res_alloc, RES_CQ,
+						      t, dev->caps.num_cqs -
+						      dev->caps.reserved_cqs);
+				break;
+			case RES_SRQ:
+				initialize_res_quotas(dev, res_alloc, RES_SRQ,
+						      t, dev->caps.num_srqs -
+						      dev->caps.reserved_srqs);
+				break;
+			case RES_MPT:
+				initialize_res_quotas(dev, res_alloc, RES_MPT,
+						      t, dev->caps.num_mpts -
+						      dev->caps.reserved_mrws);
+				break;
+			case RES_MTT:
+				initialize_res_quotas(dev, res_alloc, RES_MTT,
+						      t, dev->caps.num_mtts -
+						      dev->caps.reserved_mtts);
+				break;
+			case RES_MAC:
+				if (t == mlx4_master_func_num(dev)) {
+					res_alloc->quota[t] = MLX4_MAX_MAC_NUM;
+					res_alloc->guaranteed[t] = 2;
+					for (j = 0; j < MLX4_MAX_PORTS; j++)
+						res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM;
+				} else {
+					res_alloc->quota[t] = MLX4_MAX_MAC_NUM;
+					res_alloc->guaranteed[t] = 2;
+				}
+				break;
+			case RES_VLAN:
+				if (t == mlx4_master_func_num(dev)) {
+					res_alloc->quota[t] = MLX4_MAX_VLAN_NUM;
+					res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2;
+					for (j = 0; j < MLX4_MAX_PORTS; j++)
+						res_alloc->res_port_free[j] =
+							res_alloc->quota[t];
+				} else {
+					res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2;
+					res_alloc->guaranteed[t] = 0;
+				}
+				break;
+			case RES_COUNTER:
+				res_alloc->quota[t] = dev->caps.max_counters;
+				res_alloc->guaranteed[t] = 0;
+				if (t == mlx4_master_func_num(dev))
+					res_alloc->res_free = res_alloc->quota[t];
+				break;
+			default:
+				break;
+			}
+			if (i == RES_MAC || i == RES_VLAN) {
+				for (j = 0; j < MLX4_MAX_PORTS; j++)
+					res_alloc->res_port_rsvd[j] +=
+						res_alloc->guaranteed[t];
+			} else {
+				res_alloc->res_reserved += res_alloc->guaranteed[t];
+			}
+		}
+	}
 	spin_lock_init(&priv->mfunc.master.res_tracker.lock);
-	return 0 ;
+	return 0;
+
+no_mem_err:
+	for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+		kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+		priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+		kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+		priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+		kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+		priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+	}
+	return -ENOMEM;
 }
 
 void mlx4_free_resource_tracker(struct mlx4_dev *dev,
@@ -333,6 +478,14 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev,
 		}
 
 		if (type != RES_TR_FREE_SLAVES_ONLY) {
+			for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+				kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+				priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+				kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+				priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+				kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+				priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+			}
 			kfree(priv->mfunc.master.res_tracker.slave_list);
 			priv->mfunc.master.res_tracker.slave_list = NULL;
 		}
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e2e92885bdc1..f6f59271f857 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -641,12 +641,23 @@ struct mlx4_counter {
 	__be64	tx_bytes;
 };
 
+struct mlx4_quotas {
+	int qp;
+	int cq;
+	int srq;
+	int mpt;
+	int mtt;
+	int counter;
+	int xrcd;
+};
+
 struct mlx4_dev {
 	struct pci_dev	       *pdev;
 	unsigned long		flags;
 	unsigned long		num_slaves;
 	struct mlx4_caps	caps;
 	struct mlx4_phys_caps	phys_caps;
+	struct mlx4_quotas	quotas;
 	struct radix_tree_root	qp_table_tree;
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
@@ -772,6 +783,12 @@ static inline int mlx4_is_master(struct mlx4_dev *dev)
 	return dev->flags & MLX4_FLAG_MASTER;
 }
 
+static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev)
+{
+	return dev->phys_caps.base_sqpn + 8 +
+		16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev);
+}
+
 static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
 {
 	return (qpn < dev->phys_caps.base_sqpn + 8 +
-- 
cgit v1.2.3


From b944dba31d2c23f1cf5d69a66cc3449e0ba78503 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 4 Nov 2013 15:20:20 -0500
Subject: NFSv4: Sanity check the server reply in _nfs4_server_capabilities

We don't want to be setting capabilities and/or requesting attributes
that are not appropriate for the NFSv4 minor version.

- Ensure that we clear the NFS_CAP_SECURITY_LABEL capability when appropriate
- Ensure that we limit the attribute bitmasks to the mounted_on_fileid
  attribute and less for NFSv4.0
- Ensure that we limit the attribute bitmasks to suppattr_exclcreat and
  less for NFSv4.1
- Ensure that we limit it to change_sec_label or less for NFSv4.2

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c    | 25 ++++++++++++++++++++-----
 include/linux/nfs4.h |  2 ++
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7e28b5c77cbc..ed2a0e6b9aed 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2706,6 +2706,10 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
 		nfs4_close_state(ctx->state, ctx->mode);
 }
 
+#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
+#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL)
+
 static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
 	struct nfs4_server_caps_arg args = {
@@ -2721,12 +2725,25 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 
 	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 	if (status == 0) {
+		/* Sanity check the server answers */
+		switch (server->nfs_client->cl_minorversion) {
+		case 0:
+			res.attr_bitmask[1] &= FATTR4_WORD1_NFS40_MASK;
+			res.attr_bitmask[2] = 0;
+			break;
+		case 1:
+			res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK;
+			break;
+		case 2:
+			res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
+		}
 		memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
 		server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
 				NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
 				NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
 				NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
-				NFS_CAP_CTIME|NFS_CAP_MTIME);
+				NFS_CAP_CTIME|NFS_CAP_MTIME|
+				NFS_CAP_SECURITY_LABEL);
 		if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
 			server->caps |= NFS_CAP_ACLS;
 		if (res.has_links != 0)
@@ -2755,14 +2772,12 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 #endif
 		memcpy(server->attr_bitmask_nl, res.attr_bitmask,
 				sizeof(server->attr_bitmask));
+		server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
 
-		if (server->caps & NFS_CAP_SECURITY_LABEL) {
-			server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
-			res.attr_bitmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
-		}
 		memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
 		server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
 		server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
+		server->cache_consistency_bitmask[2] = 0;
 		server->acl_bitmask = res.acl_bitmask;
 		server->fh_expire_type = res.fh_expire_type;
 	}
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index bfe6c379a24e..c6f41b616965 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -396,6 +396,8 @@ enum lock_type4 {
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
+#define FATTR4_WORD2_CHANGE_SECURITY_LABEL \
+					(1UL << 17)
 
 /* MDS threshold bitmap bits */
 #define THRESHOLD_RD                    (1UL << 0)
-- 
cgit v1.2.3


From f8e617e100d7369a0108f96abf4414e9fb82ced7 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 1 Nov 2013 14:07:47 +0800
Subject: net: introduce skb_coalesce_rx_frag()

Sometimes we need to coalesce the rx frags to avoid frag list. One example is
virtio-net driver which tries to use small frags for both MTU sized packet and
GSO packet. So this patch introduce skb_coalesce_rx_frag() to do this.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Michael Dalton <mwdalton@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  3 +++
 net/core/skbuff.c      | 12 ++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 44727b5d4981..2e153b69d318 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1372,6 +1372,9 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size, unsigned int truesize);
 
+void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
+			  unsigned int truesize);
+
 #define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
 #define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_has_frag_list(skb))
 #define SKB_LINEAR_ASSERT(skb)  BUG_ON(skb_is_nonlinear(skb))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e4115597b38b..3735fad5616e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -476,6 +476,18 @@ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 }
 EXPORT_SYMBOL(skb_add_rx_frag);
 
+void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
+			  unsigned int truesize)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+	skb_frag_size_add(frag, size);
+	skb->len += size;
+	skb->data_len += size;
+	skb->truesize += truesize;
+}
+EXPORT_SYMBOL(skb_coalesce_rx_frag);
+
 static void skb_drop_list(struct sk_buff **listp)
 {
 	kfree_skb_list(*listp);
-- 
cgit v1.2.3


From b0fed40214ce79ef70d97584ebdf13f89786da0e Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 22 May 2013 12:54:49 -0400
Subject: audit: implement generic feature setting and retrieving

The audit_status structure was not designed with extensibility in mind.
Define a new AUDIT_SET_FEATURE message type which takes a new structure
of bits where things can be enabled/disabled/locked one at a time.  This
structure should be able to grow in the future while maintaining forward
and backward compatibility (based loosly on the ideas from capabilities
and prctl)

This does not actually add any features, but is just infrastructure to
allow new on/off types of audit system features.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h      |   2 +
 include/uapi/linux/audit.h |  16 +++++++
 kernel/audit.c             | 109 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 729a4d165bcc..7b31bec9bccb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -73,6 +73,8 @@ struct audit_field {
 	void				*lsm_rule;
 };
 
+extern int is_audit_feature_set(int which);
+
 extern int __init audit_register_class(int class, unsigned *list);
 extern int audit_classify_syscall(int abi, unsigned syscall);
 extern int audit_classify_arch(int arch);
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index c1f0fced3ede..9eddf2ca614f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -68,6 +68,9 @@
 #define AUDIT_MAKE_EQUIV	1015	/* Append to watched tree */
 #define AUDIT_TTY_GET		1016	/* Get TTY auditing status */
 #define AUDIT_TTY_SET		1017	/* Set TTY auditing status */
+#define AUDIT_SET_FEATURE	1018	/* Turn an audit feature on or off */
+#define AUDIT_GET_FEATURE	1019	/* Get which features are enabled */
+#define AUDIT_FEATURE_CHANGE	1020	/* audit log listing feature changes */
 
 #define AUDIT_FIRST_USER_MSG	1100	/* Userspace messages mostly uninteresting to kernel */
 #define AUDIT_USER_AVC		1107	/* We filter this differently */
@@ -375,6 +378,19 @@ struct audit_status {
 	__u32		backlog;	/* messages waiting in queue */
 };
 
+struct audit_features {
+#define AUDIT_FEATURE_VERSION	1
+	__u32	vers;
+	__u32	mask;		/* which bits we are dealing with */
+	__u32	features;	/* which feature to enable/disable */
+	__u32	lock;		/* which features to lock */
+};
+
+#define AUDIT_LAST_FEATURE	-1
+
+#define audit_feature_valid(x)		((x) >= 0 && (x) <= AUDIT_LAST_FEATURE)
+#define AUDIT_FEATURE_TO_MASK(x)	(1 << ((x) & 31)) /* mask for __u32 */
+
 struct audit_tty_status {
 	__u32		enabled;	/* 1 = enabled, 0 = disabled */
 	__u32		log_passwd;	/* 1 = enabled, 0 = disabled */
diff --git a/kernel/audit.c b/kernel/audit.c
index 74550ff3644f..29ee6a421c6c 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -139,6 +139,15 @@ static struct task_struct *kauditd_task;
 static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
 static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
 
+static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
+				   .mask = -1,
+				   .features = 0,
+				   .lock = 0,};
+
+static char *audit_feature_names[0] = {
+};
+
+
 /* Serialize requests from userspace. */
 DEFINE_MUTEX(audit_cmd_mutex);
 
@@ -583,6 +592,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 		return -EOPNOTSUPP;
 	case AUDIT_GET:
 	case AUDIT_SET:
+	case AUDIT_GET_FEATURE:
+	case AUDIT_SET_FEATURE:
 	case AUDIT_LIST_RULES:
 	case AUDIT_ADD_RULE:
 	case AUDIT_DEL_RULE:
@@ -627,6 +638,94 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
 	return rc;
 }
 
+int is_audit_feature_set(int i)
+{
+	return af.features & AUDIT_FEATURE_TO_MASK(i);
+}
+
+
+static int audit_get_feature(struct sk_buff *skb)
+{
+	u32 seq;
+
+	seq = nlmsg_hdr(skb)->nlmsg_seq;
+
+	audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
+			 &af, sizeof(af));
+
+	return 0;
+}
+
+static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature,
+				     u32 old_lock, u32 new_lock, int res)
+{
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+	audit_log_format(ab, "feature=%s new=%d old=%d old_lock=%d new_lock=%d res=%d",
+			 audit_feature_names[which], !!old_feature, !!new_feature,
+			 !!old_lock, !!new_lock, res);
+	audit_log_end(ab);
+}
+
+static int audit_set_feature(struct sk_buff *skb)
+{
+	struct audit_features *uaf;
+	int i;
+
+	BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > sizeof(audit_feature_names)/sizeof(audit_feature_names[0]));
+	uaf = nlmsg_data(nlmsg_hdr(skb));
+
+	/* if there is ever a version 2 we should handle that here */
+
+	for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+		u32 feature = AUDIT_FEATURE_TO_MASK(i);
+		u32 old_feature, new_feature, old_lock, new_lock;
+
+		/* if we are not changing this feature, move along */
+		if (!(feature & uaf->mask))
+			continue;
+
+		old_feature = af.features & feature;
+		new_feature = uaf->features & feature;
+		new_lock = (uaf->lock | af.lock) & feature;
+		old_lock = af.lock & feature;
+
+		/* are we changing a locked feature? */
+		if ((af.lock & feature) && (new_feature != old_feature)) {
+			audit_log_feature_change(i, old_feature, new_feature,
+						 old_lock, new_lock, 0);
+			return -EPERM;
+		}
+	}
+	/* nothing invalid, do the changes */
+	for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+		u32 feature = AUDIT_FEATURE_TO_MASK(i);
+		u32 old_feature, new_feature, old_lock, new_lock;
+
+		/* if we are not changing this feature, move along */
+		if (!(feature & uaf->mask))
+			continue;
+
+		old_feature = af.features & feature;
+		new_feature = uaf->features & feature;
+		old_lock = af.lock & feature;
+		new_lock = (uaf->lock | af.lock) & feature;
+
+		if (new_feature != old_feature)
+			audit_log_feature_change(i, old_feature, new_feature,
+						 old_lock, new_lock, 1);
+
+		if (new_feature)
+			af.features |= feature;
+		else
+			af.features &= ~feature;
+		af.lock |= new_lock;
+	}
+
+	return 0;
+}
+
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	u32			seq;
@@ -698,6 +797,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
 			err = audit_set_backlog_limit(status_get->backlog_limit);
 		break;
+	case AUDIT_GET_FEATURE:
+		err = audit_get_feature(skb);
+		if (err)
+			return err;
+		break;
+	case AUDIT_SET_FEATURE:
+		err = audit_set_feature(skb);
+		if (err)
+			return err;
+		break;
 	case AUDIT_USER:
 	case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
 	case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
-- 
cgit v1.2.3


From d9cfea91e97d5d19f9d69beaa844f5fe56a6adc6 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 30 Oct 2013 17:56:13 -0400
Subject: audit: move audit_aux_data_execve contents into audit_context union

audit_bprm() was being called to add an AUDIT_EXECVE record to the audit
context every time search_binary_handler() was recursively called.  Only one
reference is necessary, so just update it.  Move the the contents of
audit_aux_data_execve into the union in audit_context, removing dependence on a
kmalloc along the way.

Reported-by: Oleg Nesterov <onestero@redhat.com>
Cc: Eric Paris <eparis@redhat.com>
Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  4 ++--
 kernel/audit.h        |  4 ++++
 kernel/auditsc.c      | 41 ++++++++++++-----------------------------
 3 files changed, 18 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 7b31bec9bccb..08b38bf13eb9 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -209,7 +209,7 @@ static inline int audit_get_sessionid(struct task_struct *tsk)
 
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
-extern int __audit_bprm(struct linux_binprm *bprm);
+extern void __audit_bprm(struct linux_binprm *bprm);
 extern int __audit_socketcall(int nargs, unsigned long *args);
 extern int __audit_sockaddr(int len, void *addr);
 extern void __audit_fd_pair(int fd1, int fd2);
@@ -241,7 +241,7 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
 static inline int audit_bprm(struct linux_binprm *bprm)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_bprm(bprm);
+		__audit_bprm(bprm);
 	return 0;
 }
 static inline int audit_socketcall(int nargs, unsigned long *args)
diff --git a/kernel/audit.h b/kernel/audit.h
index 123c9b7c3979..e7b94ab66c49 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -197,6 +197,10 @@ struct audit_context {
 			int			fd;
 			int			flags;
 		} mmap;
+		struct {
+			int			argc;
+			struct mm_struct	*mm;
+		} execve;
 	};
 	int fds[2];
 
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 11078f32d13e..425a8939be1a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -95,12 +95,6 @@ struct audit_aux_data {
 /* Number of target pids per aux struct. */
 #define AUDIT_AUX_PIDS	16
 
-struct audit_aux_data_execve {
-	struct audit_aux_data	d;
-	int argc;
-	struct mm_struct *mm;
-};
-
 struct audit_aux_data_pids {
 	struct audit_aux_data	d;
 	pid_t			target_pid[AUDIT_AUX_PIDS];
@@ -1144,20 +1138,19 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 }
 
 static void audit_log_execve_info(struct audit_context *context,
-				  struct audit_buffer **ab,
-				  struct audit_aux_data_execve *axi)
+				  struct audit_buffer **ab)
 {
 	int i, len;
 	size_t len_sent = 0;
 	const char __user *p;
 	char *buf;
 
-	if (axi->mm != current->mm)
+	if (context->execve.mm != current->mm)
 		return; /* execve failed, no additional info */
 
-	p = (const char __user *)axi->mm->arg_start;
+	p = (const char __user *)current->mm->arg_start;
 
-	audit_log_format(*ab, "argc=%d", axi->argc);
+	audit_log_format(*ab, "argc=%d", context->execve.argc);
 
 	/*
 	 * we need some kernel buffer to hold the userspace args.  Just
@@ -1171,7 +1164,7 @@ static void audit_log_execve_info(struct audit_context *context,
 		return;
 	}
 
-	for (i = 0; i < axi->argc; i++) {
+	for (i = 0; i < context->execve.argc; i++) {
 		len = audit_log_single_execve_arg(context, ab, i,
 						  &len_sent, p, buf);
 		if (len <= 0)
@@ -1274,6 +1267,9 @@ static void show_special(struct audit_context *context, int *call_panic)
 		audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
 				 context->mmap.flags);
 		break; }
+	case AUDIT_EXECVE: {
+		audit_log_execve_info(context, &ab);
+		break; }
 	}
 	audit_log_end(ab);
 }
@@ -1320,11 +1316,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 		switch (aux->type) {
 
-		case AUDIT_EXECVE: {
-			struct audit_aux_data_execve *axi = (void *)aux;
-			audit_log_execve_info(context, &ab, axi);
-			break; }
-
 		case AUDIT_BPRM_FCAPS: {
 			struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
 			audit_log_format(ab, "fver=%x", axs->fcap_ver);
@@ -2147,21 +2138,13 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
 	context->ipc.has_perm = 1;
 }
 
-int __audit_bprm(struct linux_binprm *bprm)
+void __audit_bprm(struct linux_binprm *bprm)
 {
-	struct audit_aux_data_execve *ax;
 	struct audit_context *context = current->audit_context;
 
-	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->argc = bprm->argc;
-	ax->mm = bprm->mm;
-	ax->d.type = AUDIT_EXECVE;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->type = AUDIT_EXECVE;
+	context->execve.argc = bprm->argc;
+	context->execve.mm = bprm->mm;
 }
 
 
-- 
cgit v1.2.3


From 9410d228a4cf434305306746bb799fb7acdd8648 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 30 Oct 2013 18:05:24 -0400
Subject: audit: call audit_bprm() only once to add AUDIT_EXECVE information

Move the audit_bprm() call from search_binary_handler() to exec_binprm().  This
allows us to get rid of the mm member of struct audit_aux_data_execve since
bprm->mm will equal current->mm.

This also mitigates the issue that ->argc could be modified by the
load_binary() call in search_binary_handler().

audit_bprm() was being called to add an AUDIT_EXECVE record to the audit
context every time search_binary_handler() was recursively called.  Only one
reference is necessary.

Reported-by: Oleg Nesterov <onestero@redhat.com>
Cc: Eric Paris <eparis@redhat.com>
Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
This patch is against 3.11, but was developed on Oleg's post-3.11 patches that
introduce exec_binprm().
---
 fs/exec.c             | 5 +----
 include/linux/audit.h | 9 +++------
 kernel/audit.h        | 1 -
 kernel/auditsc.c      | 4 ----
 4 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index fd774c7cb483..c5c24f2fc44a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1383,10 +1383,6 @@ int search_binary_handler(struct linux_binprm *bprm)
 	if (retval)
 		return retval;
 
-	retval = audit_bprm(bprm);
-	if (retval)
-		return retval;
-
 	/* Need to fetch pid before load_binary changes it */
 	old_pid = current->pid;
 	rcu_read_lock();
@@ -1408,6 +1404,7 @@ int search_binary_handler(struct linux_binprm *bprm)
 			bprm->recursion_depth = depth;
 			if (retval >= 0) {
 				if (depth == 0) {
+					audit_bprm(bprm);
 					trace_sched_process_exec(current, old_pid, bprm);
 					ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
 				}
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 08b38bf13eb9..a40641954c29 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -238,11 +238,10 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
-static inline int audit_bprm(struct linux_binprm *bprm)
+static inline void audit_bprm(struct linux_binprm *bprm)
 {
 	if (unlikely(!audit_dummy_context()))
 		__audit_bprm(bprm);
-	return 0;
 }
 static inline int audit_socketcall(int nargs, unsigned long *args)
 {
@@ -369,10 +368,8 @@ static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
 					gid_t gid, umode_t mode)
 { }
-static inline int audit_bprm(struct linux_binprm *bprm)
-{
-	return 0;
-}
+static inline void audit_bprm(struct linux_binprm *bprm)
+{ }
 static inline int audit_socketcall(int nargs, unsigned long *args)
 {
 	return 0;
diff --git a/kernel/audit.h b/kernel/audit.h
index e7b94ab66c49..b779642b29af 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -199,7 +199,6 @@ struct audit_context {
 		} mmap;
 		struct {
 			int			argc;
-			struct mm_struct	*mm;
 		} execve;
 	};
 	int fds[2];
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 425a8939be1a..dfc5d6745ee5 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1145,9 +1145,6 @@ static void audit_log_execve_info(struct audit_context *context,
 	const char __user *p;
 	char *buf;
 
-	if (context->execve.mm != current->mm)
-		return; /* execve failed, no additional info */
-
 	p = (const char __user *)current->mm->arg_start;
 
 	audit_log_format(*ab, "argc=%d", context->execve.argc);
@@ -2144,7 +2141,6 @@ void __audit_bprm(struct linux_binprm *bprm)
 
 	context->type = AUDIT_EXECVE;
 	context->execve.argc = bprm->argc;
-	context->execve.mm = bprm->mm;
 }
 
 
-- 
cgit v1.2.3


From 2f69702c4db5f1c3149fd17fe30bdeb87cba9698 Mon Sep 17 00:00:00 2001
From: Enrico Mioso <mrkiko.rs@gmail.com>
Date: Mon, 4 Nov 2013 09:50:47 +0100
Subject: net: cdc_ncm: Export cdc_ncm_{tx, rx}_fixup functions for re-use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some drivers implementing NCM-like protocols, may re-use those functions, as is
the case in the huawei_cdc_ncm driver.
Export them via EXPORT_SYMBOL_GPL, in accordance with how other functions have
been exported.

Signed-off-by: Enrico Mioso <mrkiko.rs@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 6 ++++--
 include/linux/usb/cdc_ncm.h | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 11c703337577..1763195bd599 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -830,7 +830,7 @@ static void cdc_ncm_txpath_bh(unsigned long param)
 	}
 }
 
-static struct sk_buff *
+struct sk_buff *
 cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 {
 	struct sk_buff *skb_out;
@@ -857,6 +857,7 @@ error:
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(cdc_ncm_tx_fixup);
 
 /* verify NTB header and return offset of first NDP, or negative error */
 int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in)
@@ -943,7 +944,7 @@ error:
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_ndp16);
 
-static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
+int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
 {
 	struct sk_buff *skb;
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -1019,6 +1020,7 @@ err_ndp:
 error:
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cdc_ncm_rx_fixup);
 
 static void
 cdc_ncm_speed_change(struct usbnet *dev,
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 2300f7492927..c3fa80745996 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -125,5 +125,8 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
 struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
 int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
 int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
+struct sk_buff *
+cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags);
+int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in);
 
 #endif /* __LINUX_USB_CDC_NCM_H */
-- 
cgit v1.2.3


From f306cc82a93d6b19f01634b80c580b9755c8b7cc Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Thu, 24 Oct 2013 08:34:17 -0500
Subject: tracing: Update event filters for multibuffer

The trace event filters are still tied to event calls rather than
event files, which means you don't get what you'd expect when using
filters in the multibuffer case:

Before:

  # echo 'bytes_alloc > 8192' > /sys/kernel/debug/tracing/events/kmem/kmalloc/filter
  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/filter
  bytes_alloc > 8192
  # mkdir /sys/kernel/debug/tracing/instances/test1
  # echo 'bytes_alloc > 2048' > /sys/kernel/debug/tracing/instances/test1/events/kmem/kmalloc/filter
  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/filter
  bytes_alloc > 2048
  # cat /sys/kernel/debug/tracing/instances/test1/events/kmem/kmalloc/filter
  bytes_alloc > 2048

Setting the filter in tracing/instances/test1/events shouldn't affect
the same event in tracing/events as it does above.

After:

  # echo 'bytes_alloc > 8192' > /sys/kernel/debug/tracing/events/kmem/kmalloc/filter
  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/filter
  bytes_alloc > 8192
  # mkdir /sys/kernel/debug/tracing/instances/test1
  # echo 'bytes_alloc > 2048' > /sys/kernel/debug/tracing/instances/test1/events/kmem/kmalloc/filter
  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/filter
  bytes_alloc > 8192
  # cat /sys/kernel/debug/tracing/instances/test1/events/kmem/kmalloc/filter
  bytes_alloc > 2048

We'd like to just move the filter directly from ftrace_event_call to
ftrace_event_file, but there are a couple cases that don't yet have
multibuffer support and therefore have to continue using the current
event_call-based filters.  For those cases, a new USE_CALL_FILTER bit
is added to the event_call flags, whose main purpose is to keep the
old behavior for those cases until they can be updated with
multibuffer support; at that point, the USE_CALL_FILTER flag (and the
new associated call_filter_check_discard() function) can go away.

The multibuffer support also made filter_current_check_discard()
redundant, so this change removes that function as well and replaces
it with filter_check_discard() (or call_filter_check_discard() as
appropriate).

Link: http://lkml.kernel.org/r/f16e9ce4270c62f46b2e966119225e1c3cca7e60.1382620672.git.tom.zanussi@linux.intel.com

Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h         |  25 +++-
 include/linux/syscalls.h             |   4 +-
 include/trace/ftrace.h               |   7 +-
 kernel/trace/trace.c                 |  40 +++++--
 kernel/trace/trace.h                 |  18 +--
 kernel/trace/trace_branch.c          |   2 +-
 kernel/trace/trace_events.c          |  23 ++--
 kernel/trace/trace_events_filter.c   | 218 ++++++++++++++++++++++++++++-------
 kernel/trace/trace_export.c          |   2 +-
 kernel/trace/trace_functions_graph.c |   4 +-
 kernel/trace/trace_kprobe.c          |   4 +-
 kernel/trace/trace_mmiotrace.c       |   4 +-
 kernel/trace/trace_sched_switch.c    |   4 +-
 kernel/trace/trace_syscalls.c        |   8 +-
 kernel/trace/trace_uprobe.c          |   3 +-
 15 files changed, 264 insertions(+), 102 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 5eaa746735ff..9abbe630c456 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -202,6 +202,7 @@ enum {
 	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
 	TRACE_EVENT_FL_WAS_ENABLED_BIT,
+	TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
 };
 
 /*
@@ -213,6 +214,7 @@ enum {
  *  WAS_ENABLED   - Set and stays set when an event was ever enabled
  *                    (used for module unloading, if a module event is enabled,
  *                     it is best to clear the buffers that used it).
+ *  USE_CALL_FILTER - For ftrace internal events, don't use file filter
  */
 enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -220,6 +222,7 @@ enum {
 	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
 	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
+	TRACE_EVENT_FL_USE_CALL_FILTER	= (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
 };
 
 struct ftrace_event_call {
@@ -238,6 +241,7 @@ struct ftrace_event_call {
 	 *   bit 2:		failed to apply filter
 	 *   bit 3:		ftrace internal event (do not enable)
 	 *   bit 4:		Event was enabled by module
+	 *   bit 5:		use call filter rather than file filter
 	 */
 	int			flags; /* static flags of different events */
 
@@ -253,6 +257,8 @@ struct ftrace_subsystem_dir;
 enum {
 	FTRACE_EVENT_FL_ENABLED_BIT,
 	FTRACE_EVENT_FL_RECORDED_CMD_BIT,
+	FTRACE_EVENT_FL_FILTERED_BIT,
+	FTRACE_EVENT_FL_NO_SET_FILTER_BIT,
 	FTRACE_EVENT_FL_SOFT_MODE_BIT,
 	FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
 };
@@ -261,6 +267,8 @@ enum {
  * Ftrace event file flags:
  *  ENABLED	  - The event is enabled
  *  RECORDED_CMD  - The comms should be recorded at sched_switch
+ *  FILTERED	  - The event has a filter attached
+ *  NO_SET_FILTER - Set when filter has error and is to be ignored
  *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
  *  SOFT_DISABLED - When set, do not trace the event (even though its
  *                   tracepoint may be enabled)
@@ -268,6 +276,8 @@ enum {
 enum {
 	FTRACE_EVENT_FL_ENABLED		= (1 << FTRACE_EVENT_FL_ENABLED_BIT),
 	FTRACE_EVENT_FL_RECORDED_CMD	= (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT),
+	FTRACE_EVENT_FL_FILTERED	= (1 << FTRACE_EVENT_FL_FILTERED_BIT),
+	FTRACE_EVENT_FL_NO_SET_FILTER	= (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT),
 	FTRACE_EVENT_FL_SOFT_MODE	= (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT),
 	FTRACE_EVENT_FL_SOFT_DISABLED	= (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT),
 };
@@ -275,6 +285,7 @@ enum {
 struct ftrace_event_file {
 	struct list_head		list;
 	struct ftrace_event_call	*event_call;
+	struct event_filter		*filter;
 	struct dentry			*dir;
 	struct trace_array		*tr;
 	struct ftrace_subsystem_dir	*system;
@@ -310,12 +321,16 @@ struct ftrace_event_file {
 
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
-extern void destroy_preds(struct ftrace_event_call *call);
+extern void destroy_preds(struct ftrace_event_file *file);
+extern void destroy_call_preds(struct ftrace_event_call *call);
 extern int filter_match_preds(struct event_filter *filter, void *rec);
-extern int filter_current_check_discard(struct ring_buffer *buffer,
-					struct ftrace_event_call *call,
-					void *rec,
-					struct ring_buffer_event *event);
+
+extern int filter_check_discard(struct ftrace_event_file *file, void *rec,
+				struct ring_buffer *buffer,
+				struct ring_buffer_event *event);
+extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
+				     struct ring_buffer *buffer,
+				     struct ring_buffer_event *event);
 
 enum {
 	FILTER_OTHER = 0,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7fac04e7ff6e..10bafa97049d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -120,7 +120,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.class			= &event_class_syscall_enter,	\
 		.event.funcs            = &enter_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-		.flags			= TRACE_EVENT_FL_CAP_ANY,	\
+		.flags			= TRACE_EVENT_FL_CAP_ANY | TRACE_EVENT_FL_USE_CALL_FILTER,\
 	};								\
 	static struct ftrace_event_call __used				\
 	  __attribute__((section("_ftrace_events")))			\
@@ -134,7 +134,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.class			= &event_class_syscall_exit,	\
 		.event.funcs		= &exit_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-		.flags			= TRACE_EVENT_FL_CAP_ANY,	\
+		.flags			= TRACE_EVENT_FL_CAP_ANY | TRACE_EVENT_FL_USE_CALL_FILTER,\
 	};								\
 	static struct ftrace_event_call __used				\
 	  __attribute__((section("_ftrace_events")))			\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 5c7ab17cbb02..52594b20179e 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -437,9 +437,8 @@ static inline notrace int ftrace_get_offsets_##call(			\
  *	{ <assign>; }  <-- Here we assign the entries by the __field and
  *			   __array macros.
  *
- *	if (!filter_current_check_discard(buffer, event_call, entry, event))
- *		trace_nowake_buffer_unlock_commit(buffer,
- *						   event, irq_flags, pc);
+ *	if (!filter_check_discard(ftrace_file, entry, buffer, event))
+ *		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
  * }
  *
  * static struct trace_event ftrace_event_type_<call> = {
@@ -553,7 +552,7 @@ ftrace_raw_event_##call(void *__data, proto)				\
 									\
 	{ assign; }							\
 									\
-	if (!filter_current_check_discard(buffer, event_call, entry, event)) \
+	if (!filter_check_discard(ftrace_file, entry, buffer, event))	\
 		trace_buffer_unlock_commit(buffer, event, irq_flags, pc); \
 }
 /*
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 063a92bad578..489da8b19f30 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -235,13 +235,33 @@ void trace_array_put(struct trace_array *this_tr)
 	mutex_unlock(&trace_types_lock);
 }
 
-int filter_current_check_discard(struct ring_buffer *buffer,
-				 struct ftrace_event_call *call, void *rec,
-				 struct ring_buffer_event *event)
+int filter_check_discard(struct ftrace_event_file *file, void *rec,
+			 struct ring_buffer *buffer,
+			 struct ring_buffer_event *event)
 {
-	return filter_check_discard(call, rec, buffer, event);
+	if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
+	    !filter_match_preds(file->filter, rec)) {
+		ring_buffer_discard_commit(buffer, event);
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(filter_check_discard);
+
+int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
+			      struct ring_buffer *buffer,
+			      struct ring_buffer_event *event)
+{
+	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
+	    !filter_match_preds(call->filter, rec)) {
+		ring_buffer_discard_commit(buffer, event);
+		return 1;
+	}
+
+	return 0;
 }
-EXPORT_SYMBOL_GPL(filter_current_check_discard);
+EXPORT_SYMBOL_GPL(call_filter_check_discard);
 
 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
@@ -1633,7 +1653,7 @@ trace_function(struct trace_array *tr,
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
 
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		__buffer_unlock_commit(buffer, event);
 }
 
@@ -1717,7 +1737,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 
 	entry->size = trace.nr_entries;
 
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		__buffer_unlock_commit(buffer, event);
 
  out:
@@ -1819,7 +1839,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 	trace.entries		= entry->caller;
 
 	save_stack_trace_user(&trace);
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		__buffer_unlock_commit(buffer, event);
 
  out_drop_count:
@@ -2011,7 +2031,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	entry->fmt			= fmt;
 
 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
-	if (!filter_check_discard(call, entry, buffer, event)) {
+	if (!call_filter_check_discard(call, entry, buffer, event)) {
 		__buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
@@ -2066,7 +2086,7 @@ __trace_array_vprintk(struct ring_buffer *buffer,
 
 	memcpy(&entry->buf, tbuffer, len);
 	entry->buf[len] = '\0';
-	if (!filter_check_discard(call, entry, buffer, event)) {
+	if (!call_filter_check_discard(call, entry, buffer, event)) {
 		__buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d1cf5159bec0..12d1a612a73e 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1007,9 +1007,9 @@ struct filter_pred {
 
 extern enum regex_type
 filter_parse_regex(char *buff, int len, char **search, int *not);
-extern void print_event_filter(struct ftrace_event_call *call,
+extern void print_event_filter(struct ftrace_event_file *file,
 			       struct trace_seq *s);
-extern int apply_event_filter(struct ftrace_event_call *call,
+extern int apply_event_filter(struct ftrace_event_file *file,
 			      char *filter_string);
 extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
 					char *filter_string);
@@ -1020,20 +1020,6 @@ extern int filter_assign_type(const char *type);
 struct ftrace_event_field *
 trace_find_event_field(struct ftrace_event_call *call, char *name);
 
-static inline int
-filter_check_discard(struct ftrace_event_call *call, void *rec,
-		     struct ring_buffer *buffer,
-		     struct ring_buffer_event *event)
-{
-	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
-	    !filter_match_preds(call->filter, rec)) {
-		ring_buffer_discard_commit(buffer, event);
-		return 1;
-	}
-
-	return 0;
-}
-
 extern void trace_event_enable_cmd_record(bool enable);
 extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
 extern int event_trace_del_tracer(struct trace_array *tr);
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index d594da0dc03c..697fb9bac8f0 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -78,7 +78,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 	entry->line = f->line;
 	entry->correct = val == expect;
 
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		__buffer_unlock_commit(buffer, event);
 
  out:
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 368a4d50cc30..043f833246a0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -989,7 +989,7 @@ static ssize_t
 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 		  loff_t *ppos)
 {
-	struct ftrace_event_call *call;
+	struct ftrace_event_file *file;
 	struct trace_seq *s;
 	int r = -ENODEV;
 
@@ -1004,12 +1004,12 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 	trace_seq_init(s);
 
 	mutex_lock(&event_mutex);
-	call = event_file_data(filp);
-	if (call)
-		print_event_filter(call, s);
+	file = event_file_data(filp);
+	if (file)
+		print_event_filter(file, s);
 	mutex_unlock(&event_mutex);
 
-	if (call)
+	if (file)
 		r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -1021,7 +1021,7 @@ static ssize_t
 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		   loff_t *ppos)
 {
-	struct ftrace_event_call *call;
+	struct ftrace_event_file *file;
 	char *buf;
 	int err = -ENODEV;
 
@@ -1039,9 +1039,9 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	buf[cnt] = '\0';
 
 	mutex_lock(&event_mutex);
-	call = event_file_data(filp);
-	if (call)
-		err = apply_event_filter(call, buf);
+	file = event_file_data(filp);
+	if (file)
+		err = apply_event_filter(file, buf);
 	mutex_unlock(&event_mutex);
 
 	free_page((unsigned long) buf);
@@ -1539,7 +1539,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
 			return -1;
 		}
 	}
-	trace_create_file("filter", 0644, file->dir, call,
+	trace_create_file("filter", 0644, file->dir, file,
 			  &ftrace_event_filter_fops);
 
 	trace_create_file("format", 0444, file->dir, call,
@@ -1577,6 +1577,7 @@ static void event_remove(struct ftrace_event_call *call)
 		if (file->event_call != call)
 			continue;
 		ftrace_event_enable_disable(file, 0);
+		destroy_preds(file);
 		/*
 		 * The do_for_each_event_file() is
 		 * a double loop. After finding the call for this
@@ -1700,7 +1701,7 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
 {
 	event_remove(call);
 	trace_destroy_fields(call);
-	destroy_preds(call);
+	destroy_call_preds(call);
 }
 
 static int probe_remove_event_call(struct ftrace_event_call *call)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 97daa8cf958d..2468f56dc5db 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -637,10 +637,18 @@ static void append_filter_err(struct filter_parse_state *ps,
 	free_page((unsigned long) buf);
 }
 
+static inline struct event_filter *event_filter(struct ftrace_event_file *file)
+{
+	if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+		return file->event_call->filter;
+	else
+		return file->filter;
+}
+
 /* caller must hold event_mutex */
-void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
+void print_event_filter(struct ftrace_event_file *file, struct trace_seq *s)
 {
-	struct event_filter *filter = call->filter;
+	struct event_filter *filter = event_filter(file);
 
 	if (filter && filter->filter_string)
 		trace_seq_printf(s, "%s\n", filter->filter_string);
@@ -766,11 +774,21 @@ static void __free_preds(struct event_filter *filter)
 	filter->n_preds = 0;
 }
 
-static void filter_disable(struct ftrace_event_call *call)
+static void call_filter_disable(struct ftrace_event_call *call)
 {
 	call->flags &= ~TRACE_EVENT_FL_FILTERED;
 }
 
+static void filter_disable(struct ftrace_event_file *file)
+{
+	struct ftrace_event_call *call = file->event_call;
+
+	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+		call_filter_disable(call);
+	else
+		file->flags &= ~FTRACE_EVENT_FL_FILTERED;
+}
+
 static void __free_filter(struct event_filter *filter)
 {
 	if (!filter)
@@ -781,16 +799,30 @@ static void __free_filter(struct event_filter *filter)
 	kfree(filter);
 }
 
+void destroy_call_preds(struct ftrace_event_call *call)
+{
+	__free_filter(call->filter);
+	call->filter = NULL;
+}
+
+static void destroy_file_preds(struct ftrace_event_file *file)
+{
+	__free_filter(file->filter);
+	file->filter = NULL;
+}
+
 /*
- * Called when destroying the ftrace_event_call.
- * The call is being freed, so we do not need to worry about
- * the call being currently used. This is for module code removing
+ * Called when destroying the ftrace_event_file.
+ * The file is being freed, so we do not need to worry about
+ * the file being currently used. This is for module code removing
  * the tracepoints from within it.
  */
-void destroy_preds(struct ftrace_event_call *call)
+void destroy_preds(struct ftrace_event_file *file)
 {
-	__free_filter(call->filter);
-	call->filter = NULL;
+	if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+		destroy_call_preds(file->event_call);
+	else
+		destroy_file_preds(file);
 }
 
 static struct event_filter *__alloc_filter(void)
@@ -825,28 +857,56 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)
 	return 0;
 }
 
-static void filter_free_subsystem_preds(struct event_subsystem *system)
+static inline void __remove_filter(struct ftrace_event_file *file)
 {
+	struct ftrace_event_call *call = file->event_call;
+
+	filter_disable(file);
+	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+		remove_filter_string(call->filter);
+	else
+		remove_filter_string(file->filter);
+}
+
+static void filter_free_subsystem_preds(struct event_subsystem *system,
+					struct trace_array *tr)
+{
+	struct ftrace_event_file *file;
 	struct ftrace_event_call *call;
 
-	list_for_each_entry(call, &ftrace_events, list) {
+	list_for_each_entry(file, &tr->events, list) {
+		call = file->event_call;
 		if (strcmp(call->class->system, system->name) != 0)
 			continue;
 
-		filter_disable(call);
-		remove_filter_string(call->filter);
+		__remove_filter(file);
 	}
 }
 
-static void filter_free_subsystem_filters(struct event_subsystem *system)
+static inline void __free_subsystem_filter(struct ftrace_event_file *file)
 {
+	struct ftrace_event_call *call = file->event_call;
+
+	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) {
+		__free_filter(call->filter);
+		call->filter = NULL;
+	} else {
+		__free_filter(file->filter);
+		file->filter = NULL;
+	}
+}
+
+static void filter_free_subsystem_filters(struct event_subsystem *system,
+					  struct trace_array *tr)
+{
+	struct ftrace_event_file *file;
 	struct ftrace_event_call *call;
 
-	list_for_each_entry(call, &ftrace_events, list) {
+	list_for_each_entry(file, &tr->events, list) {
+		call = file->event_call;
 		if (strcmp(call->class->system, system->name) != 0)
 			continue;
-		__free_filter(call->filter);
-		call->filter = NULL;
+		__free_subsystem_filter(file);
 	}
 }
 
@@ -1617,15 +1677,85 @@ fail:
 	return err;
 }
 
+static inline void event_set_filtered_flag(struct ftrace_event_file *file)
+{
+	struct ftrace_event_call *call = file->event_call;
+
+	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+		call->flags |= TRACE_EVENT_FL_FILTERED;
+	else
+		file->flags |= FTRACE_EVENT_FL_FILTERED;
+}
+
+static inline void event_set_filter(struct ftrace_event_file *file,
+				    struct event_filter *filter)
+{
+	struct ftrace_event_call *call = file->event_call;
+
+	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+		rcu_assign_pointer(call->filter, filter);
+	else
+		rcu_assign_pointer(file->filter, filter);
+}
+
+static inline void event_clear_filter(struct ftrace_event_file *file)
+{
+	struct ftrace_event_call *call = file->event_call;
+
+	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+		RCU_INIT_POINTER(call->filter, NULL);
+	else
+		RCU_INIT_POINTER(file->filter, NULL);
+}
+
+static inline void
+event_set_no_set_filter_flag(struct ftrace_event_file *file)
+{
+	struct ftrace_event_call *call = file->event_call;
+
+	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+		call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+	else
+		file->flags |= FTRACE_EVENT_FL_NO_SET_FILTER;
+}
+
+static inline void
+event_clear_no_set_filter_flag(struct ftrace_event_file *file)
+{
+	struct ftrace_event_call *call = file->event_call;
+
+	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+		call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
+	else
+		file->flags &= ~FTRACE_EVENT_FL_NO_SET_FILTER;
+}
+
+static inline bool
+event_no_set_filter_flag(struct ftrace_event_file *file)
+{
+	struct ftrace_event_call *call = file->event_call;
+
+	if (file->flags & FTRACE_EVENT_FL_NO_SET_FILTER)
+		return true;
+
+	if ((call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) &&
+	    (call->flags & TRACE_EVENT_FL_NO_SET_FILTER))
+		return true;
+
+	return false;
+}
+
 struct filter_list {
 	struct list_head	list;
 	struct event_filter	*filter;
 };
 
 static int replace_system_preds(struct event_subsystem *system,
+				struct trace_array *tr,
 				struct filter_parse_state *ps,
 				char *filter_string)
 {
+	struct ftrace_event_file *file;
 	struct ftrace_event_call *call;
 	struct filter_list *filter_item;
 	struct filter_list *tmp;
@@ -1633,8 +1763,8 @@ static int replace_system_preds(struct event_subsystem *system,
 	bool fail = true;
 	int err;
 
-	list_for_each_entry(call, &ftrace_events, list) {
-
+	list_for_each_entry(file, &tr->events, list) {
+		call = file->event_call;
 		if (strcmp(call->class->system, system->name) != 0)
 			continue;
 
@@ -1644,18 +1774,20 @@ static int replace_system_preds(struct event_subsystem *system,
 		 */
 		err = replace_preds(call, NULL, ps, filter_string, true);
 		if (err)
-			call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+			event_set_no_set_filter_flag(file);
 		else
-			call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
+			event_clear_no_set_filter_flag(file);
 	}
 
-	list_for_each_entry(call, &ftrace_events, list) {
+	list_for_each_entry(file, &tr->events, list) {
 		struct event_filter *filter;
 
+		call = file->event_call;
+
 		if (strcmp(call->class->system, system->name) != 0)
 			continue;
 
-		if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)
+		if (event_no_set_filter_flag(file))
 			continue;
 
 		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
@@ -1676,17 +1808,17 @@ static int replace_system_preds(struct event_subsystem *system,
 
 		err = replace_preds(call, filter, ps, filter_string, false);
 		if (err) {
-			filter_disable(call);
+			filter_disable(file);
 			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
 			append_filter_err(ps, filter);
 		} else
-			call->flags |= TRACE_EVENT_FL_FILTERED;
+			event_set_filtered_flag(file);
 		/*
 		 * Regardless of if this returned an error, we still
 		 * replace the filter for the call.
 		 */
-		filter = call->filter;
-		rcu_assign_pointer(call->filter, filter_item->filter);
+		filter = event_filter(file);
+		event_set_filter(file, filter_item->filter);
 		filter_item->filter = filter;
 
 		fail = false;
@@ -1816,6 +1948,7 @@ static int create_filter(struct ftrace_event_call *call,
  * and always remembers @filter_str.
  */
 static int create_system_filter(struct event_subsystem *system,
+				struct trace_array *tr,
 				char *filter_str, struct event_filter **filterp)
 {
 	struct event_filter *filter = NULL;
@@ -1824,7 +1957,7 @@ static int create_system_filter(struct event_subsystem *system,
 
 	err = create_filter_start(filter_str, true, &ps, &filter);
 	if (!err) {
-		err = replace_system_preds(system, ps, filter_str);
+		err = replace_system_preds(system, tr, ps, filter_str);
 		if (!err) {
 			/* System filters just show a default message */
 			kfree(filter->filter_string);
@@ -1840,20 +1973,25 @@ static int create_system_filter(struct event_subsystem *system,
 }
 
 /* caller must hold event_mutex */
-int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+int apply_event_filter(struct ftrace_event_file *file, char *filter_string)
 {
+	struct ftrace_event_call *call = file->event_call;
 	struct event_filter *filter;
 	int err;
 
 	if (!strcmp(strstrip(filter_string), "0")) {
-		filter_disable(call);
-		filter = call->filter;
+		filter_disable(file);
+		filter = event_filter(file);
+
 		if (!filter)
 			return 0;
-		RCU_INIT_POINTER(call->filter, NULL);
+
+		event_clear_filter(file);
+
 		/* Make sure the filter is not being used */
 		synchronize_sched();
 		__free_filter(filter);
+
 		return 0;
 	}
 
@@ -1866,14 +2004,15 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 	 * string
 	 */
 	if (filter) {
-		struct event_filter *tmp = call->filter;
+		struct event_filter *tmp;
 
+		tmp = event_filter(file);
 		if (!err)
-			call->flags |= TRACE_EVENT_FL_FILTERED;
+			event_set_filtered_flag(file);
 		else
-			filter_disable(call);
+			filter_disable(file);
 
-		rcu_assign_pointer(call->filter, filter);
+		event_set_filter(file, filter);
 
 		if (tmp) {
 			/* Make sure the call is done with the filter */
@@ -1889,6 +2028,7 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
 				 char *filter_string)
 {
 	struct event_subsystem *system = dir->subsystem;
+	struct trace_array *tr = dir->tr;
 	struct event_filter *filter;
 	int err = 0;
 
@@ -1901,18 +2041,18 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
 	}
 
 	if (!strcmp(strstrip(filter_string), "0")) {
-		filter_free_subsystem_preds(system);
+		filter_free_subsystem_preds(system, tr);
 		remove_filter_string(system->filter);
 		filter = system->filter;
 		system->filter = NULL;
 		/* Ensure all filters are no longer used */
 		synchronize_sched();
-		filter_free_subsystem_filters(system);
+		filter_free_subsystem_filters(system, tr);
 		__free_filter(filter);
 		goto out_unlock;
 	}
 
-	err = create_system_filter(system, filter_string, &filter);
+	err = create_system_filter(system, tr, filter_string, &filter);
 	if (filter) {
 		/*
 		 * No event actually uses the system filter
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d21a74670088..7c3e3e72e2b6 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -180,7 +180,7 @@ struct ftrace_event_call __used event_##call = {			\
 	.event.type		= etype,				\
 	.class			= &event_class_ftrace_##call,		\
 	.print_fmt		= print,				\
-	.flags			= TRACE_EVENT_FL_IGNORE_ENABLE,		\
+	.flags			= TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \
 };									\
 struct ftrace_event_call __used						\
 __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index e08c030b8f38..80387d1d27e1 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -270,7 +270,7 @@ int __trace_graph_entry(struct trace_array *tr,
 		return 0;
 	entry	= ring_buffer_event_data(event);
 	entry->graph_ent			= *trace;
-	if (!filter_current_check_discard(buffer, call, entry, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		__buffer_unlock_commit(buffer, event);
 
 	return 1;
@@ -385,7 +385,7 @@ void __trace_graph_return(struct trace_array *tr,
 		return;
 	entry	= ring_buffer_event_data(event);
 	entry->ret				= *trace;
-	if (!filter_current_check_discard(buffer, call, entry, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		__buffer_unlock_commit(buffer, event);
 }
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 243f6834d026..dae9541ada9e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -835,7 +835,7 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
 	entry->ip = (unsigned long)tp->rp.kp.addr;
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
-	if (!filter_current_check_discard(buffer, call, entry, event))
+	if (!filter_check_discard(ftrace_file, entry, buffer, event))
 		trace_buffer_unlock_commit_regs(buffer, event,
 						irq_flags, pc, regs);
 }
@@ -884,7 +884,7 @@ __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
 	entry->ret_ip = (unsigned long)ri->ret_addr;
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
-	if (!filter_current_check_discard(buffer, call, entry, event))
+	if (!filter_check_discard(ftrace_file, entry, buffer, event))
 		trace_buffer_unlock_commit_regs(buffer, event,
 						irq_flags, pc, regs);
 }
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b3dcfb2f0fef..0abd9b863474 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -323,7 +323,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 	entry	= ring_buffer_event_data(event);
 	entry->rw			= *rw;
 
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		trace_buffer_unlock_commit(buffer, event, 0, pc);
 }
 
@@ -353,7 +353,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 	entry	= ring_buffer_event_data(event);
 	entry->map			= *map;
 
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		trace_buffer_unlock_commit(buffer, event, 0, pc);
 }
 
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 4e98e3b257a3..3f34dc9b40f3 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -45,7 +45,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->next_state		= next->state;
 	entry->next_cpu	= task_cpu(next);
 
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
 
@@ -101,7 +101,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->next_state		= wakee->state;
 	entry->next_cpu			= task_cpu(wakee);
 
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 559329d9bd2f..32644eece429 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -336,8 +336,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 	entry->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
 
-	if (!filter_current_check_discard(buffer, sys_data->enter_event,
-					  entry, event))
+	if (!call_filter_check_discard(sys_data->enter_event, entry,
+				       buffer, event))
 		trace_current_buffer_unlock_commit(buffer, event,
 						   irq_flags, pc);
 }
@@ -377,8 +377,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
 	entry->nr = syscall_nr;
 	entry->ret = syscall_get_return_value(current, regs);
 
-	if (!filter_current_check_discard(buffer, sys_data->exit_event,
-					  entry, event))
+	if (!call_filter_check_discard(sys_data->exit_event, entry,
+				       buffer, event))
 		trace_current_buffer_unlock_commit(buffer, event,
 						   irq_flags, pc);
 }
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 272261b5f94f..b6dcc42ef7f5 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -128,6 +128,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
 	if (is_ret)
 		tu->consumer.ret_handler = uretprobe_dispatcher;
 	init_trace_uprobe_filter(&tu->filter);
+	tu->call.flags |= TRACE_EVENT_FL_USE_CALL_FILTER;
 	return tu;
 
 error:
@@ -561,7 +562,7 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
-	if (!filter_current_check_discard(buffer, call, entry, event))
+	if (!call_filter_check_discard(call, entry, buffer, event))
 		trace_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
-- 
cgit v1.2.3


From 38de93abec8d8acd8d6dbbe9b0d92d6d5cdb3090 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Thu, 24 Oct 2013 08:34:18 -0500
Subject: tracing: Make register/unregister_ftrace_command __init

register/unregister_ftrace_command() are only ever called from __init
functions, so can themselves be made __init.

Also make register_snapshot_cmd() __init for the same reason.

Link: http://lkml.kernel.org/r/d4042c8cadb7ae6f843ac9a89a24e1c6a3099727.1382620672.git.tom.zanussi@linux.intel.com

Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  4 ++--
 kernel/trace/ftrace.c  | 12 ++++++++++--
 kernel/trace/trace.c   |  4 ++--
 3 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ec85d48619e1..31ea4b428360 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -533,11 +533,11 @@ static inline int ftrace_force_update(void) { return 0; }
 static inline void ftrace_disable_daemon(void) { }
 static inline void ftrace_enable_daemon(void) { }
 static inline void ftrace_release_mod(struct module *mod) {}
-static inline int register_ftrace_command(struct ftrace_func_command *cmd)
+static inline __init int register_ftrace_command(struct ftrace_func_command *cmd)
 {
 	return -EINVAL;
 }
-static inline int unregister_ftrace_command(char *cmd_name)
+static inline __init int unregister_ftrace_command(char *cmd_name)
 {
 	return -EINVAL;
 }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 080b7d41e17f..22fa55696760 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3307,7 +3307,11 @@ void unregister_ftrace_function_probe_all(char *glob)
 static LIST_HEAD(ftrace_commands);
 static DEFINE_MUTEX(ftrace_cmd_mutex);
 
-int register_ftrace_command(struct ftrace_func_command *cmd)
+/*
+ * Currently we only register ftrace commands from __init, so mark this
+ * __init too.
+ */
+__init int register_ftrace_command(struct ftrace_func_command *cmd)
 {
 	struct ftrace_func_command *p;
 	int ret = 0;
@@ -3326,7 +3330,11 @@ int register_ftrace_command(struct ftrace_func_command *cmd)
 	return ret;
 }
 
-int unregister_ftrace_command(struct ftrace_func_command *cmd)
+/*
+ * Currently we only unregister ftrace commands from __init, so mark
+ * this __init too.
+ */
+__init int unregister_ftrace_command(struct ftrace_func_command *cmd)
 {
 	struct ftrace_func_command *p, *n;
 	int ret = -ENODEV;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 489da8b19f30..f9fa42b180e3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5477,12 +5477,12 @@ static struct ftrace_func_command ftrace_snapshot_cmd = {
 	.func			= ftrace_trace_snapshot_callback,
 };
 
-static int register_snapshot_cmd(void)
+static __init int register_snapshot_cmd(void)
 {
 	return register_ftrace_command(&ftrace_snapshot_cmd);
 }
 #else
-static inline int register_snapshot_cmd(void) { return 0; }
+static inline __init int register_snapshot_cmd(void) { return 0; }
 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
 
 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
-- 
cgit v1.2.3


From d562aff93bfb530b0992141500a402d17081189d Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Thu, 24 Oct 2013 08:34:19 -0500
Subject: tracing: Add support for SOFT_DISABLE to syscall events

The original SOFT_DISABLE patches didn't add support for soft disable
of syscall events; this adds it.

Add an array of ftrace_event_file pointers indexed by syscall number
to the trace array and remove the existing enabled bitmaps, which as a
result are now redundant.  The ftrace_event_file structs in turn
contain the soft disable flags we need for per-syscall soft disable
accounting.

Adding ftrace_event_files also means we can remove the USE_CALL_FILTER
bit, thus enabling multibuffer filter support for syscall events.

Link: http://lkml.kernel.org/r/6e72b566e85d8df8042f133efbc6c30e21fb017e.1382620672.git.tom.zanussi@linux.intel.com

Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/syscalls.h      |  4 ++--
 kernel/trace/trace.h          |  4 ++--
 kernel/trace/trace_syscalls.c | 42 ++++++++++++++++++++++++++++++++----------
 3 files changed, 36 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 10bafa97049d..2ef31bfd620b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -120,7 +120,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.class			= &event_class_syscall_enter,	\
 		.event.funcs            = &enter_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-		.flags			= TRACE_EVENT_FL_CAP_ANY | TRACE_EVENT_FL_USE_CALL_FILTER,\
+		.flags                  = TRACE_EVENT_FL_CAP_ANY,	\
 	};								\
 	static struct ftrace_event_call __used				\
 	  __attribute__((section("_ftrace_events")))			\
@@ -134,7 +134,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.class			= &event_class_syscall_exit,	\
 		.event.funcs		= &exit_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-		.flags			= TRACE_EVENT_FL_CAP_ANY | TRACE_EVENT_FL_USE_CALL_FILTER,\
+		.flags                  = TRACE_EVENT_FL_CAP_ANY,	\
 	};								\
 	static struct ftrace_event_call __used				\
 	  __attribute__((section("_ftrace_events")))			\
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 12d1a612a73e..9c27cdadd71f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -192,8 +192,8 @@ struct trace_array {
 #ifdef CONFIG_FTRACE_SYSCALLS
 	int			sys_refcount_enter;
 	int			sys_refcount_exit;
-	DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
-	DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
+	struct ftrace_event_file *enter_syscall_files[NR_syscalls];
+	struct ftrace_event_file *exit_syscall_files[NR_syscalls];
 #endif
 	int			stop_count;
 	int			clock_id;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 32644eece429..e4b6d11bdf78 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -302,6 +302,7 @@ static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
 static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 {
 	struct trace_array *tr = data;
+	struct ftrace_event_file *ftrace_file;
 	struct syscall_trace_enter *entry;
 	struct syscall_metadata *sys_data;
 	struct ring_buffer_event *event;
@@ -314,7 +315,13 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 	syscall_nr = trace_get_syscall_nr(current, regs);
 	if (syscall_nr < 0)
 		return;
-	if (!test_bit(syscall_nr, tr->enabled_enter_syscalls))
+
+	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
+	ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
+	if (!ftrace_file)
+		return;
+
+	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
 		return;
 
 	sys_data = syscall_nr_to_meta(syscall_nr);
@@ -336,8 +343,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 	entry->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
 
-	if (!call_filter_check_discard(sys_data->enter_event, entry,
-				       buffer, event))
+	if (!filter_check_discard(ftrace_file, entry, buffer, event))
 		trace_current_buffer_unlock_commit(buffer, event,
 						   irq_flags, pc);
 }
@@ -345,6 +351,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
 {
 	struct trace_array *tr = data;
+	struct ftrace_event_file *ftrace_file;
 	struct syscall_trace_exit *entry;
 	struct syscall_metadata *sys_data;
 	struct ring_buffer_event *event;
@@ -356,7 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
 	syscall_nr = trace_get_syscall_nr(current, regs);
 	if (syscall_nr < 0)
 		return;
-	if (!test_bit(syscall_nr, tr->enabled_exit_syscalls))
+
+	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
+	ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
+	if (!ftrace_file)
+		return;
+
+	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
 		return;
 
 	sys_data = syscall_nr_to_meta(syscall_nr);
@@ -377,8 +390,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
 	entry->nr = syscall_nr;
 	entry->ret = syscall_get_return_value(current, regs);
 
-	if (!call_filter_check_discard(sys_data->exit_event, entry,
-				       buffer, event))
+	if (!filter_check_discard(ftrace_file, entry, buffer, event))
 		trace_current_buffer_unlock_commit(buffer, event,
 						   irq_flags, pc);
 }
@@ -397,7 +409,7 @@ static int reg_event_syscall_enter(struct ftrace_event_file *file,
 	if (!tr->sys_refcount_enter)
 		ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
 	if (!ret) {
-		set_bit(num, tr->enabled_enter_syscalls);
+		rcu_assign_pointer(tr->enter_syscall_files[num], file);
 		tr->sys_refcount_enter++;
 	}
 	mutex_unlock(&syscall_trace_lock);
@@ -415,10 +427,15 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file,
 		return;
 	mutex_lock(&syscall_trace_lock);
 	tr->sys_refcount_enter--;
-	clear_bit(num, tr->enabled_enter_syscalls);
+	rcu_assign_pointer(tr->enter_syscall_files[num], NULL);
 	if (!tr->sys_refcount_enter)
 		unregister_trace_sys_enter(ftrace_syscall_enter, tr);
 	mutex_unlock(&syscall_trace_lock);
+	/*
+	 * Callers expect the event to be completely disabled on
+	 * return, so wait for current handlers to finish.
+	 */
+	synchronize_sched();
 }
 
 static int reg_event_syscall_exit(struct ftrace_event_file *file,
@@ -435,7 +452,7 @@ static int reg_event_syscall_exit(struct ftrace_event_file *file,
 	if (!tr->sys_refcount_exit)
 		ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
 	if (!ret) {
-		set_bit(num, tr->enabled_exit_syscalls);
+		rcu_assign_pointer(tr->exit_syscall_files[num], file);
 		tr->sys_refcount_exit++;
 	}
 	mutex_unlock(&syscall_trace_lock);
@@ -453,10 +470,15 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file,
 		return;
 	mutex_lock(&syscall_trace_lock);
 	tr->sys_refcount_exit--;
-	clear_bit(num, tr->enabled_exit_syscalls);
+	rcu_assign_pointer(tr->exit_syscall_files[num], NULL);
 	if (!tr->sys_refcount_exit)
 		unregister_trace_sys_exit(ftrace_syscall_exit, tr);
 	mutex_unlock(&syscall_trace_lock);
+	/*
+	 * Callers expect the event to be completely disabled on
+	 * return, so wait for current handlers to finish.
+	 */
+	synchronize_sched();
 }
 
 static int __init init_syscall_trace(struct ftrace_event_call *call)
-- 
cgit v1.2.3


From b8a216269ec0ce2e961d32e6d640d7010b8a818e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 4 Oct 2013 22:06:53 +0200
Subject: sched: Move completion code from core.c to completion.c

Completions already have their own header file: linux/completion.h
Move the implementation out of kernel/sched/core.c and into its own
file: kernel/sched/completion.c.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-x2y49rmxu5dljt66ai2lcfuw@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/completion.h |   2 +-
 kernel/sched/Makefile      |   2 +-
 kernel/sched/completion.c  | 299 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c        | 284 ------------------------------------------
 4 files changed, 301 insertions(+), 286 deletions(-)
 create mode 100644 kernel/sched/completion.c

(limited to 'include/linux')

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 3cd574d5b19e..22c33e35bcb2 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -5,7 +5,7 @@
  * (C) Copyright 2001 Linus Torvalds
  *
  * Atomic wait-for-completion handler data structures.
- * See kernel/sched/core.c for details.
+ * See kernel/sched/completion.c for details.
  */
 
 #include <linux/wait.h>
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index f8d3f4baa1a1..7b621409cf15 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -12,7 +12,7 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 
 obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o
-obj-y += wait.o
+obj-y += wait.o completion.o
 obj-$(CONFIG_SMP) += cpupri.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
new file mode 100644
index 000000000000..a63f4dc27909
--- /dev/null
+++ b/kernel/sched/completion.c
@@ -0,0 +1,299 @@
+/*
+ * Generic wait-for-completion handler;
+ *
+ * It differs from semaphores in that their default case is the opposite,
+ * wait_for_completion default blocks whereas semaphore default non-block. The
+ * interface also makes it easy to 'complete' multiple waiting threads,
+ * something which isn't entirely natural for semaphores.
+ *
+ * But more importantly, the primitive documents the usage. Semaphores would
+ * typically be used for exclusion which gives rise to priority inversion.
+ * Waiting for completion is a typically sync point, but not an exclusion point.
+ */
+
+#include <linux/sched.h>
+#include <linux/completion.h>
+
+/**
+ * complete: - signals a single thread waiting on this completion
+ * @x:  holds the state of this particular completion
+ *
+ * This will wake up a single thread waiting on this completion. Threads will be
+ * awakened in the same order in which they were queued.
+ *
+ * See also complete_all(), wait_for_completion() and related routines.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
+void complete(struct completion *x)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&x->wait.lock, flags);
+	x->done++;
+	__wake_up_locked(&x->wait, TASK_NORMAL, 1);
+	spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+EXPORT_SYMBOL(complete);
+
+/**
+ * complete_all: - signals all threads waiting on this completion
+ * @x:  holds the state of this particular completion
+ *
+ * This will wake up all threads waiting on this particular completion event.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
+void complete_all(struct completion *x)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&x->wait.lock, flags);
+	x->done += UINT_MAX/2;
+	__wake_up_locked(&x->wait, TASK_NORMAL, 0);
+	spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+EXPORT_SYMBOL(complete_all);
+
+static inline long __sched
+do_wait_for_common(struct completion *x,
+		   long (*action)(long), long timeout, int state)
+{
+	if (!x->done) {
+		DECLARE_WAITQUEUE(wait, current);
+
+		__add_wait_queue_tail_exclusive(&x->wait, &wait);
+		do {
+			if (signal_pending_state(state, current)) {
+				timeout = -ERESTARTSYS;
+				break;
+			}
+			__set_current_state(state);
+			spin_unlock_irq(&x->wait.lock);
+			timeout = action(timeout);
+			spin_lock_irq(&x->wait.lock);
+		} while (!x->done && timeout);
+		__remove_wait_queue(&x->wait, &wait);
+		if (!x->done)
+			return timeout;
+	}
+	x->done--;
+	return timeout ?: 1;
+}
+
+static inline long __sched
+__wait_for_common(struct completion *x,
+		  long (*action)(long), long timeout, int state)
+{
+	might_sleep();
+
+	spin_lock_irq(&x->wait.lock);
+	timeout = do_wait_for_common(x, action, timeout, state);
+	spin_unlock_irq(&x->wait.lock);
+	return timeout;
+}
+
+static long __sched
+wait_for_common(struct completion *x, long timeout, int state)
+{
+	return __wait_for_common(x, schedule_timeout, timeout, state);
+}
+
+static long __sched
+wait_for_common_io(struct completion *x, long timeout, int state)
+{
+	return __wait_for_common(x, io_schedule_timeout, timeout, state);
+}
+
+/**
+ * wait_for_completion: - waits for completion of a task
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout.
+ *
+ * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
+ * and interrupt capability. Also see complete().
+ */
+void __sched wait_for_completion(struct completion *x)
+{
+	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion);
+
+/**
+ * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible.
+ *
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
+ */
+unsigned long __sched
+wait_for_completion_timeout(struct completion *x, unsigned long timeout)
+{
+	return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_timeout);
+
+/**
+ * wait_for_completion_io: - waits for completion of a task
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout. The caller is accounted as waiting
+ * for IO.
+ */
+void __sched wait_for_completion_io(struct completion *x)
+{
+	wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io);
+
+/**
+ * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible. The caller is accounted as waiting for IO.
+ *
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
+ */
+unsigned long __sched
+wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
+{
+	return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io_timeout);
+
+/**
+ * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
+ * @x:  holds the state of this particular completion
+ *
+ * This waits for completion of a specific task to be signaled. It is
+ * interruptible.
+ *
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
+ */
+int __sched wait_for_completion_interruptible(struct completion *x)
+{
+	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
+	if (t == -ERESTARTSYS)
+		return t;
+	return 0;
+}
+EXPORT_SYMBOL(wait_for_completion_interruptible);
+
+/**
+ * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. It is interruptible. The timeout is in jiffies.
+ *
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
+ */
+long __sched
+wait_for_completion_interruptible_timeout(struct completion *x,
+					  unsigned long timeout)
+{
+	return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
+
+/**
+ * wait_for_completion_killable: - waits for completion of a task (killable)
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It can be
+ * interrupted by a kill signal.
+ *
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
+ */
+int __sched wait_for_completion_killable(struct completion *x)
+{
+	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
+	if (t == -ERESTARTSYS)
+		return t;
+	return 0;
+}
+EXPORT_SYMBOL(wait_for_completion_killable);
+
+/**
+ * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be
+ * signaled or for a specified timeout to expire. It can be
+ * interrupted by a kill signal. The timeout is in jiffies.
+ *
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
+ */
+long __sched
+wait_for_completion_killable_timeout(struct completion *x,
+				     unsigned long timeout)
+{
+	return wait_for_common(x, timeout, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(wait_for_completion_killable_timeout);
+
+/**
+ *	try_wait_for_completion - try to decrement a completion without blocking
+ *	@x:	completion structure
+ *
+ *	Return: 0 if a decrement cannot be done without blocking
+ *		 1 if a decrement succeeded.
+ *
+ *	If a completion is being used as a counting completion,
+ *	attempt to decrement the counter without blocking. This
+ *	enables us to avoid waiting if the resource the completion
+ *	is protecting is not available.
+ */
+bool try_wait_for_completion(struct completion *x)
+{
+	unsigned long flags;
+	int ret = 1;
+
+	spin_lock_irqsave(&x->wait.lock, flags);
+	if (!x->done)
+		ret = 0;
+	else
+		x->done--;
+	spin_unlock_irqrestore(&x->wait.lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(try_wait_for_completion);
+
+/**
+ *	completion_done - Test to see if a completion has any waiters
+ *	@x:	completion structure
+ *
+ *	Return: 0 if there are waiters (wait_for_completion() in progress)
+ *		 1 if there are no waiters.
+ *
+ */
+bool completion_done(struct completion *x)
+{
+	unsigned long flags;
+	int ret = 1;
+
+	spin_lock_irqsave(&x->wait.lock, flags);
+	if (!x->done)
+		ret = 0;
+	spin_unlock_irqrestore(&x->wait.lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(completion_done);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 91b28454c218..aa066f306be2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2688,290 +2688,6 @@ int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
 }
 EXPORT_SYMBOL(default_wake_function);
 
-/**
- * complete: - signals a single thread waiting on this completion
- * @x:  holds the state of this particular completion
- *
- * This will wake up a single thread waiting on this completion. Threads will be
- * awakened in the same order in which they were queued.
- *
- * See also complete_all(), wait_for_completion() and related routines.
- *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
- */
-void complete(struct completion *x)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&x->wait.lock, flags);
-	x->done++;
-	__wake_up_locked(&x->wait, TASK_NORMAL, 1);
-	spin_unlock_irqrestore(&x->wait.lock, flags);
-}
-EXPORT_SYMBOL(complete);
-
-/**
- * complete_all: - signals all threads waiting on this completion
- * @x:  holds the state of this particular completion
- *
- * This will wake up all threads waiting on this particular completion event.
- *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
- */
-void complete_all(struct completion *x)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&x->wait.lock, flags);
-	x->done += UINT_MAX/2;
-	__wake_up_locked(&x->wait, TASK_NORMAL, 0);
-	spin_unlock_irqrestore(&x->wait.lock, flags);
-}
-EXPORT_SYMBOL(complete_all);
-
-static inline long __sched
-do_wait_for_common(struct completion *x,
-		   long (*action)(long), long timeout, int state)
-{
-	if (!x->done) {
-		DECLARE_WAITQUEUE(wait, current);
-
-		__add_wait_queue_tail_exclusive(&x->wait, &wait);
-		do {
-			if (signal_pending_state(state, current)) {
-				timeout = -ERESTARTSYS;
-				break;
-			}
-			__set_current_state(state);
-			spin_unlock_irq(&x->wait.lock);
-			timeout = action(timeout);
-			spin_lock_irq(&x->wait.lock);
-		} while (!x->done && timeout);
-		__remove_wait_queue(&x->wait, &wait);
-		if (!x->done)
-			return timeout;
-	}
-	x->done--;
-	return timeout ?: 1;
-}
-
-static inline long __sched
-__wait_for_common(struct completion *x,
-		  long (*action)(long), long timeout, int state)
-{
-	might_sleep();
-
-	spin_lock_irq(&x->wait.lock);
-	timeout = do_wait_for_common(x, action, timeout, state);
-	spin_unlock_irq(&x->wait.lock);
-	return timeout;
-}
-
-static long __sched
-wait_for_common(struct completion *x, long timeout, int state)
-{
-	return __wait_for_common(x, schedule_timeout, timeout, state);
-}
-
-static long __sched
-wait_for_common_io(struct completion *x, long timeout, int state)
-{
-	return __wait_for_common(x, io_schedule_timeout, timeout, state);
-}
-
-/**
- * wait_for_completion: - waits for completion of a task
- * @x:  holds the state of this particular completion
- *
- * This waits to be signaled for completion of a specific task. It is NOT
- * interruptible and there is no timeout.
- *
- * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
- * and interrupt capability. Also see complete().
- */
-void __sched wait_for_completion(struct completion *x)
-{
-	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion);
-
-/**
- * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
- * @x:  holds the state of this particular completion
- * @timeout:  timeout value in jiffies
- *
- * This waits for either a completion of a specific task to be signaled or for a
- * specified timeout to expire. The timeout is in jiffies. It is not
- * interruptible.
- *
- * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
- * till timeout) if completed.
- */
-unsigned long __sched
-wait_for_completion_timeout(struct completion *x, unsigned long timeout)
-{
-	return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_timeout);
-
-/**
- * wait_for_completion_io: - waits for completion of a task
- * @x:  holds the state of this particular completion
- *
- * This waits to be signaled for completion of a specific task. It is NOT
- * interruptible and there is no timeout. The caller is accounted as waiting
- * for IO.
- */
-void __sched wait_for_completion_io(struct completion *x)
-{
-	wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_io);
-
-/**
- * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
- * @x:  holds the state of this particular completion
- * @timeout:  timeout value in jiffies
- *
- * This waits for either a completion of a specific task to be signaled or for a
- * specified timeout to expire. The timeout is in jiffies. It is not
- * interruptible. The caller is accounted as waiting for IO.
- *
- * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
- * till timeout) if completed.
- */
-unsigned long __sched
-wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
-{
-	return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_io_timeout);
-
-/**
- * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
- * @x:  holds the state of this particular completion
- *
- * This waits for completion of a specific task to be signaled. It is
- * interruptible.
- *
- * Return: -ERESTARTSYS if interrupted, 0 if completed.
- */
-int __sched wait_for_completion_interruptible(struct completion *x)
-{
-	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
-	if (t == -ERESTARTSYS)
-		return t;
-	return 0;
-}
-EXPORT_SYMBOL(wait_for_completion_interruptible);
-
-/**
- * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
- * @x:  holds the state of this particular completion
- * @timeout:  timeout value in jiffies
- *
- * This waits for either a completion of a specific task to be signaled or for a
- * specified timeout to expire. It is interruptible. The timeout is in jiffies.
- *
- * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
- * or number of jiffies left till timeout) if completed.
- */
-long __sched
-wait_for_completion_interruptible_timeout(struct completion *x,
-					  unsigned long timeout)
-{
-	return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
-
-/**
- * wait_for_completion_killable: - waits for completion of a task (killable)
- * @x:  holds the state of this particular completion
- *
- * This waits to be signaled for completion of a specific task. It can be
- * interrupted by a kill signal.
- *
- * Return: -ERESTARTSYS if interrupted, 0 if completed.
- */
-int __sched wait_for_completion_killable(struct completion *x)
-{
-	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
-	if (t == -ERESTARTSYS)
-		return t;
-	return 0;
-}
-EXPORT_SYMBOL(wait_for_completion_killable);
-
-/**
- * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
- * @x:  holds the state of this particular completion
- * @timeout:  timeout value in jiffies
- *
- * This waits for either a completion of a specific task to be
- * signaled or for a specified timeout to expire. It can be
- * interrupted by a kill signal. The timeout is in jiffies.
- *
- * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
- * or number of jiffies left till timeout) if completed.
- */
-long __sched
-wait_for_completion_killable_timeout(struct completion *x,
-				     unsigned long timeout)
-{
-	return wait_for_common(x, timeout, TASK_KILLABLE);
-}
-EXPORT_SYMBOL(wait_for_completion_killable_timeout);
-
-/**
- *	try_wait_for_completion - try to decrement a completion without blocking
- *	@x:	completion structure
- *
- *	Return: 0 if a decrement cannot be done without blocking
- *		 1 if a decrement succeeded.
- *
- *	If a completion is being used as a counting completion,
- *	attempt to decrement the counter without blocking. This
- *	enables us to avoid waiting if the resource the completion
- *	is protecting is not available.
- */
-bool try_wait_for_completion(struct completion *x)
-{
-	unsigned long flags;
-	int ret = 1;
-
-	spin_lock_irqsave(&x->wait.lock, flags);
-	if (!x->done)
-		ret = 0;
-	else
-		x->done--;
-	spin_unlock_irqrestore(&x->wait.lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(try_wait_for_completion);
-
-/**
- *	completion_done - Test to see if a completion has any waiters
- *	@x:	completion structure
- *
- *	Return: 0 if there are waiters (wait_for_completion() in progress)
- *		 1 if there are no waiters.
- *
- */
-bool completion_done(struct completion *x)
-{
-	unsigned long flags;
-	int ret = 1;
-
-	spin_lock_irqsave(&x->wait.lock, flags);
-	if (!x->done)
-		ret = 0;
-	spin_unlock_irqrestore(&x->wait.lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(completion_done);
-
 static long __sched
 sleep_on_common(wait_queue_head_t *q, int state, long timeout)
 {
-- 
cgit v1.2.3


From ce332f662deb545c8a4f3f58debcca26bb2e44b0 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 4 Nov 2013 22:36:17 +0200
Subject: srcu: API for barrier after srcu read unlock

srcu read lock/unlock include a full memory barrier
but that's an implementation detail.
Add an API for make memory fencing explicit for
users that need this barrier, to make sure we
can change it as needed without breaking all users.

Acked-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 include/linux/srcu.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index c114614ed172..9b058eecd403 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -237,4 +237,18 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__srcu_read_unlock(sp, idx);
 }
 
+/**
+ * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
+ *
+ * Converts the preceding srcu_read_unlock into a two-way memory barrier.
+ *
+ * Call this after srcu_read_unlock, to guarantee that all memory operations
+ * that occur after smp_mb__after_srcu_read_unlock will appear to happen after
+ * the preceding srcu_read_unlock.
+ */
+static inline void smp_mb__after_srcu_read_unlock(void)
+{
+	/* __srcu_read_unlock has smp_mb() internally so nothing to do here. */
+}
+
 #endif
-- 
cgit v1.2.3


From 8b414521bc5375ae8ba18c083af95d44b8da0d04 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 11 Oct 2013 21:39:26 -0300
Subject: hung_task: add method to reset detector

In certain occasions it is possible for a hung task detector
positive to be false: continuation from a paused VM, for example.

Add a method to reset detection, similar as is done
with other kernel watchdogs.

Acked-by: Don Zickus <dzickus@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kernel/pvclock.c |  1 +
 include/linux/sched.h     |  8 ++++++++
 kernel/hung_task.c        | 11 +++++++++++
 3 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 6279928c0a71..2f355d229a58 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -48,6 +48,7 @@ void pvclock_touch_watchdogs(void)
 	touch_softlockup_watchdog_sync();
 	clocksource_touch_watchdog();
 	rcu_cpu_stall_reset();
+	reset_hung_task_detector();
 }
 
 static atomic64_t last_value = ATOMIC64_INIT(0);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6682da36b293..7bb4b4a2a101 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -285,6 +285,14 @@ static inline void lockup_detector_init(void)
 }
 #endif
 
+#ifdef CONFIG_DETECT_HUNG_TASK
+void reset_hung_task_detector(void);
+#else
+static inline void reset_hung_task_detector(void)
+{
+}
+#endif
+
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched		__attribute__((__section__(".sched.text")))
 
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 3e97fb126e6b..dfdf51534b3e 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -203,6 +203,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 	return ret;
 }
 
+static atomic_t reset_hung_task = ATOMIC_INIT(0);
+
+void reset_hung_task_detector(void)
+{
+	atomic_set(&reset_hung_task, 1);
+}
+EXPORT_SYMBOL_GPL(reset_hung_task_detector);
+
 /*
  * kthread which checks for tasks stuck in D state
  */
@@ -216,6 +224,9 @@ static int watchdog(void *dummy)
 		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
 			timeout = sysctl_hung_task_timeout_secs;
 
+		if (atomic_xchg(&reset_hung_task, 0))
+			continue;
+
 		check_hung_uninterruptible_tasks(timeout);
 	}
 
-- 
cgit v1.2.3


From 827da44c61419f29ae3be198c342e2147f1a10cb Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 7 Oct 2013 15:51:58 -0700
Subject: net: Explicitly initialize u64_stats_sync structures for lockdep

In order to enable lockdep on seqcount/seqlock structures, we
must explicitly initialize any locks.

The u64_stats_sync structure, uses a seqcount, and thus we need
to introduce a u64_stats_init() function and use it to initialize
the structure.

This unfortunately adds a lot of fairly trivial initialization code
to a number of drivers. But the benefit of ensuring correctness makes
this worth while.

Because these changes are required for lockdep to be enabled, and the
changes are quite trivial, I've not yet split this patch out into 30-some
separate patches, as I figured it would be better to get the various
maintainers thoughts on how to best merge this change along with
the seqcount lockdep enablement.

Feedback would be appreciated!

Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: James Morris <jmorris@namei.org>
Cc: Jesse Gross <jesse@nicira.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Mirko Lindner <mlindner@marvell.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Roger Luethi <rl@hellgate.ch>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Simon Horman <horms@verge.net.au>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Wensong Zhang <wensong@linux-vs.org>
Cc: netdev@vger.kernel.org
Link: http://lkml.kernel.org/r/1381186321-4906-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/net/dummy.c                            |  6 ++++++
 drivers/net/ethernet/emulex/benet/be_main.c    |  4 ++++
 drivers/net/ethernet/intel/igb/igb_main.c      |  5 +++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |  4 ++++
 drivers/net/ethernet/marvell/mvneta.c          |  3 +++
 drivers/net/ethernet/marvell/sky2.c            |  3 +++
 drivers/net/ethernet/neterion/vxge/vxge-main.c |  4 ++++
 drivers/net/ethernet/nvidia/forcedeth.c        |  2 ++
 drivers/net/ethernet/realtek/8139too.c         |  3 +++
 drivers/net/ethernet/tile/tilepro.c            |  2 ++
 drivers/net/ethernet/via/via-rhine.c           |  3 +++
 drivers/net/ifb.c                              |  5 +++++
 drivers/net/loopback.c                         |  6 ++++++
 drivers/net/macvlan.c                          |  7 +++++++
 drivers/net/nlmon.c                            |  8 ++++++++
 drivers/net/team/team.c                        |  6 ++++++
 drivers/net/team/team_mode_loadbalance.c       |  9 ++++++++-
 drivers/net/veth.c                             |  8 ++++++++
 drivers/net/virtio_net.c                       |  8 ++++++++
 drivers/net/vxlan.c                            |  8 ++++++++
 drivers/net/xen-netfront.c                     |  6 ++++++
 include/linux/u64_stats_sync.h                 |  7 +++++++
 net/8021q/vlan_dev.c                           |  9 ++++++++-
 net/bridge/br_device.c                         |  7 +++++++
 net/ipv4/af_inet.c                             | 14 ++++++++++++++
 net/ipv4/ip_tunnel.c                           |  8 +++++++-
 net/ipv6/addrconf.c                            | 14 ++++++++++++++
 net/ipv6/af_inet6.c                            | 14 ++++++++++++++
 net/ipv6/ip6_gre.c                             | 15 +++++++++++++++
 net/ipv6/ip6_tunnel.c                          |  7 +++++++
 net/ipv6/sit.c                                 | 15 +++++++++++++++
 net/netfilter/ipvs/ip_vs_ctl.c                 | 25 ++++++++++++++++++++++---
 net/openvswitch/datapath.c                     |  6 ++++++
 net/openvswitch/vport.c                        |  8 ++++++++
 34 files changed, 253 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index b710c6b2d659..bd8f84b0b894 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -88,10 +88,16 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static int dummy_dev_init(struct net_device *dev)
 {
+	int i;
 	dev->dstats = alloc_percpu(struct pcpu_dstats);
 	if (!dev->dstats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct pcpu_dstats *dstats;
+		dstats = per_cpu_ptr(dev->dstats, i);
+		u64_stats_init(&dstats->syncp);
+	}
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 2c38cc402119..edd75950855a 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2047,6 +2047,9 @@ static int be_tx_qs_create(struct be_adapter *adapter)
 		if (status)
 			return status;
 
+		u64_stats_init(&txo->stats.sync);
+		u64_stats_init(&txo->stats.sync_compl);
+
 		/* If num_evt_qs is less than num_tx_qs, then more than
 		 * one txq share an eq
 		 */
@@ -2108,6 +2111,7 @@ static int be_rx_cqs_create(struct be_adapter *adapter)
 		if (rc)
 			return rc;
 
+		u64_stats_init(&rxo->stats.sync);
 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
 		if (rc)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 8cf44f2a8ccd..b6edb93a8fc1 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1223,6 +1223,9 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
 		ring->count = adapter->tx_ring_count;
 		ring->queue_index = txr_idx;
 
+		u64_stats_init(&ring->tx_syncp);
+		u64_stats_init(&ring->tx_syncp2);
+
 		/* assign ring to adapter */
 		adapter->tx_ring[txr_idx] = ring;
 
@@ -1256,6 +1259,8 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
 		ring->count = adapter->rx_ring_count;
 		ring->queue_index = rxr_idx;
 
+		u64_stats_init(&ring->rx_syncp);
+
 		/* assign ring to adapter */
 		adapter->rx_ring[rxr_idx] = ring;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0ade0cd5ef53..c1750364ddc0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4867,6 +4867,8 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
 	if (!tx_ring->tx_buffer_info)
 		goto err;
 
+	u64_stats_init(&tx_ring->syncp);
+
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
@@ -4949,6 +4951,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
 	if (!rx_ring->rx_buffer_info)
 		goto err;
 
+	u64_stats_init(&rx_ring->syncp);
+
 	/* Round up to nearest 4K */
 	rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
 	rx_ring->size = ALIGN(rx_ring->size, 4096);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index e35bac7cfdf1..cb4635c0cd73 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2792,6 +2792,9 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	pp = netdev_priv(dev);
 
+	u64_stats_init(&pp->tx_stats.syncp);
+	u64_stats_init(&pp->rx_stats.syncp);
+
 	pp->weight = MVNETA_RX_POLL_WEIGHT;
 	pp->phy_node = phy_node;
 	pp->phy_interface = phy_mode;
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index e09a8c6f8536..339d841a538b 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4763,6 +4763,9 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port,
 	sky2->hw = hw;
 	sky2->msg_enable = netif_msg_init(debug, default_msg);
 
+	u64_stats_init(&sky2->tx_stats.syncp);
+	u64_stats_init(&sky2->rx_stats.syncp);
+
 	/* Auto speed and flow control */
 	sky2->flags = SKY2_FLAG_AUTO_SPEED | SKY2_FLAG_AUTO_PAUSE;
 	if (hw->chip_id != CHIP_ID_YUKON_XL)
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 5a20eaf903dd..44626ec1127b 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -2072,6 +2072,10 @@ static int vxge_open_vpaths(struct vxgedev *vdev)
 				vdev->config.tx_steering_type;
 			vpath->fifo.ndev = vdev->ndev;
 			vpath->fifo.pdev = vdev->pdev;
+
+			u64_stats_init(&vpath->fifo.stats.syncp);
+			u64_stats_init(&vpath->ring.stats.syncp);
+
 			if (vdev->config.tx_steering_type)
 				vpath->fifo.txq =
 					netdev_get_tx_queue(vdev->ndev, i);
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 098b96dad66f..2d045be4b5cf 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -5619,6 +5619,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 	spin_lock_init(&np->lock);
 	spin_lock_init(&np->hwstats_lock);
 	SET_NETDEV_DEV(dev, &pci_dev->dev);
+	u64_stats_init(&np->swstats_rx_syncp);
+	u64_stats_init(&np->swstats_tx_syncp);
 
 	init_timer(&np->oom_kick);
 	np->oom_kick.data = (unsigned long) dev;
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 3ccedeb8aba0..c40e984868ad 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -791,6 +791,9 @@ static struct net_device *rtl8139_init_board(struct pci_dev *pdev)
 
 	pci_set_master (pdev);
 
+	u64_stats_init(&tp->rx_stats.syncp);
+	u64_stats_init(&tp->tx_stats.syncp);
+
 retry:
 	/* PIO bar register comes first. */
 	bar = !use_io;
diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c
index 106be47716e7..edb2e12a0fe2 100644
--- a/drivers/net/ethernet/tile/tilepro.c
+++ b/drivers/net/ethernet/tile/tilepro.c
@@ -1008,6 +1008,8 @@ static void tile_net_register(void *dev_ptr)
 	info->egress_timer.data = (long)info;
 	info->egress_timer.function = tile_net_handle_egress_timer;
 
+	u64_stats_init(&info->stats.syncp);
+
 	priv->cpu[my_cpu] = info;
 
 	/*
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index bdf697b184ae..dd99715e6645 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -987,6 +987,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	rp->base = ioaddr;
 
+	u64_stats_init(&rp->tx_stats.syncp);
+	u64_stats_init(&rp->rx_stats.syncp);
+
 	/* Get chip registers into a sane state */
 	rhine_power_init(dev);
 	rhine_hw_init(dev, pioaddr);
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index a3bed28197d2..c14d39bf32d0 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -265,6 +265,7 @@ MODULE_PARM_DESC(numifbs, "Number of ifb devices");
 static int __init ifb_init_one(int index)
 {
 	struct net_device *dev_ifb;
+	struct ifb_private *dp;
 	int err;
 
 	dev_ifb = alloc_netdev(sizeof(struct ifb_private),
@@ -273,6 +274,10 @@ static int __init ifb_init_one(int index)
 	if (!dev_ifb)
 		return -ENOMEM;
 
+	dp = netdev_priv(dev_ifb);
+	u64_stats_init(&dp->rsync);
+	u64_stats_init(&dp->tsync);
+
 	dev_ifb->rtnl_link_ops = &ifb_link_ops;
 	err = register_netdevice(dev_ifb);
 	if (err < 0)
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index a17d85a331f1..ac24c27b4b2d 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -137,10 +137,16 @@ static const struct ethtool_ops loopback_ethtool_ops = {
 
 static int loopback_dev_init(struct net_device *dev)
 {
+	int i;
 	dev->lstats = alloc_percpu(struct pcpu_lstats);
 	if (!dev->lstats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct pcpu_lstats *lb_stats;
+		lb_stats = per_cpu_ptr(dev->lstats, i);
+		u64_stats_init(&lb_stats->syncp);
+	}
 	return 0;
 }
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9bf46bd19b87..0924e51b9ee0 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -501,6 +501,7 @@ static int macvlan_init(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	const struct net_device *lowerdev = vlan->lowerdev;
+	int i;
 
 	dev->state		= (dev->state & ~MACVLAN_STATE_MASK) |
 				  (lowerdev->state & MACVLAN_STATE_MASK);
@@ -516,6 +517,12 @@ static int macvlan_init(struct net_device *dev)
 	if (!vlan->pcpu_stats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct macvlan_pcpu_stats *mvlstats;
+		mvlstats = per_cpu_ptr(vlan->pcpu_stats, i);
+		u64_stats_init(&mvlstats->syncp);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c
index b57ce5f48962..d2bb12bfabd5 100644
--- a/drivers/net/nlmon.c
+++ b/drivers/net/nlmon.c
@@ -47,8 +47,16 @@ static int nlmon_change_mtu(struct net_device *dev, int new_mtu)
 
 static int nlmon_dev_init(struct net_device *dev)
 {
+	int i;
+
 	dev->lstats = alloc_percpu(struct pcpu_lstats);
 
+	for_each_possible_cpu(i) {
+		struct pcpu_lstats *nlmstats;
+		nlmstats = per_cpu_ptr(dev->lstats, i);
+		u64_stats_init(&nlmstats->syncp);
+	}
+
 	return dev->lstats == NULL ? -ENOMEM : 0;
 }
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 50e43e64d51d..6574eb8766f9 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1540,6 +1540,12 @@ static int team_init(struct net_device *dev)
 	if (!team->pcpu_stats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct team_pcpu_stats *team_stats;
+		team_stats = per_cpu_ptr(team->pcpu_stats, i);
+		u64_stats_init(&team_stats->syncp);
+	}
+
 	for (i = 0; i < TEAM_PORT_HASHENTRIES; i++)
 		INIT_HLIST_HEAD(&team->en_port_hlist[i]);
 	INIT_LIST_HEAD(&team->port_list);
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index 829a9cd2b4da..d671fc3ac5ac 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -570,7 +570,7 @@ static int lb_init(struct team *team)
 {
 	struct lb_priv *lb_priv = get_lb_priv(team);
 	lb_select_tx_port_func_t *func;
-	int err;
+	int i, err;
 
 	/* set default tx port selector */
 	func = lb_select_tx_port_get_func("hash");
@@ -588,6 +588,13 @@ static int lb_init(struct team *team)
 		goto err_alloc_pcpu_stats;
 	}
 
+	for_each_possible_cpu(i) {
+		struct lb_pcpu_stats *team_lb_stats;
+		team_lb_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
+		u64_stats_init(&team_lb_stats->syncp);
+	}
+
+
 	INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh);
 
 	err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options));
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index eee1f19ef1e9..46e83e3fe999 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -230,10 +230,18 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu)
 
 static int veth_dev_init(struct net_device *dev)
 {
+	int i;
+
 	dev->vstats = alloc_percpu(struct pcpu_vstats);
 	if (!dev->vstats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct pcpu_vstats *veth_stats;
+		veth_stats = per_cpu_ptr(dev->vstats, i);
+		u64_stats_init(&veth_stats->syncp);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9fbdfcd1e1a0..ee384f3d612b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1569,6 +1569,14 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->stats == NULL)
 		goto free;
 
+	for_each_possible_cpu(i) {
+		struct virtnet_stats *virtnet_stats;
+		virtnet_stats = per_cpu_ptr(vi->stats, i);
+		u64_stats_init(&virtnet_stats->tx_syncp);
+		u64_stats_init(&virtnet_stats->rx_syncp);
+	}
+
+
 	vi->vq_index = alloc_percpu(int);
 	if (vi->vq_index == NULL)
 		goto free_stats;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 2ef5b6219f3f..01ab64d5f9a4 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1884,11 +1884,19 @@ static int vxlan_init(struct net_device *dev)
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_sock *vs;
+	int i;
 
 	dev->tstats = alloc_percpu(struct pcpu_tstats);
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct pcpu_tstats *vxlan_stats;
+		vxlan_stats = per_cpu_ptr(dev->tstats, i);
+		u64_stats_init(&vxlan_stats->syncp);
+	}
+
+
 	spin_lock(&vn->sock_lock);
 	vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port);
 	if (vs) {
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 36808bf25677..54223ac6d8a6 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1338,6 +1338,12 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 	if (np->stats == NULL)
 		goto exit;
 
+	for_each_possible_cpu(i) {
+		struct netfront_stats *xen_nf_stats;
+		xen_nf_stats = per_cpu_ptr(np->stats, i);
+		u64_stats_init(&xen_nf_stats->syncp);
+	}
+
 	/* Initialise tx_skbs as a free chain containing every entry. */
 	np->tx_skb_freelist = 0;
 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index 8da8c4e87da3..7bfabd20204c 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -67,6 +67,13 @@ struct u64_stats_sync {
 #endif
 };
 
+
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+# define u64_stats_init(syncp)	seqcount_init(syncp.seq)
+#else
+# define u64_stats_init(syncp)	do { } while (0)
+#endif
+
 static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 09bf1c38805b..4deff3ed1da1 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -558,7 +558,7 @@ static const struct net_device_ops vlan_netdev_ops;
 static int vlan_dev_init(struct net_device *dev)
 {
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
-	int subclass = 0;
+	int subclass = 0, i;
 
 	netif_carrier_off(dev);
 
@@ -612,6 +612,13 @@ static int vlan_dev_init(struct net_device *dev)
 	if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct vlan_pcpu_stats *vlan_stat;
+		vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
+		u64_stats_init(&vlan_stat->syncp);
+	}
+
+
 	return 0;
 }
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ca04163635da..7893d641775b 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -88,11 +88,18 @@ out:
 static int br_dev_init(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
+	int i;
 
 	br->stats = alloc_percpu(struct br_cpu_netstats);
 	if (!br->stats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct br_cpu_netstats *br_dev_stats;
+		br_dev_stats = per_cpu_ptr(br->stats, i);
+		u64_stats_init(&br_dev_stats->syncp);
+	}
+
 	return 0;
 }
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cfeb85cff4f0..5f4617e377b8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1518,6 +1518,7 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 	ptr[0] = __alloc_percpu(mibsize, align);
 	if (!ptr[0])
 		return -ENOMEM;
+
 #if SNMP_ARRAY_SZ == 2
 	ptr[1] = __alloc_percpu(mibsize, align);
 	if (!ptr[1]) {
@@ -1561,6 +1562,8 @@ static const struct net_protocol icmp_protocol = {
 
 static __net_init int ipv4_mib_init_net(struct net *net)
 {
+	int i;
+
 	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
 			  sizeof(struct tcp_mib),
 			  __alignof__(struct tcp_mib)) < 0)
@@ -1569,6 +1572,17 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 			  sizeof(struct ipstats_mib),
 			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
+
+	for_each_possible_cpu(i) {
+		struct ipstats_mib *af_inet_stats;
+		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i);
+		u64_stats_init(&af_inet_stats->syncp);
+#if SNMP_ARRAY_SZ == 2
+		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i);
+		u64_stats_init(&af_inet_stats->syncp);
+#endif
+	}
+
 	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
 			  sizeof(struct linux_mib),
 			  __alignof__(struct linux_mib)) < 0)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 63a6d6d6b875..caf01176a5e4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -976,13 +976,19 @@ int ip_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
-	int err;
+	int i, err;
 
 	dev->destructor	= ip_tunnel_dev_free;
 	dev->tstats = alloc_percpu(struct pcpu_tstats);
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct pcpu_tstats *ipt_stats;
+		ipt_stats = per_cpu_ptr(dev->tstats, i);
+		u64_stats_init(&ipt_stats->syncp);
+	}
+
 	err = gro_cells_init(&tunnel->gro_cells, dev);
 	if (err) {
 		free_percpu(dev->tstats);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cd3fb301da38..d62d589bb622 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -281,10 +281,24 @@ static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
+	int i;
+
 	if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
 			  sizeof(struct ipstats_mib),
 			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip;
+
+	for_each_possible_cpu(i) {
+		struct ipstats_mib *addrconf_stats;
+		addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i);
+		u64_stats_init(&addrconf_stats->syncp);
+#if SNMP_ARRAY_SZ == 2
+		addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i);
+		u64_stats_init(&addrconf_stats->syncp);
+#endif
+	}
+
+
 	idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device),
 					GFP_KERNEL);
 	if (!idev->stats.icmpv6dev)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7c96100b021e..a8f8559b3dce 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -719,6 +719,8 @@ static void ipv6_packet_cleanup(void)
 
 static int __net_init ipv6_init_mibs(struct net *net)
 {
+	int i;
+
 	if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
 			  sizeof(struct udp_mib),
 			  __alignof__(struct udp_mib)) < 0)
@@ -731,6 +733,18 @@ static int __net_init ipv6_init_mibs(struct net *net)
 			  sizeof(struct ipstats_mib),
 			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
+
+	for_each_possible_cpu(i) {
+		struct ipstats_mib *af_inet6_stats;
+		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i);
+		u64_stats_init(&af_inet6_stats->syncp);
+#if SNMP_ARRAY_SZ == 2
+		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i);
+		u64_stats_init(&af_inet6_stats->syncp);
+#endif
+	}
+
+
 	if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
 			  sizeof(struct icmpv6_mib),
 			  __alignof__(struct icmpv6_mib)) < 0)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index bf4a9a084de5..8acb28621f9c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1252,6 +1252,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
 static int ip6gre_tunnel_init(struct net_device *dev)
 {
 	struct ip6_tnl *tunnel;
+	int i;
 
 	tunnel = netdev_priv(dev);
 
@@ -1269,6 +1270,13 @@ static int ip6gre_tunnel_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct pcpu_tstats *ip6gre_tunnel_stats;
+		ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i);
+		u64_stats_init(&ip6gre_tunnel_stats->syncp);
+	}
+
+
 	return 0;
 }
 
@@ -1449,6 +1457,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
 static int ip6gre_tap_init(struct net_device *dev)
 {
 	struct ip6_tnl *tunnel;
+	int i;
 
 	tunnel = netdev_priv(dev);
 
@@ -1462,6 +1471,12 @@ static int ip6gre_tap_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct pcpu_tstats *ip6gre_tap_stats;
+		ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i);
+		u64_stats_init(&ip6gre_tap_stats->syncp);
+	}
+
 	return 0;
 }
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 583b77e2f69b..df1fa58528c6 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1494,12 +1494,19 @@ static inline int
 ip6_tnl_dev_init_gen(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
+	int i;
 
 	t->dev = dev;
 	t->net = dev_net(dev);
 	dev->tstats = alloc_percpu(struct pcpu_tstats);
 	if (!dev->tstats)
 		return -ENOMEM;
+
+	for_each_possible_cpu(i) {
+		struct pcpu_tstats *ip6_tnl_stats;
+		ip6_tnl_stats = per_cpu_ptr(dev->tstats, i);
+		u64_stats_init(&ip6_tnl_stats->syncp);
+	}
 	return 0;
 }
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 19269453a8ea..365dc5473eb2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1310,6 +1310,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 static int ipip6_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
+	int i;
 
 	tunnel->dev = dev;
 	tunnel->net = dev_net(dev);
@@ -1322,6 +1323,12 @@ static int ipip6_tunnel_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct pcpu_tstats *ipip6_tunnel_stats;
+		ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i);
+		u64_stats_init(&ipip6_tunnel_stats->syncp);
+	}
+
 	return 0;
 }
 
@@ -1331,6 +1338,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
 	struct iphdr *iph = &tunnel->parms.iph;
 	struct net *net = dev_net(dev);
 	struct sit_net *sitn = net_generic(net, sit_net_id);
+	int i;
 
 	tunnel->dev = dev;
 	tunnel->net = dev_net(dev);
@@ -1344,6 +1352,13 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
 	dev->tstats = alloc_percpu(struct pcpu_tstats);
 	if (!dev->tstats)
 		return -ENOMEM;
+
+	for_each_possible_cpu(i) {
+		struct pcpu_tstats *ipip6_fb_stats;
+		ipip6_fb_stats = per_cpu_ptr(dev->tstats, i);
+		u64_stats_init(&ipip6_fb_stats->syncp);
+	}
+
 	dev_hold(dev);
 	rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
 	return 0;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a3df9bddc4f7..3825725907f6 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -842,7 +842,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 	       struct ip_vs_dest **dest_p)
 {
 	struct ip_vs_dest *dest;
-	unsigned int atype;
+	unsigned int atype, i;
 
 	EnterFunction(2);
 
@@ -869,6 +869,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 	if (!dest->stats.cpustats)
 		goto err_alloc;
 
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *ip_vs_dest_stats;
+		ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
+		u64_stats_init(&ip_vs_dest_stats->syncp);
+	}
+
 	dest->af = svc->af;
 	dest->protocol = svc->protocol;
 	dest->vaddr = svc->addr;
@@ -1134,7 +1140,7 @@ static int
 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 		  struct ip_vs_service **svc_p)
 {
-	int ret = 0;
+	int ret = 0, i;
 	struct ip_vs_scheduler *sched = NULL;
 	struct ip_vs_pe *pe = NULL;
 	struct ip_vs_service *svc = NULL;
@@ -1184,6 +1190,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 		goto out_err;
 	}
 
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *ip_vs_stats;
+		ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
+		u64_stats_init(&ip_vs_stats->syncp);
+	}
+
+
 	/* I'm the first user of the service */
 	atomic_set(&svc->refcnt, 0);
 
@@ -3780,7 +3793,7 @@ static struct notifier_block ip_vs_dst_notifier = {
 
 int __net_init ip_vs_control_net_init(struct net *net)
 {
-	int idx;
+	int i, idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	/* Initialize rs_table */
@@ -3799,6 +3812,12 @@ int __net_init ip_vs_control_net_init(struct net *net)
 	if (!ipvs->tot_stats.cpustats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *ipvs_tot_stats;
+		ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
+		u64_stats_init(&ipvs_tot_stats->syncp);
+	}
+
 	spin_lock_init(&ipvs->tot_stats.lock);
 
 	proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 2aa13bd7f2b2..b92553c02279 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1698,6 +1698,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto err_destroy_table;
 	}
 
+	for_each_possible_cpu(i) {
+		struct dp_stats_percpu *dpath_stats;
+		dpath_stats = per_cpu_ptr(dp->stats_percpu, i);
+		u64_stats_init(&dpath_stats->sync);
+	}
+
 	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
 			GFP_KERNEL);
 	if (!dp->ports) {
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6f65dbe13812..d830a95f03a4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -118,6 +118,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 {
 	struct vport *vport;
 	size_t alloc_size;
+	int i;
 
 	alloc_size = sizeof(struct vport);
 	if (priv_size) {
@@ -141,6 +142,13 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	for_each_possible_cpu(i) {
+		struct pcpu_tstats *vport_stats;
+		vport_stats = per_cpu_ptr(vport->percpu_stats, i);
+		u64_stats_init(&vport_stats->syncp);
+	}
+
+
 	spin_lock_init(&vport->stats_lock);
 
 	return vport;
-- 
cgit v1.2.3


From 1ca7d67cf5d5a2aef26a8d9afd789006fa098347 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 7 Oct 2013 15:51:59 -0700
Subject: seqcount: Add lockdep functionality to seqcount/seqlock structures

Currently seqlocks and seqcounts don't support lockdep.

After running across a seqcount related deadlock in the timekeeping
code, I used a less-refined and more focused variant of this patch
to narrow down the cause of the issue.

This is a first-pass attempt to properly enable lockdep functionality
on seqlocks and seqcounts.

Since seqcounts are used in the vdso gettimeofday code, I've provided
non-lockdep accessors for those needs.

I've also handled one case where there were nested seqlock writers
and there may be more edge cases.

Comments and feedback would be appreciated!

Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Link: http://lkml.kernel.org/r/1381186321-4906-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/vdso/vclock_gettime.c |  8 ++---
 fs/dcache.c                    |  4 +--
 fs/fs_struct.c                 |  2 +-
 include/linux/init_task.h      |  8 ++---
 include/linux/lockdep.h        |  8 +++--
 include/linux/seqlock.h        | 79 ++++++++++++++++++++++++++++++++++++++----
 mm/filemap_xip.c               |  2 +-
 7 files changed, 90 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 72074d528400..2ada505067cc 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -178,7 +178,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 
 	ts->tv_nsec = 0;
 	do {
-		seq = read_seqcount_begin(&gtod->seq);
+		seq = read_seqcount_begin_no_lockdep(&gtod->seq);
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->wall_time_sec;
 		ns = gtod->wall_time_snsec;
@@ -198,7 +198,7 @@ notrace static int do_monotonic(struct timespec *ts)
 
 	ts->tv_nsec = 0;
 	do {
-		seq = read_seqcount_begin(&gtod->seq);
+		seq = read_seqcount_begin_no_lockdep(&gtod->seq);
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->monotonic_time_sec;
 		ns = gtod->monotonic_time_snsec;
@@ -214,7 +214,7 @@ notrace static int do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
 	do {
-		seq = read_seqcount_begin(&gtod->seq);
+		seq = read_seqcount_begin_no_lockdep(&gtod->seq);
 		ts->tv_sec = gtod->wall_time_coarse.tv_sec;
 		ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -225,7 +225,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts)
 {
 	unsigned long seq;
 	do {
-		seq = read_seqcount_begin(&gtod->seq);
+		seq = read_seqcount_begin_no_lockdep(&gtod->seq);
 		ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
 		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
diff --git a/fs/dcache.c b/fs/dcache.c
index ae6ebb88ceff..f750be22c08c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2574,7 +2574,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
 	dentry_lock_for_move(dentry, target);
 
 	write_seqcount_begin(&dentry->d_seq);
-	write_seqcount_begin(&target->d_seq);
+	write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
 
 	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
 
@@ -2706,7 +2706,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
 	dentry_lock_for_move(anon, dentry);
 
 	write_seqcount_begin(&dentry->d_seq);
-	write_seqcount_begin(&anon->d_seq);
+	write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED);
 
 	dparent = dentry->d_parent;
 
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index d8ac61d0c932..7dca743b2ce1 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -161,6 +161,6 @@ EXPORT_SYMBOL(current_umask);
 struct fs_struct init_fs = {
 	.users		= 1,
 	.lock		= __SPIN_LOCK_UNLOCKED(init_fs.lock),
-	.seq		= SEQCNT_ZERO,
+	.seq		= SEQCNT_ZERO(init_fs.seq),
 	.umask		= 0022,
 };
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 5cd0f0949927..b0ed422e4e4a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -32,10 +32,10 @@ extern struct fs_struct init_fs;
 #endif
 
 #ifdef CONFIG_CPUSETS
-#define INIT_CPUSET_SEQ							\
-	.mems_allowed_seq = SEQCNT_ZERO,
+#define INIT_CPUSET_SEQ(tsk)							\
+	.mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq),
 #else
-#define INIT_CPUSET_SEQ
+#define INIT_CPUSET_SEQ(tsk)
 #endif
 
 #define INIT_SIGNALS(sig) {						\
@@ -220,7 +220,7 @@ extern struct task_group root_task_group;
 	INIT_FTRACE_GRAPH						\
 	INIT_TRACE_RECURSION						\
 	INIT_TASK_RCU_PREEMPT(tsk)					\
-	INIT_CPUSET_SEQ							\
+	INIT_CPUSET_SEQ(tsk)						\
 	INIT_VTIME(tsk)							\
 }
 
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index cfc2f119779a..92b1bfc5da60 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -497,6 +497,10 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 #define rwlock_acquire_read(l, s, t, i)		lock_acquire_shared_recursive(l, s, t, NULL, i)
 #define rwlock_release(l, n, i)			lock_release(l, n, i)
 
+#define seqcount_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
+#define seqcount_acquire_read(l, s, t, i)	lock_acquire_shared_recursive(l, s, t, NULL, i)
+#define seqcount_release(l, n, i)		lock_release(l, n, i)
+
 #define mutex_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
 #define mutex_acquire_nest(l, s, t, n, i)	lock_acquire_exclusive(l, s, t, n, i)
 #define mutex_release(l, n, i)			lock_release(l, n, i)
@@ -504,11 +508,11 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 #define rwsem_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
 #define rwsem_acquire_nest(l, s, t, n, i)	lock_acquire_exclusive(l, s, t, n, i)
 #define rwsem_acquire_read(l, s, t, i)		lock_acquire_shared(l, s, t, NULL, i)
-# define rwsem_release(l, n, i)			lock_release(l, n, i)
+#define rwsem_release(l, n, i)			lock_release(l, n, i)
 
 #define lock_map_acquire(l)			lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
 #define lock_map_acquire_read(l)		lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
-# define lock_map_release(l)			lock_release(l, 1, _THIS_IP_)
+#define lock_map_release(l)			lock_release(l, 1, _THIS_IP_)
 
 #ifdef CONFIG_PROVE_LOCKING
 # define might_lock(lock) 						\
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 21a209336e79..1e8a8b6e837d 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -34,6 +34,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/preempt.h>
+#include <linux/lockdep.h>
 #include <asm/processor.h>
 
 /*
@@ -44,10 +45,50 @@
  */
 typedef struct seqcount {
 	unsigned sequence;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } seqcount_t;
 
-#define SEQCNT_ZERO { 0 }
-#define seqcount_init(x)	do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0)
+static inline void __seqcount_init(seqcount_t *s, const char *name,
+					  struct lock_class_key *key)
+{
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	lockdep_init_map(&s->dep_map, name, key, 0);
+	s->sequence = 0;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define SEQCOUNT_DEP_MAP_INIT(lockname) \
+		.dep_map = { .name = #lockname } \
+
+# define seqcount_init(s)				\
+	do {						\
+		static struct lock_class_key __key;	\
+		__seqcount_init((s), #s, &__key);	\
+	} while (0)
+
+static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
+{
+	seqcount_t *l = (seqcount_t *)s;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
+	seqcount_release(&l->dep_map, 1, _RET_IP_);
+	local_irq_restore(flags);
+}
+
+#else
+# define SEQCOUNT_DEP_MAP_INIT(lockname)
+# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
+# define seqcount_lockdep_reader_access(x)
+#endif
+
+#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
+
 
 /**
  * __read_seqcount_begin - begin a seq-read critical section (without barrier)
@@ -75,6 +116,22 @@ repeat:
 	return ret;
 }
 
+/**
+ * read_seqcount_begin_no_lockdep - start seq-read critical section w/o lockdep
+ * @s: pointer to seqcount_t
+ * Returns: count to be passed to read_seqcount_retry
+ *
+ * read_seqcount_begin_no_lockdep opens a read critical section of the given
+ * seqcount, but without any lockdep checking. Validity of the critical
+ * section is tested by checking read_seqcount_retry function.
+ */
+static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s)
+{
+	unsigned ret = __read_seqcount_begin(s);
+	smp_rmb();
+	return ret;
+}
+
 /**
  * read_seqcount_begin - begin a seq-read critical section
  * @s: pointer to seqcount_t
@@ -86,9 +143,8 @@ repeat:
  */
 static inline unsigned read_seqcount_begin(const seqcount_t *s)
 {
-	unsigned ret = __read_seqcount_begin(s);
-	smp_rmb();
-	return ret;
+	seqcount_lockdep_reader_access(s);
+	return read_seqcount_begin_no_lockdep(s);
 }
 
 /**
@@ -108,6 +164,8 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
 static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 {
 	unsigned ret = ACCESS_ONCE(s->sequence);
+
+	seqcount_lockdep_reader_access(s);
 	smp_rmb();
 	return ret & ~1;
 }
@@ -152,14 +210,21 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
  * Sequence counter only version assumes that callers are using their
  * own mutexing.
  */
-static inline void write_seqcount_begin(seqcount_t *s)
+static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
 	s->sequence++;
 	smp_wmb();
+	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+}
+
+static inline void write_seqcount_begin(seqcount_t *s)
+{
+	write_seqcount_begin_nested(s, 0);
 }
 
 static inline void write_seqcount_end(seqcount_t *s)
 {
+	seqcount_release(&s->dep_map, 1, _RET_IP_);
 	smp_wmb();
 	s->sequence++;
 }
@@ -188,7 +253,7 @@ typedef struct {
  */
 #define __SEQLOCK_UNLOCKED(lockname)			\
 	{						\
-		.seqcount = SEQCNT_ZERO,		\
+		.seqcount = SEQCNT_ZERO(lockname),	\
 		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
 	}
 
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 28fe26b64f8a..d8d9fe3f685c 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -26,7 +26,7 @@
  * of ZERO_PAGE(), such as /dev/zero
  */
 static DEFINE_MUTEX(xip_sparse_mutex);
-static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
+static seqcount_t xip_sparse_seq = SEQCNT_ZERO(xip_sparse_seq);
 static struct page *__xip_sparse_page;
 
 /* called under xip_sparse_mutex */
-- 
cgit v1.2.3


From db751fe3ea6880ff5ac5abe60cb7b80deb5a4140 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 7 Oct 2013 15:52:00 -0700
Subject: cpuset: Fix potential deadlock w/ set_mems_allowed

After adding lockdep support to seqlock/seqcount structures,
I started seeing the following warning:

[    1.070907] ======================================================
[    1.072015] [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ]
[    1.073181] 3.11.0+ #67 Not tainted
[    1.073801] ------------------------------------------------------
[    1.074882] kworker/u4:2/708 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
[    1.076088]  (&p->mems_allowed_seq){+.+...}, at: [<ffffffff81187d7f>] new_slab+0x5f/0x280
[    1.077572]
[    1.077572] and this task is already holding:
[    1.078593]  (&(&q->__queue_lock)->rlock){..-...}, at: [<ffffffff81339f03>] blk_execute_rq_nowait+0x53/0xf0
[    1.080042] which would create a new lock dependency:
[    1.080042]  (&(&q->__queue_lock)->rlock){..-...} -> (&p->mems_allowed_seq){+.+...}
[    1.080042]
[    1.080042] but this new dependency connects a SOFTIRQ-irq-safe lock:
[    1.080042]  (&(&q->__queue_lock)->rlock){..-...}
[    1.080042] ... which became SOFTIRQ-irq-safe at:
[    1.080042]   [<ffffffff810ec179>] __lock_acquire+0x5b9/0x1db0
[    1.080042]   [<ffffffff810edfe5>] lock_acquire+0x95/0x130
[    1.080042]   [<ffffffff818968a1>] _raw_spin_lock+0x41/0x80
[    1.080042]   [<ffffffff81560c9e>] scsi_device_unbusy+0x7e/0xd0
[    1.080042]   [<ffffffff8155a612>] scsi_finish_command+0x32/0xf0
[    1.080042]   [<ffffffff81560e91>] scsi_softirq_done+0xa1/0x130
[    1.080042]   [<ffffffff8133b0f3>] blk_done_softirq+0x73/0x90
[    1.080042]   [<ffffffff81095dc0>] __do_softirq+0x110/0x2f0
[    1.080042]   [<ffffffff81095fcd>] run_ksoftirqd+0x2d/0x60
[    1.080042]   [<ffffffff810bc506>] smpboot_thread_fn+0x156/0x1e0
[    1.080042]   [<ffffffff810b3916>] kthread+0xd6/0xe0
[    1.080042]   [<ffffffff818980ac>] ret_from_fork+0x7c/0xb0
[    1.080042]
[    1.080042] to a SOFTIRQ-irq-unsafe lock:
[    1.080042]  (&p->mems_allowed_seq){+.+...}
[    1.080042] ... which became SOFTIRQ-irq-unsafe at:
[    1.080042] ...  [<ffffffff810ec1d3>] __lock_acquire+0x613/0x1db0
[    1.080042]   [<ffffffff810edfe5>] lock_acquire+0x95/0x130
[    1.080042]   [<ffffffff810b3df2>] kthreadd+0x82/0x180
[    1.080042]   [<ffffffff818980ac>] ret_from_fork+0x7c/0xb0
[    1.080042]
[    1.080042] other info that might help us debug this:
[    1.080042]
[    1.080042]  Possible interrupt unsafe locking scenario:
[    1.080042]
[    1.080042]        CPU0                    CPU1
[    1.080042]        ----                    ----
[    1.080042]   lock(&p->mems_allowed_seq);
[    1.080042]                                local_irq_disable();
[    1.080042]                                lock(&(&q->__queue_lock)->rlock);
[    1.080042]                                lock(&p->mems_allowed_seq);
[    1.080042]   <Interrupt>
[    1.080042]     lock(&(&q->__queue_lock)->rlock);
[    1.080042]
[    1.080042]  *** DEADLOCK ***

The issue stems from the kthreadd() function calling set_mems_allowed
with irqs enabled. While its possibly unlikely for the actual deadlock
to trigger, a fix is fairly simple: disable irqs before taking the
mems_allowed_seq lock.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Link: http://lkml.kernel.org/r/1381186321-4906-4-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuset.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index cc1b01cf2035..3fe661fe96d1 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -110,10 +110,14 @@ static inline bool put_mems_allowed(unsigned int seq)
 
 static inline void set_mems_allowed(nodemask_t nodemask)
 {
+	unsigned long flags;
+
 	task_lock(current);
+	local_irq_save(flags);
 	write_seqcount_begin(&current->mems_allowed_seq);
 	current->mems_allowed = nodemask;
 	write_seqcount_end(&current->mems_allowed_seq);
+	local_irq_restore(flags);
 	task_unlock(current);
 }
 
-- 
cgit v1.2.3


From 3820b4d2789f5166afdb136bb14f93166e6cfbc2 Mon Sep 17 00:00:00 2001
From: "David A. Long" <dave.long@linaro.org>
Date: Tue, 15 Oct 2013 17:04:16 -0400
Subject: uprobes: Move function declarations out of arch

Move the function declarations from the arch headers to the common
header, since only the function bodies are architecture-specific.
These changes are from Vincent Rabin's uprobes patch.

[ oleg: update arch/powerpc/include/asm/uprobes.h ]

Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: David A. Long <dave.long@linaro.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/powerpc/include/asm/uprobes.h | 7 -------
 arch/x86/include/asm/uprobes.h     | 7 -------
 include/linux/uprobes.h            | 8 ++++++++
 3 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index 23016020915e..b6fc3178372a 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -45,11 +45,4 @@ struct arch_uprobe_task {
 	unsigned long	saved_trap_nr;
 };
 
-extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
-extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
-extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif	/* _ASM_UPROBES_H */
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 6e5197910fd8..b20b4d68b934 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -49,11 +49,4 @@ struct arch_uprobe_task {
 	unsigned int			saved_tf;
 };
 
-extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
-extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
-extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif	/* _ASM_UPROBES_H */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 9e0d5a6fe7a8..28473e3f6068 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -30,6 +30,7 @@
 struct vm_area_struct;
 struct mm_struct;
 struct inode;
+struct notifier_block;
 
 #ifdef CONFIG_ARCH_SUPPORTS_UPROBES
 # include <asm/uprobes.h>
@@ -125,6 +126,13 @@ extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
 extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void uprobe_clear_state(struct mm_struct *mm);
+extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
+extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
+extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
-- 
cgit v1.2.3


From f72d41fa902fb19a9b63028202a400b0ce497491 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 5 Nov 2013 19:50:39 +0100
Subject: uprobes: Export write_opcode() as uprobe_write_opcode()

set_swbp() and set_orig_insn() are __weak, but this is pointless
because write_opcode() is static.

Export write_opcode() as uprobe_write_opcode() for the upcoming
arm port, this way it can actually override set_swbp() and use
__opcode_to_mem_arm(bpinsn) instead if UPROBE_SWBP_INSN.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h |  1 +
 kernel/events/uprobes.c | 14 +++++++-------
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 28473e3f6068..319eae70fe84 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -109,6 +109,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
+extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index fbcff61b5099..0ac346ae5edb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -245,12 +245,12 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * the architecture. If an arch has variable length instruction and the
  * breakpoint instruction is not of the smallest length instruction
  * supported by that architecture then we need to modify is_trap_at_addr and
- * write_opcode accordingly. This would never be a problem for archs that
- * have fixed length instructions.
+ * uprobe_write_opcode accordingly. This would never be a problem for archs
+ * that have fixed length instructions.
  */
 
 /*
- * write_opcode - write the opcode at a given virtual address.
+ * uprobe_write_opcode - write the opcode at a given virtual address.
  * @mm: the probed process address space.
  * @vaddr: the virtual address to store the opcode.
  * @opcode: opcode to be written at @vaddr.
@@ -261,7 +261,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * For mm @mm, write the opcode at @vaddr.
  * Return 0 (success) or a negative errno.
  */
-static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
+int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 			uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
@@ -315,7 +315,7 @@ put_old:
  */
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 /**
@@ -330,7 +330,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
 int __weak
 set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
+	return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
 }
 
 static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -577,7 +577,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	if (ret)
 		goto out;
 
-	/* write_opcode() assumes we don't cross page boundary */
+	/* uprobe_write_opcode() assumes we don't cross page boundary */
 	BUG_ON((uprobe->offset & ~PAGE_MASK) +
 			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
 
-- 
cgit v1.2.3


From 042b10d83d05174e50ee861ee3aca55fd6204324 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 6 Nov 2013 12:19:37 -0500
Subject: tracing: Remove unused function ftrace_off_permanent()

In the past, ftrace_off_permanent() was called if something
strange was detected. But the ftrace_bug() now handles all the
anomolies that can happen with ftrace (function tracing), and there
are no uses of ftrace_off_permanent(). Get rid of it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h |  2 --
 kernel/trace/trace.c   | 15 ---------------
 2 files changed, 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 672ddc4de4af..d4e98d13eff4 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -501,7 +501,6 @@ void tracing_snapshot_alloc(void);
 
 extern void tracing_start(void);
 extern void tracing_stop(void);
-extern void ftrace_off_permanent(void);
 
 static inline __printf(1, 2)
 void ____trace_printk_check_format(const char *fmt, ...)
@@ -639,7 +638,6 @@ extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 #else
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
-static inline void ftrace_off_permanent(void) { }
 static inline void trace_dump_stack(int skip) { }
 
 static inline void tracing_on(void) { }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a595cf14f1c..d72a15c0b32c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1283,21 +1283,6 @@ int is_tracing_stopped(void)
 	return global_trace.stop_count;
 }
 
-/**
- * ftrace_off_permanent - disable all ftrace code permanently
- *
- * This should only be called when a serious anomally has
- * been detected.  This will turn off the function tracing,
- * ring buffers, and other tracing utilites. It takes no
- * locks and can be called from any context.
- */
-void ftrace_off_permanent(void)
-{
-	tracing_disabled = 1;
-	ftrace_stop();
-	tracing_off_permanent();
-}
-
 /**
  * tracing_start - quick start of the tracer
  *
-- 
cgit v1.2.3


From 0e4ccb1505a9e29c50170742ce26ac4655baab2d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 6 Nov 2013 16:16:56 -0500
Subject: PCI: Add x86_msi.msi_mask_irq() and msix_mask_irq()

Certain platforms do not allow writes in the MSI-X BARs to setup or tear
down vector values.  To combat against the generic code trying to write to
that and either silently being ignored or crashing due to the pagetables
being marked R/O this patch introduces a platform override.

Note that we keep two separate, non-weak, functions default_mask_msi_irqs()
and default_mask_msix_irqs() for the behavior of the arch_mask_msi_irqs()
and arch_mask_msix_irqs(), as the default behavior is needed by x86 PCI
code.

For Xen, which does not allow the guest to write to MSI-X tables - as the
hypervisor is solely responsible for setting the vector values - we
implement two nops.

This fixes a Xen guest crash when passing a PCI device with MSI-X to the
guest.  See the bugzilla for more details.

[bhelgaas: add bugzilla info]
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=64581
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
CC: Zhenzhong Duan <zhenzhong.duan@oracle.com>
---
 arch/x86/include/asm/x86_init.h |  3 +++
 arch/x86/kernel/x86_init.c      | 10 ++++++++++
 arch/x86/pci/xen.c              | 13 ++++++++++++-
 drivers/pci/msi.c               | 22 ++++++++++++++++------
 include/linux/msi.h             |  2 ++
 5 files changed, 43 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 828a1565ba57..0f1be11e43d2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -172,6 +172,7 @@ struct x86_platform_ops {
 
 struct pci_dev;
 struct msi_msg;
+struct msi_desc;
 
 struct x86_msi_ops {
 	int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
@@ -182,6 +183,8 @@ struct x86_msi_ops {
 	void (*teardown_msi_irqs)(struct pci_dev *dev);
 	void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
 	int  (*setup_hpet_msi)(unsigned int irq, unsigned int id);
+	u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag);
+	u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag);
 };
 
 struct IO_APIC_route_entry;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 8ce0072cd700..021783b1f46a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -116,6 +116,8 @@ struct x86_msi_ops x86_msi = {
 	.teardown_msi_irqs	= default_teardown_msi_irqs,
 	.restore_msi_irqs	= default_restore_msi_irqs,
 	.setup_hpet_msi		= default_setup_hpet_msi,
+	.msi_mask_irq		= default_msi_mask_irq,
+	.msix_mask_irq		= default_msix_mask_irq,
 };
 
 /* MSI arch specific hooks */
@@ -138,6 +140,14 @@ void arch_restore_msi_irqs(struct pci_dev *dev, int irq)
 {
 	x86_msi.restore_msi_irqs(dev, irq);
 }
+u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
+{
+	return x86_msi.msi_mask_irq(desc, mask, flag);
+}
+u32 arch_msix_mask_irq(struct msi_desc *desc, u32 flag)
+{
+	return x86_msi.msix_mask_irq(desc, flag);
+}
 #endif
 
 struct x86_io_apic_ops x86_io_apic_ops = {
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 48e8461057ba..5eee4959785d 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -382,7 +382,14 @@ static void xen_teardown_msi_irq(unsigned int irq)
 {
 	xen_destroy_irq(irq);
 }
-
+static u32 xen_nop_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
+{
+	return 0;
+}
+static u32 xen_nop_msix_mask_irq(struct msi_desc *desc, u32 flag)
+{
+	return 0;
+}
 #endif
 
 int __init pci_xen_init(void)
@@ -406,6 +413,8 @@ int __init pci_xen_init(void)
 	x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
 	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 	x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
+	x86_msi.msi_mask_irq = xen_nop_msi_mask_irq;
+	x86_msi.msix_mask_irq = xen_nop_msix_mask_irq;
 #endif
 	return 0;
 }
@@ -485,6 +494,8 @@ int __init pci_xen_initial_domain(void)
 	x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
 	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
+	x86_msi.msi_mask_irq = xen_nop_msi_mask_irq;
+	x86_msi.msix_mask_irq = xen_nop_msix_mask_irq;
 #endif
 	xen_setup_acpi_sci();
 	__acpi_register_gsi = acpi_register_gsi_xen;
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 604265c40853..5e63645a7abe 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -185,7 +185,7 @@ static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
  * reliably as devices without an INTx disable bit will then generate a
  * level IRQ which will never be cleared.
  */
-static u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
+u32 default_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 {
 	u32 mask_bits = desc->masked;
 
@@ -199,9 +199,14 @@ static u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 	return mask_bits;
 }
 
+__weak u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
+{
+	return default_msi_mask_irq(desc, mask, flag);
+}
+
 static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 {
-	desc->masked = __msi_mask_irq(desc, mask, flag);
+	desc->masked = arch_msi_mask_irq(desc, mask, flag);
 }
 
 /*
@@ -211,7 +216,7 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
  * file.  This saves a few milliseconds when initialising devices with lots
  * of MSI-X interrupts.
  */
-static u32 __msix_mask_irq(struct msi_desc *desc, u32 flag)
+u32 default_msix_mask_irq(struct msi_desc *desc, u32 flag)
 {
 	u32 mask_bits = desc->masked;
 	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
@@ -224,9 +229,14 @@ static u32 __msix_mask_irq(struct msi_desc *desc, u32 flag)
 	return mask_bits;
 }
 
+__weak u32 arch_msix_mask_irq(struct msi_desc *desc, u32 flag)
+{
+	return default_msix_mask_irq(desc, flag);
+}
+
 static void msix_mask_irq(struct msi_desc *desc, u32 flag)
 {
-	desc->masked = __msix_mask_irq(desc, flag);
+	desc->masked = arch_msix_mask_irq(desc, flag);
 }
 
 static void msi_set_mask_bit(struct irq_data *data, u32 flag)
@@ -902,7 +912,7 @@ void pci_msi_shutdown(struct pci_dev *dev)
 	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &ctrl);
 	mask = msi_capable_mask(ctrl);
 	/* Keep cached state to be restored */
-	__msi_mask_irq(desc, mask, ~mask);
+	arch_msi_mask_irq(desc, mask, ~mask);
 
 	/* Restore dev->irq to its default pin-assertion irq */
 	dev->irq = desc->msi_attrib.default_irq;
@@ -998,7 +1008,7 @@ void pci_msix_shutdown(struct pci_dev *dev)
 	/* Return the device with MSI-X masked as initial states */
 	list_for_each_entry(entry, &dev->msi_list, list) {
 		/* Keep cached states to be restored */
-		__msix_mask_irq(entry, 1);
+		arch_msix_mask_irq(entry, 1);
 	}
 
 	msix_set_enable(dev, 0);
diff --git a/include/linux/msi.h b/include/linux/msi.h
index b17ead818aec..87cce50bd121 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -64,6 +64,8 @@ void arch_restore_msi_irqs(struct pci_dev *dev, int irq);
 
 void default_teardown_msi_irqs(struct pci_dev *dev);
 void default_restore_msi_irqs(struct pci_dev *dev, int irq);
+u32 default_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag);
+u32 default_msix_mask_irq(struct msi_desc *desc, u32 flag);
 
 struct msi_chip {
 	struct module *owner;
-- 
cgit v1.2.3


From f83c3838b9146b891d0405d3a83660e8f6aed02f Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Sun, 13 Oct 2013 18:05:23 -0300
Subject: mtd: Move major number definitions to major.h

This patch moves the char and block major number definitions
to major.h to be with the rest of the major numbers.
While doing this, include major.h in the files that need it.

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/block2mtd.c | 1 +
 drivers/mtd/mtdblock.c          | 1 +
 drivers/mtd/mtdblock_ro.c       | 1 +
 drivers/mtd/mtdchar.c           | 1 +
 drivers/mtd/mtdcore.c           | 1 +
 drivers/mtd/mtdsuper.c          | 1 +
 drivers/mtd/ubi/build.c         | 1 +
 include/linux/mtd/mtd.h         | 3 ---
 include/uapi/linux/major.h      | 2 ++
 9 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 5cb4c04726b2..d9fd87a4c8dc 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -20,6 +20,7 @@
 #include <linux/mutex.h>
 #include <linux/mount.h>
 #include <linux/slab.h>
+#include <linux/major.h>
 
 /* Info for the block device */
 struct block2mtd_dev {
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 53884cc25916..485ea751c7f9 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -32,6 +32,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/blktrans.h>
 #include <linux/mutex.h>
+#include <linux/major.h>
 
 
 struct mtdblk_dev {
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index 70d27b4d632b..fb5dc89369de 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -24,6 +24,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/blktrans.h>
 #include <linux/module.h>
+#include <linux/major.h>
 
 static int mtdblock_readsect(struct mtd_blktrans_dev *dev,
 			      unsigned long block, char *buf)
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 684bfa39e4ee..9aa0c5e49c1d 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -32,6 +32,7 @@
 #include <linux/mount.h>
 #include <linux/blkpg.h>
 #include <linux/magic.h>
+#include <linux/major.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/map.h>
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 92311a56939f..7189089d87e3 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -37,6 +37,7 @@
 #include <linux/backing-dev.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/major.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 334da5f583c0..20c02a3b7417 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -17,6 +17,7 @@
 #include <linux/export.h>
 #include <linux/ctype.h>
 #include <linux/slab.h>
+#include <linux/major.h>
 
 /*
  * compare superblocks to see if they're equivalent
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 315dcc6ec1f5..e05dc6298c1d 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -41,6 +41,7 @@
 #include <linux/kthread.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/major.h>
 #include "ubi.h"
 
 /* Maximum length of the 'mtd=' parameter */
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 88409b813418..8cc0e2fb6894 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -29,9 +29,6 @@
 
 #include <asm/div64.h>
 
-#define MTD_CHAR_MAJOR 90
-#define MTD_BLOCK_MAJOR 31
-
 #define MTD_ERASE_PENDING	0x01
 #define MTD_ERASING		0x02
 #define MTD_ERASE_SUSPEND	0x04
diff --git a/include/uapi/linux/major.h b/include/uapi/linux/major.h
index 6a8ca98c9a96..620252e69b44 100644
--- a/include/uapi/linux/major.h
+++ b/include/uapi/linux/major.h
@@ -54,6 +54,7 @@
 #define ACSI_MAJOR		28
 #define AZTECH_CDROM_MAJOR	29
 #define FB_MAJOR		29   /* /dev/fb* framebuffers */
+#define MTD_BLOCK_MAJOR		31
 #define CM206_CDROM_MAJOR	32
 #define IDE2_MAJOR		33
 #define IDE3_MAJOR		34
@@ -105,6 +106,7 @@
 #define IDE6_MAJOR		88
 #define IDE7_MAJOR		89
 #define IDE8_MAJOR		90
+#define MTD_CHAR_MAJOR		90
 #define IDE9_MAJOR		91
 
 #define DASD_MAJOR		94
-- 
cgit v1.2.3


From a4d62babf988fe5dfde24437fa135ef147bc7aa0 Mon Sep 17 00:00:00 2001
From: Wang Haitao <wang.haitao1@zte.com.cn>
Date: Thu, 22 Aug 2013 19:32:38 +0800
Subject: mtd: map: fixed bug in 64-bit systems

Hardware:
	CPU: XLP832,the 64-bit OS
	NOR Flash:S29GL128S 128M
Software:
	Kernel:2.6.32.41
	Filesystem:JFFS2
When writing files, errors appear:
	Write len 182  but return retlen 180
	Write of 182 bytes at 0x072c815c failed. returned -5, retlen 180
	Write len 186  but return retlen 184
	Write of 186 bytes at 0x072caff4 failed. returned -5, retlen 184
These errors exist only in 64-bit systems,not in 32-bit systems. After analysis, we
found that the left shift operation is wrong in map_word_load_partial. For instance:
	unsigned char buf[3] ={0x9e,0x3a,0xea};
	map_bankwidth(map) is 4;
	for (i=0; i < 3; i++) {
		int bitpos;
		bitpos = (map_bankwidth(map)-1-i)*8;
		orig.x[0] &= ~(0xff << bitpos);
		orig.x[0] |= buf[i] << bitpos;
	}

The value of orig.x[0] is expected to be 0x9e3aeaff, but in this situation(64-bit
System) we'll get the wrong value of 0xffffffff9e3aeaff due to the 64-bit sign
extension:
buf[i] is defined as "unsigned char" and the left-shift operation will convert it
to the type of "signed int", so when left-shift buf[i] by 24 bits, the final result
will get the wrong value: 0xffffffff9e3aeaff.

If the left-shift bits are less than 24, then sign extension will not occur. Whereas
the bankwidth of the nor flash we used is 4, therefore this BUG emerges.

Signed-off-by: Pang Xunlei <pang.xunlei@zte.com.cn>
Signed-off-by: Zhang Yi <zhang.yi20@zte.com.cn>
Signed-off-by: Lu Zhongjun <lu.zhongjun@zte.com.cn>
Cc: <stable@vger.kernel.org>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/map.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 4b02512e421c..5f487d776411 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -365,7 +365,7 @@ static inline map_word map_word_load_partial(struct map_info *map, map_word orig
 			bitpos = (map_bankwidth(map)-1-i)*8;
 #endif
 			orig.x[0] &= ~(0xff << bitpos);
-			orig.x[0] |= buf[i-start] << bitpos;
+			orig.x[0] |= (unsigned long)buf[i-start] << bitpos;
 		}
 	}
 	return orig;
@@ -384,7 +384,7 @@ static inline map_word map_word_ff(struct map_info *map)
 
 	if (map_bankwidth(map) < MAP_FF_LIMIT) {
 		int bw = 8 * map_bankwidth(map);
-		r.x[0] = (1 << bw) - 1;
+		r.x[0] = (1UL << bw) - 1;
 	} else {
 		for (i=0; i<map_words(map); i++)
 			r.x[i] = ~0UL;
-- 
cgit v1.2.3


From ac65caf514ec3e55e8d3d510ee37f80dd97418fe Mon Sep 17 00:00:00 2001
From: Pekon Gupta <pekon@ti.com>
Date: Thu, 24 Oct 2013 18:20:17 +0530
Subject: ARM: OMAP2+: cleaned-up DT support of various ECC schemes

OMAP NAND driver support multiple ECC scheme, which can used in different
flavours, depending on in-build Hardware engines present on SoC.

This patch updates following in DT bindings related to sectionion of ecc-schemes
- ti,elm-id: replaces elm_id (maintains backward compatibility)
- ti,nand-ecc-opts: selection of h/w or s/w implementation of an ecc-scheme
	depends on ti,elm-id. (supported values ham1, bch4, and bch8)
- maintain backward compatibility to deprecated DT bindings (sw, hw, hw-romcode)

Below table shows different flavours of ecc-schemes supported by OMAP devices
+---------------------------------------+---------------+---------------+
| ECC scheme                            |ECC calculation|Error detection|
+---------------------------------------+---------------+---------------+
|OMAP_ECC_HAM1_CODE_HW                  |H/W (GPMC)     |S/W            |
+---------------------------------------+---------------+---------------+
|OMAP_ECC_BCH8_CODE_HW_DETECTION_SW     |H/W (GPMC)     |S/W            |
|(requires CONFIG_MTD_NAND_ECC_BCH)     |               |               |
+---------------------------------------+---------------+---------------+
|OMAP_ECC_BCH8_CODE_HW                  |H/W (GPMC)     |H/W (ELM)      |
|(requires CONFIG_MTD_NAND_OMAP_BCH &&  |               |               |
| ti,elm-id in DT)                      |               |               |
+---------------------------------------+---------------+---------------+

To optimize footprint of omap2-nand driver, selection of some ECC schemes
also require enabling following Kconfigs, in addition to setting appropriate
DT bindings
- Kconfig:CONFIG_MTD_NAND_ECC_BCH        error detection done in software
- Kconfig:CONFIG_MTD_NAND_OMAP_BCH       error detection done by h/w engine

Signed-off-by: Pekon Gupta <pekon@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Tested-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 .../devicetree/bindings/mtd/gpmc-nand.txt          |  8 +++-
 arch/arm/mach-omap2/gpmc.c                         | 48 +++++++++++++++-------
 include/linux/platform_data/mtd-nand-omap2.h       | 13 +++++-
 3 files changed, 51 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
index df338cb5059c..bfe07e152738 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
@@ -36,8 +36,12 @@ Optional properties:
 		"prefetch-dma"		Prefetch enabled sDMA mode
 		"prefetch-irq"		Prefetch enabled irq mode
 
- - elm_id:	Specifies elm device node. This is required to support BCH
- 		error correction using ELM module.
+ - elm_id:	<deprecated> use "ti,elm-id" instead
+ - ti,elm-id:	Specifies phandle of the ELM devicetree node.
+		ELM is an on-chip hardware engine on TI SoC which is used for
+		locating ECC errors for BCHx algorithms. SoC devices which have
+		ELM hardware engines should specify this device node in .dtsi
+		Using ELM for ECC error correction frees some CPU cycles.
 
 For inline partiton table parsing (optional):
 
diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 579697adaae7..c877129142ba 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -1341,14 +1341,6 @@ static void __maybe_unused gpmc_read_timings_dt(struct device_node *np,
 
 #ifdef CONFIG_MTD_NAND
 
-static const char * const nand_ecc_opts[] = {
-	[OMAP_ECC_HAMMING_CODE_DEFAULT]		= "sw",
-	[OMAP_ECC_HAMMING_CODE_HW]		= "hw",
-	[OMAP_ECC_HAMMING_CODE_HW_ROMCODE]	= "hw-romcode",
-	[OMAP_ECC_BCH4_CODE_HW]			= "bch4",
-	[OMAP_ECC_BCH8_CODE_HW]			= "bch8",
-};
-
 static const char * const nand_xfer_types[] = {
 	[NAND_OMAP_PREFETCH_POLLED]		= "prefetch-polled",
 	[NAND_OMAP_POLLED]			= "polled",
@@ -1378,13 +1370,41 @@ static int gpmc_probe_nand_child(struct platform_device *pdev,
 	gpmc_nand_data->cs = val;
 	gpmc_nand_data->of_node = child;
 
-	if (!of_property_read_string(child, "ti,nand-ecc-opt", &s))
-		for (val = 0; val < ARRAY_SIZE(nand_ecc_opts); val++)
-			if (!strcasecmp(s, nand_ecc_opts[val])) {
-				gpmc_nand_data->ecc_opt = val;
-				break;
-			}
+	/* Detect availability of ELM module */
+	gpmc_nand_data->elm_of_node = of_parse_phandle(child, "ti,elm-id", 0);
+	if (gpmc_nand_data->elm_of_node == NULL)
+		gpmc_nand_data->elm_of_node =
+					of_parse_phandle(child, "elm_id", 0);
+	if (gpmc_nand_data->elm_of_node == NULL)
+		pr_warn("%s: ti,elm-id property not found\n", __func__);
+
+	/* select ecc-scheme for NAND */
+	if (of_property_read_string(child, "ti,nand-ecc-opt", &s)) {
+		pr_err("%s: ti,nand-ecc-opt not found\n", __func__);
+		return -ENODEV;
+	}
+	if (!strcmp(s, "ham1") || !strcmp(s, "sw") ||
+		!strcmp(s, "hw") || !strcmp(s, "hw-romcode"))
+		gpmc_nand_data->ecc_opt =
+				OMAP_ECC_HAM1_CODE_HW;
+	else if (!strcmp(s, "bch4"))
+		if (gpmc_nand_data->elm_of_node)
+			gpmc_nand_data->ecc_opt =
+				OMAP_ECC_BCH4_CODE_HW;
+		else
+			gpmc_nand_data->ecc_opt =
+				OMAP_ECC_BCH4_CODE_HW_DETECTION_SW;
+	else if (!strcmp(s, "bch8"))
+		if (gpmc_nand_data->elm_of_node)
+			gpmc_nand_data->ecc_opt =
+				OMAP_ECC_BCH8_CODE_HW;
+		else
+			gpmc_nand_data->ecc_opt =
+				OMAP_ECC_BCH8_CODE_HW_DETECTION_SW;
+	else
+		pr_err("%s: ti,nand-ecc-opt invalid value\n", __func__);
 
+	/* select data transfer mode for NAND controller */
 	if (!of_property_read_string(child, "ti,nand-xfer-type", &s))
 		for (val = 0; val < ARRAY_SIZE(nand_xfer_types); val++)
 			if (!strcasecmp(s, nand_xfer_types[val])) {
diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index 6bf9ef43ddb1..e4128f1510bf 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -28,8 +28,16 @@ enum omap_ecc {
 	OMAP_ECC_HAMMING_CODE_HW, /* gpmc to detect the error */
 		/* 1-bit ecc: stored at beginning of spare area as romcode */
 	OMAP_ECC_HAMMING_CODE_HW_ROMCODE, /* gpmc method & romcode layout */
-	OMAP_ECC_BCH4_CODE_HW, /* 4-bit BCH ecc code */
-	OMAP_ECC_BCH8_CODE_HW, /* 8-bit BCH ecc code */
+	/* 1-bit  ECC calculation by GPMC, Error detection by Software */
+	OMAP_ECC_HAM1_CODE_HW,
+	/* 4-bit  ECC calculation by GPMC, Error detection by Software */
+	OMAP_ECC_BCH4_CODE_HW_DETECTION_SW,
+	/* 4-bit  ECC calculation by GPMC, Error detection by ELM */
+	OMAP_ECC_BCH4_CODE_HW,
+	/* 8-bit  ECC calculation by GPMC, Error detection by Software */
+	OMAP_ECC_BCH8_CODE_HW_DETECTION_SW,
+	/* 8-bit  ECC calculation by GPMC, Error detection by ELM */
+	OMAP_ECC_BCH8_CODE_HW,
 };
 
 struct gpmc_nand_regs {
@@ -63,5 +71,6 @@ struct omap_nand_platform_data {
 
 	/* for passing the partitions */
 	struct device_node	*of_node;
+	struct device_node	*elm_of_node;
 };
 #endif
-- 
cgit v1.2.3


From c66d039197e42af8867e5d0d9b904daf0fb9e6bc Mon Sep 17 00:00:00 2001
From: Pekon Gupta <pekon@ti.com>
Date: Thu, 24 Oct 2013 18:20:18 +0530
Subject: mtd: nand: omap: combine different flavours of 1-bit hamming ecc
 schemes

OMAP NAND driver currently supports multiple flavours of 1-bit Hamming
ecc-scheme, like:
- OMAP_ECC_HAMMING_CODE_DEFAULT
	1-bit hamming ecc code using software library
- OMAP_ECC_HAMMING_CODE_HW
	1-bit hamming ecc-code using GPMC h/w engine
- OMAP_ECC_HAMMING_CODE_HW_ROMCODE
	1-bit hamming ecc-code using GPMC h/w engin with ecc-layout compatible
	to ROM code.

This patch combines above multiple ecc-schemes into single implementation:
- OMAP_ECC_HAM1_CODE_HW
	1-bit hamming ecc-code using GPMC h/w engine with ROM-code compatible
	ecc-layout.

Signed-off-by: Pekon Gupta <pekon@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Tested-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 Documentation/devicetree/bindings/mtd/gpmc-nand.txt | 8 ++++----
 arch/arm/mach-omap2/board-flash.c                   | 2 +-
 drivers/mtd/nand/omap2.c                            | 9 +++------
 include/linux/platform_data/mtd-nand-omap2.h        | 7 +------
 4 files changed, 9 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
index bfe07e152738..5e1f31b5ff70 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
@@ -22,10 +22,10 @@ Optional properties:
 				width of 8 is assumed.
 
  - ti,nand-ecc-opt:		A string setting the ECC layout to use. One of:
-
-		"sw"		Software method (default)
-		"hw"		Hardware method
-		"hw-romcode"	gpmc hamming mode method & romcode layout
+		"sw"		<deprecated> use "ham1" instead
+		"hw"		<deprecated> use "ham1" instead
+		"hw-romcode"	<deprecated> use "ham1" instead
+		"ham1"		1-bit Hamming ecc code
 		"bch4"		4-bit BCH ecc code
 		"bch8"		8-bit BCH ecc code
 
diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c
index fc20a61f6b2a..ac82512b9c8c 100644
--- a/arch/arm/mach-omap2/board-flash.c
+++ b/arch/arm/mach-omap2/board-flash.c
@@ -142,7 +142,7 @@ __init board_nand_init(struct mtd_partition *nand_parts, u8 nr_parts, u8 cs,
 	board_nand_data.nr_parts	= nr_parts;
 	board_nand_data.devsize		= nand_type;
 
-	board_nand_data.ecc_opt = OMAP_ECC_HAMMING_CODE_DEFAULT;
+	board_nand_data.ecc_opt = OMAP_ECC_BCH8_CODE_HW;
 	gpmc_nand_init(&board_nand_data, gpmc_t);
 }
 #endif /* CONFIG_MTD_NAND_OMAP2 || CONFIG_MTD_NAND_OMAP2_MODULE */
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 4ecf0e5fd484..8d521aa001c8 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1993,10 +1993,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 	}
 
 	/* select the ecc type */
-	if (pdata->ecc_opt == OMAP_ECC_HAMMING_CODE_DEFAULT)
-		info->nand.ecc.mode = NAND_ECC_SOFT;
-	else if ((pdata->ecc_opt == OMAP_ECC_HAMMING_CODE_HW) ||
-		(pdata->ecc_opt == OMAP_ECC_HAMMING_CODE_HW_ROMCODE)) {
+	if (pdata->ecc_opt == OMAP_ECC_HAM1_CODE_HW) {
 		info->nand.ecc.bytes            = 3;
 		info->nand.ecc.size             = 512;
 		info->nand.ecc.strength         = 1;
@@ -2025,7 +2022,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 	}
 
 	/* rom code layout */
-	if (pdata->ecc_opt == OMAP_ECC_HAMMING_CODE_HW_ROMCODE) {
+	if (pdata->ecc_opt == OMAP_ECC_HAM1_CODE_HW) {
 
 		if (info->nand.options & NAND_BUSWIDTH_16)
 			offset = 2;
@@ -2033,7 +2030,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 			offset = 1;
 			info->nand.badblock_pattern = &bb_descrip_flashbased;
 		}
-		omap_oobinfo.eccbytes = 3 * (info->mtd.oobsize/16);
+		omap_oobinfo.eccbytes = 3 * (info->mtd.writesize / 512);
 		for (i = 0; i < omap_oobinfo.eccbytes; i++)
 			omap_oobinfo.eccpos[i] = i+offset;
 
diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index e4128f1510bf..4da5bfa2147f 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -23,13 +23,8 @@ enum nand_io {
 };
 
 enum omap_ecc {
-		/* 1-bit ecc: stored at end of spare area */
-	OMAP_ECC_HAMMING_CODE_DEFAULT = 0, /* Default, s/w method */
-	OMAP_ECC_HAMMING_CODE_HW, /* gpmc to detect the error */
-		/* 1-bit ecc: stored at beginning of spare area as romcode */
-	OMAP_ECC_HAMMING_CODE_HW_ROMCODE, /* gpmc method & romcode layout */
 	/* 1-bit  ECC calculation by GPMC, Error detection by Software */
-	OMAP_ECC_HAM1_CODE_HW,
+	OMAP_ECC_HAM1_CODE_HW = 0,
 	/* 4-bit  ECC calculation by GPMC, Error detection by Software */
 	OMAP_ECC_BCH4_CODE_HW_DETECTION_SW,
 	/* 4-bit  ECC calculation by GPMC, Error detection by ELM */
-- 
cgit v1.2.3


From 5961ad2cb4dd14933889f5219e0d8505669d752d Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Wed, 30 Oct 2013 00:41:30 -0400
Subject: mtd: nand_bbt: kill NAND_BBT_SCANALLPAGES

Now that the last user of NAND_BBT_SCANALLPAGES has been removed, let's
kill this peculiar BBT feature flag.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Reviewed-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 Documentation/DocBook/mtdnand.tmpl |  2 --
 drivers/mtd/nand/nand_bbt.c        | 37 +++----------------------------------
 include/linux/mtd/bbm.h            |  2 --
 3 files changed, 3 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index a248f42a121e..cd11926e07c7 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -1222,8 +1222,6 @@ in this page</entry>
 #define NAND_BBT_VERSION	0x00000100
 /* Create a bbt if none axists */
 #define NAND_BBT_CREATE		0x00000200
-/* Search good / bad pattern through all pages of a block */
-#define NAND_BBT_SCANALLPAGES	0x00000400
 /* Write bbt if neccecary */
 #define NAND_BBT_WRITE		0x00001000
 /* Read and write back block contents when writing bbt */
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index c75b6a7c6ea4..c0615d1526f9 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -412,25 +412,6 @@ static void read_abs_bbts(struct mtd_info *mtd, uint8_t *buf,
 	}
 }
 
-/* Scan a given block full */
-static int scan_block_full(struct mtd_info *mtd, struct nand_bbt_descr *bd,
-			   loff_t offs, uint8_t *buf, size_t readlen,
-			   int scanlen, int numpages)
-{
-	int ret, j;
-
-	ret = scan_read_oob(mtd, buf, offs, readlen);
-	/* Ignore ECC errors when checking for BBM */
-	if (ret && !mtd_is_bitflip_or_eccerr(ret))
-		return ret;
-
-	for (j = 0; j < numpages; j++, buf += scanlen) {
-		if (check_pattern(buf, scanlen, mtd->writesize, bd))
-			return 1;
-	}
-	return 0;
-}
-
 /* Scan a given block partially */
 static int scan_block_fast(struct mtd_info *mtd, struct nand_bbt_descr *bd,
 			   loff_t offs, uint8_t *buf, int numpages)
@@ -477,24 +458,17 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf,
 	struct nand_bbt_descr *bd, int chip)
 {
 	struct nand_chip *this = mtd->priv;
-	int i, numblocks, numpages, scanlen;
+	int i, numblocks, numpages;
 	int startblock;
 	loff_t from;
-	size_t readlen;
 
 	pr_info("Scanning device for bad blocks\n");
 
-	if (bd->options & NAND_BBT_SCANALLPAGES)
-		numpages = 1 << (this->bbt_erase_shift - this->page_shift);
-	else if (bd->options & NAND_BBT_SCAN2NDPAGE)
+	if (bd->options & NAND_BBT_SCAN2NDPAGE)
 		numpages = 2;
 	else
 		numpages = 1;
 
-	/* We need only read few bytes from the OOB area */
-	scanlen = 0;
-	readlen = bd->len;
-
 	if (chip == -1) {
 		numblocks = mtd->size >> this->bbt_erase_shift;
 		startblock = 0;
@@ -519,12 +493,7 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf,
 
 		BUG_ON(bd->options & NAND_BBT_NO_OOB);
 
-		if (bd->options & NAND_BBT_SCANALLPAGES)
-			ret = scan_block_full(mtd, bd, from, buf, readlen,
-					      scanlen, numpages);
-		else
-			ret = scan_block_fast(mtd, bd, from, buf, numpages);
-
+		ret = scan_block_fast(mtd, bd, from, buf, numpages);
 		if (ret < 0)
 			return ret;
 
diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h
index 95fc482cef36..36bb6a503f19 100644
--- a/include/linux/mtd/bbm.h
+++ b/include/linux/mtd/bbm.h
@@ -91,8 +91,6 @@ struct nand_bbt_descr {
  * with NAND_BBT_CREATE.
  */
 #define NAND_BBT_CREATE_EMPTY	0x00000400
-/* Search good / bad pattern through all pages of a block */
-#define NAND_BBT_SCANALLPAGES	0x00000800
 /* Write bbt if neccecary */
 #define NAND_BBT_WRITE		0x00002000
 /* Read and write back block contents when writing bbt */
-- 
cgit v1.2.3


From 5702941eec32cfd7b8cf9e36a0936e48170011a4 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 5 Jul 2013 00:31:36 +0800
Subject: irqchip: bcm2835: Convert to use IRQCHIP_DECLARE macro

This patch converts irq-bcm2835 driver to use the new IRQCHIP_DECLARE and
irqchip_init.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Tested-by: Stephen Warren <swarren@wwwdotorg.org>
Cc: Simon Arlott <simon@fire.lp0.eu>
Cc: Olof Johansson <olof@lixom.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linux-rpi-kernel@lists.infradead.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm/mach-bcm2835/bcm2835.c |  5 ++---
 drivers/irqchip/irq-bcm2835.c   | 22 ++++++++++------------
 include/linux/irqchip/bcm2835.h | 29 -----------------------------
 3 files changed, 12 insertions(+), 44 deletions(-)
 delete mode 100644 include/linux/irqchip/bcm2835.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-bcm2835/bcm2835.c b/arch/arm/mach-bcm2835/bcm2835.c
index 40686d7ef500..a4abd851905a 100644
--- a/arch/arm/mach-bcm2835/bcm2835.c
+++ b/arch/arm/mach-bcm2835/bcm2835.c
@@ -14,7 +14,7 @@
 
 #include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/irqchip/bcm2835.h>
+#include <linux/irqchip.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/clk/bcm2835.h>
@@ -131,8 +131,7 @@ static const char * const bcm2835_compat[] = {
 
 DT_MACHINE_START(BCM2835, "BCM2835")
 	.map_io = bcm2835_map_io,
-	.init_irq = bcm2835_init_irq,
-	.handle_irq = bcm2835_handle_irq,
+	.init_irq = irqchip_init,
 	.init_machine = bcm2835_init,
 	.init_time = clocksource_of_init,
 	.restart = bcm2835_restart,
diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c
index 16c78f1c5ef2..1693b8e7f26a 100644
--- a/drivers/irqchip/irq-bcm2835.c
+++ b/drivers/irqchip/irq-bcm2835.c
@@ -49,9 +49,11 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/irqdomain.h>
-#include <linux/irqchip/bcm2835.h>
 
 #include <asm/exception.h>
+#include <asm/mach/irq.h>
+
+#include "irqchip.h"
 
 /* Put the bank and irq (32 bits) into the hwirq */
 #define MAKE_HWIRQ(b, n)	((b << 5) | (n))
@@ -93,6 +95,8 @@ struct armctrl_ic {
 };
 
 static struct armctrl_ic intc __read_mostly;
+static asmlinkage void __exception_irq_entry bcm2835_handle_irq(
+	struct pt_regs *regs);
 
 static void armctrl_mask_irq(struct irq_data *d)
 {
@@ -164,17 +168,9 @@ static int __init armctrl_of_init(struct device_node *node,
 			set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 		}
 	}
-	return 0;
-}
-
-static struct of_device_id irq_of_match[] __initconst = {
-	{ .compatible = "brcm,bcm2835-armctrl-ic", .data = armctrl_of_init },
-	{ }
-};
 
-void __init bcm2835_init_irq(void)
-{
-	of_irq_init(irq_of_match);
+	set_handle_irq(bcm2835_handle_irq);
+	return 0;
 }
 
 /*
@@ -200,7 +196,7 @@ static void armctrl_handle_shortcut(int bank, struct pt_regs *regs,
 	handle_IRQ(irq_linear_revmap(intc.domain, irq), regs);
 }
 
-asmlinkage void __exception_irq_entry bcm2835_handle_irq(
+static asmlinkage void __exception_irq_entry bcm2835_handle_irq(
 	struct pt_regs *regs)
 {
 	u32 stat, irq;
@@ -222,3 +218,5 @@ asmlinkage void __exception_irq_entry bcm2835_handle_irq(
 		}
 	}
 }
+
+IRQCHIP_DECLARE(bcm2835_armctrl_ic, "brcm,bcm2835-armctrl-ic", armctrl_of_init);
diff --git a/include/linux/irqchip/bcm2835.h b/include/linux/irqchip/bcm2835.h
deleted file mode 100644
index 48a859bc9dca..000000000000
--- a/include/linux/irqchip/bcm2835.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2010 Broadcom
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef __LINUX_IRQCHIP_BCM2835_H_
-#define __LINUX_IRQCHIP_BCM2835_H_
-
-#include <asm/exception.h>
-
-extern void bcm2835_init_irq(void);
-
-extern asmlinkage void __exception_irq_entry bcm2835_handle_irq(
-	struct pt_regs *regs);
-
-#endif
-- 
cgit v1.2.3


From c6bde215acfd637708142ae671843b6f0eadbc6d Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 6 Nov 2013 10:11:48 -0700
Subject: PCI: Add pci_upstream_bridge()

This adds a pci_upstream_bridge() interface to find the PCI-to-PCI bridge
upstream from a device.  This is typically just "dev->bus->self", but in
the case of a VF on a virtual bus, we have to start from the corresponding
PF.  Returns NULL if there is no upstream PCI bridge, i.e., if the device
is on a root bus.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
---
 include/linux/pci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index d3a888ae4b2e..835ec7bf6c05 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -480,6 +480,15 @@ static inline bool pci_is_root_bus(struct pci_bus *pbus)
 	return !(pbus->parent);
 }
 
+static inline struct pci_dev *pci_upstream_bridge(struct pci_dev *dev)
+{
+	dev = pci_physfn(dev);
+	if (pci_is_root_bus(dev->bus))
+		return NULL;
+
+	return dev->bus->self;
+}
+
 #ifdef CONFIG_PCI_MSI
 static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
 {
-- 
cgit v1.2.3


From d32435391974e39c35ade4d115f17c538a96a708 Mon Sep 17 00:00:00 2001
From: Eyal Perry <eyalpe@mellanox.com>
Date: Wed, 6 Nov 2013 15:37:23 +0200
Subject: net/vlan: Provide read access to the vlan egress map

Provide a method for read-only access to the vlan device egress mapping.

Do this by refactoring vlan_dev_get_egress_qos_mask() such that now it
receives as an argument the skb priority instead of pointer to the skb.

Such an access is needed for the IBoE stack where the control plane
goes through the network stack. This is an add-on step on top of commit
d4a968658c "net/route: export symbol ip_tos2prio" which allowed the RDMA-CM
to use ip_tos2prio.

Signed-off-by: Eyal Perry <eyalpe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h |  9 ++++++++-
 net/8021q/vlan_dev.c    | 18 ++++++++++++------
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 715c343f7c00..f3088a0112cf 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -88,7 +88,8 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
 					       __be16 vlan_proto, u16 vlan_id);
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
-
+extern u16 vlan_dev_get_egress_qos_mask(struct net_device *dev,
+					u32 skprio);
 extern bool vlan_do_receive(struct sk_buff **skb);
 extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 
@@ -121,6 +122,12 @@ static inline u16 vlan_dev_vlan_id(const struct net_device *dev)
 	return 0;
 }
 
+static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev,
+					       u32 skprio)
+{
+	return 0;
+}
+
 static inline bool vlan_do_receive(struct sk_buff **skb)
 {
 	return false;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 09bf1c38805b..13904a414929 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -69,15 +69,15 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 }
 
 static inline u16
-vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
+__vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
 {
 	struct vlan_priority_tci_mapping *mp;
 
 	smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
 
-	mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)];
+	mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
 	while (mp) {
-		if (mp->priority == skb->priority) {
+		if (mp->priority == skprio) {
 			return mp->vlan_qos; /* This should already be shifted
 					      * to mask correctly with the
 					      * VLAN's TCI */
@@ -87,6 +87,12 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
 	return 0;
 }
 
+u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
+{
+	return __vlan_dev_get_egress_qos_mask(dev, skprio);
+}
+EXPORT_SYMBOL(vlan_dev_get_egress_qos_mask);
+
 /*
  *	Create the VLAN header for an arbitrary protocol layer
  *
@@ -111,7 +117,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
 		vlan_tci = vlan->vlan_id;
-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+		vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority);
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
 		/*
@@ -168,7 +174,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
 		vlan_tci = vlan->vlan_id;
-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+		vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority);
 		skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
 	}
 
@@ -253,7 +259,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 	np->vlan_qos = vlan_qos;
 	/* Before inserting this element in hash table, make sure all its fields
 	 * are committed to memory.
-	 * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
+	 * coupled with smp_rmb() in __vlan_dev_get_egress_qos_mask()
 	 */
 	smp_wmb();
 	vlan->egress_priority_map[skb_prio & 0xF] = np;
-- 
cgit v1.2.3


From df42153c59a38a65c6f7440d5c70d87d1c24438d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 7 Nov 2013 10:48:49 +0300
Subject: net: make ndev->irq signed for error handling

There is a bug in cpsw_probe() where we do:

	ndev->irq = platform_get_irq(pdev, 0);
	if (ndev->irq < 0) {

The problem is that "ndev->irq" is unsigned so the error handling
doesn't work.  I have changed it to a regular int.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e6353cafbf05..b6f6efbcfc74 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1132,7 +1132,7 @@ struct net_device {
 	unsigned long		mem_end;	/* shared mem end	*/
 	unsigned long		mem_start;	/* shared mem start	*/
 	unsigned long		base_addr;	/* device I/O address	*/
-	unsigned int		irq;		/* device IRQ number	*/
+	int			irq;		/* device IRQ number	*/
 
 	/*
 	 *	Some hardware also needs these fields, but they are not
-- 
cgit v1.2.3


From a6cc0cfa72e0b6d9f2c8fd858aacc32313c4f272 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Wed, 6 Nov 2013 09:54:46 -0800
Subject: net: Add layer 2 hardware acceleration operations for macvlan devices

Add a operations structure that allows a network interface to export
the fact that it supports package forwarding in hardware between
physical interfaces and other mac layer devices assigned to it (such
as macvlans). This operaions structure can be used by virtual mac
devices to bypass software switching so that forwarding can be done
in hardware more efficiently.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c           | 36 +++++++++++++++++++++++++++++++++++-
 include/linux/if_macvlan.h      |  1 +
 include/linux/netdev_features.h |  2 ++
 include/linux/netdevice.h       | 36 +++++++++++++++++++++++++++++++++++-
 include/uapi/linux/if.h         |  1 +
 net/core/dev.c                  | 18 +++++++++++++-----
 net/core/ethtool.c              |  1 +
 net/sched/sch_generic.c         |  2 +-
 8 files changed, 89 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index cc9845ec91c1..af4aaa5893ff 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -297,7 +297,13 @@ netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 	int ret;
 	const struct macvlan_dev *vlan = netdev_priv(dev);
 
-	ret = macvlan_queue_xmit(skb, dev);
+	if (vlan->fwd_priv) {
+		skb->dev = vlan->lowerdev;
+		ret = dev_hard_start_xmit(skb, skb->dev, NULL, vlan->fwd_priv);
+	} else {
+		ret = macvlan_queue_xmit(skb, dev);
+	}
+
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		struct macvlan_pcpu_stats *pcpu_stats;
 
@@ -347,6 +353,21 @@ static int macvlan_open(struct net_device *dev)
 		goto hash_add;
 	}
 
+	if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) {
+		vlan->fwd_priv =
+		      lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
+
+		/* If we get a NULL pointer back, or if we get an error
+		 * then we should just fall through to the non accelerated path
+		 */
+		if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
+			vlan->fwd_priv = NULL;
+		} else {
+			dev->features &= ~NETIF_F_LLTX;
+			return 0;
+		}
+	}
+
 	err = -EBUSY;
 	if (macvlan_addr_busy(vlan->port, dev->dev_addr))
 		goto out;
@@ -367,6 +388,11 @@ hash_add:
 del_unicast:
 	dev_uc_del(lowerdev, dev->dev_addr);
 out:
+	if (vlan->fwd_priv) {
+		lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
+							   vlan->fwd_priv);
+		vlan->fwd_priv = NULL;
+	}
 	return err;
 }
 
@@ -375,6 +401,13 @@ static int macvlan_stop(struct net_device *dev)
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
 
+	if (vlan->fwd_priv) {
+		lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
+							   vlan->fwd_priv);
+		vlan->fwd_priv = NULL;
+		return 0;
+	}
+
 	dev_uc_unsync(lowerdev, dev);
 	dev_mc_unsync(lowerdev, dev);
 
@@ -833,6 +866,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		goto destroy_port;
 
+	dev->priv_flags |= IFF_MACVLAN;
 	err = netdev_upper_dev_link(lowerdev, dev);
 	if (err)
 		goto destroy_port;
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index ddd33fd5904d..c2702856295e 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -61,6 +61,7 @@ struct macvlan_dev {
 	struct hlist_node	hlist;
 	struct macvlan_port	*port;
 	struct net_device	*lowerdev;
+	void			*fwd_priv;
 	struct macvlan_pcpu_stats __percpu *pcpu_stats;
 
 	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index b05a4b501ab5..1005ebf17575 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -62,6 +62,7 @@ enum {
 	NETIF_F_HW_VLAN_STAG_TX_BIT,	/* Transmit VLAN STAG HW acceleration */
 	NETIF_F_HW_VLAN_STAG_RX_BIT,	/* Receive VLAN STAG HW acceleration */
 	NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */
+	NETIF_F_HW_L2FW_DOFFLOAD_BIT,	/* Allow L2 Forwarding in Hardware */
 
 	/*
 	 * Add your fresh new feature above and remember to update
@@ -116,6 +117,7 @@ enum {
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
 #define NETIF_F_HW_VLAN_STAG_RX	__NETIF_F(HW_VLAN_STAG_RX)
 #define NETIF_F_HW_VLAN_STAG_TX	__NETIF_F(HW_VLAN_STAG_TX)
+#define NETIF_F_HW_L2FW_DOFFLOAD	__NETIF_F(HW_L2FW_DOFFLOAD)
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b6f6efbcfc74..15fa01c9a3bf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -962,6 +962,25 @@ struct netdev_phys_port_id {
  *	Called by vxlan to notify the driver about a UDP port and socket
  *	address family that vxlan is not listening to anymore. The operation
  *	is protected by the vxlan_net->sock_lock.
+ *
+ * void* (*ndo_dfwd_add_station)(struct net_device *pdev,
+ *				 struct net_device *dev)
+ *	Called by upper layer devices to accelerate switching or other
+ *	station functionality into hardware. 'pdev is the lowerdev
+ *	to use for the offload and 'dev' is the net device that will
+ *	back the offload. Returns a pointer to the private structure
+ *	the upper layer will maintain.
+ * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv)
+ *	Called by upper layer device to delete the station created
+ *	by 'ndo_dfwd_add_station'. 'pdev' is the net device backing
+ *	the station and priv is the structure returned by the add
+ *	operation.
+ * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb,
+ *				      struct net_device *dev,
+ *				      void *priv);
+ *	Callback to use for xmit over the accelerated station. This
+ *	is used in place of ndo_start_xmit on accelerated net
+ *	devices.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1098,6 +1117,15 @@ struct net_device_ops {
 	void			(*ndo_del_vxlan_port)(struct  net_device *dev,
 						      sa_family_t sa_family,
 						      __be16 port);
+
+	void*			(*ndo_dfwd_add_station)(struct net_device *pdev,
+							struct net_device *dev);
+	void			(*ndo_dfwd_del_station)(struct net_device *pdev,
+							void *priv);
+
+	netdev_tx_t		(*ndo_dfwd_start_xmit) (struct sk_buff *skb,
+							struct net_device *dev,
+							void *priv);
 };
 
 /*
@@ -1195,6 +1223,7 @@ struct net_device {
 	/* Management operations */
 	const struct net_device_ops *netdev_ops;
 	const struct ethtool_ops *ethtool_ops;
+	const struct forwarding_accel_ops *fwd_ops;
 
 	/* Hardware header description */
 	const struct header_ops *header_ops;
@@ -2388,7 +2417,7 @@ int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			struct netdev_queue *txq);
+			struct netdev_queue *txq, void *accel_priv);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 
 extern int		netdev_budget;
@@ -2967,6 +2996,11 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
 	dev->gso_max_size = size;
 }
 
+static inline bool netif_is_macvlan(struct net_device *dev)
+{
+	return dev->priv_flags & IFF_MACVLAN;
+}
+
 static inline bool netif_is_bond_master(struct net_device *dev)
 {
 	return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING;
diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
index 1ec407b01e46..d758163b0e43 100644
--- a/include/uapi/linux/if.h
+++ b/include/uapi/linux/if.h
@@ -83,6 +83,7 @@
 #define IFF_SUPP_NOFCS	0x80000		/* device supports sending custom FCS */
 #define IFF_LIVE_ADDR_CHANGE 0x100000	/* device supports hardware address
 					 * change when it's running */
+#define IFF_MACVLAN 0x200000		/* Macvlan device */
 
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
diff --git a/net/core/dev.c b/net/core/dev.c
index 0e6136546a8c..8ffc52e01ece 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2538,7 +2538,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			struct netdev_queue *txq)
+			struct netdev_queue *txq, void *accel_priv)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc = NETDEV_TX_OK;
@@ -2604,9 +2604,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			dev_queue_xmit_nit(skb, dev);
 
 		skb_len = skb->len;
-		rc = ops->ndo_start_xmit(skb, dev);
+		if (accel_priv)
+			rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv);
+		else
+			rc = ops->ndo_start_xmit(skb, dev);
+
 		trace_net_dev_xmit(skb, rc, dev, skb_len);
-		if (rc == NETDEV_TX_OK)
+		if (rc == NETDEV_TX_OK && txq)
 			txq_trans_update(txq);
 		return rc;
 	}
@@ -2622,7 +2626,10 @@ gso:
 			dev_queue_xmit_nit(nskb, dev);
 
 		skb_len = nskb->len;
-		rc = ops->ndo_start_xmit(nskb, dev);
+		if (accel_priv)
+			rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv);
+		else
+			rc = ops->ndo_start_xmit(nskb, dev);
 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
@@ -2647,6 +2654,7 @@ out_kfree_skb:
 out:
 	return rc;
 }
+EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
 
 static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
@@ -2854,7 +2862,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 			if (!netif_xmit_stopped(txq)) {
 				__this_cpu_inc(xmit_recursion);
-				rc = dev_hard_start_xmit(skb, dev, txq);
+				rc = dev_hard_start_xmit(skb, dev, txq, NULL);
 				__this_cpu_dec(xmit_recursion);
 				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 862989898f61..30071dec287a 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -96,6 +96,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_LOOPBACK_BIT] =         "loopback",
 	[NETIF_F_RXFCS_BIT] =            "rx-fcs",
 	[NETIF_F_RXALL_BIT] =            "rx-all",
+	[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
 };
 
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7fc899a943a8..922a09406ba7 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -126,7 +126,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_xmit_frozen_or_stopped(txq))
-		ret = dev_hard_start_xmit(skb, dev, txq);
+		ret = dev_hard_start_xmit(skb, dev, txq, NULL);
 
 	HARD_TX_UNLOCK(dev, txq);
 
-- 
cgit v1.2.3


From 6e7136ed7793fa4948b0192dcd6862d12a50d67c Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 7 Nov 2013 12:19:53 +0200
Subject: net/mlx4_core: ICM pages are allocated on device NUMA node

This is done to optimize FW/HW access to host memory.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.com>
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/icm.c  | 42 ++++++++++++++++++++++---------
 drivers/net/ethernet/mellanox/mlx4/main.c |  1 +
 include/linux/mlx4/device.h               |  1 +
 3 files changed, 32 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 31d02649be41..5fbf4924c272 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -93,13 +93,17 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
 	kfree(icm);
 }
 
-static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order,
+				gfp_t gfp_mask, int node)
 {
 	struct page *page;
 
-	page = alloc_pages(gfp_mask, order);
-	if (!page)
-		return -ENOMEM;
+	page = alloc_pages_node(node, gfp_mask, order);
+	if (!page) {
+		page = alloc_pages(gfp_mask, order);
+		if (!page)
+			return -ENOMEM;
+	}
 
 	sg_set_page(mem, page, PAGE_SIZE << order, 0);
 	return 0;
@@ -130,9 +134,15 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 	/* We use sg_set_buf for coherent allocs, which assumes low memory */
 	BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
 
-	icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
-	if (!icm)
-		return NULL;
+	icm = kmalloc_node(sizeof(*icm),
+			   gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
+			   dev->numa_node);
+	if (!icm) {
+		icm = kmalloc(sizeof(*icm),
+			      gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+		if (!icm)
+			return NULL;
+	}
 
 	icm->refcount = 0;
 	INIT_LIST_HEAD(&icm->chunk_list);
@@ -141,10 +151,17 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 
 	while (npages > 0) {
 		if (!chunk) {
-			chunk = kmalloc(sizeof *chunk,
-					gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
-			if (!chunk)
-				goto fail;
+			chunk = kmalloc_node(sizeof(*chunk),
+					     gfp_mask & ~(__GFP_HIGHMEM |
+							  __GFP_NOWARN),
+					     dev->numa_node);
+			if (!chunk) {
+				chunk = kmalloc(sizeof(*chunk),
+						gfp_mask & ~(__GFP_HIGHMEM |
+							     __GFP_NOWARN));
+				if (!chunk)
+					goto fail;
+			}
 
 			sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
 			chunk->npages = 0;
@@ -161,7 +178,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 						      cur_order, gfp_mask);
 		else
 			ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
-						   cur_order, gfp_mask);
+						   cur_order, gfp_mask,
+						   dev->numa_node);
 
 		if (ret) {
 			if (--cur_order < 0)
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 7d2628dfdc29..5789ea2c934d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2191,6 +2191,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
 	mutex_init(&priv->bf_mutex);
 
 	dev->rev_id = pdev->revision;
+	dev->numa_node = dev_to_node(&pdev->dev);
 	/* Detect if this device is a virtual function */
 	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
 		/* When acting as pf, we normally skip vfs unless explicitly
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index f6f59271f857..4cf0b0153639 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -662,6 +662,7 @@ struct mlx4_dev {
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
 	int			num_vfs;
+	int			numa_node;
 	int			oper_log_mgm_entry_size;
 	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
 	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
-- 
cgit v1.2.3


From 163561a4e2f8af44e96453bc10c7a4f9bcc736e1 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 7 Nov 2013 12:19:54 +0200
Subject: net/mlx4_en: Datapath structures are allocated per NUMA node

For each RX/TX ring and its CQ, allocation is done on a NUMA node that
corresponds to the core that the data structure should operate on.
The assumption is that the core number is reflected by the ring index.
The affected allocations are the ring/CQ data structures,
the TX/RX info and the shared HW/SW buffer.
For TX rings, each core has rings of all UPs.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.com>
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_cq.c     | 17 ++++++++++---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 12 ++++++---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 23 +++++++++++------
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     | 34 +++++++++++++++++---------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  6 ++---
 drivers/net/ethernet/mellanox/mlx4/pd.c        | 11 ++++++---
 include/linux/mlx4/device.h                    |  2 +-
 7 files changed, 71 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index d203f11b9edf..3a098cc4d349 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -45,16 +45,20 @@ static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event)
 
 int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 		      struct mlx4_en_cq **pcq,
-		      int entries, int ring, enum cq_type mode)
+		      int entries, int ring, enum cq_type mode,
+		      int node)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_cq *cq;
 	int err;
 
-	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+	cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node);
 	if (!cq) {
-		en_err(priv, "Failed to allocate CQ structure\n");
-		return -ENOMEM;
+		cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+		if (!cq) {
+			en_err(priv, "Failed to allocate CQ structure\n");
+			return -ENOMEM;
+		}
 	}
 
 	cq->size = entries;
@@ -64,8 +68,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	cq->is_tx = mode;
 	spin_lock_init(&cq->lock);
 
+	/* Allocate HW buffers on provided NUMA node.
+	 * dev->numa_node is used in mtt range allocation flow.
+	 */
+	set_dev_node(&mdev->dev->pdev->dev, node);
 	err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
 				cq->buf_size, 2 * PAGE_SIZE);
+	set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
 	if (err)
 		goto err_cq;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index f430788cc4fe..e72d8a112a6b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1895,6 +1895,7 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 	struct mlx4_en_port_profile *prof = priv->prof;
 	int i;
 	int err;
+	int node;
 
 	err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &priv->base_tx_qpn);
 	if (err) {
@@ -1904,23 +1905,26 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 
 	/* Create tx Rings */
 	for (i = 0; i < priv->tx_ring_num; i++) {
+		node = cpu_to_node(i % num_online_cpus());
 		if (mlx4_en_create_cq(priv, &priv->tx_cq[i],
-				      prof->tx_ring_size, i, TX))
+				      prof->tx_ring_size, i, TX, node))
 			goto err;
 
 		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], priv->base_tx_qpn + i,
-					   prof->tx_ring_size, TXBB_SIZE))
+					   prof->tx_ring_size, TXBB_SIZE, node))
 			goto err;
 	}
 
 	/* Create rx Rings */
 	for (i = 0; i < priv->rx_ring_num; i++) {
+		node = cpu_to_node(i % num_online_cpus());
 		if (mlx4_en_create_cq(priv, &priv->rx_cq[i],
-				      prof->rx_ring_size, i, RX))
+				      prof->rx_ring_size, i, RX, node))
 			goto err;
 
 		if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
-					   prof->rx_ring_size, priv->stride))
+					   prof->rx_ring_size, priv->stride,
+					   node))
 			goto err;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 1c45f88776c5..07a1d0fbae47 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -320,17 +320,20 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
 
 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_rx_ring **pring,
-			   u32 size, u16 stride)
+			   u32 size, u16 stride, int node)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rx_ring *ring;
 	int err = -ENOMEM;
 	int tmp;
 
-	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
 	if (!ring) {
-		en_err(priv, "Failed to allocate RX ring structure\n");
-		return -ENOMEM;
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+		if (!ring) {
+			en_err(priv, "Failed to allocate RX ring structure\n");
+			return -ENOMEM;
+		}
 	}
 
 	ring->prod = 0;
@@ -343,17 +346,23 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 
 	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
 					sizeof(struct mlx4_en_rx_alloc));
-	ring->rx_info = vmalloc(tmp);
+	ring->rx_info = vmalloc_node(tmp, node);
 	if (!ring->rx_info) {
-		err = -ENOMEM;
-		goto err_ring;
+		ring->rx_info = vmalloc(tmp);
+		if (!ring->rx_info) {
+			err = -ENOMEM;
+			goto err_ring;
+		}
 	}
 
 	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
 		 ring->rx_info, tmp);
 
+	/* Allocate HW buffers on provided NUMA node */
+	set_dev_node(&mdev->dev->pdev->dev, node);
 	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
 				 ring->buf_size, 2 * PAGE_SIZE);
+	set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
 	if (err)
 		goto err_info;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index d4e4cf30a720..f54ebd5a1702 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -55,17 +55,20 @@ MODULE_PARM_DESC(inline_thold, "threshold for using inline data");
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_tx_ring **pring, int qpn, u32 size,
-			   u16 stride)
+			   u16 stride, int node)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_tx_ring *ring;
 	int tmp;
 	int err;
 
-	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
 	if (!ring) {
-		en_err(priv, "Failed allocating TX ring\n");
-		return -ENOMEM;
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+		if (!ring) {
+			en_err(priv, "Failed allocating TX ring\n");
+			return -ENOMEM;
+		}
 	}
 
 	ring->size = size;
@@ -75,24 +78,33 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 	inline_thold = min(inline_thold, MAX_INLINE);
 
 	tmp = size * sizeof(struct mlx4_en_tx_info);
-	ring->tx_info = vmalloc(tmp);
+	ring->tx_info = vmalloc_node(tmp, node);
 	if (!ring->tx_info) {
-		err = -ENOMEM;
-		goto err_ring;
+		ring->tx_info = vmalloc(tmp);
+		if (!ring->tx_info) {
+			err = -ENOMEM;
+			goto err_ring;
+		}
 	}
 
 	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
 		 ring->tx_info, tmp);
 
-	ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+	ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
 	if (!ring->bounce_buf) {
-		err = -ENOMEM;
-		goto err_info;
+		ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+		if (!ring->bounce_buf) {
+			err = -ENOMEM;
+			goto err_info;
+		}
 	}
 	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
 
+	/* Allocate HW buffers on provided NUMA node */
+	set_dev_node(&mdev->dev->pdev->dev, node);
 	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
 				 2 * PAGE_SIZE);
+	set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
 	if (err) {
 		en_err(priv, "Failed allocating hwq resources\n");
 		goto err_bounce;
@@ -118,7 +130,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 	}
 	ring->qp.event = mlx4_en_sqp_event;
 
-	err = mlx4_bf_alloc(mdev->dev, &ring->bf);
+	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
 	if (err) {
 		en_dbg(DRV, priv, "working without blueflame (%d)", err);
 		ring->bf.uar = &mdev->priv_uar;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index b2547ae07dfa..f3758de59c05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -705,7 +705,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv);
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
 
 int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq,
-		      int entries, int ring, enum cq_type mode);
+		      int entries, int ring, enum cq_type mode, int node);
 void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq);
 int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 			int cq_idx);
@@ -719,7 +719,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_tx_ring **pring,
-			   int qpn, u32 size, u16 stride);
+			   int qpn, u32 size, u16 stride, int node);
 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_tx_ring **pring);
 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
@@ -730,7 +730,7 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
 
 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_rx_ring **pring,
-			   u32 size, u16 stride);
+			   u32 size, u16 stride, int node);
 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_rx_ring **pring,
 			     u32 size, u16 stride);
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 00f223acada7..84cfb40bf451 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -168,7 +168,7 @@ void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
 }
 EXPORT_SYMBOL_GPL(mlx4_uar_free);
 
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_uar *uar;
@@ -186,10 +186,13 @@ int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
 			err = -ENOMEM;
 			goto out;
 		}
-		uar = kmalloc(sizeof *uar, GFP_KERNEL);
+		uar = kmalloc_node(sizeof(*uar), GFP_KERNEL, node);
 		if (!uar) {
-			err = -ENOMEM;
-			goto out;
+			uar = kmalloc(sizeof(*uar), GFP_KERNEL);
+			if (!uar) {
+				err = -ENOMEM;
+				goto out;
+			}
 		}
 		err = mlx4_uar_alloc(dev, uar);
 		if (err)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 4cf0b0153639..7d3a523160ba 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -835,7 +835,7 @@ void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
 void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf);
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node);
 void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf);
 
 int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
-- 
cgit v1.2.3


From 0c7ddf36c29c3ce12f2d2931a357ccaa0861035a Mon Sep 17 00:00:00 2001
From: Mathias Krause <mathias.krause@secunet.com>
Date: Thu, 7 Nov 2013 14:18:24 +0100
Subject: net: move pskb_put() to core code

This function has usage beside IPsec so move it to the core skbuff code.
While doing so, give it some documentation and change its return type to
'unsigned char *' to be in line with skb_put().

Signed-off-by: Mathias Krause <mathias.krause@secunet.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  1 +
 include/net/esp.h      |  2 --
 net/core/skbuff.c      | 23 +++++++++++++++++++++++
 net/xfrm/xfrm_algo.c   | 13 -------------
 4 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2e153b69d318..491dd6c2c6cc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1417,6 +1417,7 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
 /*
  *	Add data to an sk_buff
  */
+unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
 unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 {
diff --git a/include/net/esp.h b/include/net/esp.h
index c92213c38312..a43be85aedc4 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -3,8 +3,6 @@
 
 #include <linux/skbuff.h>
 
-void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
-
 struct ip_esp_hdr;
 
 static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3735fad5616e..2fbea08c4f50 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1263,6 +1263,29 @@ free_skb:
 }
 EXPORT_SYMBOL(skb_pad);
 
+/**
+ *	pskb_put - add data to the tail of a potentially fragmented buffer
+ *	@skb: start of the buffer to use
+ *	@tail: tail fragment of the buffer to use
+ *	@len: amount of data to add
+ *
+ *	This function extends the used data area of the potentially
+ *	fragmented buffer. @tail must be the last fragment of @skb -- or
+ *	@skb itself. If this would exceed the total buffer size the kernel
+ *	will panic. A pointer to the first byte of the extra data is
+ *	returned.
+ */
+
+unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+{
+	if (tail != skb) {
+		skb->data_len += len;
+		skb->len += len;
+	}
+	return skb_put(tail, len);
+}
+EXPORT_SYMBOL_GPL(pskb_put);
+
 /**
  *	skb_put - add data to a buffer
  *	@skb: buffer to use
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index ab4ef72f0b1d..debe733386f8 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -802,17 +802,4 @@ int xfrm_count_pfkey_enc_supported(void)
 }
 EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported);
 
-#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
-
-void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
-{
-	if (tail != skb) {
-		skb->data_len += len;
-		skb->len += len;
-	}
-	return skb_put(tail, len);
-}
-EXPORT_SYMBOL_GPL(pskb_put);
-#endif
-
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From bc32383cd6496d595e6a25cdc7cff1da6b694462 Mon Sep 17 00:00:00 2001
From: Mathias Krause <mathias.krause@secunet.com>
Date: Thu, 7 Nov 2013 14:18:26 +0100
Subject: net: skbuff - kernel-doc fixes

Use "@" to refer to parameters in the kernel-doc description. According
to Documentation/kernel-doc-nano-HOWTO.txt "&" shall be used to refer to
structures only.

Signed-off-by: Mathias Krause <mathias.krause@secunet.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 +-
 net/core/skbuff.c      | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 491dd6c2c6cc..036ec7d8a83a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1357,7 +1357,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
  * @size: the length of the data
  *
  * As per __skb_fill_page_desc() -- initialises the @i'th fragment of
- * @skb to point to &size bytes at offset @off within @page. In
+ * @skb to point to @size bytes at offset @off within @page. In
  * addition updates @skb such that @i is the last fragment.
  *
  * Does not take any additional reference on the fragment.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2fbea08c4f50..8c5197fe55a4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1051,8 +1051,8 @@ EXPORT_SYMBOL(__pskb_copy);
  *	@ntail: room to add at tail
  *	@gfp_mask: allocation priority
  *
- *	Expands (or creates identical copy, if &nhead and &ntail are zero)
- *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
+ *	Expands (or creates identical copy, if @nhead and @ntail are zero)
+ *	header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
  *	reference count of 1. Returns zero in the case of success or error,
  *	if expansion failed. In the last case, &sk_buff is not changed.
  *
@@ -2563,14 +2563,14 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
  * @data: destination pointer for data to be returned
  * @st: state variable
  *
- * Reads a block of skb data at &consumed relative to the
+ * Reads a block of skb data at @consumed relative to the
  * lower offset specified to skb_prepare_seq_read(). Assigns
- * the head of the data block to &data and returns the length
+ * the head of the data block to @data and returns the length
  * of the block or 0 if the end of the skb data or the upper
  * offset has been reached.
  *
  * The caller is not required to consume all of the data
- * returned, i.e. &consumed is typically set to the number
+ * returned, i.e. @consumed is typically set to the number
  * of bytes already consumed and the next call to
  * skb_seq_read() will return the remaining part of the block.
  *
-- 
cgit v1.2.3


From a404d5576bbe586a1097a8bc2f32c5f22651b0aa Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 17 Sep 2013 22:30:31 +0200
Subject: blktrace: Send BLK_TN_PROCESS events to all running traces

Currently each task sends BLK_TN_PROCESS event to the first traced
device it interacts with after a new trace is started. When there are
several traced devices and the task accesses more devices, this logic
can result in BLK_TN_PROCESS being sent several times to some devices
while it is never sent to other devices. Thus blkparse doesn't display
command name when parsing some blktrace files.

Fix the problem by sending BLK_TN_PROCESS event to all traced devices
when a task interacts with any of them.

Signed-off-by: Jan Kara <jack@suse.cz>
Review-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blktrace_api.h |  2 ++
 kernel/trace/blktrace.c      | 33 +++++++++++++++++++++++++++------
 2 files changed, 29 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7c2e030e72f1..a12f6ed91c84 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -5,6 +5,7 @@
 #include <linux/relay.h>
 #include <linux/compat.h>
 #include <uapi/linux/blktrace_api.h>
+#include <linux/list.h>
 
 #if defined(CONFIG_BLK_DEV_IO_TRACE)
 
@@ -23,6 +24,7 @@ struct blk_trace {
 	struct dentry *dir;
 	struct dentry *dropped_file;
 	struct dentry *msg_file;
+	struct list_head running_list;
 	atomic_t dropped;
 };
 
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b8b8560bfb95..7f727b34280d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -26,6 +26,7 @@
 #include <linux/export.h>
 #include <linux/time.h>
 #include <linux/uaccess.h>
+#include <linux/list.h>
 
 #include <trace/events/block.h>
 
@@ -38,6 +39,9 @@ static unsigned int blktrace_seq __read_mostly = 1;
 static struct trace_array *blk_tr;
 static bool blk_tracer_enabled __read_mostly;
 
+static LIST_HEAD(running_trace_list);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);
+
 /* Select an alternative, minimalistic output than the original one */
 #define TRACE_BLK_OPT_CLASSIC	0x1
 
@@ -107,10 +111,18 @@ record_it:
  * Send out a notify for this process, if we haven't done so since a trace
  * started
  */
-static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
+static void trace_note_tsk(struct task_struct *tsk)
 {
+	unsigned long flags;
+	struct blk_trace *bt;
+
 	tsk->btrace_seq = blktrace_seq;
-	trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
+	spin_lock_irqsave(&running_trace_lock, flags);
+	list_for_each_entry(bt, &running_trace_list, running_list) {
+		trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
+			   sizeof(tsk->comm));
+	}
+	spin_unlock_irqrestore(&running_trace_lock, flags);
 }
 
 static void trace_note_time(struct blk_trace *bt)
@@ -229,16 +241,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 		goto record_it;
 	}
 
+	if (unlikely(tsk->btrace_seq != blktrace_seq))
+		trace_note_tsk(tsk);
+
 	/*
 	 * A word about the locking here - we disable interrupts to reserve
 	 * some space in the relay per-cpu buffer, to prevent an irq
 	 * from coming in and stepping on our toes.
 	 */
 	local_irq_save(flags);
-
-	if (unlikely(tsk->btrace_seq != blktrace_seq))
-		trace_note_tsk(bt, tsk);
-
 	t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
 	if (t) {
 		sequence = per_cpu_ptr(bt->sequence, cpu);
@@ -477,6 +488,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	bt->dir = dir;
 	bt->dev = dev;
 	atomic_set(&bt->dropped, 0);
+	INIT_LIST_HEAD(&bt->running_list);
 
 	ret = -EIO;
 	bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
@@ -601,6 +613,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
 			blktrace_seq++;
 			smp_mb();
 			bt->trace_state = Blktrace_running;
+			spin_lock_irq(&running_trace_lock);
+			list_add(&bt->running_list, &running_trace_list);
+			spin_unlock_irq(&running_trace_lock);
 
 			trace_note_time(bt);
 			ret = 0;
@@ -608,6 +623,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
 	} else {
 		if (bt->trace_state == Blktrace_running) {
 			bt->trace_state = Blktrace_stopped;
+			spin_lock_irq(&running_trace_lock);
+			list_del_init(&bt->running_list);
+			spin_unlock_irq(&running_trace_lock);
 			relay_flush(bt->rchan);
 			ret = 0;
 		}
@@ -1472,6 +1490,9 @@ static int blk_trace_remove_queue(struct request_queue *q)
 	if (atomic_dec_and_test(&blk_probes_ref))
 		blk_unregister_tracepoints();
 
+	spin_lock_irq(&running_trace_lock);
+	list_del(&bt->running_list);
+	spin_unlock_irq(&running_trace_lock);
 	blk_trace_free(bt);
 	return 0;
 }
-- 
cgit v1.2.3


From 8077c0d983ab276ec5f2700df56a64d671781905 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 14 Oct 2013 12:14:13 -0400
Subject: bdi: test bdi_init failure

There were two places where return value from bdi_init was not tested.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/char_dev.c               | 3 ++-
 include/linux/backing-dev.h | 4 ++--
 mm/swap.c                   | 3 ++-
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index afc2bb691780..b9bc05b9513a 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -574,7 +574,8 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data)
 void __init chrdev_init(void)
 {
 	cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
-	bdi_init(&directly_mappable_cdev_bdi);
+	if (bdi_init(&directly_mappable_cdev_bdi))
+		panic("Failed to init directly mappable cdev bdi");
 }
 
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 5f66d519a726..24819001f5c8 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -109,7 +109,7 @@ struct backing_dev_info {
 #endif
 };
 
-int bdi_init(struct backing_dev_info *bdi);
+int __must_check bdi_init(struct backing_dev_info *bdi);
 void bdi_destroy(struct backing_dev_info *bdi);
 
 __printf(3, 4)
@@ -117,7 +117,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
-int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
+int __must_check bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
diff --git a/mm/swap.c b/mm/swap.c
index 759c3caf44bd..7a9f80d451f5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -934,7 +934,8 @@ void __init swap_setup(void)
 #ifdef CONFIG_SWAP
 	int i;
 
-	bdi_init(swapper_spaces[0].backing_dev_info);
+	if (bdi_init(swapper_spaces[0].backing_dev_info))
+		panic("Failed to init swap bdi");
 	for (i = 0; i < MAX_SWAPFILES; i++) {
 		spin_lock_init(&swapper_spaces[i].tree_lock);
 		INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
-- 
cgit v1.2.3


From 6678d83f18386eb103f8345024e52c5abe61725c Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 7 Aug 2013 11:14:32 -0700
Subject: block: Consolidate duplicated bio_trim() implementations

Someone cut and pasted md's md_trim_bio() into xen-blkfront.c. Come on,
we should know better than this.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Neil Brown <neilb@suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/xen-blkfront.c | 53 +-------------------------------------------
 drivers/md/md.c              | 40 ---------------------------------
 drivers/md/md.h              |  1 -
 drivers/md/raid1.c           | 10 ++++-----
 drivers/md/raid10.c          | 18 +++++++--------
 fs/bio.c                     | 46 ++++++++++++++++++++++++++++++++++++++
 include/linux/bio.h          |  1 +
 7 files changed, 61 insertions(+), 108 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a4660bbee8a6..8d53ed293606 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1336,57 +1336,6 @@ static int blkfront_probe(struct xenbus_device *dev,
 	return 0;
 }
 
-/*
- * This is a clone of md_trim_bio, used to split a bio into smaller ones
- */
-static void trim_bio(struct bio *bio, int offset, int size)
-{
-	/* 'bio' is a cloned bio which we need to trim to match
-	 * the given offset and size.
-	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
-	 */
-	int i;
-	struct bio_vec *bvec;
-	int sofar = 0;
-
-	size <<= 9;
-	if (offset == 0 && size == bio->bi_size)
-		return;
-
-	bio->bi_sector += offset;
-	bio->bi_size = size;
-	offset <<= 9;
-	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
-
-	while (bio->bi_idx < bio->bi_vcnt &&
-	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
-		/* remove this whole bio_vec */
-		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
-		bio->bi_idx++;
-	}
-	if (bio->bi_idx < bio->bi_vcnt) {
-		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
-		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
-	}
-	/* avoid any complications with bi_idx being non-zero*/
-	if (bio->bi_idx) {
-		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
-			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
-		bio->bi_vcnt -= bio->bi_idx;
-		bio->bi_idx = 0;
-	}
-	/* Make sure vcnt and last bv are not too big */
-	bio_for_each_segment(bvec, bio, i) {
-		if (sofar + bvec->bv_len > size)
-			bvec->bv_len = size - sofar;
-		if (bvec->bv_len == 0) {
-			bio->bi_vcnt = i;
-			break;
-		}
-		sofar += bvec->bv_len;
-	}
-}
-
 static void split_bio_end(struct bio *bio, int error)
 {
 	struct split_bio *split_bio = bio->bi_private;
@@ -1522,7 +1471,7 @@ static int blkif_recover(struct blkfront_info *info)
 					   (unsigned int)(bio->bi_size >> 9) - offset);
 				cloned_bio = bio_clone(bio, GFP_NOIO);
 				BUG_ON(cloned_bio == NULL);
-				trim_bio(cloned_bio, offset, size);
+				bio_trim(cloned_bio, offset, size);
 				cloned_bio->bi_private = split_bio;
 				cloned_bio->bi_end_io = split_bio_end;
 				submit_bio(cloned_bio->bi_rw, cloned_bio);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 561a65f82e26..752119068d66 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -183,46 +183,6 @@ struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL_GPL(bio_clone_mddev);
 
-void md_trim_bio(struct bio *bio, int offset, int size)
-{
-	/* 'bio' is a cloned bio which we need to trim to match
-	 * the given offset and size.
-	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
-	 */
-	int i;
-	struct bio_vec *bvec;
-	int sofar = 0;
-
-	size <<= 9;
-	if (offset == 0 && size == bio->bi_size)
-		return;
-
-	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
-
-	bio_advance(bio, offset << 9);
-
-	bio->bi_size = size;
-
-	/* avoid any complications with bi_idx being non-zero*/
-	if (bio->bi_idx) {
-		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
-			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
-		bio->bi_vcnt -= bio->bi_idx;
-		bio->bi_idx = 0;
-	}
-	/* Make sure vcnt and last bv are not too big */
-	bio_for_each_segment(bvec, bio, i) {
-		if (sofar + bvec->bv_len > size)
-			bvec->bv_len = size - sofar;
-		if (bvec->bv_len == 0) {
-			bio->bi_vcnt = i;
-			break;
-		}
-		sofar += bvec->bv_len;
-	}
-}
-EXPORT_SYMBOL_GPL(md_trim_bio);
-
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 608050c43f17..c96456c16700 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -617,7 +617,6 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
 				   struct mddev *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
 				   struct mddev *mddev);
-extern void md_trim_bio(struct bio *bio, int offset, int size);
 
 extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
 static inline int mddev_check_plugged(struct mddev *mddev)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index aacf6bf352d8..af6681b19776 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1097,8 +1097,8 @@ read_again:
 		r1_bio->read_disk = rdisk;
 
 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(read_bio, r1_bio->sector - bio->bi_sector,
-			    max_sectors);
+		bio_trim(read_bio, r1_bio->sector - bio->bi_sector,
+			 max_sectors);
 
 		r1_bio->bios[rdisk] = read_bio;
 
@@ -1266,7 +1266,7 @@ read_again:
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(mbio, r1_bio->sector - bio->bi_sector, max_sectors);
+		bio_trim(mbio, r1_bio->sector - bio->bi_sector, max_sectors);
 
 		if (first_clone) {
 			/* do behind I/O ?
@@ -2126,7 +2126,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
 		wbio->bi_sector = r1_bio->sector;
 		wbio->bi_size = r1_bio->sectors << 9;
 
-		md_trim_bio(wbio, sector - r1_bio->sector, sectors);
+		bio_trim(wbio, sector - r1_bio->sector, sectors);
 		wbio->bi_sector += rdev->data_offset;
 		wbio->bi_bdev = rdev->bdev;
 		if (submit_bio_wait(WRITE, wbio) == 0)
@@ -2241,7 +2241,7 @@ read_more:
 		}
 		r1_bio->read_disk = disk;
 		bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
-		md_trim_bio(bio, r1_bio->sector - bio->bi_sector, max_sectors);
+		bio_trim(bio, r1_bio->sector - bio->bi_sector, max_sectors);
 		r1_bio->bios[r1_bio->read_disk] = bio;
 		rdev = conf->mirrors[disk].rdev;
 		printk_ratelimited(KERN_ERR
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 73dc8a377522..7c3508abb5e1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1302,8 +1302,8 @@ read_again:
 		slot = r10_bio->read_slot;
 
 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(read_bio, r10_bio->sector - bio->bi_sector,
-			    max_sectors);
+		bio_trim(read_bio, r10_bio->sector - bio->bi_sector,
+			 max_sectors);
 
 		r10_bio->devs[slot].bio = read_bio;
 		r10_bio->devs[slot].rdev = rdev;
@@ -1510,8 +1510,8 @@ retry_write:
 		if (r10_bio->devs[i].bio) {
 			struct md_rdev *rdev = conf->mirrors[d].rdev;
 			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-				    max_sectors);
+			bio_trim(mbio, r10_bio->sector - bio->bi_sector,
+				 max_sectors);
 			r10_bio->devs[i].bio = mbio;
 
 			mbio->bi_sector	= (r10_bio->devs[i].addr+
@@ -1553,8 +1553,8 @@ retry_write:
 				rdev = conf->mirrors[d].rdev;
 			}
 			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-				    max_sectors);
+			bio_trim(mbio, r10_bio->sector - bio->bi_sector,
+				 max_sectors);
 			r10_bio->devs[i].repl_bio = mbio;
 
 			mbio->bi_sector	= (r10_bio->devs[i].addr +
@@ -2614,7 +2614,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
 			sectors = sect_to_write;
 		/* Write at 'sector' for 'sectors' */
 		wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(wbio, sector - bio->bi_sector, sectors);
+		bio_trim(wbio, sector - bio->bi_sector, sectors);
 		wbio->bi_sector = (r10_bio->devs[i].addr+
 				   choose_data_offset(r10_bio, rdev) +
 				   (sector - r10_bio->sector));
@@ -2687,9 +2687,7 @@ read_more:
 		(unsigned long long)r10_bio->sector);
 	bio = bio_clone_mddev(r10_bio->master_bio,
 			      GFP_NOIO, mddev);
-	md_trim_bio(bio,
-		    r10_bio->sector - bio->bi_sector,
-		    max_sectors);
+	bio_trim(bio, r10_bio->sector - bio->bi_sector, max_sectors);
 	r10_bio->devs[slot].bio = bio;
 	r10_bio->devs[slot].rdev = rdev;
 	bio->bi_sector = r10_bio->devs[slot].addr
diff --git a/fs/bio.c b/fs/bio.c
index ea5035da4d9a..2bdb4e25ee77 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1804,6 +1804,52 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 }
 EXPORT_SYMBOL(bio_split);
 
+/**
+ * bio_trim - trim a bio
+ * @bio:	bio to trim
+ * @offset:	number of sectors to trim from the front of @bio
+ * @size:	size we want to trim @bio to, in sectors
+ */
+void bio_trim(struct bio *bio, int offset, int size)
+{
+	/* 'bio' is a cloned bio which we need to trim to match
+	 * the given offset and size.
+	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
+	 */
+	int i;
+	struct bio_vec *bvec;
+	int sofar = 0;
+
+	size <<= 9;
+	if (offset == 0 && size == bio->bi_size)
+		return;
+
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+	bio_advance(bio, offset << 9);
+
+	bio->bi_size = size;
+
+	/* avoid any complications with bi_idx being non-zero*/
+	if (bio->bi_idx) {
+		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+		bio->bi_vcnt -= bio->bi_idx;
+		bio->bi_idx = 0;
+	}
+	/* Make sure vcnt and last bv are not too big */
+	bio_for_each_segment(bvec, bio, i) {
+		if (sofar + bvec->bv_len > size)
+			bvec->bv_len = size - sofar;
+		if (bvec->bv_len == 0) {
+			bio->bi_vcnt = i;
+			break;
+		}
+		sofar += bvec->bv_len;
+	}
+}
+EXPORT_SYMBOL_GPL(bio_trim);
+
 /**
  *      bio_sector_offset - Find hardware sector offset in bio
  *      @bio:           bio to inspect
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ec48bac5b039..162036aca741 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -218,6 +218,7 @@ struct bio_pair {
 };
 extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
 extern void bio_pair_release(struct bio_pair *dbio);
+extern void bio_trim(struct bio *bio, int offset, int size);
 
 extern struct bio_set *bioset_create(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
-- 
cgit v1.2.3


From f8c5e94486671ffcac696886c246baa6ba89b5cf Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Sun, 3 Nov 2013 22:23:39 +0800
Subject: kernel: trace: blktrace: remove redundent memcpy() in
 compat_blk_trace_setup()

do_blk_trace_setup() will fully initialize 'buts.name', so can remove
the related memcpy(). And also use BLKTRACE_BDEV_SIZE and ARRAY_SIZE
instead of hard code number '32'.

Signed-off-by: Chen Gang <gang.chen@asianux.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blktrace_api.h | 2 +-
 kernel/trace/blktrace.c      | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index a12f6ed91c84..afc1343df3c7 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -89,7 +89,7 @@ static inline int blk_trace_init_sysfs(struct device *dev)
 #ifdef CONFIG_COMPAT
 
 struct compat_blk_user_trace_setup {
-	char name[32];
+	char name[BLKTRACE_BDEV_SIZE];
 	u16 act_mask;
 	u32 buf_size;
 	u32 buf_nr;
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7f727b34280d..f785aef65799 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -579,13 +579,12 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
 		.end_lba = cbuts.end_lba,
 		.pid = cbuts.pid,
 	};
-	memcpy(&buts.name, &cbuts.name, 32);
 
 	ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
 	if (ret)
 		return ret;
 
-	if (copy_to_user(arg, &buts.name, 32)) {
+	if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
 		blk_trace_remove(q);
 		return -EFAULT;
 	}
-- 
cgit v1.2.3


From a8d3f362f52b65207cacbfb4c50f75e9d4751ef6 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Thu, 7 Nov 2013 12:13:27 -0600
Subject: dt/irq: add empty of_irq_count for !OF_IRQ

Add an empty version of of_irq_count for !OF_IRQ. This fixes build error
on sparc in linux-next:

drivers/gpio/gpio-bcm-kona.c:542: undefined reference to `of_irq_count'

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/of_irq.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index c0d6dfe80895..3f23b4472c31 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -37,12 +37,20 @@ extern int of_irq_parse_one(struct device_node *device, int index,
 extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
 extern int of_irq_to_resource(struct device_node *dev, int index,
 			      struct resource *r);
-extern int of_irq_count(struct device_node *dev);
 extern int of_irq_to_resource_table(struct device_node *dev,
 		struct resource *res, int nr_irqs);
 
 extern void of_irq_init(const struct of_device_id *matches);
 
+#ifdef CONFIG_OF_IRQ
+extern int of_irq_count(struct device_node *dev);
+#else
+static inline int of_irq_count(struct device_node *dev)
+{
+	return 0;
+}
+#endif
+
 #if defined(CONFIG_OF)
 /*
  * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC
-- 
cgit v1.2.3


From 746b5583c1a48a837f4891adaff5e09d61b204a6 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Wed, 23 Oct 2013 09:53:14 +0300
Subject: IB/mlx5: Multithreaded create MR

Use asynchronous commands to execute up to eight concurrent create MR
commands. This is to fill memory caches faster so we keep consuming
from there.  Also, increase timeout for shrinking caches to five
minutes.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx5/main.c                 |   3 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h              |   6 +
 drivers/infiniband/hw/mlx5/mr.c                   | 163 +++++++++++++++++-----
 drivers/infiniband/hw/mlx5/qp.c                   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c     | 106 ++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |   8 +-
 drivers/net/ethernet/mellanox/mlx5/core/mr.c      |  32 +++--
 include/linux/mlx5/driver.h                       |  17 ++-
 8 files changed, 255 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b1a6cb3a2809..306534109627 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -745,7 +745,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
 	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	seg->start_addr = 0;
 
-	err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
+	err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
+				    NULL, NULL, NULL);
 	if (err) {
 		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
 		goto err_in;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 836be9157242..4c134d93d4fc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -262,6 +262,9 @@ struct mlx5_ib_mr {
 	int			npages;
 	struct completion	done;
 	enum ib_wc_status	status;
+	struct mlx5_ib_dev     *dev;
+	struct mlx5_create_mkey_mbox_out out;
+	unsigned long		start;
 };
 
 struct mlx5_ib_fast_reg_page_list {
@@ -323,6 +326,7 @@ struct mlx5_cache_ent {
 	struct mlx5_ib_dev     *dev;
 	struct work_struct	work;
 	struct delayed_work	dwork;
+	int			pending;
 };
 
 struct mlx5_mr_cache {
@@ -358,6 +362,8 @@ struct mlx5_ib_dev {
 	spinlock_t			mr_lock;
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
+	struct timer_list		delay_timer;
+	int				fill_delay;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3453580b1eb2..d7f202290747 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -35,11 +35,13 @@
 #include <linux/random.h>
 #include <linux/debugfs.h>
 #include <linux/export.h>
+#include <linux/delay.h>
 #include <rdma/ib_umem.h>
 #include "mlx5_ib.h"
 
 enum {
 	DEF_CACHE_SIZE	= 10,
+	MAX_PENDING_REG_MR = 8,
 };
 
 enum {
@@ -63,6 +65,57 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
 		return order - cache->ent[0].order;
 }
 
+static void reg_mr_callback(int status, void *context)
+{
+	struct mlx5_ib_mr *mr = context;
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct mlx5_mr_cache *cache = &dev->cache;
+	int c = order2idx(dev, mr->order);
+	struct mlx5_cache_ent *ent = &cache->ent[c];
+	u8 key;
+	unsigned long delta = jiffies - mr->start;
+	unsigned long index;
+	unsigned long flags;
+
+	index = find_last_bit(&delta, 8 * sizeof(delta));
+	if (index == 64)
+		index = 0;
+
+	spin_lock_irqsave(&ent->lock, flags);
+	ent->pending--;
+	spin_unlock_irqrestore(&ent->lock, flags);
+	if (status) {
+		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+		kfree(mr);
+		dev->fill_delay = 1;
+		mod_timer(&dev->delay_timer, jiffies + HZ);
+		return;
+	}
+
+	if (mr->out.hdr.status) {
+		mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
+			     mr->out.hdr.status,
+			     be32_to_cpu(mr->out.hdr.syndrome));
+		kfree(mr);
+		dev->fill_delay = 1;
+		mod_timer(&dev->delay_timer, jiffies + HZ);
+		return;
+	}
+
+	spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
+	key = dev->mdev.priv.mkey_key++;
+	spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
+	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+
+	cache->last_add = jiffies;
+
+	spin_lock_irqsave(&ent->lock, flags);
+	list_add_tail(&mr->list, &ent->head);
+	ent->cur++;
+	ent->size++;
+	spin_unlock_irqrestore(&ent->lock, flags);
+}
+
 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -78,36 +131,39 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		return -ENOMEM;
 
 	for (i = 0; i < num; i++) {
+		if (ent->pending >= MAX_PENDING_REG_MR) {
+			err = -EAGAIN;
+			break;
+		}
+
 		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 		if (!mr) {
 			err = -ENOMEM;
-			goto out;
+			break;
 		}
 		mr->order = ent->order;
 		mr->umred = 1;
+		mr->dev = dev;
 		in->seg.status = 1 << 6;
 		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
 		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
 		in->seg.log2_page_size = 12;
 
+		spin_lock_irq(&ent->lock);
+		ent->pending++;
+		spin_unlock_irq(&ent->lock);
+		mr->start = jiffies;
 		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
-					    sizeof(*in));
+					    sizeof(*in), reg_mr_callback,
+					    mr, &mr->out);
 		if (err) {
 			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
 			kfree(mr);
-			goto out;
+			break;
 		}
-		cache->last_add = jiffies;
-
-		spin_lock(&ent->lock);
-		list_add_tail(&mr->list, &ent->head);
-		ent->cur++;
-		ent->size++;
-		spin_unlock(&ent->lock);
 	}
 
-out:
 	kfree(in);
 	return err;
 }
@@ -121,16 +177,16 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 	int i;
 
 	for (i = 0; i < num; i++) {
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (list_empty(&ent->head)) {
-			spin_unlock(&ent->lock);
+			spin_unlock_irq(&ent->lock);
 			return;
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 		list_del(&mr->list);
 		ent->cur--;
 		ent->size--;
-		spin_unlock(&ent->lock);
+		spin_unlock_irq(&ent->lock);
 		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
 		if (err)
 			mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -162,9 +218,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 		return -EINVAL;
 
 	if (var > ent->size) {
-		err = add_keys(dev, c, var - ent->size);
-		if (err)
-			return err;
+		do {
+			err = add_keys(dev, c, var - ent->size);
+			if (err && err != -EAGAIN)
+				return err;
+
+			usleep_range(3000, 5000);
+		} while (err);
 	} else if (var < ent->size) {
 		remove_keys(dev, c, ent->size - var);
 	}
@@ -280,23 +340,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 	struct mlx5_ib_dev *dev = ent->dev;
 	struct mlx5_mr_cache *cache = &dev->cache;
 	int i = order2idx(dev, ent->order);
+	int err;
 
 	if (cache->stopped)
 		return;
 
 	ent = &dev->cache.ent[i];
-	if (ent->cur < 2 * ent->limit) {
-		add_keys(dev, i, 1);
-		if (ent->cur < 2 * ent->limit)
-			queue_work(cache->wq, &ent->work);
+	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
+		err = add_keys(dev, i, 1);
+		if (ent->cur < 2 * ent->limit) {
+			if (err == -EAGAIN) {
+				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
+					    i + 2);
+				queue_delayed_work(cache->wq, &ent->dwork,
+						   msecs_to_jiffies(3));
+			} else if (err) {
+				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
+					     i + 2, err);
+				queue_delayed_work(cache->wq, &ent->dwork,
+						   msecs_to_jiffies(1000));
+			} else {
+				queue_work(cache->wq, &ent->work);
+			}
+		}
 	} else if (ent->cur > 2 * ent->limit) {
 		if (!someone_adding(cache) &&
-		    time_after(jiffies, cache->last_add + 60 * HZ)) {
+		    time_after(jiffies, cache->last_add + 300 * HZ)) {
 			remove_keys(dev, i, 1);
 			if (ent->cur > ent->limit)
 				queue_work(cache->wq, &ent->work);
 		} else {
-			queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
+			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
 		}
 	}
 }
@@ -336,18 +410,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 
 		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
 
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (!list_empty(&ent->head)) {
 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 					      list);
 			list_del(&mr->list);
 			ent->cur--;
-			spin_unlock(&ent->lock);
+			spin_unlock_irq(&ent->lock);
 			if (ent->cur < ent->limit)
 				queue_work(cache->wq, &ent->work);
 			break;
 		}
-		spin_unlock(&ent->lock);
+		spin_unlock_irq(&ent->lock);
 
 		queue_work(cache->wq, &ent->work);
 
@@ -374,12 +448,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		return;
 	}
 	ent = &cache->ent[c];
-	spin_lock(&ent->lock);
+	spin_lock_irq(&ent->lock);
 	list_add_tail(&mr->list, &ent->head);
 	ent->cur++;
 	if (ent->cur > 2 * ent->limit)
 		shrink = 1;
-	spin_unlock(&ent->lock);
+	spin_unlock_irq(&ent->lock);
 
 	if (shrink)
 		queue_work(cache->wq, &ent->work);
@@ -394,16 +468,16 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 
 	cancel_delayed_work(&ent->dwork);
 	while (1) {
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (list_empty(&ent->head)) {
-			spin_unlock(&ent->lock);
+			spin_unlock_irq(&ent->lock);
 			return;
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 		list_del(&mr->list);
 		ent->cur--;
 		ent->size--;
-		spin_unlock(&ent->lock);
+		spin_unlock_irq(&ent->lock);
 		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
 		if (err)
 			mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -464,6 +538,13 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 	debugfs_remove_recursive(dev->cache.root);
 }
 
+static void delay_time_func(unsigned long ctx)
+{
+	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
+
+	dev->fill_delay = 0;
+}
+
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -479,6 +560,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		return -ENOMEM;
 	}
 
+	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 		INIT_LIST_HEAD(&cache->ent[i].head);
 		spin_lock_init(&cache->ent[i].lock);
@@ -522,6 +604,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 		clean_keys(dev, i);
 
 	destroy_workqueue(dev->cache.wq);
+	del_timer_sync(&dev->delay_timer);
 
 	return 0;
 }
@@ -551,7 +634,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	seg->start_addr = 0;
 
-	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
+	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
+				    NULL);
 	if (err)
 		goto err_in;
 
@@ -660,14 +744,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	int err;
 	int i;
 
-	for (i = 0; i < 10; i++) {
+	for (i = 0; i < 1; i++) {
 		mr = alloc_cached_mr(dev, order);
 		if (mr)
 			break;
 
 		err = add_keys(dev, order2idx(dev, order), 1);
-		if (err) {
-			mlx5_ib_warn(dev, "add_keys failed\n");
+		if (err && err != -EAGAIN) {
+			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
 			break;
 		}
 	}
@@ -759,8 +843,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
 	in->seg.log2_page_size = page_shift;
 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
-	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
+	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
+							 1 << page_shift));
+	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
+				    NULL, NULL);
 	if (err) {
 		mlx5_ib_warn(dev, "create mkey failed\n");
 		goto err_2;
@@ -944,7 +1030,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
 	 * TBD not needed - issue 197292 */
 	in->seg.log2_page_size = PAGE_SHIFT;
 
-	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
+	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
+				    NULL, NULL);
 	kfree(in);
 	if (err)
 		goto err_free;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 5659ea880741..e3881433f5d7 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1744,6 +1744,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 			MLX5_MKEY_MASK_PD		|
 			MLX5_MKEY_MASK_LR		|
 			MLX5_MKEY_MASK_LW		|
+			MLX5_MKEY_MASK_KEY		|
 			MLX5_MKEY_MASK_RR		|
 			MLX5_MKEY_MASK_RW		|
 			MLX5_MKEY_MASK_A		|
@@ -1800,7 +1801,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
 	seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
 	seg->len = cpu_to_be64(wr->wr.fast_reg.length);
 	seg->log2_page_size = wr->wr.fast_reg.page_shift;
-	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
+				       mlx5_mkey_variant(wr->wr.fast_reg.rkey));
 }
 
 static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 6ca30739625f..8675d26a678b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -98,6 +98,7 @@ enum {
 static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
 					   struct mlx5_cmd_msg *in,
 					   struct mlx5_cmd_msg *out,
+					   void *uout, int uout_size,
 					   mlx5_cmd_cbk_t cbk,
 					   void *context, int page_queue)
 {
@@ -110,6 +111,8 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
 
 	ent->in		= in;
 	ent->out	= out;
+	ent->uout	= uout;
+	ent->uout_size	= uout_size;
 	ent->callback	= cbk;
 	ent->context	= context;
 	ent->cmd	= cmd;
@@ -534,6 +537,7 @@ static void cmd_work_handler(struct work_struct *work)
 	ent->lay = lay;
 	memset(lay, 0, sizeof(*lay));
 	memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
+	ent->op = be32_to_cpu(lay->in[0]) >> 16;
 	if (ent->in->next)
 		lay->in_ptr = cpu_to_be64(ent->in->next->dma);
 	lay->inlen = cpu_to_be32(ent->in->len);
@@ -628,7 +632,8 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
  *    2. page queue commands do not support asynchrous completion
  */
 static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
-			   struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback,
+			   struct mlx5_cmd_msg *out, void *uout, int uout_size,
+			   mlx5_cmd_cbk_t callback,
 			   void *context, int page_queue, u8 *status)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
@@ -642,7 +647,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 	if (callback && page_queue)
 		return -EINVAL;
 
-	ent = alloc_cmd(cmd, in, out, callback, context, page_queue);
+	ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
+			page_queue);
 	if (IS_ERR(ent))
 		return PTR_ERR(ent);
 
@@ -670,10 +676,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 		op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
 		if (op < ARRAY_SIZE(cmd->stats)) {
 			stats = &cmd->stats[op];
-			spin_lock(&stats->lock);
+			spin_lock_irq(&stats->lock);
 			stats->sum += ds;
 			++stats->n;
-			spin_unlock(&stats->lock);
+			spin_unlock_irq(&stats->lock);
 		}
 		mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
 				   "fw exec time for %s is %lld nsec\n",
@@ -826,7 +832,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
 	int n;
 	int i;
 
-	msg = kzalloc(sizeof(*msg), GFP_KERNEL);
+	msg = kzalloc(sizeof(*msg), flags);
 	if (!msg)
 		return ERR_PTR(-ENOMEM);
 
@@ -1109,6 +1115,19 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
 		up(&cmd->sem);
 }
 
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+{
+	unsigned long flags;
+
+	if (msg->cache) {
+		spin_lock_irqsave(&msg->cache->lock, flags);
+		list_add_tail(&msg->list, &msg->cache->head);
+		spin_unlock_irqrestore(&msg->cache->lock, flags);
+	} else {
+		mlx5_free_cmd_msg(dev, msg);
+	}
+}
+
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
@@ -1117,6 +1136,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
 	void *context;
 	int err;
 	int i;
+	ktime_t t1, t2, delta;
+	s64 ds;
+	struct mlx5_cmd_stats *stats;
+	unsigned long flags;
 
 	for (i = 0; i < (1 << cmd->log_sz); i++) {
 		if (test_bit(i, &vector)) {
@@ -1141,9 +1164,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
 			}
 			free_ent(cmd, ent->idx);
 			if (ent->callback) {
+				t1 = timespec_to_ktime(ent->ts1);
+				t2 = timespec_to_ktime(ent->ts2);
+				delta = ktime_sub(t2, t1);
+				ds = ktime_to_ns(delta);
+				if (ent->op < ARRAY_SIZE(cmd->stats)) {
+					stats = &cmd->stats[ent->op];
+					spin_lock_irqsave(&stats->lock, flags);
+					stats->sum += ds;
+					++stats->n;
+					spin_unlock_irqrestore(&stats->lock, flags);
+				}
+
 				callback = ent->callback;
 				context = ent->context;
 				err = ent->ret;
+				if (!err)
+					err = mlx5_copy_from_msg(ent->uout,
+								 ent->out,
+								 ent->uout_size);
+
+				mlx5_free_cmd_msg(dev, ent->out);
+				free_msg(dev, ent->in);
+
 				free_cmd(ent);
 				callback(err, context);
 			} else {
@@ -1160,7 +1203,8 @@ static int status_to_err(u8 status)
 	return status ? -1 : 0; /* TBD more meaningful codes */
 }
 
-static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
+static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
+				      gfp_t gfp)
 {
 	struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
 	struct mlx5_cmd *cmd = &dev->cmd;
@@ -1172,7 +1216,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
 		ent = &cmd->cache.med;
 
 	if (ent) {
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (!list_empty(&ent->head)) {
 			msg = list_entry(ent->head.next, typeof(*msg), list);
 			/* For cached lists, we must explicitly state what is
@@ -1181,43 +1225,34 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
 			msg->len = in_size;
 			list_del(&msg->list);
 		}
-		spin_unlock(&ent->lock);
+		spin_unlock_irq(&ent->lock);
 	}
 
 	if (IS_ERR(msg))
-		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size);
+		msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
 
 	return msg;
 }
 
-static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
-{
-	if (msg->cache) {
-		spin_lock(&msg->cache->lock);
-		list_add_tail(&msg->list, &msg->cache->head);
-		spin_unlock(&msg->cache->lock);
-	} else {
-		mlx5_free_cmd_msg(dev, msg);
-	}
-}
-
 static int is_manage_pages(struct mlx5_inbox_hdr *in)
 {
 	return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
 }
 
-int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
-		  int out_size)
+static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+		    int out_size, mlx5_cmd_cbk_t callback, void *context)
 {
 	struct mlx5_cmd_msg *inb;
 	struct mlx5_cmd_msg *outb;
 	int pages_queue;
+	gfp_t gfp;
 	int err;
 	u8 status = 0;
 
 	pages_queue = is_manage_pages(in);
+	gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
 
-	inb = alloc_msg(dev, in_size);
+	inb = alloc_msg(dev, in_size, gfp);
 	if (IS_ERR(inb)) {
 		err = PTR_ERR(inb);
 		return err;
@@ -1229,13 +1264,14 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		goto out_in;
 	}
 
-	outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size);
+	outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
 	if (IS_ERR(outb)) {
 		err = PTR_ERR(outb);
 		goto out_in;
 	}
 
-	err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status);
+	err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
+			      pages_queue, &status);
 	if (err)
 		goto out_out;
 
@@ -1248,14 +1284,30 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 	err = mlx5_copy_from_msg(out, outb, out_size);
 
 out_out:
-	mlx5_free_cmd_msg(dev, outb);
+	if (!callback)
+		mlx5_free_cmd_msg(dev, outb);
 
 out_in:
-	free_msg(dev, inb);
+	if (!callback)
+		free_msg(dev, inb);
 	return err;
 }
+
+int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+		  int out_size)
+{
+	return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
+}
 EXPORT_SYMBOL(mlx5_cmd_exec);
 
+int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
+		     void *out, int out_size, mlx5_cmd_cbk_t callback,
+		     void *context)
+{
+	return cmd_exec(dev, in, in_size, out, out_size, callback, context);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_cb);
+
 static void destroy_msg_cache(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 9c7194b26ee2..80f6d127257a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -154,10 +154,10 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
 		return 0;
 
 	stats = filp->private_data;
-	spin_lock(&stats->lock);
+	spin_lock_irq(&stats->lock);
 	if (stats->n)
 		field = div64_u64(stats->sum, stats->n);
-	spin_unlock(&stats->lock);
+	spin_unlock_irq(&stats->lock);
 	ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
 	if (ret > 0) {
 		if (copy_to_user(buf, tbuf, ret))
@@ -175,10 +175,10 @@ static ssize_t average_write(struct file *filp, const char __user *buf,
 	struct mlx5_cmd_stats *stats;
 
 	stats = filp->private_data;
-	spin_lock(&stats->lock);
+	spin_lock_irq(&stats->lock);
 	stats->sum = 0;
 	stats->n = 0;
-	spin_unlock(&stats->lock);
+	spin_unlock_irq(&stats->lock);
 
 	*pos += count;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 5b44e2e46daf..35e514dc7b7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -37,31 +37,41 @@
 #include "mlx5_core.h"
 
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
-			  struct mlx5_create_mkey_mbox_in *in, int inlen)
+			  struct mlx5_create_mkey_mbox_in *in, int inlen,
+			  mlx5_cmd_cbk_t callback, void *context,
+			  struct mlx5_create_mkey_mbox_out *out)
 {
-	struct mlx5_create_mkey_mbox_out out;
+	struct mlx5_create_mkey_mbox_out lout;
 	int err;
 	u8 key;
 
-	memset(&out, 0, sizeof(out));
-	spin_lock(&dev->priv.mkey_lock);
+	memset(&lout, 0, sizeof(lout));
+	spin_lock_irq(&dev->priv.mkey_lock);
 	key = dev->priv.mkey_key++;
-	spin_unlock(&dev->priv.mkey_lock);
+	spin_unlock_irq(&dev->priv.mkey_lock);
 	in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
 	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
-	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	if (callback) {
+		err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out),
+				       callback, context);
+		return err;
+	} else {
+		err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout));
+	}
+
 	if (err) {
 		mlx5_core_dbg(dev, "cmd exec faile %d\n", err);
 		return err;
 	}
 
-	if (out.hdr.status) {
-		mlx5_core_dbg(dev, "status %d\n", out.hdr.status);
-		return mlx5_cmd_status_to_err(&out.hdr);
+	if (lout.hdr.status) {
+		mlx5_core_dbg(dev, "status %d\n", lout.hdr.status);
+		return mlx5_cmd_status_to_err(&lout.hdr);
 	}
 
-	mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key;
-	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key);
+	mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
+	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
+		      be32_to_cpu(lout.mkey), key, mr->key);
 
 	return err;
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6b8c496572c8..513619a75695 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -557,9 +557,11 @@ typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
 struct mlx5_cmd_work_ent {
 	struct mlx5_cmd_msg    *in;
 	struct mlx5_cmd_msg    *out;
+	void		       *uout;
+	int			uout_size;
 	mlx5_cmd_cbk_t		callback;
 	void		       *context;
-	int idx;
+	int			idx;
 	struct completion	done;
 	struct mlx5_cmd        *cmd;
 	struct work_struct	work;
@@ -570,6 +572,7 @@ struct mlx5_cmd_work_ent {
 	u8			token;
 	struct timespec		ts1;
 	struct timespec		ts2;
+	u16			op;
 };
 
 struct mlx5_pas {
@@ -653,6 +656,9 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size);
+int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
+		     void *out, int out_size, mlx5_cmd_cbk_t callback,
+		     void *context);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
@@ -676,7 +682,9 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 		      u16 lwm, int is_srq);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
-			  struct mlx5_create_mkey_mbox_in *in, int inlen);
+			  struct mlx5_create_mkey_mbox_in *in, int inlen,
+			  mlx5_cmd_cbk_t callback, void *context,
+			  struct mlx5_create_mkey_mbox_out *out);
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
 			 struct mlx5_query_mkey_mbox_out *out, int outlen);
@@ -745,6 +753,11 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
 	return mkey_idx << 8;
 }
 
+static inline u8 mlx5_mkey_variant(u32 mkey)
+{
+	return mkey & 0xff;
+}
+
 enum {
 	MLX5_PROF_MASK_QP_SIZE		= (u64)1 << 0,
 	MLX5_PROF_MASK_MR_CACHE		= (u64)1 << 1,
-- 
cgit v1.2.3


From bf0bf77f6519e5dcd57a77b47e1d151c1e81b7ec Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Wed, 23 Oct 2013 09:53:19 +0300
Subject: mlx5: Support communicating arbitrary host page size to firmware

Connect-IB firmware requires 4K pages to be communicated with the
driver. This patch breaks larger pages to 4K units to enable support
for architectures utilizing larger page size, such as PowerPC.  This
patch also fixes several places that referred to PAGE_SHIFT instead of
explicit 12 which is the inherent page shift on Connect-IB.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx5/cq.c                    |   2 +-
 drivers/infiniband/hw/mlx5/qp.c                    |   4 +-
 drivers/infiniband/hw/mlx5/srq.c                   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |   2 +-
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c    | 174 ++++++++++++++-------
 include/linux/mlx5/driver.h                        |   1 +
 6 files changed, 127 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 84591865e39d..eecac7b52bcb 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	}
 	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 12;
 	*index = dev->mdev.priv.uuari.uars[0].index;
 
 	return 0;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8a36fd78c89f..ce14008a5702 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	}
 	mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
 	(*in)->ctx.log_pg_sz_remote_qpn =
-		cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
+		cpu_to_be32((page_shift - 12) << 24);
 	(*in)->ctx.params2 = cpu_to_be32(offset << 6);
 
 	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -648,7 +648,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 		goto err_buf;
 	}
 	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
-	(*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
+	(*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - 12) << 24);
 	/* Set "fast registration enabled" for all kernel QPs */
 	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
 	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index f1e5845b42b0..dbc2c7188c5e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 		goto err_in;
 	}
 
-	(*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+	(*in)->ctx.log_pg_sz = page_shift - 12;
 	(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
 	return 0;
@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 	}
 	srq->wq_sig = !!srq_signature;
 
-	(*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+	(*in)->ctx.log_pg_sz = page_shift - 12;
 
 	return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 2231d93cc7ad..6b4b436840bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
 	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
 	in->ctx.intr = vecidx;
-	in->ctx.log_page_size = PAGE_SHIFT - 12;
+	in->ctx.log_page_size = eq->buf.page_shift - 12;
 	in->events_mask = cpu_to_be64(mask);
 
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index a0d0da35578c..013aa422adee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -57,10 +57,13 @@ struct mlx5_pages_req {
 };
 
 struct fw_page {
-	struct rb_node	rb_node;
-	u64		addr;
-	struct page	*page;
-	u16		func_id;
+	struct rb_node		rb_node;
+	u64			addr;
+	struct page	       *page;
+	u16			func_id;
+	unsigned long		bitmask;
+	struct list_head	list;
+	unsigned		free_count;
 };
 
 struct mlx5_query_pages_inbox {
@@ -94,6 +97,11 @@ enum {
 	MAX_RECLAIM_TIME_MSECS	= 5000,
 };
 
+enum {
+	MLX5_MAX_RECLAIM_TIME_MILI	= 5000,
+	MLX5_NUM_4K_IN_PAGE		= PAGE_SIZE / 4096,
+};
+
 static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
 {
 	struct rb_root *root = &dev->priv.page_root;
@@ -101,6 +109,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
 	struct rb_node *parent = NULL;
 	struct fw_page *nfp;
 	struct fw_page *tfp;
+	int i;
 
 	while (*new) {
 		parent = *new;
@@ -113,25 +122,29 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
 			return -EEXIST;
 	}
 
-	nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
+	nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
 	if (!nfp)
 		return -ENOMEM;
 
 	nfp->addr = addr;
 	nfp->page = page;
 	nfp->func_id = func_id;
+	nfp->free_count = MLX5_NUM_4K_IN_PAGE;
+	for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
+		set_bit(i, &nfp->bitmask);
 
 	rb_link_node(&nfp->rb_node, parent, new);
 	rb_insert_color(&nfp->rb_node, root);
+	list_add(&nfp->list, &dev->priv.free_list);
 
 	return 0;
 }
 
-static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
 {
 	struct rb_root *root = &dev->priv.page_root;
 	struct rb_node *tmp = root->rb_node;
-	struct page *result = NULL;
+	struct fw_page *result = NULL;
 	struct fw_page *tfp;
 
 	while (tmp) {
@@ -141,9 +154,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
 		} else if (tfp->addr > addr) {
 			tmp = tmp->rb_right;
 		} else {
-			rb_erase(&tfp->rb_node, root);
-			result = tfp->page;
-			kfree(tfp);
+			result = tfp;
 			break;
 		}
 	}
@@ -176,13 +187,97 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 	return err;
 }
 
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+{
+	struct fw_page *fp;
+	unsigned n;
+
+	if (list_empty(&dev->priv.free_list)) {
+		return -ENOMEM;
+		mlx5_core_warn(dev, "\n");
+	}
+
+	fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
+	n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
+	if (n >= MLX5_NUM_4K_IN_PAGE) {
+		mlx5_core_warn(dev, "alloc 4k bug\n");
+		return -ENOENT;
+	}
+	clear_bit(n, &fp->bitmask);
+	fp->free_count--;
+	if (!fp->free_count)
+		list_del(&fp->list);
+
+	*addr = fp->addr + n * 4096;
+
+	return 0;
+}
+
+static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+{
+	struct fw_page *fwp;
+	int n;
+
+	fwp = find_fw_page(dev, addr & PAGE_MASK);
+	if (!fwp) {
+		mlx5_core_warn(dev, "page not found\n");
+		return;
+	}
+
+	n = (addr & ~PAGE_MASK) % 4096;
+	fwp->free_count++;
+	set_bit(n, &fwp->bitmask);
+	if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+		rb_erase(&fwp->rb_node, &dev->priv.page_root);
+		list_del(&fwp->list);
+		dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+		__free_page(fwp->page);
+		kfree(fwp);
+	} else if (fwp->free_count == 1) {
+		list_add(&fwp->list, &dev->priv.free_list);
+	}
+}
+
+static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
+{
+	struct page *page;
+	u64 addr;
+	int err;
+
+	page = alloc_page(GFP_HIGHUSER);
+	if (!page) {
+		mlx5_core_warn(dev, "failed to allocate page\n");
+		return -ENOMEM;
+	}
+	addr = dma_map_page(&dev->pdev->dev, page, 0,
+			    PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&dev->pdev->dev, addr)) {
+		mlx5_core_warn(dev, "failed dma mapping page\n");
+		err = -ENOMEM;
+		goto out_alloc;
+	}
+	err = insert_page(dev, addr, page, func_id);
+	if (err) {
+		mlx5_core_err(dev, "failed to track allocated page\n");
+		goto out_mapping;
+	}
+
+	return 0;
+
+out_mapping:
+	dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+out_alloc:
+	__free_page(page);
+
+	return err;
+}
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 		      int notify_fail)
 {
 	struct mlx5_manage_pages_inbox *in;
 	struct mlx5_manage_pages_outbox out;
 	struct mlx5_manage_pages_inbox *nin;
-	struct page *page;
 	int inlen;
 	u64 addr;
 	int err;
@@ -197,27 +292,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 	memset(&out, 0, sizeof(out));
 
 	for (i = 0; i < npages; i++) {
-		page = alloc_page(GFP_HIGHUSER);
-		if (!page) {
-			err = -ENOMEM;
-			mlx5_core_warn(dev, "failed to allocate page\n");
-			goto out_alloc;
-		}
-		addr = dma_map_page(&dev->pdev->dev, page, 0,
-				    PAGE_SIZE, DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(&dev->pdev->dev, addr)) {
-			mlx5_core_warn(dev, "failed dma mapping page\n");
-			__free_page(page);
-			err = -ENOMEM;
-			goto out_alloc;
-		}
-		err = insert_page(dev, addr, page, func_id);
+retry:
+		err = alloc_4k(dev, &addr);
 		if (err) {
-			mlx5_core_err(dev, "failed to track allocated page\n");
-			dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-			__free_page(page);
-			err = -ENOMEM;
-			goto out_alloc;
+			if (err == -ENOMEM)
+				err = alloc_system_page(dev, func_id);
+			if (err)
+				goto out_4k;
+
+			goto retry;
 		}
 		in->pas[i] = cpu_to_be64(addr);
 	}
@@ -227,7 +310,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 	in->func_id = cpu_to_be16(func_id);
 	in->num_entries = cpu_to_be32(npages);
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
-	mlx5_core_dbg(dev, "err %d\n", err);
 	if (err) {
 		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
 		goto out_alloc;
@@ -251,7 +333,7 @@ out_alloc:
 		nin = kzalloc(sizeof(*nin), GFP_KERNEL);
 		if (!nin) {
 			mlx5_core_warn(dev, "allocation failed\n");
-			goto unmap;
+			goto out_4k;
 		}
 		memset(&out, 0, sizeof(out));
 		nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
@@ -261,19 +343,9 @@ out_alloc:
 		kfree(nin);
 	}
 
-unmap:
-	for (i--; i >= 0; i--) {
-		addr = be64_to_cpu(in->pas[i]);
-		page = remove_page(dev, addr);
-		if (!page) {
-			mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
-				      addr);
-			continue;
-		}
-		dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-		__free_page(page);
-	}
-
+out_4k:
+	for (i--; i >= 0; i--)
+		free_4k(dev, be64_to_cpu(in->pas[i]));
 out_free:
 	mlx5_vfree(in);
 	return err;
@@ -284,7 +356,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 {
 	struct mlx5_manage_pages_inbox   in;
 	struct mlx5_manage_pages_outbox *out;
-	struct page *page;
 	int num_claimed;
 	int outlen;
 	u64 addr;
@@ -323,13 +394,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 
 	for (i = 0; i < num_claimed; i++) {
 		addr = be64_to_cpu(out->pas[i]);
-		page = remove_page(dev, addr);
-		if (!page) {
-			mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
-		} else {
-			dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-			__free_page(page);
-		}
+		free_4k(dev, addr);
 	}
 
 out_free:
@@ -435,6 +500,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
 	dev->priv.page_root = RB_ROOT;
+	INIT_LIST_HEAD(&dev->priv.free_list);
 }
 
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 513619a75695..554548cd3dd4 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -483,6 +483,7 @@ struct mlx5_priv {
 	struct rb_root		page_root;
 	int			fw_pages;
 	int			reg_pages;
+	struct list_head	free_list;
 
 	struct mlx5_core_health health;
 
-- 
cgit v1.2.3


From 87b8de492da34942fc554f2958a570ce0642296a Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Wed, 23 Oct 2013 09:53:20 +0300
Subject: mlx5: Clear reserved area in set_hca_cap()

Firmware spec requires reserved fields to be cleared when calling
set_hca_cap.  Current code queries and copy to the set area, possibly
resulting in reserved bits not cleared. This patch copies only
writable fields to the set area.

Fix also typo - msx => max

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 35 +++++++++++++++++++++++---
 include/linux/mlx5/device.h                    |  9 +++++--
 2 files changed, 39 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index bc0f5fb66e24..40a9f5ed814d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -159,6 +159,36 @@ struct mlx5_reg_host_endianess {
 	u8      rsvd[15];
 };
 
+
+#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
+
+enum {
+	MLX5_CAP_BITS_RW_MASK	= CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
+				  CAP_MASK(MLX5_CAP_OFF_DCT, 1),
+};
+
+/* selectively copy writable fields clearing any reserved area
+ */
+static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_hca_cap *from)
+{
+	u64 v64;
+
+	to->log_max_qp = from->log_max_qp & 0x1f;
+	to->log_max_ra_req_dc = from->log_max_ra_req_dc & 0x3f;
+	to->log_max_ra_res_dc = from->log_max_ra_res_dc & 0x3f;
+	to->log_max_ra_req_qp = from->log_max_ra_req_qp & 0x3f;
+	to->log_max_ra_res_qp = from->log_max_ra_res_qp & 0x3f;
+	to->log_max_atomic_size_qp = from->log_max_atomic_size_qp;
+	to->log_max_atomic_size_dc = from->log_max_atomic_size_dc;
+	v64 = be64_to_cpu(from->flags) & MLX5_CAP_BITS_RW_MASK;
+	to->flags = cpu_to_be64(v64);
+}
+
+enum {
+	HCA_CAP_OPMOD_GET_MAX	= 0,
+	HCA_CAP_OPMOD_GET_CUR	= 1,
+};
+
 static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL;
@@ -180,7 +210,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	}
 
 	query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
-	query_ctx.hdr.opmod  = cpu_to_be16(0x1);
+	query_ctx.hdr.opmod  = cpu_to_be16(HCA_CAP_OPMOD_GET_CUR);
 	err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx),
 				 query_out, sizeof(*query_out));
 	if (err)
@@ -192,8 +222,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 		goto query_ex;
 	}
 
-	memcpy(&set_ctx->hca_cap, &query_out->hca_cap,
-	       sizeof(set_ctx->hca_cap));
+	copy_rw_fields(&set_ctx->hca_cap, &query_out->hca_cap);
 
 	if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
 		set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 5eb4e31af22b..3d789f43e34b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -230,6 +230,11 @@ enum {
 	MLX5_MAX_PAGE_SHIFT		= 31
 };
 
+enum {
+	MLX5_CAP_OFF_DCT		= 41,
+	MLX5_CAP_OFF_CMDIF_CSUM		= 46,
+};
+
 struct mlx5_inbox_hdr {
 	__be16		opcode;
 	u8		rsvd[4];
@@ -319,9 +324,9 @@ struct mlx5_hca_cap {
 	u8	rsvd25[42];
 	__be16  log_uar_page_sz;
 	u8	rsvd26[28];
-	u8	log_msx_atomic_size_qp;
+	u8	log_max_atomic_size_qp;
 	u8	rsvd27[2];
-	u8	log_msx_atomic_size_dc;
+	u8	log_max_atomic_size_dc;
 	u8	rsvd28[76];
 };
 
-- 
cgit v1.2.3


From 1b77d2bd753d119eedcbc08fda58934307676554 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Thu, 24 Oct 2013 12:01:03 +0300
Subject: mlx5: Use enum to indicate adapter page size

The Connect-IB adapter has an inherent page size which equals 4K.
Define an new enum that equals the page shift and use it instead of
using the value 12 throughout the code.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx5/cq.c              | 2 +-
 drivers/infiniband/hw/mlx5/qp.c              | 5 +++--
 drivers/infiniband/hw/mlx5/srq.c             | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +-
 include/linux/mlx5/device.h                  | 4 ++++
 5 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index eecac7b52bcb..28344773f640 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	}
 	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 12;
+	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	*index = dev->mdev.priv.uuari.uars[0].index;
 
 	return 0;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 5bcf57943b84..7c6b4ba49bec 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	}
 	mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
 	(*in)->ctx.log_pg_sz_remote_qpn =
-		cpu_to_be32((page_shift - 12) << 24);
+		cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
 	(*in)->ctx.params2 = cpu_to_be32(offset << 6);
 
 	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -648,7 +648,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 		goto err_buf;
 	}
 	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
-	(*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - 12) << 24);
+	(*in)->ctx.log_pg_sz_remote_qpn =
+		cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
 	/* Set "fast registration enabled" for all kernel QPs */
 	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
 	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index dbc2c7188c5e..210b3eaf188a 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 		goto err_in;
 	}
 
-	(*in)->ctx.log_pg_sz = page_shift - 12;
+	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
 	return 0;
@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 	}
 	srq->wq_sig = !!srq_signature;
 
-	(*in)->ctx.log_pg_sz = page_shift - 12;
+	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
 	return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 6b4b436840bd..64a61b286b2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
 	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
 	in->ctx.intr = vecidx;
-	in->ctx.log_page_size = eq->buf.page_shift - 12;
+	in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	in->events_mask = cpu_to_be64(mask);
 
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 3d789f43e34b..da78875807fc 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -230,6 +230,10 @@ enum {
 	MLX5_MAX_PAGE_SHIFT		= 31
 };
 
+enum {
+	MLX5_ADAPTER_PAGE_SHIFT		= 12
+};
+
 enum {
 	MLX5_CAP_OFF_DCT		= 41,
 	MLX5_CAP_OFF_CMDIF_CSUM		= 46,
-- 
cgit v1.2.3


From 4d8981f6b784ae445a28de909d77e27836c3ed44 Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Tue, 29 Oct 2013 16:02:00 +0100
Subject: ARM: 7871/1: amba: Extend number of IRQS

Xilinx Zynq pl330 dma driver has 9 irqs which all have to
be used by the driver to get it work properly.

Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/bus.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 43ec7e247a80..b327a1b1b7e8 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -21,7 +21,7 @@
 #include <linux/resource.h>
 #include <linux/regulator/consumer.h>
 
-#define AMBA_NR_IRQS	2
+#define AMBA_NR_IRQS	9
 #define AMBA_CID	0xb105f00d
 
 struct clk;
-- 
cgit v1.2.3


From 48a066e72d970a3e225a9c18690d570c736fc455 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Sep 2013 22:06:07 -0400
Subject: RCU'd vfsmounts

* RCU-delayed freeing of vfsmounts
* vfsmount_lock replaced with a seqlock (mount_lock)
* sequence number from mount_lock is stored in nameidata->m_seq and
used when we exit RCU mode
* new vfsmount flag - MNT_SYNC_UMOUNT.  Set by umount_tree() when its
caller knows that vfsmount will have no surviving references.
* synchronize_rcu() done between unlocking namespace_sem in namespace_unlock()
and doing pending mntput().
* new helper: legitimize_mnt(mnt, seq).  Checks the mount_lock sequence
number against seq, then grabs reference to mnt.  Then it rechecks mount_lock
again to close the race and either returns success or drops the reference it
has acquired.  The subtle point is that in case of MNT_SYNC_UMOUNT we can
simply decrement the refcount and sod off - aforementioned synchronize_rcu()
makes sure that final mntput() won't come until we leave RCU mode.  We need
that, since we don't want to end up with some lazy pathwalk racing with
umount() and stealing the final mntput() from it - caller of umount() may
expect it to return only once the fs is shut down and we don't want to break
that.  In other cases (i.e. with MNT_SYNC_UMOUNT absent) we have to do
full-blown mntput() in case of mount_lock sequence number mismatch happening
just as we'd grabbed the reference, but in those cases we won't be stealing
the final mntput() from anything that would care.
* mntput_no_expire() doesn't lock anything on the fast path now.  Incidentally,
SMP and UP cases are handled the same way - no ifdefs there.
* normal pathname resolution does *not* do any writes to mount_lock.  It does,
of course, bump the refcounts of vfsmount and dentry in the very end, but that's
it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c           |  20 +++++---
 fs/mount.h            |  10 ++--
 fs/namei.c            |  50 ++++++++++---------
 fs/namespace.c        | 135 ++++++++++++++++++++++++++++++++------------------
 include/linux/mount.h |   2 +
 include/linux/namei.h |   2 +-
 6 files changed, 136 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index eb0978da1bd4..aafa2a146434 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2887,24 +2887,28 @@ static int prepend_path(const struct path *path,
 	struct vfsmount *vfsmnt = path->mnt;
 	struct mount *mnt = real_mount(vfsmnt);
 	int error = 0;
-	unsigned seq = 0;
+	unsigned seq, m_seq = 0;
 	char *bptr;
 	int blen;
 
-	br_read_lock(&vfsmount_lock);
 	rcu_read_lock();
+restart_mnt:
+	read_seqbegin_or_lock(&mount_lock, &m_seq);
+	seq = 0;
 restart:
 	bptr = *buffer;
 	blen = *buflen;
+	error = 0;
 	read_seqbegin_or_lock(&rename_lock, &seq);
 	while (dentry != root->dentry || vfsmnt != root->mnt) {
 		struct dentry * parent;
 
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+			struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
 			/* Global root? */
-			if (mnt_has_parent(mnt)) {
-				dentry = mnt->mnt_mountpoint;
-				mnt = mnt->mnt_parent;
+			if (mnt != parent) {
+				dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
+				mnt = parent;
 				vfsmnt = &mnt->mnt;
 				continue;
 			}
@@ -2938,7 +2942,11 @@ restart:
 		goto restart;
 	}
 	done_seqretry(&rename_lock, seq);
-	br_read_unlock(&vfsmount_lock);
+	if (need_seqretry(&mount_lock, m_seq)) {
+		m_seq = 1;
+		goto restart_mnt;
+	}
+	done_seqretry(&mount_lock, m_seq);
 
 	if (error >= 0 && bptr == *buffer) {
 		if (--blen < 0)
diff --git a/fs/mount.h b/fs/mount.h
index f0866076de6e..d64c594be6c4 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -1,7 +1,6 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/poll.h>
-#include <linux/lglock.h>
 
 struct mnt_namespace {
 	atomic_t		count;
@@ -30,6 +29,7 @@ struct mount {
 	struct mount *mnt_parent;
 	struct dentry *mnt_mountpoint;
 	struct vfsmount mnt;
+	struct rcu_head mnt_rcu;
 #ifdef CONFIG_SMP
 	struct mnt_pcp __percpu *mnt_pcp;
 #else
@@ -80,21 +80,23 @@ static inline int is_mounted(struct vfsmount *mnt)
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
 extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
 
+extern bool legitimize_mnt(struct vfsmount *, unsigned);
+
 static inline void get_mnt_ns(struct mnt_namespace *ns)
 {
 	atomic_inc(&ns->count);
 }
 
-extern struct lglock vfsmount_lock;
+extern seqlock_t mount_lock;
 
 static inline void lock_mount_hash(void)
 {
-	br_write_lock(&vfsmount_lock);
+	write_seqlock(&mount_lock);
 }
 
 static inline void unlock_mount_hash(void)
 {
-	br_write_unlock(&vfsmount_lock);
+	write_sequnlock(&mount_lock);
 }
 
 struct proc_mounts {
diff --git a/fs/namei.c b/fs/namei.c
index 1f844fbfce72..cb0ebae07e52 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -484,14 +484,12 @@ EXPORT_SYMBOL(path_put);
 
 static inline void lock_rcu_walk(void)
 {
-	br_read_lock(&vfsmount_lock);
 	rcu_read_lock();
 }
 
 static inline void unlock_rcu_walk(void)
 {
 	rcu_read_unlock();
-	br_read_unlock(&vfsmount_lock);
 }
 
 /**
@@ -512,26 +510,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 	BUG_ON(!(nd->flags & LOOKUP_RCU));
 
 	/*
-	 * Get a reference to the parent first: we're
-	 * going to make "path_put(nd->path)" valid in
-	 * non-RCU context for "terminate_walk()".
-	 *
-	 * If this doesn't work, return immediately with
-	 * RCU walking still active (and then we will do
-	 * the RCU walk cleanup in terminate_walk()).
+	 * After legitimizing the bastards, terminate_walk()
+	 * will do the right thing for non-RCU mode, and all our
+	 * subsequent exit cases should rcu_read_unlock()
+	 * before returning.  Do vfsmount first; if dentry
+	 * can't be legitimized, just set nd->path.dentry to NULL
+	 * and rely on dput(NULL) being a no-op.
 	 */
-	if (!lockref_get_not_dead(&parent->d_lockref))
+	if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
 		return -ECHILD;
-
-	/*
-	 * After the mntget(), we terminate_walk() will do
-	 * the right thing for non-RCU mode, and all our
-	 * subsequent exit cases should unlock_rcu_walk()
-	 * before returning.
-	 */
-	mntget(nd->path.mnt);
 	nd->flags &= ~LOOKUP_RCU;
 
+	if (!lockref_get_not_dead(&parent->d_lockref)) {
+		nd->path.dentry = NULL;	
+		unlock_rcu_walk();
+		return -ECHILD;
+	}
+
 	/*
 	 * For a negative lookup, the lookup sequence point is the parents
 	 * sequence point, and it only needs to revalidate the parent dentry.
@@ -608,16 +603,21 @@ static int complete_walk(struct nameidata *nd)
 		if (!(nd->flags & LOOKUP_ROOT))
 			nd->root.mnt = NULL;
 
+		if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
+			unlock_rcu_walk();
+			return -ECHILD;
+		}
 		if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
 			unlock_rcu_walk();
+			mntput(nd->path.mnt);
 			return -ECHILD;
 		}
 		if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
 			unlock_rcu_walk();
 			dput(dentry);
+			mntput(nd->path.mnt);
 			return -ECHILD;
 		}
-		mntget(nd->path.mnt);
 		unlock_rcu_walk();
 	}
 
@@ -909,15 +909,15 @@ int follow_up(struct path *path)
 	struct mount *parent;
 	struct dentry *mountpoint;
 
-	br_read_lock(&vfsmount_lock);
+	read_seqlock_excl(&mount_lock);
 	parent = mnt->mnt_parent;
 	if (parent == mnt) {
-		br_read_unlock(&vfsmount_lock);
+		read_sequnlock_excl(&mount_lock);
 		return 0;
 	}
 	mntget(&parent->mnt);
 	mountpoint = dget(mnt->mnt_mountpoint);
-	br_read_unlock(&vfsmount_lock);
+	read_sequnlock_excl(&mount_lock);
 	dput(path->dentry);
 	path->dentry = mountpoint;
 	mntput(path->mnt);
@@ -1048,8 +1048,8 @@ static int follow_managed(struct path *path, unsigned flags)
 
 			/* Something is mounted on this dentry in another
 			 * namespace and/or whatever was mounted there in this
-			 * namespace got unmounted before we managed to get the
-			 * vfsmount_lock */
+			 * namespace got unmounted before lookup_mnt() could
+			 * get it */
 		}
 
 		/* Handle an automount point */
@@ -1864,6 +1864,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 		if (flags & LOOKUP_RCU) {
 			lock_rcu_walk();
 			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+			nd->m_seq = read_seqbegin(&mount_lock);
 		} else {
 			path_get(&nd->path);
 		}
@@ -1872,6 +1873,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 
 	nd->root.mnt = NULL;
 
+	nd->m_seq = read_seqbegin(&mount_lock);
 	if (*name=='/') {
 		if (flags & LOOKUP_RCU) {
 			lock_rcu_walk();
diff --git a/fs/namespace.c b/fs/namespace.c
index 500202ce10db..ac2ce8a766e1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
  * It should be taken for write in all cases where the vfsmount
  * tree or hash is modified or when a vfsmount structure is modified.
  */
-DEFINE_BRLOCK(vfsmount_lock);
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
 
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
@@ -547,16 +547,38 @@ static void free_vfsmnt(struct mount *mnt)
 	kmem_cache_free(mnt_cache, mnt);
 }
 
+/* call under rcu_read_lock */
+bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+{
+	struct mount *mnt;
+	if (read_seqretry(&mount_lock, seq))
+		return false;
+	if (bastard == NULL)
+		return true;
+	mnt = real_mount(bastard);
+	mnt_add_count(mnt, 1);
+	if (likely(!read_seqretry(&mount_lock, seq)))
+		return true;
+	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
+		mnt_add_count(mnt, -1);
+		return false;
+	}
+	rcu_read_unlock();
+	mntput(bastard);
+	rcu_read_lock();
+	return false;
+}
+
 /*
  * find the first mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * call under rcu_read_lock()
  */
 struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct list_head *head = mount_hashtable + hash(mnt, dentry);
 	struct mount *p;
 
-	list_for_each_entry(p, head, mnt_hash)
+	list_for_each_entry_rcu(p, head, mnt_hash)
 		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
 			return p;
 	return NULL;
@@ -564,7 +586,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 
 /*
  * find the last mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * mount_lock must be held.
  */
 struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
 {
@@ -596,17 +618,17 @@ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
 struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct mount *child_mnt;
+	struct vfsmount *m;
+	unsigned seq;
 
-	br_read_lock(&vfsmount_lock);
-	child_mnt = __lookup_mnt(path->mnt, path->dentry);
-	if (child_mnt) {
-		mnt_add_count(child_mnt, 1);
-		br_read_unlock(&vfsmount_lock);
-		return &child_mnt->mnt;
-	} else {
-		br_read_unlock(&vfsmount_lock);
-		return NULL;
-	}
+	rcu_read_lock();
+	do {
+		seq = read_seqbegin(&mount_lock);
+		child_mnt = __lookup_mnt(path->mnt, path->dentry);
+		m = child_mnt ? &child_mnt->mnt : NULL;
+	} while (!legitimize_mnt(m, seq));
+	rcu_read_unlock();
+	return m;
 }
 
 static struct mountpoint *new_mountpoint(struct dentry *dentry)
@@ -874,38 +896,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	return ERR_PTR(err);
 }
 
+static void delayed_free(struct rcu_head *head)
+{
+	struct mount *mnt = container_of(head, struct mount, mnt_rcu);
+	kfree(mnt->mnt_devname);
+#ifdef CONFIG_SMP
+	free_percpu(mnt->mnt_pcp);
+#endif
+	kmem_cache_free(mnt_cache, mnt);
+}
+
 static void mntput_no_expire(struct mount *mnt)
 {
 put_again:
-#ifdef CONFIG_SMP
-	br_read_lock(&vfsmount_lock);
-	if (likely(mnt->mnt_ns)) {
-		/* shouldn't be the last one */
-		mnt_add_count(mnt, -1);
-		br_read_unlock(&vfsmount_lock);
+	rcu_read_lock();
+	mnt_add_count(mnt, -1);
+	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+		rcu_read_unlock();
 		return;
 	}
-	br_read_unlock(&vfsmount_lock);
-
 	lock_mount_hash();
-	mnt_add_count(mnt, -1);
 	if (mnt_get_count(mnt)) {
+		rcu_read_unlock();
 		unlock_mount_hash();
 		return;
 	}
-#else
-	mnt_add_count(mnt, -1);
-	if (likely(mnt_get_count(mnt)))
-		return;
-	lock_mount_hash();
-#endif
 	if (unlikely(mnt->mnt_pinned)) {
 		mnt_add_count(mnt, mnt->mnt_pinned + 1);
 		mnt->mnt_pinned = 0;
+		rcu_read_unlock();
 		unlock_mount_hash();
 		acct_auto_close_mnt(&mnt->mnt);
 		goto put_again;
 	}
+	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
+		rcu_read_unlock();
+		unlock_mount_hash();
+		return;
+	}
+	mnt->mnt.mnt_flags |= MNT_DOOMED;
+	rcu_read_unlock();
 
 	list_del(&mnt->mnt_instance);
 	unlock_mount_hash();
@@ -924,7 +954,8 @@ put_again:
 	fsnotify_vfsmount_delete(&mnt->mnt);
 	dput(mnt->mnt.mnt_root);
 	deactivate_super(mnt->mnt.mnt_sb);
-	free_vfsmnt(mnt);
+	mnt_free_id(mnt);
+	call_rcu(&mnt->mnt_rcu, delayed_free);
 }
 
 void mntput(struct vfsmount *mnt)
@@ -1137,6 +1168,8 @@ static void namespace_unlock(void)
 	list_splice_init(&unmounted, &head);
 	up_write(&namespace_sem);
 
+	synchronize_rcu();
+
 	while (!list_empty(&head)) {
 		mnt = list_first_entry(&head, struct mount, mnt_hash);
 		list_del_init(&mnt->mnt_hash);
@@ -1152,10 +1185,13 @@ static inline void namespace_lock(void)
 }
 
 /*
- * vfsmount lock must be held for write
+ * mount_lock must be held
  * namespace_sem must be held for write
+ * how = 0 => just this tree, don't propagate
+ * how = 1 => propagate; we know that nobody else has reference to any victims
+ * how = 2 => lazy umount
  */
-void umount_tree(struct mount *mnt, int propagate)
+void umount_tree(struct mount *mnt, int how)
 {
 	LIST_HEAD(tmp_list);
 	struct mount *p;
@@ -1163,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate)
 	for (p = mnt; p; p = next_mnt(p, mnt))
 		list_move(&p->mnt_hash, &tmp_list);
 
-	if (propagate)
+	if (how)
 		propagate_umount(&tmp_list);
 
 	list_for_each_entry(p, &tmp_list, mnt_hash) {
@@ -1171,6 +1207,8 @@ void umount_tree(struct mount *mnt, int propagate)
 		list_del_init(&p->mnt_list);
 		__touch_mnt_namespace(p->mnt_ns);
 		p->mnt_ns = NULL;
+		if (how < 2)
+			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
 		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
 			put_mountpoint(p->mnt_mp);
@@ -1262,14 +1300,18 @@ static int do_umount(struct mount *mnt, int flags)
 	lock_mount_hash();
 	event++;
 
-	if (!(flags & MNT_DETACH))
-		shrink_submounts(mnt);
-
-	retval = -EBUSY;
-	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
+	if (flags & MNT_DETACH) {
 		if (!list_empty(&mnt->mnt_list))
-			umount_tree(mnt, 1);
+			umount_tree(mnt, 2);
 		retval = 0;
+	} else {
+		shrink_submounts(mnt);
+		retval = -EBUSY;
+		if (!propagate_mount_busy(mnt, 2)) {
+			if (!list_empty(&mnt->mnt_list))
+				umount_tree(mnt, 1);
+			retval = 0;
+		}
 	}
 	unlock_mount_hash();
 	namespace_unlock();
@@ -1955,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
 	struct mount *parent;
 	int err;
 
-	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
+	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
 
 	mp = lock_mount(path);
 	if (IS_ERR(mp))
@@ -2172,7 +2214,7 @@ resume:
  * process a list of expirable mountpoints with the intent of discarding any
  * submounts of a specific parent mountpoint
  *
- * vfsmount_lock must be held for write
+ * mount_lock must be held for write
  */
 static void shrink_submounts(struct mount *mnt)
 {
@@ -2558,7 +2600,7 @@ out_type:
 /*
  * Return true if path is reachable from root
  *
- * namespace_sem or vfsmount_lock is held
+ * namespace_sem or mount_lock is held
  */
 bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 			 const struct path *root)
@@ -2573,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 int path_is_under(struct path *path1, struct path *path2)
 {
 	int res;
-	br_read_lock(&vfsmount_lock);
+	read_seqlock_excl(&mount_lock);
 	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
-	br_read_unlock(&vfsmount_lock);
+	read_sequnlock_excl(&mount_lock);
 	return res;
 }
 EXPORT_SYMBOL(path_is_under);
@@ -2748,8 +2790,6 @@ void __init mnt_init(void)
 	for (u = 0; u < HASH_SIZE; u++)
 		INIT_LIST_HEAD(&mountpoint_hashtable[u]);
 
-	br_lock_init(&vfsmount_lock);
-
 	err = sysfs_init();
 	if (err)
 		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2788,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt)
 {
 	/* release long term mount so mount point can be released */
 	if (!IS_ERR_OR_NULL(mnt)) {
-		lock_mount_hash();
 		real_mount(mnt)->mnt_ns = NULL;
-		unlock_mount_hash();
+		synchronize_rcu();	/* yecchhh... */
 		mntput(mnt);
 	}
 }
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 38cd98f112a0..371d346fa270 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -49,6 +49,8 @@ struct mnt_namespace;
 
 #define MNT_LOCK_READONLY	0x400000
 #define MNT_LOCKED		0x800000
+#define MNT_DOOMED		0x1000000
+#define MNT_SYNC_UMOUNT		0x2000000
 
 struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 8e47bc7a1665..492de72560fa 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -16,7 +16,7 @@ struct nameidata {
 	struct path	root;
 	struct inode	*inode; /* path.dentry.d_inode */
 	unsigned int	flags;
-	unsigned	seq;
+	unsigned	seq, m_seq;
 	int		last_type;
 	unsigned	depth;
 	char *saved_names[MAX_NESTED_LINKS + 1];
-- 
cgit v1.2.3


From eee5cc2702929fd41cce28058dc6d6717f723f87 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Oct 2013 11:06:42 -0400
Subject: get rid of s_files and files_lock

The only thing we need it for is alt-sysrq-r (emergency remount r/o)
and these days we can do just as well without going through the
list of files.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 125 -----------------------------------------------------
 fs/internal.h      |   3 --
 fs/open.c          |   2 -
 fs/super.c         |  15 +------
 include/linux/fs.h |  13 ------
 5 files changed, 2 insertions(+), 156 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index e61e5529fa9d..23b6dca03ba0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -36,8 +36,6 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-DEFINE_STATIC_LGLOCK(files_lglock);
-
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
 
@@ -134,7 +132,6 @@ struct file *get_empty_filp(void)
 		return ERR_PTR(error);
 	}
 
-	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
 	spin_lock_init(&f->f_lock);
@@ -304,7 +301,6 @@ void fput(struct file *file)
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		struct task_struct *task = current;
 
-		file_sb_list_del(file);
 		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
 			init_task_work(&file->f_u.fu_rcuhead, ____fput);
 			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
@@ -333,7 +329,6 @@ void __fput_sync(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		struct task_struct *task = current;
-		file_sb_list_del(file);
 		BUG_ON(!(task->flags & PF_KTHREAD));
 		__fput(file);
 	}
@@ -345,129 +340,10 @@ void put_filp(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		security_file_free(file);
-		file_sb_list_del(file);
 		file_free(file);
 	}
 }
 
-static inline int file_list_cpu(struct file *file)
-{
-#ifdef CONFIG_SMP
-	return file->f_sb_list_cpu;
-#else
-	return smp_processor_id();
-#endif
-}
-
-/* helper for file_sb_list_add to reduce ifdefs */
-static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
-{
-	struct list_head *list;
-#ifdef CONFIG_SMP
-	int cpu;
-	cpu = smp_processor_id();
-	file->f_sb_list_cpu = cpu;
-	list = per_cpu_ptr(sb->s_files, cpu);
-#else
-	list = &sb->s_files;
-#endif
-	list_add(&file->f_u.fu_list, list);
-}
-
-/**
- * file_sb_list_add - add a file to the sb's file list
- * @file: file to add
- * @sb: sb to add it to
- *
- * Use this function to associate a file with the superblock of the inode it
- * refers to.
- */
-void file_sb_list_add(struct file *file, struct super_block *sb)
-{
-	if (likely(!(file->f_mode & FMODE_WRITE)))
-		return;
-	if (!S_ISREG(file_inode(file)->i_mode))
-		return;
-	lg_local_lock(&files_lglock);
-	__file_sb_list_add(file, sb);
-	lg_local_unlock(&files_lglock);
-}
-
-/**
- * file_sb_list_del - remove a file from the sb's file list
- * @file: file to remove
- * @sb: sb to remove it from
- *
- * Use this function to remove a file from its superblock.
- */
-void file_sb_list_del(struct file *file)
-{
-	if (!list_empty(&file->f_u.fu_list)) {
-		lg_local_lock_cpu(&files_lglock, file_list_cpu(file));
-		list_del_init(&file->f_u.fu_list);
-		lg_local_unlock_cpu(&files_lglock, file_list_cpu(file));
-	}
-}
-
-#ifdef CONFIG_SMP
-
-/*
- * These macros iterate all files on all CPUs for a given superblock.
- * files_lglock must be held globally.
- */
-#define do_file_list_for_each_entry(__sb, __file)		\
-{								\
-	int i;							\
-	for_each_possible_cpu(i) {				\
-		struct list_head *list;				\
-		list = per_cpu_ptr((__sb)->s_files, i);		\
-		list_for_each_entry((__file), list, f_u.fu_list)
-
-#define while_file_list_for_each_entry				\
-	}							\
-}
-
-#else
-
-#define do_file_list_for_each_entry(__sb, __file)		\
-{								\
-	struct list_head *list;					\
-	list = &(sb)->s_files;					\
-	list_for_each_entry((__file), list, f_u.fu_list)
-
-#define while_file_list_for_each_entry				\
-}
-
-#endif
-
-/**
- *	mark_files_ro - mark all files read-only
- *	@sb: superblock in question
- *
- *	All files are marked read-only.  We don't care about pending
- *	delete files so this should be used in 'force' mode only.
- */
-void mark_files_ro(struct super_block *sb)
-{
-	struct file *f;
-
-	lg_global_lock(&files_lglock);
-	do_file_list_for_each_entry(sb, f) {
-		if (!file_count(f))
-			continue;
-		if (!(f->f_mode & FMODE_WRITE))
-			continue;
-		spin_lock(&f->f_lock);
-		f->f_mode &= ~FMODE_WRITE;
-		spin_unlock(&f->f_lock);
-		if (file_check_writeable(f) != 0)
-			continue;
-		__mnt_drop_write(f->f_path.mnt);
-		file_release_write(f);
-	} while_file_list_for_each_entry;
-	lg_global_unlock(&files_lglock);
-}
-
 void __init files_init(unsigned long mempages)
 { 
 	unsigned long n;
@@ -483,6 +359,5 @@ void __init files_init(unsigned long mempages)
 	n = (mempages * (PAGE_SIZE / 1024)) / 10;
 	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
 	files_defer_init();
-	lg_lock_init(&files_lglock, "files_lglock");
 	percpu_counter_init(&nr_files, 0);
 } 
diff --git a/fs/internal.h b/fs/internal.h
index 4a11e75ce14d..465742407466 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -73,9 +73,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
 /*
  * file_table.c
  */
-extern void file_sb_list_add(struct file *f, struct super_block *sb);
-extern void file_sb_list_del(struct file *f);
-extern void mark_files_ro(struct super_block *);
 extern struct file *get_empty_filp(void);
 
 /*
diff --git a/fs/open.c b/fs/open.c
index a1465b1ec8c7..fffbed40dbe9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -685,7 +685,6 @@ static int do_dentry_open(struct file *f,
 	}
 
 	f->f_mapping = inode->i_mapping;
-	file_sb_list_add(f, inode->i_sb);
 
 	if (unlikely(f->f_mode & FMODE_PATH)) {
 		f->f_op = &empty_fops;
@@ -724,7 +723,6 @@ static int do_dentry_open(struct file *f,
 
 cleanup_all:
 	fops_put(f->f_op);
-	file_sb_list_del(f);
 	if (f->f_mode & FMODE_WRITE) {
 		put_write_access(inode);
 		if (!special_file(inode->i_mode)) {
diff --git a/fs/super.c b/fs/super.c
index 743bb7118053..e5f6c2cfac38 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -140,9 +140,6 @@ static void destroy_super(struct super_block *s)
 	int i;
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
-#ifdef CONFIG_SMP
-	free_percpu(s->s_files);
-#endif
 	for (i = 0; i < SB_FREEZE_LEVELS; i++)
 		percpu_counter_destroy(&s->s_writers.counter[i]);
 	security_sb_free(s);
@@ -172,15 +169,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 	if (security_sb_alloc(s))
 		goto fail;
 
-#ifdef CONFIG_SMP
-	s->s_files = alloc_percpu(struct list_head);
-	if (!s->s_files)
-		goto fail;
-	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
-#else
-	INIT_LIST_HEAD(&s->s_files);
-#endif
 	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
 		if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0)
 			goto fail;
@@ -722,7 +710,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	   make sure there are no rw files opened */
 	if (remount_ro) {
 		if (force) {
-			mark_files_ro(sb);
+			sb->s_readonly_remount = 1;
+			smp_wmb();
 		} else {
 			retval = sb_prepare_remount_readonly(sb);
 			if (retval)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2ab8a67ee054..2b0f4e974480 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -764,12 +764,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
 #define FILE_MNT_WRITE_RELEASED	2
 
 struct file {
-	/*
-	 * fu_list becomes invalid after file_free is called and queued via
-	 * fu_rcuhead for RCU freeing
-	 */
 	union {
-		struct list_head	fu_list;
 		struct llist_node	fu_llist;
 		struct rcu_head 	fu_rcuhead;
 	} f_u;
@@ -783,9 +778,6 @@ struct file {
 	 * Must not be taken from IRQ context.
 	 */
 	spinlock_t		f_lock;
-#ifdef CONFIG_SMP
-	int			f_sb_list_cpu;
-#endif
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -1264,11 +1256,6 @@ struct super_block {
 
 	struct list_head	s_inodes;	/* all inodes */
 	struct hlist_bl_head	s_anon;		/* anonymous dentries for (nfs) exporting */
-#ifdef CONFIG_SMP
-	struct list_head __percpu *s_files;
-#else
-	struct list_head	s_files;
-#endif
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
 	struct block_device	*s_bdev;
 	struct backing_dev_info *s_bdi;
-- 
cgit v1.2.3


From 0f6ed63b170778b9c93fb0ae4017f110c9ee6416 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 14:19:39 -0400
Subject: no need to keep brlock macros anymore...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/lglock.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 0d24e932db0b..96549abe8842 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -25,16 +25,6 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 
-/* can make br locks by using local lock for read side, global lock for write */
-#define br_lock_init(name)	lg_lock_init(name, #name)
-#define br_read_lock(name)	lg_local_lock(name)
-#define br_read_unlock(name)	lg_local_unlock(name)
-#define br_write_lock(name)	lg_global_lock(name)
-#define br_write_unlock(name)	lg_global_unlock(name)
-
-#define DEFINE_BRLOCK(name)		DEFINE_LGLOCK(name)
-#define DEFINE_STATIC_BRLOCK(name)	DEFINE_STATIC_LGLOCK(name)
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define LOCKDEP_INIT_MAP lockdep_init_map
 #else
-- 
cgit v1.2.3


From ecc8c7725e6c21528329b34acae2a1d64b3af89b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 15:32:35 -0400
Subject: new helper: dump_emit()

dump_write() analog, takes core_dump_params instead of file,
keeps track of the amount written in cprm->written and checks for
cprm->limit.  Start using it in binfmt_elf.c...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf.c          | 60 +++++++++++++++++++-----------------------------
 fs/coredump.c            | 14 +++++++++++
 include/linux/binfmts.h  |  1 +
 include/linux/coredump.h |  2 ++
 4 files changed, 40 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 501c8a4d6eb1..00fd9c969a27 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1225,35 +1225,23 @@ static int notesize(struct memelfnote *en)
 	return sz;
 }
 
-#define DUMP_WRITE(addr, nr, foffset)	\
-	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
-
-static int alignfile(struct file *file, loff_t *foffset)
+static int alignfile(struct coredump_params *cprm)
 {
 	static const char buf[4] = { 0, };
-	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
-	return 1;
+	return dump_emit(cprm, buf, roundup(cprm->written, 4) - cprm->written);
 }
 
-static int writenote(struct memelfnote *men, struct file *file,
-			loff_t *foffset)
+static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 {
 	struct elf_note en;
 	en.n_namesz = strlen(men->name) + 1;
 	en.n_descsz = men->datasz;
 	en.n_type = men->type;
 
-	DUMP_WRITE(&en, sizeof(en), foffset);
-	DUMP_WRITE(men->name, en.n_namesz, foffset);
-	if (!alignfile(file, foffset))
-		return 0;
-	DUMP_WRITE(men->data, men->datasz, foffset);
-	if (!alignfile(file, foffset))
-		return 0;
-
-	return 1;
+	return dump_emit(cprm, &en, sizeof(en)) &&
+	    dump_emit(cprm, men->name, en.n_namesz) && alignfile(cprm) &&
+	    dump_emit(cprm, men->data, men->datasz) && alignfile(cprm);
 }
-#undef DUMP_WRITE
 
 static void fill_elf_header(struct elfhdr *elf, int segs,
 			    u16 machine, u32 flags)
@@ -1702,7 +1690,7 @@ static size_t get_note_info_size(struct elf_note_info *info)
  * process-wide notes are interleaved after the first thread-specific note.
  */
 static int write_note_info(struct elf_note_info *info,
-			   struct file *file, loff_t *foffset)
+			   struct coredump_params *cprm)
 {
 	bool first = 1;
 	struct elf_thread_core_info *t = info->thread;
@@ -1710,22 +1698,22 @@ static int write_note_info(struct elf_note_info *info,
 	do {
 		int i;
 
-		if (!writenote(&t->notes[0], file, foffset))
+		if (!writenote(&t->notes[0], cprm))
 			return 0;
 
-		if (first && !writenote(&info->psinfo, file, foffset))
+		if (first && !writenote(&info->psinfo, cprm))
 			return 0;
-		if (first && !writenote(&info->signote, file, foffset))
+		if (first && !writenote(&info->signote, cprm))
 			return 0;
-		if (first && !writenote(&info->auxv, file, foffset))
+		if (first && !writenote(&info->auxv, cprm))
 			return 0;
 		if (first && info->files.data &&
-				!writenote(&info->files, file, foffset))
+				!writenote(&info->files, cprm))
 			return 0;
 
 		for (i = 1; i < info->thread_notes; ++i)
 			if (t->notes[i].data &&
-			    !writenote(&t->notes[i], file, foffset))
+			    !writenote(&t->notes[i], cprm))
 				return 0;
 
 		first = 0;
@@ -1935,13 +1923,13 @@ static size_t get_note_info_size(struct elf_note_info *info)
 }
 
 static int write_note_info(struct elf_note_info *info,
-			   struct file *file, loff_t *foffset)
+			   struct coredump_params *cprm)
 {
 	int i;
 	struct list_head *t;
 
 	for (i = 0; i < info->numnote; i++)
-		if (!writenote(info->notes + i, file, foffset))
+		if (!writenote(info->notes + i, cprm))
 			return 0;
 
 	/* write out the thread status notes section */
@@ -1950,7 +1938,7 @@ static int write_note_info(struct elf_note_info *info,
 				list_entry(t, struct elf_thread_status, list);
 
 		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], file, foffset))
+			if (!writenote(&tmp->notes[i], cprm))
 				return 0;
 	}
 
@@ -2136,13 +2124,10 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 	offset = dataoff;
 
-	size += sizeof(*elf);
-	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+	if (!dump_emit(cprm, elf, sizeof(*elf)))
 		goto end_coredump;
 
-	size += sizeof(*phdr4note);
-	if (size > cprm->limit
-	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
+	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
 		goto end_coredump;
 
 	/* Write program headers for segments dump */
@@ -2164,19 +2149,20 @@ static int elf_core_dump(struct coredump_params *cprm)
 			phdr.p_flags |= PF_X;
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
-		size += sizeof(phdr);
-		if (size > cprm->limit
-		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 			goto end_coredump;
 	}
+	size = cprm->written;
 
 	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
 		goto end_coredump;
 
+	cprm->written = foffset;	/* will disappear */
  	/* write out the notes section */
-	if (!write_note_info(&info, cprm->file, &foffset))
+	if (!write_note_info(&info, cprm))
 		goto end_coredump;
 
+	foffset = cprm->written;
 	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
 		goto end_coredump;
 
diff --git a/fs/coredump.c b/fs/coredump.c
index 42c3b8423669..319f973bab72 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -693,6 +693,20 @@ int dump_write(struct file *file, const void *addr, int nr)
 }
 EXPORT_SYMBOL(dump_write);
 
+int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
+{
+	struct file *file = cprm->file;
+	if (dump_interrupted() || !access_ok(VERIFY_READ, addr, nr))
+		return 0;
+	if (cprm->written + nr > cprm->limit)
+		return 0;
+	if (file->f_op->write(file, addr, nr, &file->f_pos) != nr)
+		return 0;
+	cprm->written += nr;
+	return 1;
+}
+EXPORT_SYMBOL(dump_emit);
+
 int dump_seek(struct file *file, loff_t off)
 {
 	int ret = 1;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e8112ae50531..8aa507e7a41a 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -61,6 +61,7 @@ struct coredump_params {
 	struct file *file;
 	unsigned long limit;
 	unsigned long mm_flags;
+	loff_t written;
 };
 
 /*
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index a98f1ca60407..2959376a9ad5 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -10,8 +10,10 @@
  * These are the only things you should do on a core-file: use only these
  * functions to write out all the necessary info.
  */
+struct coredump_params;
 extern int dump_write(struct file *file, const void *addr, int nr);
 extern int dump_seek(struct file *file, loff_t off);
+extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 #ifdef CONFIG_COREDUMP
 extern void do_coredump(siginfo_t *siginfo);
 #else
-- 
cgit v1.2.3


From 506f21c556c747bb07b893f146220ec45cda381b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 17:22:57 -0400
Subject: switch elf_core_write_extra_phdrs() to dump_emit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/kernel/elfcore.c |  6 ++----
 arch/x86/um/elfcore.c      |  7 ++-----
 fs/binfmt_elf.c            |  4 ++--
 fs/binfmt_elf_fdpic.c      |  4 +++-
 include/linux/elfcore.h    |  5 +++--
 kernel/elfcore.c           | 10 +++-------
 6 files changed, 15 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index bac1639bc320..798ce543da4b 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -11,8 +11,7 @@ Elf64_Half elf_core_extra_phdrs(void)
 	return GATE_EHDR->e_phnum;
 }
 
-int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-			       unsigned long limit)
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	const struct elf_phdr *const gate_phdrs =
 		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
@@ -35,8 +34,7 @@ int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
 			phdr.p_offset += ofs;
 		}
 		phdr.p_paddr = 0; /* match other core phdrs */
-		*size += sizeof(phdr);
-		if (*size > limit || !dump_write(file, &phdr, sizeof(phdr)))
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 			return 0;
 	}
 	return 1;
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index 6bb49b687c97..fc21f98efafe 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -11,8 +11,7 @@ Elf32_Half elf_core_extra_phdrs(void)
 	return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
 }
 
-int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-			       unsigned long limit)
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	if ( vsyscall_ehdr ) {
 		const struct elfhdr *const ehdrp =
@@ -32,9 +31,7 @@ int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
 				phdr.p_offset += ofs;
 			}
 			phdr.p_paddr = 0; /* match other core phdrs */
-			*size += sizeof(phdr);
-			if (*size > limit
-			    || !dump_write(file, &phdr, sizeof(phdr)))
+			if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 				return 0;
 		}
 	}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 00fd9c969a27..35c4886dae2d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2152,11 +2152,11 @@ static int elf_core_dump(struct coredump_params *cprm)
 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 			goto end_coredump;
 	}
-	size = cprm->written;
 
-	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+	if (!elf_core_write_extra_phdrs(cprm, offset))
 		goto end_coredump;
 
+	size = cprm->written;
 	cprm->written = foffset;	/* will disappear */
  	/* write out the notes section */
 	if (!write_note_info(&info, cprm))
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index ea4c6273b4a5..44db8b92121a 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1791,9 +1791,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 			goto end_coredump;
 	}
 
-	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+	cprm->written = size;
+	if (!elf_core_write_extra_phdrs(cprm, offset))
 		goto end_coredump;
 
+	size = cprm->written;
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
 		if (!writenote(notes + i, cprm->file, &foffset))
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index cdd3d13efce7..1b92a8c40624 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -6,6 +6,8 @@
 #include <asm/elf.h>
 #include <uapi/linux/elfcore.h>
 
+struct coredump_params;
+
 static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
 {
 #ifdef ELF_CORE_COPY_REGS
@@ -63,8 +65,7 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse
  */
 extern Elf_Half elf_core_extra_phdrs(void);
 extern int
-elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-			   unsigned long limit);
+elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
 extern int
 elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit);
 extern size_t elf_core_extra_data_size(void);
diff --git a/kernel/elfcore.c b/kernel/elfcore.c
index ff915efef66d..e556751d15d9 100644
--- a/kernel/elfcore.c
+++ b/kernel/elfcore.c
@@ -1,23 +1,19 @@
 #include <linux/elf.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
-
-#include <asm/elf.h>
-
+#include <linux/binfmts.h>
 
 Elf_Half __weak elf_core_extra_phdrs(void)
 {
 	return 0;
 }
 
-int __weak elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-				      unsigned long limit)
+int __weak elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	return 1;
 }
 
-int __weak elf_core_write_extra_data(struct file *file, size_t *size,
-				     unsigned long limit)
+int __weak elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	return 1;
 }
-- 
cgit v1.2.3


From aa3e7eaf0a0f06edd2b733e84e7e8ffe108e8786 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 17:50:15 -0400
Subject: switch elf_core_write_extra_data() to dump_emit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/kernel/elfcore.c | 6 ++----
 arch/x86/um/elfcore.c      | 8 ++------
 fs/binfmt_elf.c            | 4 +++-
 fs/binfmt_elf_fdpic.c      | 4 +++-
 include/linux/elfcore.h    | 2 +-
 5 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index 798ce543da4b..04bc8fd5f893 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -40,8 +40,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 	return 1;
 }
 
-int elf_core_write_extra_data(struct file *file, size_t *size,
-			      unsigned long limit)
+int elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	const struct elf_phdr *const gate_phdrs =
 		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
@@ -52,8 +51,7 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
 			void *addr = (void *)gate_phdrs[i].p_vaddr;
 			size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
 
-			*size += memsz;
-			if (*size > limit || !dump_write(file, addr, memsz))
+			if (!dump_emit(cprm, addr, memsz))
 				return 0;
 			break;
 		}
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index fc21f98efafe..7bb89a27a5e4 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -38,8 +38,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 	return 1;
 }
 
-int elf_core_write_extra_data(struct file *file, size_t *size,
-			      unsigned long limit)
+int elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	if ( vsyscall_ehdr ) {
 		const struct elfhdr *const ehdrp =
@@ -52,10 +51,7 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
 			if (phdrp[i].p_type == PT_LOAD) {
 				void *addr = (void *) phdrp[i].p_vaddr;
 				size_t filesz = phdrp[i].p_filesz;
-
-				*size += filesz;
-				if (*size > limit
-				    || !dump_write(file, addr, filesz))
+				if (!dump_emit(cprm, addr, filesz))
 					return 0;
 			}
 		}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 35c4886dae2d..a998b221b53a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2196,8 +2196,10 @@ static int elf_core_dump(struct coredump_params *cprm)
 		}
 	}
 
-	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+	cprm->written = size;
+	if (!elf_core_write_extra_data(cprm))
 		goto end_coredump;
+	size = cprm->written;
 
 	if (e_phnum == PN_XNUM) {
 		size += sizeof(*shdr4extnum);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 44db8b92121a..77bf7e33e706 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1818,8 +1818,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 				    cprm->mm_flags) < 0)
 		goto end_coredump;
 
-	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+	cprm->written = size;
+	if (!elf_core_write_extra_data(cprm))
 		goto end_coredump;
+	size = cprm->written;
 
 	if (e_phnum == PN_XNUM) {
 		size += sizeof(*shdr4extnum);
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 1b92a8c40624..698d51a0eea3 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -67,7 +67,7 @@ extern Elf_Half elf_core_extra_phdrs(void);
 extern int
 elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
 extern int
-elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit);
+elf_core_write_extra_data(struct coredump_params *cprm);
 extern size_t elf_core_extra_data_size(void);
 
 #endif /* _LINUX_ELFCORE_H */
-- 
cgit v1.2.3


From cdc3d5627d5f7c4e6b6372b9fb39cba0fe6a9b2a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 22:24:29 -0400
Subject: switch elf_coredump_extra_notes_write() to dump_emit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/include/asm/spu.h               |  3 +-
 arch/powerpc/platforms/cell/spu_syscalls.c   |  5 ++--
 arch/powerpc/platforms/cell/spufs/coredump.c | 44 +++++++++++-----------------
 arch/powerpc/platforms/cell/spufs/spufs.h    |  3 +-
 fs/binfmt_elf.c                              |  7 ++---
 include/linux/elf.h                          |  6 ++--
 6 files changed, 30 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
index 93f280e23279..37b7ca39ec9f 100644
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -235,6 +235,7 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
 
 /* syscalls implemented in spufs */
 struct file;
+struct coredump_params;
 struct spufs_calls {
 	long (*create_thread)(const char __user *name,
 					unsigned int flags, umode_t mode,
@@ -242,7 +243,7 @@ struct spufs_calls {
 	long (*spu_run)(struct file *filp, __u32 __user *unpc,
 						__u32 __user *ustatus);
 	int (*coredump_extra_notes_size)(void);
-	int (*coredump_extra_notes_write)(struct file *file, loff_t *foffset);
+	int (*coredump_extra_notes_write)(struct coredump_params *cprm);
 	void (*notify_spus_active)(void);
 	struct module *owner;
 };
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index db4e638cf408..3844f1397fc3 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
+#include <linux/binfmts.h>
 
 #include <asm/spu.h>
 
@@ -126,7 +127,7 @@ int elf_coredump_extra_notes_size(void)
 	return ret;
 }
 
-int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spufs_calls *calls;
 	int ret;
@@ -135,7 +136,7 @@ int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 	if (!calls)
 		return 0;
 
-	ret = calls->coredump_extra_notes_write(file, foffset);
+	ret = calls->coredump_extra_notes_write(cprm);
 
 	spufs_calls_put(calls);
 
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index c9500ea7be2f..5d9b0a288f36 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -27,6 +27,8 @@
 #include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
 
 #include <asm/uaccess.h>
 
@@ -52,35 +54,24 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
  * These are the only things you should do on a core-file: use only these
  * functions to write out all the necessary info.
  */
-static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
+static int spufs_dump_write(struct coredump_params *cprm, const void *addr, int nr)
 {
-	unsigned long limit = rlimit(RLIMIT_CORE);
-	ssize_t written;
-
-	if (*foffset + nr > limit)
+	if (!dump_emit(cprm, addr, nr))
 		return -EIO;
-
-	written = file->f_op->write(file, addr, nr, &file->f_pos);
-	*foffset += written;
-
-	if (written != nr)
-		return -EIO;
-
 	return 0;
 }
 
-static int spufs_dump_align(struct file *file, char *buf, loff_t new_off,
-			    loff_t *foffset)
+static int spufs_dump_align(struct coredump_params *cprm, char *buf, loff_t new_off)
 {
 	int rc, size;
 
-	size = min((loff_t)PAGE_SIZE, new_off - *foffset);
+	size = min((loff_t)PAGE_SIZE, new_off - cprm->written);
 	memset(buf, 0, size);
 
 	rc = 0;
-	while (rc == 0 && new_off > *foffset) {
-		size = min((loff_t)PAGE_SIZE, new_off - *foffset);
-		rc = spufs_dump_write(file, buf, size, foffset);
+	while (rc == 0 && new_off > cprm->written) {
+		size = min((loff_t)PAGE_SIZE, new_off - cprm->written);
+		rc = spufs_dump_write(cprm, buf, size);
 	}
 
 	return rc;
@@ -165,7 +156,7 @@ int spufs_coredump_extra_notes_size(void)
 }
 
 static int spufs_arch_write_note(struct spu_context *ctx, int i,
-				  struct file *file, int dfd, loff_t *foffset)
+				  struct coredump_params *cprm, int dfd)
 {
 	loff_t pos = 0;
 	int sz, rc, nread, total = 0;
@@ -186,22 +177,22 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	en.n_descsz = sz;
 	en.n_type = NT_SPU;
 
-	rc = spufs_dump_write(file, &en, sizeof(en), foffset);
+	rc = spufs_dump_write(cprm, &en, sizeof(en));
 	if (rc)
 		goto out;
 
-	rc = spufs_dump_write(file, fullname, en.n_namesz, foffset);
+	rc = spufs_dump_write(cprm, fullname, en.n_namesz);
 	if (rc)
 		goto out;
 
-	rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset);
+	rc = spufs_dump_align(cprm, buf, roundup(cprm->written, 4));
 	if (rc)
 		goto out;
 
 	do {
 		nread = do_coredump_read(i, ctx, buf, bufsz, &pos);
 		if (nread > 0) {
-			rc = spufs_dump_write(file, buf, nread, foffset);
+			rc = spufs_dump_write(cprm, buf, nread);
 			if (rc)
 				goto out;
 			total += nread;
@@ -213,15 +204,14 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 		goto out;
 	}
 
-	rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4),
-			      foffset);
+	rc = spufs_dump_align(cprm, buf, roundup(cprm->written - total + sz, 4));
 
 out:
 	free_page((unsigned long)buf);
 	return rc;
 }
 
-int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spu_context *ctx;
 	int fd, j, rc;
@@ -233,7 +223,7 @@ int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 			return rc;
 
 		for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
-			rc = spufs_arch_write_note(ctx, j, file, fd, foffset);
+			rc = spufs_arch_write_note(ctx, j, cprm, fd);
 			if (rc) {
 				spu_release_saved(ctx);
 				return rc;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 67852ade4c01..0ba3c9598358 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -247,12 +247,13 @@ extern const struct spufs_tree_descr spufs_dir_debug_contents[];
 
 /* system call implementation */
 extern struct spufs_calls spufs_calls;
+struct coredump_params;
 long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
 long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
 			umode_t mode, struct file *filp);
 /* ELF coredump callbacks for writing SPU ELF notes */
 extern int spufs_coredump_extra_notes_size(void);
-extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
 
 extern const struct file_operations spufs_context_fops;
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index ddab40d3d828..3bf75d767a4a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2037,7 +2037,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	size_t size = 0;
 	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
-	loff_t offset = 0, dataoff, foffset;
+	loff_t offset = 0, dataoff;
 	struct elf_note_info info = { };
 	struct elf_phdr *phdr4note = NULL;
 	struct elf_shdr *shdr4extnum = NULL;
@@ -2160,12 +2160,11 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!write_note_info(&info, cprm))
 		goto end_coredump;
 
-	foffset = cprm->written;
-	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
+	if (elf_coredump_extra_notes_write(cprm))
 		goto end_coredump;
 
 	/* Align to page */
-	if (!dump_seek(cprm->file, dataoff - foffset))
+	if (!dump_seek(cprm->file, dataoff - cprm->written))
 		goto end_coredump;
 
 	cprm->written = size;
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 40a3c0e01b2b..67a5fa7830c4 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -39,13 +39,13 @@ extern Elf64_Dyn _DYNAMIC [];
 
 /* Optional callbacks to write extra ELF notes. */
 struct file;
+struct coredump_params;
 
 #ifndef ARCH_HAVE_EXTRA_ELF_NOTES
 static inline int elf_coredump_extra_notes_size(void) { return 0; }
-static inline int elf_coredump_extra_notes_write(struct file *file,
-			loff_t *foffset) { return 0; }
+static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { return 0; }
 #else
 extern int elf_coredump_extra_notes_size(void);
-extern int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int elf_coredump_extra_notes_write(struct coredump_params *cprm);
 #endif
 #endif /* _LINUX_ELF_H */
-- 
cgit v1.2.3


From 9b56d54380adb5fef71f687109bbd6f8413d694f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Oct 2013 09:26:08 -0400
Subject: dump_skip(): dump_seek() replacement taking coredump_params

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/ia32/ia32_aout.c |  2 +-
 fs/binfmt_aout.c          |  2 +-
 fs/binfmt_elf.c           |  4 ++--
 fs/binfmt_elf_fdpic.c     | 11 +++--------
 fs/coredump.c             | 43 ++++++++++++++-----------------------------
 include/linux/coredump.h  |  3 +--
 6 files changed, 22 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 9e26e9e85576..d21ff89207cd 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -187,7 +187,7 @@ static int aout_core_dump(struct coredump_params *cprm)
 	if (!dump_emit(cprm, &dump, sizeof(dump)))
 		goto end_coredump;
 	/* Now dump all of the user data.  Include malloced stuff as well */
-	if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
+	if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
 		goto end_coredump;
 	/* now we start writing out the user space info */
 	set_fs(USER_DS);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a4f847f77234..ca0ba15a7306 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -87,7 +87,7 @@ static int aout_core_dump(struct coredump_params *cprm)
 	if (!dump_emit(cprm, &dump, sizeof(dump)))
 		goto end_coredump;
 /* Now dump all of the user data.  Include malloced stuff as well */
-	if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
+	if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
 		goto end_coredump;
 /* now we start writing out the user space info */
 	set_fs(USER_DS);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4f7dda9d86b5..c56ae3264a65 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2162,7 +2162,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		goto end_coredump;
 
 	/* Align to page */
-	if (!dump_seek(cprm->file, dataoff - cprm->written))
+	if (!dump_skip(cprm, dataoff - cprm->written))
 		goto end_coredump;
 
 	for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2183,7 +2183,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 				kunmap(page);
 				page_cache_release(page);
 			} else
-				stop = !dump_seek(cprm->file, PAGE_SIZE);
+				stop = !dump_skip(cprm, PAGE_SIZE);
 			if (stop)
 				goto end_coredump;
 		}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 70e299917898..a69fc4ae1c85 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1509,7 +1509,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
 				kunmap(page);
 				page_cache_release(page);
 			} else {
-				res = dump_seek(file, PAGE_SIZE);
+				res = dump_skip(cprm, PAGE_SIZE);
 			}
 			if (!res)
 				return false;
@@ -1547,11 +1547,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	int has_dumped = 0;
 	mm_segment_t fs;
 	int segs;
-	size_t size = 0;
 	int i;
 	struct vm_area_struct *vma;
 	struct elfhdr *elf = NULL;
-	loff_t offset = 0, dataoff, foffset;
+	loff_t offset = 0, dataoff;
 	int numnote;
 	struct memelfnote *notes = NULL;
 	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
@@ -1682,7 +1681,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 
 	offset += sizeof(*elf);				/* Elf header */
 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
-	foffset = offset;
 
 	/* Write notes phdr entry */
 	{
@@ -1751,8 +1749,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	if (!elf_core_write_extra_phdrs(cprm, offset))
 		goto end_coredump;
 
-	size = cprm->written;
-	cprm->written = foffset;
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
 		if (!writenote(notes + i, cprm))
@@ -1768,10 +1764,9 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 				goto end_coredump;
 	}
 
-	if (!dump_seek(cprm->file, dataoff - cprm->written))
+	if (!dump_skip(cprm, dataoff - cprm->written))
 		goto end_coredump;
 
-	cprm->written = size;
 	if (!elf_fdpic_dump_segments(cprm))
 		goto end_coredump;
 
diff --git a/fs/coredump.c b/fs/coredump.c
index 2472ed9e682c..18baf2c009d4 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -685,14 +685,6 @@ fail:
  * do on a core-file: use only these functions to write out all the
  * necessary info.
  */
-int dump_write(struct file *file, const void *addr, int nr)
-{
-	return !dump_interrupted() &&
-		access_ok(VERIFY_READ, addr, nr) &&
-		file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-EXPORT_SYMBOL(dump_write);
-
 int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
 {
 	struct file *file = cprm->file;
@@ -714,32 +706,25 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
 }
 EXPORT_SYMBOL(dump_emit);
 
-int dump_seek(struct file *file, loff_t off)
+int dump_skip(struct coredump_params *cprm, size_t nr)
 {
-	int ret = 1;
-
+	static char zeroes[PAGE_SIZE];
+	struct file *file = cprm->file;
 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+		if (cprm->written + nr > cprm->limit)
+			return 0;
 		if (dump_interrupted() ||
-		    file->f_op->llseek(file, off, SEEK_CUR) < 0)
+		    file->f_op->llseek(file, nr, SEEK_CUR) < 0)
 			return 0;
+		cprm->written += nr;
+		return 1;
 	} else {
-		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
-		if (!buf)
-			return 0;
-		while (off > 0) {
-			unsigned long n = off;
-
-			if (n > PAGE_SIZE)
-				n = PAGE_SIZE;
-			if (!dump_write(file, buf, n)) {
-				ret = 0;
-				break;
-			}
-			off -= n;
+		while (nr > PAGE_SIZE) {
+			if (!dump_emit(cprm, zeroes, PAGE_SIZE))
+				return 0;
+			nr -= PAGE_SIZE;
 		}
-		free_page((unsigned long)buf);
+		return dump_emit(cprm, zeroes, nr);
 	}
-	return ret;
 }
-EXPORT_SYMBOL(dump_seek);
+EXPORT_SYMBOL(dump_skip);
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 2959376a9ad5..07a0af93f230 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -11,8 +11,7 @@
  * functions to write out all the necessary info.
  */
 struct coredump_params;
-extern int dump_write(struct file *file, const void *addr, int nr);
-extern int dump_seek(struct file *file, loff_t off);
+extern int dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 #ifdef CONFIG_COREDUMP
 extern void do_coredump(siginfo_t *siginfo);
-- 
cgit v1.2.3


From 22a8cb8248ba5d340307ba72432253b1dbdb5cf7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Oct 2013 11:05:01 -0400
Subject: new helper: dump_align()

dump_skip to given alignment...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/platforms/cell/spufs/coredump.c |  2 +-
 fs/binfmt_elf.c                              | 10 ++--------
 fs/binfmt_elf_fdpic.c                        | 10 ++--------
 fs/coredump.c                                |  9 +++++++++
 include/linux/coredump.h                     |  1 +
 5 files changed, 15 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 1aaaa586fa50..be6212ddbf06 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -156,7 +156,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	if (!dump_emit(cprm, fullname, en.n_namesz))
 		goto Eio;
 
-	if (!dump_skip(cprm, roundup(cprm->written, 4) - cprm->written))
+	if (!dump_align(cprm, 4))
 		goto Eio;
 
 	do {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c56ae3264a65..864154972670 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1225,12 +1225,6 @@ static int notesize(struct memelfnote *en)
 	return sz;
 }
 
-static int alignfile(struct coredump_params *cprm)
-{
-	static const char buf[4] = { 0, };
-	return dump_emit(cprm, buf, roundup(cprm->written, 4) - cprm->written);
-}
-
 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 {
 	struct elf_note en;
@@ -1239,8 +1233,8 @@ static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 	en.n_type = men->type;
 
 	return dump_emit(cprm, &en, sizeof(en)) &&
-	    dump_emit(cprm, men->name, en.n_namesz) && alignfile(cprm) &&
-	    dump_emit(cprm, men->data, men->datasz) && alignfile(cprm);
+	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
+	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
 }
 
 static void fill_elf_header(struct elfhdr *elf, int segs,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a69fc4ae1c85..645f6e56f378 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1267,12 +1267,6 @@ static int notesize(struct memelfnote *en)
 
 /* #define DEBUG */
 
-static int alignfile(struct coredump_params *cprm)
-{
-	static const char buf[4] = { 0, };
-	return dump_emit(cprm, buf, roundup(cprm->written, 4) - cprm->written);
-}
-
 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 {
 	struct elf_note en;
@@ -1281,8 +1275,8 @@ static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 	en.n_type = men->type;
 
 	return dump_emit(cprm, &en, sizeof(en)) &&
-		dump_emit(cprm, men->name, en.n_namesz) && alignfile(cprm) &&
-		dump_emit(cprm, men->data, men->datasz) && alignfile(cprm);
+		dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
+		dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
 }
 
 static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
diff --git a/fs/coredump.c b/fs/coredump.c
index 18baf2c009d4..dc1f937413d5 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -728,3 +728,12 @@ int dump_skip(struct coredump_params *cprm, size_t nr)
 	}
 }
 EXPORT_SYMBOL(dump_skip);
+
+int dump_align(struct coredump_params *cprm, int align)
+{
+	unsigned mod = cprm->written & (align - 1);
+	if (align & (align - 1))
+		return -EINVAL;
+	return mod ? dump_skip(cprm, align - mod) : 0;
+}
+EXPORT_SYMBOL(dump_align);
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 07a0af93f230..d8eb880be82a 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -13,6 +13,7 @@
 struct coredump_params;
 extern int dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
+extern int dump_align(struct coredump_params *cprm, int align);
 #ifdef CONFIG_COREDUMP
 extern void do_coredump(siginfo_t *siginfo);
 #else
-- 
cgit v1.2.3


From 6987843ff7e836ea65b554905aec34d2fad05c94 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 Oct 2013 22:35:11 -0400
Subject: take anon inode allocation to libfs.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/anon_inodes.c   | 50 ++------------------------------------------------
 fs/libfs.c         | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  1 +
 3 files changed, 46 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 85c961849953..22f9698a1214 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,7 +24,6 @@
 
 static struct vfsmount *anon_inode_mnt __read_mostly;
 static struct inode *anon_inode_inode;
-static const struct file_operations anon_inode_fops;
 
 /*
  * anon_inodefs_dname() is called from d_path().
@@ -39,51 +38,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
 	.d_dname	= anon_inodefs_dname,
 };
 
-/*
- * nop .set_page_dirty method so that people can use .page_mkwrite on
- * anon inodes.
- */
-static int anon_set_page_dirty(struct page *page)
-{
-	return 0;
-};
-
-static const struct address_space_operations anon_aops = {
-	.set_page_dirty = anon_set_page_dirty,
-};
-
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-static struct inode *anon_inode_mkinode(struct super_block *s)
-{
-	struct inode *inode = new_inode_pseudo(s);
-
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-
-	inode->i_ino = get_next_ino();
-	inode->i_fop = &anon_inode_fops;
-
-	inode->i_mapping->a_ops = &anon_aops;
-
-	/*
-	 * Mark the inode dirty from the very beginning,
-	 * that way it will never be moved to the dirty
-	 * list because mark_inode_dirty() will think
-	 * that it already _is_ on the dirty list.
-	 */
-	inode->i_state = I_DIRTY;
-	inode->i_mode = S_IRUSR | S_IWUSR;
-	inode->i_uid = current_fsuid();
-	inode->i_gid = current_fsgid();
-	inode->i_flags |= S_PRIVATE;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	return inode;
-}
-
 static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
 				int flags, const char *dev_name, void *data)
 {
@@ -92,7 +46,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
 			&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
 	if (!IS_ERR(root)) {
 		struct super_block *s = root->d_sb;
-		anon_inode_inode = anon_inode_mkinode(s);
+		anon_inode_inode = alloc_anon_inode(s);
 		if (IS_ERR(anon_inode_inode)) {
 			dput(root);
 			deactivate_locked_super(s);
@@ -134,7 +88,7 @@ struct file *anon_inode_getfile_private(const char *name,
 	if (fops->owner && !try_module_get(fops->owner))
 		return ERR_PTR(-ENOENT);
 
-	inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb);
+	inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
 	if (IS_ERR(inode)) {
 		file = ERR_PTR(-ENOMEM);
 		goto err_module;
diff --git a/fs/libfs.c b/fs/libfs.c
index 8c5018493154..5de06947ba5e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1002,3 +1002,46 @@ void kfree_put_link(struct dentry *dentry, struct nameidata *nd,
 		kfree(s);
 }
 EXPORT_SYMBOL(kfree_put_link);
+
+/*
+ * nop .set_page_dirty method so that people can use .page_mkwrite on
+ * anon inodes.
+ */
+static int anon_set_page_dirty(struct page *page)
+{
+	return 0;
+};
+
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
+ */
+struct inode *alloc_anon_inode(struct super_block *s)
+{
+	static const struct address_space_operations anon_aops = {
+		.set_page_dirty = anon_set_page_dirty,
+	};
+	struct inode *inode = new_inode_pseudo(s);
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	inode->i_ino = get_next_ino();
+	inode->i_mapping->a_ops = &anon_aops;
+
+	/*
+	 * Mark the inode dirty from the very beginning,
+	 * that way it will never be moved to the dirty
+	 * list because mark_inode_dirty() will think
+	 * that it already _is_ on the dirty list.
+	 */
+	inode->i_state = I_DIRTY;
+	inode->i_mode = S_IRUSR | S_IWUSR;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_flags |= S_PRIVATE;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	return inode;
+}
+EXPORT_SYMBOL(alloc_anon_inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2b0f4e974480..e190326ac212 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2562,6 +2562,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
 extern int simple_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata);
+extern struct inode *alloc_anon_inode(struct super_block *);
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
-- 
cgit v1.2.3


From 078d8e624c1837aa8ad65e58054a4a40d7ac46d2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 9 Oct 2013 10:26:28 -0400
Subject: ... and kill anon_inode_getfile_private()

it's a seriously misguided API, now fortunately without users.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/anon_inodes.c            | 66 ---------------------------------------------
 include/linux/anon_inodes.h |  3 ---
 2 files changed, 69 deletions(-)

(limited to 'include/linux')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 22f9698a1214..24084732b1d0 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -62,72 +62,6 @@ static struct file_system_type anon_inode_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-/**
- * anon_inode_getfile_private - creates a new file instance by hooking it up to an
- *                      anonymous inode, and a dentry that describe the "class"
- *                      of the file
- *
- * @name:    [in]    name of the "class" of the new file
- * @fops:    [in]    file operations for the new file
- * @priv:    [in]    private data for the new file (will be file's private_data)
- * @flags:   [in]    flags
- *
- *
- * Similar to anon_inode_getfile, but each file holds a single inode.
- *
- */
-struct file *anon_inode_getfile_private(const char *name,
-					const struct file_operations *fops,
-					void *priv, int flags)
-{
-	struct qstr this;
-	struct path path;
-	struct file *file;
-	struct inode *inode;
-
-	if (fops->owner && !try_module_get(fops->owner))
-		return ERR_PTR(-ENOENT);
-
-	inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
-	if (IS_ERR(inode)) {
-		file = ERR_PTR(-ENOMEM);
-		goto err_module;
-	}
-
-	/*
-	 * Link the inode to a directory entry by creating a unique name
-	 * using the inode sequence number.
-	 */
-	file = ERR_PTR(-ENOMEM);
-	this.name = name;
-	this.len = strlen(name);
-	this.hash = 0;
-	path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
-	if (!path.dentry)
-		goto err_module;
-
-	path.mnt = mntget(anon_inode_mnt);
-
-	d_instantiate(path.dentry, inode);
-
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
-	if (IS_ERR(file))
-		goto err_dput;
-
-	file->f_mapping = inode->i_mapping;
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
-	file->private_data = priv;
-
-	return file;
-
-err_dput:
-	path_put(&path);
-err_module:
-	module_put(fops->owner);
-	return file;
-}
-EXPORT_SYMBOL_GPL(anon_inode_getfile_private);
-
 /**
  * anon_inode_getfile - creates a new file instance by hooking it up to an
  *                      anonymous inode, and a dentry that describe the "class"
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index cf573c22b81e..8013a45242fe 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -13,9 +13,6 @@ struct file_operations;
 struct file *anon_inode_getfile(const char *name,
 				const struct file_operations *fops,
 				void *priv, int flags);
-struct file *anon_inode_getfile_private(const char *name,
-				const struct file_operations *fops,
-				void *priv, int flags);
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
 		     void *priv, int flags);
 
-- 
cgit v1.2.3


From ce3959604878c1c693979ec552069dc8bdb5ccde Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 13 Oct 2013 17:23:53 -0400
Subject: constify copy_siginfo_to_user{,32}()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/kernel/signal32.c     | 2 +-
 arch/ia64/kernel/signal.c        | 2 +-
 arch/mips/kernel/signal32.c      | 2 +-
 arch/parisc/kernel/signal32.c    | 2 +-
 arch/parisc/kernel/signal32.h    | 2 +-
 arch/powerpc/kernel/signal_32.c  | 2 +-
 arch/s390/kernel/compat_signal.c | 2 +-
 arch/sparc/kernel/signal32.c     | 2 +-
 arch/tile/kernel/compat_signal.c | 2 +-
 arch/x86/ia32/ia32_signal.c      | 2 +-
 fs/binfmt_elf.c                  | 2 +-
 include/asm-generic/siginfo.h    | 2 +-
 include/linux/compat.h           | 2 +-
 kernel/signal.c                  | 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index e393174fe859..07da8c0fde2e 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -150,7 +150,7 @@ static inline int get_sigset_t(sigset_t *set,
 	return 0;
 }
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 3637e03d2282..33cab9a8adff 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -105,7 +105,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 }
 
 int
-copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from)
 {
 	if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
 		return -EFAULT;
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 57de8b751627..1905a419aa46 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -314,7 +314,7 @@ SYSCALL_DEFINE3(32_sigaction, long, sig, const struct compat_sigaction __user *,
 	return ret;
 }
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c
index 6c6a271a6140..984abbee71ca 100644
--- a/arch/parisc/kernel/signal32.c
+++ b/arch/parisc/kernel/signal32.c
@@ -319,7 +319,7 @@ copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from)
 }
 
 int
-copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	compat_uptr_t addr;
 	compat_int_t val;
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index 72ab41a51f32..af51d4ccee42 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -34,7 +34,7 @@ struct compat_ucontext {
 
 /* ELF32 signal handling */
 
-int copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from);
+int copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from);
 int copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from);
 
 /* In a deft move of uber-hackery, we decide to carry the top half of all
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index bebdf1a1a540..149cd583d061 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -891,7 +891,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 #endif
 
 #ifdef CONFIG_PPC64
-int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s)
+int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
 {
 	int err;
 
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 1389b637dae5..44b25269a970 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -49,7 +49,7 @@ typedef struct
 	__u32 gprs_high[NUM_GPRS];
 } rt_sigframe32;
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index b524f91dd0e5..ee789d2ef05d 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -68,7 +68,7 @@ struct rt_signal_frame32 {
 	/* __siginfo_rwin_t * */u32 rwin_save;
 } __attribute__((aligned(8)));
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index 85e00b2f39bf..19c04b5ce408 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -49,7 +49,7 @@ struct compat_rt_sigframe {
 	struct compat_ucontext uc;
 };
 
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 665a730307f2..220675795e08 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -34,7 +34,7 @@
 #include <asm/sys_ia32.h>
 #include <asm/smap.h>
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err = 0;
 	bool ia32 = test_thread_flag(TIF_IA32);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 864154972670..a6f8cb498b0e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1374,7 +1374,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
 }
 
 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
-		siginfo_t *siginfo)
+		const siginfo_t *siginfo)
 {
 	mm_segment_t old_fs = get_fs();
 	set_fs(KERNEL_DS);
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index b685d3bd32e2..3d1a3af5cf59 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -32,6 +32,6 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
 
 #endif
 
-extern int copy_siginfo_to_user(struct siginfo __user *to, struct siginfo *from);
+extern int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
 
 #endif
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 345da00a86e0..78cdf51ff5ba 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -362,7 +362,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
 long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 		       unsigned long bitmap_size);
 int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
diff --git a/kernel/signal.c b/kernel/signal.c
index ded28b91fa53..940b30ee9a30 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2723,7 +2723,7 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
 
 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
 
-int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
-- 
cgit v1.2.3


From ec57941e031685de434916e5398d0ca1d44cd374 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 13 Oct 2013 17:57:29 -0400
Subject: constify do_coredump() argument

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf.c          | 4 ++--
 fs/coredump.c            | 2 +-
 include/linux/binfmts.h  | 2 +-
 include/linux/coredump.h | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a6f8cb498b0e..1af412b96e06 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1581,7 +1581,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 
 static int fill_note_info(struct elfhdr *elf, int phdrs,
 			  struct elf_note_info *info,
-			  siginfo_t *siginfo, struct pt_regs *regs)
+			  const siginfo_t *siginfo, struct pt_regs *regs)
 {
 	struct task_struct *dump_task = current;
 	const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1830,7 +1830,7 @@ static int elf_note_info_init(struct elf_note_info *info)
 
 static int fill_note_info(struct elfhdr *elf, int phdrs,
 			  struct elf_note_info *info,
-			  siginfo_t *siginfo, struct pt_regs *regs)
+			  const siginfo_t *siginfo, struct pt_regs *regs)
 {
 	struct list_head *t;
 
diff --git a/fs/coredump.c b/fs/coredump.c
index dc1f937413d5..62406b6959b6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -485,7 +485,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 	return err;
 }
 
-void do_coredump(siginfo_t *siginfo)
+void do_coredump(const siginfo_t *siginfo)
 {
 	struct core_state core_state;
 	struct core_name cn;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 8aa507e7a41a..790d3305a5a7 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -56,7 +56,7 @@ struct linux_binprm {
 
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
-	siginfo_t *siginfo;
+	const siginfo_t *siginfo;
 	struct pt_regs *regs;
 	struct file *file;
 	unsigned long limit;
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index d8eb880be82a..d016a121a8c4 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -15,9 +15,9 @@ extern int dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 extern int dump_align(struct coredump_params *cprm, int align);
 #ifdef CONFIG_COREDUMP
-extern void do_coredump(siginfo_t *siginfo);
+extern void do_coredump(const siginfo_t *siginfo);
 #else
-static inline void do_coredump(siginfo_t *siginfo) {}
+static inline void do_coredump(const siginfo_t *siginfo) {}
 #endif
 
 #endif /* _LINUX_COREDUMP_H */
-- 
cgit v1.2.3


From b18825a7c8e37a7cf6abb97a12a6ad71af160de7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 12 Sep 2013 19:22:53 +0100
Subject: VFS: Put a small type field into struct dentry::d_flags

Put a type field into struct dentry::d_flags to indicate if the dentry is one
of the following types that relate particularly to pathwalk:

	Miss (negative dentry)
	Directory
	"Automount" directory (defective - no i_op->lookup())
	Symlink
	Other (regular, socket, fifo, device)

The type field is set to one of the first five types on a dentry by calls to
__d_instantiate() and d_obtain_alias() from information in the inode (if one is
given).

The type is cleared by dentry_unlink_inode() when it reconstitutes an existing
dentry as a negative dentry.

Accessors provided are:

	d_set_type(dentry, type)
	d_is_directory(dentry)
	d_is_autodir(dentry)
	d_is_symlink(dentry)
	d_is_file(dentry)
	d_is_negative(dentry)
	d_is_positive(dentry)

A bunch of checks in pathname resolution switched to those.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            |  42 +++++++++++++++++---
 fs/namei.c             |  95 ++++++++++++++++++---------------------------
 include/linux/dcache.h | 103 +++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 158 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index fb7bcf3ba5d6..525770e576db 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -343,6 +343,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
 	__releases(dentry->d_inode->i_lock)
 {
 	struct inode *inode = dentry->d_inode;
+	__d_clear_type(dentry);
 	dentry->d_inode = NULL;
 	hlist_del_init(&dentry->d_alias);
 	dentry_rcuwalk_barrier(dentry);
@@ -1648,14 +1649,42 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
 }
 EXPORT_SYMBOL(d_set_d_op);
 
+static unsigned d_flags_for_inode(struct inode *inode)
+{
+	unsigned add_flags = DCACHE_FILE_TYPE;
+
+	if (!inode)
+		return DCACHE_MISS_TYPE;
+
+	if (S_ISDIR(inode->i_mode)) {
+		add_flags = DCACHE_DIRECTORY_TYPE;
+		if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
+			if (unlikely(!inode->i_op->lookup))
+				add_flags = DCACHE_AUTODIR_TYPE;
+			else
+				inode->i_opflags |= IOP_LOOKUP;
+		}
+	} else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
+		if (unlikely(inode->i_op->follow_link))
+			add_flags = DCACHE_SYMLINK_TYPE;
+		else
+			inode->i_opflags |= IOP_NOFOLLOW;
+	}
+
+	if (unlikely(IS_AUTOMOUNT(inode)))
+		add_flags |= DCACHE_NEED_AUTOMOUNT;
+	return add_flags;
+}
+
 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 {
+	unsigned add_flags = d_flags_for_inode(inode);
+
 	spin_lock(&dentry->d_lock);
-	if (inode) {
-		if (unlikely(IS_AUTOMOUNT(inode)))
-			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+	dentry->d_flags &= ~DCACHE_ENTRY_TYPE;
+	dentry->d_flags |= add_flags;
+	if (inode)
 		hlist_add_head(&dentry->d_alias, &inode->i_dentry);
-	}
 	dentry->d_inode = inode;
 	dentry_rcuwalk_barrier(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -1860,6 +1889,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	static const struct qstr anonstring = QSTR_INIT("/", 1);
 	struct dentry *tmp;
 	struct dentry *res;
+	unsigned add_flags;
 
 	if (!inode)
 		return ERR_PTR(-ESTALE);
@@ -1885,9 +1915,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	}
 
 	/* attach a disconnected dentry */
+	add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED;
+
 	spin_lock(&tmp->d_lock);
 	tmp->d_inode = inode;
-	tmp->d_flags |= DCACHE_DISCONNECTED;
+	tmp->d_flags |= add_flags;
 	hlist_add_head(&tmp->d_alias, &inode->i_dentry);
 	hlist_bl_lock(&tmp->d_sb->s_anon);
 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
diff --git a/fs/namei.c b/fs/namei.c
index e5c0118ba9f8..e1fa43346c61 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1501,18 +1501,9 @@ static void terminate_walk(struct nameidata *nd)
  * so we keep a cache of "no, this doesn't need follow_link"
  * for the common case.
  */
-static inline int should_follow_link(struct inode *inode, int follow)
+static inline int should_follow_link(struct dentry *dentry, int follow)
 {
-	if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
-		if (likely(inode->i_op->follow_link))
-			return follow;
-
-		/* This gets set once for the inode lifetime */
-		spin_lock(&inode->i_lock);
-		inode->i_opflags |= IOP_NOFOLLOW;
-		spin_unlock(&inode->i_lock);
-	}
-	return 0;
+	return unlikely(d_is_symlink(dentry)) ? follow : 0;
 }
 
 static inline int walk_component(struct nameidata *nd, struct path *path,
@@ -1542,7 +1533,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
 	if (!inode)
 		goto out_path_put;
 
-	if (should_follow_link(inode, follow)) {
+	if (should_follow_link(path->dentry, follow)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(unlazy_walk(nd, path->dentry))) {
 				err = -ECHILD;
@@ -1600,26 +1591,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
 	return res;
 }
 
-/*
- * We really don't want to look at inode->i_op->lookup
- * when we don't have to. So we keep a cache bit in
- * the inode ->i_opflags field that says "yes, we can
- * do lookup on this inode".
- */
-static inline int can_lookup(struct inode *inode)
-{
-	if (likely(inode->i_opflags & IOP_LOOKUP))
-		return 1;
-	if (likely(!inode->i_op->lookup))
-		return 0;
-
-	/* We do this once for the lifetime of the inode */
-	spin_lock(&inode->i_lock);
-	inode->i_opflags |= IOP_LOOKUP;
-	spin_unlock(&inode->i_lock);
-	return 1;
-}
-
 /*
  * We can do the critical dentry name comparison and hashing
  * operations one word at a time, but we are limited to:
@@ -1823,7 +1794,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 			if (err)
 				return err;
 		}
-		if (!can_lookup(nd->inode)) {
+		if (!d_is_directory(nd->path.dentry)) {
 			err = -ENOTDIR; 
 			break;
 		}
@@ -1841,9 +1812,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 	nd->flags = flags | LOOKUP_JUMPED;
 	nd->depth = 0;
 	if (flags & LOOKUP_ROOT) {
-		struct inode *inode = nd->root.dentry->d_inode;
+		struct dentry *root = nd->root.dentry;
+		struct inode *inode = root->d_inode;
 		if (*name) {
-			if (!can_lookup(inode))
+			if (!d_is_directory(root))
 				return -ENOTDIR;
 			retval = inode_permission(inode, MAY_EXEC);
 			if (retval)
@@ -1899,7 +1871,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 		dentry = f.file->f_path.dentry;
 
 		if (*name) {
-			if (!can_lookup(dentry->d_inode)) {
+			if (!d_is_directory(dentry)) {
 				fdput(f);
 				return -ENOTDIR;
 			}
@@ -1981,7 +1953,7 @@ static int path_lookupat(int dfd, const char *name,
 		err = complete_walk(nd);
 
 	if (!err && nd->flags & LOOKUP_DIRECTORY) {
-		if (!can_lookup(nd->inode)) {
+		if (!d_is_directory(nd->path.dentry)) {
 			path_put(&nd->path);
 			err = -ENOTDIR;
 		}
@@ -2273,7 +2245,7 @@ done:
 	}
 	path->dentry = dentry;
 	path->mnt = mntget(nd->path.mnt);
-	if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW))
+	if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
 		return 1;
 	follow_mount(path);
 	error = 0;
@@ -2417,12 +2389,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
  * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
  *     nfs_async_unlink().
  */
-static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
+static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
 {
+	struct inode *inode = victim->d_inode;
 	int error;
 
-	if (!victim->d_inode)
+	if (d_is_negative(victim))
 		return -ENOENT;
+	BUG_ON(!inode);
 
 	BUG_ON(victim->d_parent->d_inode != dir);
 	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
@@ -2432,15 +2406,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 		return error;
 	if (IS_APPEND(dir))
 		return -EPERM;
-	if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
-	    IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+
+	if (check_sticky(dir, inode) || IS_APPEND(inode) ||
+	    IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
 		return -EPERM;
 	if (isdir) {
-		if (!S_ISDIR(victim->d_inode->i_mode))
+		if (!d_is_directory(victim) && !d_is_autodir(victim))
 			return -ENOTDIR;
 		if (IS_ROOT(victim))
 			return -EBUSY;
-	} else if (S_ISDIR(victim->d_inode->i_mode))
+	} else if (d_is_directory(victim) || d_is_autodir(victim))
 		return -EISDIR;
 	if (IS_DEADDIR(dir))
 		return -ENOENT;
@@ -2974,7 +2949,7 @@ retry_lookup:
 	/*
 	 * create/update audit record if it already exists.
 	 */
-	if (path->dentry->d_inode)
+	if (d_is_positive(path->dentry))
 		audit_inode(name, path->dentry, 0);
 
 	/*
@@ -3003,12 +2978,12 @@ retry_lookup:
 finish_lookup:
 	/* we _can_ be in RCU mode here */
 	error = -ENOENT;
-	if (!inode) {
+	if (d_is_negative(path->dentry)) {
 		path_to_nameidata(path, nd);
 		goto out;
 	}
 
-	if (should_follow_link(inode, !symlink_ok)) {
+	if (should_follow_link(path->dentry, !symlink_ok)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(unlazy_walk(nd, path->dentry))) {
 				error = -ECHILD;
@@ -3037,10 +3012,11 @@ finish_open:
 	}
 	audit_inode(name, nd->path.dentry, 0);
 	error = -EISDIR;
-	if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
+	if ((open_flag & O_CREAT) &&
+	    (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
 		goto out;
 	error = -ENOTDIR;
-	if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
+	if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
 		goto out;
 	if (!S_ISREG(nd->inode->i_mode))
 		will_truncate = false;
@@ -3266,7 +3242,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 	nd.root.mnt = mnt;
 	nd.root.dentry = dentry;
 
-	if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+	if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
 		return ERR_PTR(-ELOOP);
 
 	file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -3316,8 +3292,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
 		goto unlock;
 
 	error = -EEXIST;
-	if (dentry->d_inode)
+	if (d_is_positive(dentry))
 		goto fail;
+
 	/*
 	 * Special case - lookup gave negative, but... we had foo/bar/
 	 * From the vfs_mknod() POV we just have a negative dentry -
@@ -3706,7 +3683,7 @@ retry:
 		if (nd.last.name[nd.last.len])
 			goto slashes;
 		inode = dentry->d_inode;
-		if (!inode)
+		if (d_is_negative(dentry))
 			goto slashes;
 		ihold(inode);
 		error = security_path_unlink(&nd.path, dentry);
@@ -3731,8 +3708,12 @@ exit1:
 	return error;
 
 slashes:
-	error = !dentry->d_inode ? -ENOENT :
-		S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
+	if (d_is_negative(dentry))
+		error = -ENOENT;
+	else if (d_is_directory(dentry) || d_is_autodir(dentry))
+		error = -EISDIR;
+	else
+		error = -ENOTDIR;
 	goto exit2;
 }
 
@@ -4046,7 +4027,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	       struct inode *new_dir, struct dentry *new_dentry)
 {
 	int error;
-	int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
+	int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
 	const unsigned char *old_name;
 
 	if (old_dentry->d_inode == new_dentry->d_inode)
@@ -4134,10 +4115,10 @@ retry:
 		goto exit3;
 	/* source must exist */
 	error = -ENOENT;
-	if (!old_dentry->d_inode)
+	if (d_is_negative(old_dentry))
 		goto exit4;
 	/* unless the source is a directory trailing slashes give -ENOTDIR */
-	if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
+	if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
 		error = -ENOTDIR;
 		if (oldnd.last.name[oldnd.last.len])
 			goto exit4;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 716c3760ee39..57e87e749a48 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -169,13 +169,13 @@ struct dentry_operations {
  */
 
 /* d_flags entries */
-#define DCACHE_OP_HASH		0x0001
-#define DCACHE_OP_COMPARE	0x0002
-#define DCACHE_OP_REVALIDATE	0x0004
-#define DCACHE_OP_DELETE	0x0008
-#define DCACHE_OP_PRUNE         0x0010
+#define DCACHE_OP_HASH			0x00000001
+#define DCACHE_OP_COMPARE		0x00000002
+#define DCACHE_OP_REVALIDATE		0x00000004
+#define DCACHE_OP_DELETE		0x00000008
+#define DCACHE_OP_PRUNE			0x00000010
 
-#define	DCACHE_DISCONNECTED	0x0020
+#define	DCACHE_DISCONNECTED		0x00000020
      /* This dentry is possibly not currently connected to the dcache tree, in
       * which case its parent will either be itself, or will have this flag as
       * well.  nfsd will not use a dentry with this bit set, but will first
@@ -186,30 +186,38 @@ struct dentry_operations {
       * dentry into place and return that dentry rather than the passed one,
       * typically using d_splice_alias. */
 
-#define DCACHE_REFERENCED	0x0040  /* Recently used, don't discard. */
-#define DCACHE_RCUACCESS	0x0080	/* Entry has ever been RCU-visible */
+#define DCACHE_REFERENCED		0x00000040 /* Recently used, don't discard. */
+#define DCACHE_RCUACCESS		0x00000080 /* Entry has ever been RCU-visible */
 
-#define DCACHE_CANT_MOUNT	0x0100
-#define DCACHE_GENOCIDE		0x0200
-#define DCACHE_SHRINK_LIST	0x0400
+#define DCACHE_CANT_MOUNT		0x00000100
+#define DCACHE_GENOCIDE			0x00000200
+#define DCACHE_SHRINK_LIST		0x00000400
 
-#define DCACHE_OP_WEAK_REVALIDATE	0x0800
+#define DCACHE_OP_WEAK_REVALIDATE	0x00000800
 
-#define DCACHE_NFSFS_RENAMED	0x1000
+#define DCACHE_NFSFS_RENAMED		0x00001000
      /* this dentry has been "silly renamed" and has to be deleted on the last
       * dput() */
-#define DCACHE_COOKIE		0x2000	/* For use by dcookie subsystem */
-#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x4000
+#define DCACHE_COOKIE			0x00002000 /* For use by dcookie subsystem */
+#define DCACHE_FSNOTIFY_PARENT_WATCHED	0x00004000
      /* Parent inode is watched by some fsnotify listener */
 
-#define DCACHE_MOUNTED		0x10000	/* is a mountpoint */
-#define DCACHE_NEED_AUTOMOUNT	0x20000	/* handle automount on this dir */
-#define DCACHE_MANAGE_TRANSIT	0x40000	/* manage transit from this dirent */
+#define DCACHE_DENTRY_KILLED		0x00008000
+
+#define DCACHE_MOUNTED			0x00010000 /* is a mountpoint */
+#define DCACHE_NEED_AUTOMOUNT		0x00020000 /* handle automount on this dir */
+#define DCACHE_MANAGE_TRANSIT		0x00040000 /* manage transit from this dirent */
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
-#define DCACHE_LRU_LIST		0x80000
-#define DCACHE_DENTRY_KILLED	0x100000
+#define DCACHE_LRU_LIST			0x00080000
+
+#define DCACHE_ENTRY_TYPE		0x00700000
+#define DCACHE_MISS_TYPE		0x00000000 /* Negative dentry */
+#define DCACHE_DIRECTORY_TYPE		0x00100000 /* Normal directory */
+#define DCACHE_AUTODIR_TYPE		0x00200000 /* Lookupless directory (presumed automount) */
+#define DCACHE_SYMLINK_TYPE		0x00300000 /* Symlink */
+#define DCACHE_FILE_TYPE		0x00400000 /* Other file type */
 
 extern seqlock_t rename_lock;
 
@@ -394,6 +402,61 @@ static inline bool d_mountpoint(const struct dentry *dentry)
 	return dentry->d_flags & DCACHE_MOUNTED;
 }
 
+/*
+ * Directory cache entry type accessor functions.
+ */
+static inline void __d_set_type(struct dentry *dentry, unsigned type)
+{
+	dentry->d_flags = (dentry->d_flags & ~DCACHE_ENTRY_TYPE) | type;
+}
+
+static inline void __d_clear_type(struct dentry *dentry)
+{
+	__d_set_type(dentry, DCACHE_MISS_TYPE);
+}
+
+static inline void d_set_type(struct dentry *dentry, unsigned type)
+{
+	spin_lock(&dentry->d_lock);
+	__d_set_type(dentry, type);
+	spin_unlock(&dentry->d_lock);
+}
+
+static inline unsigned __d_entry_type(const struct dentry *dentry)
+{
+	return dentry->d_flags & DCACHE_ENTRY_TYPE;
+}
+
+static inline bool d_is_directory(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE;
+}
+
+static inline bool d_is_autodir(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE;
+}
+
+static inline bool d_is_symlink(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE;
+}
+
+static inline bool d_is_file(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_FILE_TYPE;
+}
+
+static inline bool d_is_negative(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_MISS_TYPE;
+}
+
+static inline bool d_is_positive(const struct dentry *dentry)
+{
+	return !d_is_negative(dentry);
+}
+
 extern int sysctl_vfs_cache_pressure;
 
 static inline unsigned long vfs_pressure_ratio(unsigned long val)
-- 
cgit v1.2.3


From b7a6ec52dd4eced4a9bcda9ca85b3c8af84d3c90 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 2 Oct 2013 17:01:18 -0400
Subject: vfs: split out vfs_getattr_nosec

The filehandle lookup code wants this version of getattr.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/stat.c          | 31 +++++++++++++++++++++++++------
 include/linux/fs.h |  1 +
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/stat.c b/fs/stat.c
index d0ea7ef75e26..ae0c3cef9927 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -37,14 +37,21 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
 
 EXPORT_SYMBOL(generic_fillattr);
 
-int vfs_getattr(struct path *path, struct kstat *stat)
+/**
+ * vfs_getattr_nosec - getattr without security checks
+ * @path: file to get attributes from
+ * @stat: structure to return attributes in
+ *
+ * Get attributes without calling security_inode_getattr.
+ *
+ * Currently the only caller other than vfs_getattr is internal to the
+ * filehandle lookup code, which uses only the inode number and returns
+ * no attributes to any user.  Any other code probably wants
+ * vfs_getattr.
+ */
+int vfs_getattr_nosec(struct path *path, struct kstat *stat)
 {
 	struct inode *inode = path->dentry->d_inode;
-	int retval;
-
-	retval = security_inode_getattr(path->mnt, path->dentry);
-	if (retval)
-		return retval;
 
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(path->mnt, path->dentry, stat);
@@ -53,6 +60,18 @@ int vfs_getattr(struct path *path, struct kstat *stat)
 	return 0;
 }
 
+EXPORT_SYMBOL(vfs_getattr_nosec);
+
+int vfs_getattr(struct path *path, struct kstat *stat)
+{
+	int retval;
+
+	retval = security_inode_getattr(path->mnt, path->dentry);
+	if (retval)
+		return retval;
+	return vfs_getattr_nosec(path, stat);
+}
+
 EXPORT_SYMBOL(vfs_getattr);
 
 int vfs_fstat(unsigned int fd, struct kstat *stat)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e190326ac212..5e44b0893db8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2504,6 +2504,7 @@ extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
+int vfs_getattr_nosec(struct path *path, struct kstat *stat);
 extern int vfs_getattr(struct path *, struct kstat *);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_add_bytes(struct inode *inode, loff_t bytes);
-- 
cgit v1.2.3


From 375e289ea85166c5241c570940e7e7e966c63a9f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 18 Apr 2012 15:16:33 -0400
Subject: vfs: pull ext4's double-i_mutex-locking into common code

We want to do this elsewhere as well.

Also catch any attempts to use it for directories (where this ordering
would conflict with ancestor-first directory ordering in lock_rename).

Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Dave Chinner <david@fromorbit.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/ext4.h        |  2 --
 fs/ext4/ioctl.c       |  4 ++--
 fs/ext4/move_extent.c | 40 ++--------------------------------------
 fs/inode.c            | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/fs.h    |  3 +++
 5 files changed, 43 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af815ea9d7cc..d01d62315f7e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2734,8 +2734,6 @@ extern void ext4_double_down_write_data_sem(struct inode *first,
 					    struct inode *second);
 extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
 					  struct inode *donor_inode);
-void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
-void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 			     __u64 start_orig, __u64 start_donor,
 			     __u64 len, __u64 *moved_len);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a569d335f804..60589b60e9b0 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
 
 	/* Protect orig inodes against a truncate and make sure,
 	 * that only 1 swap_inode_boot_loader is running. */
-	ext4_inode_double_lock(inode, inode_bl);
+	lock_two_nondirectories(inode, inode_bl);
 
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode_bl->i_data, 0);
@@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
 	ext4_inode_resume_unlocked_dio(inode);
 	ext4_inode_resume_unlocked_dio(inode_bl);
 
-	ext4_inode_double_unlock(inode, inode_bl);
+	unlock_two_nondirectories(inode, inode_bl);
 
 	iput(inode_bl);
 
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 7fa4d855dbd5..773b503bd18c 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1202,42 +1202,6 @@ mext_check_arguments(struct inode *orig_inode,
 	return 0;
 }
 
-/**
- * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
- *
- * @inode1:	the inode structure
- * @inode2:	the inode structure
- *
- * Lock two inodes' i_mutex
- */
-void
-ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
-{
-	BUG_ON(inode1 == inode2);
-	if (inode1 < inode2) {
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
-	} else {
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
-	}
-}
-
-/**
- * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
- *
- * @inode1:     the inode that is released first
- * @inode2:     the inode that is released second
- *
- */
-
-void
-ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
-{
-	mutex_unlock(&inode1->i_mutex);
-	mutex_unlock(&inode2->i_mutex);
-}
-
 /**
  * ext4_move_extents - Exchange the specified range of a file
  *
@@ -1327,7 +1291,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 		return -EINVAL;
 	}
 	/* Protect orig and donor inodes against a truncate */
-	ext4_inode_double_lock(orig_inode, donor_inode);
+	lock_two_nondirectories(orig_inode, donor_inode);
 
 	/* Wait for all existing dio workers */
 	ext4_inode_block_unlocked_dio(orig_inode);
@@ -1535,7 +1499,7 @@ out:
 	ext4_double_up_write_data_sem(orig_inode, donor_inode);
 	ext4_inode_resume_unlocked_dio(orig_inode);
 	ext4_inode_resume_unlocked_dio(donor_inode);
-	ext4_inode_double_unlock(orig_inode, donor_inode);
+	unlock_two_nondirectories(orig_inode, donor_inode);
 
 	return ret;
 }
diff --git a/fs/inode.c b/fs/inode.c
index bf7a8b3ab937..ef25afdf3906 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -942,6 +942,42 @@ void unlock_new_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(unlock_new_inode);
 
+/**
+ * lock_two_nondirectories - take two i_mutexes on non-directory objects
+ * @inode1: first inode to lock
+ * @inode2: second inode to lock
+ */
+void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
+{
+	WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
+	if (inode1 == inode2 || !inode2) {
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+		return;
+	}
+	WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
+	if (inode1 < inode2) {
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+	} else {
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
+	}
+}
+EXPORT_SYMBOL(lock_two_nondirectories);
+
+/**
+ * unlock_two_nondirectories - release locks from lock_two_nondirectories()
+ * @inode1: first inode to unlock
+ * @inode2: second inode to unlock
+ */
+void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
+{
+	mutex_unlock(&inode1->i_mutex);
+	if (inode2 && inode2 != inode1)
+		mutex_unlock(&inode2->i_mutex);
+}
+EXPORT_SYMBOL(unlock_two_nondirectories);
+
 /**
  * iget5_locked - obtain an inode from a mounted file system
  * @sb:		super block of file system
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5e44b0893db8..4e1a0b41f966 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -637,6 +637,9 @@ enum inode_i_mutex_lock_class
 	I_MUTEX_QUOTA
 };
 
+void lock_two_nondirectories(struct inode *, struct inode*);
+void unlock_two_nondirectories(struct inode *, struct inode*);
+
 /*
  * NOTE: in a 32bit arch with a preemptable kernel and
  * an UP compile the i_size_read/write must be atomic
-- 
cgit v1.2.3


From 40bd22c9f8617ddd5da06044c81f72a2cf700791 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 18 Apr 2012 15:21:34 -0400
Subject: vfs: rename I_MUTEX_QUOTA now that it's not used for quotas

I_MUTEX_QUOTA is now just being used whenever we want to lock two
non-directories.  So the name isn't right.  I_MUTEX_NONDIR2 isn't
especially elegant but it's the best I could think of.

Also fix some outdated documentation.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c         | 4 ++--
 include/linux/fs.h | 9 ++++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 51f329b98aa4..ce48c359ce9e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -957,10 +957,10 @@ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
 	WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
 	if (inode1 < inode2) {
 		mutex_lock(&inode1->i_mutex);
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_QUOTA);
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
 	} else {
 		mutex_lock(&inode2->i_mutex);
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_QUOTA);
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2);
 	}
 }
 EXPORT_SYMBOL(lock_two_nondirectories);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4e1a0b41f966..ed7f94af1ab2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -623,10 +623,13 @@ static inline int inode_unhashed(struct inode *inode)
  * 0: the object of the current VFS operation
  * 1: parent
  * 2: child/target
- * 3: quota file
+ * 3: xattr
+ * 4: second non-directory
+ * The last is for certain operations (such as rename) which lock two
+ * non-directories at once.
  *
  * The locking order between these classes is
- * parent -> child -> normal -> xattr -> quota
+ * parent -> child -> normal -> xattr -> second non-directory
  */
 enum inode_i_mutex_lock_class
 {
@@ -634,7 +637,7 @@ enum inode_i_mutex_lock_class
 	I_MUTEX_PARENT,
 	I_MUTEX_CHILD,
 	I_MUTEX_XATTR,
-	I_MUTEX_QUOTA
+	I_MUTEX_NONDIR2
 };
 
 void lock_two_nondirectories(struct inode *, struct inode*);
-- 
cgit v1.2.3


From 617588d5186c887eb94321b021bb5a46f896f4b3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 1 Jul 2011 15:18:34 -0400
Subject: locks: introduce new FL_DELEG lock flag

For now FL_DELEG is just a synonym for FL_LEASE.  So this patch doesn't
change behavior.

Next we'll modify break_lease to treat FL_DELEG leases differently, to
account for the fact that NFSv4 delegations should be broken in more
situations than Windows oplocks.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/locks.c          | 2 +-
 fs/nfsd/nfs4state.c | 2 +-
 include/linux/fs.h  | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index f155ffd648bb..079abcd8a836 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -134,7 +134,7 @@
 
 #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
-#define IS_LEASE(fl)	(fl->fl_flags & FL_LEASE)
+#define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG))
 
 static bool lease_breaking(struct file_lock *fl)
 {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a601fd49f997..f36a30a9f2d1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3008,7 +3008,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f
 		return NULL;
 	locks_init_lock(fl);
 	fl->fl_lmops = &nfsd_lease_mng_ops;
-	fl->fl_flags = FL_LEASE;
+	fl->fl_flags = FL_DELEG;
 	fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
 	fl->fl_end = OFFSET_MAX;
 	fl->fl_owner = (fl_owner_t)(dp->dl_file);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed7f94af1ab2..129e150f9e94 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -880,6 +880,7 @@ static inline int file_check_writeable(struct file *filp)
 
 #define FL_POSIX	1
 #define FL_FLOCK	2
+#define FL_DELEG	4	/* NFSv4 delegation */
 #define FL_ACCESS	8	/* not trying to lock, just looking */
 #define FL_EXISTS	16	/* when unlocking, test for existence */
 #define FL_LEASE	32	/* lease held on this file */
-- 
cgit v1.2.3


From df4e8d2c1d2bbbbace706bfe5417320c9e3fbee3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 5 Mar 2012 13:18:59 -0500
Subject: locks: implement delegations

Implement NFSv4 delegations at the vfs level using the new FL_DELEG lock
type.

Note nfsd is the only delegation user and is only using read
delegations.  Warn on any attempt to set a write delegation for now.
We'll come back to that case later.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/locks.c         | 55 ++++++++++++++++++++++++++++++++++++++++++++----------
 include/linux/fs.h | 18 +++++++++++++++---
 2 files changed, 60 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index 079abcd8a836..f99d52bdd05a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1292,28 +1292,40 @@ static void time_out_leases(struct inode *inode)
 	}
 }
 
+static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
+{
+	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
+		return false;
+	return locks_conflict(breaker, lease);
+}
+
 /**
  *	__break_lease	-	revoke all outstanding leases on file
  *	@inode: the inode of the file to return
- *	@mode: the open mode (read or write)
+ *	@mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
+ *	    break all leases
+ *	@type: FL_LEASE: break leases and delegations; FL_DELEG: break
+ *	    only delegations
  *
  *	break_lease (inlined for speed) has checked there already is at least
  *	some kind of lock (maybe a lease) on this file.  Leases are broken on
  *	a call to open() or truncate().  This function can sleep unless you
  *	specified %O_NONBLOCK to your open().
  */
-int __break_lease(struct inode *inode, unsigned int mode)
+int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 {
 	int error = 0;
 	struct file_lock *new_fl, *flock;
 	struct file_lock *fl;
 	unsigned long break_time;
 	int i_have_this_lease = 0;
+	bool lease_conflict = false;
 	int want_write = (mode & O_ACCMODE) != O_RDONLY;
 
 	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
 	if (IS_ERR(new_fl))
 		return PTR_ERR(new_fl);
+	new_fl->fl_flags = type;
 
 	spin_lock(&inode->i_lock);
 
@@ -1323,13 +1335,16 @@ int __break_lease(struct inode *inode, unsigned int mode)
 	if ((flock == NULL) || !IS_LEASE(flock))
 		goto out;
 
-	if (!locks_conflict(flock, new_fl))
+	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
+		if (leases_conflict(fl, new_fl)) {
+			lease_conflict = true;
+			if (fl->fl_owner == current->files)
+				i_have_this_lease = 1;
+		}
+	}
+	if (!lease_conflict)
 		goto out;
 
-	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
-		if (fl->fl_owner == current->files)
-			i_have_this_lease = 1;
-
 	break_time = 0;
 	if (lease_break_time > 0) {
 		break_time = jiffies + lease_break_time * HZ;
@@ -1338,6 +1353,8 @@ int __break_lease(struct inode *inode, unsigned int mode)
 	}
 
 	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
+		if (!leases_conflict(fl, new_fl))
+			continue;
 		if (want_write) {
 			if (fl->fl_flags & FL_UNLOCK_PENDING)
 				continue;
@@ -1379,7 +1396,7 @@ restart:
 		 */
 		for (flock = inode->i_flock; flock && IS_LEASE(flock);
 				flock = flock->fl_next) {
-			if (locks_conflict(new_fl, flock))
+			if (leases_conflict(new_fl, flock))
 				goto restart;
 		}
 		error = 0;
@@ -1460,9 +1477,26 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
 	struct file_lock *fl, **before, **my_before = NULL, *lease;
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
+	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
 	int error;
 
 	lease = *flp;
+	/*
+	 * In the delegation case we need mutual exclusion with
+	 * a number of operations that take the i_mutex.  We trylock
+	 * because delegations are an optional optimization, and if
+	 * there's some chance of a conflict--we'd rather not
+	 * bother, maybe that's a sign this just isn't a good file to
+	 * hand out a delegation on.
+	 */
+	if (is_deleg && !mutex_trylock(&inode->i_mutex))
+		return -EAGAIN;
+
+	if (is_deleg && arg == F_WRLCK) {
+		/* Write delegations are not currently supported: */
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
 
 	error = -EAGAIN;
 	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
@@ -1514,9 +1548,10 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
 		goto out;
 
 	locks_insert_lock(before, lease);
-	return 0;
-
+	error = 0;
 out:
+	if (is_deleg)
+		mutex_unlock(&inode->i_mutex);
 	return error;
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 129e150f9e94..8e4be1be1a62 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1022,7 +1022,7 @@ extern int vfs_test_lock(struct file *, struct file_lock *);
 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
-extern int __break_lease(struct inode *inode, unsigned int flags);
+extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
 extern void lease_get_mtime(struct inode *, struct timespec *time);
 extern int generic_setlease(struct file *, long, struct file_lock **);
 extern int vfs_setlease(struct file *, long, struct file_lock **);
@@ -1131,7 +1131,7 @@ static inline int flock_lock_file_wait(struct file *filp,
 	return -ENOLCK;
 }
 
-static inline int __break_lease(struct inode *inode, unsigned int mode)
+static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 {
 	return 0;
 }
@@ -1961,9 +1961,17 @@ static inline int locks_verify_truncate(struct inode *inode,
 static inline int break_lease(struct inode *inode, unsigned int mode)
 {
 	if (inode->i_flock)
-		return __break_lease(inode, mode);
+		return __break_lease(inode, mode, FL_LEASE);
 	return 0;
 }
+
+static inline int break_deleg(struct inode *inode, unsigned int mode)
+{
+	if (inode->i_flock)
+		return __break_lease(inode, mode, FL_DELEG);
+	return 0;
+}
+
 #else /* !CONFIG_FILE_LOCKING */
 static inline int locks_mandatory_locked(struct inode *inode)
 {
@@ -2003,6 +2011,10 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
 	return 0;
 }
 
+static inline int break_deleg(struct inode *inode, unsigned int mode)
+{
+	return 0;
+}
 #endif /* CONFIG_FILE_LOCKING */
 
 /* fs/open.c */
-- 
cgit v1.2.3


From b21996e36c8e3b92a84e972378bde80b43acd890 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 20 Sep 2011 09:14:34 -0400
Subject: locks: break delegations on unlink

We need to break delegations on any operation that changes the set of
links pointing to an inode.  Start with unlink.

Such operations also hold the i_mutex on a parent directory.  Breaking a
delegation may require waiting for a timeout (by default 90 seconds) in
the case of a unresponsive NFS client.  To avoid blocking all directory
operations, we therefore drop locks before waiting for the delegation.
The logic then looks like:

	acquire locks
	...
	test for delegation; if found:
		take reference on inode
		release locks
		wait for delegation break
		drop reference on inode
		retry

It is possible this could never terminate.  (Even if we take precautions
to prevent another delegation being acquired on the same inode, we could
get a different inode on each retry.)  But this seems very unlikely.

The initial test for a delegation happens after the lock on the target
inode is acquired, but the directory inode may have been acquired
further up the call stack.  We therefore add a "struct inode **"
argument to any intervening functions, which we use to pass the inode
back up to the caller in the case it needs a delegation synchronously
broken.

Cc: David Howells <dhowells@redhat.com>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Dustin Kirkland <dustin.kirkland@gazzang.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/base/devtmpfs.c |  2 +-
 fs/cachefiles/namei.c   |  2 +-
 fs/ecryptfs/inode.c     |  4 ++--
 fs/namei.c              | 42 +++++++++++++++++++++++++++++++++++++++---
 fs/nfsd/vfs.c           |  2 +-
 include/linux/fs.h      |  2 +-
 ipc/mqueue.c            |  2 +-
 7 files changed, 46 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 7413d065906b..1b8490e2fbde 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -324,7 +324,7 @@ static int handle_remove(const char *nodename, struct device *dev)
 			mutex_lock(&dentry->d_inode->i_mutex);
 			notify_change(dentry, &newattrs);
 			mutex_unlock(&dentry->d_inode->i_mutex);
-			err = vfs_unlink(parent.dentry->d_inode, dentry);
+			err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
 			if (!err || err == -ENOENT)
 				deleted = 1;
 		}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index f4a08d7fa2f7..31d480c0e046 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 		if (ret < 0) {
 			cachefiles_io_error(cache, "Unlink security error");
 		} else {
-			ret = vfs_unlink(dir->d_inode, rep);
+			ret = vfs_unlink(dir->d_inode, rep, NULL);
 
 			if (preemptive)
 				cachefiles_mark_object_buried(cache, rep);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 0f9b66eaa767..dc60b8bd09ec 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
 
 	dget(lower_dentry);
 	lower_dir_dentry = lock_parent(lower_dentry);
-	rc = vfs_unlink(lower_dir_inode, lower_dentry);
+	rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
 	if (rc) {
 		printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
 		goto out_unlock;
@@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode,
 	inode = __ecryptfs_get_inode(lower_dentry->d_inode,
 				     directory_inode->i_sb);
 	if (IS_ERR(inode)) {
-		vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
+		vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
 		goto out_lock;
 	}
 	fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
diff --git a/fs/namei.c b/fs/namei.c
index e633a58d4222..67ce331a3ed8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3615,7 +3615,25 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
 	return do_rmdir(AT_FDCWD, pathname);
 }
 
-int vfs_unlink(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_unlink - unlink a filesystem object
+ * @dir:	parent directory
+ * @dentry:	victim
+ * @delegated_inode: returns victim inode, if the inode is delegated.
+ *
+ * The caller must hold dir->i_mutex.
+ *
+ * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
+ * return a reference to the inode in delegated_inode.  The caller
+ * should then break the delegation on that inode and retry.  Because
+ * breaking a delegation may take a long time, the caller should drop
+ * dir->i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
 {
 	struct inode *target = dentry->d_inode;
 	int error = may_delete(dir, dentry, 0);
@@ -3632,11 +3650,20 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 	else {
 		error = security_inode_unlink(dir, dentry);
 		if (!error) {
+			error = break_deleg(target, O_WRONLY|O_NONBLOCK);
+			if (error) {
+				if (error == -EWOULDBLOCK && delegated_inode) {
+					*delegated_inode = target;
+					ihold(target);
+				}
+				goto out;
+			}
 			error = dir->i_op->unlink(dir, dentry);
 			if (!error)
 				dont_mount(dentry);
 		}
 	}
+out:
 	mutex_unlock(&target->i_mutex);
 
 	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
@@ -3661,6 +3688,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 	struct dentry *dentry;
 	struct nameidata nd;
 	struct inode *inode = NULL;
+	struct inode *delegated_inode = NULL;
 	unsigned int lookup_flags = 0;
 retry:
 	name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3675,7 +3703,7 @@ retry:
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto exit1;
-
+retry_deleg:
 	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
@@ -3690,13 +3718,21 @@ retry:
 		error = security_path_unlink(&nd.path, dentry);
 		if (error)
 			goto exit2;
-		error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+		error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
 exit2:
 		dput(dentry);
 	}
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	if (inode)
 		iput(inode);	/* truncate the inode here */
+	inode = NULL;
+	if (delegated_inode) {
+		error = break_deleg(delegated_inode, O_WRONLY);
+		iput(delegated_inode);
+		delegated_inode = NULL;
+		if (!error)
+			goto retry_deleg;
+	}
 	mnt_drop_write(nd.path.mnt);
 exit1:
 	path_put(&nd.path);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 13886f7f40d5..7a810235d599 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1910,7 +1910,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	if (host_err)
 		goto out_put;
 	if (type != S_IFDIR)
-		host_err = vfs_unlink(dirp, rdentry);
+		host_err = vfs_unlink(dirp, rdentry, NULL);
 	else
 		host_err = vfs_rmdir(dirp, rdentry);
 	if (!host_err)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8e4be1be1a62..a5799233142a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1455,7 +1455,7 @@ extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
 extern int vfs_rmdir(struct inode *, struct dentry *);
-extern int vfs_unlink(struct inode *, struct dentry *);
+extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 
 /*
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index ae1996d3c539..95827ce2f3c7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -886,7 +886,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
 		err = -ENOENT;
 	} else {
 		ihold(inode);
-		err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+		err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
 	}
 	dput(dentry);
 
-- 
cgit v1.2.3


From 5a14696c1795d3843673b5cf1982d0e5357a5bbf Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 28 Aug 2012 07:50:40 -0700
Subject: locks: helper functions for delegation breaking

We'll need the same logic for rename and link.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c         | 13 +++----------
 include/linux/fs.h | 39 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 67ce331a3ed8..cfaeaae0f2db 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3650,14 +3650,9 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
 	else {
 		error = security_inode_unlink(dir, dentry);
 		if (!error) {
-			error = break_deleg(target, O_WRONLY|O_NONBLOCK);
-			if (error) {
-				if (error == -EWOULDBLOCK && delegated_inode) {
-					*delegated_inode = target;
-					ihold(target);
-				}
+			error = try_break_deleg(target, delegated_inode);
+			if (error)
 				goto out;
-			}
 			error = dir->i_op->unlink(dir, dentry);
 			if (!error)
 				dont_mount(dentry);
@@ -3727,9 +3722,7 @@ exit2:
 		iput(inode);	/* truncate the inode here */
 	inode = NULL;
 	if (delegated_inode) {
-		error = break_deleg(delegated_inode, O_WRONLY);
-		iput(delegated_inode);
-		delegated_inode = NULL;
+		error = break_deleg_wait(&delegated_inode);
 		if (!error)
 			goto retry_deleg;
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a5799233142a..931f919f44e1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1905,6 +1905,9 @@ extern bool fs_fully_visible(struct file_system_type *);
 
 extern int current_umask(void);
 
+extern void ihold(struct inode * inode);
+extern void iput(struct inode *);
+
 /* /sys/fs */
 extern struct kobject *fs_kobj;
 
@@ -1972,6 +1975,28 @@ static inline int break_deleg(struct inode *inode, unsigned int mode)
 	return 0;
 }
 
+static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+{
+	int ret;
+
+	ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
+	if (ret == -EWOULDBLOCK && delegated_inode) {
+		*delegated_inode = inode;
+		ihold(inode);
+	}
+	return ret;
+}
+
+static inline int break_deleg_wait(struct inode **delegated_inode)
+{
+	int ret;
+
+	ret = break_deleg(*delegated_inode, O_WRONLY);
+	iput(*delegated_inode);
+	*delegated_inode = NULL;
+	return ret;
+}
+
 #else /* !CONFIG_FILE_LOCKING */
 static inline int locks_mandatory_locked(struct inode *inode)
 {
@@ -2015,6 +2040,18 @@ static inline int break_deleg(struct inode *inode, unsigned int mode)
 {
 	return 0;
 }
+
+static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+{
+	return 0;
+}
+
+static inline int break_deleg_wait(struct inode **delegated_inode)
+{
+	BUG();
+	return 0;
+}
+
 #endif /* CONFIG_FILE_LOCKING */
 
 /* fs/open.c */
@@ -2350,8 +2387,6 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
 extern int inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
 extern void address_space_init_once(struct address_space *mapping);
-extern void ihold(struct inode * inode);
-extern void iput(struct inode *);
 extern struct inode * igrab(struct inode *);
 extern ino_t iunique(struct super_block *, ino_t);
 extern int inode_needs_sync(struct inode *inode);
-- 
cgit v1.2.3


From 8e6d782cab50884ba94324632700e6233a252f6a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 20 Sep 2011 16:59:58 -0400
Subject: locks: break delegations on rename

Cc: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 .../lustre/lustre/include/linux/lustre_compat25.h  |  4 +-
 drivers/staging/lustre/lustre/lvfs/lvfs_linux.c    |  2 +-
 fs/cachefiles/namei.c                              |  2 +-
 fs/ecryptfs/inode.c                                |  3 +-
 fs/namei.c                                         | 47 ++++++++++++++++++++--
 fs/nfsd/vfs.c                                      |  2 +-
 include/linux/fs.h                                 |  2 +-
 7 files changed, 51 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index 9243dfab43d3..80b019bf969c 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -105,8 +105,8 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
 #define ll_vfs_unlink(inode,entry,mnt)	  vfs_unlink(inode,entry)
 #define ll_vfs_mknod(dir,entry,mnt,mode,dev)    vfs_mknod(dir,entry,mode,dev)
 #define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
-#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
-		vfs_rename(old,old_dir,new,new_dir)
+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1,delegated_inode) \
+		vfs_rename(old,old_dir,new,new_dir,delegated_inode)
 
 #define cfs_bio_io_error(a,b)   bio_io_error((a))
 #define cfs_bio_endio(a,b,c)    bio_endio((a),(c))
diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
index 18e1b47a1d65..4ed7c9f0a8be 100644
--- a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
+++ b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
@@ -220,7 +220,7 @@ int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
 		GOTO(put_old, err = PTR_ERR(dchild_new));
 
 	err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
-			    dir->d_inode, dchild_new, mnt);
+			    dir->d_inode, dchild_new, mnt, NULL);
 
 	dput(dchild_new);
 put_old:
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 31d480c0e046..ca65f39dc8dc 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -396,7 +396,7 @@ try_again:
 		cachefiles_io_error(cache, "Rename security error %d", ret);
 	} else {
 		ret = vfs_rename(dir->d_inode, rep,
-				 cache->graveyard->d_inode, grave);
+				 cache->graveyard->d_inode, grave, NULL);
 		if (ret != 0 && ret != -ENOMEM)
 			cachefiles_io_error(cache,
 					    "Rename failed with error %d", ret);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index dc60b8bd09ec..c23b01bb7e04 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -640,7 +640,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto out_lock;
 	}
 	rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
-			lower_new_dir_dentry->d_inode, lower_new_dentry);
+			lower_new_dir_dentry->d_inode, lower_new_dentry,
+			NULL);
 	if (rc)
 		goto out_lock;
 	if (target_inode)
diff --git a/fs/namei.c b/fs/namei.c
index cfaeaae0f2db..ce7e580e4e14 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4022,7 +4022,8 @@ out:
 }
 
 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
-			    struct inode *new_dir, struct dentry *new_dentry)
+			    struct inode *new_dir, struct dentry *new_dentry,
+			    struct inode **delegated_inode)
 {
 	struct inode *target = new_dentry->d_inode;
 	struct inode *source = old_dentry->d_inode;
@@ -4039,6 +4040,14 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
 		goto out;
 
+	error = try_break_deleg(source, delegated_inode);
+	if (error)
+		goto out;
+	if (target) {
+		error = try_break_deleg(target, delegated_inode);
+		if (error)
+			goto out;
+	}
 	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	if (error)
 		goto out;
@@ -4053,8 +4062,30 @@ out:
 	return error;
 }
 
+/**
+ * vfs_rename - rename a filesystem object
+ * @old_dir:	parent of source
+ * @old_dentry:	source
+ * @new_dir:	parent of destination
+ * @new_dentry:	destination
+ * @delegated_inode: returns an inode needing a delegation break
+ *
+ * The caller must hold multiple mutexes--see lock_rename()).
+ *
+ * If vfs_rename discovers a delegation in need of breaking at either
+ * the source or destination, it will return -EWOULDBLOCK and return a
+ * reference to the inode in delegated_inode.  The caller should then
+ * break the delegation and retry.  Because breaking a delegation may
+ * take a long time, the caller should drop all locks before doing
+ * so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-	       struct inode *new_dir, struct dentry *new_dentry)
+	       struct inode *new_dir, struct dentry *new_dentry,
+	       struct inode **delegated_inode)
 {
 	int error;
 	int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
@@ -4082,7 +4113,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (is_dir)
 		error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
 	else
-		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
+		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
 	if (!error)
 		fsnotify_move(old_dir, new_dir, old_name, is_dir,
 			      new_dentry->d_inode, old_dentry);
@@ -4098,6 +4129,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 	struct dentry *old_dentry, *new_dentry;
 	struct dentry *trap;
 	struct nameidata oldnd, newnd;
+	struct inode *delegated_inode = NULL;
 	struct filename *from;
 	struct filename *to;
 	unsigned int lookup_flags = 0;
@@ -4137,6 +4169,7 @@ retry:
 	newnd.flags &= ~LOOKUP_PARENT;
 	newnd.flags |= LOOKUP_RENAME_TARGET;
 
+retry_deleg:
 	trap = lock_rename(new_dir, old_dir);
 
 	old_dentry = lookup_hash(&oldnd);
@@ -4173,13 +4206,19 @@ retry:
 	if (error)
 		goto exit5;
 	error = vfs_rename(old_dir->d_inode, old_dentry,
-				   new_dir->d_inode, new_dentry);
+				   new_dir->d_inode, new_dentry,
+				   &delegated_inode);
 exit5:
 	dput(new_dentry);
 exit4:
 	dput(old_dentry);
 exit3:
 	unlock_rename(new_dir, old_dir);
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry_deleg;
+	}
 	mnt_drop_write(oldnd.path.mnt);
 exit2:
 	if (retry_estale(error, lookup_flags))
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7a810235d599..45bf0295894d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1837,7 +1837,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 		if (host_err)
 			goto out_dput_new;
 	}
-	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
+	host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
 	if (!host_err) {
 		host_err = commit_metadata(tfhp);
 		if (!host_err)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 931f919f44e1..5bcff883fa90 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1456,7 +1456,7 @@ extern int vfs_symlink(struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
 extern int vfs_rmdir(struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **);
 
 /*
  * VFS dentry helper functions.
-- 
cgit v1.2.3


From 146a8595c6399ee6ab4b5cc34c0d28aa4835fdc5 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 20 Sep 2011 17:14:31 -0400
Subject: locks: break delegations on link

Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Dustin Kirkland <dustin.kirkland@gazzang.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/inode.c |  2 +-
 fs/namei.c          | 36 ++++++++++++++++++++++++++++++++----
 fs/nfsd/vfs.c       |  2 +-
 include/linux/fs.h  |  2 +-
 4 files changed, 35 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c23b01bb7e04..1c628f023041 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -475,7 +475,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
 	dget(lower_new_dentry);
 	lower_dir_dentry = lock_parent(lower_new_dentry);
 	rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
-		      lower_new_dentry);
+		      lower_new_dentry, NULL);
 	if (rc || !lower_new_dentry->d_inode)
 		goto out_lock;
 	rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
diff --git a/fs/namei.c b/fs/namei.c
index ce7e580e4e14..251178a1e383 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3819,7 +3819,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
 	return sys_symlinkat(oldname, AT_FDCWD, newname);
 }
 
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
+/**
+ * vfs_link - create a new link
+ * @old_dentry:	object to be linked
+ * @dir:	new parent
+ * @new_dentry:	where to create the new link
+ * @delegated_inode: returns inode needing a delegation break
+ *
+ * The caller must hold dir->i_mutex
+ *
+ * If vfs_link discovers a delegation on the to-be-linked file in need
+ * of breaking, it will return -EWOULDBLOCK and return a reference to the
+ * inode in delegated_inode.  The caller should then break the delegation
+ * and retry.  Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
 {
 	struct inode *inode = old_dentry->d_inode;
 	unsigned max_links = dir->i_sb->s_max_links;
@@ -3855,8 +3874,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 		error =  -ENOENT;
 	else if (max_links && inode->i_nlink >= max_links)
 		error = -EMLINK;
-	else
-		error = dir->i_op->link(old_dentry, dir, new_dentry);
+	else {
+		error = try_break_deleg(inode, delegated_inode);
+		if (!error)
+			error = dir->i_op->link(old_dentry, dir, new_dentry);
+	}
 
 	if (!error && (inode->i_state & I_LINKABLE)) {
 		spin_lock(&inode->i_lock);
@@ -3883,6 +3905,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
 {
 	struct dentry *new_dentry;
 	struct path old_path, new_path;
+	struct inode *delegated_inode = NULL;
 	int how = 0;
 	int error;
 
@@ -3921,9 +3944,14 @@ retry:
 	error = security_path_link(old_path.dentry, &new_path, new_dentry);
 	if (error)
 		goto out_dput;
-	error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
+	error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
 out_dput:
 	done_path_create(&new_path, new_dentry);
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry;
+	}
 	if (retry_estale(error, how)) {
 		how |= LOOKUP_REVAL;
 		goto retry;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 45bf0295894d..27ba21b5f383 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1736,7 +1736,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 		err = nfserrno(host_err);
 		goto out_dput;
 	}
-	host_err = vfs_link(dold, dirp, dnew);
+	host_err = vfs_link(dold, dirp, dnew, NULL);
 	if (!host_err) {
 		err = nfserrno(commit_metadata(ffhp));
 		if (!err)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5bcff883fa90..6e36e7118ec1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1453,7 +1453,7 @@ extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
 extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
-extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
+extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
 extern int vfs_rmdir(struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **);
-- 
cgit v1.2.3


From 27ac0ffeac80ba6b9580529568d06144df044366 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 20 Sep 2011 17:19:26 -0400
Subject: locks: break delegations on any attribute modification

NFSv4 uses leases to guarantee that clients can cache metadata as well
as data.

Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>
Cc: David Howells <dhowells@redhat.com>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Dustin Kirkland <dustin.kirkland@gazzang.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/base/devtmpfs.c   |  4 ++--
 fs/attr.c                 | 25 ++++++++++++++++++++++++-
 fs/cachefiles/interface.c |  4 ++--
 fs/ecryptfs/inode.c       |  4 ++--
 fs/hpfs/namei.c           |  2 +-
 fs/inode.c                |  6 +++++-
 fs/nfsd/vfs.c             |  8 ++++++--
 fs/open.c                 | 22 ++++++++++++++++++----
 fs/utimes.c               |  9 ++++++++-
 include/linux/fs.h        |  2 +-
 10 files changed, 69 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 1b8490e2fbde..0f3820121e02 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -216,7 +216,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
 		newattrs.ia_gid = gid;
 		newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
 		mutex_lock(&dentry->d_inode->i_mutex);
-		notify_change(dentry, &newattrs);
+		notify_change(dentry, &newattrs, NULL);
 		mutex_unlock(&dentry->d_inode->i_mutex);
 
 		/* mark as kernel-created inode */
@@ -322,7 +322,7 @@ static int handle_remove(const char *nodename, struct device *dev)
 			newattrs.ia_valid =
 				ATTR_UID|ATTR_GID|ATTR_MODE;
 			mutex_lock(&dentry->d_inode->i_mutex);
-			notify_change(dentry, &newattrs);
+			notify_change(dentry, &newattrs, NULL);
 			mutex_unlock(&dentry->d_inode->i_mutex);
 			err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
 			if (!err || err == -ENOENT)
diff --git a/fs/attr.c b/fs/attr.c
index 1449adb14ef6..267968d94673 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -167,7 +167,27 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
 }
 EXPORT_SYMBOL(setattr_copy);
 
-int notify_change(struct dentry * dentry, struct iattr * attr)
+/**
+ * notify_change - modify attributes of a filesytem object
+ * @dentry:	object affected
+ * @iattr:	new attributes
+ * @delegated_inode: returns inode, if the inode is delegated
+ *
+ * The caller must hold the i_mutex on the affected object.
+ *
+ * If notify_change discovers a delegation in need of breaking,
+ * it will return -EWOULDBLOCK and return a reference to the inode in
+ * delegated_inode.  The caller should then break the delegation and
+ * retry.  Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.  Also, passing NULL is fine for callers holding
+ * the file open for write, as there can be no conflicting delegation in
+ * that case.
+ */
+int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
 {
 	struct inode *inode = dentry->d_inode;
 	umode_t mode = inode->i_mode;
@@ -241,6 +261,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 		return 0;
 
 	error = security_inode_setattr(dentry, attr);
+	if (error)
+		return error;
+	error = try_break_deleg(inode, delegated_inode);
 	if (error)
 		return error;
 
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 43eb5592cdea..5088a418ac4d 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -449,14 +449,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
 		_debug("discard tail %llx", oi_size);
 		newattrs.ia_valid = ATTR_SIZE;
 		newattrs.ia_size = oi_size & PAGE_MASK;
-		ret = notify_change(object->backer, &newattrs);
+		ret = notify_change(object->backer, &newattrs, NULL);
 		if (ret < 0)
 			goto truncate_failed;
 	}
 
 	newattrs.ia_valid = ATTR_SIZE;
 	newattrs.ia_size = ni_size;
-	ret = notify_change(object->backer, &newattrs);
+	ret = notify_change(object->backer, &newattrs, NULL);
 
 truncate_failed:
 	mutex_unlock(&object->backer->d_inode->i_mutex);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 1c628f023041..c36c44824471 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -882,7 +882,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 		struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 
 		mutex_lock(&lower_dentry->d_inode->i_mutex);
-		rc = notify_change(lower_dentry, &lower_ia);
+		rc = notify_change(lower_dentry, &lower_ia, NULL);
 		mutex_unlock(&lower_dentry->d_inode->i_mutex);
 	}
 	return rc;
@@ -983,7 +983,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 		lower_ia.ia_valid &= ~ATTR_MODE;
 
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
-	rc = notify_change(lower_dentry, &lower_ia);
+	rc = notify_change(lower_dentry, &lower_ia, NULL);
 	mutex_unlock(&lower_dentry->d_inode->i_mutex);
 out:
 	fsstack_copy_attr_all(inode, lower_inode);
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 345713d2f8f3..1b39afdd86fd 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -407,7 +407,7 @@ again:
 			/*printk("HPFS: truncating file before delete.\n");*/
 			newattrs.ia_size = 0;
 			newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-			err = notify_change(dentry, &newattrs);
+			err = notify_change(dentry, &newattrs, NULL);
 			put_write_access(inode);
 			if (!err)
 				goto again;
diff --git a/fs/inode.c b/fs/inode.c
index ce48c359ce9e..4bcdad3c9361 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1603,7 +1603,11 @@ static int __remove_suid(struct dentry *dentry, int kill)
 	struct iattr newattrs;
 
 	newattrs.ia_valid = ATTR_FORCE | kill;
-	return notify_change(dentry, &newattrs);
+	/*
+	 * Note we call this on write, so notify_change will not
+	 * encounter any conflicting delegations:
+	 */
+	return notify_change(dentry, &newattrs, NULL);
 }
 
 int file_remove_suid(struct file *file)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 27ba21b5f383..94b5f5d2bfed 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -427,7 +427,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 			goto out_nfserr;
 		fh_lock(fhp);
 
-		host_err = notify_change(dentry, iap);
+		host_err = notify_change(dentry, iap, NULL);
 		err = nfserrno(host_err);
 		fh_unlock(fhp);
 	}
@@ -988,7 +988,11 @@ static void kill_suid(struct dentry *dentry)
 	ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	notify_change(dentry, &ia);
+	/*
+	 * Note we call this on write, so notify_change will not
+	 * encounter any conflicting delegations:
+	 */
+	notify_change(dentry, &ia, NULL);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 }
 
diff --git a/fs/open.c b/fs/open.c
index fffbed40dbe9..4b3e1edf2fe4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -57,7 +57,8 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 		newattrs.ia_valid |= ret | ATTR_FORCE;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	ret = notify_change(dentry, &newattrs);
+	/* Note any delegations or leases have already been broken: */
+	ret = notify_change(dentry, &newattrs, NULL);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 	return ret;
 }
@@ -464,21 +465,28 @@ out:
 static int chmod_common(struct path *path, umode_t mode)
 {
 	struct inode *inode = path->dentry->d_inode;
+	struct inode *delegated_inode = NULL;
 	struct iattr newattrs;
 	int error;
 
 	error = mnt_want_write(path->mnt);
 	if (error)
 		return error;
+retry_deleg:
 	mutex_lock(&inode->i_mutex);
 	error = security_path_chmod(path, mode);
 	if (error)
 		goto out_unlock;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-	error = notify_change(path->dentry, &newattrs);
+	error = notify_change(path->dentry, &newattrs, &delegated_inode);
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry_deleg;
+	}
 	mnt_drop_write(path->mnt);
 	return error;
 }
@@ -522,6 +530,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
 static int chown_common(struct path *path, uid_t user, gid_t group)
 {
 	struct inode *inode = path->dentry->d_inode;
+	struct inode *delegated_inode = NULL;
 	int error;
 	struct iattr newattrs;
 	kuid_t uid;
@@ -546,12 +555,17 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
 	if (!S_ISDIR(inode->i_mode))
 		newattrs.ia_valid |=
 			ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+retry_deleg:
 	mutex_lock(&inode->i_mutex);
 	error = security_path_chown(path, uid, gid);
 	if (!error)
-		error = notify_change(path->dentry, &newattrs);
+		error = notify_change(path->dentry, &newattrs, &delegated_inode);
 	mutex_unlock(&inode->i_mutex);
-
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry_deleg;
+	}
 	return error;
 }
 
diff --git a/fs/utimes.c b/fs/utimes.c
index f4fb7eca10e8..aa138d64560a 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -53,6 +53,7 @@ static int utimes_common(struct path *path, struct timespec *times)
 	int error;
 	struct iattr newattrs;
 	struct inode *inode = path->dentry->d_inode;
+	struct inode *delegated_inode = NULL;
 
 	error = mnt_want_write(path->mnt);
 	if (error)
@@ -101,9 +102,15 @@ static int utimes_common(struct path *path, struct timespec *times)
 				goto mnt_drop_write_and_out;
 		}
 	}
+retry_deleg:
 	mutex_lock(&inode->i_mutex);
-	error = notify_change(path->dentry, &newattrs);
+	error = notify_change(path->dentry, &newattrs, &delegated_inode);
 	mutex_unlock(&inode->i_mutex);
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry_deleg;
+	}
 
 mnt_drop_write_and_out:
 	mnt_drop_write(path->mnt);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e36e7118ec1..ab2a0ca82dc5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2278,7 +2278,7 @@ extern void emergency_remount(void);
 #ifdef CONFIG_BLOCK
 extern sector_t bmap(struct inode *, sector_t);
 #endif
-extern int notify_change(struct dentry *, struct iattr *);
+extern int notify_change(struct dentry *, struct iattr *, struct inode **);
 extern int inode_permission(struct inode *, int);
 extern int generic_permission(struct inode *, int);
 
-- 
cgit v1.2.3


From 6aafeef03b9d9ecf255f3a80ed85ee070260e1ae Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Wed, 6 Nov 2013 17:52:20 +0100
Subject: netfilter: push reasm skb through instead of original frag skbs

Pushing original fragments through causes several problems. For example
for matching, frags may not be matched correctly. Take following
example:

<example>
On HOSTA do:
ip6tables -I INPUT -p icmpv6 -j DROP
ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT

and on HOSTB you do:
ping6 HOSTA -s2000    (MTU is 1500)

Incoming echo requests will be filtered out on HOSTA. This issue does
not occur with smaller packets than MTU (where fragmentation does not happen)
</example>

As was discussed previously, the only correct solution seems to be to use
reassembled skb instead of separete frags. Doing this has positive side
effects in reducing sk_buff by one pointer (nfct_reasm) and also the reams
dances in ipvs and conntrack can be removed.

Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c
entirely and use code in net/ipv6/reassembly.c instead.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                         | 32 ---------------
 include/net/ip_vs.h                            | 32 +--------------
 include/net/netfilter/ipv6/nf_defrag_ipv6.h    |  4 +-
 net/core/skbuff.c                              |  3 --
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 56 +-------------------------
 net/ipv6/netfilter/nf_conntrack_reasm.c        | 19 +--------
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c      |  7 +++-
 net/netfilter/ipvs/ip_vs_core.c                | 55 +------------------------
 net/netfilter/ipvs/ip_vs_pe_sip.c              |  8 +---
 9 files changed, 13 insertions(+), 203 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 036ec7d8a83a..215b5ea1cb30 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -337,11 +337,6 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
-#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
-    defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
-#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
-#endif
-
 /** 
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
@@ -374,7 +369,6 @@ typedef unsigned char *sk_buff_data_t;
  *	@protocol: Packet protocol from driver
  *	@destructor: Destruct function
  *	@nfct: Associated connection, if any
- *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@skb_iif: ifindex of device we arrived on
  *	@tc_index: Traffic control index
@@ -463,9 +457,6 @@ struct sk_buff {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	struct sk_buff		*nfct_reasm;
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct nf_bridge_info	*nf_bridge;
 #endif
@@ -2595,18 +2586,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 		atomic_inc(&nfct->use);
 }
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
-{
-	if (skb)
-		atomic_inc(&skb->users);
-}
-static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
-{
-	if (skb)
-		kfree_skb(skb);
-}
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
 {
@@ -2625,10 +2604,6 @@ static inline void nf_reset(struct sk_buff *skb)
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	nf_conntrack_put_reasm(skb->nfct_reasm);
-	skb->nfct_reasm = NULL;
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(skb->nf_bridge);
 	skb->nf_bridge = NULL;
@@ -2650,10 +2625,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 	nf_conntrack_get(src->nfct);
 	dst->nfctinfo = src->nfctinfo;
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	dst->nfct_reasm = src->nfct_reasm;
-	nf_conntrack_get_reasm(src->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	dst->nf_bridge  = src->nf_bridge;
 	nf_bridge_get(src->nf_bridge);
@@ -2665,9 +2636,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(dst->nfct);
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	nf_conntrack_put_reasm(dst->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(dst->nf_bridge);
 #endif
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index cd7275f9c463..5679d927562b 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size;
 struct ip_vs_iphdr {
 	__u32 len;	/* IPv4 simply where L4 starts
 			   IPv6 where L4 Transport Header starts */
-	__u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */
 	__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
 	__s16 protocol;
 	__s32 flags;
@@ -117,34 +116,12 @@ struct ip_vs_iphdr {
 	union nf_inet_addr daddr;
 };
 
-/* Dependency to module: nf_defrag_ipv6 */
-#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
-static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
-{
-	return skb->nfct_reasm;
-}
-static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
-				      int len, void *buffer,
-				      const struct ip_vs_iphdr *ipvsh)
-{
-	if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb)))
-		return skb_header_pointer(skb_nfct_reasm(skb),
-					  ipvsh->thoff_reasm, len, buffer);
-
-	return skb_header_pointer(skb, offset, len, buffer);
-}
-#else
-static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
-{
-	return NULL;
-}
 static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
 				      int len, void *buffer,
 				      const struct ip_vs_iphdr *ipvsh)
 {
 	return skb_header_pointer(skb, offset, len, buffer);
 }
-#endif
 
 static inline void
 ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
@@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
 			(struct ipv6hdr *)skb_network_header(skb);
 		iphdr->saddr.in6 = iph->saddr;
 		iphdr->daddr.in6 = iph->daddr;
-		/* ipv6_find_hdr() updates len, flags, thoff_reasm */
-		iphdr->thoff_reasm = 0;
+		/* ipv6_find_hdr() updates len, flags */
 		iphdr->len	 = 0;
 		iphdr->flags	 = 0;
 		iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
 						 &iphdr->fragoffs,
 						 &iphdr->flags);
-		/* get proto from re-assembled packet and it's offset */
-		if (skb_nfct_reasm(skb))
-			iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb),
-							&iphdr->thoff_reasm,
-							-1, NULL, NULL);
-
 	} else
 #endif
 	{
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 5613412e7dc2..27666d8a0bd0 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -6,9 +6,7 @@ void nf_defrag_ipv6_enable(void);
 int nf_ct_frag6_init(void);
 void nf_ct_frag6_cleanup(void);
 struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
-void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
-			struct net_device *in, struct net_device *out,
-			int (*okfn)(struct sk_buff *));
+void nf_ct_frag6_consume_orig(struct sk_buff *skb);
 
 struct inet_frags_ctl;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8c5197fe55a4..8cec1e6b844d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -592,9 +592,6 @@ static void skb_release_head_state(struct sk_buff *skb)
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	nf_conntrack_put(skb->nfct);
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	nf_conntrack_put_reasm(skb->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(skb->nf_bridge);
 #endif
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 486545eb42ce..4cbc6b290dd5 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -169,64 +169,13 @@ out:
 	return nf_conntrack_confirm(skb);
 }
 
-static unsigned int __ipv6_conntrack_in(struct net *net,
-					unsigned int hooknum,
-					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
-{
-	struct sk_buff *reasm = skb->nfct_reasm;
-	const struct nf_conn_help *help;
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-
-	/* This packet is fragmented and has reassembled packet. */
-	if (reasm) {
-		/* Reassembled packet isn't parsed yet ? */
-		if (!reasm->nfct) {
-			unsigned int ret;
-
-			ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
-			if (ret != NF_ACCEPT)
-				return ret;
-		}
-
-		/* Conntrack helpers need the entire reassembled packet in the
-		 * POST_ROUTING hook. In case of unconfirmed connections NAT
-		 * might reassign a helper, so the entire packet is also
-		 * required.
-		 */
-		ct = nf_ct_get(reasm, &ctinfo);
-		if (ct != NULL && !nf_ct_is_untracked(ct)) {
-			help = nfct_help(ct);
-			if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
-				nf_conntrack_get_reasm(reasm);
-				NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
-					       (struct net_device *)in,
-					       (struct net_device *)out,
-					       okfn, NF_IP6_PRI_CONNTRACK + 1);
-				return NF_DROP_ERR(-ECANCELED);
-			}
-		}
-
-		nf_conntrack_get(reasm->nfct);
-		skb->nfct = reasm->nfct;
-		skb->nfctinfo = reasm->nfctinfo;
-		return NF_ACCEPT;
-	}
-
-	return nf_conntrack_in(net, PF_INET6, hooknum, skb);
-}
-
 static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
-				   okfn);
+	return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);
 }
 
 static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
@@ -240,8 +189,7 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
 		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
-	return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
-				   okfn);
+	return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 4a258263d8ec..767ab8da8218 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -633,31 +633,16 @@ ret_orig:
 	return skb;
 }
 
-void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
-			struct net_device *in, struct net_device *out,
-			int (*okfn)(struct sk_buff *))
+void nf_ct_frag6_consume_orig(struct sk_buff *skb)
 {
 	struct sk_buff *s, *s2;
-	unsigned int ret = 0;
 
 	for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
-		nf_conntrack_put_reasm(s->nfct_reasm);
-		nf_conntrack_get_reasm(skb);
-		s->nfct_reasm = skb;
-
 		s2 = s->next;
 		s->next = NULL;
-
-		if (ret != -ECANCELED)
-			ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
-					     in, out, okfn,
-					     NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
-		else
-			kfree_skb(s);
-
+		consume_skb(s);
 		s = s2;
 	}
-	nf_conntrack_put_reasm(skb);
 }
 
 static int nf_ct_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index ec483aa3f60f..7b9a748c6bac 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
 	if (reasm == skb)
 		return NF_ACCEPT;
 
-	nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in,
-			   (struct net_device *)out, okfn);
+	nf_ct_frag6_consume_orig(reasm);
+
+	NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm,
+		       (struct net_device *) in, (struct net_device *) out,
+		       okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
 
 	return NF_STOLEN;
 }
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 34fda62f40f6..4f26ee46b51f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1139,12 +1139,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	ip_vs_fill_iph_skb(af, skb, &iph);
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (!iph.fragoffs && skb_nfct_reasm(skb)) {
-			struct sk_buff *reasm = skb_nfct_reasm(skb);
-			/* Save fw mark for coming frags */
-			reasm->ipvs_property = 1;
-			reasm->mark = skb->mark;
-		}
 		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
 			int related;
 			int verdict = ip_vs_out_icmp_v6(skb, &related,
@@ -1614,12 +1608,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (!iph.fragoffs && skb_nfct_reasm(skb)) {
-			struct sk_buff *reasm = skb_nfct_reasm(skb);
-			/* Save fw mark for coming frags. */
-			reasm->ipvs_property = 1;
-			reasm->mark = skb->mark;
-		}
 		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
 			int related;
 			int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
@@ -1671,9 +1659,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 		/* sorry, all this trouble for a no-hit :) */
 		IP_VS_DBG_PKT(12, af, pp, skb, 0,
 			      "ip_vs_in: packet continues traversal as normal");
-		if (iph.fragoffs && !skb_nfct_reasm(skb)) {
+		if (iph.fragoffs) {
 			/* Fragment that couldn't be mapped to a conn entry
-			 * and don't have any pointer to a reasm skb
 			 * is missing module nf_defrag_ipv6
 			 */
 			IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
@@ -1755,38 +1742,6 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 
 #ifdef CONFIG_IP_VS_IPV6
 
-/*
- * AF_INET6 fragment handling
- * Copy info from first fragment, to the rest of them.
- */
-static unsigned int
-ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     int (*okfn)(struct sk_buff *))
-{
-	struct sk_buff *reasm = skb_nfct_reasm(skb);
-	struct net *net;
-
-	/* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
-	 * ipvs_property is set when checking first fragment
-	 * in ip_vs_in() and ip_vs_out().
-	 */
-	if (reasm)
-		IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
-	if (!reasm || !reasm->ipvs_property)
-		return NF_ACCEPT;
-
-	net = skb_net(skb);
-	if (!net_ipvs(net)->enable)
-		return NF_ACCEPT;
-
-	/* Copy stored fw mark, saved in ip_vs_{in,out} */
-	skb->mark = reasm->mark;
-
-	return NF_ACCEPT;
-}
-
 /*
  *	AF_INET6 handler in NF_INET_LOCAL_IN chain
  *	Schedule and forward packets from remote clients
@@ -1924,14 +1879,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 		.priority	= 100,
 	},
 #ifdef CONFIG_IP_VS_IPV6
-	/* After mangle & nat fetch 2:nd fragment and following */
-	{
-		.hook		= ip_vs_preroute_frag6,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP6_PRI_NAT_DST + 1,
-	},
 	/* After packet filtering, change source only for VS/NAT */
 	{
 		.hook		= ip_vs_reply6,
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 9ef22bdce9f1..bed5f7042529 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff,
 static int
 ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 {
-	struct sk_buff *reasm = skb_nfct_reasm(skb);
 	struct ip_vs_iphdr iph;
 	unsigned int dataoff, datalen, matchoff, matchlen;
 	const char *dptr;
@@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	/* todo: IPv6 fragments:
 	 *       I think this only should be done for the first fragment. /HS
 	 */
-	if (reasm) {
-		skb = reasm;
-		dataoff = iph.thoff_reasm + sizeof(struct udphdr);
-	} else
-		dataoff = iph.len + sizeof(struct udphdr);
+	dataoff = iph.len + sizeof(struct udphdr);
 
 	if (dataoff >= skb->len)
 		return -EINVAL;
-	/* todo: Check if this will mess-up the reasm skb !!! /HS */
 	retc = skb_linearize(skb);
 	if (retc < 0)
 		return retc;
-- 
cgit v1.2.3


From e267cb960ab790c94a5019272c0e4dac95dc4dba Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 11 Nov 2013 00:42:07 -0500
Subject: vlan: Implement vlan_dev_get_egress_qos_mask as an inline.

This is to avoid very silly Kconfig dependencies for modules
using this routine.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 98 ++++++++++++++++++++++++++++++++++++++++++++++++-
 net/8021q/vlan.h        | 77 --------------------------------------
 net/8021q/vlan_dev.c    | 31 ++--------------
 3 files changed, 99 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index f3088a0112cf..f252deb99454 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -88,8 +88,102 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
 					       __be16 vlan_proto, u16 vlan_id);
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
-extern u16 vlan_dev_get_egress_qos_mask(struct net_device *dev,
-					u32 skprio);
+
+/**
+ *	struct vlan_priority_tci_mapping - vlan egress priority mappings
+ *	@priority: skb priority
+ *	@vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
+ *	@next: pointer to next struct
+ */
+struct vlan_priority_tci_mapping {
+	u32					priority;
+	u16					vlan_qos;
+	struct vlan_priority_tci_mapping	*next;
+};
+
+/**
+ *	struct vlan_pcpu_stats - VLAN percpu rx/tx stats
+ *	@rx_packets: number of received packets
+ *	@rx_bytes: number of received bytes
+ *	@rx_multicast: number of received multicast packets
+ *	@tx_packets: number of transmitted packets
+ *	@tx_bytes: number of transmitted bytes
+ *	@syncp: synchronization point for 64bit counters
+ *	@rx_errors: number of rx errors
+ *	@tx_dropped: number of tx drops
+ */
+struct vlan_pcpu_stats {
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
+	u32			rx_errors;
+	u32			tx_dropped;
+};
+
+struct proc_dir_entry;
+struct netpoll;
+
+/**
+ *	struct vlan_dev_priv - VLAN private device data
+ *	@nr_ingress_mappings: number of ingress priority mappings
+ *	@ingress_priority_map: ingress priority mappings
+ *	@nr_egress_mappings: number of egress priority mappings
+ *	@egress_priority_map: hash of egress priority mappings
+ *	@vlan_proto: VLAN encapsulation protocol
+ *	@vlan_id: VLAN identifier
+ *	@flags: device flags
+ *	@real_dev: underlying netdevice
+ *	@real_dev_addr: address of underlying netdevice
+ *	@dent: proc dir entry
+ *	@vlan_pcpu_stats: ptr to percpu rx stats
+ */
+struct vlan_dev_priv {
+	unsigned int				nr_ingress_mappings;
+	u32					ingress_priority_map[8];
+	unsigned int				nr_egress_mappings;
+	struct vlan_priority_tci_mapping	*egress_priority_map[16];
+
+	__be16					vlan_proto;
+	u16					vlan_id;
+	u16					flags;
+
+	struct net_device			*real_dev;
+	unsigned char				real_dev_addr[ETH_ALEN];
+
+	struct proc_dir_entry			*dent;
+	struct vlan_pcpu_stats __percpu		*vlan_pcpu_stats;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct netpoll				*netpoll;
+#endif
+};
+
+static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
+{
+	return netdev_priv(dev);
+}
+
+static inline u16
+vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
+{
+	struct vlan_priority_tci_mapping *mp;
+
+	smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
+
+	mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
+	while (mp) {
+		if (mp->priority == skprio) {
+			return mp->vlan_qos; /* This should already be shifted
+					      * to mask correctly with the
+					      * VLAN's TCI */
+		}
+		mp = mp->next;
+	}
+	return 0;
+}
+
 extern bool vlan_do_receive(struct sk_buff **skb);
 extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index a2caf00b82cc..5704ed9c3a23 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -5,83 +5,6 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/list.h>
 
-
-/**
- *	struct vlan_priority_tci_mapping - vlan egress priority mappings
- *	@priority: skb priority
- *	@vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
- *	@next: pointer to next struct
- */
-struct vlan_priority_tci_mapping {
-	u32					priority;
-	u16					vlan_qos;
-	struct vlan_priority_tci_mapping	*next;
-};
-
-
-/**
- *	struct vlan_pcpu_stats - VLAN percpu rx/tx stats
- *	@rx_packets: number of received packets
- *	@rx_bytes: number of received bytes
- *	@rx_multicast: number of received multicast packets
- *	@tx_packets: number of transmitted packets
- *	@tx_bytes: number of transmitted bytes
- *	@syncp: synchronization point for 64bit counters
- *	@rx_errors: number of rx errors
- *	@tx_dropped: number of tx drops
- */
-struct vlan_pcpu_stats {
-	u64			rx_packets;
-	u64			rx_bytes;
-	u64			rx_multicast;
-	u64			tx_packets;
-	u64			tx_bytes;
-	struct u64_stats_sync	syncp;
-	u32			rx_errors;
-	u32			tx_dropped;
-};
-
-struct netpoll;
-
-/**
- *	struct vlan_dev_priv - VLAN private device data
- *	@nr_ingress_mappings: number of ingress priority mappings
- *	@ingress_priority_map: ingress priority mappings
- *	@nr_egress_mappings: number of egress priority mappings
- *	@egress_priority_map: hash of egress priority mappings
- *	@vlan_proto: VLAN encapsulation protocol
- *	@vlan_id: VLAN identifier
- *	@flags: device flags
- *	@real_dev: underlying netdevice
- *	@real_dev_addr: address of underlying netdevice
- *	@dent: proc dir entry
- *	@vlan_pcpu_stats: ptr to percpu rx stats
- */
-struct vlan_dev_priv {
-	unsigned int				nr_ingress_mappings;
-	u32					ingress_priority_map[8];
-	unsigned int				nr_egress_mappings;
-	struct vlan_priority_tci_mapping	*egress_priority_map[16];
-
-	__be16					vlan_proto;
-	u16					vlan_id;
-	u16					flags;
-
-	struct net_device			*real_dev;
-	unsigned char				real_dev_addr[ETH_ALEN];
-
-	struct proc_dir_entry			*dent;
-	struct vlan_pcpu_stats __percpu		*vlan_pcpu_stats;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	struct netpoll				*netpoll;
-#endif
-};
-
-static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
-{
-	return netdev_priv(dev);
-}
-
 /* if this changes, algorithm will have to be reworked because this
  * depends on completely exhausting the VLAN identifier space.  Thus
  * it gives constant time look-up, but in many cases it wastes memory.
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 13904a414929..8db1b985dbf1 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -68,31 +68,6 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 	return 0;
 }
 
-static inline u16
-__vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
-{
-	struct vlan_priority_tci_mapping *mp;
-
-	smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
-
-	mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
-	while (mp) {
-		if (mp->priority == skprio) {
-			return mp->vlan_qos; /* This should already be shifted
-					      * to mask correctly with the
-					      * VLAN's TCI */
-		}
-		mp = mp->next;
-	}
-	return 0;
-}
-
-u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
-{
-	return __vlan_dev_get_egress_qos_mask(dev, skprio);
-}
-EXPORT_SYMBOL(vlan_dev_get_egress_qos_mask);
-
 /*
  *	Create the VLAN header for an arbitrary protocol layer
  *
@@ -117,7 +92,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
 		vlan_tci = vlan->vlan_id;
-		vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority);
+		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
 		/*
@@ -174,7 +149,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
 		vlan_tci = vlan->vlan_id;
-		vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority);
+		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
 		skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
 	}
 
@@ -259,7 +234,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 	np->vlan_qos = vlan_qos;
 	/* Before inserting this element in hash table, make sure all its fields
 	 * are committed to memory.
-	 * coupled with smp_rmb() in __vlan_dev_get_egress_qos_mask()
+	 * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
 	 */
 	smp_wmb();
 	vlan->egress_priority_map[skb_prio & 0xF] = np;
-- 
cgit v1.2.3


From 03e361b25ee8dfb1fd9b890072c23c4aae01c6c7 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 29 Oct 2013 10:03:04 +0100
Subject: mfd: Stop setting refcounting pointers in original mfd_cell arrays

Commit 1e29af62f2b285bd18685da93c3ce8c33ca2d1db ("mfd: Add refcounting
support to mfd_cells") had to drop the "const" keyword on the "cell"
parameter of mfd_add_devices(), as it added the refcounting pointers
to the objects of the passed mfd_cell array itself.

However, the mfd core code operates on copies of the mfd_cell objects,
so there's no need to modify the originally passed objects.

Hence, move the setting of the refcounting pointers from mfd_add_devices()
to mfd_platform_add_cell(), where the copy of the mfd_cell objects is made.
mfd_clone_cell() can just pass (a copy of) the original usage_count
pointer.

This allows to make the "cell" parameter of mfd_add_devices() "const"
again, and avoids future race conditions when registering multiple
instances of the same device in parallel.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/mfd-core.c   | 17 +++++++++--------
 include/linux/mfd/core.h |  2 +-
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index f421586f29fb..8736f4539bc0 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -63,7 +63,8 @@ int mfd_cell_disable(struct platform_device *pdev)
 EXPORT_SYMBOL(mfd_cell_disable);
 
 static int mfd_platform_add_cell(struct platform_device *pdev,
-				 const struct mfd_cell *cell)
+				 const struct mfd_cell *cell,
+				 atomic_t *usage_count)
 {
 	if (!cell)
 		return 0;
@@ -72,11 +73,12 @@ static int mfd_platform_add_cell(struct platform_device *pdev,
 	if (!pdev->mfd_cell)
 		return -ENOMEM;
 
+	pdev->mfd_cell->usage_count = usage_count;
 	return 0;
 }
 
 static int mfd_add_device(struct device *parent, int id,
-			  const struct mfd_cell *cell,
+			  const struct mfd_cell *cell, atomic_t *usage_count,
 			  struct resource *mem_base,
 			  int irq_base, struct irq_domain *domain)
 {
@@ -115,7 +117,7 @@ static int mfd_add_device(struct device *parent, int id,
 			goto fail_res;
 	}
 
-	ret = mfd_platform_add_cell(pdev, cell);
+	ret = mfd_platform_add_cell(pdev, cell, usage_count);
 	if (ret)
 		goto fail_res;
 
@@ -180,7 +182,7 @@ fail_alloc:
 }
 
 int mfd_add_devices(struct device *parent, int id,
-		    struct mfd_cell *cells, int n_devs,
+		    const struct mfd_cell *cells, int n_devs,
 		    struct resource *mem_base,
 		    int irq_base, struct irq_domain *domain)
 {
@@ -195,8 +197,7 @@ int mfd_add_devices(struct device *parent, int id,
 
 	for (i = 0; i < n_devs; i++) {
 		atomic_set(&cnts[i], 0);
-		cells[i].usage_count = &cnts[i];
-		ret = mfd_add_device(parent, id, cells + i, mem_base,
+		ret = mfd_add_device(parent, id, cells + i, cnts + i, mem_base,
 				     irq_base, domain);
 		if (ret)
 			break;
@@ -259,8 +260,8 @@ int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones)
 	for (i = 0; i < n_clones; i++) {
 		cell_entry.name = clones[i];
 		/* don't give up if a single call fails; just report error */
-		if (mfd_add_device(pdev->dev.parent, -1, &cell_entry, NULL, 0,
-				   NULL))
+		if (mfd_add_device(pdev->dev.parent, -1, &cell_entry,
+				   cell_entry.usage_count, NULL, 0, NULL))
 			dev_err(dev, "failed to create platform device '%s'\n",
 					clones[i]);
 	}
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index cebe97ee98b8..60ced604664f 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -98,7 +98,7 @@ static inline const struct mfd_cell *mfd_get_cell(struct platform_device *pdev)
 }
 
 extern int mfd_add_devices(struct device *parent, int id,
-			   struct mfd_cell *cells, int n_devs,
+			   const struct mfd_cell *cells, int n_devs,
 			   struct resource *mem_base,
 			   int irq_base, struct irq_domain *irq_domain);
 
-- 
cgit v1.2.3


From 67a6de49bf545c34eb8dee99abbb92d9ea268200 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Nov 2013 08:26:39 +0100
Subject: locking/doc: Update references to kernel/mutex.c

Fix this docbook error:

  >> docproc: kernel/mutex.c: No such file or directory

by updating the stale references to kernel/mutex.c.

Reported-by: fengguang.wu@intel.com
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-34pikw1tlsskj65rrt5iusrq@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/DocBook/kernel-locking.tmpl |  2 +-
 Documentation/mutex-design.txt            | 10 +++++-----
 include/linux/mutex.h                     |  2 +-
 kernel/locking/mutex.c                    |  2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 09e884e5b9f5..19f2a5a5a5b4 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -1958,7 +1958,7 @@ machines due to caching.
   <chapter id="apiref-mutex">
    <title>Mutex API reference</title>
 !Iinclude/linux/mutex.h
-!Ekernel/mutex.c
+!Ekernel/locking/mutex.c
   </chapter>
 
   <chapter id="apiref-futex">
diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt
index 38c10fd7f411..1dfe62c3641d 100644
--- a/Documentation/mutex-design.txt
+++ b/Documentation/mutex-design.txt
@@ -116,11 +116,11 @@ using mutexes at the moment, please let me know if you find any. ]
 Implementation of mutexes
 -------------------------
 
-'struct mutex' is the new mutex type, defined in include/linux/mutex.h
-and implemented in kernel/mutex.c. It is a counter-based mutex with a
-spinlock and a wait-list. The counter has 3 states: 1 for "unlocked",
-0 for "locked" and negative numbers (usually -1) for "locked, potential
-waiters queued".
+'struct mutex' is the new mutex type, defined in include/linux/mutex.h and
+implemented in kernel/locking/mutex.c. It is a counter-based mutex with a
+spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", 0 for
+"locked" and negative numbers (usually -1) for "locked, potential waiters
+queued".
 
 the APIs of 'struct mutex' have been streamlined:
 
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index bab49da8a0f0..d3181936c138 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -131,7 +131,7 @@ static inline int mutex_is_locked(struct mutex *lock)
 }
 
 /*
- * See kernel/mutex.c for detailed documentation of these APIs.
+ * See kernel/locking/mutex.c for detailed documentation of these APIs.
  * Also see Documentation/mutex-design.txt.
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index d24105b1b794..4dd6e4c219de 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -1,5 +1,5 @@
 /*
- * kernel/mutex.c
+ * kernel/locking/mutex.c
  *
  * Mutexes: blocking mutual exclusion locks
  *
-- 
cgit v1.2.3


From a941f8360f200d6849b292f9dc50250bca531c0e Mon Sep 17 00:00:00 2001
From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
Date: Fri, 8 Nov 2013 20:47:36 +0800
Subject: mm, slub: fix the typo in include/linux/slub_def.h

Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slub_def.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 027276fa8713..2dc4e78fc234 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -17,7 +17,7 @@
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
 	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
-	FREE_FASTPATH,		/* Free to cpu slub */
+	FREE_FASTPATH,		/* Free to cpu slab */
 	FREE_SLOWPATH,		/* Freeing not to cpu slab */
 	FREE_FROZEN,		/* Freeing to frozen slab */
 	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
-- 
cgit v1.2.3


From 51c37a70aaa3f95773af560e6db3073520513912 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 11 Nov 2013 12:20:32 +0100
Subject: random32: fix off-by-one in seeding requirement

For properly initialising the Tausworthe generator [1], we have
a strict seeding requirement, that is, s1 > 1, s2 > 7, s3 > 15.

Commit 697f8d0348 ("random32: seeding improvement") introduced
a __seed() function that imposes boundary checks proposed by the
errata paper [2] to properly ensure above conditions.

However, we're off by one, as the function is implemented as:
"return (x < m) ? x + m : x;", and called with __seed(X, 1),
__seed(X, 7), __seed(X, 15). Thus, an unwanted seed of 1, 7, 15
would be possible, whereas the lower boundary should actually
be of at least 2, 8, 16, just as GSL does. Fix this, as otherwise
an initialization with an unwanted seed could have the effect
that Tausworthe's PRNG properties cannot not be ensured.

Note that this PRNG is *not* used for cryptography in the kernel.

 [1] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
 [2] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps

Joint work with Hannes Frederic Sowa.

Fixes: 697f8d0348a6 ("random32: seeding improvement")
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/random.h |  6 +++---
 lib/random32.c         | 14 +++++++-------
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 6312dd9ba449..bf9085e89fb5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -50,9 +50,9 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 {
 	u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
 
-	state->s1 = __seed(i, 1);
-	state->s2 = __seed(i, 7);
-	state->s3 = __seed(i, 15);
+	state->s1 = __seed(i, 2);
+	state->s2 = __seed(i, 8);
+	state->s3 = __seed(i, 16);
 }
 
 #ifdef CONFIG_ARCH_RANDOM
diff --git a/lib/random32.c b/lib/random32.c
index 52280d5526be..01e8890d1089 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -141,7 +141,7 @@ void prandom_seed(u32 entropy)
 	 */
 	for_each_possible_cpu (i) {
 		struct rnd_state *state = &per_cpu(net_rand_state, i);
-		state->s1 = __seed(state->s1 ^ entropy, 1);
+		state->s1 = __seed(state->s1 ^ entropy, 2);
 	}
 }
 EXPORT_SYMBOL(prandom_seed);
@@ -158,9 +158,9 @@ static int __init prandom_init(void)
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
 
 #define LCG(x)	((x) * 69069)	/* super-duper LCG */
-		state->s1 = __seed(LCG(i + jiffies), 1);
-		state->s2 = __seed(LCG(state->s1), 7);
-		state->s3 = __seed(LCG(state->s2), 15);
+		state->s1 = __seed(LCG(i + jiffies), 2);
+		state->s2 = __seed(LCG(state->s1), 8);
+		state->s3 = __seed(LCG(state->s2), 16);
 
 		/* "warm it up" */
 		prandom_u32_state(state);
@@ -187,9 +187,9 @@ static int __init prandom_reseed(void)
 		u32 seeds[3];
 
 		get_random_bytes(&seeds, sizeof(seeds));
-		state->s1 = __seed(seeds[0], 1);
-		state->s2 = __seed(seeds[1], 7);
-		state->s3 = __seed(seeds[2], 15);
+		state->s1 = __seed(seeds[0], 2);
+		state->s2 = __seed(seeds[1], 8);
+		state->s3 = __seed(seeds[2], 16);
 
 		/* mix it in */
 		prandom_u32_state(state);
-- 
cgit v1.2.3


From 4af712e8df998475736f3e2727701bd31e3751a9 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 11 Nov 2013 12:20:34 +0100
Subject: random32: add prandom_reseed_late() and call when nonblocking pool
 becomes initialized

The Tausworthe PRNG is initialized at late_initcall time. At that time the
entropy pool serving get_random_bytes is not filled sufficiently. This
patch adds an additional reseeding step as soon as the nonblocking pool
gets marked as initialized.

On some machines it might be possible that late_initcall gets called after
the pool has been initialized. In this situation we won't reseed again.

(A call to prandom_seed_late blocks later invocations of early reseed
attempts.)

Joint work with Daniel Borkmann.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/random.c  |  5 ++++-
 include/linux/random.h |  1 +
 lib/random32.c         | 23 ++++++++++++++++++++++-
 3 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 7a744d391756..4fe5609eeb72 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -603,8 +603,11 @@ retry:
 
 	if (!r->initialized && nbits > 0) {
 		r->entropy_total += nbits;
-		if (r->entropy_total > 128)
+		if (r->entropy_total > 128) {
 			r->initialized = 1;
+			if (r == &nonblocking_pool)
+				prandom_reseed_late();
+		}
 	}
 
 	trace_credit_entropy_bits(r->name, nbits, entropy_count,
diff --git a/include/linux/random.h b/include/linux/random.h
index bf9085e89fb5..5117ae348fe8 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -29,6 +29,7 @@ unsigned long randomize_range(unsigned long start, unsigned long end, unsigned l
 u32 prandom_u32(void);
 void prandom_bytes(void *buf, int nbytes);
 void prandom_seed(u32 seed);
+void prandom_reseed_late(void);
 
 u32 prandom_u32_state(struct rnd_state *);
 void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
diff --git a/lib/random32.c b/lib/random32.c
index 12215df701e8..9f2f2fb03dfe 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -200,9 +200,18 @@ static void prandom_start_seed_timer(void)
  *	Generate better values after random number generator
  *	is fully initialized.
  */
-static int __init prandom_reseed(void)
+static void __prandom_reseed(bool late)
 {
 	int i;
+	unsigned long flags;
+	static bool latch = false;
+	static DEFINE_SPINLOCK(lock);
+
+	/* only allow initial seeding (late == false) once */
+	spin_lock_irqsave(&lock, flags);
+	if (latch && !late)
+		goto out;
+	latch = true;
 
 	for_each_possible_cpu(i) {
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
@@ -216,6 +225,18 @@ static int __init prandom_reseed(void)
 		/* mix it in */
 		prandom_u32_state(state);
 	}
+out:
+	spin_unlock_irqrestore(&lock, flags);
+}
+
+void prandom_reseed_late(void)
+{
+	__prandom_reseed(true);
+}
+
+static int __init prandom_reseed(void)
+{
+	__prandom_reseed(false);
 	prandom_start_seed_timer();
 	return 0;
 }
-- 
cgit v1.2.3


From 38e9efcdb33270b4da72143d8e7ca4dcf7f0989b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 11 Nov 2013 12:20:35 +0100
Subject: random32: move rnd_state to linux/random.h

struct rnd_state got mistakenly pulled into uapi header. It is not
used anywhere and does also not belong there!

Commit 5960164fde ("lib/random32: export pseudo-random number
generator for modules"), the last commit on rnd_state before it
got moved to uapi, says:

  This patch moves the definition of struct rnd_state and the inline
  __seed() function to linux/random.h.  It renames the static __random32()
  function to prandom32() and exports it for use in modules.

Hence, the structure was moved from lib/random32.c to linux/random.h
so that it can be used within modules (FCoE-related code in this
case), but not from user space. However, it seems to have been
mistakenly moved to uapi header through the uapi script. Since no-one
should make use of it from the linux headers, move the structure back
to the kernel for internal use, so that it can be modified on demand.

Joint work with Hannes Frederic Sowa.

Cc: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/random.h      | 4 ++++
 include/uapi/linux/random.h | 7 -------
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 5117ae348fe8..8ef0b70bd1f9 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -31,6 +31,10 @@ void prandom_bytes(void *buf, int nbytes);
 void prandom_seed(u32 seed);
 void prandom_reseed_late(void);
 
+struct rnd_state {
+	__u32 s1, s2, s3;
+};
+
 u32 prandom_u32_state(struct rnd_state *);
 void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
 
diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h
index 7471b5b3b8ba..fff3528a078f 100644
--- a/include/uapi/linux/random.h
+++ b/include/uapi/linux/random.h
@@ -40,11 +40,4 @@ struct rand_pool_info {
 	__u32	buf[0];
 };
 
-struct rnd_state {
-	__u32 s1, s2, s3;
-};
-
-/* Exported functions */
-
-
 #endif /* _UAPI_LINUX_RANDOM_H */
-- 
cgit v1.2.3


From a98814cef87946d2708812ad9f8b1e03b8366b6f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 11 Nov 2013 12:20:36 +0100
Subject: random32: upgrade taus88 generator to taus113 from errata paper

Since we use prandom*() functions quite often in networking code
i.e. in UDP port selection, netfilter code, etc, upgrade the PRNG
from Pierre L'Ecuyer's original paper "Maximally Equidistributed
Combined Tausworthe Generators", Mathematics of Computation, 65,
213 (1996), 203--213 to the version published in his errata paper [1].

The Tausworthe generator is a maximally-equidistributed generator,
that is fast and has good statistical properties [1].

The version presented there upgrades the 3 state LFSR to a 4 state
LFSR with increased periodicity from about 2^88 to 2^113. The
algorithm is presented in [1] by the very same author who also
designed the original algorithm in [2].

Also, by increasing the state, we make it a bit harder for attackers
to "guess" the PRNGs internal state. See also discussion in [3].

Now, as we use this sort of weak initialization discussed in [3]
only between core_initcall() until late_initcall() time [*] for
prandom32*() users, namely in prandom_init(), it is less relevant
from late_initcall() onwards as we overwrite seeds through
prandom_reseed() anyways with a seed source of higher entropy, that
is, get_random_bytes(). In other words, a exhaustive keysearch of
96 bit would be needed. Now, with the help of this patch, this
state-search increases further to 128 bit. Initialization needs
to make sure that s1 > 1, s2 > 7, s3 > 15, s4 > 127.

taus88 and taus113 algorithm is also part of GSL. I added a test
case in the next patch to verify internal behaviour of this patch
with GSL and ran tests with the dieharder 3.31.1 RNG test suite:

$ dieharder -g 052 -a -m 10 -s 1 -S 4137730333 #taus88
$ dieharder -g 054 -a -m 10 -s 1 -S 4137730333 #taus113

With this seed configuration, in order to compare both, we get
the following differences:

algorithm                 taus88           taus113
rands/second [**]         1.61e+08         1.37e+08
sts_serial(4, 1st run)    WEAK             PASSED
sts_serial(9, 2nd run)    WEAK             PASSED
rgb_lagged_sum(31)        WEAK             PASSED

We took out diehard_sums test as according to the authors it is
considered broken and unusable [4]. Despite that and the slight
decrease in performance (which is acceptable), taus113 here passes
all 113 tests (only rgb_minimum_distance_5 in WEAK, the rest PASSED).
In general, taus/taus113 is considered "very good" by the authors
of dieharder [5].

The papers [1][2] states a single warm-up step is sufficient by
running quicktaus once on each state to ensure proper initialization
of ~s_{0}:

Our selection of (s) according to Table 1 of [1] row 1 holds the
condition L - k <= r - s, that is,

  (32 32 32 32) - (31 29 28 25) <= (25 27 15 22) - (18 2 7 13)

with r = k - q and q = (6 2 13 3) as also stated by the paper.
So according to [2] we are safe with one round of quicktaus for
initialization. However we decided to include the warm-up phase
of the PRNG as done in GSL in every case as a safety net. We also
use the warm up phase to make the output of the RNG easier to
verify by the GSL output.

In prandom_init(), we also mix random_get_entropy() into it, just
like drivers/char/random.c does it, jiffies ^ random_get_entropy().
random-get_entropy() is get_cycles(). xor is entropy preserving so
it is fine if it is not implemented by some architectures.

Note, this PRNG is *not* used for cryptography in the kernel, but
rather as a fast PRNG for various randomizations i.e. in the
networking code, or elsewhere for debugging purposes, for example.

[*]: In order to generate some "sort of pseduo-randomness", since
get_random_bytes() is not yet available for us, we use jiffies and
initialize states s1 - s3 with a simple linear congruential generator
(LCG), that is x <- x * 69069; and derive s2, s3, from the 32bit
initialization from s1. So the above quote from [3] accounts only
for the time from core to late initcall, not afterwards.
[**] Single threaded run on MacBook Air w/ Intel Core i5-3317U

 [1] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
 [2] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
 [3] http://thread.gmane.org/gmane.comp.encryption.general/12103/
 [4] http://code.google.com/p/dieharder/source/browse/trunk/libdieharder/diehard_sums.c?spec=svn490&r=490#20
 [5] http://www.phy.duke.edu/~rgb/General/dieharder.php

Joint work with Hannes Frederic Sowa.

Cc: Florian Weimer <fweimer@redhat.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/random.h | 11 +++----
 lib/random32.c         | 80 +++++++++++++++++++++++++++++---------------------
 2 files changed, 52 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 8ef0b70bd1f9..4002b3df4c85 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -32,10 +32,10 @@ void prandom_seed(u32 seed);
 void prandom_reseed_late(void);
 
 struct rnd_state {
-	__u32 s1, s2, s3;
+	__u32 s1, s2, s3, s4;
 };
 
-u32 prandom_u32_state(struct rnd_state *);
+u32 prandom_u32_state(struct rnd_state *state);
 void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
 
 /*
@@ -55,9 +55,10 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 {
 	u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
 
-	state->s1 = __seed(i, 2);
-	state->s2 = __seed(i, 8);
-	state->s3 = __seed(i, 16);
+	state->s1 = __seed(i,   2U);
+	state->s2 = __seed(i,   8U);
+	state->s3 = __seed(i,  16U);
+	state->s4 = __seed(i, 128U);
 }
 
 #ifdef CONFIG_ARCH_RANDOM
diff --git a/lib/random32.c b/lib/random32.c
index 9f2f2fb03dfe..27adb753180f 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -2,19 +2,19 @@
   This is a maximally equidistributed combined Tausworthe generator
   based on code from GNU Scientific Library 1.5 (30 Jun 2004)
 
-   x_n = (s1_n ^ s2_n ^ s3_n)
+  lfsr113 version:
 
-   s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
-   s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
-   s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
+   x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n)
 
-   The period of this generator is about 2^88.
+   s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n <<  6) ^ s1_n) >> 13))
+   s2_{n+1} = (((s2_n & 4294967288) <<  2) ^ (((s2_n <<  2) ^ s2_n) >> 27))
+   s3_{n+1} = (((s3_n & 4294967280) <<  7) ^ (((s3_n << 13) ^ s3_n) >> 21))
+   s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n <<  3) ^ s4_n) >> 12))
 
-   From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
-   Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
-
-   This is available on the net from L'Ecuyer's home page,
+   The period of this generator is about 2^113 (see erratum paper).
 
+   From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
+   Generators", Mathematics of Computation, 65, 213 (1996), 203--213:
    http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
    ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
 
@@ -29,7 +29,7 @@
         that paper.)
 
    This affects the seeding procedure by imposing the requirement
-   s1 > 1, s2 > 7, s3 > 15.
+   s1 > 1, s2 > 7, s3 > 15, s4 > 127.
 
 */
 
@@ -52,11 +52,12 @@ u32 prandom_u32_state(struct rnd_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
 
-	state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
-	state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
-	state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
+	state->s1 = TAUSWORTHE(state->s1,  6U, 13U, 4294967294U, 18U);
+	state->s2 = TAUSWORTHE(state->s2,  2U, 27U, 4294967288U,  2U);
+	state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U,  7U);
+	state->s4 = TAUSWORTHE(state->s4,  3U, 12U, 4294967168U, 13U);
 
-	return (state->s1 ^ state->s2 ^ state->s3);
+	return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4);
 }
 EXPORT_SYMBOL(prandom_u32_state);
 
@@ -126,6 +127,21 @@ void prandom_bytes(void *buf, int bytes)
 }
 EXPORT_SYMBOL(prandom_bytes);
 
+static void prandom_warmup(struct rnd_state *state)
+{
+	/* Calling RNG ten times to satify recurrence condition */
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+}
+
 /**
  *	prandom_seed - add entropy to pseudo random number generator
  *	@seed: seed value
@@ -141,8 +157,9 @@ void prandom_seed(u32 entropy)
 	 */
 	for_each_possible_cpu (i) {
 		struct rnd_state *state = &per_cpu(net_rand_state, i);
-		state->s1 = __seed(state->s1 ^ entropy, 2);
-		prandom_u32_state(state);
+
+		state->s1 = __seed(state->s1 ^ entropy, 2U);
+		prandom_warmup(state);
 	}
 }
 EXPORT_SYMBOL(prandom_seed);
@@ -158,18 +175,13 @@ static int __init prandom_init(void)
 	for_each_possible_cpu(i) {
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
 
-#define LCG(x)	((x) * 69069)	/* super-duper LCG */
-		state->s1 = __seed(LCG(i + jiffies), 2);
-		state->s2 = __seed(LCG(state->s1), 8);
-		state->s3 = __seed(LCG(state->s2), 16);
-
-		/* "warm it up" */
-		prandom_u32_state(state);
-		prandom_u32_state(state);
-		prandom_u32_state(state);
-		prandom_u32_state(state);
-		prandom_u32_state(state);
-		prandom_u32_state(state);
+#define LCG(x)	((x) * 69069U)	/* super-duper LCG */
+		state->s1 = __seed(LCG((i + jiffies) ^ random_get_entropy()), 2U);
+		state->s2 = __seed(LCG(state->s1),   8U);
+		state->s3 = __seed(LCG(state->s2),  16U);
+		state->s4 = __seed(LCG(state->s3), 128U);
+
+		prandom_warmup(state);
 	}
 	return 0;
 }
@@ -215,15 +227,15 @@ static void __prandom_reseed(bool late)
 
 	for_each_possible_cpu(i) {
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
-		u32 seeds[3];
+		u32 seeds[4];
 
 		get_random_bytes(&seeds, sizeof(seeds));
-		state->s1 = __seed(seeds[0], 2);
-		state->s2 = __seed(seeds[1], 8);
-		state->s3 = __seed(seeds[2], 16);
+		state->s1 = __seed(seeds[0],   2U);
+		state->s2 = __seed(seeds[1],   8U);
+		state->s3 = __seed(seeds[2],  16U);
+		state->s4 = __seed(seeds[3], 128U);
 
-		/* mix it in */
-		prandom_u32_state(state);
+		prandom_warmup(state);
 	}
 out:
 	spin_unlock_irqrestore(&lock, flags);
-- 
cgit v1.2.3


From b9921ecdee66984b00c38c00a358ef3f611d2b50 Mon Sep 17 00:00:00 2001
From: Qiang Huang <h.huangqiang@huawei.com>
Date: Tue, 12 Nov 2013 15:07:22 -0800
Subject: mm: add a helper function to check may oom condition

Use helper function to check if we need to deal with oom condition.

Signed-off-by: Qiang Huang <h.huangqiang@huawei.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/oom.h | 5 +++++
 mm/memcontrol.c     | 9 +--------
 mm/page_alloc.c     | 2 +-
 3 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index da60007075b5..4cd62677feb9 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -82,6 +82,11 @@ static inline void oom_killer_enable(void)
 	oom_killer_disabled = false;
 }
 
+static inline bool oom_gfp_allowed(gfp_t gfp_mask)
+{
+	return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY);
+}
+
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
 /* sysctls */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 13b9d0f221b8..3427de9897a5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2984,21 +2984,14 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	struct res_counter *fail_res;
 	struct mem_cgroup *_memcg;
 	int ret = 0;
-	bool may_oom;
 
 	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
 	if (ret)
 		return ret;
 
-	/*
-	 * Conditions under which we can wait for the oom_killer. Those are
-	 * the same conditions tested by the core page allocator
-	 */
-	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
-
 	_memcg = memcg;
 	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
-				      &_memcg, may_oom);
+				      &_memcg, oom_gfp_allowed(gfp));
 
 	if (ret == -EINTR)  {
 		/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3d1d75a6629f..e0412c026e0d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2593,7 +2593,7 @@ rebalance:
 	 * running out of options and have to consider going OOM
 	 */
 	if (!did_some_progress) {
-		if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
+		if (oom_gfp_allowed(gfp_mask)) {
 			if (oom_killer_disabled)
 				goto nopage;
 			/* Coredumps can quickly deplete all memory reserves */
-- 
cgit v1.2.3


From 01b0f19707c51ef247404e6af1d4a97a11ba34f7 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hp.com>
Date: Tue, 12 Nov 2013 15:07:25 -0800
Subject: cpu/mem hotplug: add try_online_node() for cpu_up()

cpu_up() has #ifdef CONFIG_MEMORY_HOTPLUG code blocks, which call
mem_online_node() to put its node online if offlined and then call
build_all_zonelists() to initialize the zone list.

These steps are specific to memory hotplug, and should be managed in
mm/memory_hotplug.c.  lock_memory_hotplug() should also be held for the
whole steps.

For this reason, this patch replaces mem_online_node() with
try_online_node(), which performs the whole steps with
lock_memory_hotplug() held.  try_online_node() is named after
try_offline_node() as they have similar purpose.

There is no functional change in this patch.

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  8 +++++++-
 kernel/cpu.c                   | 29 +++--------------------------
 mm/memory_hotplug.c            | 16 ++++++++++++++--
 3 files changed, 24 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index dd38e62b84d2..22203c293f07 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -94,6 +94,8 @@ extern void __online_page_set_limits(struct page *page);
 extern void __online_page_increment_counters(struct page *page);
 extern void __online_page_free(struct page *page);
 
+extern int try_online_node(int nid);
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern bool is_pageblock_removable_nolock(struct page *page);
 extern int arch_remove_memory(u64 start, u64 size);
@@ -225,6 +227,11 @@ static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
 }
 
+static inline int try_online_node(int nid)
+{
+	return 0;
+}
+
 static inline void lock_memory_hotplug(void) {}
 static inline void unlock_memory_hotplug(void) {}
 
@@ -256,7 +263,6 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
 
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
-extern int mem_online_node(int nid);
 extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 63aa50d7ce1e..973d034acf84 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -437,11 +437,6 @@ int cpu_up(unsigned int cpu)
 {
 	int err = 0;
 
-#ifdef	CONFIG_MEMORY_HOTPLUG
-	int nid;
-	pg_data_t	*pgdat;
-#endif
-
 	if (!cpu_possible(cpu)) {
 		printk(KERN_ERR "can't online cpu %d because it is not "
 			"configured as may-hotadd at boot time\n", cpu);
@@ -452,27 +447,9 @@ int cpu_up(unsigned int cpu)
 		return -EINVAL;
 	}
 
-#ifdef	CONFIG_MEMORY_HOTPLUG
-	nid = cpu_to_node(cpu);
-	if (!node_online(nid)) {
-		err = mem_online_node(nid);
-		if (err)
-			return err;
-	}
-
-	pgdat = NODE_DATA(nid);
-	if (!pgdat) {
-		printk(KERN_ERR
-			"Can't online cpu %d due to NULL pgdat\n", cpu);
-		return -ENOMEM;
-	}
-
-	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
-		mutex_lock(&zonelists_mutex);
-		build_all_zonelists(NULL, NULL);
-		mutex_unlock(&zonelists_mutex);
-	}
-#endif
+	err = try_online_node(cpu_to_node(cpu));
+	if (err)
+		return err;
 
 	cpu_maps_update_begin();
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 5118028468eb..8285346be663 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1043,17 +1043,23 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
 }
 
 
-/*
+/**
+ * try_online_node - online a node if offlined
+ *
  * called by cpu_up() to online a node without onlined memory.
  */
-int mem_online_node(int nid)
+int try_online_node(int nid)
 {
 	pg_data_t	*pgdat;
 	int	ret;
 
+	if (node_online(nid))
+		return 0;
+
 	lock_memory_hotplug();
 	pgdat = hotadd_new_pgdat(nid, 0);
 	if (!pgdat) {
+		pr_err("Cannot online node %d due to NULL pgdat\n", nid);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1061,6 +1067,12 @@ int mem_online_node(int nid)
 	ret = register_one_node(nid);
 	BUG_ON(ret);
 
+	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
+		mutex_lock(&zonelists_mutex);
+		build_all_zonelists(NULL, NULL);
+		mutex_unlock(&zonelists_mutex);
+	}
+
 out:
 	unlock_memory_hotplug();
 	return ret;
-- 
cgit v1.2.3


From 948927ee9e4f35f287e61a79c9f0e85ca2202c7d Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 12 Nov 2013 15:07:28 -0800
Subject: mm, mempolicy: make mpol_to_str robust and always succeed

mpol_to_str() should not fail.  Currently, it either fails because the
string buffer is too small or because a string hasn't been defined for a
mempolicy mode.

If a new mempolicy mode is introduced and no string is defined for it,
just warn and return "unknown".

If the buffer is too small, just truncate the string and return, the
same behavior as snprintf().

This also fixes a bug where there was no NULL-byte termination when doing
*p++ = '=' and *p++ ':' and maxlen has been reached.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Chen Gang <gang.chen@asianux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c        | 14 ++++++-------
 include/linux/mempolicy.h |  5 ++---
 mm/mempolicy.c            | 52 +++++++++++++++--------------------------------
 3 files changed, 24 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 390bdab01c3c..9f1369fe0afb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1387,8 +1387,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	struct mm_struct *mm = vma->vm_mm;
 	struct mm_walk walk = {};
 	struct mempolicy *pol;
-	int n;
-	char buffer[50];
+	char buffer[64];
+	int nid;
 
 	if (!mm)
 		return 0;
@@ -1404,10 +1404,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	walk.mm = mm;
 
 	pol = get_vma_policy(task, vma, vma->vm_start);
-	n = mpol_to_str(buffer, sizeof(buffer), pol);
+	mpol_to_str(buffer, sizeof(buffer), pol);
 	mpol_cond_put(pol);
-	if (n < 0)
-		return n;
 
 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
 
@@ -1460,9 +1458,9 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	if (md->writeback)
 		seq_printf(m, " writeback=%lu", md->writeback);
 
-	for_each_node_state(n, N_MEMORY)
-		if (md->node[n])
-			seq_printf(m, " N%d=%lu", n, md->node[n]);
+	for_each_node_state(nid, N_MEMORY)
+		if (md->node[nid])
+			seq_printf(m, " N%d=%lu", nid, md->node[nid]);
 out:
 	seq_putc(m, '\n');
 
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index ea4d2495c646..9fe426b30a41 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -169,7 +169,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 extern int mpol_parse_str(char *str, struct mempolicy **mpol);
 #endif
 
-extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
+extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
 
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
@@ -307,9 +307,8 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
 }
 #endif
 
-static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
+static inline void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
-	return 0;
 }
 
 static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 71cb253368cb..260b8213a873 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2914,62 +2914,45 @@ out:
  * @maxlen:  length of @buffer
  * @pol:  pointer to mempolicy to be formatted
  *
- * Convert a mempolicy into a string.
- * Returns the number of characters in buffer (if positive)
- * or an error (negative)
+ * Convert @pol into a string.  If @buffer is too short, truncate the string.
+ * Recommend a @maxlen of at least 32 for the longest mode, "interleave", the
+ * longest flag, "relative", and to display at least a few node ids.
  */
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
+void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	char *p = buffer;
-	int l;
-	nodemask_t nodes;
-	unsigned short mode;
-	unsigned short flags = pol ? pol->flags : 0;
-
-	/*
-	 * Sanity check:  room for longest mode, flag and some nodes
-	 */
-	VM_BUG_ON(maxlen < strlen("interleave") + strlen("relative") + 16);
+	nodemask_t nodes = NODE_MASK_NONE;
+	unsigned short mode = MPOL_DEFAULT;
+	unsigned short flags = 0;
 
-	if (!pol || pol == &default_policy)
-		mode = MPOL_DEFAULT;
-	else
+	if (pol && pol != &default_policy) {
 		mode = pol->mode;
+		flags = pol->flags;
+	}
 
 	switch (mode) {
 	case MPOL_DEFAULT:
-		nodes_clear(nodes);
 		break;
-
 	case MPOL_PREFERRED:
-		nodes_clear(nodes);
 		if (flags & MPOL_F_LOCAL)
 			mode = MPOL_LOCAL;
 		else
 			node_set(pol->v.preferred_node, nodes);
 		break;
-
 	case MPOL_BIND:
-		/* Fall through */
 	case MPOL_INTERLEAVE:
 		nodes = pol->v.nodes;
 		break;
-
 	default:
-		return -EINVAL;
+		WARN_ON_ONCE(1);
+		snprintf(p, maxlen, "unknown");
+		return;
 	}
 
-	l = strlen(policy_modes[mode]);
-	if (buffer + maxlen < p + l + 1)
-		return -ENOSPC;
-
-	strcpy(p, policy_modes[mode]);
-	p += l;
+	p += snprintf(p, maxlen, policy_modes[mode]);
 
 	if (flags & MPOL_MODE_FLAGS) {
-		if (buffer + maxlen < p + 2)
-			return -ENOSPC;
-		*p++ = '=';
+		p += snprintf(p, buffer + maxlen - p, "=");
 
 		/*
 		 * Currently, the only defined flags are mutually exclusive
@@ -2981,10 +2964,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	}
 
 	if (!nodes_empty(nodes)) {
-		if (buffer + maxlen < p + 2)
-			return -ENOSPC;
-		*p++ = ':';
+		p += snprintf(p, buffer + maxlen - p, ":");
 	 	p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);
 	}
-	return p - buffer;
 }
-- 
cgit v1.2.3


From 85b35feaecd4d2284505b22708795bc1f03fc897 Mon Sep 17 00:00:00 2001
From: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Date: Tue, 12 Nov 2013 15:07:42 -0800
Subject: mm/sparsemem: use PAGES_PER_SECTION to remove redundant nr_pages
 parameter

For below functions,

- sparse_add_one_section()
- kmalloc_section_memmap()
- __kmalloc_section_memmap()
- __kfree_section_memmap()

they are always invoked to operate on one memory section, so it is
redundant to always pass a nr_pages parameter, which is the page numbers
in one section.  So we can directly use predefined macro PAGES_PER_SECTION
instead of passing the parameter.

Signed-off-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  3 +--
 mm/memory_hotplug.c            |  3 +--
 mm/sparse.c                    | 33 +++++++++++++++------------------
 3 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 22203c293f07..4ca3d951fe91 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -268,8 +268,7 @@ extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
-								int nr_pages);
+extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8285346be663..1b6fe8ca71e6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -401,13 +401,12 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 static int __meminit __add_section(int nid, struct zone *zone,
 					unsigned long phys_start_pfn)
 {
-	int nr_pages = PAGES_PER_SECTION;
 	int ret;
 
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
 
-	ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
+	ret = sparse_add_one_section(zone, phys_start_pfn);
 
 	if (ret < 0)
 		return ret;
diff --git a/mm/sparse.c b/mm/sparse.c
index 4ac1d7ef548f..fbb9dbc6aca9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -590,16 +590,15 @@ void __init sparse_init(void)
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
-						 unsigned long nr_pages)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
 {
 	/* This will make the necessary allocations eventually. */
 	return sparse_mem_map_populate(pnum, nid);
 }
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *memmap)
 {
 	unsigned long start = (unsigned long)memmap;
-	unsigned long end = (unsigned long)(memmap + nr_pages);
+	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
 
 	vmemmap_free(start, end);
 }
@@ -613,10 +612,10 @@ static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 #else
-static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
+static struct page *__kmalloc_section_memmap(void)
 {
 	struct page *page, *ret;
-	unsigned long memmap_size = sizeof(struct page) * nr_pages;
+	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
 
 	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
 	if (page)
@@ -634,19 +633,18 @@ got_map_ptr:
 	return ret;
 }
 
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
-						  unsigned long nr_pages)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
 {
-	return __kmalloc_section_memmap(nr_pages);
+	return __kmalloc_section_memmap();
 }
 
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *memmap)
 {
 	if (is_vmalloc_addr(memmap))
 		vfree(memmap);
 	else
 		free_pages((unsigned long)memmap,
-			   get_order(sizeof(struct page) * nr_pages));
+			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
@@ -684,8 +682,7 @@ static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
  * set.  If this is <=0, then that means that the passed-in
  * map was not consumed and must be freed.
  */
-int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
-			   int nr_pages)
+int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
 {
 	unsigned long section_nr = pfn_to_section_nr(start_pfn);
 	struct pglist_data *pgdat = zone->zone_pgdat;
@@ -702,12 +699,12 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 	ret = sparse_index_init(section_nr, pgdat->node_id);
 	if (ret < 0 && ret != -EEXIST)
 		return ret;
-	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
+	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
 	if (!memmap)
 		return -ENOMEM;
 	usemap = __kmalloc_section_usemap();
 	if (!usemap) {
-		__kfree_section_memmap(memmap, nr_pages);
+		__kfree_section_memmap(memmap);
 		return -ENOMEM;
 	}
 
@@ -719,7 +716,7 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 		goto out;
 	}
 
-	memset(memmap, 0, sizeof(struct page) * nr_pages);
+	memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
 
 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
 
@@ -729,7 +726,7 @@ out:
 	pgdat_resize_unlock(pgdat, &flags);
 	if (ret <= 0) {
 		kfree(usemap);
-		__kfree_section_memmap(memmap, nr_pages);
+		__kfree_section_memmap(memmap);
 	}
 	return ret;
 }
@@ -771,7 +768,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
 	if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
 		kfree(usemap);
 		if (memmap)
-			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
+			__kfree_section_memmap(memmap);
 		return;
 	}
 
-- 
cgit v1.2.3


From 46c77e2bb07eba3b38edfec76873f12942c49dd3 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Tue, 12 Nov 2013 15:07:50 -0800
Subject: tools/vm/page-types.c: support KPF_SOFTDIRTY bit

Soft dirty bit allows us to track which pages are written since the last
clear_ref (by "echo 4 > /proc/pid/clear_refs".) This is useful for
userspace applications to know their memory footprints.

Note that the kernel exposes this flag via bit[55] of /proc/pid/pagemap,
and the semantics is not a default one (scheduled to be the default in the
near future.) However, it shifts to the new semantics at the first
clear_ref, and the users of soft dirty bit always do it before utilizing
the bit, so that's not a big deal.  Users must avoid relying on the bit in
page-types before the first clear_ref.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel-page-flags.h |  1 +
 tools/vm/page-types.c             | 32 ++++++++++++++++++++------------
 2 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
index 546eb6a76934..f65ce09784f1 100644
--- a/include/linux/kernel-page-flags.h
+++ b/include/linux/kernel-page-flags.h
@@ -15,5 +15,6 @@
 #define KPF_OWNER_PRIVATE	37
 #define KPF_ARCH		38
 #define KPF_UNCACHED		39
+#define KPF_SOFTDIRTY		40
 
 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 71c9c2511ee7..d5e9d6d185c8 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -59,12 +59,14 @@
 #define PM_PSHIFT_BITS      6
 #define PM_PSHIFT_OFFSET    (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
 #define PM_PSHIFT_MASK      (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
-#define PM_PSHIFT(x)        (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define __PM_PSHIFT(x)      (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
 #define PM_PFRAME_MASK      ((1LL << PM_PSHIFT_OFFSET) - 1)
 #define PM_PFRAME(x)        ((x) & PM_PFRAME_MASK)
 
+#define __PM_SOFT_DIRTY      (1LL)
 #define PM_PRESENT          PM_STATUS(4LL)
 #define PM_SWAP             PM_STATUS(2LL)
+#define PM_SOFT_DIRTY       __PM_PSHIFT(__PM_SOFT_DIRTY)
 
 
 /*
@@ -83,6 +85,7 @@
 #define KPF_OWNER_PRIVATE	37
 #define KPF_ARCH		38
 #define KPF_UNCACHED		39
+#define KPF_SOFTDIRTY		40
 
 /* [48-] take some arbitrary free slots for expanding overloaded flags
  * not part of kernel API
@@ -132,6 +135,7 @@ static const char * const page_flag_names[] = {
 	[KPF_OWNER_PRIVATE]	= "O:owner_private",
 	[KPF_ARCH]		= "h:arch",
 	[KPF_UNCACHED]		= "c:uncached",
+	[KPF_SOFTDIRTY]		= "f:softdirty",
 
 	[KPF_READAHEAD]		= "I:readahead",
 	[KPF_SLOB_FREE]		= "P:slob_free",
@@ -417,7 +421,7 @@ static int bit_mask_ok(uint64_t flags)
 	return 1;
 }
 
-static uint64_t expand_overloaded_flags(uint64_t flags)
+static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme)
 {
 	/* SLOB/SLUB overload several page flags */
 	if (flags & BIT(SLAB)) {
@@ -433,6 +437,9 @@ static uint64_t expand_overloaded_flags(uint64_t flags)
 	if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM))
 		flags ^= BIT(RECLAIM) | BIT(READAHEAD);
 
+	if (pme & PM_SOFT_DIRTY)
+		flags |= BIT(SOFTDIRTY);
+
 	return flags;
 }
 
@@ -448,11 +455,11 @@ static uint64_t well_known_flags(uint64_t flags)
 	return flags;
 }
 
-static uint64_t kpageflags_flags(uint64_t flags)
+static uint64_t kpageflags_flags(uint64_t flags, uint64_t pme)
 {
-	flags = expand_overloaded_flags(flags);
-
-	if (!opt_raw)
+	if (opt_raw)
+		flags = expand_overloaded_flags(flags, pme);
+	else
 		flags = well_known_flags(flags);
 
 	return flags;
@@ -545,9 +552,9 @@ static size_t hash_slot(uint64_t flags)
 }
 
 static void add_page(unsigned long voffset,
-		     unsigned long offset, uint64_t flags)
+		     unsigned long offset, uint64_t flags, uint64_t pme)
 {
-	flags = kpageflags_flags(flags);
+	flags = kpageflags_flags(flags, pme);
 
 	if (!bit_mask_ok(flags))
 		return;
@@ -569,7 +576,8 @@ static void add_page(unsigned long voffset,
 #define KPAGEFLAGS_BATCH	(64 << 10)	/* 64k pages */
 static void walk_pfn(unsigned long voffset,
 		     unsigned long index,
-		     unsigned long count)
+		     unsigned long count,
+		     uint64_t pme)
 {
 	uint64_t buf[KPAGEFLAGS_BATCH];
 	unsigned long batch;
@@ -583,7 +591,7 @@ static void walk_pfn(unsigned long voffset,
 			break;
 
 		for (i = 0; i < pages; i++)
-			add_page(voffset + i, index + i, buf[i]);
+			add_page(voffset + i, index + i, buf[i], pme);
 
 		index += pages;
 		count -= pages;
@@ -608,7 +616,7 @@ static void walk_vma(unsigned long index, unsigned long count)
 		for (i = 0; i < pages; i++) {
 			pfn = pagemap_pfn(buf[i]);
 			if (pfn)
-				walk_pfn(index + i, pfn, 1);
+				walk_pfn(index + i, pfn, 1, buf[i]);
 		}
 
 		index += pages;
@@ -659,7 +667,7 @@ static void walk_addr_ranges(void)
 
 	for (i = 0; i < nr_addr_ranges; i++)
 		if (!opt_pid)
-			walk_pfn(0, opt_offset[i], opt_size[i]);
+			walk_pfn(0, opt_offset[i], opt_size[i], 0);
 		else
 			walk_task(opt_offset[i], opt_size[i]);
 
-- 
cgit v1.2.3


From c4a391b53a72d2df4ee97f96f78c1d5971b47489 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 12 Nov 2013 15:07:51 -0800
Subject: writeback: do not sync data dirtied after sync start

When there are processes heavily creating small files while sync(2) is
running, it can easily happen that quite some new files are created
between WB_SYNC_NONE and WB_SYNC_ALL pass of sync(2).  That can happen
especially if there are several busy filesystems (remember that sync
traverses filesystems sequentially and waits in WB_SYNC_ALL phase on one
fs before starting it on another fs).  Because WB_SYNC_ALL pass is slow
(e.g.  causes a transaction commit and cache flush for each inode in
ext3), resulting sync(2) times are rather large.

The following script reproduces the problem:

  function run_writers
  {
    for (( i = 0; i < 10; i++ )); do
      mkdir $1/dir$i
      for (( j = 0; j < 40000; j++ )); do
        dd if=/dev/zero of=$1/dir$i/$j bs=4k count=4 &>/dev/null
      done &
    done
  }

  for dir in "$@"; do
    run_writers $dir
  done

  sleep 40
  time sync

Fix the problem by disregarding inodes dirtied after sync(2) was called
in the WB_SYNC_ALL pass.  To allow for this, sync_inodes_sb() now takes
a time stamp when sync has started which is used for setting up work for
flusher threads.

To give some numbers, when above script is run on two ext4 filesystems
on simple SATA drive, the average sync time from 10 runs is 267.549
seconds with standard deviation 104.799426.  With the patched kernel,
the average sync time from 10 runs is 2.995 seconds with standard
deviation 0.096.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Fengguang Wu <fengguang.wu@intel.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c                | 33 ++++++++++++++++++++++-----------
 fs/sync.c                        | 15 +++++++++------
 fs/xfs/xfs_super.c               |  2 +-
 include/linux/writeback.h        |  2 +-
 include/trace/events/writeback.h |  6 +++---
 5 files changed, 36 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9f4935b8f208..4afdbd6d9678 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -39,13 +39,18 @@
 struct wb_writeback_work {
 	long nr_pages;
 	struct super_block *sb;
-	unsigned long *older_than_this;
+	/*
+	 * Write only inodes dirtied before this time. Don't forget to set
+	 * older_than_this_is_set when you set this.
+	 */
+	unsigned long older_than_this;
 	enum writeback_sync_modes sync_mode;
 	unsigned int tagged_writepages:1;
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
 	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
+	unsigned int older_than_this_is_set:1;
 	enum wb_reason reason;		/* why was writeback initiated? */
 
 	struct list_head list;		/* pending work list */
@@ -246,10 +251,10 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 	int do_sb_sort = 0;
 	int moved = 0;
 
+	WARN_ON_ONCE(!work->older_than_this_is_set);
 	while (!list_empty(delaying_queue)) {
 		inode = wb_inode(delaying_queue->prev);
-		if (work->older_than_this &&
-		    inode_dirtied_after(inode, *work->older_than_this))
+		if (inode_dirtied_after(inode, work->older_than_this))
 			break;
 		list_move(&inode->i_wb_list, &tmp);
 		moved++;
@@ -733,6 +738,8 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 		.range_cyclic	= 1,
 		.reason		= reason,
+		.older_than_this = jiffies,
+		.older_than_this_is_set = 1,
 	};
 
 	spin_lock(&wb->list_lock);
@@ -791,12 +798,13 @@ static long wb_writeback(struct bdi_writeback *wb,
 {
 	unsigned long wb_start = jiffies;
 	long nr_pages = work->nr_pages;
-	unsigned long oldest_jif;
 	struct inode *inode;
 	long progress;
 
-	oldest_jif = jiffies;
-	work->older_than_this = &oldest_jif;
+	if (!work->older_than_this_is_set) {
+		work->older_than_this = jiffies;
+		work->older_than_this_is_set = 1;
+	}
 
 	spin_lock(&wb->list_lock);
 	for (;;) {
@@ -830,10 +838,10 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 * safe.
 		 */
 		if (work->for_kupdate) {
-			oldest_jif = jiffies -
+			work->older_than_this = jiffies -
 				msecs_to_jiffies(dirty_expire_interval * 10);
 		} else if (work->for_background)
-			oldest_jif = jiffies;
+			work->older_than_this = jiffies;
 
 		trace_writeback_start(wb->bdi, work);
 		if (list_empty(&wb->b_io))
@@ -1345,18 +1353,21 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb);
 
 /**
  * sync_inodes_sb	-	sync sb inode pages
- * @sb: the superblock
+ * @sb:			the superblock
+ * @older_than_this:	timestamp
  *
  * This function writes and waits on any dirty inode belonging to this
- * super_block.
+ * superblock that has been dirtied before given timestamp.
  */
-void sync_inodes_sb(struct super_block *sb)
+void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct wb_writeback_work work = {
 		.sb		= sb,
 		.sync_mode	= WB_SYNC_ALL,
 		.nr_pages	= LONG_MAX,
+		.older_than_this = older_than_this,
+		.older_than_this_is_set = 1,
 		.range_cyclic	= 0,
 		.done		= &done,
 		.reason		= WB_REASON_SYNC,
diff --git a/fs/sync.c b/fs/sync.c
index 905f3f6b3d85..ff96f99fef64 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -27,10 +27,11 @@
  * wait == 1 case since in that case write_inode() functions do
  * sync_dirty_buffer() and thus effectively write one block at a time.
  */
-static int __sync_filesystem(struct super_block *sb, int wait)
+static int __sync_filesystem(struct super_block *sb, int wait,
+			     unsigned long start)
 {
 	if (wait)
-		sync_inodes_sb(sb);
+		sync_inodes_sb(sb, start);
 	else
 		writeback_inodes_sb(sb, WB_REASON_SYNC);
 
@@ -47,6 +48,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 int sync_filesystem(struct super_block *sb)
 {
 	int ret;
+	unsigned long start = jiffies;
 
 	/*
 	 * We need to be protected against the filesystem going from
@@ -60,17 +62,17 @@ int sync_filesystem(struct super_block *sb)
 	if (sb->s_flags & MS_RDONLY)
 		return 0;
 
-	ret = __sync_filesystem(sb, 0);
+	ret = __sync_filesystem(sb, 0, start);
 	if (ret < 0)
 		return ret;
-	return __sync_filesystem(sb, 1);
+	return __sync_filesystem(sb, 1, start);
 }
 EXPORT_SYMBOL_GPL(sync_filesystem);
 
 static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 {
 	if (!(sb->s_flags & MS_RDONLY))
-		sync_inodes_sb(sb);
+		sync_inodes_sb(sb, *((unsigned long *)arg));
 }
 
 static void sync_fs_one_sb(struct super_block *sb, void *arg)
@@ -102,9 +104,10 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
 SYSCALL_DEFINE0(sync)
 {
 	int nowait = 0, wait = 1;
+	unsigned long start = jiffies;
 
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
-	iterate_supers(sync_inodes_one_sb, NULL);
+	iterate_supers(sync_inodes_one_sb, &start);
 	iterate_supers(sync_fs_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &wait);
 	iterate_bdevs(fdatawrite_one_bdev, NULL);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 15188cc99449..8968f5036fa1 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -918,7 +918,7 @@ xfs_flush_inodes(
 	struct super_block	*sb = mp->m_super;
 
 	if (down_read_trylock(&sb->s_umount)) {
-		sync_inodes_sb(sb);
+		sync_inodes_sb(sb, jiffies);
 		up_read(&sb->s_umount);
 	}
 }
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 021b8a319b9e..fc0e4320aa6d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -97,7 +97,7 @@ void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
 int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
 				  enum wb_reason reason);
-void sync_inodes_sb(struct super_block *);
+void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this);
 void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
 void inode_wait_for_writeback(struct inode *inode);
 
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 464ea82e10db..c7bbbe794e65 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -287,11 +287,11 @@ TRACE_EVENT(writeback_queue_io,
 		__field(int,		reason)
 	),
 	TP_fast_assign(
-		unsigned long *older_than_this = work->older_than_this;
+		unsigned long older_than_this = work->older_than_this;
 		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
-		__entry->older	= older_than_this ?  *older_than_this : 0;
+		__entry->older	= older_than_this;
 		__entry->age	= older_than_this ?
-				  (jiffies - *older_than_this) * 1000 / HZ : -1;
+				  (jiffies - older_than_this) * 1000 / HZ : -1;
 		__entry->moved	= moved;
 		__entry->reason	= work->reason;
 	),
-- 
cgit v1.2.3


From 79442ed189acb8b949662676e750eda173c06f9b Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Tue, 12 Nov 2013 15:07:59 -0800
Subject: mm/memblock.c: introduce bottom-up allocation mode

The Linux kernel cannot migrate pages used by the kernel.  As a result,
kernel pages cannot be hot-removed.  So we cannot allocate hotpluggable
memory for the kernel.

ACPI SRAT (System Resource Affinity Table) contains the memory hotplug
info.  But before SRAT is parsed, memblock has already started to allocate
memory for the kernel.  So we need to prevent memblock from doing this.

In a memory hotplug system, any numa node the kernel resides in should be
unhotpluggable.  And for a modern server, each node could have at least
16GB memory.  So memory around the kernel image is highly likely
unhotpluggable.

So the basic idea is: Allocate memory from the end of the kernel image and
to the higher memory.  Since memory allocation before SRAT is parsed won't
be too much, it could highly likely be in the same node with kernel image.

The current memblock can only allocate memory top-down.  So this patch
introduces a new bottom-up allocation mode to allocate memory bottom-up.
And later when we use this allocation direction to allocate memory, we
will limit the start address above the kernel.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Toshi Kani <toshi.kani@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Thomas Renninger <trenn@suse.de>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 24 ++++++++++++++
 include/linux/mm.h       |  4 +++
 mm/memblock.c            | 83 ++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 108 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 31e95acddb4d..77c60e52939d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -35,6 +35,7 @@ struct memblock_type {
 };
 
 struct memblock {
+	bool bottom_up;  /* is bottom up direction? */
 	phys_addr_t current_limit;
 	struct memblock_type memory;
 	struct memblock_type reserved;
@@ -148,6 +149,29 @@ phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 
 phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
 
+#ifdef CONFIG_MOVABLE_NODE
+/*
+ * Set the allocation direction to bottom-up or top-down.
+ */
+static inline void memblock_set_bottom_up(bool enable)
+{
+	memblock.bottom_up = enable;
+}
+
+/*
+ * Check if the allocation direction is bottom-up or not.
+ * if this is true, that said, memblock will allocate memory
+ * in bottom-up direction.
+ */
+static inline bool memblock_bottom_up(void)
+{
+	return memblock.bottom_up;
+}
+#else
+static inline void memblock_set_bottom_up(bool enable) {}
+static inline bool memblock_bottom_up(void) { return false; }
+#endif
+
 /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */
 #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
 #define MEMBLOCK_ALLOC_ACCESSIBLE	0
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8aa4006b9636..42a35d94b82c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -50,6 +50,10 @@ extern int sysctl_legacy_va_layout;
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 
+#ifndef __pa_symbol
+#define __pa_symbol(x)  __pa(RELOC_HIDE((unsigned long)(x), 0))
+#endif
+
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 
diff --git a/mm/memblock.c b/mm/memblock.c
index accff1087137..53e477bb5558 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,6 +20,8 @@
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
 
+#include <asm-generic/sections.h>
+
 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 
@@ -32,6 +34,7 @@ struct memblock memblock __initdata_memblock = {
 	.reserved.cnt		= 1,	/* empty dummy entry */
 	.reserved.max		= INIT_MEMBLOCK_REGIONS,
 
+	.bottom_up		= false,
 	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
 };
 
@@ -82,6 +85,38 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
 	return (i < type->cnt) ? i : -1;
 }
 
+/*
+ * __memblock_find_range_bottom_up - find free area utility in bottom-up
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @size: size of free area to find
+ * @align: alignment of free area to find
+ * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ *
+ * Utility called from memblock_find_in_range_node(), find free area bottom-up.
+ *
+ * RETURNS:
+ * Found address on success, 0 on failure.
+ */
+static phys_addr_t __init_memblock
+__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
+				phys_addr_t size, phys_addr_t align, int nid)
+{
+	phys_addr_t this_start, this_end, cand;
+	u64 i;
+
+	for_each_free_mem_range(i, nid, &this_start, &this_end, NULL) {
+		this_start = clamp(this_start, start, end);
+		this_end = clamp(this_end, start, end);
+
+		cand = round_up(this_start, align);
+		if (cand < this_end && this_end - cand >= size)
+			return cand;
+	}
+
+	return 0;
+}
+
 /**
  * __memblock_find_range_top_down - find free area utility, in top-down
  * @start: start of candidate range
@@ -93,7 +128,7 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
  * Utility called from memblock_find_in_range_node(), find free area top-down.
  *
  * RETURNS:
- * Found address on success, %0 on failure.
+ * Found address on success, 0 on failure.
  */
 static phys_addr_t __init_memblock
 __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
@@ -127,13 +162,24 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  *
  * Find @size free area aligned to @align in the specified range and node.
  *
+ * When allocation direction is bottom-up, the @start should be greater
+ * than the end of the kernel image. Otherwise, it will be trimmed. The
+ * reason is that we want the bottom-up allocation just near the kernel
+ * image so it is highly likely that the allocated memory and the kernel
+ * will reside in the same node.
+ *
+ * If bottom-up allocation failed, will try to allocate memory top-down.
+ *
  * RETURNS:
- * Found address on success, %0 on failure.
+ * Found address on success, 0 on failure.
  */
 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
 					phys_addr_t end, phys_addr_t size,
 					phys_addr_t align, int nid)
 {
+	int ret;
+	phys_addr_t kernel_end;
+
 	/* pump up @end */
 	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
 		end = memblock.current_limit;
@@ -141,6 +187,37 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
 	/* avoid allocating the first page */
 	start = max_t(phys_addr_t, start, PAGE_SIZE);
 	end = max(start, end);
+	kernel_end = __pa_symbol(_end);
+
+	/*
+	 * try bottom-up allocation only when bottom-up mode
+	 * is set and @end is above the kernel image.
+	 */
+	if (memblock_bottom_up() && end > kernel_end) {
+		phys_addr_t bottom_up_start;
+
+		/* make sure we will allocate above the kernel */
+		bottom_up_start = max(start, kernel_end);
+
+		/* ok, try bottom-up allocation first */
+		ret = __memblock_find_range_bottom_up(bottom_up_start, end,
+						      size, align, nid);
+		if (ret)
+			return ret;
+
+		/*
+		 * we always limit bottom-up allocation above the kernel,
+		 * but top-down allocation doesn't have the limit, so
+		 * retrying top-down allocation may succeed when bottom-up
+		 * allocation failed.
+		 *
+		 * bottom-up allocation is expected to be fail very rarely,
+		 * so we use WARN_ONCE() here to see the stack trace if
+		 * fail happens.
+		 */
+		WARN_ONCE(1, "memblock: bottom-up allocation failed, "
+			     "memory hotunplug may be affected\n");
+	}
 
 	return __memblock_find_range_top_down(start, end, size, align, nid);
 }
@@ -155,7 +232,7 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
  * Find @size free area aligned to @align in the specified range.
  *
  * RETURNS:
- * Found address on success, %0 on failure.
+ * Found address on success, 0 on failure.
  */
 phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
 					phys_addr_t end, phys_addr_t size,
-- 
cgit v1.2.3


From 00619bcc44d6b779aa366130b354153c222e4380 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Tue, 12 Nov 2013 15:08:31 -0800
Subject: mm: factor commit limit calculation

The same calculation is currently done in three differents places.
Factor that code so future changes has to be made at only one place.

[akpm@linux-foundation.org: uninline vm_commit_limit()]
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/meminfo.c    |  5 +----
 include/linux/mman.h |  2 ++
 mm/mmap.c            |  4 +---
 mm/nommu.c           |  3 +--
 mm/util.c            | 13 +++++++++++++
 5 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 59d85d608898..c805d5b69ba1 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -24,7 +24,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 {
 	struct sysinfo i;
 	unsigned long committed;
-	unsigned long allowed;
 	struct vmalloc_info vmi;
 	long cached;
 	unsigned long pages[NR_LRU_LISTS];
@@ -37,8 +36,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	si_meminfo(&i);
 	si_swapinfo(&i);
 	committed = percpu_counter_read_positive(&vm_committed_as);
-	allowed = ((totalram_pages - hugetlb_total_pages())
-		* sysctl_overcommit_ratio / 100) + total_swap_pages;
 
 	cached = global_page_state(NR_FILE_PAGES) -
 			total_swapcache_pages() - i.bufferram;
@@ -147,7 +144,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(global_page_state(NR_UNSTABLE_NFS)),
 		K(global_page_state(NR_BOUNCE)),
 		K(global_page_state(NR_WRITEBACK_TEMP)),
-		K(allowed),
+		K(vm_commit_limit()),
 		K(committed),
 		(unsigned long)VMALLOC_TOTAL >> 10,
 		vmi.used >> 10,
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 92dc257251e4..7f7f8dae4b1d 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -87,4 +87,6 @@ calc_vm_flag_bits(unsigned long flags)
 	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
 }
+
+unsigned long vm_commit_limit(void);
 #endif /* _LINUX_MMAN_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index 3d3e224be771..803048e9c568 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -179,14 +179,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		goto error;
 	}
 
-	allowed = (totalram_pages - hugetlb_total_pages())
-	       	* sysctl_overcommit_ratio / 100;
+	allowed = vm_commit_limit();
 	/*
 	 * Reserve some for root
 	 */
 	if (!cap_sys_admin)
 		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
-	allowed += total_swap_pages;
 
 	/*
 	 * Don't let a single process grow so big a user can't recover
diff --git a/mm/nommu.c b/mm/nommu.c
index ecd1f158548e..d8a957bb9e31 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1948,13 +1948,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		goto error;
 	}
 
-	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
+	allowed = vm_commit_limit();
 	/*
 	 * Reserve some 3% for root
 	 */
 	if (!cap_sys_admin)
 		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
-	allowed += total_swap_pages;
 
 	/*
 	 * Don't let a single process grow so big a user can't recover
diff --git a/mm/util.c b/mm/util.c
index eaf63fc2c92f..f7bc2096071c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -7,6 +7,9 @@
 #include <linux/security.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/mman.h>
+#include <linux/hugetlb.h>
+
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -398,6 +401,16 @@ struct address_space *page_mapping(struct page *page)
 	return mapping;
 }
 
+/*
+ * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
+ */
+unsigned long vm_commit_limit(void)
+{
+	return ((totalram_pages - hugetlb_total_pages())
+		* sysctl_overcommit_ratio / 100) + total_swap_pages;
+}
+
+
 /* Tracepoints definitions. */
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
-- 
cgit v1.2.3


From 72403b4a0fbdf433c1fe0127e49864658f6f6468 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 12 Nov 2013 15:08:32 -0800
Subject: mm: numa: return the number of base pages altered by protection
 changes

Commit 0255d4918480 ("mm: Account for a THP NUMA hinting update as one
PTE update") was added to account for the number of PTE updates when
marking pages prot_numa.  task_numa_work was using the old return value
to track how much address space had been updated.  Altering the return
value causes the scanner to do more work than it is configured or
documented to in a single unit of work.

This patch reverts that commit and accounts for the number of THP
updates separately in vmstat.  It is up to the administrator to
interpret the pair of values correctly.  This is a straight-forward
operation and likely to only be of interest when actively debugging NUMA
balancing problems.

The impact of this patch is that the NUMA PTE scanner will scan slower
when THP is enabled and workloads may converge slower as a result.  On
the flip size system CPU usage should be lower than recent tests
reported.  This is an illustrative example of a short single JVM specjbb
test

specjbb
                       3.12.0                3.12.0
                      vanilla      acctupdates
TPut 1      26143.00 (  0.00%)     25747.00 ( -1.51%)
TPut 7     185257.00 (  0.00%)    183202.00 ( -1.11%)
TPut 13    329760.00 (  0.00%)    346577.00 (  5.10%)
TPut 19    442502.00 (  0.00%)    460146.00 (  3.99%)
TPut 25    540634.00 (  0.00%)    549053.00 (  1.56%)
TPut 31    512098.00 (  0.00%)    519611.00 (  1.47%)
TPut 37    461276.00 (  0.00%)    474973.00 (  2.97%)
TPut 43    403089.00 (  0.00%)    414172.00 (  2.75%)

              3.12.0      3.12.0
             vanillaacctupdates
User         5169.64     5184.14
System        100.45       80.02
Elapsed       252.75      251.85

Performance is similar but note the reduction in system CPU time.  While
this showed a performance gain, it will not be universal but at least
it'll be behaving as documented.  The vmstats are obviously different but
here is an obvious interpretation of them from mmtests.

                                3.12.0      3.12.0
                               vanillaacctupdates
NUMA page range updates        1408326    11043064
NUMA huge PMD updates                0       21040
NUMA PTE updates               1408326      291624

"NUMA page range updates" == nr_pte_updates and is the value returned to
the NUMA pte scanner.  NUMA huge PMD updates were the number of THP
updates which in combination can be used to calculate how many ptes were
updated from userspace.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reported-by: Alex Thorlton <athorlton@sgi.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h |  1 +
 mm/mprotect.c                 | 10 +++++++---
 mm/vmstat.c                   |  1 +
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 1855f0a22add..c557c6d096de 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -39,6 +39,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_NUMA_BALANCING
 		NUMA_PTE_UPDATES,
+		NUMA_HUGE_PTE_UPDATES,
 		NUMA_HINT_FAULTS,
 		NUMA_HINT_FAULTS_LOCAL,
 		NUMA_PAGE_MIGRATE,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a597f2ffcd6f..26667971c824 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -112,6 +112,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
+	unsigned long nr_huge_updates = 0;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -126,9 +127,10 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 						newprot, prot_numa);
 
 				if (nr_ptes) {
-					if (nr_ptes == HPAGE_PMD_NR)
-						pages++;
-
+					if (nr_ptes == HPAGE_PMD_NR) {
+						pages += HPAGE_PMD_NR;
+						nr_huge_updates++;
+					}
 					continue;
 				}
 			}
@@ -141,6 +143,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		pages += this_pages;
 	} while (pmd++, addr = next, addr != end);
 
+	if (nr_huge_updates)
+		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
 	return pages;
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b6d17edf8cf3..72496140ac08 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -812,6 +812,7 @@ const char * const vmstat_text[] = {
 
 #ifdef CONFIG_NUMA_BALANCING
 	"numa_pte_updates",
+	"numa_huge_pte_updates",
 	"numa_hint_faults",
 	"numa_hint_faults_local",
 	"numa_pages_migrated",
-- 
cgit v1.2.3


From 83460ec8dcac14142e7860a01fa59c267ac4657c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 12 Nov 2013 15:08:36 -0800
Subject: syscalls.h: use gcc alias instead of assembler aliases for syscalls

Use standard gcc __attribute__((alias(foo))) to define the syscall aliases
instead of custom assembler macros.

This is far cleaner, and also fixes my LTO kernel build.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compat.h   | 4 ++--
 include/linux/syscalls.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 345da00a86e0..ada34c92b684 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -41,14 +41,14 @@
 	COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
 
 #define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
-	asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+	asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\
+		__attribute__((alias(__stringify(compat_SyS##name))));  \
 	static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
 	asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
 	asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
 	{								\
 		return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));	\
 	}								\
-	SYSCALL_ALIAS(compat_sys##name, compat_SyS##name);		\
 	static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
 #ifndef compat_user_stack_pointer
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7fac04e7ff6e..c27f846f6b71 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -184,7 +184,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 
 #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
 #define __SYSCALL_DEFINEx(x, name, ...)					\
-	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
+		__attribute__((alias(__stringify(SyS##name))));		\
 	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
@@ -194,7 +195,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
 		return ret;						\
 	}								\
-	SYSCALL_ALIAS(sys##name, SyS##name);				\
 	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
 asmlinkage long sys_time(time_t __user *tloc);
-- 
cgit v1.2.3


From 261adc9a609dbfde815337889b9e2c8728959ab8 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 12 Nov 2013 15:08:44 -0800
Subject: jump_label: unlikely(x) > 0

if (unlikely(x) > 0) doesn't seem to help branch prediction

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/jump_label.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a5079072da66..cf08540d6204 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -132,14 +132,14 @@ static __always_inline void jump_label_init(void)
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
-	if (unlikely(atomic_read(&key->enabled)) > 0)
+	if (unlikely(atomic_read(&key->enabled) > 0))
 		return true;
 	return false;
 }
 
 static __always_inline bool static_key_true(struct static_key *key)
 {
-	if (likely(atomic_read(&key->enabled)) > 0)
+	if (likely(atomic_read(&key->enabled) > 0))
 		return true;
 	return false;
 }
-- 
cgit v1.2.3


From 27f69e68a5e534412faebc53a4e04acc9ce7fd7e Mon Sep 17 00:00:00 2001
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Date: Tue, 12 Nov 2013 15:08:47 -0800
Subject: sched: remove ARCH specific fpu_counter from task_struct

fpu_counter in task_struct was used only by sh/x86.  Both of these now
carry it in ARCH specific thread_struct, hence this can now be removed
from generic task_struct, shrinking it slightly for other arches.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mundt <paul.mundt@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 045b0d227846..5e226fe3e512 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1062,15 +1062,6 @@ struct task_struct {
 	struct hlist_head preempt_notifiers;
 #endif
 
-	/*
-	 * fpu_counter contains the number of consecutive context switches
-	 * that the FPU is used. If this is over a threshold, the lazy fpu
-	 * saving becomes unlazy to save the trap. This is an unsigned char
-	 * so that after 256 times the counter wraps and the behavior turns
-	 * lazy again; this to deal with bursty apps that only use FPU for
-	 * a short time
-	 */
-	unsigned char fpu_counter;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
 #endif
-- 
cgit v1.2.3


From 5812c13a4e636da4bd7f7cabbbbc59d9dbf3c86c Mon Sep 17 00:00:00 2001
From: Milo Kim <milo.kim@ti.com>
Date: Tue, 12 Nov 2013 15:08:57 -0800
Subject: backlight: lp855x_bl: support new LP8555 device

LP8555 is one of the LP855x family devices.

This device needs pre_init_device() and post_init_device() driver
structure.  It's same as LP8557, so the device configuration code is
shared with LP8557.  Backlight outputs are generated from dual DC-DC boost
converters.  It's configurable EPROM settings which are defined in the
platform data.

Driver documentation and device tree bindings are updated.

Signed-off-by: Milo Kim <milo.kim@ti.com>
Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/backlight/lp855x-driver.txt          |  5 ++--
 .../devicetree/bindings/video/backlight/lp855x.txt | 29 +++++++++++++++++++++-
 drivers/video/backlight/Kconfig                    |  4 +--
 drivers/video/backlight/lp855x_bl.c                | 17 +++++++++++--
 include/linux/platform_data/lp855x.h               | 19 ++++++++++++++
 5 files changed, 67 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
index 1c732f0c6758..01bce243d3d7 100644
--- a/Documentation/backlight/lp855x-driver.txt
+++ b/Documentation/backlight/lp855x-driver.txt
@@ -4,7 +4,8 @@ Kernel driver lp855x
 Backlight driver for LP855x ICs
 
 Supported chips:
-	Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557
+	Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
+	LP8557
 
 Author: Milo(Woogyom) Kim <milo.kim@ti.com>
 
@@ -24,7 +25,7 @@ Value : pwm based or register based
 
 2) chip_id
 The lp855x chip id.
-Value : lp8550/lp8551/lp8552/lp8553/lp8556/lp8557
+Value : lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
 
 Platform data for lp855x
 ------------------------
diff --git a/Documentation/devicetree/bindings/video/backlight/lp855x.txt b/Documentation/devicetree/bindings/video/backlight/lp855x.txt
index 1482103d288f..96e83a56048e 100644
--- a/Documentation/devicetree/bindings/video/backlight/lp855x.txt
+++ b/Documentation/devicetree/bindings/video/backlight/lp855x.txt
@@ -2,7 +2,7 @@ lp855x bindings
 
 Required properties:
   - compatible: "ti,lp8550", "ti,lp8551", "ti,lp8552", "ti,lp8553",
-                "ti,lp8556", "ti,lp8557"
+                "ti,lp8555", "ti,lp8556", "ti,lp8557"
   - reg: I2C slave address (u8)
   - dev-ctrl: Value of DEVICE CONTROL register (u8). It depends on the device.
 
@@ -15,6 +15,33 @@ Optional properties:
 
 Example:
 
+	/* LP8555 */
+	backlight@2c {
+		compatible = "ti,lp8555";
+		reg = <0x2c>;
+
+		dev-ctrl = /bits/ 8 <0x00>;
+		pwm-period = <10000>;
+
+		/* 4V OV, 4 output LED0 string enabled */
+		rom_14h {
+			rom-addr = /bits/ 8 <0x14>;
+			rom-val = /bits/ 8 <0xcf>;
+		};
+
+		/* Heavy smoothing, 24ms ramp time step */
+		rom_15h {
+			rom-addr = /bits/ 8 <0x15>;
+			rom-val = /bits/ 8 <0xc7>;
+		};
+
+		/* 4 output LED1 string enabled */
+		rom_19h {
+			rom-addr = /bits/ 8 <0x19>;
+			rom-val = /bits/ 8 <0x0f>;
+		};
+	};
+
 	/* LP8556 */
 	backlight@2c {
 		compatible = "ti,lp8556";
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index d4a7a351d67c..a65dd063ecad 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -388,8 +388,8 @@ config BACKLIGHT_LP855X
 	tristate "Backlight driver for TI LP855X"
 	depends on BACKLIGHT_CLASS_DEVICE && I2C
 	help
-	  This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557
-	  backlight driver.
+	  This supports TI LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
+	  LP8557 backlight driver.
 
 config BACKLIGHT_LP8788
 	tristate "Backlight driver for TI LP8788 MFD"
diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c
index c0b41f13bd4a..c952175d4113 100644
--- a/drivers/video/backlight/lp855x_bl.c
+++ b/drivers/video/backlight/lp855x_bl.c
@@ -26,13 +26,15 @@
 #define LP8556_EPROM_START		0xA0
 #define LP8556_EPROM_END		0xAF
 
-/* LP8557 Registers */
+/* LP8555/7 Registers */
 #define LP8557_BL_CMD			0x00
 #define LP8557_BL_MASK			0x01
 #define LP8557_BL_ON			0x01
 #define LP8557_BL_OFF			0x00
 #define LP8557_BRIGHTNESS_CTRL		0x04
 #define LP8557_CONFIG			0x10
+#define LP8555_EPROM_START		0x10
+#define LP8555_EPROM_END		0x7A
 #define LP8557_EPROM_START		0x10
 #define LP8557_EPROM_END		0x1E
 
@@ -111,6 +113,10 @@ static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr)
 		start = LP8556_EPROM_START;
 		end = LP8556_EPROM_END;
 		break;
+	case LP8555:
+		start = LP8555_EPROM_START;
+		end = LP8555_EPROM_END;
+		break;
 	case LP8557:
 		start = LP8557_EPROM_START;
 		end = LP8557_EPROM_END;
@@ -165,9 +171,14 @@ static int lp855x_configure(struct lp855x *lp)
 	struct lp855x_platform_data *pd = lp->pdata;
 
 	switch (lp->chip_id) {
-	case LP8550 ... LP8556:
+	case LP8550:
+	case LP8551:
+	case LP8552:
+	case LP8553:
+	case LP8556:
 		lp->cfg = &lp855x_dev_cfg;
 		break;
+	case LP8555:
 	case LP8557:
 		lp->cfg = &lp8557_dev_cfg;
 		break;
@@ -470,6 +481,7 @@ static const struct of_device_id lp855x_dt_ids[] = {
 	{ .compatible = "ti,lp8551", },
 	{ .compatible = "ti,lp8552", },
 	{ .compatible = "ti,lp8553", },
+	{ .compatible = "ti,lp8555", },
 	{ .compatible = "ti,lp8556", },
 	{ .compatible = "ti,lp8557", },
 	{ }
@@ -481,6 +493,7 @@ static const struct i2c_device_id lp855x_ids[] = {
 	{"lp8551", LP8551},
 	{"lp8552", LP8552},
 	{"lp8553", LP8553},
+	{"lp8555", LP8555},
 	{"lp8556", LP8556},
 	{"lp8557", LP8557},
 	{ }
diff --git a/include/linux/platform_data/lp855x.h b/include/linux/platform_data/lp855x.h
index ea3200527dd3..1b2ba24e4e03 100644
--- a/include/linux/platform_data/lp855x.h
+++ b/include/linux/platform_data/lp855x.h
@@ -40,6 +40,17 @@
 #define LP8553_PWM_CONFIG	LP8550_PWM_CONFIG
 #define LP8553_I2C_CONFIG	LP8550_I2C_CONFIG
 
+/* CONFIG register - LP8555 */
+#define LP8555_PWM_STANDBY	BIT(7)
+#define LP8555_PWM_FILTER	BIT(6)
+#define LP8555_RELOAD_EPROM	BIT(3)	/* use it if EPROMs should be reset
+					   when the backlight turns on */
+#define LP8555_OFF_OPENLEDS	BIT(2)
+#define LP8555_PWM_CONFIG	LP8555_PWM_ONLY
+#define LP8555_I2C_CONFIG	LP8555_I2C_ONLY
+#define LP8555_COMB1_CONFIG	LP8555_COMBINED1
+#define LP8555_COMB2_CONFIG	LP8555_COMBINED2
+
 /* DEVICE CONTROL register - LP8556 */
 #define LP8556_PWM_CONFIG	(LP8556_PWM_ONLY << BRT_MODE_SHFT)
 #define LP8556_COMB1_CONFIG	(LP8556_COMBINED1 << BRT_MODE_SHFT)
@@ -65,6 +76,7 @@ enum lp855x_chip_id {
 	LP8551,
 	LP8552,
 	LP8553,
+	LP8555,
 	LP8556,
 	LP8557,
 };
@@ -89,6 +101,13 @@ enum lp8553_brighntess_source {
 	LP8553_I2C_ONLY = LP8550_I2C_ONLY,
 };
 
+enum lp8555_brightness_source {
+	LP8555_PWM_ONLY,
+	LP8555_I2C_ONLY,
+	LP8555_COMBINED1,	/* Brightness register with shaped PWM */
+	LP8555_COMBINED2,	/* PWM with shaped brightness register */
+};
+
 enum lp8556_brightness_source {
 	LP8556_PWM_ONLY,
 	LP8556_COMBINED1,	/* pwm + i2c before the shaper block */
-- 
cgit v1.2.3


From 28e64a68a2ef1c48f30e8b6803725199929069fc Mon Sep 17 00:00:00 2001
From: Daniel Jeong <gshark.jeong@gmail.com>
Date: Tue, 12 Nov 2013 15:08:58 -0800
Subject: backlight: lm3630: apply chip revision

The LM3630 chip was revised by TI and chip name was also changed to
LM3630A.  And register map, default values and initial sequences are
changed.  The files, lm3630_bl.{c,h} are replaced by lm3630a_bl.{c,h} You
can find more information about LM3630A(datasheet, evm etc) at
http://www.ti.com/product/lm3630a

Signed-off-by: Daniel Jeong <gshark.jeong@gmail.com>
Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/Kconfig          |   6 +-
 drivers/video/backlight/Makefile         |   2 +-
 drivers/video/backlight/lm3630_bl.c      | 475 ------------------------------
 drivers/video/backlight/lm3630a_bl.c     | 483 +++++++++++++++++++++++++++++++
 include/linux/platform_data/lm3630_bl.h  |  57 ----
 include/linux/platform_data/lm3630a_bl.h |  65 +++++
 6 files changed, 552 insertions(+), 536 deletions(-)
 delete mode 100644 drivers/video/backlight/lm3630_bl.c
 create mode 100644 drivers/video/backlight/lm3630a_bl.c
 delete mode 100644 include/linux/platform_data/lm3630_bl.h
 create mode 100644 include/linux/platform_data/lm3630a_bl.h

(limited to 'include/linux')

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index a65dd063ecad..5a3eb2ecb525 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -368,12 +368,12 @@ config BACKLIGHT_AAT2870
 	  If you have a AnalogicTech AAT2870 say Y to enable the
 	  backlight driver.
 
-config BACKLIGHT_LM3630
-	tristate "Backlight Driver for LM3630"
+config BACKLIGHT_LM3630A
+	tristate "Backlight Driver for LM3630A"
 	depends on BACKLIGHT_CLASS_DEVICE && I2C
 	select REGMAP_I2C
 	help
-	  This supports TI LM3630 Backlight Driver
+	  This supports TI LM3630A Backlight Driver
 
 config BACKLIGHT_LM3639
 	tristate "Backlight Driver for LM3639"
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 38e1babb1946..bb820024f346 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -37,7 +37,7 @@ obj-$(CONFIG_BACKLIGHT_GPIO)		+= gpio_backlight.o
 obj-$(CONFIG_BACKLIGHT_HP680)		+= hp680_bl.o
 obj-$(CONFIG_BACKLIGHT_HP700)		+= jornada720_bl.o
 obj-$(CONFIG_BACKLIGHT_LM3533)		+= lm3533_bl.o
-obj-$(CONFIG_BACKLIGHT_LM3630)		+= lm3630_bl.o
+obj-$(CONFIG_BACKLIGHT_LM3630A)		+= lm3630a_bl.o
 obj-$(CONFIG_BACKLIGHT_LM3639)		+= lm3639_bl.o
 obj-$(CONFIG_BACKLIGHT_LOCOMO)		+= locomolcd.o
 obj-$(CONFIG_BACKLIGHT_LP855X)		+= lp855x_bl.o
diff --git a/drivers/video/backlight/lm3630_bl.c b/drivers/video/backlight/lm3630_bl.c
deleted file mode 100644
index 76a62e978fc3..000000000000
--- a/drivers/video/backlight/lm3630_bl.c
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
-* Simple driver for Texas Instruments LM3630 Backlight driver chip
-* Copyright (C) 2012 Texas Instruments
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License version 2 as
-* published by the Free Software Foundation.
-*
-*/
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-#include <linux/backlight.h>
-#include <linux/err.h>
-#include <linux/delay.h>
-#include <linux/uaccess.h>
-#include <linux/interrupt.h>
-#include <linux/regmap.h>
-#include <linux/platform_data/lm3630_bl.h>
-
-#define REG_CTRL	0x00
-#define REG_CONFIG	0x01
-#define REG_BRT_A	0x03
-#define REG_BRT_B	0x04
-#define REG_INT_STATUS	0x09
-#define REG_INT_EN	0x0A
-#define REG_FAULT	0x0B
-#define REG_PWM_OUTLOW	0x12
-#define REG_PWM_OUTHIGH	0x13
-#define REG_MAX		0x1F
-
-#define INT_DEBOUNCE_MSEC	10
-
-enum lm3630_leds {
-	BLED_ALL = 0,
-	BLED_1,
-	BLED_2
-};
-
-static const char * const bled_name[] = {
-	[BLED_ALL] = "lm3630_bled",	/*Bank1 controls all string */
-	[BLED_1] = "lm3630_bled1",	/*Bank1 controls bled1 */
-	[BLED_2] = "lm3630_bled2",	/*Bank1 or 2 controls bled2 */
-};
-
-struct lm3630_chip_data {
-	struct device *dev;
-	struct delayed_work work;
-	int irq;
-	struct workqueue_struct *irqthread;
-	struct lm3630_platform_data *pdata;
-	struct backlight_device *bled1;
-	struct backlight_device *bled2;
-	struct regmap *regmap;
-};
-
-/* initialize chip */
-static int lm3630_chip_init(struct lm3630_chip_data *pchip)
-{
-	int ret;
-	unsigned int reg_val;
-	struct lm3630_platform_data *pdata = pchip->pdata;
-
-	/*pwm control */
-	reg_val = ((pdata->pwm_active & 0x01) << 2) | (pdata->pwm_ctrl & 0x03);
-	ret = regmap_update_bits(pchip->regmap, REG_CONFIG, 0x07, reg_val);
-	if (ret < 0)
-		goto out;
-
-	/* bank control */
-	reg_val = ((pdata->bank_b_ctrl & 0x01) << 1) |
-			(pdata->bank_a_ctrl & 0x07);
-	ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x07, reg_val);
-	if (ret < 0)
-		goto out;
-
-	ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-	if (ret < 0)
-		goto out;
-
-	/* set initial brightness */
-	if (pdata->bank_a_ctrl != BANK_A_CTRL_DISABLE) {
-		ret = regmap_write(pchip->regmap,
-				   REG_BRT_A, pdata->init_brt_led1);
-		if (ret < 0)
-			goto out;
-	}
-
-	if (pdata->bank_b_ctrl != BANK_B_CTRL_DISABLE) {
-		ret = regmap_write(pchip->regmap,
-				   REG_BRT_B, pdata->init_brt_led2);
-		if (ret < 0)
-			goto out;
-	}
-	return ret;
-
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return ret;
-}
-
-/* interrupt handling */
-static void lm3630_delayed_func(struct work_struct *work)
-{
-	int ret;
-	unsigned int reg_val;
-	struct lm3630_chip_data *pchip;
-
-	pchip = container_of(work, struct lm3630_chip_data, work.work);
-
-	ret = regmap_read(pchip->regmap, REG_INT_STATUS, &reg_val);
-	if (ret < 0) {
-		dev_err(pchip->dev,
-			"i2c failed to access REG_INT_STATUS Register\n");
-		return;
-	}
-
-	dev_info(pchip->dev, "REG_INT_STATUS Register is 0x%x\n", reg_val);
-}
-
-static irqreturn_t lm3630_isr_func(int irq, void *chip)
-{
-	int ret;
-	struct lm3630_chip_data *pchip = chip;
-	unsigned long delay = msecs_to_jiffies(INT_DEBOUNCE_MSEC);
-
-	queue_delayed_work(pchip->irqthread, &pchip->work, delay);
-
-	ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-	if (ret < 0)
-		goto out;
-
-	return IRQ_HANDLED;
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return IRQ_HANDLED;
-}
-
-static int lm3630_intr_config(struct lm3630_chip_data *pchip)
-{
-	INIT_DELAYED_WORK(&pchip->work, lm3630_delayed_func);
-	pchip->irqthread = create_singlethread_workqueue("lm3630-irqthd");
-	if (!pchip->irqthread) {
-		dev_err(pchip->dev, "create irq thread fail...\n");
-		return -1;
-	}
-	if (request_threaded_irq
-	    (pchip->irq, NULL, lm3630_isr_func,
-	     IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "lm3630_irq", pchip)) {
-		dev_err(pchip->dev, "request threaded irq fail..\n");
-		return -1;
-	}
-	return 0;
-}
-
-static bool
-set_intensity(struct backlight_device *bl, struct lm3630_chip_data *pchip)
-{
-	if (!pchip->pdata->pwm_set_intensity)
-		return false;
-	pchip->pdata->pwm_set_intensity(bl->props.brightness - 1,
-					pchip->pdata->pwm_period);
-	return true;
-}
-
-/* update and get brightness */
-static int lm3630_bank_a_update_status(struct backlight_device *bl)
-{
-	int ret;
-	struct lm3630_chip_data *pchip = bl_get_data(bl);
-	enum lm3630_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
-
-	/* brightness 0 means disable */
-	if (!bl->props.brightness) {
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x04, 0x00);
-		if (ret < 0)
-			goto out;
-		return bl->props.brightness;
-	}
-
-	/* pwm control */
-	if (pwm_ctrl == PWM_CTRL_BANK_A || pwm_ctrl == PWM_CTRL_BANK_ALL) {
-		if (!set_intensity(bl, pchip))
-			dev_err(pchip->dev, "No pwm control func. in plat-data\n");
-	} else {
-
-		/* i2c control */
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-		if (ret < 0)
-			goto out;
-		mdelay(1);
-		ret = regmap_write(pchip->regmap,
-				   REG_BRT_A, bl->props.brightness - 1);
-		if (ret < 0)
-			goto out;
-	}
-	return bl->props.brightness;
-out:
-	dev_err(pchip->dev, "i2c failed to access REG_CTRL\n");
-	return bl->props.brightness;
-}
-
-static int lm3630_bank_a_get_brightness(struct backlight_device *bl)
-{
-	unsigned int reg_val;
-	int brightness, ret;
-	struct lm3630_chip_data *pchip = bl_get_data(bl);
-	enum lm3630_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
-
-	if (pwm_ctrl == PWM_CTRL_BANK_A || pwm_ctrl == PWM_CTRL_BANK_ALL) {
-		ret = regmap_read(pchip->regmap, REG_PWM_OUTHIGH, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = reg_val & 0x01;
-		ret = regmap_read(pchip->regmap, REG_PWM_OUTLOW, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = ((brightness << 8) | reg_val) + 1;
-	} else {
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-		if (ret < 0)
-			goto out;
-		mdelay(1);
-		ret = regmap_read(pchip->regmap, REG_BRT_A, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = reg_val + 1;
-	}
-	bl->props.brightness = brightness;
-	return bl->props.brightness;
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return 0;
-}
-
-static const struct backlight_ops lm3630_bank_a_ops = {
-	.options = BL_CORE_SUSPENDRESUME,
-	.update_status = lm3630_bank_a_update_status,
-	.get_brightness = lm3630_bank_a_get_brightness,
-};
-
-static int lm3630_bank_b_update_status(struct backlight_device *bl)
-{
-	int ret;
-	struct lm3630_chip_data *pchip = bl_get_data(bl);
-	enum lm3630_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
-
-	if (pwm_ctrl == PWM_CTRL_BANK_B || pwm_ctrl == PWM_CTRL_BANK_ALL) {
-		if (!set_intensity(bl, pchip))
-			dev_err(pchip->dev,
-				"no pwm control func. in plat-data\n");
-	} else {
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-		if (ret < 0)
-			goto out;
-		mdelay(1);
-		ret = regmap_write(pchip->regmap,
-				   REG_BRT_B, bl->props.brightness - 1);
-	}
-	return bl->props.brightness;
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return bl->props.brightness;
-}
-
-static int lm3630_bank_b_get_brightness(struct backlight_device *bl)
-{
-	unsigned int reg_val;
-	int brightness, ret;
-	struct lm3630_chip_data *pchip = bl_get_data(bl);
-	enum lm3630_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
-
-	if (pwm_ctrl == PWM_CTRL_BANK_B || pwm_ctrl == PWM_CTRL_BANK_ALL) {
-		ret = regmap_read(pchip->regmap, REG_PWM_OUTHIGH, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = reg_val & 0x01;
-		ret = regmap_read(pchip->regmap, REG_PWM_OUTLOW, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = ((brightness << 8) | reg_val) + 1;
-	} else {
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-		if (ret < 0)
-			goto out;
-		mdelay(1);
-		ret = regmap_read(pchip->regmap, REG_BRT_B, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = reg_val + 1;
-	}
-	bl->props.brightness = brightness;
-
-	return bl->props.brightness;
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return bl->props.brightness;
-}
-
-static const struct backlight_ops lm3630_bank_b_ops = {
-	.options = BL_CORE_SUSPENDRESUME,
-	.update_status = lm3630_bank_b_update_status,
-	.get_brightness = lm3630_bank_b_get_brightness,
-};
-
-static int lm3630_backlight_register(struct lm3630_chip_data *pchip,
-				     enum lm3630_leds ledno)
-{
-	const char *name = bled_name[ledno];
-	struct backlight_properties props;
-	struct lm3630_platform_data *pdata = pchip->pdata;
-
-	props.type = BACKLIGHT_RAW;
-	switch (ledno) {
-	case BLED_1:
-	case BLED_ALL:
-		props.brightness = pdata->init_brt_led1;
-		props.max_brightness = pdata->max_brt_led1;
-		pchip->bled1 =
-		    backlight_device_register(name, pchip->dev, pchip,
-					      &lm3630_bank_a_ops, &props);
-		if (IS_ERR(pchip->bled1))
-			return PTR_ERR(pchip->bled1);
-		break;
-	case BLED_2:
-		props.brightness = pdata->init_brt_led2;
-		props.max_brightness = pdata->max_brt_led2;
-		pchip->bled2 =
-		    backlight_device_register(name, pchip->dev, pchip,
-					      &lm3630_bank_b_ops, &props);
-		if (IS_ERR(pchip->bled2))
-			return PTR_ERR(pchip->bled2);
-		break;
-	}
-	return 0;
-}
-
-static void lm3630_backlight_unregister(struct lm3630_chip_data *pchip)
-{
-	if (pchip->bled1)
-		backlight_device_unregister(pchip->bled1);
-	if (pchip->bled2)
-		backlight_device_unregister(pchip->bled2);
-}
-
-static const struct regmap_config lm3630_regmap = {
-	.reg_bits = 8,
-	.val_bits = 8,
-	.max_register = REG_MAX,
-};
-
-static int lm3630_probe(struct i2c_client *client,
-				  const struct i2c_device_id *id)
-{
-	struct lm3630_platform_data *pdata = client->dev.platform_data;
-	struct lm3630_chip_data *pchip;
-	int ret;
-
-	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
-		dev_err(&client->dev, "fail : i2c functionality check...\n");
-		return -EOPNOTSUPP;
-	}
-
-	if (pdata == NULL) {
-		dev_err(&client->dev, "fail : no platform data.\n");
-		return -ENODATA;
-	}
-
-	pchip = devm_kzalloc(&client->dev, sizeof(struct lm3630_chip_data),
-			     GFP_KERNEL);
-	if (!pchip)
-		return -ENOMEM;
-	pchip->pdata = pdata;
-	pchip->dev = &client->dev;
-
-	pchip->regmap = devm_regmap_init_i2c(client, &lm3630_regmap);
-	if (IS_ERR(pchip->regmap)) {
-		ret = PTR_ERR(pchip->regmap);
-		dev_err(&client->dev, "fail : allocate register map: %d\n",
-			ret);
-		return ret;
-	}
-	i2c_set_clientdata(client, pchip);
-
-	/* chip initialize */
-	ret = lm3630_chip_init(pchip);
-	if (ret < 0) {
-		dev_err(&client->dev, "fail : init chip\n");
-		goto err_chip_init;
-	}
-
-	switch (pdata->bank_a_ctrl) {
-	case BANK_A_CTRL_ALL:
-		ret = lm3630_backlight_register(pchip, BLED_ALL);
-		pdata->bank_b_ctrl = BANK_B_CTRL_DISABLE;
-		break;
-	case BANK_A_CTRL_LED1:
-		ret = lm3630_backlight_register(pchip, BLED_1);
-		break;
-	case BANK_A_CTRL_LED2:
-		ret = lm3630_backlight_register(pchip, BLED_2);
-		pdata->bank_b_ctrl = BANK_B_CTRL_DISABLE;
-		break;
-	default:
-		break;
-	}
-
-	if (ret < 0)
-		goto err_bl_reg;
-
-	if (pdata->bank_b_ctrl && pchip->bled2 == NULL) {
-		ret = lm3630_backlight_register(pchip, BLED_2);
-		if (ret < 0)
-			goto err_bl_reg;
-	}
-
-	/* interrupt enable  : irq 0 is not allowed for lm3630 */
-	pchip->irq = client->irq;
-	if (pchip->irq)
-		lm3630_intr_config(pchip);
-
-	dev_info(&client->dev, "LM3630 backlight register OK.\n");
-	return 0;
-
-err_bl_reg:
-	dev_err(&client->dev, "fail : backlight register.\n");
-	lm3630_backlight_unregister(pchip);
-err_chip_init:
-	return ret;
-}
-
-static int lm3630_remove(struct i2c_client *client)
-{
-	int ret;
-	struct lm3630_chip_data *pchip = i2c_get_clientdata(client);
-
-	ret = regmap_write(pchip->regmap, REG_BRT_A, 0);
-	if (ret < 0)
-		dev_err(pchip->dev, "i2c failed to access register\n");
-
-	ret = regmap_write(pchip->regmap, REG_BRT_B, 0);
-	if (ret < 0)
-		dev_err(pchip->dev, "i2c failed to access register\n");
-
-	lm3630_backlight_unregister(pchip);
-	if (pchip->irq) {
-		free_irq(pchip->irq, pchip);
-		flush_workqueue(pchip->irqthread);
-		destroy_workqueue(pchip->irqthread);
-	}
-	return 0;
-}
-
-static const struct i2c_device_id lm3630_id[] = {
-	{LM3630_NAME, 0},
-	{}
-};
-
-MODULE_DEVICE_TABLE(i2c, lm3630_id);
-
-static struct i2c_driver lm3630_i2c_driver = {
-	.driver = {
-		   .name = LM3630_NAME,
-		   },
-	.probe = lm3630_probe,
-	.remove = lm3630_remove,
-	.id_table = lm3630_id,
-};
-
-module_i2c_driver(lm3630_i2c_driver);
-
-MODULE_DESCRIPTION("Texas Instruments Backlight driver for LM3630");
-MODULE_AUTHOR("G.Shark Jeong <gshark.jeong@gmail.com>");
-MODULE_AUTHOR("Daniel Jeong <daniel.jeong@ti.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c
new file mode 100644
index 000000000000..cf40cc8e662b
--- /dev/null
+++ b/drivers/video/backlight/lm3630a_bl.c
@@ -0,0 +1,483 @@
+/*
+* Simple driver for Texas Instruments LM3630A Backlight driver chip
+* Copyright (C) 2012 Texas Instruments
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 as
+* published by the Free Software Foundation.
+*
+*/
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/backlight.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/regmap.h>
+#include <linux/pwm.h>
+#include <linux/platform_data/lm3630a_bl.h>
+
+#define REG_CTRL	0x00
+#define REG_BOOST	0x02
+#define REG_CONFIG	0x01
+#define REG_BRT_A	0x03
+#define REG_BRT_B	0x04
+#define REG_I_A		0x05
+#define REG_I_B		0x06
+#define REG_INT_STATUS	0x09
+#define REG_INT_EN	0x0A
+#define REG_FAULT	0x0B
+#define REG_PWM_OUTLOW	0x12
+#define REG_PWM_OUTHIGH	0x13
+#define REG_MAX		0x1F
+
+#define INT_DEBOUNCE_MSEC	10
+struct lm3630a_chip {
+	struct device *dev;
+	struct delayed_work work;
+
+	int irq;
+	struct workqueue_struct *irqthread;
+	struct lm3630a_platform_data *pdata;
+	struct backlight_device *bleda;
+	struct backlight_device *bledb;
+	struct regmap *regmap;
+	struct pwm_device *pwmd;
+};
+
+/* i2c access */
+static int lm3630a_read(struct lm3630a_chip *pchip, unsigned int reg)
+{
+	int rval;
+	unsigned int reg_val;
+
+	rval = regmap_read(pchip->regmap, reg, &reg_val);
+	if (rval < 0)
+		return rval;
+	return reg_val & 0xFF;
+}
+
+static int lm3630a_write(struct lm3630a_chip *pchip,
+			 unsigned int reg, unsigned int data)
+{
+	return regmap_write(pchip->regmap, reg, data);
+}
+
+static int lm3630a_update(struct lm3630a_chip *pchip,
+			  unsigned int reg, unsigned int mask,
+			  unsigned int data)
+{
+	return regmap_update_bits(pchip->regmap, reg, mask, data);
+}
+
+/* initialize chip */
+static int lm3630a_chip_init(struct lm3630a_chip *pchip)
+{
+	int rval;
+	struct lm3630a_platform_data *pdata = pchip->pdata;
+
+	usleep_range(1000, 2000);
+	/* set Filter Strength Register */
+	rval = lm3630a_write(pchip, 0x50, 0x03);
+	/* set Cofig. register */
+	rval |= lm3630a_update(pchip, REG_CONFIG, 0x07, pdata->pwm_ctrl);
+	/* set boost control */
+	rval |= lm3630a_write(pchip, REG_BOOST, 0x38);
+	/* set current A */
+	rval |= lm3630a_update(pchip, REG_I_A, 0x1F, 0x1F);
+	/* set current B */
+	rval |= lm3630a_write(pchip, REG_I_B, 0x1F);
+	/* set control */
+	rval |=
+	    lm3630a_write(pchip, REG_CTRL, pdata->leda_ctrl | pdata->ledb_ctrl);
+	usleep_range(1000, 2000);
+	/* set brightness A and B */
+	rval |= lm3630a_write(pchip, REG_BRT_A, pdata->leda_init_brt);
+	rval |= lm3630a_write(pchip, REG_BRT_B, pdata->ledb_init_brt);
+
+	if (rval < 0)
+		dev_err(pchip->dev, "i2c failed to access register\n");
+	return rval;
+}
+
+/* interrupt handling */
+static void lm3630a_delayed_func(struct work_struct *work)
+{
+	unsigned int rval;
+	struct lm3630a_chip *pchip;
+
+	pchip = container_of(work, struct lm3630a_chip, work.work);
+
+	rval = lm3630a_read(pchip, REG_INT_STATUS);
+	if (rval < 0) {
+		dev_err(pchip->dev,
+			"i2c failed to access REG_INT_STATUS Register\n");
+		return;
+	}
+
+	dev_info(pchip->dev, "REG_INT_STATUS Register is 0x%x\n", rval);
+}
+
+static irqreturn_t lm3630a_isr_func(int irq, void *chip)
+{
+	int rval;
+	struct lm3630a_chip *pchip = chip;
+	unsigned long delay = msecs_to_jiffies(INT_DEBOUNCE_MSEC);
+
+	queue_delayed_work(pchip->irqthread, &pchip->work, delay);
+
+	rval = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+	if (rval < 0) {
+		dev_err(pchip->dev, "i2c failed to access register\n");
+		return IRQ_NONE;
+	}
+	return IRQ_HANDLED;
+}
+
+static int lm3630a_intr_config(struct lm3630a_chip *pchip)
+{
+	int rval;
+
+	rval = lm3630a_write(pchip, REG_INT_EN, 0x87);
+	if (rval < 0)
+		return rval;
+
+	INIT_DELAYED_WORK(&pchip->work, lm3630a_delayed_func);
+	pchip->irqthread = create_singlethread_workqueue("lm3630a-irqthd");
+	if (!pchip->irqthread) {
+		dev_err(pchip->dev, "create irq thread fail\n");
+		return -ENOMEM;
+	}
+	if (request_threaded_irq
+	    (pchip->irq, NULL, lm3630a_isr_func,
+	     IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "lm3630a_irq", pchip)) {
+		dev_err(pchip->dev, "request threaded irq fail\n");
+		return -ENOMEM;
+	}
+	return rval;
+}
+
+static void lm3630a_pwm_ctrl(struct lm3630a_chip *pchip, int br, int br_max)
+{
+	unsigned int period = pwm_get_period(pchip->pwmd);
+	unsigned int duty = br * period / br_max;
+
+	pwm_config(pchip->pwmd, duty, period);
+	if (duty)
+		pwm_enable(pchip->pwmd);
+	else
+		pwm_disable(pchip->pwmd);
+}
+
+/* update and get brightness */
+static int lm3630a_bank_a_update_status(struct backlight_device *bl)
+{
+	int ret;
+	struct lm3630a_chip *pchip = bl_get_data(bl);
+	enum lm3630a_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
+
+	/* pwm control */
+	if ((pwm_ctrl & LM3630A_PWM_BANK_A) != 0) {
+		lm3630a_pwm_ctrl(pchip, bl->props.brightness,
+				 bl->props.max_brightness);
+		return bl->props.brightness;
+	}
+
+	/* disable sleep */
+	ret = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+	if (ret < 0)
+		goto out_i2c_err;
+	usleep_range(1000, 2000);
+	/* minimum brightness is 0x04 */
+	ret = lm3630a_write(pchip, REG_BRT_A, bl->props.brightness);
+	if (bl->props.brightness < 0x4)
+		ret |= lm3630a_update(pchip, REG_CTRL, LM3630A_LEDA_ENABLE, 0);
+	else
+		ret |= lm3630a_update(pchip, REG_CTRL,
+				      LM3630A_LEDA_ENABLE, LM3630A_LEDA_ENABLE);
+	if (ret < 0)
+		goto out_i2c_err;
+	return bl->props.brightness;
+
+out_i2c_err:
+	dev_err(pchip->dev, "i2c failed to access\n");
+	return bl->props.brightness;
+}
+
+static int lm3630a_bank_a_get_brightness(struct backlight_device *bl)
+{
+	int brightness, rval;
+	struct lm3630a_chip *pchip = bl_get_data(bl);
+	enum lm3630a_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
+
+	if ((pwm_ctrl & LM3630A_PWM_BANK_A) != 0) {
+		rval = lm3630a_read(pchip, REG_PWM_OUTHIGH);
+		if (rval < 0)
+			goto out_i2c_err;
+		brightness = (rval & 0x01) << 8;
+		rval = lm3630a_read(pchip, REG_PWM_OUTLOW);
+		if (rval < 0)
+			goto out_i2c_err;
+		brightness |= rval;
+		goto out;
+	}
+
+	/* disable sleep */
+	rval = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+	if (rval < 0)
+		goto out_i2c_err;
+	usleep_range(1000, 2000);
+	rval = lm3630a_read(pchip, REG_BRT_A);
+	if (rval < 0)
+		goto out_i2c_err;
+	brightness = rval;
+
+out:
+	bl->props.brightness = brightness;
+	return bl->props.brightness;
+out_i2c_err:
+	dev_err(pchip->dev, "i2c failed to access register\n");
+	return 0;
+}
+
+static const struct backlight_ops lm3630a_bank_a_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.update_status = lm3630a_bank_a_update_status,
+	.get_brightness = lm3630a_bank_a_get_brightness,
+};
+
+/* update and get brightness */
+static int lm3630a_bank_b_update_status(struct backlight_device *bl)
+{
+	int ret;
+	struct lm3630a_chip *pchip = bl_get_data(bl);
+	enum lm3630a_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
+
+	/* pwm control */
+	if ((pwm_ctrl & LM3630A_PWM_BANK_B) != 0) {
+		lm3630a_pwm_ctrl(pchip, bl->props.brightness,
+				 bl->props.max_brightness);
+		return bl->props.brightness;
+	}
+
+	/* disable sleep */
+	ret = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+	if (ret < 0)
+		goto out_i2c_err;
+	usleep_range(1000, 2000);
+	/* minimum brightness is 0x04 */
+	ret = lm3630a_write(pchip, REG_BRT_B, bl->props.brightness);
+	if (bl->props.brightness < 0x4)
+		ret |= lm3630a_update(pchip, REG_CTRL, LM3630A_LEDB_ENABLE, 0);
+	else
+		ret |= lm3630a_update(pchip, REG_CTRL,
+				      LM3630A_LEDB_ENABLE, LM3630A_LEDB_ENABLE);
+	if (ret < 0)
+		goto out_i2c_err;
+	return bl->props.brightness;
+
+out_i2c_err:
+	dev_err(pchip->dev, "i2c failed to access REG_CTRL\n");
+	return bl->props.brightness;
+}
+
+static int lm3630a_bank_b_get_brightness(struct backlight_device *bl)
+{
+	int brightness, rval;
+	struct lm3630a_chip *pchip = bl_get_data(bl);
+	enum lm3630a_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
+
+	if ((pwm_ctrl & LM3630A_PWM_BANK_B) != 0) {
+		rval = lm3630a_read(pchip, REG_PWM_OUTHIGH);
+		if (rval < 0)
+			goto out_i2c_err;
+		brightness = (rval & 0x01) << 8;
+		rval = lm3630a_read(pchip, REG_PWM_OUTLOW);
+		if (rval < 0)
+			goto out_i2c_err;
+		brightness |= rval;
+		goto out;
+	}
+
+	/* disable sleep */
+	rval = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+	if (rval < 0)
+		goto out_i2c_err;
+	usleep_range(1000, 2000);
+	rval = lm3630a_read(pchip, REG_BRT_B);
+	if (rval < 0)
+		goto out_i2c_err;
+	brightness = rval;
+
+out:
+	bl->props.brightness = brightness;
+	return bl->props.brightness;
+out_i2c_err:
+	dev_err(pchip->dev, "i2c failed to access register\n");
+	return 0;
+}
+
+static const struct backlight_ops lm3630a_bank_b_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.update_status = lm3630a_bank_b_update_status,
+	.get_brightness = lm3630a_bank_b_get_brightness,
+};
+
+static int lm3630a_backlight_register(struct lm3630a_chip *pchip)
+{
+	struct backlight_properties props;
+	struct lm3630a_platform_data *pdata = pchip->pdata;
+
+	props.type = BACKLIGHT_RAW;
+	if (pdata->leda_ctrl != LM3630A_LEDA_DISABLE) {
+		props.brightness = pdata->leda_init_brt;
+		props.max_brightness = pdata->leda_max_brt;
+		pchip->bleda =
+		    devm_backlight_device_register(pchip->dev, "lm3630a_leda",
+						   pchip->dev, pchip,
+						   &lm3630a_bank_a_ops, &props);
+		if (IS_ERR(pchip->bleda))
+			return PTR_ERR(pchip->bleda);
+	}
+
+	if ((pdata->ledb_ctrl != LM3630A_LEDB_DISABLE) &&
+	    (pdata->ledb_ctrl != LM3630A_LEDB_ON_A)) {
+		props.brightness = pdata->ledb_init_brt;
+		props.max_brightness = pdata->ledb_max_brt;
+		pchip->bledb =
+		    devm_backlight_device_register(pchip->dev, "lm3630a_ledb",
+						   pchip->dev, pchip,
+						   &lm3630a_bank_b_ops, &props);
+		if (IS_ERR(pchip->bledb))
+			return PTR_ERR(pchip->bledb);
+	}
+	return 0;
+}
+
+static const struct regmap_config lm3630a_regmap = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = REG_MAX,
+};
+
+static int lm3630a_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct lm3630a_platform_data *pdata = client->dev.platform_data;
+	struct lm3630a_chip *pchip;
+	int rval;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev, "fail : i2c functionality check\n");
+		return -EOPNOTSUPP;
+	}
+
+	pchip = devm_kzalloc(&client->dev, sizeof(struct lm3630a_chip),
+			     GFP_KERNEL);
+	if (!pchip)
+		return -ENOMEM;
+	pchip->dev = &client->dev;
+
+	pchip->regmap = devm_regmap_init_i2c(client, &lm3630a_regmap);
+	if (IS_ERR(pchip->regmap)) {
+		rval = PTR_ERR(pchip->regmap);
+		dev_err(&client->dev, "fail : allocate reg. map: %d\n", rval);
+		return rval;
+	}
+
+	i2c_set_clientdata(client, pchip);
+	if (pdata == NULL) {
+		pchip->pdata = devm_kzalloc(pchip->dev,
+					    sizeof(struct
+						   lm3630a_platform_data),
+					    GFP_KERNEL);
+		if (pchip->pdata == NULL)
+			return -ENOMEM;
+		/* default values */
+		pchip->pdata->leda_ctrl = LM3630A_LEDA_ENABLE;
+		pchip->pdata->ledb_ctrl = LM3630A_LEDB_ENABLE;
+		pchip->pdata->leda_max_brt = LM3630A_MAX_BRIGHTNESS;
+		pchip->pdata->ledb_max_brt = LM3630A_MAX_BRIGHTNESS;
+		pchip->pdata->leda_init_brt = LM3630A_MAX_BRIGHTNESS;
+		pchip->pdata->ledb_init_brt = LM3630A_MAX_BRIGHTNESS;
+	} else {
+		pchip->pdata = pdata;
+	}
+	/* chip initialize */
+	rval = lm3630a_chip_init(pchip);
+	if (rval < 0) {
+		dev_err(&client->dev, "fail : init chip\n");
+		return rval;
+	}
+	/* backlight register */
+	rval = lm3630a_backlight_register(pchip);
+	if (rval < 0) {
+		dev_err(&client->dev, "fail : backlight register.\n");
+		return rval;
+	}
+	/* pwm */
+	if (pdata->pwm_ctrl != LM3630A_PWM_DISABLE) {
+		pchip->pwmd = devm_pwm_get(pchip->dev, "lm3630a-pwm");
+		if (IS_ERR(pchip->pwmd)) {
+			dev_err(&client->dev, "fail : get pwm device\n");
+			return PTR_ERR(pchip->pwmd);
+		}
+	}
+	pchip->pwmd->period = pdata->pwm_period;
+
+	/* interrupt enable  : irq 0 is not allowed */
+	pchip->irq = client->irq;
+	if (pchip->irq) {
+		rval = lm3630a_intr_config(pchip);
+		if (rval < 0)
+			return rval;
+	}
+	dev_info(&client->dev, "LM3630A backlight register OK.\n");
+	return 0;
+}
+
+static int lm3630a_remove(struct i2c_client *client)
+{
+	int rval;
+	struct lm3630a_chip *pchip = i2c_get_clientdata(client);
+
+	rval = lm3630a_write(pchip, REG_BRT_A, 0);
+	if (rval < 0)
+		dev_err(pchip->dev, "i2c failed to access register\n");
+
+	rval = lm3630a_write(pchip, REG_BRT_B, 0);
+	if (rval < 0)
+		dev_err(pchip->dev, "i2c failed to access register\n");
+
+	if (pchip->irq) {
+		free_irq(pchip->irq, pchip);
+		flush_workqueue(pchip->irqthread);
+		destroy_workqueue(pchip->irqthread);
+	}
+	return 0;
+}
+
+static const struct i2c_device_id lm3630a_id[] = {
+	{LM3630A_NAME, 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, lm3630a_id);
+
+static struct i2c_driver lm3630a_i2c_driver = {
+	.driver = {
+		   .name = LM3630A_NAME,
+		   },
+	.probe = lm3630a_probe,
+	.remove = lm3630a_remove,
+	.id_table = lm3630a_id,
+};
+
+module_i2c_driver(lm3630a_i2c_driver);
+
+MODULE_DESCRIPTION("Texas Instruments Backlight driver for LM3630A");
+MODULE_AUTHOR("Daniel Jeong <gshark.jeong@gmail.com>");
+MODULE_AUTHOR("LDD MLP <ldd-mlp@list.ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/lm3630_bl.h b/include/linux/platform_data/lm3630_bl.h
deleted file mode 100644
index 9176dd3f2d63..000000000000
--- a/include/linux/platform_data/lm3630_bl.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
-* Simple driver for Texas Instruments LM3630 LED Flash driver chip
-* Copyright (C) 2012 Texas Instruments
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License version 2 as
-* published by the Free Software Foundation.
-*
-*/
-
-#ifndef __LINUX_LM3630_H
-#define __LINUX_LM3630_H
-
-#define LM3630_NAME "lm3630_bl"
-
-enum lm3630_pwm_ctrl {
-	PWM_CTRL_DISABLE = 0,
-	PWM_CTRL_BANK_A,
-	PWM_CTRL_BANK_B,
-	PWM_CTRL_BANK_ALL,
-};
-
-enum lm3630_pwm_active {
-	PWM_ACTIVE_HIGH = 0,
-	PWM_ACTIVE_LOW,
-};
-
-enum lm3630_bank_a_ctrl {
-	BANK_A_CTRL_DISABLE = 0x0,
-	BANK_A_CTRL_LED1 = 0x4,
-	BANK_A_CTRL_LED2 = 0x1,
-	BANK_A_CTRL_ALL = 0x5,
-};
-
-enum lm3630_bank_b_ctrl {
-	BANK_B_CTRL_DISABLE = 0,
-	BANK_B_CTRL_LED2,
-};
-
-struct lm3630_platform_data {
-
-	/* maximum brightness */
-	int max_brt_led1;
-	int max_brt_led2;
-
-	/* initial on brightness */
-	int init_brt_led1;
-	int init_brt_led2;
-	enum lm3630_pwm_ctrl pwm_ctrl;
-	enum lm3630_pwm_active pwm_active;
-	enum lm3630_bank_a_ctrl bank_a_ctrl;
-	enum lm3630_bank_b_ctrl bank_b_ctrl;
-	unsigned int pwm_period;
-	void (*pwm_set_intensity) (int brightness, int max_brightness);
-};
-
-#endif /* __LINUX_LM3630_H */
diff --git a/include/linux/platform_data/lm3630a_bl.h b/include/linux/platform_data/lm3630a_bl.h
new file mode 100644
index 000000000000..7538e38e270b
--- /dev/null
+++ b/include/linux/platform_data/lm3630a_bl.h
@@ -0,0 +1,65 @@
+/*
+* Simple driver for Texas Instruments LM3630A LED Flash driver chip
+* Copyright (C) 2012 Texas Instruments
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 as
+* published by the Free Software Foundation.
+*
+*/
+
+#ifndef __LINUX_LM3630A_H
+#define __LINUX_LM3630A_H
+
+#define LM3630A_NAME "lm3630a_bl"
+
+enum lm3630a_pwm_ctrl {
+	LM3630A_PWM_DISABLE = 0x00,
+	LM3630A_PWM_BANK_A,
+	LM3630A_PWM_BANK_B,
+	LM3630A_PWM_BANK_ALL,
+	LM3630A_PWM_BANK_A_ACT_LOW = 0x05,
+	LM3630A_PWM_BANK_B_ACT_LOW,
+	LM3630A_PWM_BANK_ALL_ACT_LOW,
+};
+
+enum lm3630a_leda_ctrl {
+	LM3630A_LEDA_DISABLE = 0x00,
+	LM3630A_LEDA_ENABLE = 0x04,
+	LM3630A_LEDA_ENABLE_LINEAR = 0x14,
+};
+
+enum lm3630a_ledb_ctrl {
+	LM3630A_LEDB_DISABLE = 0x00,
+	LM3630A_LEDB_ON_A = 0x01,
+	LM3630A_LEDB_ENABLE = 0x02,
+	LM3630A_LEDB_ENABLE_LINEAR = 0x0A,
+};
+
+#define LM3630A_MAX_BRIGHTNESS 255
+/*
+ *@leda_init_brt : led a init brightness. 4~255
+ *@leda_max_brt  : led a max brightness.  4~255
+ *@leda_ctrl     : led a disable, enable linear, enable exponential
+ *@ledb_init_brt : led b init brightness. 4~255
+ *@ledb_max_brt  : led b max brightness.  4~255
+ *@ledb_ctrl     : led b disable, enable linear, enable exponential
+ *@pwm_period    : pwm period
+ *@pwm_ctrl      : pwm disable, bank a or b, active high or low
+ */
+struct lm3630a_platform_data {
+
+	/* led a config.  */
+	int leda_init_brt;
+	int leda_max_brt;
+	enum lm3630a_leda_ctrl leda_ctrl;
+	/* led b config. */
+	int ledb_init_brt;
+	int ledb_max_brt;
+	enum lm3630a_ledb_ctrl ledb_ctrl;
+	/* pwm config. */
+	unsigned int pwm_period;
+	enum lm3630a_pwm_ctrl pwm_ctrl;
+};
+
+#endif /* __LINUX_LM3630A_H */
-- 
cgit v1.2.3


From 684f0d3d14f2744ae7ad79063b21909e32bf444e Mon Sep 17 00:00:00 2001
From: Nicolin Chen <b42378@freescale.com>
Date: Tue, 12 Nov 2013 15:09:52 -0800
Subject: lib/genalloc: add a helper function for DMA buffer allocation

When using pool space for DMA buffer, there might be duplicated calling of
gen_pool_alloc() and gen_pool_virt_to_phys() in each implementation.

Thus it's better to add a simple helper function, a compatible one to the
common dma_alloc_coherent(), to save some code.

Signed-off-by: Nicolin Chen <b42378@freescale.com>
Cc: "Hans J. Koch" <hjk@hansjkoch.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Eric Miao <eric.y.miao@gmail.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Kevin Hilman <khilman@deeprootsystems.com>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Mauro Carvalho Chehab <m.chehab@samsung.com>
Cc: Rob Herring <rob.herring@calxeda.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Sekhar Nori <nsekhar@ti.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/genalloc.h |  2 ++
 lib/genalloc.c           | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index f8d41cb1cbe0..1eda33d7cb10 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -94,6 +94,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
 }
 extern void gen_pool_destroy(struct gen_pool *);
 extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
+extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size,
+		dma_addr_t *dma);
 extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
 extern void gen_pool_for_each_chunk(struct gen_pool *,
 	void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 26cf20be72b7..dda31168844f 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -312,6 +312,34 @@ retry:
 }
 EXPORT_SYMBOL(gen_pool_alloc);
 
+/**
+ * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: dma-view physical address
+ *
+ * Allocate the requested number of bytes from the specified pool.
+ * Uses the pool allocation function (with first-fit algorithm by default).
+ * Can not be used in NMI handler on architectures without
+ * NMI-safe cmpxchg implementation.
+ */
+void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
+{
+	unsigned long vaddr;
+
+	if (!pool)
+		return NULL;
+
+	vaddr = gen_pool_alloc(pool, size);
+	if (!vaddr)
+		return NULL;
+
+	*dma = gen_pool_virt_to_phys(pool, vaddr);
+
+	return (void *)vaddr;
+}
+EXPORT_SYMBOL(gen_pool_dma_alloc);
+
 /**
  * gen_pool_free - free allocated special memory back to the pool
  * @pool: pool to free to
-- 
cgit v1.2.3


From 008208c6b26f21c2648c250a09c55e737c02c5f8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 12 Nov 2013 15:10:01 -0800
Subject: list: introduce list_next_entry() and list_prev_entry()

Add two trivial helpers list_next_entry() and list_prev_entry(), they
can have a lot of users including list.h itself.  In fact the 1st one is
already defined in events/core.c and bnx2x_sp.c, so the patch simply
moves the definition to list.h.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c |  3 ---
 include/linux/list.h                           | 16 ++++++++++++++++
 kernel/events/core.c                           |  3 ---
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 9fbeee522d2c..32c92abf5094 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -1217,9 +1217,6 @@ static void bnx2x_set_one_vlan_mac_e1h(struct bnx2x *bp,
 				     ETH_VLAN_FILTER_CLASSIFY, config);
 }
 
-#define list_next_entry(pos, member) \
-	list_entry((pos)->member.next, typeof(*(pos)), member)
-
 /**
  * bnx2x_vlan_mac_restore - reconfigure next MAC/VLAN/VLAN-MAC element
  *
diff --git a/include/linux/list.h b/include/linux/list.h
index f4d8a2f12a33..2ece63847001 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -372,6 +372,22 @@ static inline void list_splice_tail_init(struct list_head *list,
 #define list_first_entry_or_null(ptr, type, member) \
 	(!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
 
+/**
+ * list_next_entry - get the next element in list
+ * @pos:	the type * to cursor
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_next_entry(pos, member) \
+	list_entry((pos)->member.next, typeof(*(pos)), member)
+
+/**
+ * list_prev_entry - get the prev element in list
+ * @pos:	the type * to cursor
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_prev_entry(pos, member) \
+	list_entry((pos)->member.prev, typeof(*(pos)), member)
+
 /**
  * list_for_each	-	iterate over a list
  * @pos:	the &struct list_head to use as a loop cursor.
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8c875ef6e120..d724e7757cd1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2234,9 +2234,6 @@ static void __perf_event_sync_stat(struct perf_event *event,
 	perf_event_update_userpage(next_event);
 }
 
-#define list_next_entry(pos, member) \
-	list_entry(pos->member.next, typeof(*pos), member)
-
 static void perf_event_sync_stat(struct perf_event_context *ctx,
 				   struct perf_event_context *next_ctx)
 {
-- 
cgit v1.2.3


From 8120e2e5141a420edee725ff28f18aa264795f7a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 12 Nov 2013 15:10:02 -0800
Subject: list: change list_for_each_entry*() to use list_*_entry()

Now that we have list_{next,prev}_entry() we can change
list_for_each_entry*() and list_safe_reset_next() to use the new helpers
to improve the readability.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 2ece63847001..c88a591d1c02 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -433,8 +433,8 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry(pos, head, member)				\
 	for (pos = list_entry((head)->next, typeof(*pos), member);	\
-	     &pos->member != (head); 	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
+	     &pos->member != (head);					\
+	     pos = list_next_entry(pos, member))
 
 /**
  * list_for_each_entry_reverse - iterate backwards over list of given type.
@@ -444,8 +444,8 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry_reverse(pos, head, member)			\
 	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
-	     &pos->member != (head); 	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+	     &pos->member != (head); 					\
+	     pos = list_prev_entry(pos, member))
 
 /**
  * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
@@ -468,9 +468,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  * the current position.
  */
 #define list_for_each_entry_continue(pos, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
-	     &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
+	for (pos = list_next_entry(pos, member);			\
+	     &pos->member != (head);					\
+	     pos = list_next_entry(pos, member))
 
 /**
  * list_for_each_entry_continue_reverse - iterate backwards from the given point
@@ -482,9 +482,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  * the current position.
  */
 #define list_for_each_entry_continue_reverse(pos, head, member)		\
-	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     &pos->member != (head);	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+	for (pos = list_prev_entry(pos, member);			\
+	     &pos->member != (head);					\
+	     pos = list_prev_entry(pos, member))
 
 /**
  * list_for_each_entry_from - iterate over list of given type from the current point
@@ -495,8 +495,8 @@ static inline void list_splice_tail_init(struct list_head *list,
  * Iterate over list of given type, continuing from current position.
  */
 #define list_for_each_entry_from(pos, head, member) 			\
-	for (; &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
+	for (; &pos->member != (head);					\
+	     pos = list_next_entry(pos, member))
 
 /**
  * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
@@ -507,9 +507,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry_safe(pos, n, head, member)			\
 	for (pos = list_entry((head)->next, typeof(*pos), member),	\
-		n = list_entry(pos->member.next, typeof(*pos), member);	\
+		n = list_next_entry(pos, member);			\
 	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+	     pos = n, n = list_next_entry(n, member))
 
 /**
  * list_for_each_entry_safe_continue - continue list iteration safe against removal
@@ -522,10 +522,10 @@ static inline void list_splice_tail_init(struct list_head *list,
  * safe against removal of list entry.
  */
 #define list_for_each_entry_safe_continue(pos, n, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
-		n = list_entry(pos->member.next, typeof(*pos), member);		\
+	for (pos = list_next_entry(pos, member), 				\
+		n = list_next_entry(pos, member);				\
 	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+	     pos = n, n = list_next_entry(n, member))
 
 /**
  * list_for_each_entry_safe_from - iterate over list from current point safe against removal
@@ -538,9 +538,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  * removal of list entry.
  */
 #define list_for_each_entry_safe_from(pos, n, head, member) 			\
-	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
+	for (n = list_next_entry(pos, member);					\
 	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+	     pos = n, n = list_next_entry(n, member))
 
 /**
  * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
@@ -554,9 +554,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry_safe_reverse(pos, n, head, member)		\
 	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
-		n = list_entry(pos->member.prev, typeof(*pos), member);	\
+		n = list_prev_entry(pos, member);			\
 	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+	     pos = n, n = list_prev_entry(n, member))
 
 /**
  * list_safe_reset_next - reset a stale list_for_each_entry_safe loop
@@ -571,7 +571,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * completing the current iteration of the loop body.
  */
 #define list_safe_reset_next(pos, n, member)				\
-	n = list_entry(pos->member.next, typeof(*pos), member)
+	n = list_next_entry(pos, member)
 
 /*
  * Double linked lists with a single pointer list head.
-- 
cgit v1.2.3


From 93be3c2eb3371f022ad88acf1ab6bee8e3c38378 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 12 Nov 2013 15:10:03 -0800
Subject: list: introduce list_last_entry(), use list_{first,last}_entry()

We already have list_first_entry(), it makes sense to also add
list_last_entry() for consistency.  And we use both helpers in
list_for_each_*().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index c88a591d1c02..ef9594171062 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -361,6 +361,17 @@ static inline void list_splice_tail_init(struct list_head *list,
 #define list_first_entry(ptr, type, member) \
 	list_entry((ptr)->next, type, member)
 
+/**
+ * list_last_entry - get the last element from a list
+ * @ptr:	the list head to take the element from.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_last_entry(ptr, type, member) \
+	list_entry((ptr)->prev, type, member)
+
 /**
  * list_first_entry_or_null - get the first element from a list
  * @ptr:	the list head to take the element from.
@@ -432,7 +443,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @member:	the name of the list_struct within the struct.
  */
 #define list_for_each_entry(pos, head, member)				\
-	for (pos = list_entry((head)->next, typeof(*pos), member);	\
+	for (pos = list_first_entry(head, typeof(*pos), member);	\
 	     &pos->member != (head);					\
 	     pos = list_next_entry(pos, member))
 
@@ -443,7 +454,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @member:	the name of the list_struct within the struct.
  */
 #define list_for_each_entry_reverse(pos, head, member)			\
-	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
+	for (pos = list_last_entry(head, typeof(*pos), member);		\
 	     &pos->member != (head); 					\
 	     pos = list_prev_entry(pos, member))
 
@@ -506,7 +517,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @member:	the name of the list_struct within the struct.
  */
 #define list_for_each_entry_safe(pos, n, head, member)			\
-	for (pos = list_entry((head)->next, typeof(*pos), member),	\
+	for (pos = list_first_entry(head, typeof(*pos), member),	\
 		n = list_next_entry(pos, member);			\
 	     &pos->member != (head); 					\
 	     pos = n, n = list_next_entry(n, member))
@@ -553,7 +564,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * of list entry.
  */
 #define list_for_each_entry_safe_reverse(pos, n, head, member)		\
-	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
+	for (pos = list_last_entry(head, typeof(*pos), member),		\
 		n = list_prev_entry(pos, member);			\
 	     &pos->member != (head); 					\
 	     pos = n, n = list_prev_entry(n, member))
-- 
cgit v1.2.3


From 65321547c8be5b00427ac8de23fd15801b68de1f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 12 Nov 2013 15:10:19 -0800
Subject: init.h: document the existence of __initconst

Initdata can be const since more than 5 years, using the __initconst
keyword.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index f1c27a71d03c..8e68a64bfe00 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -26,8 +26,8 @@
  * extern int initialize_foobar_device(int, int, int) __init;
  *
  * For initialized data:
- * You should insert __initdata between the variable name and equal
- * sign followed by value, e.g.:
+ * You should insert __initdata or __initconst between the variable name
+ * and equal sign followed by value, e.g.:
  *
  * static int init_variable __initdata = 0;
  * static const char linux_logo[] __initconst = { 0x32, 0x36, ... };
@@ -35,8 +35,6 @@
  * Don't forget to initialize data not at file scope, i.e. within a function,
  * as gcc otherwise puts the data into the bss section and not into the init
  * section.
- * 
- * Also note, that this data cannot be "const".
  */
 
 /* These are for everybody (although not all archs will actually
-- 
cgit v1.2.3


From 5bccae6ec4587044779f0b8e6fcb8f87db4181f0 Mon Sep 17 00:00:00 2001
From: Sangbeom Kim <sbkim73@samsung.com>
Date: Tue, 12 Nov 2013 15:11:04 -0800
Subject: rtc: s5m-rtc: add real-time clock driver for s5m8767

Add real-time clock driver for s5m8767.

Signed-off-by: Sangbeom Kim <sbkim73@samsung.com>
Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Cc: Todd Broch <tbroch@chromium.org>
Cc: Mark Brown <broonie@kernel.org>
Acked-by: Lee Jones <lee.jones@linaro.org>	[mfd parts]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/Kconfig              |  10 +
 drivers/rtc/Makefile             |   1 +
 drivers/rtc/rtc-s5m.c            | 635 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/samsung/core.h |   1 +
 include/linux/mfd/samsung/rtc.h  |  11 +
 5 files changed, 658 insertions(+)
 create mode 100644 drivers/rtc/rtc-s5m.c

(limited to 'include/linux')

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 9654aa3c05cb..4f48b9a26aa0 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -497,6 +497,16 @@ config RTC_DRV_RV3029C2
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-rv3029c2.
 
+config RTC_DRV_S5M
+	tristate "Samsung S5M series"
+	depends on MFD_SEC_CORE
+	help
+	  If you say yes here you will get support for the
+	  RTC of Samsung S5M PMIC series.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-s5m.
+
 endif # I2C
 
 comment "SPI RTC drivers"
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 2dff3d2009b5..9312e7965365 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -107,6 +107,7 @@ obj-$(CONFIG_RTC_DRV_RX8025)	+= rtc-rx8025.o
 obj-$(CONFIG_RTC_DRV_RX8581)	+= rtc-rx8581.o
 obj-$(CONFIG_RTC_DRV_S35390A)	+= rtc-s35390a.o
 obj-$(CONFIG_RTC_DRV_S3C)	+= rtc-s3c.o
+obj-$(CONFIG_RTC_DRV_S5M)	+= rtc-s5m.o
 obj-$(CONFIG_RTC_DRV_SA1100)	+= rtc-sa1100.o
 obj-$(CONFIG_RTC_DRV_SH)	+= rtc-sh.o
 obj-$(CONFIG_RTC_DRV_SNVS)	+= rtc-snvs.o
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
new file mode 100644
index 000000000000..b7fd02bc0a14
--- /dev/null
+++ b/drivers/rtc/rtc-s5m.c
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2013 Samsung Electronics Co., Ltd
+ *	http://www.samsung.com
+ *
+ *  Copyright (C) 2013 Google, Inc
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/bcd.h>
+#include <linux/bitops.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/samsung/core.h>
+#include <linux/mfd/samsung/irq.h>
+#include <linux/mfd/samsung/rtc.h>
+
+struct s5m_rtc_info {
+	struct device *dev;
+	struct sec_pmic_dev *s5m87xx;
+	struct regmap *rtc;
+	struct rtc_device *rtc_dev;
+	int irq;
+	int device_type;
+	int rtc_24hr_mode;
+	bool wtsr_smpl;
+};
+
+static void s5m8767_data_to_tm(u8 *data, struct rtc_time *tm,
+			       int rtc_24hr_mode)
+{
+	tm->tm_sec = data[RTC_SEC] & 0x7f;
+	tm->tm_min = data[RTC_MIN] & 0x7f;
+	if (rtc_24hr_mode) {
+		tm->tm_hour = data[RTC_HOUR] & 0x1f;
+	} else {
+		tm->tm_hour = data[RTC_HOUR] & 0x0f;
+		if (data[RTC_HOUR] & HOUR_PM_MASK)
+			tm->tm_hour += 12;
+	}
+
+	tm->tm_wday = ffs(data[RTC_WEEKDAY] & 0x7f);
+	tm->tm_mday = data[RTC_DATE] & 0x1f;
+	tm->tm_mon = (data[RTC_MONTH] & 0x0f) - 1;
+	tm->tm_year = (data[RTC_YEAR1] & 0x7f) + 100;
+	tm->tm_yday = 0;
+	tm->tm_isdst = 0;
+}
+
+static int s5m8767_tm_to_data(struct rtc_time *tm, u8 *data)
+{
+	data[RTC_SEC] = tm->tm_sec;
+	data[RTC_MIN] = tm->tm_min;
+
+	if (tm->tm_hour >= 12)
+		data[RTC_HOUR] = tm->tm_hour | HOUR_PM_MASK;
+	else
+		data[RTC_HOUR] = tm->tm_hour & ~HOUR_PM_MASK;
+
+	data[RTC_WEEKDAY] = 1 << tm->tm_wday;
+	data[RTC_DATE] = tm->tm_mday;
+	data[RTC_MONTH] = tm->tm_mon + 1;
+	data[RTC_YEAR1] = tm->tm_year > 100 ? (tm->tm_year - 100) : 0;
+
+	if (tm->tm_year < 100) {
+		pr_err("s5m8767 RTC cannot handle the year %d.\n",
+		       1900 + tm->tm_year);
+		return -EINVAL;
+	} else {
+		return 0;
+	}
+}
+
+static inline int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
+{
+	int ret;
+	unsigned int data;
+
+	ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &data);
+	if (ret < 0) {
+		dev_err(info->dev, "failed to read update reg(%d)\n", ret);
+		return ret;
+	}
+
+	data |= RTC_TIME_EN_MASK;
+	data |= RTC_UDR_MASK;
+
+	ret = regmap_write(info->rtc, SEC_RTC_UDR_CON, data);
+	if (ret < 0) {
+		dev_err(info->dev, "failed to write update reg(%d)\n", ret);
+		return ret;
+	}
+
+	do {
+		ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &data);
+	} while ((data & RTC_UDR_MASK) && !ret);
+
+	return ret;
+}
+
+static inline int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
+{
+	int ret;
+	unsigned int data;
+
+	ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &data);
+	if (ret < 0) {
+		dev_err(info->dev, "%s: fail to read update reg(%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
+	data &= ~RTC_TIME_EN_MASK;
+	data |= RTC_UDR_MASK;
+
+	ret = regmap_write(info->rtc, SEC_RTC_UDR_CON, data);
+	if (ret < 0) {
+		dev_err(info->dev, "%s: fail to write update reg(%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
+	do {
+		ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &data);
+	} while ((data & RTC_UDR_MASK) && !ret);
+
+	return ret;
+}
+
+static void s5m8763_data_to_tm(u8 *data, struct rtc_time *tm)
+{
+	tm->tm_sec = bcd2bin(data[RTC_SEC]);
+	tm->tm_min = bcd2bin(data[RTC_MIN]);
+
+	if (data[RTC_HOUR] & HOUR_12) {
+		tm->tm_hour = bcd2bin(data[RTC_HOUR] & 0x1f);
+		if (data[RTC_HOUR] & HOUR_PM)
+			tm->tm_hour += 12;
+	} else {
+		tm->tm_hour = bcd2bin(data[RTC_HOUR] & 0x3f);
+	}
+
+	tm->tm_wday = data[RTC_WEEKDAY] & 0x07;
+	tm->tm_mday = bcd2bin(data[RTC_DATE]);
+	tm->tm_mon = bcd2bin(data[RTC_MONTH]);
+	tm->tm_year = bcd2bin(data[RTC_YEAR1]) + bcd2bin(data[RTC_YEAR2]) * 100;
+	tm->tm_year -= 1900;
+}
+
+static void s5m8763_tm_to_data(struct rtc_time *tm, u8 *data)
+{
+	data[RTC_SEC] = bin2bcd(tm->tm_sec);
+	data[RTC_MIN] = bin2bcd(tm->tm_min);
+	data[RTC_HOUR] = bin2bcd(tm->tm_hour);
+	data[RTC_WEEKDAY] = tm->tm_wday;
+	data[RTC_DATE] = bin2bcd(tm->tm_mday);
+	data[RTC_MONTH] = bin2bcd(tm->tm_mon);
+	data[RTC_YEAR1] = bin2bcd(tm->tm_year % 100);
+	data[RTC_YEAR2] = bin2bcd((tm->tm_year + 1900) / 100);
+}
+
+static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+	u8 data[8];
+	int ret;
+
+	ret = regmap_bulk_read(info->rtc, SEC_RTC_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	switch (info->device_type) {
+	case S5M8763X:
+		s5m8763_data_to_tm(data, tm);
+		break;
+
+	case S5M8767X:
+		s5m8767_data_to_tm(data, tm, info->rtc_24hr_mode);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
+		tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
+
+	return rtc_valid_tm(tm);
+}
+
+static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+	u8 data[8];
+	int ret = 0;
+
+	switch (info->device_type) {
+	case S5M8763X:
+		s5m8763_tm_to_data(tm, data);
+		break;
+	case S5M8767X:
+		ret = s5m8767_tm_to_data(tm, data);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ret < 0)
+		return ret;
+
+	dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
+		tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
+
+	ret = regmap_raw_write(info->rtc, SEC_RTC_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	ret = s5m8767_rtc_set_time_reg(info);
+
+	return ret;
+}
+
+static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+	u8 data[8];
+	unsigned int val;
+	int ret, i;
+
+	ret = regmap_bulk_read(info->rtc, SEC_ALARM0_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	switch (info->device_type) {
+	case S5M8763X:
+		s5m8763_data_to_tm(data, &alrm->time);
+		ret = regmap_read(info->rtc, SEC_ALARM0_CONF, &val);
+		if (ret < 0)
+			return ret;
+
+		alrm->enabled = !!val;
+
+		ret = regmap_read(info->rtc, SEC_RTC_STATUS, &val);
+		if (ret < 0)
+			return ret;
+
+		break;
+
+	case S5M8767X:
+		s5m8767_data_to_tm(data, &alrm->time, info->rtc_24hr_mode);
+		dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+			1900 + alrm->time.tm_year, 1 + alrm->time.tm_mon,
+			alrm->time.tm_mday, alrm->time.tm_hour,
+			alrm->time.tm_min, alrm->time.tm_sec,
+			alrm->time.tm_wday);
+
+		alrm->enabled = 0;
+		for (i = 0; i < 7; i++) {
+			if (data[i] & ALARM_ENABLE_MASK) {
+				alrm->enabled = 1;
+				break;
+			}
+		}
+
+		alrm->pending = 0;
+		ret = regmap_read(info->rtc, SEC_RTC_STATUS, &val);
+		if (ret < 0)
+			return ret;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (val & ALARM0_STATUS)
+		alrm->pending = 1;
+	else
+		alrm->pending = 0;
+
+	return 0;
+}
+
+static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
+{
+	u8 data[8];
+	int ret, i;
+	struct rtc_time tm;
+
+	ret = regmap_bulk_read(info->rtc, SEC_ALARM0_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	s5m8767_data_to_tm(data, &tm, info->rtc_24hr_mode);
+	dev_dbg(info->dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + tm.tm_year, 1 + tm.tm_mon, tm.tm_mday,
+		tm.tm_hour, tm.tm_min, tm.tm_sec, tm.tm_wday);
+
+	switch (info->device_type) {
+	case S5M8763X:
+		ret = regmap_write(info->rtc, SEC_ALARM0_CONF, 0);
+		break;
+
+	case S5M8767X:
+		for (i = 0; i < 7; i++)
+			data[i] &= ~ALARM_ENABLE_MASK;
+
+		ret = regmap_raw_write(info->rtc, SEC_ALARM0_SEC, data, 8);
+		if (ret < 0)
+			return ret;
+
+		ret = s5m8767_rtc_set_alarm_reg(info);
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
+{
+	int ret;
+	u8 data[8];
+	u8 alarm0_conf;
+	struct rtc_time tm;
+
+	ret = regmap_bulk_read(info->rtc, SEC_ALARM0_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	s5m8767_data_to_tm(data, &tm, info->rtc_24hr_mode);
+	dev_dbg(info->dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + tm.tm_year, 1 + tm.tm_mon, tm.tm_mday,
+		tm.tm_hour, tm.tm_min, tm.tm_sec, tm.tm_wday);
+
+	switch (info->device_type) {
+	case S5M8763X:
+		alarm0_conf = 0x77;
+		ret = regmap_write(info->rtc, SEC_ALARM0_CONF, alarm0_conf);
+		break;
+
+	case S5M8767X:
+		data[RTC_SEC] |= ALARM_ENABLE_MASK;
+		data[RTC_MIN] |= ALARM_ENABLE_MASK;
+		data[RTC_HOUR] |= ALARM_ENABLE_MASK;
+		data[RTC_WEEKDAY] &= ~ALARM_ENABLE_MASK;
+		if (data[RTC_DATE] & 0x1f)
+			data[RTC_DATE] |= ALARM_ENABLE_MASK;
+		if (data[RTC_MONTH] & 0xf)
+			data[RTC_MONTH] |= ALARM_ENABLE_MASK;
+		if (data[RTC_YEAR1] & 0x7f)
+			data[RTC_YEAR1] |= ALARM_ENABLE_MASK;
+
+		ret = regmap_raw_write(info->rtc, SEC_ALARM0_SEC, data, 8);
+		if (ret < 0)
+			return ret;
+		ret = s5m8767_rtc_set_alarm_reg(info);
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+	u8 data[8];
+	int ret;
+
+	switch (info->device_type) {
+	case S5M8763X:
+		s5m8763_tm_to_data(&alrm->time, data);
+		break;
+
+	case S5M8767X:
+		s5m8767_tm_to_data(&alrm->time, data);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + alrm->time.tm_year, 1 + alrm->time.tm_mon,
+		alrm->time.tm_mday, alrm->time.tm_hour, alrm->time.tm_min,
+		alrm->time.tm_sec, alrm->time.tm_wday);
+
+	ret = s5m_rtc_stop_alarm(info);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_raw_write(info->rtc, SEC_ALARM0_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	ret = s5m8767_rtc_set_alarm_reg(info);
+	if (ret < 0)
+		return ret;
+
+	if (alrm->enabled)
+		ret = s5m_rtc_start_alarm(info);
+
+	return ret;
+}
+
+static int s5m_rtc_alarm_irq_enable(struct device *dev,
+				    unsigned int enabled)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+
+	if (enabled)
+		return s5m_rtc_start_alarm(info);
+	else
+		return s5m_rtc_stop_alarm(info);
+}
+
+static irqreturn_t s5m_rtc_alarm_irq(int irq, void *data)
+{
+	struct s5m_rtc_info *info = data;
+
+	rtc_update_irq(info->rtc_dev, 1, RTC_IRQF | RTC_AF);
+
+	return IRQ_HANDLED;
+}
+
+static const struct rtc_class_ops s5m_rtc_ops = {
+	.read_time = s5m_rtc_read_time,
+	.set_time = s5m_rtc_set_time,
+	.read_alarm = s5m_rtc_read_alarm,
+	.set_alarm = s5m_rtc_set_alarm,
+	.alarm_irq_enable = s5m_rtc_alarm_irq_enable,
+};
+
+static void s5m_rtc_enable_wtsr(struct s5m_rtc_info *info, bool enable)
+{
+	int ret;
+	ret = regmap_update_bits(info->rtc, SEC_WTSR_SMPL_CNTL,
+				 WTSR_ENABLE_MASK,
+				 enable ? WTSR_ENABLE_MASK : 0);
+	if (ret < 0)
+		dev_err(info->dev, "%s: fail to update WTSR reg(%d)\n",
+			__func__, ret);
+}
+
+static void s5m_rtc_enable_smpl(struct s5m_rtc_info *info, bool enable)
+{
+	int ret;
+	ret = regmap_update_bits(info->rtc, SEC_WTSR_SMPL_CNTL,
+				 SMPL_ENABLE_MASK,
+				 enable ? SMPL_ENABLE_MASK : 0);
+	if (ret < 0)
+		dev_err(info->dev, "%s: fail to update SMPL reg(%d)\n",
+			__func__, ret);
+}
+
+static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
+{
+	u8 data[2];
+	unsigned int tp_read;
+	int ret;
+	struct rtc_time tm;
+
+	ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &tp_read);
+	if (ret < 0) {
+		dev_err(info->dev, "%s: fail to read control reg(%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
+	/* Set RTC control register : Binary mode, 24hour mode */
+	data[0] = (1 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+	data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+
+	info->rtc_24hr_mode = 1;
+	ret = regmap_raw_write(info->rtc, SEC_ALARM0_CONF, data, 2);
+	if (ret < 0) {
+		dev_err(info->dev, "%s: fail to write controlm reg(%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
+	/* In first boot time, Set rtc time to 1/1/2012 00:00:00(SUN) */
+	if ((tp_read & RTC_TCON_MASK) == 0) {
+		dev_dbg(info->dev, "rtc init\n");
+		tm.tm_sec = 0;
+		tm.tm_min = 0;
+		tm.tm_hour = 0;
+		tm.tm_wday = 0;
+		tm.tm_mday = 1;
+		tm.tm_mon = 0;
+		tm.tm_year = 112;
+		tm.tm_yday = 0;
+		tm.tm_isdst = 0;
+		ret = s5m_rtc_set_time(info->dev, &tm);
+	}
+
+	ret = regmap_update_bits(info->rtc, SEC_RTC_UDR_CON,
+				 RTC_TCON_MASK, tp_read | RTC_TCON_MASK);
+	if (ret < 0)
+		dev_err(info->dev, "%s: fail to update TCON reg(%d)\n",
+			__func__, ret);
+
+	return ret;
+}
+
+static int s5m_rtc_probe(struct platform_device *pdev)
+{
+	struct sec_pmic_dev *s5m87xx = dev_get_drvdata(pdev->dev.parent);
+	struct sec_platform_data *pdata = s5m87xx->pdata;
+	struct s5m_rtc_info *info;
+	int ret;
+
+	if (!pdata) {
+		dev_err(pdev->dev.parent, "Platform data not supplied\n");
+		return -ENODEV;
+	}
+
+	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->dev = &pdev->dev;
+	info->s5m87xx = s5m87xx;
+	info->rtc = s5m87xx->rtc;
+	info->device_type = s5m87xx->device_type;
+	info->wtsr_smpl = s5m87xx->wtsr_smpl;
+
+	switch (pdata->device_type) {
+	case S5M8763X:
+		info->irq = s5m87xx->irq_base + S5M8763_IRQ_ALARM0;
+		break;
+
+	case S5M8767X:
+		info->irq = s5m87xx->irq_base + S5M8767_IRQ_RTCA1;
+		break;
+
+	default:
+		ret = -EINVAL;
+		dev_err(&pdev->dev, "Unsupported device type: %d\n", ret);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, info);
+
+	ret = s5m8767_rtc_init_reg(info);
+
+	if (info->wtsr_smpl) {
+		s5m_rtc_enable_wtsr(info, true);
+		s5m_rtc_enable_smpl(info, true);
+	}
+
+	device_init_wakeup(&pdev->dev, 1);
+
+	info->rtc_dev = devm_rtc_device_register(&pdev->dev, "s5m-rtc",
+						 &s5m_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(info->rtc_dev))
+		return PTR_ERR(info->rtc_dev);
+
+	ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
+					s5m_rtc_alarm_irq, 0, "rtc-alarm0",
+					info);
+	if (ret < 0)
+		dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n",
+			info->irq, ret);
+
+	return ret;
+}
+
+static void s5m_rtc_shutdown(struct platform_device *pdev)
+{
+	struct s5m_rtc_info *info = platform_get_drvdata(pdev);
+	int i;
+	unsigned int val = 0;
+	if (info->wtsr_smpl) {
+		for (i = 0; i < 3; i++) {
+			s5m_rtc_enable_wtsr(info, false);
+			regmap_read(info->rtc, SEC_WTSR_SMPL_CNTL, &val);
+			pr_debug("%s: WTSR_SMPL reg(0x%02x)\n", __func__, val);
+			if (val & WTSR_ENABLE_MASK)
+				pr_emerg("%s: fail to disable WTSR\n",
+					 __func__);
+			else {
+				pr_info("%s: success to disable WTSR\n",
+					__func__);
+				break;
+			}
+		}
+	}
+	/* Disable SMPL when power off */
+	s5m_rtc_enable_smpl(info, false);
+}
+
+static const struct platform_device_id s5m_rtc_id[] = {
+	{ "s5m-rtc", 0 },
+};
+
+static struct platform_driver s5m_rtc_driver = {
+	.driver		= {
+		.name	= "s5m-rtc",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= s5m_rtc_probe,
+	.shutdown	= s5m_rtc_shutdown,
+	.id_table	= s5m_rtc_id,
+};
+
+module_platform_driver(s5m_rtc_driver);
+
+/* Module information */
+MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
+MODULE_DESCRIPTION("Samsung S5M RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:s5m-rtc");
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 378ae8a04c6a..2d0c9071bcfb 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -51,6 +51,7 @@ struct sec_pmic_dev {
 	int ono;
 	int type;
 	bool wakeup;
+	bool wtsr_smpl;
 };
 
 int sec_irq_init(struct sec_pmic_dev *sec_pmic);
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 71597e20cddb..94b7cd6d8891 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -62,6 +62,11 @@ enum sec_rtc_reg {
 /* RTC Update Register1 */
 #define RTC_UDR_SHIFT		0
 #define RTC_UDR_MASK		(1 << RTC_UDR_SHIFT)
+#define RTC_TCON_SHIFT		1
+#define RTC_TCON_MASK		(1 << RTC_TCON_SHIFT)
+#define RTC_TIME_EN_SHIFT	3
+#define RTC_TIME_EN_MASK	(1 << RTC_TIME_EN_SHIFT)
+
 /* RTC Hour register */
 #define HOUR_PM_SHIFT		6
 #define HOUR_PM_MASK		(1 << HOUR_PM_SHIFT)
@@ -69,6 +74,12 @@ enum sec_rtc_reg {
 #define ALARM_ENABLE_SHIFT	7
 #define ALARM_ENABLE_MASK	(1 << ALARM_ENABLE_SHIFT)
 
+#define SMPL_ENABLE_SHIFT	7
+#define SMPL_ENABLE_MASK	(1 << SMPL_ENABLE_SHIFT)
+
+#define WTSR_ENABLE_SHIFT	6
+#define WTSR_ENABLE_MASK	(1 << WTSR_ENABLE_SHIFT)
+
 enum {
 	RTC_SEC = 0,
 	RTC_MIN,
-- 
cgit v1.2.3


From d049f74f2dbe71354d43d393ac3a188947811348 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 12 Nov 2013 15:11:17 -0800
Subject: exec/ptrace: fix get_dumpable() incorrect tests

The get_dumpable() return value is not boolean.  Most users of the
function actually want to be testing for non-SUID_DUMP_USER(1) rather than
SUID_DUMP_DISABLE(0).  The SUID_DUMP_ROOT(2) is also considered a
protected state.  Almost all places did this correctly, excepting the two
places fixed in this patch.

Wrong logic:
    if (dumpable == SUID_DUMP_DISABLE) { /* be protective */ }
        or
    if (dumpable == 0) { /* be protective */ }
        or
    if (!dumpable) { /* be protective */ }

Correct logic:
    if (dumpable != SUID_DUMP_USER) { /* be protective */ }
        or
    if (dumpable != 1) { /* be protective */ }

Without this patch, if the system had set the sysctl fs/suid_dumpable=2, a
user was able to ptrace attach to processes that had dropped privileges to
that user.  (This may have been partially mitigated if Yama was enabled.)

The macros have been moved into the file that declares get/set_dumpable(),
which means things like the ia64 code can see them too.

CVE-2013-2929

Reported-by: Vasily Kulikov <segoon@openwall.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/processor.h | 2 +-
 fs/exec.c                         | 6 ++++++
 include/linux/binfmts.h           | 3 ---
 include/linux/sched.h             | 4 ++++
 kernel/ptrace.c                   | 3 ++-
 5 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index e0a899a1a8a6..5a84b3a50741 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -319,7 +319,7 @@ struct thread_struct {
 	regs->loadrs = 0;									\
 	regs->r8 = get_dumpable(current->mm);	/* set "don't zap registers" flag */		\
 	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
-	if (unlikely(!get_dumpable(current->mm))) {							\
+	if (unlikely(get_dumpable(current->mm) != SUID_DUMP_USER)) {	\
 		/*										\
 		 * Zap scratch regs to avoid leaking bits between processes with different	\
 		 * uid/privileges.								\
diff --git a/fs/exec.c b/fs/exec.c
index 2ea437e5acf4..12120620f040 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1669,6 +1669,12 @@ int __get_dumpable(unsigned long mm_flags)
 	return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
 }
 
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
 int get_dumpable(struct mm_struct *mm)
 {
 	return __get_dumpable(mm->flags);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e8112ae50531..7554fd410bcc 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -99,9 +99,6 @@ extern void setup_new_exec(struct linux_binprm * bprm);
 extern void would_dump(struct linux_binprm *, struct file *);
 
 extern int suid_dumpable;
-#define SUID_DUMP_DISABLE	0	/* No setuid dumping */
-#define SUID_DUMP_USER		1	/* Dump as user of process */
-#define SUID_DUMP_ROOT		2	/* Dump as root */
 
 /* Stack area protections */
 #define EXSTACK_DEFAULT   0	/* Whatever the arch defaults to */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e226fe3e512..f7efc8604652 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -323,6 +323,10 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 extern void set_dumpable(struct mm_struct *mm, int value);
 extern int get_dumpable(struct mm_struct *mm);
 
+#define SUID_DUMP_DISABLE	0	/* No setuid dumping */
+#define SUID_DUMP_USER		1	/* Dump as user of process */
+#define SUID_DUMP_ROOT		2	/* Dump as root */
+
 /* mm flags */
 /* dumpable bits */
 #define MMF_DUMPABLE      0  /* core dump is permitted */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index dd562e9aa2c8..1f4bcb3cc21c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -257,7 +257,8 @@ ok:
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
 	rcu_read_lock();
-	if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+	if (dumpable != SUID_DUMP_USER &&
+	    !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
 		rcu_read_unlock();
 		return -EPERM;
 	}
-- 
cgit v1.2.3


From 1310a5a99d900ee30b9f171146406bde0c6c2bd4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 12 Nov 2013 15:11:19 -0800
Subject: rbtree: fix rbtree_postorder_for_each_entry_safe() iterator

The iterator rbtree_postorder_for_each_entry_safe() relies on pointer
underflow behavior when testing for loop termination.  In particular it
expects that

  &rb_entry(NULL, type, field)->field

is NULL.  But the result of this expression is not defined by a C standard
and some gcc versions (e.g.  4.3.4) assume the above expression can never
be equal to NULL.  The net result is an oops because the iteration is not
properly terminated.

Fix the problem by modifying the iterator to avoid pointer underflows.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: <stable@vger.kernel.org>		[3.12.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index aa870a4ddf54..57e75ae9910f 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -85,6 +85,11 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
 	*rb_link = node;
 }
 
+#define rb_entry_safe(ptr, type, member) \
+	({ typeof(ptr) ____ptr = (ptr); \
+	   ____ptr ? rb_entry(____ptr, type, member) : NULL; \
+	})
+
 /**
  * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of
  * given type safe against removal of rb_node entry
@@ -95,12 +100,9 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
  * @field:	the name of the rb_node field within 'type'.
  */
 #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
-	for (pos = rb_entry(rb_first_postorder(root), typeof(*pos), field),\
-		n = rb_entry(rb_next_postorder(&pos->field), \
-			typeof(*pos), field); \
-	     &pos->field; \
-	     pos = n, \
-		n = rb_entry(rb_next_postorder(&pos->field), \
-			typeof(*pos), field))
+	for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
+	     pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
+			typeof(*pos), field); 1; }); \
+	     pos = n)
 
 #endif	/* _LINUX_RBTREE_H */
-- 
cgit v1.2.3


From 4e9b45a19241354daec281d7a785739829b52359 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Tue, 12 Nov 2013 15:11:47 -0800
Subject: ipc, msg: fix message length check for negative values

On 64 bit systems the test for negative message sizes is bogus as the
size, which may be positive when evaluated as a long, will get truncated
to an int when passed to load_msg().  So a long might very well contain a
positive value but when truncated to an int it would become negative.

That in combination with a small negative value of msg_ctlmax (which will
be promoted to an unsigned type for the comparison against msgsz, making
it a big positive value and therefore make it pass the check) will lead to
two problems: 1/ The kmalloc() call in alloc_msg() will allocate a too
small buffer as the addition of alen is effectively a subtraction.  2/ The
copy_from_user() call in load_msg() will first overflow the buffer with
userland data and then, when the userland access generates an access
violation, the fixup handler copy_user_handle_tail() will try to fill the
remainder with zeros -- roughly 4GB.  That almost instantly results in a
system crash or reset.

  ,-[ Reproducer (needs to be run as root) ]--
  | #include <sys/stat.h>
  | #include <sys/msg.h>
  | #include <unistd.h>
  | #include <fcntl.h>
  |
  | int main(void) {
  |     long msg = 1;
  |     int fd;
  |
  |     fd = open("/proc/sys/kernel/msgmax", O_WRONLY);
  |     write(fd, "-1", 2);
  |     close(fd);
  |
  |     msgsnd(0, &msg, 0xfffffff0, IPC_NOWAIT);
  |
  |     return 0;
  | }
  '---

Fix the issue by preventing msgsz from getting truncated by consistently
using size_t for the message length.  This way the size checks in
do_msgsnd() could still be passed with a negative value for msg_ctlmax but
we would fail on the buffer allocation in that case and error out.

Also change the type of m_ts from int to size_t to avoid similar nastiness
in other code paths -- it is used in similar constructs, i.e.  signed vs.
unsigned checks.  It should never become negative under normal
circumstances, though.

Setting msg_ctlmax to a negative value is an odd configuration and should
be prevented.  As that might break existing userland, it will be handled
in a separate commit so it could easily be reverted and reworked without
reintroducing the above described bug.

Hardening mechanisms for user copy operations would have catched that bug
early -- e.g.  checking slab object sizes on user copy operations as the
usercopy feature of the PaX patch does.  Or, for that matter, detect the
long vs.  int sign change due to truncation, as the size overflow plugin
of the very same patch does.

[akpm@linux-foundation.org: fix i386 min() warnings]
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Pax Team <pageexec@freemail.hu>
Cc: Davidlohr Bueso <davidlohr@hp.com>
Cc: Brad Spengler <spender@grsecurity.net>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: <stable@vger.kernel.org>	[ v2.3.27+ -- yes, that old ;) ]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msg.h |  6 +++---
 ipc/msgutil.c       | 20 ++++++++++----------
 ipc/util.h          |  4 ++--
 3 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msg.h b/include/linux/msg.h
index 391af8d11cce..e21f9d44307f 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -6,9 +6,9 @@
 
 /* one msg_msg structure for each message */
 struct msg_msg {
-	struct list_head m_list; 
-	long  m_type;          
-	int m_ts;           /* message text size */
+	struct list_head m_list;
+	long m_type;
+	size_t m_ts;		/* message text size */
 	struct msg_msgseg* next;
 	void *security;
 	/* the actual message follows immediately */
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 491e71f2a1b8..7e7095974d54 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -41,15 +41,15 @@ struct msg_msgseg {
 	/* the next part of the message follows immediately */
 };
 
-#define DATALEN_MSG	(int)(PAGE_SIZE-sizeof(struct msg_msg))
-#define DATALEN_SEG	(int)(PAGE_SIZE-sizeof(struct msg_msgseg))
+#define DATALEN_MSG	((size_t)PAGE_SIZE-sizeof(struct msg_msg))
+#define DATALEN_SEG	((size_t)PAGE_SIZE-sizeof(struct msg_msgseg))
 
 
-static struct msg_msg *alloc_msg(int len)
+static struct msg_msg *alloc_msg(size_t len)
 {
 	struct msg_msg *msg;
 	struct msg_msgseg **pseg;
-	int alen;
+	size_t alen;
 
 	alen = min(len, DATALEN_MSG);
 	msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
@@ -80,12 +80,12 @@ out_err:
 	return NULL;
 }
 
-struct msg_msg *load_msg(const void __user *src, int len)
+struct msg_msg *load_msg(const void __user *src, size_t len)
 {
 	struct msg_msg *msg;
 	struct msg_msgseg *seg;
 	int err = -EFAULT;
-	int alen;
+	size_t alen;
 
 	msg = alloc_msg(len);
 	if (msg == NULL)
@@ -117,8 +117,8 @@ out_err:
 struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
 {
 	struct msg_msgseg *dst_pseg, *src_pseg;
-	int len = src->m_ts;
-	int alen;
+	size_t len = src->m_ts;
+	size_t alen;
 
 	BUG_ON(dst == NULL);
 	if (src->m_ts > dst->m_ts)
@@ -147,9 +147,9 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
 	return ERR_PTR(-ENOSYS);
 }
 #endif
-int store_msg(void __user *dest, struct msg_msg *msg, int len)
+int store_msg(void __user *dest, struct msg_msg *msg, size_t len)
 {
-	int alen;
+	size_t alen;
 	struct msg_msgseg *seg;
 
 	alen = min(len, DATALEN_MSG);
diff --git a/ipc/util.h b/ipc/util.h
index f2f5036f2eed..59d78aa94987 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -148,9 +148,9 @@ int ipc_parse_version (int *cmd);
 #endif
 
 extern void free_msg(struct msg_msg *msg);
-extern struct msg_msg *load_msg(const void __user *src, int len);
+extern struct msg_msg *load_msg(const void __user *src, size_t len);
 extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
-extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
+extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);
 
 extern void recompute_msgmni(struct ipc_namespace *);
 
-- 
cgit v1.2.3


From b39898cd4077f4b6ec706e717c938751c34e1dc4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Nov 2013 12:30:07 +0100
Subject: genirq: Prevent spurious detection for unconditionally polled
 interrupts

On a 68k platform a couple of interrupts are demultiplexed and
"polled" from a top level interrupt. Unfortunately there is no way to
determine which of the sub interrupts raised the top level interrupt,
so all of the demultiplexed interrupt handlers need to be
invoked. Given a high enough frequency this can trigger the spurious
interrupt detection mechanism, if one of the demultiplex interrupts
returns IRQ_NONE continuously. But this is a false positive as the
polling causes this behaviour and not buggy hardware/software.

Introduce IRQ_POLLED which can be set at interrupt chip setup time via
irq_set_status_flags(). The flag excludes the interrupt from the
spurious detector and from all core polling activities.

Reported-and-tested-by: Michael Schmitz <schmitzmic@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: linux-m68k@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1311061149250.23353@ionos.tec.linutronix.de
---
 include/linux/irq.h   |  7 ++++++-
 kernel/irq/settings.h |  7 +++++++
 kernel/irq/spurious.c | 12 +++++++++---
 3 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 56bb0dc8b7d4..7dc10036eff5 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -70,6 +70,9 @@ typedef	void (*irq_preflow_handler_t)(struct irq_data *data);
  * IRQ_MOVE_PCNTXT		- Interrupt can be migrated from process context
  * IRQ_NESTED_TRHEAD		- Interrupt nests into another thread
  * IRQ_PER_CPU_DEVID		- Dev_id is a per-cpu variable
+ * IRQ_IS_POLLED		- Always polled by another interrupt. Exclude
+ *				  it from the spurious interrupt detection
+ *				  mechanism and from core side polling.
  */
 enum {
 	IRQ_TYPE_NONE		= 0x00000000,
@@ -94,12 +97,14 @@ enum {
 	IRQ_NESTED_THREAD	= (1 << 15),
 	IRQ_NOTHREAD		= (1 << 16),
 	IRQ_PER_CPU_DEVID	= (1 << 17),
+	IRQ_IS_POLLED		= (1 << 18),
 };
 
 #define IRQF_MODIFY_MASK	\
 	(IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
 	 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
-	 IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID)
+	 IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
+	 IRQ_IS_POLLED)
 
 #define IRQ_NO_BALANCING_MASK	(IRQ_PER_CPU | IRQ_NO_BALANCING)
 
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 1162f1030f18..3320b84cc60f 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -14,6 +14,7 @@ enum {
 	_IRQ_NO_BALANCING	= IRQ_NO_BALANCING,
 	_IRQ_NESTED_THREAD	= IRQ_NESTED_THREAD,
 	_IRQ_PER_CPU_DEVID	= IRQ_PER_CPU_DEVID,
+	_IRQ_IS_POLLED		= IRQ_IS_POLLED,
 	_IRQF_MODIFY_MASK	= IRQF_MODIFY_MASK,
 };
 
@@ -26,6 +27,7 @@ enum {
 #define IRQ_NOAUTOEN		GOT_YOU_MORON
 #define IRQ_NESTED_THREAD	GOT_YOU_MORON
 #define IRQ_PER_CPU_DEVID	GOT_YOU_MORON
+#define IRQ_IS_POLLED		GOT_YOU_MORON
 #undef IRQF_MODIFY_MASK
 #define IRQF_MODIFY_MASK	GOT_YOU_MORON
 
@@ -147,3 +149,8 @@ static inline bool irq_settings_is_nested_thread(struct irq_desc *desc)
 {
 	return desc->status_use_accessors & _IRQ_NESTED_THREAD;
 }
+
+static inline bool irq_settings_is_polled(struct irq_desc *desc)
+{
+	return desc->status_use_accessors & _IRQ_IS_POLLED;
+}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 7b5f012bde9d..a1d8cc63b56e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -67,8 +67,13 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
 
 	raw_spin_lock(&desc->lock);
 
-	/* PER_CPU and nested thread interrupts are never polled */
-	if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc))
+	/*
+	 * PER_CPU, nested thread interrupts and interrupts explicitely
+	 * marked polled are excluded from polling.
+	 */
+	if (irq_settings_is_per_cpu(desc) ||
+	    irq_settings_is_nested_thread(desc) ||
+	    irq_settings_is_polled(desc))
 		goto out;
 
 	/*
@@ -268,7 +273,8 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
-	if (desc->istate & IRQS_POLL_INPROGRESS)
+	if (desc->istate & IRQS_POLL_INPROGRESS ||
+	    irq_settings_is_polled(desc))
 		return;
 
 	/* we get here again via the threaded handler */
-- 
cgit v1.2.3


From 54197e43a4a9a0f3fc406d72d9815754e84fab1e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Sep 2013 18:53:05 +0000
Subject: hardirq: Make hardirq bits generic

There is no reason for per arch hardirq bits. Make them all generic

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130917183628.534494408@linutronix.de
---
 arch/blackfin/include/asm/hardirq.h |  3 ---
 arch/cris/include/asm/hardirq.h     | 12 ------------
 arch/m32r/include/asm/hardirq.h     | 16 ----------------
 arch/m68k/include/asm/hardirq.h     | 11 -----------
 arch/s390/include/asm/hardirq.h     |  2 --
 arch/sparc/include/asm/hardirq_32.h |  1 -
 arch/sparc/include/asm/hardirq_64.h |  2 --
 arch/tile/include/asm/hardirq.h     |  2 --
 include/linux/preempt_mask.h        | 30 ++++++++----------------------
 9 files changed, 8 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/include/asm/hardirq.h b/arch/blackfin/include/asm/hardirq.h
index c078dd78d998..58b54a6d5a16 100644
--- a/arch/blackfin/include/asm/hardirq.h
+++ b/arch/blackfin/include/asm/hardirq.h
@@ -12,9 +12,6 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
-/* Define until common code gets sane defaults */
-#define HARDIRQ_BITS 9
-
 #include <asm-generic/hardirq.h>
 
 #endif
diff --git a/arch/cris/include/asm/hardirq.h b/arch/cris/include/asm/hardirq.h
index 17bb12d760b2..04126f7bfab2 100644
--- a/arch/cris/include/asm/hardirq.h
+++ b/arch/cris/include/asm/hardirq.h
@@ -2,18 +2,6 @@
 #define __ASM_HARDIRQ_H
 
 #include <asm/irq.h>
-
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #include <asm-generic/hardirq.h>
 
 #endif /* __ASM_HARDIRQ_H */
diff --git a/arch/m32r/include/asm/hardirq.h b/arch/m32r/include/asm/hardirq.h
index 4c31c0ae215e..5f2ac4f64ddf 100644
--- a/arch/m32r/include/asm/hardirq.h
+++ b/arch/m32r/include/asm/hardirq.h
@@ -3,22 +3,6 @@
 #define __ASM_HARDIRQ_H
 
 #include <asm/irq.h>
-
-#if NR_IRQS > 256
-#define HARDIRQ_BITS	9
-#else
-#define HARDIRQ_BITS	8
-#endif
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #include <asm-generic/hardirq.h>
 
 #endif /* __ASM_HARDIRQ_H */
diff --git a/arch/m68k/include/asm/hardirq.h b/arch/m68k/include/asm/hardirq.h
index db30ed276878..6c618529d9b9 100644
--- a/arch/m68k/include/asm/hardirq.h
+++ b/arch/m68k/include/asm/hardirq.h
@@ -5,17 +5,6 @@
 #include <linux/cache.h>
 #include <asm/irq.h>
 
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #ifdef CONFIG_MMU
 
 static inline void ack_bad_irq(unsigned int irq)
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index a908d2941c5d..b7eabaaeffbd 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -18,8 +18,6 @@
 #define __ARCH_HAS_DO_SOFTIRQ
 #define __ARCH_IRQ_EXIT_IRQS_DISABLED
 
-#define HARDIRQ_BITS	8
-
 static inline void ack_bad_irq(unsigned int irq)
 {
 	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
diff --git a/arch/sparc/include/asm/hardirq_32.h b/arch/sparc/include/asm/hardirq_32.h
index 162007643cdc..ee93923b7f82 100644
--- a/arch/sparc/include/asm/hardirq_32.h
+++ b/arch/sparc/include/asm/hardirq_32.h
@@ -7,7 +7,6 @@
 #ifndef __SPARC_HARDIRQ_H
 #define __SPARC_HARDIRQ_H
 
-#define HARDIRQ_BITS    8
 #include <asm-generic/hardirq.h>
 
 #endif /* __SPARC_HARDIRQ_H */
diff --git a/arch/sparc/include/asm/hardirq_64.h b/arch/sparc/include/asm/hardirq_64.h
index 7c29fd1a87aa..f478ff1ddd02 100644
--- a/arch/sparc/include/asm/hardirq_64.h
+++ b/arch/sparc/include/asm/hardirq_64.h
@@ -14,6 +14,4 @@
 
 void ack_bad_irq(unsigned int irq);
 
-#define HARDIRQ_BITS	8
-
 #endif /* !(__SPARC64_HARDIRQ_H) */
diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h
index 822390f9a154..54110af23985 100644
--- a/arch/tile/include/asm/hardirq.h
+++ b/arch/tile/include/asm/hardirq.h
@@ -42,6 +42,4 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
 
-#define HARDIRQ_BITS	8
-
 #endif /* _ASM_TILE_HARDIRQ_H */
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
index 931bc616219f..810d7e386f20 100644
--- a/include/linux/preempt_mask.h
+++ b/include/linux/preempt_mask.h
@@ -11,36 +11,22 @@
  * - bits 0-7 are the preemption count (max preemption depth: 256)
  * - bits 8-15 are the softirq count (max # of softirqs: 256)
  *
- * The hardirq count can in theory reach the same as NR_IRQS.
- * In reality, the number of nested IRQS is limited to the stack
- * size as well. For archs with over 1000 IRQS it is not practical
- * to expect that they will all nest. We give a max of 10 bits for
- * hardirq nesting. An arch may choose to give less than 10 bits.
- * m68k expects it to be 8.
- *
- * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
- * - bit 26 is the NMI_MASK
- * - bit 27 is the PREEMPT_ACTIVE flag
+ * The hardirq count could in theory be the same as the number of
+ * interrupts in the system, but we run all interrupt handlers with
+ * interrupts disabled, so we cannot have nesting interrupts. Though
+ * there are a few palaeontologic drivers which reenable interrupts in
+ * the handler, so we need more than one bit here.
  *
  * PREEMPT_MASK: 0x000000ff
  * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x03ff0000
- *     NMI_MASK: 0x04000000
+ * HARDIRQ_MASK: 0x000f0000
+ *     NMI_MASK: 0x00100000
  */
 #define PREEMPT_BITS	8
 #define SOFTIRQ_BITS	8
+#define HARDIRQ_BITS	4
 #define NMI_BITS	1
 
-#define MAX_HARDIRQ_BITS 10
-
-#ifndef HARDIRQ_BITS
-# define HARDIRQ_BITS	MAX_HARDIRQ_BITS
-#endif
-
-#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
-#error HARDIRQ_BITS too high!
-#endif
-
 #define PREEMPT_SHIFT	0
 #define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
 #define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-- 
cgit v1.2.3


From 00d1a39e69d5afa7523dad515a05b21abd17c389 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Sep 2013 18:53:09 +0000
Subject: preempt: Make PREEMPT_ACTIVE generic

No point in having this bit defined by architecture.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130917183629.090698799@linutronix.de
---
 arch/alpha/include/asm/thread_info.h      |  2 --
 arch/arc/include/asm/thread_info.h        |  2 --
 arch/arm/include/asm/thread_info.h        |  6 ------
 arch/arm64/include/asm/thread_info.h      |  6 ------
 arch/avr32/include/asm/thread_info.h      |  2 --
 arch/blackfin/include/asm/thread_info.h   |  2 --
 arch/c6x/include/asm/thread_info.h        |  2 --
 arch/cris/include/asm/thread_info.h       |  2 --
 arch/frv/include/asm/thread_info.h        |  2 --
 arch/hexagon/include/asm/thread_info.h    |  4 ----
 arch/ia64/include/asm/thread_info.h       |  3 ---
 arch/m32r/include/asm/thread_info.h       |  2 --
 arch/m68k/include/asm/thread_info.h       |  2 --
 arch/metag/include/asm/thread_info.h      |  2 --
 arch/microblaze/include/asm/thread_info.h |  2 --
 arch/mips/include/asm/thread_info.h       |  2 --
 arch/mn10300/include/asm/thread_info.h    |  2 --
 arch/parisc/include/asm/thread_info.h     |  3 ---
 arch/powerpc/include/asm/thread_info.h    |  2 --
 arch/s390/include/asm/thread_info.h       |  2 --
 arch/score/include/asm/thread_info.h      |  2 --
 arch/sh/include/asm/thread_info.h         |  2 --
 arch/sh/kernel/entry-common.S             |  6 ++----
 arch/sparc/include/asm/thread_info_32.h   |  2 --
 arch/sparc/include/asm/thread_info_64.h   |  2 --
 arch/tile/include/asm/thread_info.h       |  2 --
 arch/um/include/asm/thread_info.h         |  2 --
 arch/unicore32/include/asm/thread_info.h  |  6 ------
 arch/x86/include/asm/thread_info.h        |  2 --
 arch/xtensa/include/asm/thread_info.h     |  2 --
 include/linux/preempt_mask.h              | 15 +++++----------
 include/linux/sched.h                     |  2 +-
 32 files changed, 8 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 52cd2a4a3ff4..453597b91f3a 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -58,8 +58,6 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define THREAD_SIZE_ORDER 1
 #define THREAD_SIZE (2*PAGE_SIZE)
 
-#define PREEMPT_ACTIVE		0x40000000
-
 /*
  * Thread information flags:
  * - these are process state flags and used from assembly
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 2d50a4cdd7f3..45be21672011 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -80,8 +80,6 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE      0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index df5e13d64f2c..71a06b293489 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -140,12 +140,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 				    struct user_vfp_exc __user *);
 #endif
 
-/*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
 /*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 23a3c4791d86..720e70b66ffd 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -88,12 +88,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-/*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
 /*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 6dc62e1f94c7..a978f3fe7c25 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -66,8 +66,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x40000000
-
 /*
  * Thread information flags
  * - these are process state flags that various assembly files may need to access
diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h
index 3894005337ba..55f473bdad36 100644
--- a/arch/blackfin/include/asm/thread_info.h
+++ b/arch/blackfin/include/asm/thread_info.h
@@ -88,8 +88,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TI_CPU		12
 #define TI_PREEMPT	16
 
-#define	PREEMPT_ACTIVE	0x4000000
-
 /*
  * thread information flag bit numbers
  */
diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h
index 4c8dc562bd90..d4e9ef87076d 100644
--- a/arch/c6x/include/asm/thread_info.h
+++ b/arch/c6x/include/asm/thread_info.h
@@ -84,8 +84,6 @@ struct thread_info *current_thread_info(void)
 #define put_thread_info(ti)	put_task_struct((ti)->task)
 #endif /* __ASSEMBLY__ */
 
-#define	PREEMPT_ACTIVE	0x10000000
-
 /*
  * thread information flag bit numbers
  * - pending work-to-be-done flags are in LSW
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 07c8c40c52b3..55dede18c032 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -44,8 +44,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index bebd7eadc772..af29e17c0181 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -52,8 +52,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h
index f7c32406a711..a59dad3b3695 100644
--- a/arch/hexagon/include/asm/thread_info.h
+++ b/arch/hexagon/include/asm/thread_info.h
@@ -73,10 +73,6 @@ struct thread_info {
 
 #endif  /* __ASSEMBLY__  */
 
-/*  looks like "linux/hardirq.h" uses this.  */
-
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifndef __ASSEMBLY__
 
 #define INIT_THREAD_INFO(tsk)                   \
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index cade13dd0299..5957cf61f898 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -11,9 +11,6 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 
-#define PREEMPT_ACTIVE_BIT 30
-#define PREEMPT_ACTIVE	(1 << PREEMPT_ACTIVE_BIT)
-
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index c074f4c2e858..00171703402f 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -53,8 +53,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #define THREAD_SIZE		(PAGE_SIZE << 1)
 #define THREAD_SIZE_ORDER	1
 /*
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index 126131f94a2c..21a4784ca5a1 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -35,8 +35,6 @@ struct thread_info {
 };
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x4000000
-
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h
index 7c4a33006142..b19e9c588a16 100644
--- a/arch/metag/include/asm/thread_info.h
+++ b/arch/metag/include/asm/thread_info.h
@@ -46,8 +46,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SHIFT		12
 #else
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index de26ea6373de..8c9d36591a03 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -106,8 +106,6 @@ static inline struct thread_info *current_thread_info(void)
 /* thread information allocation */
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index f9b24bfbdbae..4f58ef6d0eed 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -92,8 +92,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #define STACK_WARN	(THREAD_SIZE / 8)
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index 224b4262486d..bf280eaccd36 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -16,8 +16,6 @@
 
 #include <asm/page.h>
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SIZE		(4096)
 #define THREAD_SIZE_ORDER	(0)
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index bc7cf120106b..d5f97ea3a4e1 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -46,9 +46,6 @@ struct thread_info {
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_SHIFT            (PAGE_SHIFT + THREAD_SIZE_ORDER)
 
-#define PREEMPT_ACTIVE_BIT	28
-#define PREEMPT_ACTIVE		(1 << PREEMPT_ACTIVE_BIT)
-
 /*
  * thread information flags
  */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index ba7b1973866e..8fd6cf6dcee8 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -82,8 +82,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flag bit numbers
  */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index eb5f64d26d06..10e0fcd3633d 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -111,6 +111,4 @@ static inline struct thread_info *current_thread_info(void)
 #define is_32bit_task()		(1)
 #endif
 
-#define PREEMPT_ACTIVE		0x4000000
-
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h
index 1425cc034872..656b7ada9326 100644
--- a/arch/score/include/asm/thread_info.h
+++ b/arch/score/include/asm/thread_info.h
@@ -72,8 +72,6 @@ register struct thread_info *__current_thread_info __asm__("r28");
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 45a93669289d..ad27ffa65e2e 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -41,8 +41,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #if defined(CONFIG_4KSTACKS)
 #define THREAD_SHIFT	12
 #else
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index 9b6e4beeb296..ca46834294b7 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -108,7 +108,7 @@ need_resched:
 	and	#(0xf0>>1), r0		! interrupts off (exception path)?
 	cmp/eq	#(0xf0>>1), r0
 	bt	noresched
-	mov.l	3f, r0
+	mov.l	1f, r0
 	jsr	@r0			! call preempt_schedule_irq
 	 nop
 	bra	need_resched
@@ -119,9 +119,7 @@ noresched:
 	 nop
 
 	.align 2
-1:	.long	PREEMPT_ACTIVE
-2:	.long	schedule
-3:	.long	preempt_schedule_irq
+1:	.long	preempt_schedule_irq
 #endif
 
 ENTRY(resume_userspace)
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index dd3807599bb9..96efa7adc223 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -105,8 +105,6 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define TI_W_SAVED	0x250
 /* #define TI_RESTART_BLOCK 0x25n */ /* Nobody cares */
 
-#define PREEMPT_ACTIVE		0x4000000
-
 /*
  * thread information flag bit numbers
  */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index d5e504251079..2b4e17b79e9c 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -111,8 +111,6 @@ struct thread_info {
 #define THREAD_SHIFT PAGE_SHIFT
 #endif /* PAGE_SHIFT == 13 */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index b8aa6df3e102..729aa107f64e 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -113,8 +113,6 @@ extern void _cpu_idle(void);
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * Thread information flags that various assembly files may need to access.
  * Keep flags accessed frequently in low bits, particular since it makes
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index 2c8eeb2df8b4..1c5b2a83046a 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -60,8 +60,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h
index 818b4a1edb5b..af36d8eabdf1 100644
--- a/arch/unicore32/include/asm/thread_info.h
+++ b/arch/unicore32/include/asm/thread_info.h
@@ -117,12 +117,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-/*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
 /*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c46a46be1ec6..3ba3de457d05 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -153,8 +153,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_X86_32
 
 #define STACK_WARN	(THREAD_SIZE/8)
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 9481004ac119..470153e8547c 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -76,8 +76,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
index 810d7e386f20..d169820203dd 100644
--- a/include/linux/preempt_mask.h
+++ b/include/linux/preempt_mask.h
@@ -17,10 +17,11 @@
  * there are a few palaeontologic drivers which reenable interrupts in
  * the handler, so we need more than one bit here.
  *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x000f0000
- *     NMI_MASK: 0x00100000
+ * PREEMPT_MASK:	0x000000ff
+ * SOFTIRQ_MASK:	0x0000ff00
+ * HARDIRQ_MASK:	0x000f0000
+ *     NMI_MASK:	0x00100000
+ * PREEMPT_ACTIVE:	0x00200000
  */
 #define PREEMPT_BITS	8
 #define SOFTIRQ_BITS	8
@@ -46,15 +47,9 @@
 
 #define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)
 
-#ifndef PREEMPT_ACTIVE
 #define PREEMPT_ACTIVE_BITS	1
 #define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
 #define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
-#endif
-
-#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
-#error PREEMPT_ACTIVE is too low!
-#endif
 
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 045b0d227846..55080df48b70 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,7 +22,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
-#include <linux/preempt.h>
+#include <linux/preempt_mask.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
-- 
cgit v1.2.3


From d38a8c622a1b382336c3e152c6caf4e11d1f1b2a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 18 Oct 2013 19:35:23 +0200
Subject: dmaengine: prepare for generic 'unmap' data

Add a hook for a common dma unmap implementation to enable removal of
the per driver custom unmap code.  (A reworked version of Bartlomiej
Zolnierkiewicz's patches to remove the custom callbacks and the size
increase of dma_async_tx_descriptor for drivers that don't care about
raid).

Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Tomasz Figa <t.figa@samsung.com>
Cc: Dave Jiang <dave.jiang@intel.com>
[bzolnier: prepare pl330 driver for adding missing unmap while at it]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/amba-pl08x.c  |  1 +
 drivers/dma/at_hdmac.c    |  1 +
 drivers/dma/dw/core.c     |  1 +
 drivers/dma/ep93xx_dma.c  |  1 +
 drivers/dma/fsldma.c      |  1 +
 drivers/dma/ioat/dma.c    |  1 +
 drivers/dma/ioat/dma_v2.c |  1 +
 drivers/dma/ioat/dma_v3.c |  1 +
 drivers/dma/iop-adma.c    |  1 +
 drivers/dma/mv_xor.c      |  1 +
 drivers/dma/pl330.c       |  2 ++
 drivers/dma/ppc4xx/adma.c |  1 +
 drivers/dma/timb_dma.c    |  1 +
 drivers/dma/txx9dmac.c    |  1 +
 include/linux/dmaengine.h | 26 ++++++++++++++++++++++++++
 15 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index fce46c5bf1c7..7f9846464b77 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1197,6 +1197,7 @@ static void pl08x_desc_free(struct virt_dma_desc *vd)
 	struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan);
 
+	dma_descriptor_unmap(txd);
 	if (!plchan->slave)
 		pl08x_unmap_buffers(txd);
 
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index c787f38a186a..cc7098ddf9d4 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -345,6 +345,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
 	list_move(&desc->desc_node, &atchan->free_list);
 
 	/* unmap dma addresses (not on slave channels) */
+	dma_descriptor_unmap(txd);
 	if (!atchan->chan_common.private) {
 		struct device *parent = chan2parent(&atchan->chan_common);
 		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 89eb89f22284..e3fe1b1a73b1 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -311,6 +311,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
 	list_splice_init(&desc->tx_list, &dwc->free_list);
 	list_move(&desc->desc_node, &dwc->free_list);
 
+	dma_descriptor_unmap(txd);
 	if (!is_slave_direction(dwc->direction)) {
 		struct device *parent = chan2parent(&dwc->chan);
 		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 591cd8c63abb..dcd6bf5d3091 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -791,6 +791,7 @@ static void ep93xx_dma_tasklet(unsigned long data)
 		 * For the memcpy channels the API requires us to unmap the
 		 * buffers unless requested otherwise.
 		 */
+		dma_descriptor_unmap(&desc->txd);
 		if (!edmac->chan.private)
 			ep93xx_dma_unmap_buffers(desc);
 
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index b3f3e90054f2..66c4052a1f34 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -868,6 +868,7 @@ static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
 	/* Run any dependencies */
 	dma_run_dependencies(txd);
 
+	dma_descriptor_unmap(txd);
 	/* Unmap the dst buffer, if requested */
 	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
 		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 5ff6fc1819dc..26f8cfd6bc3f 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -602,6 +602,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete)
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
 			dma_cookie_complete(tx);
+			dma_descriptor_unmap(tx);
 			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
 			ioat->active -= desc->hw->tx_cnt;
 			if (tx->callback) {
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index b925e1b1d139..fc7b50a813cc 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -148,6 +148,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 		tx = &desc->txd;
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
+			dma_descriptor_unmap(tx);
 			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
 			dma_cookie_complete(tx);
 			if (tx->callback) {
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index d8ececaf1b57..57a2901b917a 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -577,6 +577,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 		tx = &desc->txd;
 		if (tx->cookie) {
 			dma_cookie_complete(tx);
+			dma_descriptor_unmap(tx);
 			ioat3_dma_unmap(ioat, desc, idx + i);
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index dd8b44a56e5d..8f6e426590eb 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -152,6 +152,7 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
 		if (tx->callback)
 			tx->callback(tx->callback_param);
 
+		dma_descriptor_unmap(tx);
 		/* unmap dma addresses
 		 * (unmap_single vs unmap_page?)
 		 */
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 536dcb8ba5fd..ed1ab1d0875e 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -278,6 +278,7 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
 			desc->async_tx.callback(
 				desc->async_tx.callback_param);
 
+		dma_descriptor_unmap(&desc->async_tx);
 		/* unmap dma addresses
 		 * (unmap_single vs unmap_page?)
 		 */
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index a562d24d20bf..ab25e52cd43b 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2268,6 +2268,8 @@ static void pl330_tasklet(unsigned long data)
 			list_move_tail(&desc->node, &pch->dmac->desc_pool);
 		}
 
+		dma_descriptor_unmap(&desc->txd);
+
 		if (callback) {
 			spin_unlock_irqrestore(&pch->lock, flags);
 			callback(callback_param);
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index 370ff8265630..442492da7415 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -1765,6 +1765,7 @@ static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
 			desc->async_tx.callback(
 				desc->async_tx.callback_param);
 
+		dma_descriptor_unmap(&desc->async_tx);
 		/* unmap dma addresses
 		 * (unmap_single vs unmap_page?)
 		 *
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 28af214fce04..1d0c98839087 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -293,6 +293,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
 
 	list_move(&td_desc->desc_node, &td_chan->free_list);
 
+	dma_descriptor_unmap(txd);
 	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
 		__td_unmap_descs(td_desc,
 			txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE);
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 71e8e775189e..22a0b6c78c77 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -419,6 +419,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
 	list_splice_init(&desc->tx_list, &dc->free_list);
 	list_move(&desc->desc_node, &dc->free_list);
 
+	dma_descriptor_unmap(txd);
 	if (!ds) {
 		dma_addr_t dmaaddr;
 		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 0bc727534108..9070050fbcd8 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -413,6 +413,17 @@ void dma_chan_cleanup(struct kref *kref);
 typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
 
 typedef void (*dma_async_tx_callback)(void *dma_async_param);
+
+struct dmaengine_unmap_data {
+	u8 to_cnt;
+	u8 from_cnt;
+	u8 bidi_cnt;
+	struct device *dev;
+	struct kref kref;
+	size_t len;
+	dma_addr_t addr[0];
+};
+
 /**
  * struct dma_async_tx_descriptor - async transaction descriptor
  * ---dma generic offload fields---
@@ -438,6 +449,7 @@ struct dma_async_tx_descriptor {
 	dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
 	dma_async_tx_callback callback;
 	void *callback_param;
+	struct dmaengine_unmap_data *unmap;
 #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
 	struct dma_async_tx_descriptor *next;
 	struct dma_async_tx_descriptor *parent;
@@ -445,6 +457,20 @@ struct dma_async_tx_descriptor {
 #endif
 };
 
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+				 struct dmaengine_unmap_data *unmap)
+{
+	kref_get(&unmap->kref);
+	tx->unmap = unmap;
+}
+
+static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx)
+{
+	if (tx->unmap) {
+		tx->unmap = NULL;
+	}
+}
+
 #ifndef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
 static inline void txd_lock(struct dma_async_tx_descriptor *txd)
 {
-- 
cgit v1.2.3


From 45c463ae924c62af4aa64ded1ca831f334a1db65 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 18 Oct 2013 19:35:24 +0200
Subject: dmaengine: reference counted unmap data

Hang a common 'unmap' object off of dma descriptors for the purpose of
providing a unified unmapping interface.  The lifetime of a mapping may
span multiple descriptors, so these unmap objects are reference counted
by related descriptor.

Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Tomasz Figa <t.figa@samsung.com>
Cc: Dave Jiang <dave.jiang@intel.com>
[bzolnier: fix IS_ENABLED() check]
[bzolnier: fix release ordering in dmaengine_destroy_unmap_pool()]
[bzolnier: fix check for success in dmaengine_init_unmap_pool()]
[bzolnier: use mempool_free() instead of kmem_cache_free()]
[bzolnier: add missing unmap->len initializations]
[bzolnier: add __init tag to dmaengine_init_unmap_pool()]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
[djbw: move DMAENGINE=n support to this patch for async_tx]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmaengine.c   | 156 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/dmaengine.h |   3 +
 2 files changed, 150 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index bbc89df6bc56..e721a1caff7f 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -65,6 +65,7 @@
 #include <linux/acpi.h>
 #include <linux/acpi_dma.h>
 #include <linux/of_dma.h>
+#include <linux/mempool.h>
 
 static DEFINE_MUTEX(dma_list_mutex);
 static DEFINE_IDR(dma_idr);
@@ -901,6 +902,129 @@ void dma_async_device_unregister(struct dma_device *device)
 }
 EXPORT_SYMBOL(dma_async_device_unregister);
 
+struct dmaengine_unmap_pool {
+	struct kmem_cache *cache;
+	const char *name;
+	mempool_t *pool;
+	size_t size;
+};
+
+#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) }
+static struct dmaengine_unmap_pool unmap_pool[] = {
+	__UNMAP_POOL(2),
+	#if IS_ENABLED(CONFIG_ASYNC_TX_DMA)
+	__UNMAP_POOL(16),
+	__UNMAP_POOL(128),
+	__UNMAP_POOL(256),
+	#endif
+};
+
+static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
+{
+	int order = get_count_order(nr);
+
+	switch (order) {
+	case 0 ... 1:
+		return &unmap_pool[0];
+	case 2 ... 4:
+		return &unmap_pool[1];
+	case 5 ... 7:
+		return &unmap_pool[2];
+	case 8:
+		return &unmap_pool[3];
+	default:
+		BUG();
+		return NULL;
+	}
+}
+
+static void dmaengine_unmap(struct kref *kref)
+{
+	struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref);
+	struct device *dev = unmap->dev;
+	int cnt, i;
+
+	cnt = unmap->to_cnt;
+	for (i = 0; i < cnt; i++)
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_TO_DEVICE);
+	cnt += unmap->from_cnt;
+	for (; i < cnt; i++)
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_FROM_DEVICE);
+	cnt += unmap->bidi_cnt;
+	for (; i < cnt; i++)
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_BIDIRECTIONAL);
+	mempool_free(unmap, __get_unmap_pool(cnt)->pool);
+}
+
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+	if (unmap)
+		kref_put(&unmap->kref, dmaengine_unmap);
+}
+EXPORT_SYMBOL_GPL(dmaengine_unmap_put);
+
+static void dmaengine_destroy_unmap_pool(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+		struct dmaengine_unmap_pool *p = &unmap_pool[i];
+
+		if (p->pool)
+			mempool_destroy(p->pool);
+		p->pool = NULL;
+		if (p->cache)
+			kmem_cache_destroy(p->cache);
+		p->cache = NULL;
+	}
+}
+
+static int __init dmaengine_init_unmap_pool(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+		struct dmaengine_unmap_pool *p = &unmap_pool[i];
+		size_t size;
+
+		size = sizeof(struct dmaengine_unmap_data) +
+		       sizeof(dma_addr_t) * p->size;
+
+		p->cache = kmem_cache_create(p->name, size, 0,
+					     SLAB_HWCACHE_ALIGN, NULL);
+		if (!p->cache)
+			break;
+		p->pool = mempool_create_slab_pool(1, p->cache);
+		if (!p->pool)
+			break;
+	}
+
+	if (i == ARRAY_SIZE(unmap_pool))
+		return 0;
+
+	dmaengine_destroy_unmap_pool();
+	return -ENOMEM;
+}
+
+static struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+	struct dmaengine_unmap_data *unmap;
+
+	unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags);
+	if (!unmap)
+		return NULL;
+
+	memset(unmap, 0, sizeof(*unmap));
+	kref_init(&unmap->kref);
+	unmap->dev = dev;
+
+	return unmap;
+}
+
 /**
  * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
  * @chan: DMA channel to offload copy to
@@ -922,24 +1046,34 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
 {
 	struct dma_device *dev = chan->device;
 	struct dma_async_tx_descriptor *tx;
-	dma_addr_t dma_dest, dma_src;
+	struct dmaengine_unmap_data *unmap;
 	dma_cookie_t cookie;
 	unsigned long flags;
 
-	dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
-	dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
-				DMA_FROM_DEVICE);
-	flags = DMA_CTRL_ACK;
-	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+	unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOIO);
+	if (!unmap)
+		return -ENOMEM;
+
+	unmap->to_cnt = 1;
+	unmap->from_cnt = 1;
+	unmap->addr[0] = dma_map_page(dev->dev, src_pg, src_off, len,
+				      DMA_TO_DEVICE);
+	unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len,
+				      DMA_FROM_DEVICE);
+	unmap->len = len;
+	flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_SRC_UNMAP |
+		DMA_COMPL_SKIP_DEST_UNMAP;
+	tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0],
+					 len, flags);
 
 	if (!tx) {
-		dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
-		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+		dmaengine_unmap_put(unmap);
 		return -ENOMEM;
 	}
 
-	tx->callback = NULL;
+	dma_set_unmap(tx, unmap);
 	cookie = tx->tx_submit(tx);
+	dmaengine_unmap_put(unmap);
 
 	preempt_disable();
 	__this_cpu_add(chan->local->bytes_transferred, len);
@@ -1069,6 +1203,10 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies);
 
 static int __init dma_bus_init(void)
 {
+	int err = dmaengine_init_unmap_pool();
+
+	if (err)
+		return err;
 	return class_register(&dma_devclass);
 }
 arch_initcall(dma_bus_init);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 9070050fbcd8..2fe855a7cab1 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -464,9 +464,12 @@ static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
 	tx->unmap = unmap;
 }
 
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap);
+
 static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx)
 {
 	if (tx->unmap) {
+		dmaengine_unmap_put(tx->unmap);
 		tx->unmap = NULL;
 	}
 }
-- 
cgit v1.2.3


From 82e06c811163c4d853ed335d56c3378088bc89cc Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Thu, 14 Nov 2013 15:16:16 +1100
Subject: wait: add wait_event_cmd()

Add a new API wait_event_cmd(). It's a variant of wait_even() with two
commands executed. One is executed before sleep, another after sleep.

Modified to match use wait.h approach based on suggestion by
Peter Zijlstra <peterz@infradead.org> - neilb

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/wait.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 61939ba30aa0..eaa00b10abaa 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -278,6 +278,31 @@ do {									\
 	__ret;								\
 })
 
+#define __wait_event_cmd(wq, condition, cmd1, cmd2)			\
+	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    cmd1; schedule(); cmd2)
+
+/**
+ * wait_event_cmd - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * cmd1: the command will be executed before sleep
+ * cmd2: the command will be executed after sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define wait_event_cmd(wq, condition, cmd1, cmd2)			\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_cmd(wq, condition, cmd1, cmd2);			\
+} while (0)
+
 #define __wait_event_interruptible(wq, condition)			\
 	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 		      schedule())
-- 
cgit v1.2.3


From f7625980f5820edd1a73536e1a03bcbc1f889fec Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 14 Nov 2013 11:28:18 -0700
Subject: PCI: Fix whitespace, capitalization, and spelling errors

Fix whitespace, capitalization, and spelling errors.  No functional change.
I know "busses" is not an error, but "buses" was more common, so I used it
consistently.

Signed-off-by: Marta Rybczynska <rybczynska@gmail.com> (pci_reset_bridge_secondary_bus())
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/ats.c                       |   2 +-
 drivers/pci/host/pci-tegra.c            |  12 ++--
 drivers/pci/host/pcie-designware.c      |   2 +-
 drivers/pci/hotplug/Kconfig             |   4 +-
 drivers/pci/hotplug/Makefile            |   2 +-
 drivers/pci/hotplug/acpiphp_core.c      |  12 ++--
 drivers/pci/hotplug/acpiphp_glue.c      |   2 +-
 drivers/pci/hotplug/acpiphp_ibm.c       |  14 ++--
 drivers/pci/hotplug/cpci_hotplug_core.c |   2 +-
 drivers/pci/hotplug/cpci_hotplug_pci.c  |   2 +-
 drivers/pci/hotplug/cpcihp_generic.c    |  20 +++---
 drivers/pci/hotplug/cpcihp_zt5550.c     |  22 +++----
 drivers/pci/hotplug/cpcihp_zt5550.h     |  18 +++---
 drivers/pci/hotplug/cpqphp_core.c       |   4 +-
 drivers/pci/hotplug/cpqphp_ctrl.c       |  10 +--
 drivers/pci/hotplug/cpqphp_pci.c        |   5 +-
 drivers/pci/hotplug/ibmphp.h            |  12 ++--
 drivers/pci/hotplug/ibmphp_core.c       | 109 ++++++++++++++++----------------
 drivers/pci/hotplug/ibmphp_ebda.c       | 103 +++++++++++++++---------------
 drivers/pci/hotplug/ibmphp_hpc.c        |  24 +++----
 drivers/pci/hotplug/ibmphp_pci.c        |  71 ++++++++++-----------
 drivers/pci/hotplug/ibmphp_res.c        |  76 +++++++++++-----------
 drivers/pci/hotplug/pci_hotplug_core.c  |   8 +--
 drivers/pci/hotplug/pciehp.h            |   2 +-
 drivers/pci/hotplug/pciehp_acpi.c       |   2 +-
 drivers/pci/hotplug/pciehp_core.c       |   4 +-
 drivers/pci/hotplug/pciehp_hpc.c        |   6 +-
 drivers/pci/hotplug/pcihp_skeleton.c    |   8 +--
 drivers/pci/hotplug/rpadlpar_core.c     |   2 +-
 drivers/pci/hotplug/rpaphp.h            |   6 +-
 drivers/pci/hotplug/rpaphp_core.c       |   8 +--
 drivers/pci/hotplug/rpaphp_pci.c        |   3 +-
 drivers/pci/hotplug/rpaphp_slot.c       |  19 +++---
 drivers/pci/hotplug/shpchp.h            |   8 +--
 drivers/pci/hotplug/shpchp_core.c       |  10 +--
 drivers/pci/hotplug/shpchp_hpc.c        |   2 +-
 drivers/pci/iov.c                       |   2 +-
 drivers/pci/irq.c                       |   2 +-
 drivers/pci/msi.c                       |   2 +-
 drivers/pci/pci-acpi.c                  |   2 +-
 drivers/pci/pci-driver.c                |  16 ++---
 drivers/pci/pci-stub.c                  |   4 +-
 drivers/pci/pci-sysfs.c                 |  28 ++++----
 drivers/pci/pci.c                       |  46 +++++++-------
 drivers/pci/pcie/aer/aerdrv_core.c      |   2 +-
 drivers/pci/pcie/aspm.c                 |   2 +-
 drivers/pci/pcie/pme.c                  |   4 +-
 drivers/pci/pcie/portdrv.h              |   2 +-
 drivers/pci/pcie/portdrv_bus.c          |   4 +-
 drivers/pci/pcie/portdrv_core.c         |   2 +-
 drivers/pci/pcie/portdrv_pci.c          |   6 +-
 drivers/pci/probe.c                     |  10 +--
 drivers/pci/proc.c                      |   2 +-
 drivers/pci/quirks.c                    | 104 +++++++++++++++---------------
 drivers/pci/remove.c                    |   2 +-
 drivers/pci/search.c                    |  12 ++--
 drivers/pci/setup-bus.c                 |  18 +++---
 drivers/pci/setup-res.c                 |   2 +-
 drivers/pci/slot.c                      |   2 +-
 drivers/pci/syscall.c                   |   2 +-
 include/linux/msi.h                     |  10 +--
 include/linux/pci.h                     |  55 ++++++++--------
 include/linux/pci_hotplug.h             |   5 +-
 include/linux/pcieport_if.h             |   2 +-
 include/uapi/linux/pci_regs.h           |  72 ++++++++++-----------
 65 files changed, 518 insertions(+), 520 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 95655d7c0d0b..e52d7ffa38b9 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -410,7 +410,7 @@ EXPORT_SYMBOL_GPL(pci_disable_pasid);
  * Otherwise is returns a bitmask with supported features. Current
  * features reported are:
  * PCI_PASID_CAP_EXEC - Execute permission supported
- * PCI_PASID_CAP_PRIV - Priviledged mode supported
+ * PCI_PASID_CAP_PRIV - Privileged mode supported
  */
 int pci_pasid_features(struct pci_dev *pdev)
 {
diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index 7c4f38dd42ba..0afbbbc55c81 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -249,7 +249,7 @@ struct tegra_pcie {
 	void __iomem *afi;
 	int irq;
 
-	struct list_head busses;
+	struct list_head buses;
 	struct resource *cs;
 
 	struct resource io;
@@ -399,14 +399,14 @@ free:
 
 /*
  * Look up a virtual address mapping for the specified bus number. If no such
- * mapping existis, try to create one.
+ * mapping exists, try to create one.
  */
 static void __iomem *tegra_pcie_bus_map(struct tegra_pcie *pcie,
 					unsigned int busnr)
 {
 	struct tegra_pcie_bus *bus;
 
-	list_for_each_entry(bus, &pcie->busses, list)
+	list_for_each_entry(bus, &pcie->buses, list)
 		if (bus->nr == busnr)
 			return (void __iomem *)bus->area->addr;
 
@@ -414,7 +414,7 @@ static void __iomem *tegra_pcie_bus_map(struct tegra_pcie *pcie,
 	if (IS_ERR(bus))
 		return NULL;
 
-	list_add_tail(&bus->list, &pcie->busses);
+	list_add_tail(&bus->list, &pcie->buses);
 
 	return (void __iomem *)bus->area->addr;
 }
@@ -808,7 +808,7 @@ static int tegra_pcie_enable_controller(struct tegra_pcie *pcie)
 	value &= ~AFI_FUSE_PCIE_T0_GEN2_DIS;
 	afi_writel(pcie, value, AFI_FUSE);
 
-	/* initialze internal PHY, enable up to 16 PCIE lanes */
+	/* initialize internal PHY, enable up to 16 PCIE lanes */
 	pads_writel(pcie, 0x0, PADS_CTL_SEL);
 
 	/* override IDDQ to 1 on all 4 lanes */
@@ -1624,7 +1624,7 @@ static int tegra_pcie_probe(struct platform_device *pdev)
 	if (!pcie)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&pcie->busses);
+	INIT_LIST_HEAD(&pcie->buses);
 	INIT_LIST_HEAD(&pcie->ports);
 	pcie->soc_data = match->data;
 	pcie->dev = &pdev->dev;
diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index 1e1fea4d959b..e33b68be0391 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -197,7 +197,7 @@ static int find_valid_pos0(struct pcie_port *pp, int msgvec, int pos, int *pos0)
 			return -ENOSPC;
 		/*
 		 * Check if this position is at correct offset.nvec is always a
-		 * power of two. pos0 must be nvec bit alligned.
+		 * power of two. pos0 must be nvec bit aligned.
 		 */
 		if (pos % msgvec)
 			pos += msgvec - (pos % msgvec);
diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index 0a648af89531..df8caec59789 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -133,8 +133,8 @@ config HOTPLUG_PCI_RPA_DLPAR
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rpadlpar_io.
- 
- 	  When in doubt, say N.
+
+	  When in doubt, say N.
 
 config HOTPLUG_PCI_SGI
 	tristate "SGI PCI Hotplug Support"
diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
index 47ec8c80e16d..3e6532b945c1 100644
--- a/drivers/pci/hotplug/Makefile
+++ b/drivers/pci/hotplug/Makefile
@@ -31,7 +31,7 @@ pci_hotplug-objs	+=	cpci_hotplug_core.o	\
 				cpci_hotplug_pci.o
 endif
 ifdef CONFIG_ACPI
-pci_hotplug-objs 	+= 	acpi_pcihp.o
+pci_hotplug-objs	+=	acpi_pcihp.o
 endif
 
 cpqphp-objs		:=	cpqphp_core.o	\
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index 8650d39db392..dca66bc44578 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -111,7 +111,7 @@ int acpiphp_register_attention(struct acpiphp_attention_info *info)
  * @info: must match the pointer used to register
  *
  * Description: This is used to un-register a hardware specific acpi
- * driver that manipulates the attention LED.  The pointer to the 
+ * driver that manipulates the attention LED.  The pointer to the
  * info struct must be the same as the one used to set it.
  */
 int acpiphp_unregister_attention(struct acpiphp_attention_info *info)
@@ -169,8 +169,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
  * was registered with us.  This allows hardware specific
  * ACPI implementations to blink the light for us.
  */
- static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
- {
+static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
+{
 	int retval = -ENODEV;
 
 	pr_debug("%s - physical_slot = %s\n", __func__,
@@ -182,8 +182,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
 	} else
 		attention_info = NULL;
 	return retval;
- }
- 
+}
+
 
 /**
  * get_power_status - get power status of a slot
@@ -323,7 +323,7 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
 	if (retval) {
 		pr_err("pci_hp_register failed with error %d\n", retval);
 		goto error_hpslot;
- 	}
+	}
 
 	pr_info("Slot [%s] registered\n", slot_name(slot));
 
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 5b4e9eb0e8ff..1cf605f67673 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -325,7 +325,7 @@ static acpi_status register_slot(acpi_handle handle, u32 lvl, void *data,
 
 	list_add_tail(&slot->node, &bridge->slots);
 
-	/* Register slots for ejectable funtions only. */
+	/* Register slots for ejectable functions only. */
 	if (acpi_pci_check_ejectable(pbus, handle)  || is_dock_device(handle)) {
 		unsigned long long sun;
 		int retval;
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index 0d64c414bf78..ecfac7e72d91 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -116,7 +116,7 @@ static struct bin_attribute ibm_apci_table_attr = {
 	    .read = ibm_read_apci_table,
 	    .write = NULL,
 };
-static struct acpiphp_attention_info ibm_attention_info = 
+static struct acpiphp_attention_info ibm_attention_info =
 {
 	.set_attn = ibm_set_attention_status,
 	.get_attn = ibm_get_attention_status,
@@ -171,9 +171,9 @@ ibm_slot_done:
  */
 static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status)
 {
-	union acpi_object args[2]; 
+	union acpi_object args[2];
 	struct acpi_object_list params = { .pointer = args, .count = 2 };
-	acpi_status stat; 
+	acpi_status stat;
 	unsigned long long rc;
 	union apci_descriptor *ibm_slot;
 
@@ -208,7 +208,7 @@ static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status)
  *
  * Description: This method is registered with the acpiphp module as a
  * callback to do the device specific task of getting the LED status.
- * 
+ *
  * Because there is no direct method of getting the LED status directly
  * from an ACPI call, we read the aPCI table and parse out our
  * slot descriptor to read the status from that.
@@ -259,7 +259,7 @@ static void ibm_handle_events(acpi_handle handle, u32 event, void *context)
 	pr_debug("%s: Received notification %02x\n", __func__, event);
 
 	if (subevent == 0x80) {
-		pr_debug("%s: generationg bus event\n", __func__);
+		pr_debug("%s: generating bus event\n", __func__);
 		acpi_bus_generate_netlink_event(note->device->pnp.device_class,
 						  dev_name(&note->device->dev),
 						  note->event, detail);
@@ -387,7 +387,7 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle,
 		u32 lvl, void *context, void **rv)
 {
 	acpi_handle *phandle = (acpi_handle *)context;
-	acpi_status status; 
+	acpi_status status;
 	struct acpi_device_info *info;
 	int retval = 0;
 
@@ -405,7 +405,7 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle,
 			info->hardware_id.string, handle);
 		*phandle = handle;
 		/* returning non-zero causes the search to stop
-		 * and returns this value to the caller of 
+		 * and returns this value to the caller of
 		 * acpi_walk_namespace, but it also causes some warnings
 		 * in the acpi debug code to print...
 		 */
diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c
index 2b4c412f94c3..00c81a3cefc9 100644
--- a/drivers/pci/hotplug/cpci_hotplug_core.c
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c
@@ -46,7 +46,7 @@
 	do {							\
 		if (cpci_debug)					\
 			printk (KERN_DEBUG "%s: " format "\n",	\
-				MY_NAME , ## arg); 		\
+				MY_NAME , ## arg);		\
 	} while (0)
 #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg)
 #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg)
diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c
index d8add34177f2..d3add9819f63 100644
--- a/drivers/pci/hotplug/cpci_hotplug_pci.c
+++ b/drivers/pci/hotplug/cpci_hotplug_pci.c
@@ -39,7 +39,7 @@ extern int cpci_debug;
 	do {							\
 		if (cpci_debug)					\
 			printk (KERN_DEBUG "%s: " format "\n",	\
-				MY_NAME , ## arg); 		\
+				MY_NAME , ## arg);		\
 	} while (0)
 #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg)
 #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg)
diff --git a/drivers/pci/hotplug/cpcihp_generic.c b/drivers/pci/hotplug/cpcihp_generic.c
index a6a71c41cdf8..7536eef620b0 100644
--- a/drivers/pci/hotplug/cpcihp_generic.c
+++ b/drivers/pci/hotplug/cpcihp_generic.c
@@ -13,14 +13,14 @@
  * option) any later version.
  *
  * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
- * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * You should have received a copy of the GNU General Public License along
@@ -53,9 +53,9 @@
 
 #define dbg(format, arg...)					\
 	do {							\
-		if(debug)					\
+		if (debug)					\
 			printk (KERN_DEBUG "%s: " format "\n",	\
-				MY_NAME , ## arg); 		\
+				MY_NAME , ## arg);		\
 	} while(0)
 #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg)
 #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg)
diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c
index 449b4bbc8301..e8c4a7ccf578 100644
--- a/drivers/pci/hotplug/cpcihp_zt5550.c
+++ b/drivers/pci/hotplug/cpcihp_zt5550.c
@@ -13,14 +13,14 @@
  * option) any later version.
  *
  * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
- * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * You should have received a copy of the GNU General Public License along
@@ -48,9 +48,9 @@
 
 #define dbg(format, arg...)					\
 	do {							\
-		if(debug)					\
+		if (debug)					\
 			printk (KERN_DEBUG "%s: " format "\n",	\
-				MY_NAME , ## arg); 		\
+				MY_NAME , ## arg);		\
 	} while(0)
 #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg)
 #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg)
@@ -285,7 +285,7 @@ static struct pci_device_id zt5550_hc_pci_tbl[] = {
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, zt5550_hc_pci_tbl);
-	
+
 static struct pci_driver zt5550_hc_driver = {
 	.name		= "zt5550_hc",
 	.id_table	= zt5550_hc_pci_tbl,
diff --git a/drivers/pci/hotplug/cpcihp_zt5550.h b/drivers/pci/hotplug/cpcihp_zt5550.h
index bebc6060a558..9a57fda5348c 100644
--- a/drivers/pci/hotplug/cpcihp_zt5550.h
+++ b/drivers/pci/hotplug/cpcihp_zt5550.h
@@ -13,14 +13,14 @@
  * option) any later version.
  *
  * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
- * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * You should have received a copy of the GNU General Public License along
@@ -55,7 +55,7 @@
 #define HC_CMD_REG		0x0C
 #define ARB_CONFIG_GNT_REG	0x10
 #define ARB_CONFIG_CFG_REG	0x12
-#define ARB_CONFIG_REG	 	0x10
+#define ARB_CONFIG_REG		0x10
 #define ISOL_CONFIG_REG		0x18
 #define FAULT_STATUS_REG	0x20
 #define FAULT_CONFIG_REG	0x24
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index c8eaeb43fa5d..31273e155e6c 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -862,10 +862,10 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_disable_device;
 	}
 
-	/* Check for the proper subsystem ID's
+	/* Check for the proper subsystem IDs
 	 * Intel uses a different SSID programming model than Compaq.
 	 * For Intel, each SSID bit identifies a PHP capability.
-	 * Also Intel HPC's may have RID=0.
+	 * Also Intel HPCs may have RID=0.
 	 */
 	if ((pdev->revision <= 2) && (vendor_id != PCI_VENDOR_ID_INTEL)) {
 		err(msg_HPC_not_supported);
diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c
index d282019cda5f..11845b796799 100644
--- a/drivers/pci/hotplug/cpqphp_ctrl.c
+++ b/drivers/pci/hotplug/cpqphp_ctrl.c
@@ -1231,7 +1231,7 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_
 
 	/* Only if mode change...*/
 	if (((bus->cur_bus_speed == PCI_SPEED_66MHz) && (adapter_speed == PCI_SPEED_66MHz_PCIX)) ||
-		((bus->cur_bus_speed == PCI_SPEED_66MHz_PCIX) && (adapter_speed == PCI_SPEED_66MHz))) 
+		((bus->cur_bus_speed == PCI_SPEED_66MHz_PCIX) && (adapter_speed == PCI_SPEED_66MHz)))
 			set_SOGO(ctrl);
 
 	wait_for_ctrl_irq(ctrl);
@@ -1828,7 +1828,7 @@ static void interrupt_event_handler(struct controller *ctrl)
 
 				if (ctrl->event_queue[loop].event_type == INT_BUTTON_PRESS) {
 					dbg("button pressed\n");
-				} else if (ctrl->event_queue[loop].event_type == 
+				} else if (ctrl->event_queue[loop].event_type ==
 					   INT_BUTTON_CANCEL) {
 					dbg("button cancel\n");
 					del_timer(&p_slot->task_event);
@@ -2411,11 +2411,11 @@ static int configure_new_function(struct controller *ctrl, struct pci_func *func
 		if (rc)
 			return rc;
 
-		/* find range of busses to use */
+		/* find range of buses to use */
 		dbg("find ranges of buses to use\n");
 		bus_node = get_max_resource(&(resources->bus_head), 1);
 
-		/* If we don't have any busses to allocate, we can't continue */
+		/* If we don't have any buses to allocate, we can't continue */
 		if (!bus_node)
 			return -ENOMEM;
 
@@ -2900,7 +2900,7 @@ static int configure_new_function(struct controller *ctrl, struct pci_func *func
 
 			/* If this function needs an interrupt and we are behind
 			 * a bridge and the pin is tied to something that's
-			 * alread mapped, set this one the same */
+			 * already mapped, set this one the same */
 			if (temp_byte && resources->irqs &&
 			    (resources->irqs->valid_INT &
 			     (0x01 << ((temp_byte + resources->irqs->barber_pole - 1) & 0x03)))) {
diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c
index 09801c6945ce..6e4a12c91adb 100644
--- a/drivers/pci/hotplug/cpqphp_pci.c
+++ b/drivers/pci/hotplug/cpqphp_pci.c
@@ -291,7 +291,7 @@ int cpqhp_get_bus_dev (struct controller *ctrl, u8 * bus_num, u8 * dev_num, u8 s
  *
  * Reads configuration for all slots in a PCI bus and saves info.
  *
- * Note:  For non-hot plug busses, the slot # saved is the device #
+ * Note:  For non-hot plug buses, the slot # saved is the device #
  *
  * returns 0 if success
  */
@@ -455,7 +455,7 @@ int cpqhp_save_config(struct controller *ctrl, int busnumber, int is_hot_plug)
  * cpqhp_save_slot_config
  *
  * Saves configuration info for all PCI devices in a given slot
- * including subordinate busses.
+ * including subordinate buses.
  *
  * returns 0 if success
  */
@@ -1556,4 +1556,3 @@ void cpqhp_destroy_board_resources (struct pci_func * func)
 		kfree(tres);
 	}
 }
-
diff --git a/drivers/pci/hotplug/ibmphp.h b/drivers/pci/hotplug/ibmphp.h
index 8c5b25871d02..e3e46a7b3ee7 100644
--- a/drivers/pci/hotplug/ibmphp.h
+++ b/drivers/pci/hotplug/ibmphp.h
@@ -59,7 +59,7 @@ extern int ibmphp_debug;
 
 
 /************************************************************
-*  RESOURE TYPE                                             *
+*  RESOURCE TYPE                                             *
 ************************************************************/
 
 #define EBDA_RSRC_TYPE_MASK		0x03
@@ -103,7 +103,7 @@ extern int ibmphp_debug;
 //--------------------------------------------------------------
 
 struct rio_table_hdr {
-	u8 ver_num; 
+	u8 ver_num;
 	u8 scal_count;
 	u8 riodev_count;
 	u16 offset;
@@ -127,7 +127,7 @@ struct scal_detail {
 };
 
 //--------------------------------------------------------------
-// RIO DETAIL 
+// RIO DETAIL
 //--------------------------------------------------------------
 
 struct rio_detail {
@@ -152,7 +152,7 @@ struct opt_rio {
 	u8 first_slot_num;
 	u8 middle_num;
 	struct list_head opt_rio_list;
-};	
+};
 
 struct opt_rio_lo {
 	u8 rio_type;
@@ -161,7 +161,7 @@ struct opt_rio_lo {
 	u8 middle_num;
 	u8 pack_count;
 	struct list_head opt_rio_lo_list;
-};	
+};
 
 /****************************************************************
 *  HPC DESCRIPTOR NODE                                          *
@@ -574,7 +574,7 @@ void ibmphp_hpc_stop_poll_thread(void);
 #define HPC_CTLR_IRQ_PENDG	0x80
 
 //----------------------------------------------------------------------------
-// HPC_CTLR_WROKING status return codes
+// HPC_CTLR_WORKING status return codes
 //----------------------------------------------------------------------------
 #define HPC_CTLR_WORKING_NO	0x00
 #define HPC_CTLR_WORKING_YES	0x01
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index cbd72d81d253..efdc13adbe41 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -58,7 +58,7 @@ MODULE_DESCRIPTION (DRIVER_DESC);
 struct pci_bus *ibmphp_pci_bus;
 static int max_slots;
 
-static int irqs[16];    /* PIC mode IRQ's we're using so far (in case MPS
+static int irqs[16];    /* PIC mode IRQs we're using so far (in case MPS
 			 * tables don't provide default info for empty slots */
 
 static int init_flag;
@@ -71,20 +71,20 @@ static inline int get_max_adapter_speed (struct hotplug_slot *hs, u8 *value)
 	return get_max_adapter_speed_1 (hs, value, 1);
 }
 */
-static inline int get_cur_bus_info(struct slot **sl) 
+static inline int get_cur_bus_info(struct slot **sl)
 {
 	int rc = 1;
 	struct slot * slot_cur = *sl;
 
 	debug("options = %x\n", slot_cur->ctrl->options);
-	debug("revision = %x\n", slot_cur->ctrl->revision);	
+	debug("revision = %x\n", slot_cur->ctrl->revision);
 
-	if (READ_BUS_STATUS(slot_cur->ctrl)) 
+	if (READ_BUS_STATUS(slot_cur->ctrl))
 		rc = ibmphp_hpc_readslot(slot_cur, READ_BUSSTATUS, NULL);
-	
-	if (rc) 
+
+	if (rc)
 		return rc;
-	  
+
 	slot_cur->bus_on->current_speed = CURRENT_BUS_SPEED(slot_cur->busstatus);
 	if (READ_BUS_MODE(slot_cur->ctrl))
 		slot_cur->bus_on->current_bus_mode =
@@ -96,7 +96,7 @@ static inline int get_cur_bus_info(struct slot **sl)
 			slot_cur->busstatus,
 			slot_cur->bus_on->current_speed,
 			slot_cur->bus_on->current_bus_mode);
-	
+
 	*sl = slot_cur;
 	return 0;
 }
@@ -104,8 +104,8 @@ static inline int get_cur_bus_info(struct slot **sl)
 static inline int slot_update(struct slot **sl)
 {
 	int rc;
- 	rc = ibmphp_hpc_readslot(*sl, READ_ALLSTAT, NULL);
-	if (rc) 
+	rc = ibmphp_hpc_readslot(*sl, READ_ALLSTAT, NULL);
+	if (rc)
 		return rc;
 	if (!init_flag)
 		rc = get_cur_bus_info(sl);
@@ -172,7 +172,7 @@ int ibmphp_init_devno(struct slot **cur_slot)
 			debug("(*cur_slot)->irq[3] = %x\n",
 					(*cur_slot)->irq[3]);
 
-			debug("rtable->exlusive_irqs = %x\n",
+			debug("rtable->exclusive_irqs = %x\n",
 					rtable->exclusive_irqs);
 			debug("rtable->slots[loop].irq[0].bitmap = %x\n",
 					rtable->slots[loop].irq[0].bitmap);
@@ -271,7 +271,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 value)
 			else
 				rc = -ENODEV;
 		}
-	} else	
+	} else
 		rc = -ENODEV;
 
 	ibmphp_unlock_operations();
@@ -288,7 +288,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 * value)
 
 	debug("get_attention_status - Entry hotplug_slot[%lx] pvalue[%lx]\n",
 					(ulong) hotplug_slot, (ulong) value);
-        
+
 	ibmphp_lock_operations();
 	if (hotplug_slot) {
 		pslot = hotplug_slot->private;
@@ -406,14 +406,14 @@ static int get_max_bus_speed(struct slot *slot)
 
 	ibmphp_lock_operations();
 	mode = slot->supported_bus_mode;
-	speed = slot->supported_speed; 
+	speed = slot->supported_speed;
 	ibmphp_unlock_operations();
 
 	switch (speed) {
 	case BUS_SPEED_33:
 		break;
 	case BUS_SPEED_66:
-		if (mode == BUS_MODE_PCIX) 
+		if (mode == BUS_MODE_PCIX)
 			speed += 0x01;
 		break;
 	case BUS_SPEED_100:
@@ -515,13 +515,13 @@ static int __init init_ops(void)
 
 		debug("BEFORE GETTING SLOT STATUS, slot # %x\n",
 							slot_cur->number);
-		if (slot_cur->ctrl->revision == 0xFF) 
+		if (slot_cur->ctrl->revision == 0xFF)
 			if (get_ctrl_revision(slot_cur,
 						&slot_cur->ctrl->revision))
 				return -1;
 
-		if (slot_cur->bus_on->current_speed == 0xFF) 
-			if (get_cur_bus_info(&slot_cur)) 
+		if (slot_cur->bus_on->current_speed == 0xFF)
+			if (get_cur_bus_info(&slot_cur))
 				return -1;
 		get_max_bus_speed(slot_cur);
 
@@ -539,8 +539,8 @@ static int __init init_ops(void)
 		debug("SLOT_PRESENT = %x\n", SLOT_PRESENT(slot_cur->status));
 		debug("SLOT_LATCH = %x\n", SLOT_LATCH(slot_cur->status));
 
-		if ((SLOT_PWRGD(slot_cur->status)) && 
-		    !(SLOT_PRESENT(slot_cur->status)) && 
+		if ((SLOT_PWRGD(slot_cur->status)) &&
+		    !(SLOT_PRESENT(slot_cur->status)) &&
 		    !(SLOT_LATCH(slot_cur->status))) {
 			debug("BEFORE POWER OFF COMMAND\n");
 				rc = power_off(slot_cur);
@@ -581,13 +581,13 @@ static int validate(struct slot *slot_cur, int opn)
 
 	switch (opn) {
 		case ENABLE:
-			if (!(SLOT_PWRGD(slot_cur->status)) && 
-			     (SLOT_PRESENT(slot_cur->status)) && 
+			if (!(SLOT_PWRGD(slot_cur->status)) &&
+			     (SLOT_PRESENT(slot_cur->status)) &&
 			     !(SLOT_LATCH(slot_cur->status)))
 				return 0;
 			break;
 		case DISABLE:
-			if ((SLOT_PWRGD(slot_cur->status)) && 
+			if ((SLOT_PWRGD(slot_cur->status)) &&
 			    (SLOT_PRESENT(slot_cur->status)) &&
 			    !(SLOT_LATCH(slot_cur->status)))
 				return 0;
@@ -617,7 +617,7 @@ int ibmphp_update_slot_info(struct slot *slot_cur)
 		err("out of system memory\n");
 		return -ENOMEM;
 	}
-        
+
 	info->power_status = SLOT_PWRGD(slot_cur->status);
 	info->attention_status = SLOT_ATTN(slot_cur->status,
 						slot_cur->ext_status);
@@ -638,7 +638,7 @@ int ibmphp_update_slot_info(struct slot *slot_cur)
 		case BUS_SPEED_33:
 			break;
 		case BUS_SPEED_66:
-			if (mode == BUS_MODE_PCIX) 
+			if (mode == BUS_MODE_PCIX)
 				bus_speed += 0x01;
 			else if (mode == BUS_MODE_PCI)
 				;
@@ -654,8 +654,8 @@ int ibmphp_update_slot_info(struct slot *slot_cur)
 	}
 
 	bus->cur_bus_speed = bus_speed;
-	// To do: bus_names 
-	
+	// To do: bus_names
+
 	rc = pci_hp_change_slot_info(slot_cur->hotplug_slot, info);
 	kfree(info);
 	return rc;
@@ -729,8 +729,8 @@ static void ibm_unconfigure_device(struct pci_func *func)
 }
 
 /*
- * The following function is to fix kernel bug regarding 
- * getting bus entries, here we manually add those primary 
+ * The following function is to fix kernel bug regarding
+ * getting bus entries, here we manually add those primary
  * bus entries to kernel bus structure whenever apply
  */
 static u8 bus_structure_fixup(u8 busno)
@@ -814,7 +814,7 @@ static int ibm_configure_device(struct pci_func *func)
 }
 
 /*******************************************************
- * Returns whether the bus is empty or not 
+ * Returns whether the bus is empty or not
  *******************************************************/
 static int is_bus_empty(struct slot * slot_cur)
 {
@@ -842,7 +842,7 @@ static int is_bus_empty(struct slot * slot_cur)
 }
 
 /***********************************************************
- * If the HPC permits and the bus currently empty, tries to set the 
+ * If the HPC permits and the bus currently empty, tries to set the
  * bus speed and mode at the maximum card and bus capability
  * Parameters: slot
  * Returns: bus is set (0) or error code
@@ -856,7 +856,7 @@ static int set_bus(struct slot * slot_cur)
 	static struct pci_device_id ciobx[] = {
 		{ PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, 0x0101) },
 	        { },
-	};	
+	};
 
 	debug("%s - entry slot # %d\n", __func__, slot_cur->number);
 	if (SET_BUS_STATUS(slot_cur->ctrl) && is_bus_empty(slot_cur)) {
@@ -877,7 +877,7 @@ static int set_bus(struct slot * slot_cur)
 				else if (!SLOT_BUS_MODE(slot_cur->ext_status))
 					/* if max slot/bus capability is 66 pci
 					and there's no bus mode mismatch, then
-					the adapter supports 66 pci */ 
+					the adapter supports 66 pci */
 					cmd = HPC_BUS_66CONVMODE;
 				else
 					cmd = HPC_BUS_33CONVMODE;
@@ -930,7 +930,7 @@ static int set_bus(struct slot * slot_cur)
 			return -EIO;
 		}
 	}
-	/* This is for x440, once Brandon fixes the firmware, 
+	/* This is for x440, once Brandon fixes the firmware,
 	will not need this delay */
 	msleep(1000);
 	debug("%s -Exit\n", __func__);
@@ -938,9 +938,9 @@ static int set_bus(struct slot * slot_cur)
 }
 
 /* This routine checks the bus limitations that the slot is on from the BIOS.
- * This is used in deciding whether or not to power up the slot.  
+ * This is used in deciding whether or not to power up the slot.
  * (electrical/spec limitations. For example, >1 133 MHz or >2 66 PCI cards on
- * same bus) 
+ * same bus)
  * Parameters: slot
  * Returns: 0 = no limitations, -EINVAL = exceeded limitations on the bus
  */
@@ -986,7 +986,7 @@ static int check_limitations(struct slot *slot_cur)
 static inline void print_card_capability(struct slot *slot_cur)
 {
 	info("capability of the card is ");
-	if ((slot_cur->ext_status & CARD_INFO) == PCIX133) 
+	if ((slot_cur->ext_status & CARD_INFO) == PCIX133)
 		info("   133 MHz PCI-X\n");
 	else if ((slot_cur->ext_status & CARD_INFO) == PCIX66)
 		info("    66 MHz PCI-X\n");
@@ -1020,7 +1020,7 @@ static int enable_slot(struct hotplug_slot *hs)
 	}
 
 	attn_LED_blink(slot_cur);
-	
+
 	rc = set_bus(slot_cur);
 	if (rc) {
 		err("was not able to set the bus\n");
@@ -1082,7 +1082,7 @@ static int enable_slot(struct hotplug_slot *hs)
 	rc = slot_update(&slot_cur);
 	if (rc)
 		goto error_power;
-	
+
 	rc = -EINVAL;
 	if (SLOT_POWER(slot_cur->status) && !(SLOT_PWRGD(slot_cur->status))) {
 		err("power fault occurred trying to power up...\n");
@@ -1093,7 +1093,7 @@ static int enable_slot(struct hotplug_slot *hs)
 					"speed and card capability\n");
 		print_card_capability(slot_cur);
 		goto error_power;
-	} 
+	}
 	/* Don't think this case will happen after above checks...
 	 * but just in case, for paranoia sake */
 	if (!(SLOT_POWER(slot_cur->status))) {
@@ -1144,7 +1144,7 @@ static int enable_slot(struct hotplug_slot *hs)
 	ibmphp_print_test();
 	rc = ibmphp_update_slot_info(slot_cur);
 exit:
-	ibmphp_unlock_operations(); 
+	ibmphp_unlock_operations();
 	return rc;
 
 error_nopower:
@@ -1180,7 +1180,7 @@ static int ibmphp_disable_slot(struct hotplug_slot *hotplug_slot)
 {
 	struct slot *slot = hotplug_slot->private;
 	int rc;
-	
+
 	ibmphp_lock_operations();
 	rc = ibmphp_do_disable_slot(slot);
 	ibmphp_unlock_operations();
@@ -1192,12 +1192,12 @@ int ibmphp_do_disable_slot(struct slot *slot_cur)
 	int rc;
 	u8 flag;
 
-	debug("DISABLING SLOT...\n"); 
-		
+	debug("DISABLING SLOT...\n");
+
 	if ((slot_cur == NULL) || (slot_cur->ctrl == NULL)) {
 		return -ENODEV;
 	}
-	
+
 	flag = slot_cur->flag;
 	slot_cur->flag = 1;
 
@@ -1210,7 +1210,7 @@ int ibmphp_do_disable_slot(struct slot *slot_cur)
 	attn_LED_blink(slot_cur);
 
 	if (slot_cur->func == NULL) {
-		/* We need this for fncs's that were there on bootup */
+		/* We need this for functions that were there on bootup */
 		slot_cur->func = kzalloc(sizeof(struct pci_func), GFP_KERNEL);
 		if (!slot_cur->func) {
 			err("out of system memory\n");
@@ -1222,12 +1222,13 @@ int ibmphp_do_disable_slot(struct slot *slot_cur)
 	}
 
 	ibm_unconfigure_device(slot_cur->func);
-        
-	/* If we got here from latch suddenly opening on operating card or 
-	a power fault, there's no power to the card, so cannot
-	read from it to determine what resources it occupied.  This operation
-	is forbidden anyhow.  The best we can do is remove it from kernel
-	lists at least */
+
+	/*
+	 * If we got here from latch suddenly opening on operating card or
+	 * a power fault, there's no power to the card, so cannot
+	 * read from it to determine what resources it occupied.  This operation
+	 * is forbidden anyhow.  The best we can do is remove it from kernel
+	 * lists at least */
 
 	if (!flag) {
 		attn_off(slot_cur);
@@ -1264,7 +1265,7 @@ error:
 		rc = -EFAULT;
 		goto exit;
 	}
-	if (flag)		
+	if (flag)
 		ibmphp_update_slot_info(slot_cur);
 	goto exit;
 }
@@ -1339,7 +1340,7 @@ static int __init ibmphp_init(void)
 	debug("AFTER Resource & EBDA INITIALIZATIONS\n");
 
 	max_slots = get_max_slots();
-	
+
 	if ((rc = ibmphp_register_pci()))
 		goto error;
 
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 9df78bc14541..bd044158b36c 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -123,7 +123,7 @@ static struct ebda_pci_rsrc *alloc_ebda_pci_rsrc (void)
 static void __init print_bus_info (void)
 {
 	struct bus_info *ptr;
-	
+
 	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		debug ("%s - slot_min = %x\n", __func__, ptr->slot_min);
 		debug ("%s - slot_max = %x\n", __func__, ptr->slot_max);
@@ -131,7 +131,7 @@ static void __init print_bus_info (void)
 		debug ("%s - bus# = %x\n", __func__, ptr->busno);
 		debug ("%s - current_speed = %x\n", __func__, ptr->current_speed);
 		debug ("%s - controller_id = %x\n", __func__, ptr->controller_id);
-		
+
 		debug ("%s - slots_at_33_conv = %x\n", __func__, ptr->slots_at_33_conv);
 		debug ("%s - slots_at_66_conv = %x\n", __func__, ptr->slots_at_66_conv);
 		debug ("%s - slots_at_66_pcix = %x\n", __func__, ptr->slots_at_66_pcix);
@@ -144,7 +144,7 @@ static void __init print_bus_info (void)
 static void print_lo_info (void)
 {
 	struct rio_detail *ptr;
-	debug ("print_lo_info ----\n");	
+	debug ("print_lo_info ----\n");
 	list_for_each_entry(ptr, &rio_lo_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
@@ -176,7 +176,7 @@ static void __init print_ebda_pci_rsrc (void)
 	struct ebda_pci_rsrc *ptr;
 
 	list_for_each_entry(ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
-		debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", 
+		debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n",
 			__func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr);
 	}
 }
@@ -259,7 +259,7 @@ int __init ibmphp_access_ebda (void)
 	ebda_seg = readw (io_mem);
 	iounmap (io_mem);
 	debug ("returned ebda segment: %x\n", ebda_seg);
-	
+
 	io_mem = ioremap(ebda_seg<<4, 1);
 	if (!io_mem)
 		return -ENOMEM;
@@ -310,7 +310,7 @@ int __init ibmphp_access_ebda (void)
 			re = readw (io_mem + sub_addr);	/* next sub blk */
 
 			sub_addr += 2;
-			rc_id = readw (io_mem + sub_addr); 	/* sub blk id */
+			rc_id = readw (io_mem + sub_addr);	/* sub blk id */
 
 			sub_addr += 2;
 			if (rc_id != 0x5243)
@@ -330,7 +330,7 @@ int __init ibmphp_access_ebda (void)
 			debug ("info about hpc descriptor---\n");
 			debug ("hot blk format: %x\n", format);
 			debug ("num of controller: %x\n", num_ctlrs);
-			debug ("offset of hpc data structure enteries: %x\n ", sub_addr);
+			debug ("offset of hpc data structure entries: %x\n ", sub_addr);
 
 			sub_addr = base + re;	/* re sub blk */
 			/* FIXME: rc is never used/checked */
@@ -359,7 +359,7 @@ int __init ibmphp_access_ebda (void)
 			debug ("info about rsrc descriptor---\n");
 			debug ("format: %x\n", format);
 			debug ("num of rsrc: %x\n", num_entries);
-			debug ("offset of rsrc data structure enteries: %x\n ", sub_addr);
+			debug ("offset of rsrc data structure entries: %x\n ", sub_addr);
 
 			hs_complete = 1;
 		} else {
@@ -376,7 +376,7 @@ int __init ibmphp_access_ebda (void)
 			rio_table_ptr->scal_count = readb (io_mem + offset + 1);
 			rio_table_ptr->riodev_count = readb (io_mem + offset + 2);
 			rio_table_ptr->offset = offset +3 ;
-			
+
 			debug("info about rio table hdr ---\n");
 			debug("ver_num: %x\nscal_count: %x\nriodev_count: %x\noffset of rio table: %x\n ",
 				rio_table_ptr->ver_num, rio_table_ptr->scal_count,
@@ -440,12 +440,12 @@ static int __init ebda_rio_table (void)
 		rio_detail_ptr->chassis_num = readb (io_mem + offset + 14);
 //		debug ("rio_node_id: %x\nbbar: %x\nrio_type: %x\nowner_id: %x\nport0_node: %x\nport0_port: %x\nport1_node: %x\nport1_port: %x\nfirst_slot_num: %x\nstatus: %x\n", rio_detail_ptr->rio_node_id, rio_detail_ptr->bbar, rio_detail_ptr->rio_type, rio_detail_ptr->owner_id, rio_detail_ptr->port0_node_connect, rio_detail_ptr->port0_port_connect, rio_detail_ptr->port1_node_connect, rio_detail_ptr->port1_port_connect, rio_detail_ptr->first_slot_num, rio_detail_ptr->status);
 		//create linked list of chassis
-		if (rio_detail_ptr->rio_type == 4 || rio_detail_ptr->rio_type == 5) 
+		if (rio_detail_ptr->rio_type == 4 || rio_detail_ptr->rio_type == 5)
 			list_add (&rio_detail_ptr->rio_detail_list, &rio_vg_head);
-		//create linked list of expansion box				
-		else if (rio_detail_ptr->rio_type == 6 || rio_detail_ptr->rio_type == 7) 
+		//create linked list of expansion box
+		else if (rio_detail_ptr->rio_type == 6 || rio_detail_ptr->rio_type == 7)
 			list_add (&rio_detail_ptr->rio_detail_list, &rio_lo_head);
-		else 
+		else
 			// not in my concern
 			kfree (rio_detail_ptr);
 		offset += 15;
@@ -456,7 +456,7 @@ static int __init ebda_rio_table (void)
 }
 
 /*
- * reorganizing linked list of chassis	 
+ * reorganizing linked list of chassis
  */
 static struct opt_rio *search_opt_vg (u8 chassis_num)
 {
@@ -464,7 +464,7 @@ static struct opt_rio *search_opt_vg (u8 chassis_num)
 	list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
-	}		
+	}
 	return NULL;
 }
 
@@ -472,7 +472,7 @@ static int __init combine_wpg_for_chassis (void)
 {
 	struct opt_rio *opt_rio_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
-	
+
 	list_for_each_entry(rio_detail_ptr, &rio_vg_head, rio_detail_list) {
 		opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num);
 		if (!opt_rio_ptr) {
@@ -484,14 +484,14 @@ static int __init combine_wpg_for_chassis (void)
 			opt_rio_ptr->first_slot_num = rio_detail_ptr->first_slot_num;
 			opt_rio_ptr->middle_num = rio_detail_ptr->first_slot_num;
 			list_add (&opt_rio_ptr->opt_rio_list, &opt_vg_head);
-		} else {	
+		} else {
 			opt_rio_ptr->first_slot_num = min (opt_rio_ptr->first_slot_num, rio_detail_ptr->first_slot_num);
 			opt_rio_ptr->middle_num = max (opt_rio_ptr->middle_num, rio_detail_ptr->first_slot_num);
-		}	
+		}
 	}
 	print_opt_vg ();
-	return 0;	
-}	
+	return 0;
+}
 
 /*
  * reorganizing linked list of expansion box
@@ -502,7 +502,7 @@ static struct opt_rio_lo *search_opt_lo (u8 chassis_num)
 	list_for_each_entry(ptr, &opt_lo_head, opt_rio_lo_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
-	}		
+	}
 	return NULL;
 }
 
@@ -510,7 +510,7 @@ static int combine_wpg_for_expansion (void)
 {
 	struct opt_rio_lo *opt_rio_lo_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
-	
+
 	list_for_each_entry(rio_detail_ptr, &rio_lo_head, rio_detail_list) {
 		opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num);
 		if (!opt_rio_lo_ptr) {
@@ -522,22 +522,22 @@ static int combine_wpg_for_expansion (void)
 			opt_rio_lo_ptr->first_slot_num = rio_detail_ptr->first_slot_num;
 			opt_rio_lo_ptr->middle_num = rio_detail_ptr->first_slot_num;
 			opt_rio_lo_ptr->pack_count = 1;
-			
+
 			list_add (&opt_rio_lo_ptr->opt_rio_lo_list, &opt_lo_head);
-		} else {	
+		} else {
 			opt_rio_lo_ptr->first_slot_num = min (opt_rio_lo_ptr->first_slot_num, rio_detail_ptr->first_slot_num);
 			opt_rio_lo_ptr->middle_num = max (opt_rio_lo_ptr->middle_num, rio_detail_ptr->first_slot_num);
 			opt_rio_lo_ptr->pack_count = 2;
-		}	
+		}
 	}
-	return 0;	
+	return 0;
 }
-	
+
 
 /* Since we don't know the max slot number per each chassis, hence go
  * through the list of all chassis to find out the range
- * Arguments: slot_num, 1st slot number of the chassis we think we are on, 
- * var (0 = chassis, 1 = expansion box) 
+ * Arguments: slot_num, 1st slot number of the chassis we think we are on,
+ * var (0 = chassis, 1 = expansion box)
  */
 static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 {
@@ -547,7 +547,7 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 
 	if (!var) {
 		list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
-			if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { 
+			if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) {
 				rc = -ENODEV;
 				break;
 			}
@@ -569,7 +569,7 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 
 	list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 		//check to see if this slot_num belongs to expansion box
-		if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) 
+		if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1)))
 			return opt_lo_ptr;
 	}
 	return NULL;
@@ -580,8 +580,8 @@ static struct opt_rio * find_chassis_num (u8 slot_num)
 	struct opt_rio *opt_vg_ptr;
 
 	list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
-		//check to see if this slot_num belongs to chassis 
-		if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) 
+		//check to see if this slot_num belongs to chassis
+		if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0)))
 			return opt_vg_ptr;
 	}
 	return NULL;
@@ -594,13 +594,13 @@ static u8 calculate_first_slot (u8 slot_num)
 {
 	u8 first_slot = 1;
 	struct slot * slot_cur;
-	
+
 	list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot_cur->ctrl) {
-			if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) 
+			if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num))
 				first_slot = slot_cur->ctrl->ending_slot_num;
 		}
-	}			
+	}
 	return first_slot + 1;
 
 }
@@ -622,11 +622,11 @@ static char *create_file_name (struct slot * slot_cur)
 		err ("Structure passed is empty\n");
 		return NULL;
 	}
-	
+
 	slot_num = slot_cur->number;
 
 	memset (str, 0, sizeof(str));
-	
+
 	if (rio_table_ptr) {
 		if (rio_table_ptr->ver_num == 3) {
 			opt_vg_ptr = find_chassis_num (slot_num);
@@ -660,7 +660,7 @@ static char *create_file_name (struct slot * slot_cur)
 			/* if both NULL and we DO have correct RIO table in BIOS */
 			return NULL;
 		}
-	} 
+	}
 	if (!flag) {
 		if (slot_cur->ctrl->ctlr_type == 4) {
 			first_slot = calculate_first_slot (slot_num);
@@ -798,7 +798,7 @@ static int __init ebda_rsrc_controller (void)
 			slot_ptr->ctl_index = readb (io_mem + addr_slot + 2*slot_num);
 			slot_ptr->slot_cap = readb (io_mem + addr_slot + 3*slot_num);
 
-			// create bus_info lined list --- if only one slot per bus: slot_min = slot_max 
+			// create bus_info lined list --- if only one slot per bus: slot_min = slot_max
 
 			bus_info_ptr2 = ibmphp_find_same_bus_num (slot_ptr->slot_bus_num);
 			if (!bus_info_ptr2) {
@@ -814,9 +814,9 @@ static int __init ebda_rsrc_controller (void)
 				bus_info_ptr1->index = bus_index++;
 				bus_info_ptr1->current_speed = 0xff;
 				bus_info_ptr1->current_bus_mode = 0xff;
-				
+
 				bus_info_ptr1->controller_id = hpc_ptr->ctlr_id;
-				
+
 				list_add_tail (&bus_info_ptr1->bus_info_list, &bus_info_head);
 
 			} else {
@@ -851,7 +851,7 @@ static int __init ebda_rsrc_controller (void)
 				bus_info_ptr2->slots_at_66_conv = bus_ptr->slots_at_66_conv;
 				bus_info_ptr2->slots_at_66_pcix = bus_ptr->slots_at_66_pcix;
 				bus_info_ptr2->slots_at_100_pcix = bus_ptr->slots_at_100_pcix;
-				bus_info_ptr2->slots_at_133_pcix = bus_ptr->slots_at_133_pcix; 
+				bus_info_ptr2->slots_at_133_pcix = bus_ptr->slots_at_133_pcix;
 			}
 			bus_ptr++;
 		}
@@ -864,7 +864,7 @@ static int __init ebda_rsrc_controller (void)
 				hpc_ptr->u.pci_ctlr.dev_fun = readb (io_mem + addr + 1);
 				hpc_ptr->irq = readb (io_mem + addr + 2);
 				addr += 3;
-				debug ("ctrl bus = %x, ctlr devfun = %x, irq = %x\n", 
+				debug ("ctrl bus = %x, ctlr devfun = %x, irq = %x\n",
 					hpc_ptr->u.pci_ctlr.bus,
 					hpc_ptr->u.pci_ctlr.dev_fun, hpc_ptr->irq);
 				break;
@@ -932,7 +932,7 @@ static int __init ebda_rsrc_controller (void)
 				tmp_slot->supported_speed =  2;
 			else if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_66_MAX) == EBDA_SLOT_66_MAX)
 				tmp_slot->supported_speed =  1;
-				
+
 			if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_PCIX_CAP) == EBDA_SLOT_PCIX_CAP)
 				tmp_slot->supported_bus_mode = 1;
 			else
@@ -1000,7 +1000,7 @@ error_no_hpc:
 	return rc;
 }
 
-/* 
+/*
  * map info (bus, devfun, start addr, end addr..) of i/o, memory,
  * pfm from the physical addr to a list of resource.
  */
@@ -1057,7 +1057,7 @@ static int __init ebda_rsrc_rsrc (void)
 			addr += 10;
 
 			debug ("rsrc from mem or pfm ---\n");
-			debug ("rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", 
+			debug ("rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n",
 				rsrc_ptr->rsrc_type, rsrc_ptr->bus_num, rsrc_ptr->dev_fun, rsrc_ptr->start_addr, rsrc_ptr->end_addr);
 
 			list_add (&rsrc_ptr->ebda_pci_rsrc_list, &ibmphp_ebda_pci_rsrc_head);
@@ -1096,7 +1096,7 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num)
 	struct bus_info *ptr;
 
 	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
-		if (ptr->busno == num) 
+		if (ptr->busno == num)
 			 return ptr;
 	}
 	return NULL;
@@ -1110,7 +1110,7 @@ int ibmphp_get_bus_index (u8 num)
 	struct bus_info *ptr;
 
 	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
-		if (ptr->busno == num)  
+		if (ptr->busno == num)
 			return ptr->index;
 	}
 	return -ENODEV;
@@ -1168,7 +1168,7 @@ static struct pci_device_id id_table[] = {
 		.subdevice	= HPC_SUBSYSTEM_ID,
 		.class		= ((PCI_CLASS_SYSTEM_PCI_HOTPLUG << 8) | 0x00),
 	}, {}
-};		
+};
 
 MODULE_DEVICE_TABLE(pci, id_table);
 
@@ -1197,7 +1197,7 @@ static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
 	struct controller *ctrl;
 
 	debug ("inside ibmphp_probe\n");
-	
+
 	list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) {
@@ -1210,4 +1210,3 @@ static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
 	}
 	return -ENODEV;
 }
-
diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c
index f59ed30512b5..5fc7a089f532 100644
--- a/drivers/pci/hotplug/ibmphp_hpc.c
+++ b/drivers/pci/hotplug/ibmphp_hpc.c
@@ -258,7 +258,7 @@ static u8 i2c_ctrl_write (struct controller *ctlr_ptr, void __iomem *WPGBbar, u8
 {
 	u8 rc;
 	void __iomem *wpg_addr;	// base addr + offset
-	unsigned long wpg_data;	// data to/from WPG LOHI format 
+	unsigned long wpg_data;	// data to/from WPG LOHI format
 	unsigned long ultemp;
 	unsigned long data;	// actual data HILO format
 	int i;
@@ -351,7 +351,7 @@ static u8 i2c_ctrl_write (struct controller *ctlr_ptr, void __iomem *WPGBbar, u8
 }
 
 //------------------------------------------------------------
-//  Read from ISA type HPC 
+//  Read from ISA type HPC
 //------------------------------------------------------------
 static u8 isa_ctrl_read (struct controller *ctlr_ptr, u8 offset)
 {
@@ -372,7 +372,7 @@ static void isa_ctrl_write (struct controller *ctlr_ptr, u8 offset, u8 data)
 {
 	u16 start_address;
 	u16 port_address;
-	
+
 	start_address = ctlr_ptr->u.isa_ctlr.io_start;
 	port_address = start_address + (u16) offset;
 	outb (data, port_address);
@@ -656,11 +656,11 @@ int ibmphp_hpc_readslot (struct slot * pslot, u8 cmd, u8 * pstatus)
 	//--------------------------------------------------------------------
 	// cleanup
 	//--------------------------------------------------------------------
-	
+
 	// remove physical to logical address mapping
 	if ((ctlr_ptr->ctlr_type == 2) || (ctlr_ptr->ctlr_type == 4))
 		iounmap (wpg_bbar);
-	
+
 	free_hpc_access ();
 
 	debug_polling ("%s - Exit rc[%d]\n", __func__, rc);
@@ -835,7 +835,7 @@ static int poll_hpc(void *data)
 		down (&semOperations);
 
 		switch (poll_state) {
-		case POLL_LATCH_REGISTER: 
+		case POLL_LATCH_REGISTER:
 			oldlatchlow = curlatchlow;
 			ctrl_count = 0x00;
 			list_for_each (pslotlist, &ibmphp_slot_head) {
@@ -892,16 +892,16 @@ static int poll_hpc(void *data)
 
 			if (kthread_should_stop())
 				goto out_sleep;
-			
+
 			down (&semOperations);
-			
+
 			if (poll_count >= POLL_LATCH_CNT) {
 				poll_count = 0;
 				poll_state = POLL_SLOTS;
 			} else
 				poll_state = POLL_LATCH_REGISTER;
 			break;
-		}	
+		}
 		/* give up the hardware semaphore */
 		up (&semOperations);
 		/* sleep for a short time just for good measure */
@@ -958,7 +958,7 @@ static int process_changeinstatus (struct slot *pslot, struct slot *poldslot)
 	// bit 5 - HPC_SLOT_PWRGD
 	if ((pslot->status & 0x20) != (poldslot->status & 0x20))
 		// OFF -> ON: ignore, ON -> OFF: disable slot
-		if ((poldslot->status & 0x20) && (SLOT_CONNECT (poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT (poldslot->status))) 
+		if ((poldslot->status & 0x20) && (SLOT_CONNECT (poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT (poldslot->status)))
 			disable = 1;
 
 	// bit 6 - HPC_SLOT_BUS_SPEED
@@ -980,7 +980,7 @@ static int process_changeinstatus (struct slot *pslot, struct slot *poldslot)
 					pslot->status &= ~HPC_SLOT_POWER;
 			}
 		}
-		// CLOSE -> OPEN 
+		// CLOSE -> OPEN
 		else if ((SLOT_PWRGD (poldslot->status) == HPC_SLOT_PWRGD_GOOD)
 			&& (SLOT_CONNECT (poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT (poldslot->status))) {
 			disable = 1;
@@ -1075,7 +1075,7 @@ void __exit ibmphp_hpc_stop_poll_thread (void)
 	debug ("before locking operations \n");
 	ibmphp_lock_operations ();
 	debug ("after locking operations \n");
-	
+
 	// wait for poll thread to exit
 	debug ("before sem_exit down \n");
 	down (&sem_exit);
diff --git a/drivers/pci/hotplug/ibmphp_pci.c b/drivers/pci/hotplug/ibmphp_pci.c
index c60f5f3e838d..639ea3a75e14 100644
--- a/drivers/pci/hotplug/ibmphp_pci.c
+++ b/drivers/pci/hotplug/ibmphp_pci.c
@@ -1,8 +1,8 @@
 /*
  * IBM Hot Plug Controller Driver
- * 
+ *
  * Written By: Irene Zubarev, IBM Corporation
- * 
+ *
  * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
  * Copyright (C) 2001,2002 IBM Corp.
  *
@@ -42,7 +42,7 @@ static u8 find_sec_number (u8 primary_busno, u8 slotno);
 
 /*
  * NOTE..... If BIOS doesn't provide default routing, we assign:
- * 9 for SCSI, 10 for LAN adapters, and 11 for everything else. 
+ * 9 for SCSI, 10 for LAN adapters, and 11 for everything else.
  * If adapter is bridged, then we assign 11 to it and devices behind it.
  * We also assign the same irq numbers for multi function devices.
  * These are PIC mode, so shouldn't matter n.e.ways (hopefully)
@@ -71,11 +71,11 @@ static void assign_alt_irq (struct pci_func * cur_func, u8 class_code)
  * Configures the device to be added (will allocate needed resources if it
  * can), the device can be a bridge or a regular pci device, can also be
  * multi-functional
- * 
+ *
  * Input: function to be added
- * 
+ *
  * TO DO:  The error case with Multifunction device or multi function bridge,
- * if there is an error, will need to go through all previous functions and 
+ * if there is an error, will need to go through all previous functions and
  * unconfigure....or can add some code into unconfigure_card....
  */
 int ibmphp_configure_card (struct pci_func *func, u8 slotno)
@@ -98,7 +98,7 @@ int ibmphp_configure_card (struct pci_func *func, u8 slotno)
 	cur_func = func;
 
 	/* We only get bus and device from IRQ routing table.  So at this point,
-	 * func->busno is correct, and func->device contains only device (at the 5 
+	 * func->busno is correct, and func->device contains only device (at the 5
 	 * highest bits)
 	 */
 
@@ -151,7 +151,7 @@ int ibmphp_configure_card (struct pci_func *func, u8 slotno)
 						     cur_func->device, cur_func->busno);
 						cleanup_count = 6;
 						goto error;
-					}	
+					}
 					cur_func->next = NULL;
 					function = 0x8;
 					break;
@@ -339,7 +339,7 @@ error:
 }
 
 /*
- * This function configures the pci BARs of a single device.  
+ * This function configures the pci BARs of a single device.
  * Input: pointer to the pci_func
  * Output: configured PCI, 0, or error
  */
@@ -371,17 +371,17 @@ static int configure_device (struct pci_func *func)
 
 	for (count = 0; address[count]; count++) {	/* for 6 BARs */
 
-		/* not sure if i need this.  per scott, said maybe need smth like this
+		/* not sure if i need this.  per scott, said maybe need * something like this
 		   if devices don't adhere 100% to the spec, so don't want to write
 		   to the reserved bits
 
-		pcibios_read_config_byte(cur_func->busno, cur_func->device, 
+		pcibios_read_config_byte(cur_func->busno, cur_func->device,
 		PCI_BASE_ADDRESS_0 + 4 * count, &tmp);
 		if (tmp & 0x01) // IO
-			pcibios_write_config_dword(cur_func->busno, cur_func->device, 
+			pcibios_write_config_dword(cur_func->busno, cur_func->device,
 			PCI_BASE_ADDRESS_0 + 4 * count, 0xFFFFFFFD);
 		else  // Memory
-			pcibios_write_config_dword(cur_func->busno, cur_func->device, 
+			pcibios_write_config_dword(cur_func->busno, cur_func->device,
 			PCI_BASE_ADDRESS_0 + 4 * count, 0xFFFFFFFF);
 		 */
 		pci_bus_write_config_dword (ibmphp_pci_bus, devfn, address[count], 0xFFFFFFFF);
@@ -421,8 +421,8 @@ static int configure_device (struct pci_func *func)
 				return -EIO;
 			}
 			pci_bus_write_config_dword (ibmphp_pci_bus, devfn, address[count], func->io[count]->start);
-	
-			/* _______________This is for debugging purposes only_____________________ */ 
+
+			/* _______________This is for debugging purposes only_____________________ */
 			debug ("b4 writing, the IO address is %x\n", func->io[count]->start);
 			pci_bus_read_config_dword (ibmphp_pci_bus, devfn, address[count], &bar[count]);
 			debug ("after writing.... the start address is %x\n", bar[count]);
@@ -484,7 +484,7 @@ static int configure_device (struct pci_func *func)
 
 				pci_bus_write_config_dword (ibmphp_pci_bus, devfn, address[count], func->pfmem[count]->start);
 
-				/*_______________This is for debugging purposes only______________________________*/				
+				/*_______________This is for debugging purposes only______________________________*/
 				debug ("b4 writing, start address is %x\n", func->pfmem[count]->start);
 				pci_bus_read_config_dword (ibmphp_pci_bus, devfn, address[count], &bar[count]);
 				debug ("after writing, start address is %x\n", bar[count]);
@@ -559,7 +559,7 @@ static int configure_device (struct pci_func *func)
 /******************************************************************************
  * This routine configures a PCI-2-PCI bridge and the functions behind it
  * Parameters: pci_func
- * Returns: 
+ * Returns:
  ******************************************************************************/
 static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 {
@@ -622,7 +622,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 	debug ("AFTER FIND_SEC_NUMBER, func->busno IS %x\n", func->busno);
 
 	pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, sec_number);
-	
+
 	/* __________________For debugging purposes only __________________________________
 	pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_number);
 	debug ("sec_number after write/read is %x\n", sec_number);
@@ -644,7 +644,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 
 
 	/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-	   !!!!!!!!!!!!!!!NEED TO ADD!!!  FAST BACK-TO-BACK ENABLE!!!!!!!!!!!!!!!!!!!! 
+	   !!!!!!!!!!!!!!!NEED TO ADD!!!  FAST BACK-TO-BACK ENABLE!!!!!!!!!!!!!!!!!!!!
 	   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
 
 
@@ -670,7 +670,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 			debug ("len[count] in IO = %x\n", len[count]);
 
 			bus_io[count] = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
-		
+
 			if (!bus_io[count]) {
 				err ("out of system memory\n");
 				retval = -ENOMEM;
@@ -735,7 +735,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 						ibmphp_add_pfmem_from_mem (bus_pfmem[count]);
 						func->pfmem[count] = bus_pfmem[count];
 					} else {
-						err ("cannot allocate requested pfmem for bus %x, device %x, len %x\n", 
+						err ("cannot allocate requested pfmem for bus %x, device %x, len %x\n",
 						     func->busno, func->device, len[count]);
 						kfree (mem_tmp);
 						kfree (bus_pfmem[count]);
@@ -805,7 +805,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 	debug ("amount_needed->mem = %x\n", amount_needed->mem);
 	debug ("amount_needed->pfmem =  %x\n", amount_needed->pfmem);
 
-	if (amount_needed->not_correct) {		
+	if (amount_needed->not_correct) {
 		debug ("amount_needed is not correct\n");
 		for (count = 0; address[count]; count++) {
 			/* for 2 BARs */
@@ -830,7 +830,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 	} else {
 		debug ("it wants %x IO behind the bridge\n", amount_needed->io);
 		io = kzalloc(sizeof(*io), GFP_KERNEL);
-		
+
 		if (!io) {
 			err ("out of system memory\n");
 			retval = -ENOMEM;
@@ -959,7 +959,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 
 		if (bus->noIORanges) {
 			pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_IO_BASE, 0x00 | bus->rangeIO->start >> 8);
-			pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_IO_LIMIT, 0x00 | bus->rangeIO->end >> 8);	
+			pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_IO_LIMIT, 0x00 | bus->rangeIO->end >> 8);
 
 			/* _______________This is for debugging purposes only ____________________
 			pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_IO_BASE, &temp);
@@ -980,7 +980,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 		if (bus->noMemRanges) {
 			pci_bus_write_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, 0x0000 | bus->rangeMem->start >> 16);
 			pci_bus_write_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, 0x0000 | bus->rangeMem->end >> 16);
-			
+
 			/* ____________________This is for debugging purposes only ________________________
 			pci_bus_read_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, &temp);
 			debug ("mem_base = %x\n", (temp & PCI_MEMORY_RANGE_TYPE_MASK) << 16);
@@ -1017,7 +1017,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno)
 		pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_INTERRUPT_PIN, &irq);
 		if ((irq > 0x00) && (irq < 0x05))
 			pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_INTERRUPT_LINE, func->irq[irq - 1]);
-		/*    
+		/*
 		pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, ctrl);
 		pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_PARITY);
 		pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_SERR);
@@ -1071,7 +1071,7 @@ error:
  * This function adds up the amount of resources needed behind the PPB bridge
  * and passes it to the configure_bridge function
  * Input: bridge function
- * Ouput: amount of resources needed
+ * Output: amount of resources needed
  *****************************************************************************/
 static struct res_needed *scan_behind_bridge (struct pci_func * func, u8 busno)
 {
@@ -1204,9 +1204,9 @@ static struct res_needed *scan_behind_bridge (struct pci_func * func, u8 busno)
 	return amount;
 }
 
-/* The following 3 unconfigure_boot_ routines deal with the case when we had the card 
- * upon bootup in the system, since we don't allocate func to such case, we need to read 
- * the start addresses from pci config space and then find the corresponding entries in 
+/* The following 3 unconfigure_boot_ routines deal with the case when we had the card
+ * upon bootup in the system, since we don't allocate func to such case, we need to read
+ * the start addresses from pci config space and then find the corresponding entries in
  * our resource lists.  The functions return either 0, -ENODEV, or -1 (general failure)
  * Change: we also call these functions even if we configured the card ourselves (i.e., not
  * the bootup case), since it should work same way
@@ -1561,8 +1561,8 @@ static int unconfigure_boot_card (struct slot *slot_cur)
  * unconfiguring the device
  * TO DO:  will probably need to add some code in case there was some resource,
  * to remove it... this is from when we have errors in the configure_card...
- * 			!!!!!!!!!!!!!!!!!!!!!!!!!FOR BUSES!!!!!!!!!!!!
- * Returns: 0, -1, -ENODEV 
+ *			!!!!!!!!!!!!!!!!!!!!!!!!!FOR BUSES!!!!!!!!!!!!
+ * Returns: 0, -1, -ENODEV
  */
 int ibmphp_unconfigure_card (struct slot **slot_cur, int the_end)
 {
@@ -1634,7 +1634,7 @@ int ibmphp_unconfigure_card (struct slot **slot_cur, int the_end)
  * Input: bus and the amount of resources needed (we know we can assign those,
  *        since they've been checked already
  * Output: bus added to the correct spot
- *         0, -1, error 
+ *         0, -1, error
  */
 static int add_new_bus (struct bus_node *bus, struct resource_node *io, struct resource_node *mem, struct resource_node *pfmem, u8 parent_busno)
 {
@@ -1650,7 +1650,7 @@ static int add_new_bus (struct bus_node *bus, struct resource_node *io, struct r
 			err ("strange, cannot find bus which is supposed to be at the system... something is terribly wrong...\n");
 			return -ENODEV;
 		}
-	
+
 		list_add (&bus->bus_list, &cur_bus->bus_list);
 	}
 	if (io) {
@@ -1679,7 +1679,7 @@ static int add_new_bus (struct bus_node *bus, struct resource_node *io, struct r
 	}
 	if (pfmem) {
 		pfmem_range = kzalloc(sizeof(*pfmem_range), GFP_KERNEL);
-		if (!pfmem_range) {	
+		if (!pfmem_range) {
 			err ("out of system memory\n");
 			return -ENOMEM;
 		}
@@ -1726,4 +1726,3 @@ static u8 find_sec_number (u8 primary_busno, u8 slotno)
 		return busno;
 	return 0xff;
 }
-
diff --git a/drivers/pci/hotplug/ibmphp_res.c b/drivers/pci/hotplug/ibmphp_res.c
index e2dc289f767c..a265acb2d518 100644
--- a/drivers/pci/hotplug/ibmphp_res.c
+++ b/drivers/pci/hotplug/ibmphp_res.c
@@ -72,7 +72,7 @@ static struct bus_node * __init alloc_error_bus (struct ebda_pci_rsrc * curr, u8
 static struct resource_node * __init alloc_resources (struct ebda_pci_rsrc * curr)
 {
 	struct resource_node *rs;
-	
+
 	if (!curr) {
 		err ("NULL passed to allocate\n");
 		return NULL;
@@ -128,7 +128,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node
 	}
 	newrange->start = curr->start_addr;
 	newrange->end = curr->end_addr;
-		
+
 	if (first_bus || (!num_ranges))
 		newrange->rangeno = 1;
 	else {
@@ -162,7 +162,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node
 			newbus->rangePFMem = newrange;
 			if (first_bus)
 				newbus->noPFMemRanges = 1;
-			else {	
+			else {
 				debug ("1st PFMemory Primary on Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
 				++newbus->noPFMemRanges;
 				fix_resources (newbus);
@@ -190,7 +190,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node
  * This is the Resource Management initialization function.  It will go through
  * the Resource list taken from EBDA and fill in this module's data structures
  *
- * THIS IS NOT TAKING INTO CONSIDERATION IO RESTRICTIONS OF PRIMARY BUSES, 
+ * THIS IS NOT TAKING INTO CONSIDERATION IO RESTRICTIONS OF PRIMARY BUSES,
  * SINCE WE'RE GOING TO ASSUME FOR NOW WE DON'T HAVE THOSE ON OUR BUSES FOR NOW
  *
  * Input: ptr to the head of the resource list from EBDA
@@ -382,7 +382,7 @@ int __init ibmphp_rsrc_init (void)
  * pci devices' resources for the appropriate resource
  *
  * Input: type of the resource, range to add, current bus
- * Output: 0 or -1, bus and range ptrs 
+ * Output: 0 or -1, bus and range ptrs
  ********************************************************************************/
 static int add_bus_range (int type, struct range_node *range, struct bus_node *bus_cur)
 {
@@ -466,7 +466,7 @@ static void update_resources (struct bus_node *bus_cur, int type, int rangeno)
 
 	switch (type) {
 		case MEM:
-			if (bus_cur->firstMem) 
+			if (bus_cur->firstMem)
 				res = bus_cur->firstMem;
 			break;
 		case PFMEM:
@@ -583,7 +583,7 @@ static void fix_resources (struct bus_node *bus_cur)
 }
 
 /*******************************************************************************
- * This routine adds a resource to the list of resources to the appropriate bus 
+ * This routine adds a resource to the list of resources to the appropriate bus
  * based on their resource type and sorted by their starting addresses.  It assigns
  * the ptrs to next and nextRange if needed.
  *
@@ -605,11 +605,11 @@ int ibmphp_add_resource (struct resource_node *res)
 		err ("NULL passed to add\n");
 		return -ENODEV;
 	}
-	
+
 	bus_cur = find_bus_wprev (res->busno, NULL, 0);
-	
+
 	if (!bus_cur) {
-		/* didn't find a bus, smth's wrong!!! */
+		/* didn't find a bus, something's wrong!!! */
 		debug ("no bus in the system, either pci_dev's wrong or allocation failed\n");
 		return -ENODEV;
 	}
@@ -648,7 +648,7 @@ int ibmphp_add_resource (struct resource_node *res)
 	if (!range_cur) {
 		switch (res->type) {
 			case IO:
-				++bus_cur->needIOUpdate;					
+				++bus_cur->needIOUpdate;
 				break;
 			case MEM:
 				++bus_cur->needMemUpdate;
@@ -659,13 +659,13 @@ int ibmphp_add_resource (struct resource_node *res)
 		}
 		res->rangeno = -1;
 	}
-	
+
 	debug ("The range is %d\n", res->rangeno);
 	if (!res_start) {
 		/* no first{IO,Mem,Pfmem} on the bus, 1st IO/Mem/Pfmem resource ever */
 		switch (res->type) {
 			case IO:
-				bus_cur->firstIO = res;					
+				bus_cur->firstIO = res;
 				break;
 			case MEM:
 				bus_cur->firstMem = res;
@@ -673,7 +673,7 @@ int ibmphp_add_resource (struct resource_node *res)
 			case PFMEM:
 				bus_cur->firstPFMem = res;
 				break;
-		}	
+		}
 		res->next = NULL;
 		res->nextRange = NULL;
 	} else {
@@ -770,7 +770,7 @@ int ibmphp_add_resource (struct resource_node *res)
  * This routine will remove the resource from the list of resources
  *
  * Input: io, mem, and/or pfmem resource to be deleted
- * Ouput: modified resource list
+ * Output: modified resource list
  *        0 or error code
  ****************************************************************************/
 int ibmphp_remove_resource (struct resource_node *res)
@@ -825,7 +825,7 @@ int ibmphp_remove_resource (struct resource_node *res)
 
 	if (!res_cur) {
 		if (res->type == PFMEM) {
-			/* 
+			/*
 			 * case where pfmem might be in the PFMemFromMem list
 			 * so will also need to remove the corresponding mem
 			 * entry
@@ -961,12 +961,12 @@ static struct range_node * find_range (struct bus_node *bus_cur, struct resource
 }
 
 /*****************************************************************************
- * This routine will check to make sure the io/mem/pfmem->len that the device asked for 
+ * This routine will check to make sure the io/mem/pfmem->len that the device asked for
  * can fit w/i our list of available IO/MEM/PFMEM resources.  If cannot, returns -EINVAL,
  * otherwise, returns 0
  *
  * Input: resource
- * Ouput: the correct start and end address are inputted into the resource node,
+ * Output: the correct start and end address are inputted into the resource node,
  *        0 or -EINVAL
  *****************************************************************************/
 int ibmphp_check_resource (struct resource_node *res, u8 bridge)
@@ -996,7 +996,7 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge)
 	bus_cur = find_bus_wprev (res->busno, NULL, 0);
 
 	if (!bus_cur) {
-		/* didn't find a bus, smth's wrong!!! */
+		/* didn't find a bus, something's wrong!!! */
 		debug ("no bus in the system, either pci_dev's wrong or allocation failed\n");
 		return -EINVAL;
 	}
@@ -1066,7 +1066,7 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge)
 								break;
 						}
 					}
-			
+
 					if (flag && len_cur == res->len) {
 						debug ("but we are not here, right?\n");
 						res->start = start_cur;
@@ -1118,10 +1118,10 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge)
 		if (res_prev) {
 			if (res_prev->rangeno != res_cur->rangeno) {
 				/* 1st device on this range */
-				if ((res_cur->start != range->start) && 
+				if ((res_cur->start != range->start) &&
 					((len_tmp = res_cur->start - 1 - range->start) >= res->len)) {
 					if ((len_tmp < len_cur) || (len_cur == 0)) {
-						if ((range->start % tmp_divide) == 0) {	
+						if ((range->start % tmp_divide) == 0) {
 							/* just perfect, starting address is divisible by length */
 							flag = 1;
 							len_cur = len_tmp;
@@ -1344,7 +1344,7 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge)
  * This routine is called from remove_card if the card contained PPB.
  * It will remove all the resources on the bus as well as the bus itself
  * Input: Bus
- * Ouput: 0, -ENODEV
+ * Output: 0, -ENODEV
  ********************************************************************************/
 int ibmphp_remove_bus (struct bus_node *bus, u8 parent_busno)
 {
@@ -1353,7 +1353,7 @@ int ibmphp_remove_bus (struct bus_node *bus, u8 parent_busno)
 	struct bus_node *prev_bus;
 	int rc;
 
-	prev_bus = find_bus_wprev (parent_busno, NULL, 0);	
+	prev_bus = find_bus_wprev (parent_busno, NULL, 0);
 
 	if (!prev_bus) {
 		debug ("something terribly wrong. Cannot find parent bus to the one to remove\n");
@@ -1424,7 +1424,7 @@ int ibmphp_remove_bus (struct bus_node *bus, u8 parent_busno)
 }
 
 /******************************************************************************
- * This routine deletes the ranges from a given bus, and the entries from the 
+ * This routine deletes the ranges from a given bus, and the entries from the
  * parent's bus in the resources
  * Input: current bus, previous bus
  * Output: 0, -EINVAL
@@ -1453,7 +1453,7 @@ static int remove_ranges (struct bus_node *bus_cur, struct bus_node *bus_prev)
 	if (bus_cur->noMemRanges) {
 		range_cur = bus_cur->rangeMem;
 		for (i = 0; i < bus_cur->noMemRanges; i++) {
-			if (ibmphp_find_resource (bus_prev, range_cur->start, &res, MEM) < 0) 
+			if (ibmphp_find_resource (bus_prev, range_cur->start, &res, MEM) < 0)
 				return -EINVAL;
 
 			ibmphp_remove_resource (res);
@@ -1467,7 +1467,7 @@ static int remove_ranges (struct bus_node *bus_cur, struct bus_node *bus_prev)
 	if (bus_cur->noPFMemRanges) {
 		range_cur = bus_cur->rangePFMem;
 		for (i = 0; i < bus_cur->noPFMemRanges; i++) {
-			if (ibmphp_find_resource (bus_prev, range_cur->start, &res, PFMEM) < 0) 
+			if (ibmphp_find_resource (bus_prev, range_cur->start, &res, PFMEM) < 0)
 				return -EINVAL;
 
 			ibmphp_remove_resource (res);
@@ -1482,7 +1482,7 @@ static int remove_ranges (struct bus_node *bus_cur, struct bus_node *bus_prev)
 }
 
 /*
- * find the resource node in the bus 
+ * find the resource node in the bus
  * Input: Resource needed, start address of the resource, type of resource
  */
 int ibmphp_find_resource (struct bus_node *bus, u32 start_address, struct resource_node **res, int flag)
@@ -1512,7 +1512,7 @@ int ibmphp_find_resource (struct bus_node *bus, u32 start_address, struct resour
 			err ("wrong type of flag\n");
 			return -EINVAL;
 	}
-	
+
 	while (res_cur) {
 		if (res_cur->start == start_address) {
 			*res = res_cur;
@@ -1718,7 +1718,7 @@ static int __init once_over (void)
 			}	/* end for pfmem */
 		}	/* end if */
 	}	/* end list_for_each bus */
-	return 0; 
+	return 0;
 }
 
 int ibmphp_add_pfmem_from_mem (struct resource_node *pfmem)
@@ -1760,9 +1760,9 @@ static struct bus_node *find_bus_wprev (u8 bus_number, struct bus_node **prev, u
 	list_for_each (tmp, &gbuses) {
 		tmp_prev = tmp->prev;
 		bus_cur = list_entry (tmp, struct bus_node, bus_list);
-		if (flag) 
+		if (flag)
 			*prev = list_entry (tmp_prev, struct bus_node, bus_list);
-		if (bus_cur->busno == bus_number) 
+		if (bus_cur->busno == bus_number)
 			return bus_cur;
 	}
 
@@ -1776,7 +1776,7 @@ void ibmphp_print_test (void)
 	struct range_node *range;
 	struct resource_node *res;
 	struct list_head *tmp;
-	
+
 	debug_pci ("*****************START**********************\n");
 
 	if ((!list_empty(&gbuses)) && flags) {
@@ -1906,7 +1906,7 @@ static int range_exists_already (struct range_node * range, struct bus_node * bu
 			return 1;
 		range_cur = range_cur->next;
 	}
-	
+
 	return 0;
 }
 
@@ -1920,7 +1920,7 @@ static int range_exists_already (struct range_node * range, struct bus_node * bu
  * Returns: none
  * Note: this function doesn't take into account IO restrictions etc,
  *	 so will only work for bridges with no video/ISA devices behind them It
- *	 also will not work for onboard PPB's that can have more than 1 *bus
+ *	 also will not work for onboard PPBs that can have more than 1 *bus
  *	 behind them All these are TO DO.
  *	 Also need to add more error checkings... (from fnc returns etc)
  */
@@ -1963,7 +1963,7 @@ static int __init update_bridge_ranges (struct bus_node **bus)
 					case PCI_HEADER_TYPE_BRIDGE:
 						function = 0x8;
 					case PCI_HEADER_TYPE_MULTIBRIDGE:
-						/* We assume here that only 1 bus behind the bridge 
+						/* We assume here that only 1 bus behind the bridge
 						   TO DO: add functionality for several:
 						   temp = secondary;
 						   while (temp < subordinate) {
@@ -1972,7 +1972,7 @@ static int __init update_bridge_ranges (struct bus_node **bus)
 						   }
 						 */
 						pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_busno);
-						bus_sec = find_bus_wprev (sec_busno, NULL, 0); 
+						bus_sec = find_bus_wprev (sec_busno, NULL, 0);
 						/* this bus structure doesn't exist yet, PPB was configured during previous loading of ibmphp */
 						if (!bus_sec) {
 							bus_sec = alloc_error_bus (NULL, sec_busno, 1);
@@ -2028,7 +2028,7 @@ static int __init update_bridge_ranges (struct bus_node **bus)
 								io->len = io->end - io->start + 1;
 								ibmphp_add_resource (io);
 							}
-						}	
+						}
 
 						pci_bus_read_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, &start_mem_address);
 						pci_bus_read_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, &end_mem_address);
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index ec20f74c8981..cfa92a984e62 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -131,7 +131,7 @@ static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf,
 	}
 	module_put(slot->ops->owner);
 
-exit:	
+exit:
 	if (retval)
 		return retval;
 	return count;
@@ -177,7 +177,7 @@ static ssize_t attention_write_file(struct pci_slot *slot, const char *buf,
 		retval = ops->set_attention_status(slot->hotplug, attention);
 	module_put(ops->owner);
 
-exit:	
+exit:
 	if (retval)
 		return retval;
 	return count;
@@ -247,7 +247,7 @@ static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf,
 		retval = slot->ops->hardware_test(slot, test);
 	module_put(slot->ops->owner);
 
-exit:	
+exit:
 	if (retval)
 		return retval;
 	return count;
@@ -512,7 +512,7 @@ int pci_hp_deregister(struct hotplug_slot *hotplug)
  * @hotplug: pointer to the slot whose info has changed
  * @info: pointer to the info copy into the slot's info structure
  *
- * @slot must have been registered with the pci 
+ * @slot must have been registered with the pci
  * hotplug subsystem previously with a call to pci_hp_register().
  *
  * Returns 0 if successful, anything else for an error.
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 541bbe6d5343..21e865ded1dc 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -180,5 +180,5 @@ static inline int pciehp_acpi_slot_detection_check(struct pci_dev *dev)
 {
 	return 0;
 }
-#endif 				/* CONFIG_ACPI */
+#endif				/* CONFIG_ACPI */
 #endif				/* _PCIEHP_H */
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index ead7c534095e..55ea2a07f266 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -78,7 +78,7 @@ static int __initdata dup_slot_id;
 static int __initdata acpi_slot_detected;
 static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots);
 
-/* Dummy driver for dumplicate name detection */
+/* Dummy driver for duplicate name detection */
 static int __init dummy_probe(struct pcie_device *dev)
 {
 	u32 slot_cap;
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index f4a18f51a29c..bbd48bbe4e9b 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -351,8 +351,8 @@ static int __init pcied_init(void)
 
 	pciehp_firmware_init();
 	retval = pcie_port_service_register(&hpdriver_portdrv);
- 	dbg("pcie_port_service_register = %d\n", retval);
-  	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
+	dbg("pcie_port_service_register = %d\n", retval);
+	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
 	if (retval)
 		dbg("Failure to register service\n");
 
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 51f56ef4ab6f..3eea3fdd4b0b 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -92,7 +92,7 @@ static void start_int_poll_timer(struct controller *ctrl, int sec)
 {
 	/* Clamp to sane value */
 	if ((sec <= 0) || (sec > 60))
-        	sec = 2;
+		sec = 2;
 
 	ctrl->poll_timer.function = &int_poll_timeout;
 	ctrl->poll_timer.data = (unsigned long)ctrl;
@@ -194,7 +194,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 			ctrl_dbg(ctrl, "CMD_COMPLETED not clear after 1 sec\n");
 		} else if (!NO_CMD_CMPL(ctrl)) {
 			/*
-			 * This controller semms to notify of command completed
+			 * This controller seems to notify of command completed
 			 * event even though it supports none of power
 			 * controller, attention led, power led and EMI.
 			 */
@@ -926,7 +926,7 @@ struct controller *pcie_init(struct pcie_device *dev)
 	if (pciehp_writew(ctrl, PCI_EXP_SLTSTA, 0x1f))
 		goto abort_ctrl;
 
-	/* Disable sotfware notification */
+	/* Disable software notification */
 	pcie_disable_notification(ctrl);
 
 	ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
diff --git a/drivers/pci/hotplug/pcihp_skeleton.c b/drivers/pci/hotplug/pcihp_skeleton.c
index 1f00b937f721..ac69094e4b20 100644
--- a/drivers/pci/hotplug/pcihp_skeleton.c
+++ b/drivers/pci/hotplug/pcihp_skeleton.c
@@ -52,7 +52,7 @@ static LIST_HEAD(slot_list);
 	do {							\
 		if (debug)					\
 			printk (KERN_DEBUG "%s: " format "\n",	\
-				MY_NAME , ## arg); 		\
+				MY_NAME , ## arg);		\
 	} while (0)
 #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg)
 #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg)
@@ -287,7 +287,7 @@ static int __init init_slots(void)
 		hotplug_slot->release = &release_slot;
 		make_slot_name(slot);
 		hotplug_slot->ops = &skel_hotplug_slot_ops;
-		
+
 		/*
 		 * Initialize the slot info structure with some known
 		 * good values.
@@ -296,7 +296,7 @@ static int __init init_slots(void)
 		get_attention_status(hotplug_slot, &info->attention_status);
 		get_latch_status(hotplug_slot, &info->latch_status);
 		get_adapter_status(hotplug_slot, &info->adapter_status);
-		
+
 		dbg("registering slot %d\n", i);
 		retval = pci_hp_register(slot->hotplug_slot);
 		if (retval) {
@@ -336,7 +336,7 @@ static void __exit cleanup_slots(void)
 		pci_hp_deregister(slot->hotplug_slot);
 	}
 }
-		
+
 static int __init pcihp_skel_init(void)
 {
 	int retval;
diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c
index bb7af78e4eed..e9c044d15add 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -217,7 +217,7 @@ static int dlpar_remove_phb(char *drc_name, struct device_node *dn)
 	if (!pcibios_find_pci_bus(dn))
 		return -EINVAL;
 
-	/* If pci slot is hotplugable, use hotplug to remove it */
+	/* If pci slot is hotpluggable, use hotplug to remove it */
 	slot = find_php_slot(dn);
 	if (slot && rpaphp_deregister_slot(slot)) {
 		printk(KERN_ERR "%s: unable to remove hotplug slot %s\n",
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 3135856e5e1c..b2593e876a09 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -49,9 +49,9 @@
 extern bool rpaphp_debug;
 #define dbg(format, arg...)					\
 	do {							\
-		if (rpaphp_debug)					\
+		if (rpaphp_debug)				\
 			printk(KERN_DEBUG "%s: " format,	\
-				MY_NAME , ## arg); 		\
+				MY_NAME , ## arg);		\
 	} while (0)
 #define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
 #define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg)
@@ -99,5 +99,5 @@ void dealloc_slot_struct(struct slot *slot);
 struct slot *alloc_slot_struct(struct device_node *dn, int drc_index, char *drc_name, int power_domain);
 int rpaphp_register_slot(struct slot *slot);
 int rpaphp_deregister_slot(struct slot *slot);
-	
+
 #endif				/* _PPC64PHP_H */
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 127d6e600185..b7fc5c9255a5 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -226,7 +226,7 @@ int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
 	for (i = 0; i < indexes[0]; i++) {
 		if ((unsigned int) indexes[i + 1] == *my_index) {
 			if (drc_name)
-                		*drc_name = name_tmp;
+				*drc_name = name_tmp;
 			if (drc_type)
 				*drc_type = type_tmp;
 			if (drc_index)
@@ -289,7 +289,7 @@ static int is_php_dn(struct device_node *dn, const int **indexes,
  * rpaphp_add_slot -- declare a hotplug slot to the hotplug subsystem.
  * @dn: device node of slot
  *
- * This subroutine will register a hotplugable slot with the
+ * This subroutine will register a hotpluggable slot with the
  * PCI hotplug infrastructure. This routine is typically called
  * during boot time, if the hotplug slots are present at boot time,
  * or is called later, by the dlpar add code, if the slot is
@@ -328,7 +328,7 @@ int rpaphp_add_slot(struct device_node *dn)
 			return -ENOMEM;
 
 		slot->type = simple_strtoul(type, NULL, 10);
-				
+
 		dbg("Found drc-index:0x%x drc-name:%s drc-type:%s\n",
 				indexes[i + 1], name, type);
 
@@ -356,7 +356,7 @@ static void __exit cleanup_slots(void)
 	/*
 	 * Unregister all of our slots with the pci_hotplug subsystem,
 	 * and free up all memory that we had allocated.
-	 * memory will be freed in release_slot callback. 
+	 * memory will be freed in release_slot callback.
 	 */
 
 	list_for_each_safe(tmp, n, &rpaphp_slot_head) {
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index 513e1e282391..9243f3e7a1c9 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -44,7 +44,7 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state)
 			dbg("%s: slot must be power up to get sensor-state\n",
 			    __func__);
 
-			/* some slots have to be powered up 
+			/* some slots have to be powered up
 			 * before get-sensor will succeed.
 			 */
 			rc = rtas_set_power_level(slot->power_domain, POWER_ON,
@@ -133,4 +133,3 @@ int rpaphp_enable_slot(struct slot *slot)
 
 	return 0;
 }
-
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c
index b283bbea6d24..a6082cc263f7 100644
--- a/drivers/pci/hotplug/rpaphp_slot.c
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -1,5 +1,5 @@
 /*
- * RPA Virtual I/O device functions 
+ * RPA Virtual I/O device functions
  * Copyright (C) 2004 Linda Xie <lxie@us.ibm.com>
  *
  * All rights reserved.
@@ -51,27 +51,27 @@ struct slot *alloc_slot_struct(struct device_node *dn,
                        int drc_index, char *drc_name, int power_domain)
 {
 	struct slot *slot;
-	
+
 	slot = kzalloc(sizeof(struct slot), GFP_KERNEL);
 	if (!slot)
 		goto error_nomem;
 	slot->hotplug_slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
 	if (!slot->hotplug_slot)
-		goto error_slot;	
+		goto error_slot;
 	slot->hotplug_slot->info = kzalloc(sizeof(struct hotplug_slot_info),
 					   GFP_KERNEL);
 	if (!slot->hotplug_slot->info)
 		goto error_hpslot;
 	slot->name = kstrdup(drc_name, GFP_KERNEL);
 	if (!slot->name)
-		goto error_info;	
+		goto error_info;
 	slot->dn = dn;
 	slot->index = drc_index;
 	slot->power_domain = power_domain;
 	slot->hotplug_slot->private = slot;
 	slot->hotplug_slot->ops = &rpaphp_hotplug_slot_ops;
 	slot->hotplug_slot->release = &rpaphp_release_slot;
-	
+
 	return (slot);
 
 error_info:
@@ -91,7 +91,7 @@ static int is_registered(struct slot *slot)
 	list_for_each_entry(tmp_slot, &rpaphp_slot_head, rpaphp_slot_list) {
 		if (!strcmp(tmp_slot->name, slot->name))
 			return 1;
-	}	
+	}
 	return 0;
 }
 
@@ -104,7 +104,7 @@ int rpaphp_deregister_slot(struct slot *slot)
 		__func__, slot->name);
 
 	list_del(&slot->rpaphp_slot_list);
-	
+
 	retval = pci_hp_deregister(php_slot);
 	if (retval)
 		err("Problem unregistering a slot %s\n", slot->name);
@@ -120,7 +120,7 @@ int rpaphp_register_slot(struct slot *slot)
 	int retval;
 	int slotno;
 
-	dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", 
+	dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n",
 		__func__, slot->dn->full_name, slot->index, slot->name,
 		slot->power_domain, slot->type);
 
@@ -128,7 +128,7 @@ int rpaphp_register_slot(struct slot *slot)
 	if (is_registered(slot)) {
 		err("rpaphp_register_slot: slot[%s] is already registered\n", slot->name);
 		return -EAGAIN;
-	}	
+	}
 
 	if (slot->dn->child)
 		slotno = PCI_SLOT(PCI_DN(slot->dn->child)->devfn);
@@ -145,4 +145,3 @@ int rpaphp_register_slot(struct slot *slot)
 	info("Slot [%s] registered\n", slot->name);
 	return 0;
 }
-
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index d876e4b3c6a9..61529097464d 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -216,13 +216,13 @@ struct ctrl_reg {
 
 /* offsets to the controller registers based on the above structure layout */
 enum ctrl_offsets {
-	BASE_OFFSET 	 = offsetof(struct ctrl_reg, base_offset),
-	SLOT_AVAIL1 	 = offsetof(struct ctrl_reg, slot_avail1),
+	BASE_OFFSET	 = offsetof(struct ctrl_reg, base_offset),
+	SLOT_AVAIL1	 = offsetof(struct ctrl_reg, slot_avail1),
 	SLOT_AVAIL2	 = offsetof(struct ctrl_reg, slot_avail2),
-	SLOT_CONFIG 	 = offsetof(struct ctrl_reg, slot_config),
+	SLOT_CONFIG	 = offsetof(struct ctrl_reg, slot_config),
 	SEC_BUS_CONFIG	 = offsetof(struct ctrl_reg, sec_bus_config),
 	MSI_CTRL	 = offsetof(struct ctrl_reg, msi_ctrl),
-	PROG_INTERFACE 	 = offsetof(struct ctrl_reg, prog_interface),
+	PROG_INTERFACE	 = offsetof(struct ctrl_reg, prog_interface),
 	CMD		 = offsetof(struct ctrl_reg, cmd),
 	CMD_STATUS	 = offsetof(struct ctrl_reg, cmd_status),
 	INTR_LOC	 = offsetof(struct ctrl_reg, intr_loc),
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index d3f757df691c..faf13abd5b99 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -143,11 +143,11 @@ static int init_slots(struct controller *ctrl)
 		snprintf(name, SLOT_NAME_SIZE, "%d", slot->number);
 		hotplug_slot->ops = &shpchp_hotplug_slot_ops;
 
- 		ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x "
- 			 "hp_slot=%x sun=%x slot_device_offset=%x\n",
- 			 pci_domain_nr(ctrl->pci_dev->subordinate),
- 			 slot->bus, slot->device, slot->hp_slot, slot->number,
- 			 ctrl->slot_device_offset);
+		ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x "
+			 "hp_slot=%x sun=%x slot_device_offset=%x\n",
+			 pci_domain_nr(ctrl->pci_dev->subordinate),
+			 slot->bus, slot->device, slot->hp_slot, slot->number,
+			 ctrl->slot_device_offset);
 		retval = pci_hp_register(slot->hotplug_slot,
 				ctrl->pci_dev->subordinate, slot->device, name);
 		if (retval) {
diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c
index 75ba2311b54f..2d7f474ca0ec 100644
--- a/drivers/pci/hotplug/shpchp_hpc.c
+++ b/drivers/pci/hotplug/shpchp_hpc.c
@@ -116,7 +116,7 @@
 #define SLOT_REG_RSVDZ_MASK	((1 << 15) | (7 << 21))
 
 /*
- * SHPC Command Code definitnions
+ * SHPC Command Code definitions
  *
  *     Slot Operation				00h - 3Fh
  *     Set Bus Segment Speed/Mode A		40h - 47h
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 21a7182dccd4..1fe2d6fb19d5 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -610,7 +610,7 @@ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
 	struct resource tmp;
 	enum pci_bar_type type;
 	int reg = pci_iov_resource_bar(dev, resno, &type);
-	
+
 	if (!reg)
 		return 0;
 
diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c
index b008cf86b9c3..6684f153ab57 100644
--- a/drivers/pci/irq.c
+++ b/drivers/pci/irq.c
@@ -25,7 +25,7 @@ static void pci_note_irq_problem(struct pci_dev *pdev, const char *reason)
 /**
  * pci_lost_interrupt - reports a lost PCI interrupt
  * @pdev:	device whose interrupt is lost
- * 
+ *
  * The primary function of this routine is to report a lost interrupt
  * in a standard way which users can recognise (instead of blaming the
  * driver).
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5e63645a7abe..3fcd67a16677 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -784,7 +784,7 @@ error:
  * @nvec: how many MSIs have been requested ?
  * @type: are we checking for MSI or MSI-X ?
  *
- * Look at global flags, the device itself, and its parent busses
+ * Look at global flags, the device itself, and its parent buses
  * to determine if MSI/-X are supported for the device. If MSI/-X is
  * supported return 0, else return an error code.
  **/
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index dfd1f59de729..92a49c2b1fda 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -141,7 +141,7 @@ phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle)
  * if (_PRW at S-state x)
  *	choose from highest power _SxD to lowest power _SxW
  * else // no _PRW at S-state x
- * 	choose highest power _SxD or any lower power
+ *	choose highest power _SxD or any lower power
  */
 
 static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 454853507b7e..9042fdbd7244 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -312,7 +312,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
  * __pci_device_probe - check if a driver wants to claim a specific PCI device
  * @drv: driver to call to check if it wants the PCI device
  * @pci_dev: PCI device being probed
- * 
+ *
  * returns 0 on success, else error.
  * side-effect: pci_dev->driver is set to drv when drv claims pci_dev.
  */
@@ -378,7 +378,7 @@ static int pci_device_remove(struct device * dev)
 	 * We would love to complain here if pci_dev->is_enabled is set, that
 	 * the driver should have called pci_disable_device(), but the
 	 * unfortunate fact is there are too many odd BIOS and bridge setups
-	 * that don't like drivers doing that all of the time.  
+	 * that don't like drivers doing that all of the time.
 	 * Oh well, we can dream of sane hardware when we sleep, no matter how
 	 * horrible the crap we have to deal with is when we are awake...
 	 */
@@ -1156,10 +1156,10 @@ static const struct dev_pm_ops pci_dev_pm_ops = {
  * @drv: the driver structure to register
  * @owner: owner module of drv
  * @mod_name: module name string
- * 
+ *
  * Adds the driver structure to the list of registered drivers.
- * Returns a negative value on error, otherwise 0. 
- * If no error occurred, the driver remains registered even if 
+ * Returns a negative value on error, otherwise 0.
+ * If no error occurred, the driver remains registered even if
  * no device was claimed during registration.
  */
 int __pci_register_driver(struct pci_driver *drv, struct module *owner,
@@ -1181,7 +1181,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
 /**
  * pci_unregister_driver - unregister a pci driver
  * @drv: the driver structure to unregister
- * 
+ *
  * Deletes the driver structure from the list of registered PCI drivers,
  * gives it a chance to clean up by calling its remove() function for
  * each device it was responsible for, and marks those devices as
@@ -1203,7 +1203,7 @@ static struct pci_driver pci_compat_driver = {
  * pci_dev_driver - get the pci_driver of a device
  * @dev: the device to query
  *
- * Returns the appropriate pci_driver structure or %NULL if there is no 
+ * Returns the appropriate pci_driver structure or %NULL if there is no
  * registered driver for the device.
  */
 struct pci_driver *
@@ -1224,7 +1224,7 @@ pci_dev_driver(const struct pci_dev *dev)
  * pci_bus_match - Tell if a PCI device structure has a matching PCI device id structure
  * @dev: the PCI device structure to match against
  * @drv: the device driver to search for matching PCI device id structures
- * 
+ *
  * Used by a driver to check whether a PCI device present in the
  * system is in its list of supported devices. Returns the matching
  * pci_device_id structure or %NULL if there is no match.
diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c
index 6e47c519c510..2ff77509d8e5 100644
--- a/drivers/pci/pci-stub.c
+++ b/drivers/pci/pci-stub.c
@@ -2,13 +2,13 @@
  *
  * Copyright (C) 2008 Red Hat, Inc.
  * Author:
- * 	Chris Wright
+ *	Chris Wright
  *
  * This work is licensed under the terms of the GNU GPL, version 2.
  *
  * Usage is simple, allocate a new id to the stub driver and bind the
  * device to it.  For example:
- * 
+ *
  * # echo "8086 10f5" > /sys/bus/pci/drivers/pci-stub/new_id
  * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind
  * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/pci-stub/bind
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 2aaa83c85a4e..c91e6c18debc 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -10,7 +10,7 @@
  *
  * File attributes for PCI devices
  *
- * Modeled after usb's driverfs.c 
+ * Modeled after usb's driverfs.c
  *
  */
 
@@ -270,13 +270,17 @@ msi_bus_store(struct device *dev, struct device_attribute *attr,
 	if (kstrtoul(buf, 0, &val) < 0)
 		return -EINVAL;
 
-	/* bad things may happen if the no_msi flag is changed
-	 * while some drivers are loaded */
+	/*
+	 * Bad things may happen if the no_msi flag is changed
+	 * while drivers are loaded.
+	 */
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	/* Maybe pci devices without subordinate busses shouldn't even have this
-	 * attribute in the first place?  */
+	/*
+	 * Maybe devices without subordinate buses shouldn't have this
+	 * attribute in the first place?
+	 */
 	if (!pdev->subordinate)
 		return count;
 
@@ -670,7 +674,7 @@ pci_write_config(struct file* filp, struct kobject *kobj,
 		size = dev->cfg_size - off;
 		count = size;
 	}
-	
+
 	pci_config_pm_runtime_get(dev);
 
 	if ((off & 1) && size) {
@@ -678,7 +682,7 @@ pci_write_config(struct file* filp, struct kobject *kobj,
 		off++;
 		size--;
 	}
-	
+
 	if ((off & 3) && size > 2) {
 		u16 val = data[off - init_off];
 		val |= (u16) data[off - init_off + 1] << 8;
@@ -696,7 +700,7 @@ pci_write_config(struct file* filp, struct kobject *kobj,
 		off += 4;
 		size -= 4;
 	}
-	
+
 	if (size >= 2) {
 		u16 val = data[off - init_off];
 		val |= (u16) data[off - init_off + 1] << 8;
@@ -1229,21 +1233,21 @@ pci_read_rom(struct file *filp, struct kobject *kobj,
 
 	if (!pdev->rom_attr_enabled)
 		return -EINVAL;
-	
+
 	rom = pci_map_rom(pdev, &size);	/* size starts out as PCI window size */
 	if (!rom || !size)
 		return -EIO;
-		
+
 	if (off >= size)
 		count = 0;
 	else {
 		if (off + count > size)
 			count = size - off;
-		
+
 		memcpy_fromio(buf, rom + off, count);
 	}
 	pci_unmap_rom(pdev, rom);
-		
+
 	return count;
 }
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b127fbda6fc8..33120d156668 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -198,7 +198,7 @@ static int __pci_bus_find_cap_start(struct pci_bus *bus,
 }
 
 /**
- * pci_find_capability - query for devices' capabilities 
+ * pci_find_capability - query for devices' capabilities
  * @dev: PCI device to query
  * @cap: capability code
  *
@@ -207,12 +207,12 @@ static int __pci_bus_find_cap_start(struct pci_bus *bus,
  * device's PCI configuration space or 0 in case the device does not
  * support it.  Possible values for @cap:
  *
- *  %PCI_CAP_ID_PM           Power Management 
- *  %PCI_CAP_ID_AGP          Accelerated Graphics Port 
- *  %PCI_CAP_ID_VPD          Vital Product Data 
- *  %PCI_CAP_ID_SLOTID       Slot Identification 
+ *  %PCI_CAP_ID_PM           Power Management
+ *  %PCI_CAP_ID_AGP          Accelerated Graphics Port
+ *  %PCI_CAP_ID_VPD          Vital Product Data
+ *  %PCI_CAP_ID_SLOTID       Slot Identification
  *  %PCI_CAP_ID_MSI          Message Signalled Interrupts
- *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
+ *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap
  *  %PCI_CAP_ID_PCIX         PCI-X
  *  %PCI_CAP_ID_EXP          PCI Express
  */
@@ -228,13 +228,13 @@ int pci_find_capability(struct pci_dev *dev, int cap)
 }
 
 /**
- * pci_bus_find_capability - query for devices' capabilities 
+ * pci_bus_find_capability - query for devices' capabilities
  * @bus:   the PCI bus to query
  * @devfn: PCI device to query
  * @cap:   capability code
  *
  * Like pci_find_capability() but works for pci devices that do not have a
- * pci_dev structure set up yet. 
+ * pci_dev structure set up yet.
  *
  * Returns the address of the requested capability structure within the
  * device's PCI configuration space or 0 in case the device does not
@@ -515,7 +515,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
 		return -EINVAL;
 
 	/* Validate current state:
-	 * Can enter D0 from any state, but if we can only go deeper 
+	 * Can enter D0 from any state, but if we can only go deeper
 	 * to sleep if we're already in a low power state
 	 */
 	if (state != PCI_D0 && dev->current_state <= PCI_D3cold
@@ -998,7 +998,7 @@ static void pci_restore_config_space(struct pci_dev *pdev)
 	}
 }
 
-/** 
+/**
  * pci_restore_state - Restore the saved state of a PCI device
  * @dev: - PCI device that we're dealing with
  */
@@ -1030,7 +1030,7 @@ struct pci_saved_state {
  *			   the device saved state.
  * @dev: PCI device that we're dealing with
  *
- * Rerturn NULL if no state or error.
+ * Return NULL if no state or error.
  */
 struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
 {
@@ -1880,7 +1880,7 @@ int pci_finish_runtime_suspend(struct pci_dev *dev)
  * pci_dev_run_wake - Check if device can generate run-time wake-up events.
  * @dev: Device to check.
  *
- * Return true if the device itself is cabable of generating wake-up events
+ * Return true if the device itself is capable of generating wake-up events
  * (through the platform or using the native PCIe PME) or if the device supports
  * PME and one of its upstream bridges can generate wake-up events.
  */
@@ -2447,7 +2447,7 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
 	switch (pci_pcie_type(pdev)) {
 	/*
 	 * PCI/X-to-PCIe bridges are not specifically mentioned by the spec,
-	 * but since their primary inteface is PCI/X, we conservatively
+	 * but since their primary interface is PCI/X, we conservatively
 	 * handle them as we would a non-PCIe device.
 	 */
 	case PCI_EXP_TYPE_PCIE_BRIDGE:
@@ -2471,7 +2471,7 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
 	/*
 	 * PCIe 3.0, 6.12.1.2 specifies ACS capabilities that should be
 	 * implemented by the remaining PCIe types to indicate peer-to-peer
-	 * capabilities, but only when they are part of a multifunciton
+	 * capabilities, but only when they are part of a multifunction
 	 * device.  The footnote for section 6.12 indicates the specific
 	 * PCIe types included here.
 	 */
@@ -2486,7 +2486,7 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
 	}
 
 	/*
-	 * PCIe 3.0, 6.12.1.3 specifies no ACS capabilties are applicable
+	 * PCIe 3.0, 6.12.1.3 specifies no ACS capabilities are applicable
 	 * to single function devices with the exception of downstream ports.
 	 */
 	return true;
@@ -2622,7 +2622,7 @@ void pci_release_region(struct pci_dev *pdev, int bar)
  *
  *	If @exclusive is set, then the region is marked so that userspace
  *	is explicitly not allowed to map the resource via /dev/mem or
- * 	sysfs MMIO access.
+ *	sysfs MMIO access.
  *
  *	Returns 0 on success, or %EBUSY on error.  A warning
  *	message is also printed on failure.
@@ -2634,7 +2634,7 @@ static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_n
 
 	if (pci_resource_len(pdev, bar) == 0)
 		return 0;
-		
+
 	if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
 		if (!request_region(pci_resource_start(pdev, bar),
 			    pci_resource_len(pdev, bar), res_name))
@@ -2694,7 +2694,7 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
  *
  *	The key difference that _exclusive makes it that userspace is
  *	explicitly not allowed to map the resource via /dev/mem or
- * 	sysfs.
+ *	sysfs.
  */
 int pci_request_region_exclusive(struct pci_dev *pdev, int bar, const char *res_name)
 {
@@ -2799,7 +2799,7 @@ int pci_request_regions(struct pci_dev *pdev, const char *res_name)
  *	successfully.
  *
  *	pci_request_regions_exclusive() will mark the region so that
- * 	/dev/mem and the sysfs MMIO access will not be allowed.
+ *	/dev/mem and the sysfs MMIO access will not be allowed.
  *
  *	Returns 0 on success, or %EBUSY on error.  A warning
  *	message is also printed on failure.
@@ -2967,7 +2967,7 @@ pci_set_mwi(struct pci_dev *dev)
 		cmd |= PCI_COMMAND_INVALIDATE;
 		pci_write_config_word(dev, PCI_COMMAND, cmd);
 	}
-	
+
 	return 0;
 }
 
@@ -3292,7 +3292,7 @@ clear:
  *
  * NOTE: This causes the caller to sleep for twice the device power transition
  * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms
- * by devault (i.e. unless the @dev's d3_delay field has a different value).
+ * by default (i.e. unless the @dev's d3_delay field has a different value).
  * Moreover, only devices in D0 can be reset by this function.
  */
 static int pci_pm_reset(struct pci_dev *dev, int probe)
@@ -3341,7 +3341,7 @@ void pci_reset_bridge_secondary_bus(struct pci_dev *dev)
 	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
 	/*
 	 * PCI spec v3.0 7.6.4.2 requires minimum Trst of 1ms.  Double
-	 * this to 2ms to ensure that we meet the minium requirement.
+	 * this to 2ms to ensure that we meet the minimum requirement.
 	 */
 	msleep(2);
 
@@ -3998,7 +3998,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
 		return -EINVAL;
 
 	v = ffs(mps) - 8;
-	if (v > dev->pcie_mpss) 
+	if (v > dev->pcie_mpss)
 		return -EINVAL;
 	v <<= 5;
 
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 85ca36f2136d..688e48e51073 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -525,7 +525,7 @@ static void handle_error_source(struct pcie_device *aerdev,
 
 	if (info->severity == AER_CORRECTABLE) {
 		/*
-		 * Correctable error does not need software intevention.
+		 * Correctable error does not need software intervention.
 		 * No need to go through error recovery process.
 		 */
 		pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 403a44374ed5..f1272dc54de1 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -548,7 +548,7 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev)
 
 /*
  * pcie_aspm_init_link_state: Initiate PCI express link state.
- * It is called after the pcie and its children devices are scaned.
+ * It is called after the pcie and its children devices are scanned.
  * @pdev: the root port or switch downstream port
  */
 void pcie_aspm_init_link_state(struct pci_dev *pdev)
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index e56e594ce112..bbc3bdd2b189 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -419,8 +419,8 @@ static void pcie_pme_remove(struct pcie_device *srv)
 
 static struct pcie_port_service_driver pcie_pme_driver = {
 	.name		= "pcie_pme",
-	.port_type 	= PCI_EXP_TYPE_ROOT_PORT,
-	.service 	= PCIE_PORT_SERVICE_PME,
+	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
+	.service	= PCIE_PORT_SERVICE_PME,
 
 	.probe		= pcie_pme_probe,
 	.suspend	= pcie_pme_suspend,
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index d2eb80aab569..d525548404d6 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -14,7 +14,7 @@
 #define PCIE_PORT_DEVICE_MAXSERVICES   4
 /*
  * According to the PCI Express Base Specification 2.0, the indices of
- * the MSI-X table entires used by port services must not exceed 31
+ * the MSI-X table entries used by port services must not exceed 31
  */
 #define PCIE_PORT_MAX_MSIX_ENTRIES	32
 
diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c
index 67be55a7f260..87e79a6ffb5a 100644
--- a/drivers/pci/pcie/portdrv_bus.c
+++ b/drivers/pci/pcie/portdrv_bus.c
@@ -18,8 +18,8 @@
 static int pcie_port_bus_match(struct device *dev, struct device_driver *drv);
 
 struct bus_type pcie_port_bus_type = {
-	.name 		= "pci_express",
-	.match 		= pcie_port_bus_match,
+	.name		= "pci_express",
+	.match		= pcie_port_bus_match,
 };
 EXPORT_SYMBOL_GPL(pcie_port_bus_type);
 
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 08d131f7815b..0b6e76604068 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -46,7 +46,7 @@ static void release_pcie_device(struct device *dev)
  * pcie_port_msix_add_entry - add entry to given array of MSI-X entries
  * @entries: Array of MSI-X entries
  * @new_entry: Index of the entry to add to the array
- * @nr_entries: Number of entries aleady in the array
+ * @nr_entries: Number of entries already in the array
  *
  * Return value: Position of the added entry in the array
  */
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 696caed5fdf5..cd1e57e51aa7 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -390,9 +390,9 @@ static struct pci_driver pcie_portdriver = {
 	.probe		= pcie_portdrv_probe,
 	.remove		= pcie_portdrv_remove,
 
-	.err_handler 	= &pcie_portdrv_err_handler,
+	.err_handler	= &pcie_portdrv_err_handler,
 
-	.driver.pm 	= PCIE_PORTDRV_PM_OPS,
+	.driver.pm	= PCIE_PORTDRV_PM_OPS,
 };
 
 static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d)
@@ -412,7 +412,7 @@ static struct dmi_system_id __initdata pcie_portdrv_dmi_table[] = {
 	 .ident = "MSI Wind U-100",
 	 .matches = {
 		     DMI_MATCH(DMI_SYS_VENDOR,
-		     		"MICRO-STAR INTERNATIONAL CO., LTD"),
+				"MICRO-STAR INTERNATIONAL CO., LTD"),
 		     DMI_MATCH(DMI_PRODUCT_NAME, "U-100"),
 		     },
 	 },
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 5e14f5a51357..38e403dddf6e 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -582,7 +582,7 @@ static enum pci_bus_speed agp_speed(int agp3, int agpstat)
 		index = 1;
 	else
 		goto out;
-	
+
 	if (agp3) {
 		index += 2;
 		if (index == 5)
@@ -789,7 +789,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass)
 	}
 
 	/* Disable MasterAbortMode during probing to avoid reporting
-	   of bus errors (in some architectures) */ 
+	   of bus errors (in some architectures) */
 	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl);
 	pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
 			      bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
@@ -1005,7 +1005,7 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev)
  * pci_setup_device - fill in class and map information of a device
  * @dev: the device structure to fill
  *
- * Initialize the device structure with information about the device's 
+ * Initialize the device structure with information about the device's
  * vendor,class,memory and IO-space addresses,IRQ lines etc.
  * Called at initialisation of the PCI subsystem and by CardBus services.
  * Returns 0 on success and negative if unknown type of device (not normal,
@@ -1111,7 +1111,7 @@ int pci_setup_device(struct pci_dev *dev)
 			goto bad;
 		/* The PCI-to-PCI bridge spec requires that subtractive
 		   decoding (i.e. transparent) bridge must have programming
-		   interface code of 0x01. */ 
+		   interface code of 0x01. */
 		pci_read_irq(dev);
 		dev->transparent = ((dev->class & 0xff) == 1);
 		pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
@@ -1570,7 +1570,7 @@ static void pcie_write_mrrs(struct pci_dev *dev)
 	 * subsequent read will verify if the value is acceptable or not.
 	 * If the MRRS value provided is not acceptable (e.g., too large),
 	 * shrink the value until it is acceptable to the HW.
- 	 */
+	 */
 	while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) {
 		rc = pcie_set_readrq(dev, mrrs);
 		if (!rc)
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index cdc7836d7e3d..46d1378f2e9e 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -222,7 +222,7 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
 	default:
 		ret = -EINVAL;
 		break;
-	};
+	}
 
 	return ret;
 }
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 91490453c229..b3b1b9aa8863 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -55,7 +55,7 @@ static void quirk_mellanox_tavor(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR,quirk_mellanox_tavor);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,quirk_mellanox_tavor);
 
-/* Deal with broken BIOS'es that neglect to enable passive release,
+/* Deal with broken BIOSes that neglect to enable passive release,
    which can cause problems in combination with the 82441FX/PPro MTRRs */
 static void quirk_passive_release(struct pci_dev *dev)
 {
@@ -78,11 +78,11 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82441,	quirk_p
 
 /*  The VIA VP2/VP3/MVP3 seem to have some 'features'. There may be a workaround
     but VIA don't answer queries. If you happen to have good contacts at VIA
-    ask them for me please -- Alan 
-    
-    This appears to be BIOS not version dependent. So presumably there is a 
+    ask them for me please -- Alan
+
+    This appears to be BIOS not version dependent. So presumably there is a
     chipset level fix */
-    
+
 static void quirk_isa_dma_hangs(struct pci_dev *dev)
 {
 	if (!isa_dma_bridge_buggy) {
@@ -97,7 +97,7 @@ static void quirk_isa_dma_hangs(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C586_0,	quirk_isa_dma_hangs);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C596,	quirk_isa_dma_hangs);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82371SB_0,  quirk_isa_dma_hangs);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL,	PCI_DEVICE_ID_AL_M1533, 	quirk_isa_dma_hangs);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL,	PCI_DEVICE_ID_AL_M1533,		quirk_isa_dma_hangs);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_1,	quirk_isa_dma_hangs);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_2,	quirk_isa_dma_hangs);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_3,	quirk_isa_dma_hangs);
@@ -157,10 +157,10 @@ static void quirk_triton(struct pci_dev *dev)
 		pci_pci_problems |= PCIPCI_TRITON;
 	}
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82437, 	quirk_triton);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82437VX, 	quirk_triton);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82439, 	quirk_triton);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82439TX, 	quirk_triton);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82437,	quirk_triton);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82437VX,	quirk_triton);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82439,	quirk_triton);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82439TX,	quirk_triton);
 
 /*
  *	VIA Apollo KT133 needs PCI latency patch
@@ -171,7 +171,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82439TX, 	quir
  *      the info on which Mr Breese based his work.
  *
  *	Updated based on further information from the site and also on
- *	information provided by VIA 
+ *	information provided by VIA
  */
 static void quirk_vialatency(struct pci_dev *dev)
 {
@@ -179,7 +179,7 @@ static void quirk_vialatency(struct pci_dev *dev)
 	u8 busarb;
 	/* Ok we have a potential problem chipset here. Now see if we have
 	   a buggy southbridge */
-	   
+
 	p = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL);
 	if (p!=NULL) {
 		/* 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A; thanks Dan Hollis */
@@ -194,9 +194,9 @@ static void quirk_vialatency(struct pci_dev *dev)
 		if (p->revision < 0x10 || p->revision > 0x12)
 			goto exit;
 	}
-	
+
 	/*
-	 *	Ok we have the problem. Now set the PCI master grant to 
+	 *	Ok we have the problem. Now set the PCI master grant to
 	 *	occur every master grant. The apparent bug is that under high
 	 *	PCI load (quite common in Linux of course) you can get data
 	 *	loss when the CPU is held off the bus for 3 bus master requests
@@ -209,7 +209,7 @@ static void quirk_vialatency(struct pci_dev *dev)
 	 */
 
 	pci_read_config_byte(dev, 0x76, &busarb);
-	/* Set bit 4 and bi 5 of byte 76 to 0x01 
+	/* Set bit 4 and bi 5 of byte 76 to 0x01
 	   "Master priority rotation on every PCI master grant */
 	busarb &= ~(1<<5);
 	busarb |= (1<<4);
@@ -252,7 +252,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C576,	quirk_vsfx)
  *	that DMA to AGP space. Latency must be set to 0xA and triton
  *	workaround applied too
  *	[Info kindly provided by ALi]
- */	
+ */
 static void quirk_alimagik(struct pci_dev *dev)
 {
 	if ((pci_pci_problems&PCIPCI_ALIMAGIK)==0) {
@@ -260,8 +260,8 @@ static void quirk_alimagik(struct pci_dev *dev)
 		pci_pci_problems |= PCIPCI_ALIMAGIK|PCIPCI_TRITON;
 	}
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 	PCI_DEVICE_ID_AL_M1647, 	quirk_alimagik);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 	PCI_DEVICE_ID_AL_M1651, 	quirk_alimagik);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL,	PCI_DEVICE_ID_AL_M1647,		quirk_alimagik);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL,	PCI_DEVICE_ID_AL_M1651,		quirk_alimagik);
 
 /*
  *	Natoma has some interesting boundary conditions with Zoran stuff
@@ -274,12 +274,12 @@ static void quirk_natoma(struct pci_dev *dev)
 		pci_pci_problems |= PCIPCI_NATOMA;
 	}
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82441, 	quirk_natoma);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82443LX_0, 	quirk_natoma);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82443LX_1, 	quirk_natoma);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82443BX_0, 	quirk_natoma);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82443BX_1, 	quirk_natoma);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 	PCI_DEVICE_ID_INTEL_82443BX_2, 	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82441,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443LX_0,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443LX_1,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443BX_0,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443BX_1,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443BX_2,	quirk_natoma);
 
 /*
  *  This chip can cause PCI parity errors if config register 0xA0 is read
@@ -400,7 +400,7 @@ static void piix4_io_quirk(struct pci_dev *dev, const char *name, unsigned int p
 	/*
 	 * For now we only print it out. Eventually we'll want to
 	 * reserve it (at least if it's in the 0x1000+ range), but
-	 * let's get enough confirmation reports first. 
+	 * let's get enough confirmation reports first.
 	 */
 	base &= -size;
 	dev_info(&dev->dev, "%s PIO at %04x-%04x\n", name, base, base + size - 1);
@@ -425,7 +425,7 @@ static void piix4_mem_quirk(struct pci_dev *dev, const char *name, unsigned int
 	}
 	/*
 	 * For now we only print it out. Eventually we'll want to
-	 * reserve it, but let's get enough confirmation reports first. 
+	 * reserve it, but let's get enough confirmation reports first.
 	 */
 	base &= -size;
 	dev_info(&dev->dev, "%s MMIO at %04x-%04x\n", name, base, base + size - 1);
@@ -682,7 +682,7 @@ static void quirk_xio2000a(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XIO2000A,
 			quirk_xio2000a);
 
-#ifdef CONFIG_X86_IO_APIC 
+#ifdef CONFIG_X86_IO_APIC
 
 #include <asm/io_apic.h>
 
@@ -696,12 +696,12 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XIO2000A,
 static void quirk_via_ioapic(struct pci_dev *dev)
 {
 	u8 tmp;
-	
+
 	if (nr_ioapics < 1)
 		tmp = 0;    /* nothing routed to external APIC */
 	else
 		tmp = 0x1f; /* all known bits (4-0) routed to external APIC */
-		
+
 	dev_info(&dev->dev, "%sbling VIA external APIC routing\n",
 	       tmp == 0 ? "Disa" : "Ena");
 
@@ -712,7 +712,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686,	quirk_via_i
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686,	quirk_via_ioapic);
 
 /*
- * VIA 8237: Some BIOSs don't set the 'Bypass APIC De-Assert Message' Bit.
+ * VIA 8237: Some BIOSes don't set the 'Bypass APIC De-Assert Message' Bit.
  * This leads to doubled level interrupt rates.
  * Set this bit to get rid of cycle wastage.
  * Otherwise uncritical.
@@ -986,7 +986,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX,	PCI_DEVICE_ID_CYRIX_PCI_MASTER, qu
 static void quirk_disable_pxb(struct pci_dev *pdev)
 {
 	u16 config;
-	
+
 	if (pdev->revision != 0x04)		/* Only C0 requires this */
 		return;
 	pci_read_config_word(pdev, 0x40, &config);
@@ -1094,11 +1094,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82375,	quirk_e
  * On ASUS P4B boards, the SMBus PCI Device within the ICH2/4 southbridge
  * is not activated. The myth is that Asus said that they do not want the
  * users to be irritated by just another PCI Device in the Win98 device
- * manager. (see the file prog/hotplug/README.p4b in the lm_sensors 
+ * manager. (see the file prog/hotplug/README.p4b in the lm_sensors
  * package 2.7.0 for details)
  *
- * The SMBus PCI Device can be activated by setting a bit in the ICH LPC 
- * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it 
+ * The SMBus PCI Device can be activated by setting a bit in the ICH LPC
+ * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it
  * becomes necessary to do this tweak in two steps -- the chosen trigger
  * is either the Host bridge (preferred) or on-board VGA controller.
  *
@@ -1253,7 +1253,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82815_CGC,	asu
 static void asus_hides_smbus_lpc(struct pci_dev *dev)
 {
 	u16 val;
-	
+
 	if (likely(!asus_hides_smbus))
 		return;
 
@@ -1640,8 +1640,8 @@ static void quirk_disable_intel_boot_interrupt(struct pci_dev *dev)
 	dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n",
 		 dev->vendor, dev->device);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_ESB_10, 	quirk_disable_intel_boot_interrupt);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_ESB_10, 	quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_ESB_10,	quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_ESB_10,	quirk_disable_intel_boot_interrupt);
 
 /*
  * disable boot interrupts on HT-1000
@@ -1673,8 +1673,8 @@ static void quirk_disable_broadcom_boot_interrupt(struct pci_dev *dev)
 	dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n",
 		 dev->vendor, dev->device);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS,   PCI_DEVICE_ID_SERVERWORKS_HT1000SB, 	quirk_disable_broadcom_boot_interrupt);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS,   PCI_DEVICE_ID_SERVERWORKS_HT1000SB, 	quirk_disable_broadcom_boot_interrupt);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS,   PCI_DEVICE_ID_SERVERWORKS_HT1000SB,	quirk_disable_broadcom_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS,   PCI_DEVICE_ID_SERVERWORKS_HT1000SB,	quirk_disable_broadcom_boot_interrupt);
 
 /*
  * disable boot interrupts on AMD and ATI chipsets
@@ -1730,8 +1730,8 @@ static void quirk_disable_amd_8111_boot_interrupt(struct pci_dev *dev)
 	dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n",
 		 dev->vendor, dev->device);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,   PCI_DEVICE_ID_AMD_8111_SMBUS, 	quirk_disable_amd_8111_boot_interrupt);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD,   PCI_DEVICE_ID_AMD_8111_SMBUS, 	quirk_disable_amd_8111_boot_interrupt);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,   PCI_DEVICE_ID_AMD_8111_SMBUS,	quirk_disable_amd_8111_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD,   PCI_DEVICE_ID_AMD_8111_SMBUS,	quirk_disable_amd_8111_boot_interrupt);
 #endif /* CONFIG_X86_IO_APIC */
 
 /*
@@ -2127,8 +2127,8 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1);
 #ifdef CONFIG_PCI_MSI
 /* Some chipsets do not support MSI. We cannot easily rely on setting
  * PCI_BUS_FLAGS_NO_MSI in its bus flags because there are actually
- * some other busses controlled by the chipset even if Linux is not
- * aware of it.  Instead of setting the flag on all busses in the
+ * some other buses controlled by the chipset even if Linux is not
+ * aware of it.  Instead of setting the flag on all buses in the
  * machine, simply disable MSI globally.
  */
 static void quirk_disable_all_msi(struct pci_dev *dev)
@@ -2288,14 +2288,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
 			nvenet_msi_disable);
 
 /*
- * Some versions of the MCP55 bridge from nvidia have a legacy irq routing
- * config register.  This register controls the routing of legacy interrupts
- * from devices that route through the MCP55.  If this register is misprogramed
- * interrupts are only sent to the bsp, unlike conventional systems where the
- * irq is broadxast to all online cpus.  Not having this register set
- * properly prevents kdump from booting up properly, so lets make sure that
- * we have it set correctly.
- * Note this is an undocumented register.
+ * Some versions of the MCP55 bridge from Nvidia have a legacy IRQ routing
+ * config register.  This register controls the routing of legacy
+ * interrupts from devices that route through the MCP55.  If this register
+ * is misprogrammed, interrupts are only sent to the BSP, unlike
+ * conventional systems where the IRQ is broadcast to all online CPUs.  Not
+ * having this register set properly prevents kdump from booting up
+ * properly, so let's make sure that we have it set correctly.
+ * Note that this is an undocumented register.
  */
 static void nvbridge_check_legacy_irq_routing(struct pci_dev *dev)
 {
@@ -2626,7 +2626,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0xe091,
 /* Allow manual resource allocation for PCI hotplug bridges
  * via pci=hpmemsize=nnM and pci=hpiosize=nnM parameters. For
  * some PCI-PCI hotplug bridges, like PLX 6254 (former HINT HB6),
- * kernel fails to allocate resources when hotplug device is 
+ * kernel fails to allocate resources when hotplug device is
  * inserted and PCI bus is rescanned.
  */
 static void quirk_hotplug_bridge(struct pci_dev *dev)
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 8fc54b7327bc..1576851028db 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -7,7 +7,7 @@ static void pci_free_resources(struct pci_dev *dev)
 {
 	int i;
 
- 	msi_remove_pci_irq_vectors(dev);
+	msi_remove_pci_irq_vectors(dev);
 
 	pci_cleanup_rom(dev);
 	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index d0627fa9f368..3ff2ac7c14e2 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -1,5 +1,5 @@
 /*
- * 	PCI searching functions.
+ *	PCI searching functions.
  *
  *	Copyright (C) 1993 -- 1997 Drew Eckhardt, Frederic Potter,
  *					David Mosberger-Tang
@@ -96,12 +96,12 @@ struct pci_bus * pci_find_bus(int domain, int busnr)
  * pci_find_next_bus - begin or continue searching for a PCI bus
  * @from: Previous PCI bus found, or %NULL for new search.
  *
- * Iterates through the list of known PCI busses.  A new search is
+ * Iterates through the list of known PCI buses.  A new search is
  * initiated by passing %NULL as the @from argument.  Otherwise if
  * @from is not %NULL, searches continue from next device on the
  * global list.
  */
-struct pci_bus * 
+struct pci_bus *
 pci_find_next_bus(const struct pci_bus *from)
 {
 	struct list_head *n;
@@ -119,11 +119,11 @@ pci_find_next_bus(const struct pci_bus *from)
 /**
  * pci_get_slot - locate PCI device for a given PCI slot
  * @bus: PCI bus on which desired PCI device resides
- * @devfn: encodes number of PCI slot in which the desired PCI 
- * device resides and the logical device number within that slot 
+ * @devfn: encodes number of PCI slot in which the desired PCI
+ * device resides and the logical device number within that slot
  * in case of multi-function devices.
  *
- * Given a PCI bus and slot/function number, the desired PCI device 
+ * Given a PCI bus and slot/function number, the desired PCI device
  * is located in the list of PCI devices.
  * If the device is found, its reference count is increased and this
  * function returns a pointer to its data structure.  The caller must
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 4ce83b26ae9e..219a4106480a 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -292,8 +292,8 @@ static void assign_requested_resources_sorted(struct list_head *head,
 				      (!(res->flags & IORESOURCE_ROM_ENABLE))))
 					add_to_list(fail_head,
 						    dev_res->dev, res,
-						    0 /* dont care */,
-						    0 /* dont care */);
+						    0 /* don't care */,
+						    0 /* don't care */);
 			}
 			reset_resource(res);
 		}
@@ -667,9 +667,9 @@ static void pci_bridge_check_ranges(struct pci_bus *bus)
 	if (!io) {
 		pci_write_config_word(bridge, PCI_IO_BASE, 0xf0f0);
 		pci_read_config_word(bridge, PCI_IO_BASE, &io);
- 		pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
- 	}
- 	if (io)
+		pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
+	}
+	if (io)
 		b_res[0].flags |= IORESOURCE_IO;
 	/*  DECchip 21050 pass 2 errata: the bridge may miss an address
 	    disconnect boundary by one PCI data phase.
@@ -819,7 +819,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
 	resource_size_t min_align, align;
 
 	if (!b_res)
- 		return;
+		return;
 
 	min_align = window_alignment(bus, IORESOURCE_IO);
 	list_for_each_entry(dev, &bus->devices, bus_list) {
@@ -950,7 +950,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 			if (realloc_head && i >= PCI_IOV_RESOURCES &&
 					i <= PCI_IOV_RESOURCE_END) {
 				r->end = r->start - 1;
-				add_to_list(realloc_head, dev, r, r_size, 0/* dont' care */);
+				add_to_list(realloc_head, dev, r, r_size, 0/* don't care */);
 				children_add_size += r_size;
 				continue;
 			}
@@ -1456,8 +1456,8 @@ static enum enable_type pci_realloc_detect(struct pci_bus *bus,
 
 /*
  * first try will not touch pci bridge res
- * second  and later try will clear small leaf bridge res
- * will stop till to the max  deepth if can not find good one
+ * second and later try will clear small leaf bridge res
+ * will stop till to the max depth if can not find good one
  */
 void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus)
 {
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 07f2eddc09ce..83c4d3bc47ab 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -159,7 +159,7 @@ resource_size_t __weak pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
 	return 0;
 }
 
-static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, 
+static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev,
 		int resno, resource_size_t size)
 {
 	struct resource *root, *conflict;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index c1e9284a677b..448ca562d1f8 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -53,7 +53,7 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf)
 static const char *pci_bus_speed_strings[] = {
 	"33 MHz PCI",		/* 0x00 */
 	"66 MHz PCI",		/* 0x01 */
-	"66 MHz PCI-X", 	/* 0x02 */
+	"66 MHz PCI-X",		/* 0x02 */
 	"100 MHz PCI-X",	/* 0x03 */
 	"133 MHz PCI-X",	/* 0x04 */
 	NULL,			/* 0x05 */
diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c
index e1c1ec540893..24750a1b39b6 100644
--- a/drivers/pci/syscall.c
+++ b/drivers/pci/syscall.c
@@ -44,7 +44,7 @@ SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn,
 	default:
 		err = -EINVAL;
 		goto error;
-	};
+	}
 
 	err = -EIO;
 	if (cfg_ret != PCIBIOS_SUCCESSFUL)
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 87cce50bd121..009b02481436 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -26,11 +26,11 @@ struct msi_desc {
 	struct {
 		__u8	is_msix	: 1;
 		__u8	multiple: 3;	/* log2 number of messages */
-		__u8	maskbit	: 1; 	/* mask-pending bit supported ?   */
-		__u8	is_64	: 1;	/* Address size: 0=32bit 1=64bit  */
-		__u8	pos;	 	/* Location of the msi capability */
-		__u16	entry_nr;    	/* specific enabled entry 	  */
-		unsigned default_irq;	/* default pre-assigned irq	  */
+		__u8	maskbit	: 1;	/* mask-pending bit supported ? */
+		__u8	is_64	: 1;	/* Address size: 0=32bit 1=64bit */
+		__u8	pos;		/* Location of the msi capability */
+		__u16	entry_nr;	/* specific enabled entry */
+		unsigned default_irq;	/* default pre-assigned irq */
 	} msi_attrib;
 
 	u32 masked;			/* mask bits */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 835ec7bf6c05..1084a15175e0 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -32,7 +32,6 @@
 #include <linux/irqreturn.h>
 #include <uapi/linux/pci.h>
 
-/* Include the ID list */
 #include <linux/pci_ids.h>
 
 /*
@@ -42,9 +41,10 @@
  *
  *	7:3 = slot
  *	2:0 = function
- * PCI_DEVFN(), PCI_SLOT(), and PCI_FUNC() are defined uapi/linux/pci.h
+ *
+ * PCI_DEVFN(), PCI_SLOT(), and PCI_FUNC() are defined in uapi/linux/pci.h.
  * In the interest of not exposing interfaces to user-space unnecessarily,
- * the following kernel only defines are being added here.
+ * the following kernel-only defines are being added here.
  */
 #define PCI_DEVID(bus, devfn)  ((((u16)bus) << 8) | devfn)
 /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */
@@ -153,10 +153,10 @@ enum pcie_reset_state {
 	/* Reset is NOT asserted (Use to deassert reset) */
 	pcie_deassert_reset = (__force pcie_reset_state_t) 1,
 
-	/* Use #PERST to reset PCI-E device */
+	/* Use #PERST to reset PCIe device */
 	pcie_warm_reset = (__force pcie_reset_state_t) 2,
 
-	/* Use PCI-E Hot Reset to reset device */
+	/* Use PCIe Hot Reset to reset device */
 	pcie_hot_reset = (__force pcie_reset_state_t) 3
 };
 
@@ -259,13 +259,13 @@ struct pci_dev {
 	unsigned int	class;		/* 3 bytes: (base,sub,prog-if) */
 	u8		revision;	/* PCI revision, low byte of class word */
 	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
-	u8		pcie_cap;	/* PCI-E capability offset */
+	u8		pcie_cap;	/* PCIe capability offset */
 	u8		msi_cap;	/* MSI capability offset */
 	u8		msix_cap;	/* MSI-X capability offset */
-	u8		pcie_mpss:3;	/* PCI-E Max Payload Size Supported */
+	u8		pcie_mpss:3;	/* PCIe Max Payload Size Supported */
 	u8		rom_base_reg;	/* which config register controls the ROM */
-	u8		pin;  		/* which interrupt pin this device uses */
-	u16		pcie_flags_reg;	/* cached PCI-E Capabilities Register */
+	u8		pin;		/* which interrupt pin this device uses */
+	u16		pcie_flags_reg;	/* cached PCIe Capabilities Register */
 
 	struct pci_driver *driver;	/* which driver has allocated this device */
 	u64		dma_mask;	/* Mask of the bits of bus address this
@@ -300,7 +300,7 @@ struct pci_dev {
 	unsigned int	d3cold_delay;	/* D3cold->D0 transition time in ms */
 
 #ifdef CONFIG_PCIEASPM
-	struct pcie_link_state	*link_state;	/* ASPM link state. */
+	struct pcie_link_state	*link_state;	/* ASPM link state */
 #endif
 
 	pci_channel_state_t error_state;	/* current connectivity state */
@@ -317,7 +317,7 @@ struct pci_dev {
 
 	bool match_driver;		/* Skip attaching driver */
 	/* These fields are used by common fixups */
-	unsigned int	transparent:1;	/* Transparent PCI bridge */
+	unsigned int	transparent:1;	/* Subtractive decode PCI bridge */
 	unsigned int	multifunction:1;/* Part of multi-function device */
 	/* keep track of device state */
 	unsigned int	is_added:1;
@@ -326,7 +326,7 @@ struct pci_dev {
 	unsigned int	block_cfg_access:1;	/* config space access is blocked */
 	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 	unsigned int	irq_reroute_variant:2;	/* device needs IRQ rerouting variant */
-	unsigned int 	msi_enabled:1;
+	unsigned int	msi_enabled:1;
 	unsigned int	msix_enabled:1;
 	unsigned int	ari_enabled:1;	/* ARI forwarding */
 	unsigned int	is_managed:1;
@@ -371,7 +371,6 @@ static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
 	if (dev->is_virtfn)
 		dev = dev->physfn;
 #endif
-
 	return dev;
 }
 
@@ -456,7 +455,7 @@ struct pci_bus {
 	char		name[48];
 
 	unsigned short  bridge_ctl;	/* manage NO_ISA/FBB/et al behaviors */
-	pci_bus_flags_t bus_flags;	/* Inherited by child busses */
+	pci_bus_flags_t bus_flags;	/* inherited by child buses */
 	struct device		*bridge;
 	struct device		dev;
 	struct bin_attribute	*legacy_io; /* legacy I/O for this bus */
@@ -468,7 +467,7 @@ struct pci_bus {
 #define to_pci_bus(n)	container_of(n, struct pci_bus, dev)
 
 /*
- * Returns true if the pci bus is root (behind host-pci bridge),
+ * Returns true if the PCI bus is root (behind host-PCI bridge),
  * false otherwise
  *
  * Some code assumes that "bus->self == NULL" means that bus is a root bus.
@@ -510,7 +509,7 @@ static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false;
 #define PCIBIOS_BUFFER_TOO_SMALL	0x89
 
 /*
- * Translate above to generic errno for passing back through non-pci.
+ * Translate above to generic errno for passing back through non-PCI code.
  */
 static inline int pcibios_err_to_errno(int err)
 {
@@ -561,11 +560,12 @@ struct pci_dynids {
 	struct list_head list;      /* for IDs added at runtime */
 };
 
-/* ---------------------------------------------------------------- */
-/** PCI Error Recovery System (PCI-ERS).  If a PCI device driver provides
- *  a set of callbacks in struct pci_error_handlers, then that device driver
- *  will be notified of PCI bus errors, and will be driven to recovery
- *  when an error occurs.
+
+/*
+ * PCI Error Recovery System (PCI-ERS).  If a PCI device driver provides
+ * a set of callbacks in struct pci_error_handlers, that device driver
+ * will be notified of PCI bus errors, and will be driven to recovery
+ * when an error occurs.
  */
 
 typedef unsigned int __bitwise pci_ers_result_t;
@@ -609,7 +609,6 @@ struct pci_error_handlers {
 	void (*resume)(struct pci_dev *dev);
 };
 
-/* ---------------------------------------------------------------- */
 
 struct module;
 struct pci_driver {
@@ -713,10 +712,10 @@ extern enum pcie_bus_config_types pcie_bus_config;
 
 extern struct bus_type pci_bus_type;
 
-/* Do NOT directly access these two variables, unless you are arch specific pci
- * code, or pci core code. */
+/* Do NOT directly access these two variables, unless you are arch-specific PCI
+ * code, or PCI core code. */
 extern struct list_head pci_root_buses;	/* list of all known PCI buses */
-/* Some device drivers need know if pci is initiated */
+/* Some device drivers need know if PCI is initiated */
 int no_pci_devices(void);
 
 void pcibios_resource_survey_bus(struct pci_bus *bus);
@@ -724,7 +723,7 @@ void pcibios_add_bus(struct pci_bus *bus);
 void pcibios_remove_bus(struct pci_bus *bus);
 void pcibios_fixup_bus(struct pci_bus *);
 int __must_check pcibios_enable_device(struct pci_dev *, int mask);
-/* Architecture specific versions may override this (weak) */
+/* Architecture-specific versions may override this (weak) */
 char *pcibios_setup(char *str);
 
 /* Used only when drivers/pci/setup.c is used */
@@ -1258,7 +1257,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev);
 
 /*
  * PCI domain support.  Sometimes called PCI segment (eg by ACPI),
- * a PCI domain is defined to be a set of PCI busses which share
+ * a PCI domain is defined to be a set of PCI buses which share
  * configuration space.
  */
 #ifdef CONFIG_PCI_DOMAINS
@@ -1672,7 +1671,7 @@ extern u8 pci_cache_line_size;
 extern unsigned long pci_hotplug_io_size;
 extern unsigned long pci_hotplug_mem_size;
 
-/* Architecture specific versions may override these (weak) */
+/* Architecture-specific versions may override these (weak) */
 int pcibios_add_platform_entries(struct pci_dev *dev);
 void pcibios_disable_device(struct pci_dev *dev);
 void pcibios_set_master(struct pci_dev *dev);
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 430dd963707b..a2e2f1d17e16 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -39,8 +39,8 @@
  * @hardware_test: Called to run a specified hardware test on the specified
  * slot.
  * @get_power_status: Called to get the current power status of a slot.
- * 	If this field is NULL, the value passed in the struct hotplug_slot_info
- * 	will be used when this value is requested by a user.
+ *	If this field is NULL, the value passed in the struct hotplug_slot_info
+ *	will be used when this value is requested by a user.
  * @get_attention_status: Called to get the current attention status of a slot.
  *	If this field is NULL, the value passed in the struct hotplug_slot_info
  *	will be used when this value is requested by a user.
@@ -191,4 +191,3 @@ static inline int pci_get_hp_params(struct pci_dev *dev,
 
 void pci_configure_slot(struct pci_dev *dev);
 #endif
-
diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h
index 9572669eea97..4f1089f2cc98 100644
--- a/include/linux/pcieport_if.h
+++ b/include/linux/pcieport_if.h
@@ -23,7 +23,7 @@
 #define PCIE_PORT_SERVICE_VC		(1 << PCIE_PORT_SERVICE_VC_SHIFT)
 
 struct pcie_device {
-	int 		irq;	    /* Service IRQ/MSI/MSI-X Vector */
+	int		irq;	    /* Service IRQ/MSI/MSI-X Vector */
 	struct pci_dev *port;	    /* Root/Upstream/Downstream Port */
 	u32		service;    /* Port service this device represents */
 	void		*priv_data; /* Service Private Data */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 0890556f779e..4a98e85438a7 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -13,10 +13,10 @@
  *	PCI to PCI Bridge Specification
  *	PCI System Design Guide
  *
- * 	For hypertransport information, please consult the following manuals
- * 	from http://www.hypertransport.org
+ *	For HyperTransport information, please consult the following manuals
+ *	from http://www.hypertransport.org
  *
- *	The Hypertransport I/O Link Specification
+ *	The HyperTransport I/O Link Specification
  */
 
 #ifndef LINUX_PCI_REGS_H
@@ -37,7 +37,7 @@
 #define  PCI_COMMAND_INVALIDATE	0x10	/* Use memory write and invalidate */
 #define  PCI_COMMAND_VGA_PALETTE 0x20	/* Enable palette snooping */
 #define  PCI_COMMAND_PARITY	0x40	/* Enable parity checking */
-#define  PCI_COMMAND_WAIT 	0x80	/* Enable address/data stepping */
+#define  PCI_COMMAND_WAIT	0x80	/* Enable address/data stepping */
 #define  PCI_COMMAND_SERR	0x100	/* Enable SERR */
 #define  PCI_COMMAND_FAST_BACK	0x200	/* Enable back-to-back writes */
 #define  PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */
@@ -45,7 +45,7 @@
 #define PCI_STATUS		0x06	/* 16 bits */
 #define  PCI_STATUS_INTERRUPT	0x08	/* Interrupt status */
 #define  PCI_STATUS_CAP_LIST	0x10	/* Support Capability List */
-#define  PCI_STATUS_66MHZ	0x20	/* Support 66 Mhz PCI 2.1 bus */
+#define  PCI_STATUS_66MHZ	0x20	/* Support 66 MHz PCI 2.1 bus */
 #define  PCI_STATUS_UDF		0x40	/* Support User Definable Features [obsolete] */
 #define  PCI_STATUS_FAST_BACK	0x80	/* Accept fast-back to back */
 #define  PCI_STATUS_PARITY	0x100	/* Detected parity error */
@@ -205,14 +205,14 @@
 #define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
 #define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
 #define  PCI_CAP_ID_HT		0x08	/* HyperTransport */
-#define  PCI_CAP_ID_VNDR	0x09	/* Vendor specific */
+#define  PCI_CAP_ID_VNDR	0x09	/* Vendor-Specific */
 #define  PCI_CAP_ID_DBG		0x0A	/* Debug port */
 #define  PCI_CAP_ID_CCRC	0x0B	/* CompactPCI Central Resource Control */
-#define  PCI_CAP_ID_SHPC 	0x0C	/* PCI Standard Hot-Plug Controller */
+#define  PCI_CAP_ID_SHPC	0x0C	/* PCI Standard Hot-Plug Controller */
 #define  PCI_CAP_ID_SSVID	0x0D	/* Bridge subsystem vendor/device ID */
 #define  PCI_CAP_ID_AGP3	0x0E	/* AGP Target PCI-PCI bridge */
 #define  PCI_CAP_ID_SECDEV	0x0F	/* Secure Device */
-#define  PCI_CAP_ID_EXP 	0x10	/* PCI Express */
+#define  PCI_CAP_ID_EXP		0x10	/* PCI Express */
 #define  PCI_CAP_ID_MSIX	0x11	/* MSI-X */
 #define  PCI_CAP_ID_SATA	0x12	/* SATA Data/Index Conf. */
 #define  PCI_CAP_ID_AF		0x13	/* PCI Advanced Features */
@@ -268,8 +268,8 @@
 #define  PCI_AGP_COMMAND_RQ_MASK 0xff000000  /* Master: Maximum number of requests */
 #define  PCI_AGP_COMMAND_SBA	0x0200	/* Sideband addressing enabled */
 #define  PCI_AGP_COMMAND_AGP	0x0100	/* Allow processing of AGP transactions */
-#define  PCI_AGP_COMMAND_64BIT	0x0020 	/* Allow processing of 64-bit addresses */
-#define  PCI_AGP_COMMAND_FW	0x0010 	/* Force FW transfers */
+#define  PCI_AGP_COMMAND_64BIT	0x0020	/* Allow processing of 64-bit addresses */
+#define  PCI_AGP_COMMAND_FW	0x0010	/* Force FW transfers */
 #define  PCI_AGP_COMMAND_RATE4	0x0004	/* Use 4x rate */
 #define  PCI_AGP_COMMAND_RATE2	0x0002	/* Use 2x rate */
 #define  PCI_AGP_COMMAND_RATE1	0x0001	/* Use 1x rate */
@@ -321,7 +321,7 @@
 #define  PCI_MSIX_PBA_OFFSET	0xfffffff8 /* Offset into specified BAR */
 #define PCI_CAP_MSIX_SIZEOF	12	/* size of MSIX registers */
 
-/* MSI-X entry's format */
+/* MSI-X Table entry format */
 #define PCI_MSIX_ENTRY_SIZE		16
 #define  PCI_MSIX_ENTRY_LOWER_ADDR	0
 #define  PCI_MSIX_ENTRY_UPPER_ADDR	4
@@ -372,7 +372,7 @@
 #define  PCI_X_CMD_SPLIT_16	0x0060	/* Max 16 */
 #define  PCI_X_CMD_SPLIT_32	0x0070	/* Max 32 */
 #define  PCI_X_CMD_MAX_SPLIT	0x0070	/* Max Outstanding Split Transactions */
-#define  PCI_X_CMD_VERSION(x) 	(((x) >> 12) & 3) /* Version */
+#define  PCI_X_CMD_VERSION(x)	(((x) >> 12) & 3) /* Version */
 #define PCI_X_STATUS		4	/* PCI-X capabilities */
 #define  PCI_X_STATUS_DEVFN	0x000000ff	/* A copy of devfn */
 #define  PCI_X_STATUS_BUS	0x0000ff00	/* A copy of bus nr */
@@ -407,8 +407,8 @@
 
 /* PCI Bridge Subsystem ID registers */
 
-#define PCI_SSVID_VENDOR_ID     4	/* PCI-Bridge subsystem vendor id register */
-#define PCI_SSVID_DEVICE_ID     6	/* PCI-Bridge subsystem device id register */
+#define PCI_SSVID_VENDOR_ID     4	/* PCI Bridge subsystem vendor ID */
+#define PCI_SSVID_DEVICE_ID     6	/* PCI Bridge subsystem device ID */
 
 /* PCI Express capability registers */
 
@@ -484,12 +484,12 @@
 #define  PCI_EXP_LNKCTL_CLKREQ_EN 0x0100 /* Enable clkreq */
 #define  PCI_EXP_LNKCTL_HAWD	0x0200	/* Hardware Autonomous Width Disable */
 #define  PCI_EXP_LNKCTL_LBMIE	0x0400	/* Link Bandwidth Management Interrupt Enable */
-#define  PCI_EXP_LNKCTL_LABIE	0x0800	/* Lnk Autonomous Bandwidth Interrupt Enable */
+#define  PCI_EXP_LNKCTL_LABIE	0x0800	/* Link Autonomous Bandwidth Interrupt Enable */
 #define PCI_EXP_LNKSTA		18	/* Link Status */
 #define  PCI_EXP_LNKSTA_CLS	0x000f	/* Current Link Speed */
 #define  PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */
 #define  PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */
-#define  PCI_EXP_LNKSTA_NLW	0x03f0	/* Nogotiated Link Width */
+#define  PCI_EXP_LNKSTA_NLW	0x03f0	/* Negotiated Link Width */
 #define  PCI_EXP_LNKSTA_NLW_SHIFT 4	/* start of NLW mask in link status */
 #define  PCI_EXP_LNKSTA_LT	0x0800	/* Link Training */
 #define  PCI_EXP_LNKSTA_SLC	0x1000	/* Slot Clock Configuration */
@@ -593,7 +593,7 @@
 #define PCI_EXT_CAP_ID_MFVC	0x08	/* Multi-Function VC Capability */
 #define PCI_EXT_CAP_ID_VC9	0x09	/* same as _VC */
 #define PCI_EXT_CAP_ID_RCRB	0x0A	/* Root Complex RB? */
-#define PCI_EXT_CAP_ID_VNDR	0x0B	/* Vendor Specific */
+#define PCI_EXT_CAP_ID_VNDR	0x0B	/* Vendor-Specific */
 #define PCI_EXT_CAP_ID_CAC	0x0C	/* Config Access - obsolete */
 #define PCI_EXT_CAP_ID_ACS	0x0D	/* Access Control Services */
 #define PCI_EXT_CAP_ID_ARI	0x0E	/* Alternate Routing ID */
@@ -602,12 +602,12 @@
 #define PCI_EXT_CAP_ID_MRIOV	0x11	/* Multi Root I/O Virtualization */
 #define PCI_EXT_CAP_ID_MCAST	0x12	/* Multicast */
 #define PCI_EXT_CAP_ID_PRI	0x13	/* Page Request Interface */
-#define PCI_EXT_CAP_ID_AMD_XXX	0x14	/* reserved for AMD */
-#define PCI_EXT_CAP_ID_REBAR	0x15	/* resizable BAR */
-#define PCI_EXT_CAP_ID_DPA	0x16	/* dynamic power alloc */
-#define PCI_EXT_CAP_ID_TPH	0x17	/* TPH request */
-#define PCI_EXT_CAP_ID_LTR	0x18	/* latency tolerance reporting */
-#define PCI_EXT_CAP_ID_SECPCI	0x19	/* Secondary PCIe */
+#define PCI_EXT_CAP_ID_AMD_XXX	0x14	/* Reserved for AMD */
+#define PCI_EXT_CAP_ID_REBAR	0x15	/* Resizable BAR */
+#define PCI_EXT_CAP_ID_DPA	0x16	/* Dynamic Power Allocation */
+#define PCI_EXT_CAP_ID_TPH	0x17	/* TPH Requester */
+#define PCI_EXT_CAP_ID_LTR	0x18	/* Latency Tolerance Reporting */
+#define PCI_EXT_CAP_ID_SECPCI	0x19	/* Secondary PCIe Capability */
 #define PCI_EXT_CAP_ID_PMUX	0x1A	/* Protocol Multiplexing */
 #define PCI_EXT_CAP_ID_PASID	0x1B	/* Process Address Space ID */
 #define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PASID
@@ -667,9 +667,9 @@
 #define PCI_ERR_ROOT_COR_RCV		0x00000001	/* ERR_COR Received */
 /* Multi ERR_COR Received */
 #define PCI_ERR_ROOT_MULTI_COR_RCV	0x00000002
-/* ERR_FATAL/NONFATAL Recevied */
+/* ERR_FATAL/NONFATAL Received */
 #define PCI_ERR_ROOT_UNCOR_RCV		0x00000004
-/* Multi ERR_FATAL/NONFATAL Recevied */
+/* Multi ERR_FATAL/NONFATAL Received */
 #define PCI_ERR_ROOT_MULTI_UNCOR_RCV	0x00000008
 #define PCI_ERR_ROOT_FIRST_FATAL	0x00000010	/* First Fatal */
 #define PCI_ERR_ROOT_NONFATAL_RCV	0x00000020	/* Non-Fatal Received */
@@ -678,7 +678,7 @@
 
 /* Virtual Channel */
 #define PCI_VC_PORT_REG1	4
-#define  PCI_VC_REG1_EVCC	0x7	/* extended vc count */
+#define  PCI_VC_REG1_EVCC	0x7	/* extended VC count */
 #define PCI_VC_PORT_REG2	8
 #define  PCI_VC_REG2_32_PHASE	0x2
 #define  PCI_VC_REG2_64_PHASE	0x4
@@ -711,7 +711,7 @@
 #define  PCI_VNDR_HEADER_LEN(x)	(((x) >> 20) & 0xfff)
 
 /*
- * Hypertransport sub capability types
+ * HyperTransport sub capability types
  *
  * Unfortunately there are both 3 bit and 5 bit capability types defined
  * in the HT spec, catering for that is a little messy. You probably don't
@@ -739,8 +739,8 @@
 #define HT_CAPTYPE_DIRECT_ROUTE	0xB0	/* Direct routing configuration */
 #define HT_CAPTYPE_VCSET	0xB8	/* Virtual Channel configuration */
 #define HT_CAPTYPE_ERROR_RETRY	0xC0	/* Retry on error configuration */
-#define HT_CAPTYPE_GEN3		0xD0	/* Generation 3 hypertransport configuration */
-#define HT_CAPTYPE_PM		0xE0	/* Hypertransport powermanagement configuration */
+#define HT_CAPTYPE_GEN3		0xD0	/* Generation 3 HyperTransport configuration */
+#define HT_CAPTYPE_PM		0xE0	/* HyperTransport power management configuration */
 #define HT_CAP_SIZEOF_LONG	28	/* slave & primary */
 #define HT_CAP_SIZEOF_SHORT	24	/* host & secondary */
 
@@ -777,14 +777,14 @@
 #define PCI_PRI_ALLOC_REQ	0x0c	/* PRI max reqs allowed */
 #define PCI_EXT_CAP_PRI_SIZEOF	16
 
-/* PASID capability */
+/* Process Address Space ID */
 #define PCI_PASID_CAP		0x04    /* PASID feature register */
 #define  PCI_PASID_CAP_EXEC	0x02	/* Exec permissions Supported */
-#define  PCI_PASID_CAP_PRIV	0x04	/* Priviledge Mode Supported */
+#define  PCI_PASID_CAP_PRIV	0x04	/* Privilege Mode Supported */
 #define PCI_PASID_CTRL		0x06    /* PASID control register */
 #define  PCI_PASID_CTRL_ENABLE	0x01	/* Enable bit */
 #define  PCI_PASID_CTRL_EXEC	0x02	/* Exec permissions Enable */
-#define  PCI_PASID_CTRL_PRIV	0x04	/* Priviledge Mode Enable */
+#define  PCI_PASID_CTRL_PRIV	0x04	/* Privilege Mode Enable */
 #define PCI_EXT_CAP_PASID_SIZEOF	8
 
 /* Single Root I/O Virtualization */
@@ -839,22 +839,22 @@
 #define PCI_ACS_CTRL		0x06	/* ACS Control Register */
 #define PCI_ACS_EGRESS_CTL_V	0x08	/* ACS Egress Control Vector */
 
-#define PCI_VSEC_HDR		4	/* extended cap - vendor specific */
+#define PCI_VSEC_HDR		4	/* extended cap - vendor-specific */
 #define  PCI_VSEC_HDR_LEN_SHIFT	20	/* shift for length field */
 
-/* sata capability */
+/* SATA capability */
 #define PCI_SATA_REGS		4	/* SATA REGs specifier */
 #define  PCI_SATA_REGS_MASK	0xF	/* location - BAR#/inline */
 #define  PCI_SATA_REGS_INLINE	0xF	/* REGS in config space */
 #define PCI_SATA_SIZEOF_SHORT	8
 #define PCI_SATA_SIZEOF_LONG	16
 
-/* resizable BARs */
+/* Resizable BARs */
 #define PCI_REBAR_CTRL		8	/* control register */
 #define  PCI_REBAR_CTRL_NBAR_MASK	(7 << 5)	/* mask for # bars */
 #define  PCI_REBAR_CTRL_NBAR_SHIFT	5	/* shift for # bars */
 
-/* dynamic power allocation */
+/* Dynamic Power Allocation */
 #define PCI_DPA_CAP		4	/* capability register */
 #define  PCI_DPA_CAP_SUBSTATE_MASK	0x1F	/* # substates - 1 */
 #define PCI_DPA_BASE_SIZEOF	16	/* size with 0 substates */
-- 
cgit v1.2.3


From 8971646294bda65f8666b60cb2cb3d5e172c99bf Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 18 Oct 2013 19:35:25 +0200
Subject: async_memcpy: convert to dmaengine_unmap_data

Use the generic unmap object to unmap dma buffers.

Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Tomasz Figa <t.figa@samsung.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Reported-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
[bzolnier: add missing unmap->len initialization]
[bzolnier: fix whitespace damage]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
[djbw: add DMA_ENGINE=n support]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 crypto/async_tx/async_memcpy.c | 40 +++++++++++++++++++++++-----------------
 drivers/dma/dmaengine.c        |  3 ++-
 include/linux/dmaengine.h      | 17 +++++++++++++++++
 3 files changed, 42 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 9e62feffb374..72750214f779 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -50,33 +50,37 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 						      &dest, 1, &src, 1, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 
-	if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
-		dma_addr_t dma_dest, dma_src;
-		unsigned long dma_prep_flags = 0;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
+
+	if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
+		unsigned long dma_prep_flags = DMA_COMPL_SKIP_SRC_UNMAP |
+					       DMA_COMPL_SKIP_DEST_UNMAP;
 
 		if (submit->cb_fn)
 			dma_prep_flags |= DMA_PREP_INTERRUPT;
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_prep_flags |= DMA_PREP_FENCE;
-		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
-					DMA_FROM_DEVICE);
-
-		dma_src = dma_map_page(device->dev, src, src_offset, len,
-				       DMA_TO_DEVICE);
-
-		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
-						    len, dma_prep_flags);
-		if (!tx) {
-			dma_unmap_page(device->dev, dma_dest, len,
-				       DMA_FROM_DEVICE);
-			dma_unmap_page(device->dev, dma_src, len,
-				       DMA_TO_DEVICE);
-		}
+
+		unmap->to_cnt = 1;
+		unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
+					      DMA_TO_DEVICE);
+		unmap->from_cnt = 1;
+		unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
+					      DMA_FROM_DEVICE);
+		unmap->len = len;
+
+		tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+						    unmap->addr[0], len,
+						    dma_prep_flags);
 	}
 
 	if (tx) {
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
+
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 	} else {
 		void *dest_buf, *src_buf;
@@ -96,6 +100,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 		async_tx_sync_epilog(submit);
 	}
 
+	dmaengine_unmap_put(unmap);
+
 	return tx;
 }
 EXPORT_SYMBOL_GPL(async_memcpy);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index e721a1caff7f..54138b57b37c 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1009,7 +1009,7 @@ static int __init dmaengine_init_unmap_pool(void)
 	return -ENOMEM;
 }
 
-static struct dmaengine_unmap_data *
+struct dmaengine_unmap_data *
 dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
 {
 	struct dmaengine_unmap_data *unmap;
@@ -1024,6 +1024,7 @@ dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
 
 	return unmap;
 }
+EXPORT_SYMBOL(dmaengine_get_unmap_data);
 
 /**
  * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 2fe855a7cab1..3782cdb782a8 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -457,6 +457,7 @@ struct dma_async_tx_descriptor {
 #endif
 };
 
+#ifdef CONFIG_DMA_ENGINE
 static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
 				 struct dmaengine_unmap_data *unmap)
 {
@@ -464,7 +465,23 @@ static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
 	tx->unmap = unmap;
 }
 
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags);
 void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap);
+#else
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+				 struct dmaengine_unmap_data *unmap)
+{
+}
+static inline struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+	return NULL;
+}
+static inline void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+}
+#endif
 
 static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx)
 {
-- 
cgit v1.2.3


From 0776ae7b89782124ddd72eafe0b1e0fdcdabe32e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Fri, 18 Oct 2013 19:35:33 +0200
Subject: dmaengine: remove DMA unmap flags

Remove no longer needed DMA unmap flags:
- DMA_COMPL_SKIP_SRC_UNMAP
- DMA_COMPL_SKIP_DEST_UNMAP
- DMA_COMPL_SRC_UNMAP_SINGLE
- DMA_COMPL_DEST_UNMAP_SINGLE

Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Tomasz Figa <t.figa@samsung.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Jon Mason <jon.mason@intel.com>
Acked-by: Mark Brown <broonie@linaro.org>
[djbw: clean up straggling skip unmap flags in ntb]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 crypto/async_tx/async_memcpy.c           |  3 +--
 crypto/async_tx/async_pq.c               |  1 -
 crypto/async_tx/async_raid6_recov.c      |  8 ++------
 crypto/async_tx/async_xor.c              |  6 ++----
 drivers/ata/pata_arasan_cf.c             |  3 +--
 drivers/dma/dmaengine.c                  |  3 +--
 drivers/dma/dmatest.c                    |  3 +--
 drivers/dma/ioat/dma.c                   |  3 +--
 drivers/dma/ioat/dma_v3.c                | 12 +++---------
 drivers/media/platform/m2m-deinterlace.c |  3 +--
 drivers/media/platform/timblogiw.c       |  2 +-
 drivers/misc/carma/carma-fpga.c          |  3 +--
 drivers/mtd/nand/atmel_nand.c            |  3 +--
 drivers/mtd/nand/fsmc_nand.c             |  2 --
 drivers/net/ethernet/micrel/ks8842.c     |  6 ++----
 drivers/ntb/ntb_transport.c              | 11 +++--------
 drivers/spi/spi-dw-mid.c                 |  4 ++--
 include/linux/dmaengine.h                | 18 ++++--------------
 18 files changed, 27 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 72750214f779..f8c0b8dbeb75 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -56,8 +56,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
 
 	if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
-		unsigned long dma_prep_flags = DMA_COMPL_SKIP_SRC_UNMAP |
-					       DMA_COMPL_SKIP_DEST_UNMAP;
+		unsigned long dma_prep_flags = 0;
 
 		if (submit->cb_fn)
 			dma_prep_flags |= DMA_PREP_INTERRUPT;
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 4126b56fbc01..d05327caf69d 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -62,7 +62,6 @@ do_async_gen_syndrome(struct dma_chan *chan,
 	dma_addr_t dma_dest[2];
 	int src_off = 0;
 
-	dma_flags |= DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
 	if (submit->flags & ASYNC_TX_FENCE)
 		dma_flags |= DMA_PREP_FENCE;
 
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index a3a72a784421..934a84981495 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -47,9 +47,7 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
 		struct device *dev = dma->dev;
 		dma_addr_t pq[2];
 		struct dma_async_tx_descriptor *tx;
-		enum dma_ctrl_flags dma_flags = DMA_COMPL_SKIP_SRC_UNMAP |
-						DMA_COMPL_SKIP_DEST_UNMAP |
-						DMA_PREP_PQ_DISABLE_P;
+		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
@@ -113,9 +111,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
 		dma_addr_t dma_dest[2];
 		struct device *dev = dma->dev;
 		struct dma_async_tx_descriptor *tx;
-		enum dma_ctrl_flags dma_flags = DMA_COMPL_SKIP_SRC_UNMAP |
-						DMA_COMPL_SKIP_DEST_UNMAP |
-						DMA_PREP_PQ_DISABLE_P;
+		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index d2cc77d501c7..3c562f5a60bb 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -41,7 +41,7 @@ do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
 	dma_async_tx_callback cb_fn_orig = submit->cb_fn;
 	void *cb_param_orig = submit->cb_param;
 	enum async_tx_flags flags_orig = submit->flags;
-	enum dma_ctrl_flags dma_flags;
+	enum dma_ctrl_flags dma_flags = 0;
 	int src_cnt = unmap->to_cnt;
 	int xor_src_cnt;
 	dma_addr_t dma_dest = unmap->addr[unmap->to_cnt];
@@ -55,7 +55,6 @@ do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
 		/* if we are submitting additional xors, leave the chain open
 		 * and clear the callback parameters
 		 */
-		dma_flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
 		if (src_cnt > xor_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
 			submit->flags |= ASYNC_TX_FENCE;
@@ -284,8 +283,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 
 	if (unmap && src_cnt <= device->max_xor &&
 	    is_dma_xor_aligned(device, offset, 0, len)) {
-		unsigned long dma_prep_flags = DMA_COMPL_SKIP_SRC_UNMAP |
-					       DMA_COMPL_SKIP_DEST_UNMAP;
+		unsigned long dma_prep_flags = 0;
 		int i;
 
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
index 853f610af28f..e88690ebfd82 100644
--- a/drivers/ata/pata_arasan_cf.c
+++ b/drivers/ata/pata_arasan_cf.c
@@ -396,8 +396,7 @@ dma_xfer(struct arasan_cf_dev *acdev, dma_addr_t src, dma_addr_t dest, u32 len)
 	struct dma_async_tx_descriptor *tx;
 	struct dma_chan *chan = acdev->dma_chan;
 	dma_cookie_t cookie;
-	unsigned long flags = DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-		DMA_COMPL_SKIP_DEST_UNMAP;
+	unsigned long flags = DMA_PREP_INTERRUPT;
 	int ret = 0;
 
 	tx = chan->device->device_prep_dma_memcpy(chan, dest, src, len, flags);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index f878c808466e..b69ac3892b86 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1065,8 +1065,7 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
 	unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len,
 				      DMA_FROM_DEVICE);
 	unmap->len = len;
-	flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_SRC_UNMAP |
-		DMA_COMPL_SKIP_DEST_UNMAP;
+	flags = DMA_CTRL_ACK;
 	tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0],
 					 len, flags);
 
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index f4a2a25fae31..5791091c13ca 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -599,8 +599,7 @@ static int dmatest_func(void *data)
 	/*
 	 * src and dst buffers are freed by ourselves below
 	 */
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT |
-		DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
 	while (!kthread_should_stop()
 	       && !(params->iterations && total_tests >= params->iterations)) {
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index c123e32dbbb0..6fcf741ad91b 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -818,8 +818,7 @@ int ioat_dma_self_test(struct ioatdma_device *device)
 
 	dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
 	dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
-	flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP |
-		DMA_PREP_INTERRUPT;
+	flags = DMA_PREP_INTERRUPT;
 	tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
 						   IOAT_TEST_SIZE, flags);
 	if (!tx) {
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 43386c171bba..a4798f0cc225 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -1279,9 +1279,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 					   DMA_TO_DEVICE);
 	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
 				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
-				      DMA_PREP_INTERRUPT |
-				      DMA_COMPL_SKIP_SRC_UNMAP |
-				      DMA_COMPL_SKIP_DEST_UNMAP);
+				      DMA_PREP_INTERRUPT);
 
 	if (!tx) {
 		dev_err(dev, "Self-test xor prep failed\n");
@@ -1342,9 +1340,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 					   DMA_TO_DEVICE);
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-					  &xor_val_result, DMA_PREP_INTERRUPT |
-					  DMA_COMPL_SKIP_SRC_UNMAP |
-					  DMA_COMPL_SKIP_DEST_UNMAP);
+					  &xor_val_result, DMA_PREP_INTERRUPT);
 	if (!tx) {
 		dev_err(dev, "Self-test zero prep failed\n");
 		err = -ENODEV;
@@ -1389,9 +1385,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 					   DMA_TO_DEVICE);
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-					  &xor_val_result, DMA_PREP_INTERRUPT |
-					  DMA_COMPL_SKIP_SRC_UNMAP |
-					  DMA_COMPL_SKIP_DEST_UNMAP);
+					  &xor_val_result, DMA_PREP_INTERRUPT);
 	if (!tx) {
 		dev_err(dev, "Self-test 2nd zero prep failed\n");
 		err = -ENODEV;
diff --git a/drivers/media/platform/m2m-deinterlace.c b/drivers/media/platform/m2m-deinterlace.c
index 540516ca872c..879ea6fdd1be 100644
--- a/drivers/media/platform/m2m-deinterlace.c
+++ b/drivers/media/platform/m2m-deinterlace.c
@@ -341,8 +341,7 @@ static void deinterlace_issue_dma(struct deinterlace_ctx *ctx, int op,
 	ctx->xt->dir = DMA_MEM_TO_MEM;
 	ctx->xt->src_sgl = false;
 	ctx->xt->dst_sgl = true;
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT |
-		DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SKIP_SRC_UNMAP;
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
 	tx = dmadev->device_prep_interleaved_dma(chan, ctx->xt, flags);
 	if (tx == NULL) {
diff --git a/drivers/media/platform/timblogiw.c b/drivers/media/platform/timblogiw.c
index b557caf5b1a4..59a95e3ab0e3 100644
--- a/drivers/media/platform/timblogiw.c
+++ b/drivers/media/platform/timblogiw.c
@@ -565,7 +565,7 @@ static void buffer_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb)
 
 	desc = dmaengine_prep_slave_sg(fh->chan,
 		buf->sg, sg_elems, DMA_DEV_TO_MEM,
-		DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+		DMA_PREP_INTERRUPT);
 	if (!desc) {
 		spin_lock_irq(&fh->queue_lock);
 		list_del_init(&vb->queue);
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c
index 7b56563f8b74..5335104e7c84 100644
--- a/drivers/misc/carma/carma-fpga.c
+++ b/drivers/misc/carma/carma-fpga.c
@@ -631,8 +631,7 @@ static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf)
 	struct dma_async_tx_descriptor *tx;
 	dma_cookie_t cookie;
 	dma_addr_t dst, src;
-	unsigned long dma_flags = DMA_COMPL_SKIP_DEST_UNMAP |
-				  DMA_COMPL_SKIP_SRC_UNMAP;
+	unsigned long dma_flags = 0;
 
 	dst_sg = buf->vb.sglist;
 	dst_nents = buf->vb.sglen;
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 060feeaf6b3e..2a837cb425d7 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -375,8 +375,7 @@ static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
 
 	dma_dev = host->dma_chan->device;
 
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-		DMA_COMPL_SKIP_DEST_UNMAP;
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
 	phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
 	if (dma_mapping_error(dma_dev->dev, phys_addr)) {
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 3dc1a7564d87..8b2752263db9 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -573,8 +573,6 @@ static int dma_xfer(struct fsmc_nand_data *host, void *buffer, int len,
 	dma_dev = chan->device;
 	dma_addr = dma_map_single(dma_dev->dev, buffer, len, direction);
 
-	flags |= DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
-
 	if (direction == DMA_TO_DEVICE) {
 		dma_src = dma_addr;
 		dma_dst = host->data_pa;
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index 0951f7aca1ef..822616e3c375 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -459,8 +459,7 @@ static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev)
 		sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
 
 	ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-		&ctl->sg, 1, DMA_MEM_TO_DEV,
-		DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+		&ctl->sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
 	if (!ctl->adesc)
 		return NETDEV_TX_BUSY;
 
@@ -571,8 +570,7 @@ static int __ks8842_start_new_rx_dma(struct net_device *netdev)
 		sg_dma_len(sg) = DMA_BUFFER_SIZE;
 
 		ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-			sg, 1, DMA_DEV_TO_MEM,
-			DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+			sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
 
 		if (!ctl->adesc)
 			goto out;
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 222c2baa3a4b..d0222f13d154 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -1037,7 +1037,6 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
 	struct dmaengine_unmap_data *unmap;
 	dma_cookie_t cookie;
 	void *buf = entry->buf;
-	unsigned long flags;
 
 	entry->len = len;
 
@@ -1073,10 +1072,9 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
 
 	unmap->from_cnt = 1;
 
-	flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP |
-		DMA_PREP_INTERRUPT;
 	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
-					     unmap->addr[0], len, flags);
+					     unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
 	if (!txd)
 		goto err_get_unmap;
 
@@ -1266,7 +1264,6 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
 	void __iomem *offset;
 	size_t len = entry->len;
 	void *buf = entry->buf;
-	unsigned long flags;
 
 	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
 	hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1301,10 +1298,8 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
 
 	unmap->to_cnt = 1;
 
-	flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP |
-		DMA_PREP_INTERRUPT;
 	txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
-					    flags);
+					     DMA_PREP_INTERRUPT);
 	if (!txd)
 		goto err_get_unmap;
 
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index b9f0192758d6..6d207afec8cb 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -150,7 +150,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 				&dws->tx_sgl,
 				1,
 				DMA_MEM_TO_DEV,
-				DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+				DMA_PREP_INTERRUPT);
 	txdesc->callback = dw_spi_dma_done;
 	txdesc->callback_param = dws;
 
@@ -173,7 +173,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 				&dws->rx_sgl,
 				1,
 				DMA_DEV_TO_MEM,
-				DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+				DMA_PREP_INTERRUPT);
 	rxdesc->callback = dw_spi_dma_done;
 	rxdesc->callback_param = dws;
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 3782cdb782a8..491072cb5ba0 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -171,12 +171,6 @@ struct dma_interleaved_template {
  * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client
  *  acknowledges receipt, i.e. has has a chance to establish any dependency
  *  chains
- * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
- * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
- * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
- * 	(if not set, do the source dma-unmapping as page)
- * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
- * 	(if not set, do the destination dma-unmapping as page)
  * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
  * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
  * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
@@ -188,14 +182,10 @@ struct dma_interleaved_template {
 enum dma_ctrl_flags {
 	DMA_PREP_INTERRUPT = (1 << 0),
 	DMA_CTRL_ACK = (1 << 1),
-	DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
-	DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
-	DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
-	DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
-	DMA_PREP_PQ_DISABLE_P = (1 << 6),
-	DMA_PREP_PQ_DISABLE_Q = (1 << 7),
-	DMA_PREP_CONTINUE = (1 << 8),
-	DMA_PREP_FENCE = (1 << 9),
+	DMA_PREP_PQ_DISABLE_P = (1 << 2),
+	DMA_PREP_PQ_DISABLE_Q = (1 << 3),
+	DMA_PREP_CONTINUE = (1 << 4),
+	DMA_PREP_FENCE = (1 << 5),
 };
 
 /**
-- 
cgit v1.2.3


From 7b1998116bbb2f3e5dd6cb9a8ee6db479b0b50a9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 11 Nov 2013 22:41:56 +0100
Subject: ACPI / driver core: Store an ACPI device pointer in struct
 acpi_dev_node

Modify struct acpi_dev_node to contain a pointer to struct acpi_device
associated with the given device object (that is, its ACPI companion
device) instead of an ACPI handle corresponding to it.  Introduce two
new macros for manipulating that pointer in a CONFIG_ACPI-safe way,
ACPI_COMPANION() and ACPI_COMPANION_SET(), and rework the
ACPI_HANDLE() macro to take the above changes into account.
Drop the ACPI_HANDLE_SET() macro entirely and rework its users to
use ACPI_COMPANION_SET() instead.  For some of them who used to
pass the result of acpi_get_child() directly to ACPI_HANDLE_SET()
introduce a helper routine acpi_preset_companion() doing an
equivalent thing.

The main motivation for doing this is that there are things
represented by struct acpi_device objects that don't have valid
ACPI handles (so called fixed ACPI hardware features, such as
power and sleep buttons) and we would like to create platform
device objects for them and "glue" them to their ACPI companions
in the usual way (which currently is impossible due to the
lack of valid ACPI handles).  However, there are more reasons
why it may be useful.

First, struct acpi_device pointers allow of much better type checking
than void pointers which are ACPI handles, so it should be more
difficult to write buggy code using modified struct acpi_dev_node
and the new macros.  Second, the change should help to reduce (over
time) the number of places in which the result of ACPI_HANDLE() is
passed to acpi_bus_get_device() in order to obtain a pointer to the
struct acpi_device associated with the given "physical" device,
because now that pointer is returned by ACPI_COMPANION() directly.
Finally, the change should make it easier to write generic code that
will build both for CONFIG_ACPI set and unset without adding explicit
compiler directives to it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com> # on Haswell
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Aaron Lu <aaron.lu@intel.com> # for ATA and SDIO part
---
 arch/ia64/hp/common/sba_iommu.c              |  2 +-
 arch/ia64/include/asm/pci.h                  |  2 +-
 arch/ia64/pci/pci.c                          |  6 ++--
 arch/ia64/sn/kernel/io_acpi_init.c           |  4 +--
 arch/x86/include/asm/pci.h                   |  2 +-
 arch/x86/pci/acpi.c                          |  4 +--
 drivers/acpi/acpi_platform.c                 |  2 +-
 drivers/acpi/device_pm.c                     |  6 +---
 drivers/acpi/glue.c                          | 47 ++++++++++++++--------------
 drivers/ata/libata-acpi.c                    |  4 +--
 drivers/base/platform.c                      |  4 +--
 drivers/gpio/gpiolib.c                       |  1 +
 drivers/gpu/drm/radeon/radeon_atpx_handler.c |  3 +-
 drivers/hid/i2c-hid/i2c-hid.c                |  2 +-
 drivers/i2c/i2c-core.c                       |  4 +--
 drivers/ide/ide-acpi.c                       |  3 +-
 drivers/mmc/core/sdio_bus.c                  |  3 +-
 drivers/pci/hotplug/sgi_hotplug.c            |  8 ++---
 drivers/spi/spi.c                            |  2 +-
 include/acpi/acpi_bus.h                      |  2 +-
 include/linux/acpi.h                         | 15 +++++++++
 include/linux/device.h                       | 12 ++-----
 22 files changed, 70 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index d43daf192b21..4c530a82fc46 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1992,7 +1992,7 @@ sba_connect_bus(struct pci_bus *bus)
 	if (PCI_CONTROLLER(bus)->iommu)
 		return;
 
-	handle = PCI_CONTROLLER(bus)->acpi_handle;
+	handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
 	if (!handle)
 		return;
 
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 80775f55f03f..71fbaaa495cc 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,7 +95,7 @@ struct iospace_resource {
 };
 
 struct pci_controller {
-	void *acpi_handle;
+	struct acpi_device *companion;
 	void *iommu;
 	int segment;
 	int node;		/* nearest node with memory or -1 for global allocation */
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 2326790b7d8b..9e4938d8ca4d 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -436,9 +436,9 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 	if (!controller)
 		return NULL;
 
-	controller->acpi_handle = device->handle;
+	controller->companion = device;
 
-	pxm = acpi_get_pxm(controller->acpi_handle);
+	pxm = acpi_get_pxm(device->handle);
 #ifdef CONFIG_NUMA
 	if (pxm >= 0)
 		controller->node = pxm_to_node(pxm);
@@ -489,7 +489,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
 	struct pci_controller *controller = bridge->bus->sysdata;
 
-	ACPI_HANDLE_SET(&bridge->dev, controller->acpi_handle);
+	ACPI_COMPANION_SET(&bridge->dev, controller->companion);
 	return 0;
 }
 
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c
index b1725398b5af..0640739cc20c 100644
--- a/arch/ia64/sn/kernel/io_acpi_init.c
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -132,7 +132,7 @@ sn_get_bussoft_ptr(struct pci_bus *bus)
 	struct acpi_resource_vendor_typed *vendor;
 
 
-	handle = PCI_CONTROLLER(bus)->acpi_handle;
+	handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
 	status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
 					  &sn_uuid, &buffer);
 	if (ACPI_FAILURE(status)) {
@@ -360,7 +360,7 @@ sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info,
 	acpi_status status;
 	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 
-	rootbus_handle = PCI_CONTROLLER(dev)->acpi_handle;
+	rootbus_handle = acpi_device_handle(PCI_CONTROLLER(dev)->companion);
         status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL,
                                        &segment);
         if (ACPI_SUCCESS(status)) {
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 7d7443283a9d..947b5c417e83 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -15,7 +15,7 @@ struct pci_sysdata {
 	int		domain;		/* PCI domain */
 	int		node;		/* NUMA node */
 #ifdef CONFIG_ACPI
-	void		*acpi;		/* ACPI-specific data */
+	struct acpi_device *companion;	/* ACPI companion device */
 #endif
 #ifdef CONFIG_X86_64
 	void		*iommu;		/* IOMMU private data */
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 7fb24e53d4c8..4f25ec077552 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -518,7 +518,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 	sd = &info->sd;
 	sd->domain = domain;
 	sd->node = node;
-	sd->acpi = device->handle;
+	sd->companion = device;
 	/*
 	 * Maybe the desired pci bus has been already scanned. In such case
 	 * it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -589,7 +589,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
 	struct pci_sysdata *sd = bridge->bus->sysdata;
 
-	ACPI_HANDLE_SET(&bridge->dev, sd->acpi);
+	ACPI_COMPANION_SET(&bridge->dev, sd->companion);
 	return 0;
 }
 
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 8a4cfc7e71f0..dbfe49e5fd63 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -111,7 +111,7 @@ int acpi_create_platform_device(struct acpi_device *adev,
 	pdevinfo.id = -1;
 	pdevinfo.res = resources;
 	pdevinfo.num_res = count;
-	pdevinfo.acpi_node.handle = adev->handle;
+	pdevinfo.acpi_node.companion = adev;
 	pdev = platform_device_register_full(&pdevinfo);
 	if (IS_ERR(pdev)) {
 		dev_err(&adev->dev, "platform device creation failed: %ld\n",
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index d42b2fb5a7e9..119afda0968c 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -22,16 +22,12 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
-#include <linux/device.h>
+#include <linux/acpi.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
 
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
-
 #include "internal.h"
 
 #define _COMPONENT	ACPI_POWER_COMPONENT
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 10f0f40587bb..782071fd3df3 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -197,30 +197,27 @@ static void acpi_physnode_link_name(char *buf, unsigned int node_id)
 
 int acpi_bind_one(struct device *dev, acpi_handle handle)
 {
-	struct acpi_device *acpi_dev;
-	acpi_status status;
+	struct acpi_device *acpi_dev = NULL;
 	struct acpi_device_physical_node *physical_node, *pn;
 	char physical_node_name[PHYSICAL_NODE_NAME_SIZE];
 	struct list_head *physnode_list;
 	unsigned int node_id;
 	int retval = -EINVAL;
 
-	if (ACPI_HANDLE(dev)) {
+	if (ACPI_COMPANION(dev)) {
 		if (handle) {
-			dev_warn(dev, "ACPI handle is already set\n");
+			dev_warn(dev, "ACPI companion already set\n");
 			return -EINVAL;
 		} else {
-			handle = ACPI_HANDLE(dev);
+			acpi_dev = ACPI_COMPANION(dev);
 		}
+	} else {
+		acpi_bus_get_device(handle, &acpi_dev);
 	}
-	if (!handle)
+	if (!acpi_dev)
 		return -EINVAL;
 
 	get_device(dev);
-	status = acpi_bus_get_device(handle, &acpi_dev);
-	if (ACPI_FAILURE(status))
-		goto err;
-
 	physical_node = kzalloc(sizeof(*physical_node), GFP_KERNEL);
 	if (!physical_node) {
 		retval = -ENOMEM;
@@ -242,7 +239,7 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
 
 			dev_warn(dev, "Already associated with ACPI node\n");
 			kfree(physical_node);
-			if (ACPI_HANDLE(dev) != handle)
+			if (ACPI_COMPANION(dev) != acpi_dev)
 				goto err;
 
 			put_device(dev);
@@ -259,8 +256,8 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
 	list_add(&physical_node->node, physnode_list);
 	acpi_dev->physical_node_count++;
 
-	if (!ACPI_HANDLE(dev))
-		ACPI_HANDLE_SET(dev, acpi_dev->handle);
+	if (!ACPI_COMPANION(dev))
+		ACPI_COMPANION_SET(dev, acpi_dev);
 
 	acpi_physnode_link_name(physical_node_name, node_id);
 	retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
@@ -283,7 +280,7 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
 	return 0;
 
  err:
-	ACPI_HANDLE_SET(dev, NULL);
+	ACPI_COMPANION_SET(dev, NULL);
 	put_device(dev);
 	return retval;
 }
@@ -291,19 +288,12 @@ EXPORT_SYMBOL_GPL(acpi_bind_one);
 
 int acpi_unbind_one(struct device *dev)
 {
+	struct acpi_device *acpi_dev = ACPI_COMPANION(dev);
 	struct acpi_device_physical_node *entry;
-	struct acpi_device *acpi_dev;
-	acpi_status status;
 
-	if (!ACPI_HANDLE(dev))
+	if (!acpi_dev)
 		return 0;
 
-	status = acpi_bus_get_device(ACPI_HANDLE(dev), &acpi_dev);
-	if (ACPI_FAILURE(status)) {
-		dev_err(dev, "Oops, ACPI handle corrupt in %s()\n", __func__);
-		return -EINVAL;
-	}
-
 	mutex_lock(&acpi_dev->physical_node_lock);
 
 	list_for_each_entry(entry, &acpi_dev->physical_node_list, node)
@@ -316,7 +306,7 @@ int acpi_unbind_one(struct device *dev)
 			acpi_physnode_link_name(physnode_name, entry->node_id);
 			sysfs_remove_link(&acpi_dev->dev.kobj, physnode_name);
 			sysfs_remove_link(&dev->kobj, "firmware_node");
-			ACPI_HANDLE_SET(dev, NULL);
+			ACPI_COMPANION_SET(dev, NULL);
 			/* acpi_bind_one() increase refcnt by one. */
 			put_device(dev);
 			kfree(entry);
@@ -328,6 +318,15 @@ int acpi_unbind_one(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(acpi_unbind_one);
 
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr)
+{
+	struct acpi_device *adev;
+
+	if (!acpi_bus_get_device(acpi_get_child(parent, addr), &adev))
+		ACPI_COMPANION_SET(dev, adev);
+}
+EXPORT_SYMBOL_GPL(acpi_preset_companion);
+
 static int acpi_platform_notify(struct device *dev)
 {
 	struct acpi_bus_type *type = acpi_get_bus_type(dev);
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index ab714d2ad978..4372cfa883c9 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -185,7 +185,7 @@ void ata_acpi_bind_port(struct ata_port *ap)
 	if (libata_noacpi || ap->flags & ATA_FLAG_ACPI_SATA || !host_handle)
 		return;
 
-	ACPI_HANDLE_SET(&ap->tdev, acpi_get_child(host_handle, ap->port_no));
+	acpi_preset_companion(&ap->tdev, host_handle, ap->port_no);
 
 	if (ata_acpi_gtm(ap, &ap->__acpi_init_gtm) == 0)
 		ap->pflags |= ATA_PFLAG_INIT_GTM_VALID;
@@ -222,7 +222,7 @@ void ata_acpi_bind_dev(struct ata_device *dev)
 		parent_handle = port_handle;
 	}
 
-	ACPI_HANDLE_SET(&dev->tdev, acpi_get_child(parent_handle, adr));
+	acpi_preset_companion(&dev->tdev, parent_handle, adr);
 
 	register_hotplug_dock_device(ata_dev_acpi_handle(dev),
 				     &ata_acpi_dev_dock_ops, dev, NULL, NULL);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 47051cd25113..3a94b799f166 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -432,7 +432,7 @@ struct platform_device *platform_device_register_full(
 		goto err_alloc;
 
 	pdev->dev.parent = pdevinfo->parent;
-	ACPI_HANDLE_SET(&pdev->dev, pdevinfo->acpi_node.handle);
+	ACPI_COMPANION_SET(&pdev->dev, pdevinfo->acpi_node.companion);
 
 	if (pdevinfo->dma_mask) {
 		/*
@@ -463,7 +463,7 @@ struct platform_device *platform_device_register_full(
 	ret = platform_device_add(pdev);
 	if (ret) {
 err:
-		ACPI_HANDLE_SET(&pdev->dev, NULL);
+		ACPI_COMPANION_SET(&pdev->dev, NULL);
 		kfree(pdev->dev.dma_mask);
 
 err_alloc:
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 7dd446150294..4e10b10d3ddd 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -13,6 +13,7 @@
 #include <linux/acpi_gpio.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
+#include <linux/acpi.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/gpio.h>
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index d96070bf8388..27a6e9dbd0c4 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -8,8 +8,7 @@
  */
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
+#include <linux/acpi.h>
 #include <linux/pci.h>
 
 #include "radeon_acpi.h"
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index fd7ce374f812..b7ecc3623a43 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -1012,7 +1012,7 @@ static int i2c_hid_probe(struct i2c_client *client,
 	hid->hid_get_raw_report = i2c_hid_get_raw_report;
 	hid->hid_output_raw_report = i2c_hid_output_raw_report;
 	hid->dev.parent = &client->dev;
-	ACPI_HANDLE_SET(&hid->dev, ACPI_HANDLE(&client->dev));
+	ACPI_COMPANION_SET(&hid->dev, ACPI_COMPANION(&client->dev));
 	hid->bus = BUS_I2C;
 	hid->version = le16_to_cpu(ihid->hdesc.bcdVersion);
 	hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID);
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 75ba8608383e..f74af33f5ddc 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -674,7 +674,7 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
 	client->dev.bus = &i2c_bus_type;
 	client->dev.type = &i2c_client_type;
 	client->dev.of_node = info->of_node;
-	ACPI_HANDLE_SET(&client->dev, info->acpi_node.handle);
+	ACPI_COMPANION_SET(&client->dev, info->acpi_node.companion);
 
 	/* For 10-bit clients, add an arbitrary offset to avoid collisions */
 	dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
@@ -1103,7 +1103,7 @@ static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
 		return AE_OK;
 
 	memset(&info, 0, sizeof(info));
-	info.acpi_node.handle = handle;
+	info.acpi_node.companion = adev;
 	info.irq = -1;
 
 	INIT_LIST_HEAD(&resource_list);
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index 140c8ef50529..3662c4c536d6 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -7,6 +7,7 @@
  * Copyright (C) 2006 Hannes Reinecke
  */
 
+#include <linux/acpi.h>
 #include <linux/ata.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -19,8 +20,6 @@
 #include <linux/dmi.h>
 #include <linux/module.h>
 
-#include <acpi/acpi_bus.h>
-
 #define REGS_PER_GTF		7
 
 struct GTM_buffer {
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index ef8956568c3a..157b570ba343 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -308,8 +308,7 @@ static void sdio_acpi_set_handle(struct sdio_func *func)
 	struct mmc_host *host = func->card->host;
 	u64 addr = (host->slotno << 16) | func->num;
 
-	ACPI_HANDLE_SET(&func->dev,
-			acpi_get_child(ACPI_HANDLE(host->parent), addr));
+	acpi_preset_companion(&func->dev, ACPI_HANDLE(host->parent), addr);
 }
 #else
 static inline void sdio_acpi_set_handle(struct sdio_func *func) {}
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index b2781dfe60e9..5b05a68cca6c 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -9,6 +9,7 @@
  * Work to add BIOS PROM support was completed by Mike Habeck.
  */
 
+#include <linux/acpi.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -29,7 +30,6 @@
 #include <asm/sn/sn_feature_sets.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/types.h>
-#include <linux/acpi.h>
 #include <asm/sn/acpi.h>
 
 #include "../pci.h"
@@ -414,7 +414,7 @@ static int enable_slot(struct hotplug_slot *bss_hotplug_slot)
 		acpi_handle rethandle;
 		acpi_status ret;
 
-		phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+		phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
 
 		if (acpi_bus_get_device(phandle, &pdevice)) {
 			dev_dbg(&slot->pci_bus->self->dev,
@@ -495,7 +495,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
 
 	/* free the ACPI resources for the slot */
 	if (SN_ACPI_BASE_SUPPORT() &&
-            PCI_CONTROLLER(slot->pci_bus)->acpi_handle) {
+            PCI_CONTROLLER(slot->pci_bus)->companion) {
 		unsigned long long adr;
 		struct acpi_device *device;
 		acpi_handle phandle;
@@ -504,7 +504,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
 		acpi_status ret;
 
 		/* Get the rootbus node pointer */
-		phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+		phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
 
 		acpi_scan_lock_acquire();
 		/*
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 927998aa5e71..a968d8549ee5 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1144,7 +1144,7 @@ static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level,
 		return AE_NO_MEMORY;
 	}
 
-	ACPI_HANDLE_SET(&spi->dev, handle);
+	ACPI_COMPANION_SET(&spi->dev, adev);
 	spi->irq = -1;
 
 	INIT_LIST_HEAD(&resource_list);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 89c60b0f6408..7b2de026a4f3 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -431,9 +431,9 @@ static inline acpi_handle acpi_get_child(acpi_handle handle, u64 addr)
 {
 	return acpi_find_child(handle, addr, false);
 }
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr);
 int acpi_is_root_bridge(acpi_handle);
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
-#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev))
 
 int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
 int acpi_disable_wakeup_device_power(struct acpi_device *dev);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b0972c4ce81c..47369aadb1d1 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -44,6 +44,15 @@
 #include <acpi/acpi_numa.h>
 #include <asm/acpi.h>
 
+static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
+{
+	return adev ? adev->handle : NULL;
+}
+
+#define ACPI_COMPANION(dev)		((dev)->acpi_node.companion)
+#define ACPI_COMPANION_SET(dev, adev)	ACPI_COMPANION(dev) = (adev)
+#define ACPI_HANDLE(dev)		acpi_device_handle(ACPI_COMPANION(dev))
+
 enum acpi_irq_model_id {
 	ACPI_IRQ_MODEL_PIC = 0,
 	ACPI_IRQ_MODEL_IOAPIC,
@@ -401,6 +410,10 @@ static inline bool acpi_driver_match_device(struct device *dev,
 
 #define acpi_disabled 1
 
+#define ACPI_COMPANION(dev)		(NULL)
+#define ACPI_COMPANION_SET(dev, adev)	do { } while (0)
+#define ACPI_HANDLE(dev)		(NULL)
+
 static inline void acpi_early_init(void) { }
 
 static inline int early_acpi_boot_init(void)
@@ -469,6 +482,8 @@ static inline bool acpi_driver_match_device(struct device *dev,
 
 #endif	/* !CONFIG_ACPI */
 
+#define DEVICE_ACPI_HANDLE(dev)	ACPI_HANDLE(dev)
+
 #ifdef CONFIG_ACPI
 void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
 			       u32 pm1a_ctrl,  u32 pm1b_ctrl));
diff --git a/include/linux/device.h b/include/linux/device.h
index b025925df7f7..952b01033c32 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -644,9 +644,11 @@ struct device_dma_parameters {
 	unsigned long segment_boundary_mask;
 };
 
+struct acpi_device;
+
 struct acpi_dev_node {
 #ifdef CONFIG_ACPI
-	void	*handle;
+	struct acpi_device *companion;
 #endif
 };
 
@@ -790,14 +792,6 @@ static inline struct device *kobj_to_dev(struct kobject *kobj)
 	return container_of(kobj, struct device, kobj);
 }
 
-#ifdef CONFIG_ACPI
-#define ACPI_HANDLE(dev)	((dev)->acpi_node.handle)
-#define ACPI_HANDLE_SET(dev, _handle_)	(dev)->acpi_node.handle = (_handle_)
-#else
-#define ACPI_HANDLE(dev)	(NULL)
-#define ACPI_HANDLE_SET(dev, _handle_)	do { } while (0)
-#endif
-
 /* Get the wakeup routines, which depend on struct device */
 #include <linux/pm_wakeup.h>
 
-- 
cgit v1.2.3


From 3a83f992490f8235661b768e53bd5f14915420ac Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 14 Nov 2013 23:17:21 +0100
Subject: ACPI: Eliminate the DEVICE_ACPI_HANDLE() macro

Since DEVICE_ACPI_HANDLE() is now literally identical to
ACPI_HANDLE(), replace it with the latter everywhere and drop its
definition from include/acpi.h.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/device_pm.c                       |  8 ++++----
 drivers/gpu/drm/i915/intel_acpi.c              |  2 +-
 drivers/gpu/drm/i915/intel_opregion.c          |  2 +-
 drivers/gpu/drm/nouveau/core/subdev/mxm/base.c |  2 +-
 drivers/gpu/drm/nouveau/nouveau_acpi.c         |  6 +++---
 drivers/gpu/drm/radeon/radeon_acpi.c           |  8 ++++----
 drivers/gpu/drm/radeon/radeon_atpx_handler.c   |  4 ++--
 drivers/gpu/drm/radeon/radeon_bios.c           |  2 +-
 drivers/ide/ide-acpi.c                         |  2 +-
 drivers/pci/hotplug/acpi_pcihp.c               |  2 +-
 drivers/pci/hotplug/pciehp_acpi.c              |  4 ++--
 drivers/pci/ioapic.c                           |  2 +-
 drivers/pci/pci-acpi.c                         |  6 +++---
 drivers/pci/pci-label.c                        |  6 +++---
 drivers/platform/x86/apple-gmux.c              |  2 +-
 drivers/pnp/pnpacpi/core.c                     | 10 +++++-----
 drivers/usb/core/hub.c                         |  2 +-
 drivers/usb/core/usb-acpi.c                    |  4 ++--
 drivers/xen/pci.c                              |  6 +++---
 include/linux/acpi.h                           |  2 --
 include/linux/pci-acpi.h                       |  4 ++--
 21 files changed, 42 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 119afda0968c..b3480cf7db1a 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -544,7 +544,7 @@ static int acpi_dev_pm_get_state(struct device *dev, struct acpi_device *adev,
  */
 int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p, int d_max_in)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+	acpi_handle handle = ACPI_HANDLE(dev);
 	struct acpi_device *adev;
 	int ret, d_min, d_max;
 
@@ -652,7 +652,7 @@ int acpi_pm_device_run_wake(struct device *phys_dev, bool enable)
 	if (!device_run_wake(phys_dev))
 		return -EINVAL;
 
-	handle = DEVICE_ACPI_HANDLE(phys_dev);
+	handle = ACPI_HANDLE(phys_dev);
 	if (!handle || acpi_bus_get_device(handle, &adev)) {
 		dev_dbg(phys_dev, "ACPI handle without context in %s!\n",
 			__func__);
@@ -696,7 +696,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
 	if (!device_can_wakeup(dev))
 		return -EINVAL;
 
-	handle = DEVICE_ACPI_HANDLE(dev);
+	handle = ACPI_HANDLE(dev);
 	if (!handle || acpi_bus_get_device(handle, &adev)) {
 		dev_dbg(dev, "ACPI handle without context in %s!\n", __func__);
 		return -ENODEV;
@@ -718,7 +718,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
  */
 struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+	acpi_handle handle = ACPI_HANDLE(dev);
 	struct acpi_device *adev;
 
 	return handle && !acpi_bus_get_device(handle, &adev) ? adev : NULL;
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c
index 43959edd4291..dfff0907f70e 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -196,7 +196,7 @@ static bool intel_dsm_pci_probe(struct pci_dev *pdev)
 	acpi_handle dhandle;
 	int ret;
 
-	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	dhandle = ACPI_HANDLE(&pdev->dev);
 	if (!dhandle)
 		return false;
 
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 119771ff46ab..8a94b8e15b27 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -289,7 +289,7 @@ static void intel_didl_outputs(struct drm_device *dev)
 	u32 temp;
 	int i = 0;
 
-	handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+	handle = ACPI_HANDLE(&dev->pdev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev))
 		return;
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
index e286e132c7e7..129120473f6c 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
@@ -116,7 +116,7 @@ mxm_shadow_dsm(struct nouveau_mxm *mxm, u8 version)
 	acpi_handle handle;
 	int ret;
 
-	handle = DEVICE_ACPI_HANDLE(&device->pdev->dev);
+	handle = ACPI_HANDLE(&device->pdev->dev);
 	if (!handle)
 		return false;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index cfbeee607b3a..d9eb65dae1f7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -256,7 +256,7 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev)
 	acpi_handle dhandle;
 	int retval = 0;
 
-	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	dhandle = ACPI_HANDLE(&pdev->dev);
 	if (!dhandle)
 		return false;
 
@@ -404,7 +404,7 @@ bool nouveau_acpi_rom_supported(struct pci_dev *pdev)
 	if (!nouveau_dsm_priv.dsm_detected && !nouveau_dsm_priv.optimus_detected)
 		return false;
 
-	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	dhandle = ACPI_HANDLE(&pdev->dev);
 	if (!dhandle)
 		return false;
 
@@ -438,7 +438,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector)
 		return NULL;
 	}
 
-	handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+	handle = ACPI_HANDLE(&dev->pdev->dev);
 	if (!handle)
 		return NULL;
 
diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c
index 10f98c7742d8..98a9074b306b 100644
--- a/drivers/gpu/drm/radeon/radeon_acpi.c
+++ b/drivers/gpu/drm/radeon/radeon_acpi.c
@@ -369,7 +369,7 @@ int radeon_atif_handler(struct radeon_device *rdev,
 		return NOTIFY_DONE;
 
 	/* Check pending SBIOS requests */
-	handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+	handle = ACPI_HANDLE(&rdev->pdev->dev);
 	count = radeon_atif_get_sbios_requests(handle, &req);
 
 	if (count <= 0)
@@ -556,7 +556,7 @@ int radeon_acpi_pcie_notify_device_ready(struct radeon_device *rdev)
 	struct radeon_atcs *atcs = &rdev->atcs;
 
 	/* Get the device handle */
-	handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+	handle = ACPI_HANDLE(&rdev->pdev->dev);
 	if (!handle)
 		return -EINVAL;
 
@@ -596,7 +596,7 @@ int radeon_acpi_pcie_performance_request(struct radeon_device *rdev,
 	u32 retry = 3;
 
 	/* Get the device handle */
-	handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+	handle = ACPI_HANDLE(&rdev->pdev->dev);
 	if (!handle)
 		return -EINVAL;
 
@@ -699,7 +699,7 @@ int radeon_acpi_init(struct radeon_device *rdev)
 	int ret;
 
 	/* Get the device handle */
-	handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+	handle = ACPI_HANDLE(&rdev->pdev->dev);
 
 	/* No need to proceed if we're sure that ATIF is not supported */
 	if (!ASIC_IS_AVIVO(rdev) || !rdev->bios || !handle)
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 27a6e9dbd0c4..933bd946057b 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -442,7 +442,7 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
 	acpi_handle dhandle, atpx_handle;
 	acpi_status status;
 
-	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	dhandle = ACPI_HANDLE(&pdev->dev);
 	if (!dhandle)
 		return false;
 
@@ -488,7 +488,7 @@ static int radeon_atpx_init(void)
  */
 static int radeon_atpx_get_client_id(struct pci_dev *pdev)
 {
-	if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+	if (radeon_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
 		return VGA_SWITCHEROO_IGD;
 	else
 		return VGA_SWITCHEROO_DIS;
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index 061b227dae0c..374eaba78a69 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -185,7 +185,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev)
 		return false;
 
 	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
-		dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+		dhandle = ACPI_HANDLE(&pdev->dev);
 		if (!dhandle)
 			continue;
 
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index 3662c4c536d6..d9e1f7ccfe6f 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -127,7 +127,7 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle,
 
 	DEBPRINT("ENTER: pci %02x:%02x.%01x\n", bus, devnum, func);
 
-	dev_handle = DEVICE_ACPI_HANDLE(dev);
+	dev_handle = ACPI_HANDLE(dev);
 	if (!dev_handle) {
 		DEBPRINT("no acpi handle for device\n");
 		goto err;
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 1ce8ee054f1a..a94d850ae228 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -367,7 +367,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
 		string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
 	}
 
-	handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	handle = ACPI_HANDLE(&pdev->dev);
 	if (!handle) {
 		/*
 		 * This hotplug controller was not listed in the ACPI name
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index ead7c534095e..cff7cadfc2e4 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -54,7 +54,7 @@ int pciehp_acpi_slot_detection_check(struct pci_dev *dev)
 {
 	if (slot_detection_mode != PCIEHP_DETECT_ACPI)
 		return 0;
-	if (acpi_pci_detect_ejectable(DEVICE_ACPI_HANDLE(&dev->dev)))
+	if (acpi_pci_detect_ejectable(ACPI_HANDLE(&dev->dev)))
 		return 0;
 	return -ENODEV;
 }
@@ -96,7 +96,7 @@ static int __init dummy_probe(struct pcie_device *dev)
 			dup_slot_id++;
 	}
 	list_add_tail(&slot->list, &dummy_slots);
-	handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	handle = ACPI_HANDLE(&pdev->dev);
 	if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
 		acpi_slot_detected = 1;
 	return -ENODEV;         /* dummy driver always returns error */
diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c
index 1b90579b233a..50ce68098298 100644
--- a/drivers/pci/ioapic.c
+++ b/drivers/pci/ioapic.c
@@ -37,7 +37,7 @@ static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
 	char *type;
 	struct resource *res;
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle)
 		return -EINVAL;
 
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index dfd1f59de729..f166126e28d1 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -173,14 +173,14 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
 
 static bool acpi_pci_power_manageable(struct pci_dev *dev)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
 
 	return handle ? acpi_bus_power_manageable(handle) : false;
 }
 
 static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
 	static const u8 state_conv[] = {
 		[PCI_D0] = ACPI_STATE_D0,
 		[PCI_D1] = ACPI_STATE_D1,
@@ -217,7 +217,7 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
 
 static bool acpi_pci_can_wakeup(struct pci_dev *dev)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
 
 	return handle ? acpi_bus_can_wakeup(handle) : false;
 }
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index edaed6f4da6c..d51f45aa669e 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -263,7 +263,7 @@ device_has_dsm(struct device *dev)
 	acpi_handle handle;
 	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
 
-	handle = DEVICE_ACPI_HANDLE(dev);
+	handle = ACPI_HANDLE(dev);
 
 	if (!handle)
 		return FALSE;
@@ -295,7 +295,7 @@ acpilabel_show(struct device *dev, struct device_attribute *attr, char *buf)
 	acpi_handle handle;
 	int length;
 
-	handle = DEVICE_ACPI_HANDLE(dev);
+	handle = ACPI_HANDLE(dev);
 
 	if (!handle)
 		return -1;
@@ -316,7 +316,7 @@ acpiindex_show(struct device *dev, struct device_attribute *attr, char *buf)
 	acpi_handle handle;
 	int length;
 
-	handle = DEVICE_ACPI_HANDLE(dev);
+	handle = ACPI_HANDLE(dev);
 
 	if (!handle)
 		return -1;
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 8eea2efbbb6d..5753c82335ad 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -519,7 +519,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 
 	gmux_data->power_state = VGA_SWITCHEROO_ON;
 
-	gmux_data->dhandle = DEVICE_ACPI_HANDLE(&pnp->dev);
+	gmux_data->dhandle = ACPI_HANDLE(&pnp->dev);
 	if (!gmux_data->dhandle) {
 		pr_err("Cannot find acpi handle for pnp device %s\n",
 		       dev_name(&pnp->dev));
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index 747826d99059..14655a0f0431 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -89,7 +89,7 @@ static int pnpacpi_set_resources(struct pnp_dev *dev)
 
 	pnp_dbg(&dev->dev, "set resources\n");
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
 		dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
 		return -ENODEV;
@@ -122,7 +122,7 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev)
 
 	dev_dbg(&dev->dev, "disable resources\n");
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
 		dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
 		return 0;
@@ -144,7 +144,7 @@ static bool pnpacpi_can_wakeup(struct pnp_dev *dev)
 	struct acpi_device *acpi_dev;
 	acpi_handle handle;
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
 		dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
 		return false;
@@ -159,7 +159,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
 	acpi_handle handle;
 	int error = 0;
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
 		dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
 		return 0;
@@ -194,7 +194,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
 static int pnpacpi_resume(struct pnp_dev *dev)
 {
 	struct acpi_device *acpi_dev;
-	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
 	int error = 0;
 
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 06cec635e703..a7c04e24ca48 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5501,6 +5501,6 @@ acpi_handle usb_get_hub_port_acpi_handle(struct usb_device *hdev,
 	if (!hub)
 		return NULL;
 
-	return DEVICE_ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
+	return ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
 }
 #endif
diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c
index 255c14464bf2..4e243c37f17f 100644
--- a/drivers/usb/core/usb-acpi.c
+++ b/drivers/usb/core/usb-acpi.c
@@ -173,7 +173,7 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle)
 		}
 
 		/* root hub's parent is the usb hcd. */
-		parent_handle = DEVICE_ACPI_HANDLE(dev->parent);
+		parent_handle = ACPI_HANDLE(dev->parent);
 		*handle = acpi_get_child(parent_handle, udev->portnum);
 		if (!*handle)
 			return -ENODEV;
@@ -194,7 +194,7 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle)
 
 			raw_port_num = usb_hcd_find_raw_port_number(hcd,
 				port_num);
-			*handle = acpi_get_child(DEVICE_ACPI_HANDLE(&udev->dev),
+			*handle = acpi_get_child(ACPI_HANDLE(&udev->dev),
 				raw_port_num);
 			if (!*handle)
 				return -ENODEV;
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 18fff88254eb..cc86b267056f 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -58,12 +58,12 @@ static int xen_add_device(struct device *dev)
 			add.flags = XEN_PCI_DEV_EXTFN;
 
 #ifdef CONFIG_ACPI
-		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+		handle = ACPI_HANDLE(&pci_dev->dev);
 		if (!handle && pci_dev->bus->bridge)
-			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+			handle = ACPI_HANDLE(pci_dev->bus->bridge);
 #ifdef CONFIG_PCI_IOV
 		if (!handle && pci_dev->is_virtfn)
-			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+			handle = ACPI_HANDLE(physfn->bus->bridge);
 #endif
 		if (handle) {
 			acpi_status status;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 47369aadb1d1..86a3ba99a38d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -482,8 +482,6 @@ static inline bool acpi_driver_match_device(struct device *dev,
 
 #endif	/* !CONFIG_ACPI */
 
-#define DEVICE_ACPI_HANDLE(dev)	ACPI_HANDLE(dev)
-
 #ifdef CONFIG_ACPI
 void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
 			       u32 pm1a_ctrl,  u32 pm1b_ctrl));
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index d006f0ca60f4..5a462c4e5009 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -27,7 +27,7 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 	while (!pci_is_root_bus(pbus))
 		pbus = pbus->parent;
 
-	return DEVICE_ACPI_HANDLE(pbus->bridge);
+	return ACPI_HANDLE(pbus->bridge);
 }
 
 static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
@@ -39,7 +39,7 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
 	else
 		dev = &pbus->self->dev;
 
-	return DEVICE_ACPI_HANDLE(dev);
+	return ACPI_HANDLE(dev);
 }
 
 void acpi_pci_add_bus(struct pci_bus *bus);
-- 
cgit v1.2.3


From 45c42a7f582cc07259bc78910de8b0b1c94094d5 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Thu, 14 Nov 2013 14:03:51 +0200
Subject: ACPI: Provide acpi_dev_name accessor for struct acpi_device device
 name

struct acpi_device fields are only available when CONFIG_ACPI is set. We may
find use for dev_name(&adev->dev) in generic code that is build also without
CONFIG_ACPI is set but currently this requires #ifdef CONFIG_ACPI churn.

Provide here an accessor that returns dev_name of embedded struct device dev
in struct acpi_device or NULL depending on CONFIG_ACPI setting.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 86a3ba99a38d..d9099b15b472 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -53,6 +53,11 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
 #define ACPI_COMPANION_SET(dev, adev)	ACPI_COMPANION(dev) = (adev)
 #define ACPI_HANDLE(dev)		acpi_device_handle(ACPI_COMPANION(dev))
 
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+	return dev_name(&adev->dev);
+}
+
 enum acpi_irq_model_id {
 	ACPI_IRQ_MODEL_PIC = 0,
 	ACPI_IRQ_MODEL_IOAPIC,
@@ -414,6 +419,11 @@ static inline bool acpi_driver_match_device(struct device *dev,
 #define ACPI_COMPANION_SET(dev, adev)	do { } while (0)
 #define ACPI_HANDLE(dev)		(NULL)
 
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+	return NULL;
+}
+
 static inline void acpi_early_init(void) { }
 
 static inline int early_acpi_boot_init(void)
-- 
cgit v1.2.3


From 57c1ffcefb5acb3c8b5f8436c325a6bdbd8e9c78 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:30:45 -0800
Subject: mm: rename USE_SPLIT_PTLOCKS to USE_SPLIT_PTE_PTLOCKS

We're going to introduce split page table lock for PMD level.  Let's
rename existing split ptlock for PTE level to avoid confusion.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/fault-armv.c | 6 +++---
 arch/x86/xen/mmu.c       | 6 +++---
 include/linux/mm.h       | 6 +++---
 include/linux/mm_types.h | 8 ++++----
 4 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 2a5907b5c8d2..ff379ac115df 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -65,7 +65,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address,
 	return ret;
 }
 
-#if USE_SPLIT_PTLOCKS
+#if USE_SPLIT_PTE_PTLOCKS
 /*
  * If we are using split PTE locks, then we need to take the page
  * lock here.  Otherwise we are using shared mm->page_table_lock
@@ -84,10 +84,10 @@ static inline void do_pte_unlock(spinlock_t *ptl)
 {
 	spin_unlock(ptl);
 }
-#else /* !USE_SPLIT_PTLOCKS */
+#else /* !USE_SPLIT_PTE_PTLOCKS */
 static inline void do_pte_lock(spinlock_t *ptl) {}
 static inline void do_pte_unlock(spinlock_t *ptl) {}
-#endif /* USE_SPLIT_PTLOCKS */
+#endif /* USE_SPLIT_PTE_PTLOCKS */
 
 static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
 	unsigned long pfn)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index fdc3ba28ca38..455c873ce009 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -796,7 +796,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
 {
 	spinlock_t *ptl = NULL;
 
-#if USE_SPLIT_PTLOCKS
+#if USE_SPLIT_PTE_PTLOCKS
 	ptl = __pte_lockptr(page);
 	spin_lock_nest_lock(ptl, &mm->page_table_lock);
 #endif
@@ -1637,7 +1637,7 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
 
 			__set_pfn_prot(pfn, PAGE_KERNEL_RO);
 
-			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
+			if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
 				__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
 
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
@@ -1671,7 +1671,7 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
 		if (!PageHighMem(page)) {
 			xen_mc_batch();
 
-			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
+			if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
 				__pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
 
 			__set_pfn_prot(pfn, PAGE_KERNEL);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 42a35d94b82c..dc3333d6c986 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1316,7 +1316,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 }
 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
 
-#if USE_SPLIT_PTLOCKS
+#if USE_SPLIT_PTE_PTLOCKS
 /*
  * We tuck a spinlock to guard each pagetable page into its struct page,
  * at page->private, with BUILD_BUG_ON to make sure that this will not
@@ -1329,14 +1329,14 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 } while (0)
 #define pte_lock_deinit(page)	((page)->mapping = NULL)
 #define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
-#else	/* !USE_SPLIT_PTLOCKS */
+#else	/* !USE_SPLIT_PTE_PTLOCKS */
 /*
  * We use mm->page_table_lock to guard all pagetable pages of the mm.
  */
 #define pte_lock_init(page)	do {} while (0)
 #define pte_lock_deinit(page)	do {} while (0)
 #define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
-#endif /* USE_SPLIT_PTLOCKS */
+#endif /* USE_SPLIT_PTE_PTLOCKS */
 
 static inline void pgtable_page_ctor(struct page *page)
 {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a3198e5aaf4e..f1ff66dfa79e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -23,7 +23,7 @@
 
 struct address_space;
 
-#define USE_SPLIT_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
+#define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
 
 /*
  * Each physical page in the system has a struct page associated with
@@ -141,7 +141,7 @@ struct page {
 						 * indicates order in the buddy
 						 * system if PG_buddy is set.
 						 */
-#if USE_SPLIT_PTLOCKS
+#if USE_SPLIT_PTE_PTLOCKS
 		spinlock_t ptl;
 #endif
 		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
@@ -309,14 +309,14 @@ enum {
 	NR_MM_COUNTERS
 };
 
-#if USE_SPLIT_PTLOCKS && defined(CONFIG_MMU)
+#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU)
 #define SPLIT_RSS_COUNTING
 /* per-thread cached information, */
 struct task_rss_stat {
 	int events;	/* for synchronization threshold */
 	int count[NR_MM_COUNTERS];
 };
-#endif /* USE_SPLIT_PTLOCKS */
+#endif /* USE_SPLIT_PTE_PTLOCKS */
 
 struct mm_rss_stat {
 	atomic_long_t count[NR_MM_COUNTERS];
-- 
cgit v1.2.3


From e1f56c89b040134add93f686931cc266541d239a Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:30:48 -0800
Subject: mm: convert mm->nr_ptes to atomic_long_t

With split page table lock for PMD level we can't hold mm->page_table_lock
while updating nr_ptes.

Let's convert it to atomic_long_t to avoid races.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c       |  3 ++-
 include/linux/mm_types.h |  2 +-
 kernel/fork.c            |  2 +-
 mm/huge_memory.c         | 10 +++++-----
 mm/memory.c              |  4 ++--
 mm/mmap.c                |  3 ++-
 mm/oom_kill.c            |  6 +++---
 7 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index abbe825d20ff..8faaebdc6b02 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -62,7 +62,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		total_rss << (PAGE_SHIFT-10),
 		data << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
+		(PTRS_PER_PTE * sizeof(pte_t) *
+		 atomic_long_read(&mm->nr_ptes)) >> 10,
 		swap << (PAGE_SHIFT-10));
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f1ff66dfa79e..566df579c51f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -339,6 +339,7 @@ struct mm_struct {
 	pgd_t * pgd;
 	atomic_t mm_users;			/* How many users with user space? */
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
+	atomic_long_t nr_ptes;			/* Page table pages */
 	int map_count;				/* number of VMAs */
 
 	spinlock_t page_table_lock;		/* Protects page tables and some counters */
@@ -360,7 +361,6 @@ struct mm_struct {
 	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
 	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
 	unsigned long def_flags;
-	unsigned long nr_ptes;		/* Page table pages */
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long start_brk, brk, start_stack;
 	unsigned long arg_start, arg_end, env_start, env_end;
diff --git a/kernel/fork.c b/kernel/fork.c
index f6d11fc67f72..e2520756e005 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -532,7 +532,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	mm->flags = (current->mm) ?
 		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
 	mm->core_state = NULL;
-	mm->nr_ptes = 0;
+	atomic_long_set(&mm->nr_ptes, 0);
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
 	mm_init_aio(mm);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0556c6a44959..e5b2d316be2e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -738,7 +738,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 		set_pmd_at(mm, haddr, pmd, entry);
 		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
-		mm->nr_ptes++;
+		atomic_long_inc(&mm->nr_ptes);
 		spin_unlock(&mm->page_table_lock);
 	}
 
@@ -771,7 +771,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 	entry = pmd_mkhuge(entry);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, haddr, pmd, entry);
-	mm->nr_ptes++;
+	atomic_long_inc(&mm->nr_ptes);
 	return true;
 }
 
@@ -896,7 +896,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	pmd = pmd_mkold(pmd_wrprotect(pmd));
 	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
 	set_pmd_at(dst_mm, addr, dst_pmd, pmd);
-	dst_mm->nr_ptes++;
+	atomic_long_inc(&dst_mm->nr_ptes);
 
 	ret = 0;
 out_unlock:
@@ -1392,7 +1392,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
 		if (is_huge_zero_pmd(orig_pmd)) {
-			tlb->mm->nr_ptes--;
+			atomic_long_dec(&tlb->mm->nr_ptes);
 			spin_unlock(&tlb->mm->page_table_lock);
 			put_huge_zero_page();
 		} else {
@@ -1401,7 +1401,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			VM_BUG_ON(page_mapcount(page) < 0);
 			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
 			VM_BUG_ON(!PageHead(page));
-			tlb->mm->nr_ptes--;
+			atomic_long_dec(&tlb->mm->nr_ptes);
 			spin_unlock(&tlb->mm->page_table_lock);
 			tlb_remove_page(tlb, page);
 		}
diff --git a/mm/memory.c b/mm/memory.c
index bf8665849a5f..0b5a93a49f27 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -382,7 +382,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 	pgtable_t token = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
 	pte_free_tlb(tlb, token, addr);
-	tlb->mm->nr_ptes--;
+	atomic_long_dec(&tlb->mm->nr_ptes);
 }
 
 static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -573,7 +573,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_lock(&mm->page_table_lock);
 	wait_split_huge_page = 0;
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
-		mm->nr_ptes++;
+		atomic_long_inc(&mm->nr_ptes);
 		pmd_populate(mm, pmd, new);
 		new = NULL;
 	} else if (unlikely(pmd_trans_splitting(*pmd)))
diff --git a/mm/mmap.c b/mm/mmap.c
index 5a6baddde15d..834b2d785f1e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2724,7 +2724,8 @@ void exit_mmap(struct mm_struct *mm)
 	}
 	vm_unacct_memory(nr_accounted);
 
-	WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
+	WARN_ON(atomic_long_read(&mm->nr_ptes) >
+			(FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
 }
 
 /* Insert vm structure into process list sorted by address
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6738c47f1f72..1e4a600a6163 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -161,7 +161,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	 * The baseline for the badness score is the proportion of RAM that each
 	 * task's rss, pagetable and swap space use.
 	 */
-	points = get_mm_rss(p->mm) + p->mm->nr_ptes +
+	points = get_mm_rss(p->mm) + atomic_long_read(&p->mm->nr_ptes) +
 		 get_mm_counter(p->mm, MM_SWAPENTS);
 	task_unlock(p);
 
@@ -364,10 +364,10 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas
 			continue;
 		}
 
-		pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu         %5hd %s\n",
+		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %8lu         %5hd %s\n",
 			task->pid, from_kuid(&init_user_ns, task_uid(task)),
 			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-			task->mm->nr_ptes,
+			atomic_long_read(&task->mm->nr_ptes),
 			get_mm_counter(task->mm, MM_SWAPENTS),
 			task->signal->oom_score_adj, task->comm);
 		task_unlock(task);
-- 
cgit v1.2.3


From 9a86cb7bdc4ccbe3f99a1ca275b90a322a90f9ce Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:30:51 -0800
Subject: mm: introduce api for split page table lock for PMD level

Basic api, backed by mm->page_table_lock for now. Actual implementation
will be added later.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc3333d6c986..4f4ca41fcf27 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1378,6 +1378,19 @@ static inline void pgtable_page_dtor(struct page *page)
 	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
 		NULL: pte_offset_kernel(pmd, address))
 
+static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+	return &mm->page_table_lock;
+}
+
+
+static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
+{
+	spinlock_t *ptl = pmd_lockptr(mm, pmd);
+	spin_lock(ptl);
+	return ptl;
+}
+
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
-- 
cgit v1.2.3


From bf929152e9f6c49b66fad4ebf08cc95b02ce48f5 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:30:54 -0800
Subject: mm, thp: change pmd_trans_huge_lock() to return taken lock

With split ptlock it's important to know which lock
pmd_trans_huge_lock() took.  This patch adds one more parameter to the
function to return the lock.

In most places migration to new api is trivial.  Exception is
move_huge_pmd(): we need to take two locks if pmd tables are different.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c      | 13 +++++++------
 include/linux/huge_mm.h | 14 +++++++-------
 mm/huge_memory.c        | 40 +++++++++++++++++++++++++++-------------
 mm/memcontrol.c         | 10 +++++-----
 4 files changed, 46 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8faaebdc6b02..42b5cf5d0326 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -506,9 +506,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
-		spin_unlock(&walk->mm->page_table_lock);
+		spin_unlock(ptl);
 		mss->anonymous_thp += HPAGE_PMD_SIZE;
 		return 0;
 	}
@@ -999,13 +999,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 {
 	struct vm_area_struct *vma;
 	struct pagemapread *pm = walk->private;
+	spinlock_t *ptl;
 	pte_t *pte;
 	int err = 0;
 	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
-	if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		int pmd_flags2;
 
 		if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1023,7 +1024,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			if (err)
 				break;
 		}
-		spin_unlock(&walk->mm->page_table_lock);
+		spin_unlock(ptl);
 		return err;
 	}
 
@@ -1325,7 +1326,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 
 	md = walk->private;
 
-	if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
+	if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
 		pte_t huge_pte = *(pte_t *)pmd;
 		struct page *page;
 
@@ -1333,7 +1334,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 		if (page)
 			gather_stats(page, md, pte_dirty(huge_pte),
 				     HPAGE_PMD_SIZE/PAGE_SIZE);
-		spin_unlock(&walk->mm->page_table_lock);
+		spin_unlock(ptl);
 		return 0;
 	}
 
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 3935428c57cf..4aca0d8da112 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -129,15 +129,15 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
 				    unsigned long start,
 				    unsigned long end,
 				    long adjust_next);
-extern int __pmd_trans_huge_lock(pmd_t *pmd,
-				 struct vm_area_struct *vma);
+extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+		spinlock_t **ptl);
 /* mmap_sem must be held on entry */
-static inline int pmd_trans_huge_lock(pmd_t *pmd,
-				      struct vm_area_struct *vma)
+static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+		spinlock_t **ptl)
 {
 	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
 	if (pmd_trans_huge(*pmd))
-		return __pmd_trans_huge_lock(pmd, vma);
+		return __pmd_trans_huge_lock(pmd, vma, ptl);
 	else
 		return 0;
 }
@@ -215,8 +215,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 					 long adjust_next)
 {
 }
-static inline int pmd_trans_huge_lock(pmd_t *pmd,
-				      struct vm_area_struct *vma)
+static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+		spinlock_t **ptl)
 {
 	return 0;
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e5b2d316be2e..471eb04066ff 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1376,9 +1376,10 @@ out:
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
+	spinlock_t *ptl;
 	int ret = 0;
 
-	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		struct page *page;
 		pgtable_t pgtable;
 		pmd_t orig_pmd;
@@ -1393,7 +1394,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
 		if (is_huge_zero_pmd(orig_pmd)) {
 			atomic_long_dec(&tlb->mm->nr_ptes);
-			spin_unlock(&tlb->mm->page_table_lock);
+			spin_unlock(ptl);
 			put_huge_zero_page();
 		} else {
 			page = pmd_page(orig_pmd);
@@ -1402,7 +1403,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
 			VM_BUG_ON(!PageHead(page));
 			atomic_long_dec(&tlb->mm->nr_ptes);
-			spin_unlock(&tlb->mm->page_table_lock);
+			spin_unlock(ptl);
 			tlb_remove_page(tlb, page);
 		}
 		pte_free(tlb->mm, pgtable);
@@ -1415,14 +1416,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end,
 		unsigned char *vec)
 {
+	spinlock_t *ptl;
 	int ret = 0;
 
-	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		/*
 		 * All logical pages in the range are present
 		 * if backed by a huge page.
 		 */
-		spin_unlock(&vma->vm_mm->page_table_lock);
+		spin_unlock(ptl);
 		memset(vec, 1, (end - addr) >> PAGE_SHIFT);
 		ret = 1;
 	}
@@ -1435,6 +1437,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		  unsigned long new_addr, unsigned long old_end,
 		  pmd_t *old_pmd, pmd_t *new_pmd)
 {
+	spinlock_t *old_ptl, *new_ptl;
 	int ret = 0;
 	pmd_t pmd;
 
@@ -1455,12 +1458,21 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		goto out;
 	}
 
-	ret = __pmd_trans_huge_lock(old_pmd, vma);
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
 	if (ret == 1) {
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
 		VM_BUG_ON(!pmd_none(*new_pmd));
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
-		spin_unlock(&mm->page_table_lock);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
 	}
 out:
 	return ret;
@@ -1476,9 +1488,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, pgprot_t newprot, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	spinlock_t *ptl;
 	int ret = 0;
 
-	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		pmd_t entry;
 		ret = 1;
 		if (!prot_numa) {
@@ -1507,7 +1520,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		if (ret == HPAGE_PMD_NR)
 			set_pmd_at(mm, addr, pmd, entry);
 
-		spin_unlock(&vma->vm_mm->page_table_lock);
+		spin_unlock(ptl);
 	}
 
 	return ret;
@@ -1520,12 +1533,13 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
  * Note that if it returns 1, this routine returns without unlocking page
  * table locks. So callers must unlock them.
  */
-int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
+int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+		spinlock_t **ptl)
 {
-	spin_lock(&vma->vm_mm->page_table_lock);
+	*ptl = pmd_lock(vma->vm_mm, pmd);
 	if (likely(pmd_trans_huge(*pmd))) {
 		if (unlikely(pmd_trans_splitting(*pmd))) {
-			spin_unlock(&vma->vm_mm->page_table_lock);
+			spin_unlock(*ptl);
 			wait_split_huge_page(vma->anon_vma, pmd);
 			return -1;
 		} else {
@@ -1534,7 +1548,7 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
 			return 1;
 		}
 	}
-	spin_unlock(&vma->vm_mm->page_table_lock);
+	spin_unlock(*ptl);
 	return 0;
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e3cd40b2d5d9..f1a0ae6e11b8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6605,10 +6605,10 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
 			mc.precharge += HPAGE_PMD_NR;
-		spin_unlock(&vma->vm_mm->page_table_lock);
+		spin_unlock(ptl);
 		return 0;
 	}
 
@@ -6797,9 +6797,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 	 *    to be unlocked in __split_huge_page_splitting(), where the main
 	 *    part of thp split is not executed yet.
 	 */
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		if (mc.precharge < HPAGE_PMD_NR) {
-			spin_unlock(&vma->vm_mm->page_table_lock);
+			spin_unlock(ptl);
 			return 0;
 		}
 		target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
@@ -6816,7 +6816,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 			}
 			put_page(page);
 		}
-		spin_unlock(&vma->vm_mm->page_table_lock);
+		spin_unlock(ptl);
 		return 0;
 	}
 
-- 
cgit v1.2.3


From 117b0791ac42f2ec447bc864e70ad622b5604059 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:30:56 -0800
Subject: mm, thp: move ptl taking inside page_check_address_pmd()

With split page table lock we can't know which lock we need to take
before we find the relevant pmd.

Let's move lock taking inside the function.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h |  3 ++-
 mm/huge_memory.c        | 43 +++++++++++++++++++++++++++----------------
 mm/rmap.c               | 13 +++++--------
 3 files changed, 34 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 4aca0d8da112..91672e2deec3 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -54,7 +54,8 @@ enum page_check_address_pmd_flag {
 extern pmd_t *page_check_address_pmd(struct page *page,
 				     struct mm_struct *mm,
 				     unsigned long address,
-				     enum page_check_address_pmd_flag flag);
+				     enum page_check_address_pmd_flag flag,
+				     spinlock_t **ptl);
 
 #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 471eb04066ff..c2082ab4fc93 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1552,23 +1552,33 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 	return 0;
 }
 
+/*
+ * This function returns whether a given @page is mapped onto the @address
+ * in the virtual space of @mm.
+ *
+ * When it's true, this function returns *pmd with holding the page table lock
+ * and passing it back to the caller via @ptl.
+ * If it's false, returns NULL without holding the page table lock.
+ */
 pmd_t *page_check_address_pmd(struct page *page,
 			      struct mm_struct *mm,
 			      unsigned long address,
-			      enum page_check_address_pmd_flag flag)
+			      enum page_check_address_pmd_flag flag,
+			      spinlock_t **ptl)
 {
-	pmd_t *pmd, *ret = NULL;
+	pmd_t *pmd;
 
 	if (address & ~HPAGE_PMD_MASK)
-		goto out;
+		return NULL;
 
 	pmd = mm_find_pmd(mm, address);
 	if (!pmd)
-		goto out;
+		return NULL;
+	*ptl = pmd_lock(mm, pmd);
 	if (pmd_none(*pmd))
-		goto out;
+		goto unlock;
 	if (pmd_page(*pmd) != page)
-		goto out;
+		goto unlock;
 	/*
 	 * split_vma() may create temporary aliased mappings. There is
 	 * no risk as long as all huge pmd are found and have their
@@ -1578,14 +1588,15 @@ pmd_t *page_check_address_pmd(struct page *page,
 	 */
 	if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
 	    pmd_trans_splitting(*pmd))
-		goto out;
+		goto unlock;
 	if (pmd_trans_huge(*pmd)) {
 		VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
 			  !pmd_trans_splitting(*pmd));
-		ret = pmd;
+		return pmd;
 	}
-out:
-	return ret;
+unlock:
+	spin_unlock(*ptl);
+	return NULL;
 }
 
 static int __split_huge_page_splitting(struct page *page,
@@ -1593,6 +1604,7 @@ static int __split_huge_page_splitting(struct page *page,
 				       unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	spinlock_t *ptl;
 	pmd_t *pmd;
 	int ret = 0;
 	/* For mmu_notifiers */
@@ -1600,9 +1612,8 @@ static int __split_huge_page_splitting(struct page *page,
 	const unsigned long mmun_end   = address + HPAGE_PMD_SIZE;
 
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-	spin_lock(&mm->page_table_lock);
 	pmd = page_check_address_pmd(page, mm, address,
-				     PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG);
+			PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG, &ptl);
 	if (pmd) {
 		/*
 		 * We can't temporarily set the pmd to null in order
@@ -1613,8 +1624,8 @@ static int __split_huge_page_splitting(struct page *page,
 		 */
 		pmdp_splitting_flush(vma, address, pmd);
 		ret = 1;
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 	return ret;
@@ -1745,14 +1756,14 @@ static int __split_huge_page_map(struct page *page,
 				 unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	spinlock_t *ptl;
 	pmd_t *pmd, _pmd;
 	int ret = 0, i;
 	pgtable_t pgtable;
 	unsigned long haddr;
 
-	spin_lock(&mm->page_table_lock);
 	pmd = page_check_address_pmd(page, mm, address,
-				     PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG);
+			PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG, &ptl);
 	if (pmd) {
 		pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 		pmd_populate(mm, &_pmd, pgtable);
@@ -1807,8 +1818,8 @@ static int __split_huge_page_map(struct page *page,
 		pmdp_invalidate(vma, address, pmd);
 		pmd_populate(mm, pmd, pgtable);
 		ret = 1;
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 
 	return ret;
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index fd3ee7a54a13..b59d741dcf65 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -665,25 +665,23 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 			unsigned long *vm_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	spinlock_t *ptl;
 	int referenced = 0;
 
 	if (unlikely(PageTransHuge(page))) {
 		pmd_t *pmd;
 
-		spin_lock(&mm->page_table_lock);
 		/*
 		 * rmap might return false positives; we must filter
 		 * these out using page_check_address_pmd().
 		 */
 		pmd = page_check_address_pmd(page, mm, address,
-					     PAGE_CHECK_ADDRESS_PMD_FLAG);
-		if (!pmd) {
-			spin_unlock(&mm->page_table_lock);
+					     PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
+		if (!pmd)
 			goto out;
-		}
 
 		if (vma->vm_flags & VM_LOCKED) {
-			spin_unlock(&mm->page_table_lock);
+			spin_unlock(ptl);
 			*mapcount = 0;	/* break early from loop */
 			*vm_flags |= VM_LOCKED;
 			goto out;
@@ -692,10 +690,9 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		/* go ahead even if the pmd is pmd_trans_splitting() */
 		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 	} else {
 		pte_t *pte;
-		spinlock_t *ptl;
 
 		/*
 		 * rmap might return false positives; we must filter
-- 
cgit v1.2.3


From c389a250ab4cfa4a3775d9f2c45271618af6d5b2 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:30:59 -0800
Subject: mm, thp: do not access mm->pmd_huge_pte directly

Currently mm->pmd_huge_pte protected by page table lock.  It will not
work with split lock.  We have to have per-pmd pmd_huge_pte for proper
access serialization.

For now, let's just introduce wrapper to access mm->pmd_huge_pte.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/mm/pgtable.c | 12 ++++++------
 arch/sparc/mm/tlb.c    | 12 ++++++------
 include/linux/mm.h     |  1 +
 mm/pgtable-generic.c   | 12 ++++++------
 4 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 0a2e5e086749..1ea18fcfa211 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1244,11 +1244,11 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 	assert_spin_locked(&mm->page_table_lock);
 
 	/* FIFO */
-	if (!mm->pmd_huge_pte)
+	if (!pmd_huge_pte(mm, pmdp))
 		INIT_LIST_HEAD(lh);
 	else
-		list_add(lh, (struct list_head *) mm->pmd_huge_pte);
-	mm->pmd_huge_pte = pgtable;
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	pmd_huge_pte(mm, pmdp) = pgtable;
 }
 
 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
@@ -1260,12 +1260,12 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 	assert_spin_locked(&mm->page_table_lock);
 
 	/* FIFO */
-	pgtable = mm->pmd_huge_pte;
+	pgtable = pmd_huge_pte(mm, pmdp);
 	lh = (struct list_head *) pgtable;
 	if (list_empty(lh))
-		mm->pmd_huge_pte = NULL;
+		pmd_huge_pte(mm, pmdp) = NULL;
 	else {
-		mm->pmd_huge_pte = (pgtable_t) lh->next;
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
 		list_del(lh);
 	}
 	ptep = (pte_t *) pgtable;
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 7a91f288c708..656cc46a81f5 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -196,11 +196,11 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 	assert_spin_locked(&mm->page_table_lock);
 
 	/* FIFO */
-	if (!mm->pmd_huge_pte)
+	if (!pmd_huge_pte(mm, pmdp))
 		INIT_LIST_HEAD(lh);
 	else
-		list_add(lh, (struct list_head *) mm->pmd_huge_pte);
-	mm->pmd_huge_pte = pgtable;
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	pmd_huge_pte(mm, pmdp) = pgtable;
 }
 
 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
@@ -211,12 +211,12 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 	assert_spin_locked(&mm->page_table_lock);
 
 	/* FIFO */
-	pgtable = mm->pmd_huge_pte;
+	pgtable = pmd_huge_pte(mm, pmdp);
 	lh = (struct list_head *) pgtable;
 	if (list_empty(lh))
-		mm->pmd_huge_pte = NULL;
+		pmd_huge_pte(mm, pmdp) = NULL;
 	else {
-		mm->pmd_huge_pte = (pgtable_t) lh->next;
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
 		list_del(lh);
 	}
 	pte_val(pgtable[0]) = 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4f4ca41fcf27..861cad53b744 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1383,6 +1383,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 	return &mm->page_table_lock;
 }
 
+#define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte)
 
 static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 {
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 3929a40bd6c0..41fee3e5d570 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -154,11 +154,11 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 	assert_spin_locked(&mm->page_table_lock);
 
 	/* FIFO */
-	if (!mm->pmd_huge_pte)
+	if (!pmd_huge_pte(mm, pmdp))
 		INIT_LIST_HEAD(&pgtable->lru);
 	else
-		list_add(&pgtable->lru, &mm->pmd_huge_pte->lru);
-	mm->pmd_huge_pte = pgtable;
+		list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru);
+	pmd_huge_pte(mm, pmdp) = pgtable;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
@@ -173,11 +173,11 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 	assert_spin_locked(&mm->page_table_lock);
 
 	/* FIFO */
-	pgtable = mm->pmd_huge_pte;
+	pgtable = pmd_huge_pte(mm, pmdp);
 	if (list_empty(&pgtable->lru))
-		mm->pmd_huge_pte = NULL;
+		pmd_huge_pte(mm, pmdp) = NULL;
 	else {
-		mm->pmd_huge_pte = list_entry(pgtable->lru.next,
+		pmd_huge_pte(mm, pmdp) = list_entry(pgtable->lru.next,
 					      struct page, lru);
 		list_del(&pgtable->lru);
 	}
-- 
cgit v1.2.3


From cb900f41215447433cbc456d1c4294e858a84d7c Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:31:02 -0800
Subject: mm, hugetlb: convert hugetlbfs to use split pmd lock

Hugetlb supports multiple page sizes. We use split lock only for PMD
level, but not for PUD.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/meminfo.c       |   2 +-
 include/linux/hugetlb.h |  26 ++++++++++++
 include/linux/swapops.h |   7 +--
 mm/hugetlb.c            | 110 +++++++++++++++++++++++++++++-------------------
 mm/mempolicy.c          |   5 ++-
 mm/migrate.c            |   7 +--
 mm/rmap.c               |   2 +-
 7 files changed, 105 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index c805d5b69ba1..a77d2b299199 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -1,8 +1,8 @@
 #include <linux/fs.h>
-#include <linux/hugetlb.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/hugetlb.h>
 #include <linux/mman.h>
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0393270466c3..acd2010328f3 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -392,6 +392,15 @@ static inline int hugepage_migration_support(struct hstate *h)
 	return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT);
 }
 
+static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
+					   struct mm_struct *mm, pte_t *pte)
+{
+	if (huge_page_size(h) == PMD_SIZE)
+		return pmd_lockptr(mm, (pmd_t *) pte);
+	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
+	return &mm->page_table_lock;
+}
+
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 #define alloc_huge_page_node(h, nid) NULL
@@ -401,6 +410,7 @@ struct hstate {};
 #define hstate_sizelog(s) NULL
 #define hstate_vma(v) NULL
 #define hstate_inode(i) NULL
+#define page_hstate(page) NULL
 #define huge_page_size(h) PAGE_SIZE
 #define huge_page_mask(h) PAGE_MASK
 #define vma_kernel_pagesize(v) PAGE_SIZE
@@ -421,6 +431,22 @@ static inline pgoff_t basepage_index(struct page *page)
 #define dissolve_free_huge_pages(s, e)	do {} while (0)
 #define pmd_huge_support()	0
 #define hugepage_migration_support(h)	0
+
+static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
+					   struct mm_struct *mm, pte_t *pte)
+{
+	return &mm->page_table_lock;
+}
 #endif	/* CONFIG_HUGETLB_PAGE */
 
+static inline spinlock_t *huge_pte_lock(struct hstate *h,
+					struct mm_struct *mm, pte_t *pte)
+{
+	spinlock_t *ptl;
+
+	ptl = huge_pte_lockptr(h, mm, pte);
+	spin_lock(ptl);
+	return ptl;
+}
+
 #endif /* _LINUX_HUGETLB_H */
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 8d4fa82bfb91..c0f75261a728 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -139,7 +139,8 @@ static inline void make_migration_entry_read(swp_entry_t *entry)
 
 extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 					unsigned long address);
-extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte);
+extern void migration_entry_wait_huge(struct vm_area_struct *vma,
+		struct mm_struct *mm, pte_t *pte);
 #else
 
 #define make_migration_entry(page, write) swp_entry(0, 0)
@@ -151,8 +152,8 @@ static inline int is_migration_entry(swp_entry_t swp)
 static inline void make_migration_entry_read(swp_entry_t *entryp) { }
 static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 					 unsigned long address) { }
-static inline void migration_entry_wait_huge(struct mm_struct *mm,
-					pte_t *pte) { }
+static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
+		struct mm_struct *mm, pte_t *pte) { }
 static inline int is_write_migration_entry(swp_entry_t entry)
 {
 	return 0;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0b7656e804d1..7d57af21f49e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2376,6 +2376,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 
 	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
+		spinlock_t *src_ptl, *dst_ptl;
 		src_pte = huge_pte_offset(src, addr);
 		if (!src_pte)
 			continue;
@@ -2387,8 +2388,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		if (dst_pte == src_pte)
 			continue;
 
-		spin_lock(&dst->page_table_lock);
-		spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
+		dst_ptl = huge_pte_lock(h, dst, dst_pte);
+		src_ptl = huge_pte_lockptr(h, src, src_pte);
+		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		if (!huge_pte_none(huge_ptep_get(src_pte))) {
 			if (cow)
 				huge_ptep_set_wrprotect(src, addr, src_pte);
@@ -2398,8 +2400,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			page_dup_rmap(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
 		}
-		spin_unlock(&src->page_table_lock);
-		spin_unlock(&dst->page_table_lock);
+		spin_unlock(src_ptl);
+		spin_unlock(dst_ptl);
 	}
 	return 0;
 
@@ -2442,6 +2444,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	unsigned long address;
 	pte_t *ptep;
 	pte_t pte;
+	spinlock_t *ptl;
 	struct page *page;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
@@ -2455,25 +2458,25 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	tlb_start_vma(tlb, vma);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 again:
-	spin_lock(&mm->page_table_lock);
 	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
 
+		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep))
-			continue;
+			goto unlock;
 
 		pte = huge_ptep_get(ptep);
 		if (huge_pte_none(pte))
-			continue;
+			goto unlock;
 
 		/*
 		 * HWPoisoned hugepage is already unmapped and dropped reference
 		 */
 		if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
 			huge_pte_clear(mm, address, ptep);
-			continue;
+			goto unlock;
 		}
 
 		page = pte_page(pte);
@@ -2484,7 +2487,7 @@ again:
 		 */
 		if (ref_page) {
 			if (page != ref_page)
-				continue;
+				goto unlock;
 
 			/*
 			 * Mark the VMA as having unmapped its page so that
@@ -2501,13 +2504,18 @@ again:
 
 		page_remove_rmap(page);
 		force_flush = !__tlb_remove_page(tlb, page);
-		if (force_flush)
+		if (force_flush) {
+			spin_unlock(ptl);
 			break;
+		}
 		/* Bail out after unmapping reference page if supplied */
-		if (ref_page)
+		if (ref_page) {
+			spin_unlock(ptl);
 			break;
+		}
+unlock:
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	/*
 	 * mmu_gather ran out of room to batch pages, we break out of
 	 * the PTE lock to avoid doing the potential expensive TLB invalidate
@@ -2613,7 +2621,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
-			struct page *pagecache_page)
+			struct page *pagecache_page, spinlock_t *ptl)
 {
 	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
@@ -2647,8 +2655,8 @@ retry_avoidcopy:
 
 	page_cache_get(old_page);
 
-	/* Drop page_table_lock as buddy allocator may be called */
-	spin_unlock(&mm->page_table_lock);
+	/* Drop page table lock as buddy allocator may be called */
+	spin_unlock(ptl);
 	new_page = alloc_huge_page(vma, address, outside_reserve);
 
 	if (IS_ERR(new_page)) {
@@ -2666,13 +2674,13 @@ retry_avoidcopy:
 			BUG_ON(huge_pte_none(pte));
 			if (unmap_ref_private(mm, vma, old_page, address)) {
 				BUG_ON(huge_pte_none(pte));
-				spin_lock(&mm->page_table_lock);
+				spin_lock(ptl);
 				ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 				if (likely(pte_same(huge_ptep_get(ptep), pte)))
 					goto retry_avoidcopy;
 				/*
-				 * race occurs while re-acquiring page_table_lock, and
-				 * our job is done.
+				 * race occurs while re-acquiring page table
+				 * lock, and our job is done.
 				 */
 				return 0;
 			}
@@ -2680,7 +2688,7 @@ retry_avoidcopy:
 		}
 
 		/* Caller expects lock to be held */
-		spin_lock(&mm->page_table_lock);
+		spin_lock(ptl);
 		if (err == -ENOMEM)
 			return VM_FAULT_OOM;
 		else
@@ -2695,7 +2703,7 @@ retry_avoidcopy:
 		page_cache_release(new_page);
 		page_cache_release(old_page);
 		/* Caller expects lock to be held */
-		spin_lock(&mm->page_table_lock);
+		spin_lock(ptl);
 		return VM_FAULT_OOM;
 	}
 
@@ -2707,10 +2715,10 @@ retry_avoidcopy:
 	mmun_end = mmun_start + huge_page_size(h);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	/*
-	 * Retake the page_table_lock to check for racing updates
+	 * Retake the page table lock to check for racing updates
 	 * before the page tables are altered
 	 */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
 		ClearPagePrivate(new_page);
@@ -2724,13 +2732,13 @@ retry_avoidcopy:
 		/* Make the old page be freed below */
 		new_page = old_page;
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 
 	/* Caller expects lock to be held */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	return 0;
 }
 
@@ -2778,6 +2786,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	struct address_space *mapping;
 	pte_t new_pte;
+	spinlock_t *ptl;
 
 	/*
 	 * Currently, we are forced to kill the process in the event the
@@ -2864,7 +2873,8 @@ retry:
 			goto backout_unlocked;
 		}
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(h, mm, ptep);
+	spin_lock(ptl);
 	size = i_size_read(mapping->host) >> huge_page_shift(h);
 	if (idx >= size)
 		goto backout;
@@ -2885,16 +2895,16 @@ retry:
 
 	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
+		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
 	}
 
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	unlock_page(page);
 out:
 	return ret;
 
 backout:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 backout_unlocked:
 	unlock_page(page);
 	put_page(page);
@@ -2906,6 +2916,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	pte_t *ptep;
 	pte_t entry;
+	spinlock_t *ptl;
 	int ret;
 	struct page *page = NULL;
 	struct page *pagecache_page = NULL;
@@ -2918,7 +2929,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
 		if (unlikely(is_hugetlb_entry_migration(entry))) {
-			migration_entry_wait_huge(mm, ptep);
+			migration_entry_wait_huge(vma, mm, ptep);
 			return 0;
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
@@ -2974,17 +2985,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (page != pagecache_page)
 		lock_page(page);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(h, mm, ptep);
+	spin_lock(ptl);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
-		goto out_page_table_lock;
+		goto out_ptl;
 
 
 	if (flags & FAULT_FLAG_WRITE) {
 		if (!huge_pte_write(entry)) {
 			ret = hugetlb_cow(mm, vma, address, ptep, entry,
-							pagecache_page);
-			goto out_page_table_lock;
+					pagecache_page, ptl);
+			goto out_ptl;
 		}
 		entry = huge_pte_mkdirty(entry);
 	}
@@ -2993,8 +3005,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 						flags & FAULT_FLAG_WRITE))
 		update_mmu_cache(vma, address, ptep);
 
-out_page_table_lock:
-	spin_unlock(&mm->page_table_lock);
+out_ptl:
+	spin_unlock(ptl);
 
 	if (pagecache_page) {
 		unlock_page(pagecache_page);
@@ -3020,9 +3032,9 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long remainder = *nr_pages;
 	struct hstate *h = hstate_vma(vma);
 
-	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
+		spinlock_t *ptl = NULL;
 		int absent;
 		struct page *page;
 
@@ -3030,8 +3042,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * Some archs (sparc64, sh*) have multiple pte_ts to
 		 * each hugepage.  We have to make sure we get the
 		 * first, for the page indexing below to work.
+		 *
+		 * Note that page table lock is not held when pte is null.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+		if (pte)
+			ptl = huge_pte_lock(h, mm, pte);
 		absent = !pte || huge_pte_none(huge_ptep_get(pte));
 
 		/*
@@ -3043,6 +3059,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		if (absent && (flags & FOLL_DUMP) &&
 		    !hugetlbfs_pagecache_present(h, vma, vaddr)) {
+			if (pte)
+				spin_unlock(ptl);
 			remainder = 0;
 			break;
 		}
@@ -3062,10 +3080,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		      !huge_pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
-			spin_unlock(&mm->page_table_lock);
+			if (pte)
+				spin_unlock(ptl);
 			ret = hugetlb_fault(mm, vma, vaddr,
 				(flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
-			spin_lock(&mm->page_table_lock);
 			if (!(ret & VM_FAULT_ERROR))
 				continue;
 
@@ -3096,8 +3114,8 @@ same_page:
 			 */
 			goto same_page;
 		}
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	*nr_pages = remainder;
 	*position = vaddr;
 
@@ -3118,13 +3136,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	flush_cache_range(vma, address, end);
 
 	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
-	spin_lock(&mm->page_table_lock);
 	for (; address < end; address += huge_page_size(h)) {
+		spinlock_t *ptl;
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
+		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep)) {
 			pages++;
+			spin_unlock(ptl);
 			continue;
 		}
 		if (!huge_pte_none(huge_ptep_get(ptep))) {
@@ -3134,8 +3154,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 			set_huge_pte_at(mm, address, ptep, pte);
 			pages++;
 		}
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	/*
 	 * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
 	 * may have cleared our pud entry and done put_page on the page table:
@@ -3298,6 +3318,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	unsigned long saddr;
 	pte_t *spte = NULL;
 	pte_t *pte;
+	spinlock_t *ptl;
 
 	if (!vma_shareable(vma, addr))
 		return (pte_t *)pmd_alloc(mm, pud, addr);
@@ -3320,13 +3341,14 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (!spte)
 		goto out;
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte);
+	spin_lock(ptl);
 	if (pud_none(*pud))
 		pud_populate(mm, pud,
 				(pmd_t *)((unsigned long)spte & PAGE_MASK));
 	else
 		put_page(virt_to_page(spte));
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 out:
 	pte = (pte_t *)pmd_alloc(mm, pud, addr);
 	mutex_unlock(&mapping->i_mmap_mutex);
@@ -3340,7 +3362,7 @@ out:
  * indicated by page_count > 1, unmap is achieved by clearing pud and
  * decrementing the ref count. If count == 1, the pte page is not shared.
  *
- * called with vma->vm_mm->page_table_lock held.
+ * called with page table lock held.
  *
  * returns: 1 successfully unmapped a shared pte page
  *	    0 the underlying pte page is not shared, or it is the last user
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4cc19f6ab6c6..c4403cdf3433 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -525,8 +525,9 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 #ifdef CONFIG_HUGETLB_PAGE
 	int nid;
 	struct page *page;
+	spinlock_t *ptl;
 
-	spin_lock(&vma->vm_mm->page_table_lock);
+	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
 	page = pte_page(huge_ptep_get((pte_t *)pmd));
 	nid = page_to_nid(page);
 	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
@@ -536,7 +537,7 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
 		isolate_huge_page(page, private);
 unlock:
-	spin_unlock(&vma->vm_mm->page_table_lock);
+	spin_unlock(ptl);
 #else
 	BUG();
 #endif
diff --git a/mm/migrate.c b/mm/migrate.c
index dfc8300ecbb2..01f45cefa4cd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -130,7 +130,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		ptep = huge_pte_offset(mm, addr);
 		if (!ptep)
 			goto out;
-		ptl = &mm->page_table_lock;
+		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
 	} else {
 		pmd = mm_find_pmd(mm, addr);
 		if (!pmd)
@@ -249,9 +249,10 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 	__migration_entry_wait(mm, ptep, ptl);
 }
 
-void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+void migration_entry_wait_huge(struct vm_area_struct *vma,
+		struct mm_struct *mm, pte_t *pte)
 {
-	spinlock_t *ptl = &(mm)->page_table_lock;
+	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
 	__migration_entry_wait(mm, pte, ptl);
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index b59d741dcf65..55c8b8dc9ffb 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -601,7 +601,7 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
 
 	if (unlikely(PageHuge(page))) {
 		pte = huge_pte_offset(mm, address);
-		ptl = &mm->page_table_lock;
+		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
 		goto check;
 	}
 
-- 
cgit v1.2.3


From e009bb30c8df8a52a9622b616b67436b6a03a0cd Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:31:07 -0800
Subject: mm: implement split page table lock for PMD level

The basic idea is the same as with PTE level: the lock is embedded into
struct page of table's page.

We can't use mm->pmd_huge_pte to store pgtables for THP, since we don't
take mm->page_table_lock anymore.  Let's reuse page->lru of table's page
for that.

pgtable_pmd_page_ctor() returns true, if initialization is successful
and false otherwise.  Current implementation never fails, but assumption
that constructor can fail will help to port it to -rt where spinlock_t
is rather huge and cannot be embedded into struct page -- dynamic
allocation is required.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h       | 32 ++++++++++++++++++++++++++++++++
 include/linux/mm_types.h |  7 ++++++-
 kernel/fork.c            |  4 ++--
 mm/Kconfig               |  3 +++
 4 files changed, 43 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 861cad53b744..255750d9b1be 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1378,13 +1378,45 @@ static inline void pgtable_page_dtor(struct page *page)
 	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
 		NULL: pte_offset_kernel(pmd, address))
 
+#if USE_SPLIT_PMD_PTLOCKS
+
+static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+	return &virt_to_page(pmd)->ptl;
+}
+
+static inline bool pgtable_pmd_page_ctor(struct page *page)
+{
+	spin_lock_init(&page->ptl);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	page->pmd_huge_pte = NULL;
+#endif
+	return true;
+}
+
+static inline void pgtable_pmd_page_dtor(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	VM_BUG_ON(page->pmd_huge_pte);
+#endif
+}
+
+#define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte)
+
+#else
+
 static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return &mm->page_table_lock;
 }
 
+static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; }
+static inline void pgtable_pmd_page_dtor(struct page *page) {}
+
 #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte)
 
+#endif
+
 static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl = pmd_lockptr(mm, pmd);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 566df579c51f..934261099975 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -24,6 +24,8 @@
 struct address_space;
 
 #define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
+#define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
+		IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
 
 /*
  * Each physical page in the system has a struct page associated with
@@ -63,6 +65,9 @@ struct page {
 						 * this page is only used to
 						 * free other pages.
 						 */
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
+		pgtable_t pmd_huge_pte; /* protected by page->ptl */
+#endif
 		};
 
 		union {
@@ -406,7 +411,7 @@ struct mm_struct {
 #ifdef CONFIG_MMU_NOTIFIER
 	struct mmu_notifier_mm *mmu_notifier_mm;
 #endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
 #ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/kernel/fork.c b/kernel/fork.c
index e2520756e005..728d5be9548c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -560,7 +560,7 @@ static void check_mm(struct mm_struct *mm)
 					  "mm:%p idx:%d val:%ld\n", mm, i, x);
 	}
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	VM_BUG_ON(mm->pmd_huge_pte);
 #endif
 }
@@ -814,7 +814,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	memcpy(mm, oldmm, sizeof(*mm));
 	mm_init_cpumask(mm);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
 #endif
 	if (!mm_init(mm, tsk))
diff --git a/mm/Kconfig b/mm/Kconfig
index c28d247e524a..7aa02def4666 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -222,6 +222,9 @@ config SPLIT_PTLOCK_CPUS
 	default "999999" if !64BIT && GENERIC_LOCKBREAK
 	default "4"
 
+config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+	boolean
+
 #
 # support for memory balloon compaction
 config BALLOON_COMPACTION
-- 
cgit v1.2.3


From 390f44e2aa2ab83f08231d7d05f066dc3494490e Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:31:20 -0800
Subject: mm: allow pgtable_page_ctor() to fail

Change pgtable_page_ctor() return type from void to bool.  Returns true,
if initialization is successful and false otherwise.

Current implementation never fails, but it will change later.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 255750d9b1be..e855351c3361 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1338,10 +1338,11 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 #define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
-static inline void pgtable_page_ctor(struct page *page)
+static inline bool pgtable_page_ctor(struct page *page)
 {
 	pte_lock_init(page);
 	inc_zone_page_state(page, NR_PAGETABLE);
+	return true;
 }
 
 static inline void pgtable_page_dtor(struct page *page)
-- 
cgit v1.2.3


From 49076ec2ccaf68610aa03d96bced9a6694b93ca1 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:31:51 -0800
Subject: mm: dynamically allocate page->ptl if it cannot be embedded to struct
 page

If split page table lock is in use, we embed the lock into struct page
of table's page.  We have to disable split lock, if spinlock_t is too
big be to be embedded, like when DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC
enabled.

This patch add support for dynamic allocation of split page table lock
if we can't embed it to struct page.

page->ptl is unsigned long now and we use it as spinlock_t if
sizeof(spinlock_t) <= sizeof(long), otherwise it's pointer to spinlock_t.

The spinlock_t allocated in pgtable_page_ctor() for PTE table and in
pgtable_pmd_page_ctor() for PMD table.  All other helpers converted to
support dynamically allocated page->ptl.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/split_page_table_lock | 94 ++++++++++++++++++++++++++++++++++
 arch/x86/xen/mmu.c                     |  2 +-
 include/linux/mm.h                     | 81 +++++++++++++++++++++--------
 include/linux/mm_types.h               |  5 +-
 mm/Kconfig                             |  2 -
 mm/memory.c                            | 19 +++++++
 6 files changed, 179 insertions(+), 24 deletions(-)
 create mode 100644 Documentation/vm/split_page_table_lock

(limited to 'include/linux')

diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock
new file mode 100644
index 000000000000..7521d367f21d
--- /dev/null
+++ b/Documentation/vm/split_page_table_lock
@@ -0,0 +1,94 @@
+Split page table lock
+=====================
+
+Originally, mm->page_table_lock spinlock protected all page tables of the
+mm_struct. But this approach leads to poor page fault scalability of
+multi-threaded applications due high contention on the lock. To improve
+scalability, split page table lock was introduced.
+
+With split page table lock we have separate per-table lock to serialize
+access to the table. At the moment we use split lock for PTE and PMD
+tables. Access to higher level tables protected by mm->page_table_lock.
+
+There are helpers to lock/unlock a table and other accessor functions:
+ - pte_offset_map_lock()
+	maps pte and takes PTE table lock, returns pointer to the taken
+	lock;
+ - pte_unmap_unlock()
+	unlocks and unmaps PTE table;
+ - pte_alloc_map_lock()
+	allocates PTE table if needed and take the lock, returns pointer
+	to taken lock or NULL if allocation failed;
+ - pte_lockptr()
+	returns pointer to PTE table lock;
+ - pmd_lock()
+	takes PMD table lock, returns pointer to taken lock;
+ - pmd_lockptr()
+	returns pointer to PMD table lock;
+
+Split page table lock for PTE tables is enabled compile-time if
+CONFIG_SPLIT_PTLOCK_CPUS (usually 4) is less or equal to NR_CPUS.
+If split lock is disabled, all tables guaded by mm->page_table_lock.
+
+Split page table lock for PMD tables is enabled, if it's enabled for PTE
+tables and the architecture supports it (see below).
+
+Hugetlb and split page table lock
+---------------------------------
+
+Hugetlb can support several page sizes. We use split lock only for PMD
+level, but not for PUD.
+
+Hugetlb-specific helpers:
+ - huge_pte_lock()
+	takes pmd split lock for PMD_SIZE page, mm->page_table_lock
+	otherwise;
+ - huge_pte_lockptr()
+	returns pointer to table lock;
+
+Support of split page table lock by an architecture
+---------------------------------------------------
+
+There's no need in special enabling of PTE split page table lock:
+everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
+which must be called on PTE table allocation / freeing.
+
+Make sure the architecture doesn't use slab allocator for page table
+allocation: slab uses page->slab_cache and page->first_page for its pages.
+These fields share storage with page->ptl.
+
+PMD split lock only makes sense if you have more than two page table
+levels.
+
+PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table
+allocation and pgtable_pmd_page_dtor() on freeing.
+
+Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but
+make sure you cover all PMD table allocation / freeing paths: i.e X86_PAE
+preallocate few PMDs on pgd_alloc().
+
+With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
+
+NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
+be handled properly.
+
+page->ptl
+---------
+
+page->ptl is used to access split page table lock, where 'page' is struct
+page of page containing the table. It shares storage with page->private
+(and few other fields in union).
+
+To avoid increasing size of struct page and have best performance, we use a
+trick:
+ - if spinlock_t fits into long, we use page->ptr as spinlock, so we
+   can avoid indirect access and save a cache line.
+ - if size of spinlock_t is bigger then size of long, we use page->ptl as
+   pointer to spinlock_t and allocate it dynamically. This allows to use
+   split lock with enabled DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC, but costs
+   one more cache line for indirect access;
+
+The spinlock_t allocated in pgtable_page_ctor() for PTE table and in
+pgtable_pmd_page_ctor() for PMD table.
+
+Please, never access page->ptl directly -- use appropriate helper.
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 455c873ce009..49c962fe7e62 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -797,7 +797,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
 	spinlock_t *ptl = NULL;
 
 #if USE_SPLIT_PTE_PTLOCKS
-	ptl = __pte_lockptr(page);
+	ptl = ptlock_ptr(page);
 	spin_lock_nest_lock(ptl, &mm->page_table_lock);
 #endif
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e855351c3361..d0339741b6ce 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1317,32 +1317,73 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
 
 #if USE_SPLIT_PTE_PTLOCKS
-/*
- * We tuck a spinlock to guard each pagetable page into its struct page,
- * at page->private, with BUILD_BUG_ON to make sure that this will not
- * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
- * When freeing, reset page->mapping so free_pages_check won't complain.
- */
-#define __pte_lockptr(page)	&((page)->ptl)
-#define pte_lock_init(_page)	do {					\
-	spin_lock_init(__pte_lockptr(_page));				\
-} while (0)
-#define pte_lock_deinit(page)	((page)->mapping = NULL)
-#define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+bool __ptlock_alloc(struct page *page);
+void __ptlock_free(struct page *page);
+static inline bool ptlock_alloc(struct page *page)
+{
+	if (sizeof(spinlock_t) > sizeof(page->ptl))
+		return __ptlock_alloc(page);
+	return true;
+}
+static inline void ptlock_free(struct page *page)
+{
+	if (sizeof(spinlock_t) > sizeof(page->ptl))
+		__ptlock_free(page);
+}
+
+static inline spinlock_t *ptlock_ptr(struct page *page)
+{
+	if (sizeof(spinlock_t) > sizeof(page->ptl))
+		return (spinlock_t *) page->ptl;
+	else
+		return (spinlock_t *) &page->ptl;
+}
+
+static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+	return ptlock_ptr(pmd_page(*pmd));
+}
+
+static inline bool ptlock_init(struct page *page)
+{
+	/*
+	 * prep_new_page() initialize page->private (and therefore page->ptl)
+	 * with 0. Make sure nobody took it in use in between.
+	 *
+	 * It can happen if arch try to use slab for page table allocation:
+	 * slab code uses page->slab_cache and page->first_page (for tail
+	 * pages), which share storage with page->ptl.
+	 */
+	VM_BUG_ON(page->ptl);
+	if (!ptlock_alloc(page))
+		return false;
+	spin_lock_init(ptlock_ptr(page));
+	return true;
+}
+
+/* Reset page->mapping so free_pages_check won't complain. */
+static inline void pte_lock_deinit(struct page *page)
+{
+	page->mapping = NULL;
+	ptlock_free(page);
+}
+
 #else	/* !USE_SPLIT_PTE_PTLOCKS */
 /*
  * We use mm->page_table_lock to guard all pagetable pages of the mm.
  */
-#define pte_lock_init(page)	do {} while (0)
-#define pte_lock_deinit(page)	do {} while (0)
-#define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
+static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+	return &mm->page_table_lock;
+}
+static inline bool ptlock_init(struct page *page) { return true; }
+static inline void pte_lock_deinit(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
 static inline bool pgtable_page_ctor(struct page *page)
 {
-	pte_lock_init(page);
 	inc_zone_page_state(page, NR_PAGETABLE);
-	return true;
+	return ptlock_init(page);
 }
 
 static inline void pgtable_page_dtor(struct page *page)
@@ -1383,16 +1424,15 @@ static inline void pgtable_page_dtor(struct page *page)
 
 static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
-	return &virt_to_page(pmd)->ptl;
+	return ptlock_ptr(virt_to_page(pmd));
 }
 
 static inline bool pgtable_pmd_page_ctor(struct page *page)
 {
-	spin_lock_init(&page->ptl);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	page->pmd_huge_pte = NULL;
 #endif
-	return true;
+	return ptlock_init(page);
 }
 
 static inline void pgtable_pmd_page_dtor(struct page *page)
@@ -1400,6 +1440,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	VM_BUG_ON(page->pmd_huge_pte);
 #endif
+	ptlock_free(page);
 }
 
 #define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 934261099975..423da79877e6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -147,7 +147,10 @@ struct page {
 						 * system if PG_buddy is set.
 						 */
 #if USE_SPLIT_PTE_PTLOCKS
-		spinlock_t ptl;
+		unsigned long ptl; /* It's spinlock_t if it fits to long,
+				    * otherwise it's pointer to dynamicaly
+				    * allocated spinlock_t.
+				    */
 #endif
 		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
 		struct page *first_page;	/* Compound tail pages */
diff --git a/mm/Kconfig b/mm/Kconfig
index 7aa02def4666..de31af256207 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -218,8 +218,6 @@ config SPLIT_PTLOCK_CPUS
 	int
 	default "999999" if ARM && !CPU_CACHE_VIPT
 	default "999999" if PARISC && !PA20
-	default "999999" if DEBUG_SPINLOCK || DEBUG_LOCK_ALLOC
-	default "999999" if !64BIT && GENERIC_LOCKBREAK
 	default "4"
 
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
diff --git a/mm/memory.c b/mm/memory.c
index d05c6b10e926..24ffae2a530e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4270,3 +4270,22 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 	}
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
+#if USE_SPLIT_PTE_PTLOCKS
+bool __ptlock_alloc(struct page *page)
+{
+	spinlock_t *ptl;
+
+	ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+	if (!ptl)
+		return false;
+	page->ptl = (unsigned long)ptl;
+	return true;
+}
+
+void __ptlock_free(struct page *page)
+{
+	if (sizeof(spinlock_t) > sizeof(page->ptl))
+		kfree((spinlock_t *)page->ptl);
+}
+#endif
-- 
cgit v1.2.3


From 539edb5846c740d78a8b6c2e43a99ca4323df68f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 14 Nov 2013 14:31:52 -0800
Subject: mm: properly separate the bloated ptl from the regular case

Use kernel/bounds.c to convert build-time spinlock_t size check into a
preprocessor symbol and apply that to properly separate the page::ptl
situation.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h       | 24 +++++++++++++-----------
 include/linux/mm_types.h |  9 +++++----
 kernel/bounds.c          |  2 ++
 mm/memory.c              | 11 +++++------
 4 files changed, 25 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d0339741b6ce..1cedd000cf29 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1317,27 +1317,29 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
 
 #if USE_SPLIT_PTE_PTLOCKS
-bool __ptlock_alloc(struct page *page);
-void __ptlock_free(struct page *page);
+#if BLOATED_SPINLOCKS
+extern bool ptlock_alloc(struct page *page);
+extern void ptlock_free(struct page *page);
+
+static inline spinlock_t *ptlock_ptr(struct page *page)
+{
+	return page->ptl;
+}
+#else /* BLOATED_SPINLOCKS */
 static inline bool ptlock_alloc(struct page *page)
 {
-	if (sizeof(spinlock_t) > sizeof(page->ptl))
-		return __ptlock_alloc(page);
 	return true;
 }
+
 static inline void ptlock_free(struct page *page)
 {
-	if (sizeof(spinlock_t) > sizeof(page->ptl))
-		__ptlock_free(page);
 }
 
 static inline spinlock_t *ptlock_ptr(struct page *page)
 {
-	if (sizeof(spinlock_t) > sizeof(page->ptl))
-		return (spinlock_t *) page->ptl;
-	else
-		return (spinlock_t *) &page->ptl;
+	return &page->ptl;
 }
+#endif /* BLOATED_SPINLOCKS */
 
 static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
@@ -1354,7 +1356,7 @@ static inline bool ptlock_init(struct page *page)
 	 * slab code uses page->slab_cache and page->first_page (for tail
 	 * pages), which share storage with page->ptl.
 	 */
-	VM_BUG_ON(page->ptl);
+	VM_BUG_ON(*(unsigned long *)&page->ptl);
 	if (!ptlock_alloc(page))
 		return false;
 	spin_lock_init(ptlock_ptr(page));
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 423da79877e6..10f5a7272b80 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -147,10 +147,11 @@ struct page {
 						 * system if PG_buddy is set.
 						 */
 #if USE_SPLIT_PTE_PTLOCKS
-		unsigned long ptl; /* It's spinlock_t if it fits to long,
-				    * otherwise it's pointer to dynamicaly
-				    * allocated spinlock_t.
-				    */
+#if BLOATED_SPINLOCKS
+		spinlock_t *ptl;
+#else
+		spinlock_t ptl;
+#endif
 #endif
 		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
 		struct page *first_page;	/* Compound tail pages */
diff --git a/kernel/bounds.c b/kernel/bounds.c
index e8ca97b5c386..578782ef6ae1 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -11,6 +11,7 @@
 #include <linux/kbuild.h>
 #include <linux/page_cgroup.h>
 #include <linux/log2.h>
+#include <linux/spinlock.h>
 
 void foo(void)
 {
@@ -21,5 +22,6 @@ void foo(void)
 #ifdef CONFIG_SMP
 	DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
 #endif
+	DEFINE(BLOATED_SPINLOCKS, sizeof(spinlock_t) > sizeof(int));
 	/* End of constants */
 }
diff --git a/mm/memory.c b/mm/memory.c
index 24ffae2a530e..5d9025f3b3e1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4271,21 +4271,20 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
-#if USE_SPLIT_PTE_PTLOCKS
-bool __ptlock_alloc(struct page *page)
+#if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
+bool ptlock_alloc(struct page *page)
 {
 	spinlock_t *ptl;
 
 	ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
 	if (!ptl)
 		return false;
-	page->ptl = (unsigned long)ptl;
+	page->ptl = ptl;
 	return true;
 }
 
-void __ptlock_free(struct page *page)
+void ptlock_free(struct page *page)
 {
-	if (sizeof(spinlock_t) > sizeof(page->ptl))
-		kfree((spinlock_t *)page->ptl);
+	kfree(page->ptl);
 }
 #endif
-- 
cgit v1.2.3


From ea1e7ed33708c7a760419ff9ded0a6cb90586a50 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:31:53 -0800
Subject: mm: create a separate slab for page->ptl allocation

If DEBUG_SPINLOCK and DEBUG_LOCK_ALLOC are enabled spinlock_t on x86_64
is 72 bytes.  For page->ptl they will be allocated from kmalloc-96 slab,
so we loose 24 on each.  An average system can easily allocate few tens
thousands of page->ptl and overhead is significant.

Let's create a separate slab for page->ptl allocation to solve this.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 9 +++++++++
 init/main.c        | 2 +-
 mm/memory.c        | 7 +++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1cedd000cf29..0548eb201e05 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1318,6 +1318,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 
 #if USE_SPLIT_PTE_PTLOCKS
 #if BLOATED_SPINLOCKS
+void __init ptlock_cache_init(void);
 extern bool ptlock_alloc(struct page *page);
 extern void ptlock_free(struct page *page);
 
@@ -1326,6 +1327,7 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
 	return page->ptl;
 }
 #else /* BLOATED_SPINLOCKS */
+static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_alloc(struct page *page)
 {
 	return true;
@@ -1378,10 +1380,17 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return &mm->page_table_lock;
 }
+static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct page *page) { return true; }
 static inline void pte_lock_deinit(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
+static inline void pgtable_init(void)
+{
+	ptlock_cache_init();
+	pgtable_cache_init();
+}
+
 static inline bool pgtable_page_ctor(struct page *page)
 {
 	inc_zone_page_state(page, NR_PAGETABLE);
diff --git a/init/main.c b/init/main.c
index 6ad1a533a8c7..5f191133376f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -473,7 +473,7 @@ static void __init mm_init(void)
 	mem_init();
 	kmem_cache_init();
 	percpu_init_late();
-	pgtable_cache_init();
+	pgtable_init();
 	vmalloc_init();
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 5d9025f3b3e1..0409e8f43fa0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4272,6 +4272,13 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 #if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
+static struct kmem_cache *page_ptl_cachep;
+void __init ptlock_cache_init(void)
+{
+	page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0,
+			SLAB_PANIC, NULL);
+}
+
 bool ptlock_alloc(struct page *page)
 {
 	spinlock_t *ptl;
-- 
cgit v1.2.3


From 57f4257eae33e036125973858934730250d464e3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 14 Nov 2013 14:31:54 -0800
Subject: lockref: use BLOATED_SPINLOCKS to avoid explicit config dependencies

Avoid the fragile Kconfig construct guestimating spinlock_t sizes; use a
friendly compile-time test to determine this.

[kirill.shutemov@linux.intel.com: drop CONFIG_CMPXCHG_LOCKREF]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockref.h | 7 ++++++-
 lib/Kconfig             | 7 -------
 lib/lockref.c           | 2 +-
 3 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index 13dfd36a3294..c8929c3832db 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -15,10 +15,15 @@
  */
 
 #include <linux/spinlock.h>
+#include <generated/bounds.h>
+
+#define USE_CMPXCHG_LOCKREF \
+	(IS_ENABLED(CONFIG_ARCH_USE_CMPXCHG_LOCKREF) && \
+	 IS_ENABLED(CONFIG_SMP) && !BLOATED_SPINLOCKS)
 
 struct lockref {
 	union {
-#ifdef CONFIG_CMPXCHG_LOCKREF
+#if USE_CMPXCHG_LOCKREF
 		aligned_u64 lock_count;
 #endif
 		struct {
diff --git a/lib/Kconfig b/lib/Kconfig
index 75485e163ca3..06dc74200a51 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -51,13 +51,6 @@ config PERCPU_RWSEM
 config ARCH_USE_CMPXCHG_LOCKREF
 	bool
 
-config CMPXCHG_LOCKREF
-	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
-	depends on SMP
-	depends on !GENERIC_LOCKBREAK
-	depends on !DEBUG_SPINLOCK
-	depends on !DEBUG_LOCK_ALLOC
-
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/lockref.c b/lib/lockref.c
index af6e95d0bed6..d2b123f8456b 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -1,7 +1,7 @@
 #include <linux/export.h>
 #include <linux/lockref.h>
 
-#ifdef CONFIG_CMPXCHG_LOCKREF
+#if USE_CMPXCHG_LOCKREF
 
 /*
  * Allow weakly-ordered memory architectures to provide barrier-less
-- 
cgit v1.2.3


From 839cc2a94cc3665bafe32203c2f095f4dd470a80 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Thu, 14 Nov 2013 14:31:56 -0800
Subject: seq_file: introduce seq_setwidth() and seq_pad()

There are several users who want to know bytes written by seq_*() for
alignment purpose.  Currently they are using %n format for knowing it
because seq_*() returns 0 on success.

This patch introduces seq_setwidth() and seq_pad() for allowing them to
align without using %n format.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Joe Perches <joe@perches.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/seq_file.c            | 15 +++++++++++++++
 include/linux/seq_file.h | 15 +++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index a290157265ef..1cd2388ca5bd 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -766,6 +766,21 @@ int seq_write(struct seq_file *seq, const void *data, size_t len)
 }
 EXPORT_SYMBOL(seq_write);
 
+/**
+ * seq_pad - write padding spaces to buffer
+ * @m: seq_file identifying the buffer to which data should be written
+ * @c: the byte to append after padding if non-zero
+ */
+void seq_pad(struct seq_file *m, char c)
+{
+	int size = m->pad_until - m->count;
+	if (size > 0)
+		seq_printf(m, "%*s", size, "");
+	if (c)
+		seq_putc(m, c);
+}
+EXPORT_SYMBOL(seq_pad);
+
 struct list_head *seq_list_start(struct list_head *head, loff_t pos)
 {
 	struct list_head *lh;
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 4e32edc8f506..52e0097f61f0 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -20,6 +20,7 @@ struct seq_file {
 	size_t size;
 	size_t from;
 	size_t count;
+	size_t pad_until;
 	loff_t index;
 	loff_t read_pos;
 	u64 version;
@@ -79,6 +80,20 @@ static inline void seq_commit(struct seq_file *m, int num)
 	}
 }
 
+/**
+ * seq_setwidth - set padding width
+ * @m: the seq_file handle
+ * @size: the max number of bytes to pad.
+ *
+ * Call seq_setwidth() for setting max width, then call seq_printf() etc. and
+ * finally call seq_pad() to pad the remaining bytes.
+ */
+static inline void seq_setwidth(struct seq_file *m, size_t size)
+{
+	m->pad_until = m->count + size;
+}
+void seq_pad(struct seq_file *m, char c);
+
 char *mangle_path(char *s, const char *p, const char *esc);
 int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
-- 
cgit v1.2.3


From c32f74ab2872994bc8336ed367313da3139350ca Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Thu, 14 Nov 2013 14:32:01 -0800
Subject: sched: replace INIT_COMPLETION with reinit_completion

For the casual device driver writer, it is hard to remember when to use
init_completion (to init a completion structure) or INIT_COMPLETION (to
*reinit* a completion structure).  Furthermore, while all other
completion functions exepct a pointer as a parameter, INIT_COMPLETION
does not.  To make it easier to remember which function to use and to
make code more readable, introduce a new inline function with the proper
name and consistent argument type.  Update the kernel-doc for
init_completion while we are here.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Linus Walleij <linus.walleij@linaro.org> (personally at LCE13)
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/completion.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 22c33e35bcb2..124e4b4334c1 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -19,8 +19,8 @@
  *
  * See also:  complete(), wait_for_completion() (and friends _timeout,
  * _interruptible, _interruptible_timeout, and _killable), init_completion(),
- * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and
- * INIT_COMPLETION().
+ * reinit_completion(), and macros DECLARE_COMPLETION(),
+ * DECLARE_COMPLETION_ONSTACK().
  */
 struct completion {
 	unsigned int done;
@@ -65,7 +65,7 @@ struct completion {
 
 /**
  * init_completion - Initialize a dynamically allocated completion
- * @x:  completion structure that is to be initialized
+ * @x:  pointer to completion structure that is to be initialized
  *
  * This inline function will initialize a dynamically created completion
  * structure.
@@ -76,6 +76,18 @@ static inline void init_completion(struct completion *x)
 	init_waitqueue_head(&x->wait);
 }
 
+/**
+ * reinit_completion - reinitialize a completion structure
+ * @x:  pointer to completion structure that is to be reinitialized
+ *
+ * This inline function should be used to reinitialize a completion structure so it can
+ * be reused. This is especially important after complete_all() is used.
+ */
+static inline void reinit_completion(struct completion *x)
+{
+	x->done = 0;
+}
+
 extern void wait_for_completion(struct completion *);
 extern void wait_for_completion_io(struct completion *);
 extern int wait_for_completion_interruptible(struct completion *x);
-- 
cgit v1.2.3


From 62026aedaacedbe1ffe94a3599ad4acd8ecdf587 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Thu, 14 Nov 2013 14:32:03 -0800
Subject: sched: remove INIT_COMPLETION

All users are converted over to reinit_completion(). Remove the old
macro now.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/completion.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 124e4b4334c1..5d5aaae3af43 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -106,14 +106,4 @@ extern bool completion_done(struct completion *x);
 extern void complete(struct completion *);
 extern void complete_all(struct completion *);
 
-/**
- * INIT_COMPLETION - reinitialize a completion structure
- * @x:  completion structure to be reinitialized
- *
- * This macro should be used to reinitialize a completion structure so it can
- * be reused. This is especially important after complete_all() is used.
- */
-#define INIT_COMPLETION(x)	((x).done = 0)
-
-
 #endif
-- 
cgit v1.2.3


From fc21c0cff2f425891b28ff6fb6b03b325c977428 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Thu, 14 Nov 2013 14:32:06 -0800
Subject: revert "softirq: Add support for triggering softirq work on softirqs"

This commit was incomplete in that code to remove items from the per-cpu
lists was missing and never acquired a user in the 5 years it has been in
the tree.  We're going to implement what it seems to try to archive in a
simpler way, and this code is in the way of doing so.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/interrupt.h |  22 --------
 kernel/softirq.c          | 131 ----------------------------------------------
 2 files changed, 153 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c9e831dc80bc..db43b58a3355 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -11,8 +11,6 @@
 #include <linux/irqnr.h>
 #include <linux/hardirq.h>
 #include <linux/irqflags.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
 #include <linux/hrtimer.h>
 #include <linux/kref.h>
 #include <linux/workqueue.h>
@@ -392,15 +390,6 @@ extern void __raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 
-/* This is the worklist that queues up per-cpu softirq work.
- *
- * send_remote_sendirq() adds work to these lists, and
- * the softirq handler itself dequeues from them.  The queues
- * are protected by disabling local cpu interrupts and they must
- * only be accessed by the local cpu that they are for.
- */
-DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
-
 DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
 
 static inline struct task_struct *this_cpu_ksoftirqd(void)
@@ -408,17 +397,6 @@ static inline struct task_struct *this_cpu_ksoftirqd(void)
 	return this_cpu_read(ksoftirqd);
 }
 
-/* Try to send a softirq to a remote cpu.  If this cannot be done, the
- * work will be queued to the local cpu.
- */
-extern void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq);
-
-/* Like send_remote_softirq(), but the caller must disable local cpu interrupts
- * and compute the current cpu, passed in as 'this_cpu'.
- */
-extern void __send_remote_softirq(struct call_single_data *cp, int cpu,
-				  int this_cpu, int softirq);
-
 /* Tasklets --- multithreaded analogue of BHs.
 
    Main feature differing them of generic softirqs: tasklet
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b24988353458..11025ccc06dd 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -6,8 +6,6 @@
  *	Distribute under GPLv2.
  *
  *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
- *
- *	Remote softirq infrastructure is by Jens Axboe.
  */
 
 #include <linux/export.h>
@@ -627,146 +625,17 @@ void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
 }
 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
 
-/*
- * Remote softirq bits
- */
-
-DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
-EXPORT_PER_CPU_SYMBOL(softirq_work_list);
-
-static void __local_trigger(struct call_single_data *cp, int softirq)
-{
-	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
-
-	list_add_tail(&cp->list, head);
-
-	/* Trigger the softirq only if the list was previously empty.  */
-	if (head->next == &cp->list)
-		raise_softirq_irqoff(softirq);
-}
-
-#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
-static void remote_softirq_receive(void *data)
-{
-	struct call_single_data *cp = data;
-	unsigned long flags;
-	int softirq;
-
-	softirq = *(int *)cp->info;
-	local_irq_save(flags);
-	__local_trigger(cp, softirq);
-	local_irq_restore(flags);
-}
-
-static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
-{
-	if (cpu_online(cpu)) {
-		cp->func = remote_softirq_receive;
-		cp->info = &softirq;
-		cp->flags = 0;
-
-		__smp_call_function_single(cpu, cp, 0);
-		return 0;
-	}
-	return 1;
-}
-#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
-static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
-{
-	return 1;
-}
-#endif
-
-/**
- * __send_remote_softirq - try to schedule softirq work on a remote cpu
- * @cp: private SMP call function data area
- * @cpu: the remote cpu
- * @this_cpu: the currently executing cpu
- * @softirq: the softirq for the work
- *
- * Attempt to schedule softirq work on a remote cpu.  If this cannot be
- * done, the work is instead queued up on the local cpu.
- *
- * Interrupts must be disabled.
- */
-void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
-{
-	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
-		__local_trigger(cp, softirq);
-}
-EXPORT_SYMBOL(__send_remote_softirq);
-
-/**
- * send_remote_softirq - try to schedule softirq work on a remote cpu
- * @cp: private SMP call function data area
- * @cpu: the remote cpu
- * @softirq: the softirq for the work
- *
- * Like __send_remote_softirq except that disabling interrupts and
- * computing the current cpu is done for the caller.
- */
-void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
-{
-	unsigned long flags;
-	int this_cpu;
-
-	local_irq_save(flags);
-	this_cpu = smp_processor_id();
-	__send_remote_softirq(cp, cpu, this_cpu, softirq);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(send_remote_softirq);
-
-static int remote_softirq_cpu_notify(struct notifier_block *self,
-					       unsigned long action, void *hcpu)
-{
-	/*
-	 * If a CPU goes away, splice its entries to the current CPU
-	 * and trigger a run of the softirq
-	 */
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		int cpu = (unsigned long) hcpu;
-		int i;
-
-		local_irq_disable();
-		for (i = 0; i < NR_SOFTIRQS; i++) {
-			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
-			struct list_head *local_head;
-
-			if (list_empty(head))
-				continue;
-
-			local_head = &__get_cpu_var(softirq_work_list[i]);
-			list_splice_init(head, local_head);
-			raise_softirq_irqoff(i);
-		}
-		local_irq_enable();
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block remote_softirq_cpu_notifier = {
-	.notifier_call	= remote_softirq_cpu_notify,
-};
-
 void __init softirq_init(void)
 {
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		int i;
-
 		per_cpu(tasklet_vec, cpu).tail =
 			&per_cpu(tasklet_vec, cpu).head;
 		per_cpu(tasklet_hi_vec, cpu).tail =
 			&per_cpu(tasklet_hi_vec, cpu).head;
-		for (i = 0; i < NR_SOFTIRQS; i++)
-			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
 	}
 
-	register_hotcpu_notifier(&remote_softirq_cpu_notifier);
-
 	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
 	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
 }
-- 
cgit v1.2.3


From 0a06ff068f1255bcd7965ab07bc0f4adc3eb639a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Thu, 14 Nov 2013 14:32:07 -0800
Subject: kernel: remove CONFIG_USE_GENERIC_SMP_HELPERS

We've switched over every architecture that supports SMP to it, so
remove the new useless config variable.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig          | 3 ---
 arch/alpha/Kconfig    | 1 -
 arch/arc/Kconfig      | 1 -
 arch/arm/Kconfig      | 1 -
 arch/arm64/Kconfig    | 1 -
 arch/blackfin/Kconfig | 1 -
 arch/hexagon/Kconfig  | 1 -
 arch/ia64/Kconfig     | 1 -
 arch/m32r/Kconfig     | 1 -
 arch/metag/Kconfig    | 1 -
 arch/mips/Kconfig     | 1 -
 arch/mn10300/Kconfig  | 1 -
 arch/parisc/Kconfig   | 1 -
 arch/powerpc/Kconfig  | 1 -
 arch/s390/Kconfig     | 1 -
 arch/sh/Kconfig       | 1 -
 arch/sparc/Kconfig    | 1 -
 arch/tile/Kconfig     | 1 -
 arch/x86/Kconfig      | 1 -
 block/blk-mq.c        | 4 ++--
 block/blk-softirq.c   | 4 ++--
 block/blk-sysfs.c     | 2 +-
 include/linux/smp.h   | 4 ----
 kernel/Kconfig.hz     | 2 +-
 kernel/smp.c          | 2 --
 25 files changed, 6 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index ded747c7b74c..f1cf895c040f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -207,9 +207,6 @@ config HAVE_DMA_ATTRS
 config HAVE_DMA_CONTIGUOUS
 	bool
 
-config USE_GENERIC_SMP_HELPERS
-	bool
-
 config GENERIC_SMP_IDLE_THREAD
        bool
 
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 35a300d4a9fb..8d2a4833acda 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -522,7 +522,6 @@ config ARCH_MAY_HAVE_PC_FDC
 config SMP
 	bool "Symmetric multi-processing support"
 	depends on ALPHA_SABLE || ALPHA_LYNX || ALPHA_RAWHIDE || ALPHA_DP264 || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_GENERIC || ALPHA_SHARK || ALPHA_MARVEL
-	select USE_GENERIC_SMP_HELPERS
 	---help---
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 5ede5460c806..2ee0c9bfd032 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -125,7 +125,6 @@ config ARC_PLAT_NEEDS_CPU_TO_DMA
 config SMP
 	bool "Symmetric Multi-Processing (Incomplete)"
 	default n
-	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 603d661b445d..00c1ff45a158 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1432,7 +1432,6 @@ config SMP
 	depends on GENERIC_CLOCKEVENTS
 	depends on HAVE_SMP
 	depends on MMU || ARM_MPU
-	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index bb0bf1bfc05d..9714fe0403b7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -143,7 +143,6 @@ config CPU_BIG_ENDIAN
 
 config SMP
 	bool "Symmetric Multi-Processing"
-	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU.  If
 	  you say N here, the kernel will run on single and
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index e887b57c3176..9ceccef9c649 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -34,7 +34,6 @@ config BLACKFIN
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_ATOMIC64
 	select GENERIC_IRQ_PROBE
-	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_NMI_WATCHDOG if NMI_WATCHDOG
 	select GENERIC_SMP_IDLE_THREAD
 	select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 99041b07e610..09df2608f40a 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -4,7 +4,6 @@ comment "Linux Kernel Configuration for Hexagon"
 config HEXAGON
 	def_bool y
 	select HAVE_OPROFILE
-	select USE_GENERIC_SMP_HELPERS if SMP
 	# Other pending projects/to-do items.
 	# select HAVE_REGS_AND_STACK_ACCESS_API
 	# select HAVE_HW_BREAKPOINT if PERF_EVENTS
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 7740ab10a171..dfe85e92ca2e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -343,7 +343,6 @@ config FORCE_MAX_ZONEORDER
 
 config SMP
 	bool "Symmetric multi-processing support"
-	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, say N.  If you have a system with more
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 75661fbf4529..09ef94a8a7c3 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -275,7 +275,6 @@ source "kernel/Kconfig.preempt"
 
 config SMP
 	bool "Symmetric multi-processing support"
-	select USE_GENERIC_SMP_HELPERS
 	---help---
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
index 36368eb07e13..e56abd2c1b4f 100644
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -111,7 +111,6 @@ config METAG_META21
 config SMP
 	bool "Symmetric multi-processing support"
 	depends on METAG_META21 && METAG_META21_MMU
-	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one thread running
 	  Linux. If you have a system with only one thread running Linux,
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 17cc7ff8458c..867d7db11581 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2125,7 +2125,6 @@ source "mm/Kconfig"
 config SMP
 	bool "Multi-Processing support"
 	depends on SYS_SUPPORTS_SMP
-	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 6aaa1607001a..8bde9237d13b 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -181,7 +181,6 @@ endmenu
 config SMP
 	bool "Symmetric multi-processing support"
 	default y
-	select USE_GENERIC_SMP_HELPERS
 	depends on MN10300_PROC_MN2WS0038 || MN10300_PROC_MN2WS0050
 	---help---
 	  This enables support for systems with more than one CPU. If you have
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 7dcde539d61e..c03567a9a915 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -226,7 +226,6 @@ endchoice
 
 config SMP
 	bool "Symmetric multi-processing support"
-	select USE_GENERIC_SMP_HELPERS
 	---help---
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2f898d63eb96..4740b0a15fa8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -106,7 +106,6 @@ config PPC
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
-	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_OPROFILE
 	select HAVE_DEBUG_KMEMLEAK
 	select GENERIC_ATOMIC64 if PPC32
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f75d7e517927..314fced4fc14 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -141,7 +141,6 @@ config S390
 	select OLD_SIGACTION
 	select OLD_SIGSUSPEND3
 	select SYSCTL_EXCEPTION_TRACE
-	select USE_GENERIC_SMP_HELPERS if SMP
 	select VIRT_CPU_ACCOUNTING
 	select VIRT_TO_BUS
 
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 224f4bc9925e..e78561bc30ef 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -711,7 +711,6 @@ config CC_STACKPROTECTOR
 config SMP
 	bool "Symmetric multi-processing support"
 	depends on SYS_SUPPORTS_SMP
-	select USE_GENERIC_SMP_HELPERS
 	---help---
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 78c4fdb91bc5..8591b201d9cc 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -28,7 +28,6 @@ config SPARC
 	select HAVE_ARCH_JUMP_LABEL
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select USE_GENERIC_SMP_HELPERS if SMP
 	select GENERIC_PCI_IOMAP
 	select HAVE_NMI_WATCHDOG if SPARC64
 	select HAVE_BPF_JIT
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index d45a2c48f185..b3692ce78f90 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -8,7 +8,6 @@ config TILE
 	select HAVE_KVM if !TILEGX
 	select GENERIC_FIND_FIRST_BIT
 	select SYSCTL_EXCEPTION_TRACE
-	select USE_GENERIC_SMP_HELPERS
 	select CC_OPTIMIZE_FOR_SIZE
 	select HAVE_DEBUG_KMEMLEAK
 	select GENERIC_IRQ_PROBE
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index af5513e5798a..83f521aa103f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -90,7 +90,6 @@ config X86
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
 	select IRQ_FORCED_THREADING
-	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_BPF_JIT if X86_64
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select CLKEVT_I8253
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 88d4e864d4c0..c661896e2465 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -319,7 +319,7 @@ void __blk_mq_end_io(struct request *rq, int error)
 		blk_mq_complete_request(rq, error);
 }
 
-#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#if defined(CONFIG_SMP)
 
 /*
  * Called with interrupts disabled.
@@ -361,7 +361,7 @@ static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
 
 	return true;
 }
-#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+#else /* CONFIG_SMP */
 static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
 			  struct request *rq, const int error)
 {
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ce4b8bfd3d27..57790c1a97eb 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -36,7 +36,7 @@ static void blk_done_softirq(struct softirq_action *h)
 	}
 }
 
-#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#ifdef CONFIG_SMP
 static void trigger_softirq(void *data)
 {
 	struct request *rq = data;
@@ -71,7 +71,7 @@ static int raise_blk_irq(int cpu, struct request *rq)
 
 	return 1;
 }
-#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+#else /* CONFIG_SMP */
 static int raise_blk_irq(int cpu, struct request *rq)
 {
 	return 1;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 4f8c4d90ec73..97779522472f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -288,7 +288,7 @@ static ssize_t
 queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 {
 	ssize_t ret = -EINVAL;
-#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#ifdef CONFIG_SMP
 	unsigned long val;
 
 	ret = queue_var_store(&val, page, count);
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 731f5237d5f4..78851513030d 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -106,14 +106,10 @@ void kick_all_cpus_sync(void);
 /*
  * Generic and arch helpers
  */
-#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 void __init call_function_init(void);
 void generic_smp_call_function_single_interrupt(void);
 #define generic_smp_call_function_interrupt \
 	generic_smp_call_function_single_interrupt
-#else
-static inline void call_function_init(void) { }
-#endif
 
 /*
  * Mark the boot cpu "online" so that it can call console drivers in
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 94fabd534b03..2a202a846757 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
 	default 1000 if HZ_1000
 
 config SCHED_HRTICK
-	def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS)
+	def_bool HIGH_RES_TIMERS
diff --git a/kernel/smp.c b/kernel/smp.c
index 46116100f0ee..1c194e20e943 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -15,7 +15,6 @@
 
 #include "smpboot.h"
 
-#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 enum {
 	CSD_FLAG_LOCK		= 0x01,
 	CSD_FLAG_WAIT		= 0x02,
@@ -464,7 +463,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait)
 	return 0;
 }
 EXPORT_SYMBOL(smp_call_function);
-#endif /* USE_GENERIC_SMP_HELPERS */
 
 /* Setup configured maximum number of CPUs to activate */
 unsigned int setup_max_cpus = NR_CPUS;
-- 
cgit v1.2.3


From 7cf64f861b7a43b395f0855994003254b06a7e5a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 14 Nov 2013 14:32:09 -0800
Subject: kernel-provide-a-__smp_call_function_single-stub-for-config_smp-fix

x86_64 allnoconfig:

  kernel/up.c:25: error: redefinition of '__smp_call_function_single'
  include/linux/smp.h:154: note: previous definition of '__smp_call_function_single' was here

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/smp.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 78851513030d..5da22ee42e16 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -49,6 +49,9 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 		smp_call_func_t func, void *info, bool wait,
 		gfp_t gfp_flags);
 
+void __smp_call_function_single(int cpuid, struct call_single_data *data,
+				int wait);
+
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
@@ -95,9 +98,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait);
 void smp_call_function_many(const struct cpumask *mask,
 			    smp_call_func_t func, void *info, bool wait);
 
-void __smp_call_function_single(int cpuid, struct call_single_data *data,
-				int wait);
-
 int smp_call_function_any(const struct cpumask *mask,
 			  smp_call_func_t func, void *info, int wait);
 
@@ -151,12 +151,6 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
 
 static inline void kick_all_cpus_sync(void) {  }
 
-static inline void __smp_call_function_single(int cpuid,
-		struct call_single_data *data, int wait)
-{
-	on_each_cpu(data->func, data->info, wait);
-}
-
 #endif /* !SMP */
 
 /*
-- 
cgit v1.2.3


From b89241e8cdb8321c20546d47645a9b65b58113b5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Thu, 14 Nov 2013 14:32:11 -0800
Subject: llists: move llist_reverse_order from raid5 to llist.c

Make this useful helper available for other users.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/raid5.c    | 14 --------------
 include/linux/llist.h |  2 ++
 lib/llist.c           | 22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f8b906843926..7f0e17a27aeb 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -293,20 +293,6 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
 		do_release_stripe(conf, sh);
 }
 
-static struct llist_node *llist_reverse_order(struct llist_node *head)
-{
-	struct llist_node *new_head = NULL;
-
-	while (head) {
-		struct llist_node *tmp = head;
-		head = head->next;
-		tmp->next = new_head;
-		new_head = tmp;
-	}
-
-	return new_head;
-}
-
 /* should hold conf->device_lock already */
 static int release_stripe_list(struct r5conf *conf)
 {
diff --git a/include/linux/llist.h b/include/linux/llist.h
index 8828a78dec9a..fbf10a0bc095 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -195,4 +195,6 @@ static inline struct llist_node *llist_del_all(struct llist_head *head)
 
 extern struct llist_node *llist_del_first(struct llist_head *head);
 
+struct llist_node *llist_reverse_order(struct llist_node *head);
+
 #endif /* LLIST_H */
diff --git a/lib/llist.c b/lib/llist.c
index 4a70d120138c..ef48b87247ec 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -81,3 +81,25 @@ struct llist_node *llist_del_first(struct llist_head *head)
 	return entry;
 }
 EXPORT_SYMBOL_GPL(llist_del_first);
+
+/**
+ * llist_reverse_order - reverse order of a llist chain
+ * @head:	first item of the list to be reversed
+ *
+ * Reverse the oder of a chain of llist entries and return the
+ * new first entry.
+ */
+struct llist_node *llist_reverse_order(struct llist_node *head)
+{
+	struct llist_node *new_head = NULL;
+
+	while (head) {
+		struct llist_node *tmp = head;
+		head = head->next;
+		tmp->next = new_head;
+		new_head = tmp;
+	}
+
+	return new_head;
+}
+EXPORT_SYMBOL_GPL(llist_reverse_order);
-- 
cgit v1.2.3


From 498d319bb512992ef0784c278fa03679f2f5649d Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Thu, 14 Nov 2013 14:32:17 -0800
Subject: kfifo API type safety

This patch enhances the type safety for the kfifo API.  It is now safe
to put const data into a non const FIFO and the API will now generate a
compiler warning when reading from the fifo where the destination
address is pointing to a const variable.

As a side effect the kfifo_put() does now expect the value of an element
instead a pointer to the element.  This was suggested Russell King.  It
make the handling of the kfifo_put easier since there is no need to
create a helper variable for getting the address of a pointer or to pass
integers of different sizes.

IMHO the API break is okay, since there are currently only six users of
kfifo_put().

The code is also cleaner by kicking out the "if (0)" expressions.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Stefani Seibold <stefani@seibold.net>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_flip_work.c          |  2 +-
 drivers/iio/industrialio-event.c         |  2 +-
 drivers/net/wireless/rt2x00/rt2800mmio.c |  2 +-
 drivers/net/wireless/rt2x00/rt2800usb.c  |  2 +-
 drivers/pci/pcie/aer/aerdrv_core.c       |  2 +-
 include/linux/kfifo.h                    | 47 ++++++++++----------------------
 mm/memory-failure.c                      |  2 +-
 samples/kfifo/bytestream-example.c       |  4 +--
 samples/kfifo/dma-example.c              |  2 +-
 samples/kfifo/inttype-example.c          |  4 +--
 10 files changed, 25 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c
index e788882d9021..f9c7fa3d0012 100644
--- a/drivers/gpu/drm/drm_flip_work.c
+++ b/drivers/gpu/drm/drm_flip_work.c
@@ -34,7 +34,7 @@
  */
 void drm_flip_work_queue(struct drm_flip_work *work, void *val)
 {
-	if (kfifo_put(&work->fifo, (const void **)&val)) {
+	if (kfifo_put(&work->fifo, val)) {
 		atomic_inc(&work->pending);
 	} else {
 		DRM_ERROR("%s fifo full!\n", work->name);
diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index dac15b9f9df8..c10eab64bc05 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -56,7 +56,7 @@ int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp)
 		ev.id = ev_code;
 		ev.timestamp = timestamp;
 
-		copied = kfifo_put(&ev_int->det_events, &ev);
+		copied = kfifo_put(&ev_int->det_events, ev);
 		if (copied != 0)
 			wake_up_locked_poll(&ev_int->wait, POLLIN);
 	}
diff --git a/drivers/net/wireless/rt2x00/rt2800mmio.c b/drivers/net/wireless/rt2x00/rt2800mmio.c
index ae152280e071..a8cc736b5063 100644
--- a/drivers/net/wireless/rt2x00/rt2800mmio.c
+++ b/drivers/net/wireless/rt2x00/rt2800mmio.c
@@ -446,7 +446,7 @@ static void rt2800mmio_txstatus_interrupt(struct rt2x00_dev *rt2x00dev)
 		if (!rt2x00_get_field32(status, TX_STA_FIFO_VALID))
 			break;
 
-		if (!kfifo_put(&rt2x00dev->txstatus_fifo, &status)) {
+		if (!kfifo_put(&rt2x00dev->txstatus_fifo, status)) {
 			rt2x00_warn(rt2x00dev, "TX status FIFO overrun, drop tx status report\n");
 			break;
 		}
diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index 997df03a0c2e..a81ceb61d746 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -164,7 +164,7 @@ static bool rt2800usb_tx_sta_fifo_read_completed(struct rt2x00_dev *rt2x00dev,
 
 	valid = rt2x00_get_field32(tx_status, TX_STA_FIFO_VALID);
 	if (valid) {
-		if (!kfifo_put(&rt2x00dev->txstatus_fifo, &tx_status))
+		if (!kfifo_put(&rt2x00dev->txstatus_fifo, tx_status))
 			rt2x00_warn(rt2x00dev, "TX status FIFO overrun\n");
 
 		queue_work(rt2x00dev->workqueue, &rt2x00dev->txdone_work);
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 85ca36f2136d..6b3a958e1be6 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -574,7 +574,7 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
 	};
 
 	spin_lock_irqsave(&aer_recover_ring_lock, flags);
-	if (kfifo_put(&aer_recover_ring, &entry))
+	if (kfifo_put(&aer_recover_ring, entry))
 		schedule_work(&aer_recover_work);
 	else
 		pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n",
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 10308c6a3d1c..552d51efb429 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -1,7 +1,7 @@
 /*
  * A generic kernel FIFO implementation
  *
- * Copyright (C) 2009/2010 Stefani Seibold <stefani@seibold.net>
+ * Copyright (C) 2013 Stefani Seibold <stefani@seibold.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -67,9 +67,10 @@ struct __kfifo {
 	union { \
 		struct __kfifo	kfifo; \
 		datatype	*type; \
+		const datatype	*const_type; \
 		char		(*rectype)[recsize]; \
 		ptrtype		*ptr; \
-		const ptrtype	*ptr_const; \
+		ptrtype const	*ptr_const; \
 	}
 
 #define __STRUCT_KFIFO(type, size, recsize, ptrtype) \
@@ -386,16 +387,12 @@ __kfifo_int_must_check_helper( \
 #define	kfifo_put(fifo, val) \
 ({ \
 	typeof((fifo) + 1) __tmp = (fifo); \
-	typeof((val) + 1) __val = (val); \
+	typeof(*__tmp->const_type) __val = (val); \
 	unsigned int __ret; \
-	const size_t __recsize = sizeof(*__tmp->rectype); \
+	size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
-	if (0) { \
-		typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \
-		__dummy = (typeof(__val))NULL; \
-	} \
 	if (__recsize) \
-		__ret = __kfifo_in_r(__kfifo, __val, sizeof(*__val), \
+		__ret = __kfifo_in_r(__kfifo, &__val, sizeof(__val), \
 			__recsize); \
 	else { \
 		__ret = !kfifo_is_full(__tmp); \
@@ -404,7 +401,7 @@ __kfifo_int_must_check_helper( \
 			((typeof(__tmp->type))__kfifo->data) : \
 			(__tmp->buf) \
 			)[__kfifo->in & __tmp->kfifo.mask] = \
-				*(typeof(__tmp->type))__val; \
+				(typeof(*__tmp->type))__val; \
 			smp_wmb(); \
 			__kfifo->in++; \
 		} \
@@ -415,7 +412,7 @@ __kfifo_int_must_check_helper( \
 /**
  * kfifo_get - get data from the fifo
  * @fifo: address of the fifo to be used
- * @val: the var where to store the data to be added
+ * @val: address where to store the data
  *
  * This macro reads the data from the fifo.
  * It returns 0 if the fifo was empty. Otherwise it returns the number
@@ -428,12 +425,10 @@ __kfifo_int_must_check_helper( \
 __kfifo_uint_must_check_helper( \
 ({ \
 	typeof((fifo) + 1) __tmp = (fifo); \
-	typeof((val) + 1) __val = (val); \
+	typeof(__tmp->ptr) __val = (val); \
 	unsigned int __ret; \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
-	if (0) \
-		__val = (typeof(__tmp->ptr))0; \
 	if (__recsize) \
 		__ret = __kfifo_out_r(__kfifo, __val, sizeof(*__val), \
 			__recsize); \
@@ -456,7 +451,7 @@ __kfifo_uint_must_check_helper( \
 /**
  * kfifo_peek - get data from the fifo without removing
  * @fifo: address of the fifo to be used
- * @val: the var where to store the data to be added
+ * @val: address where to store the data
  *
  * This reads the data from the fifo without removing it from the fifo.
  * It returns 0 if the fifo was empty. Otherwise it returns the number
@@ -469,12 +464,10 @@ __kfifo_uint_must_check_helper( \
 __kfifo_uint_must_check_helper( \
 ({ \
 	typeof((fifo) + 1) __tmp = (fifo); \
-	typeof((val) + 1) __val = (val); \
+	typeof(__tmp->ptr) __val = (val); \
 	unsigned int __ret; \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
-	if (0) \
-		__val = (typeof(__tmp->ptr))NULL; \
 	if (__recsize) \
 		__ret = __kfifo_out_peek_r(__kfifo, __val, sizeof(*__val), \
 			__recsize); \
@@ -508,14 +501,10 @@ __kfifo_uint_must_check_helper( \
 #define	kfifo_in(fifo, buf, n) \
 ({ \
 	typeof((fifo) + 1) __tmp = (fifo); \
-	typeof((buf) + 1) __buf = (buf); \
+	typeof(__tmp->ptr_const) __buf = (buf); \
 	unsigned long __n = (n); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
-	if (0) { \
-		typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \
-		__dummy = (typeof(__buf))NULL; \
-	} \
 	(__recsize) ?\
 	__kfifo_in_r(__kfifo, __buf, __n, __recsize) : \
 	__kfifo_in(__kfifo, __buf, __n); \
@@ -561,14 +550,10 @@ __kfifo_uint_must_check_helper( \
 __kfifo_uint_must_check_helper( \
 ({ \
 	typeof((fifo) + 1) __tmp = (fifo); \
-	typeof((buf) + 1) __buf = (buf); \
+	typeof(__tmp->ptr) __buf = (buf); \
 	unsigned long __n = (n); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
-	if (0) { \
-		typeof(__tmp->ptr) __dummy = NULL; \
-		__buf = __dummy; \
-	} \
 	(__recsize) ?\
 	__kfifo_out_r(__kfifo, __buf, __n, __recsize) : \
 	__kfifo_out(__kfifo, __buf, __n); \
@@ -773,14 +758,10 @@ __kfifo_uint_must_check_helper( \
 __kfifo_uint_must_check_helper( \
 ({ \
 	typeof((fifo) + 1) __tmp = (fifo); \
-	typeof((buf) + 1) __buf = (buf); \
+	typeof(__tmp->ptr) __buf = (buf); \
 	unsigned long __n = (n); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
-	if (0) { \
-		typeof(__tmp->ptr) __dummy __attribute__ ((unused)) = NULL; \
-		__buf = __dummy; \
-	} \
 	(__recsize) ? \
 	__kfifo_out_peek_r(__kfifo, __buf, __n, __recsize) : \
 	__kfifo_out_peek(__kfifo, __buf, __n); \
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f9d78ec7831f..b7c171602ba1 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1269,7 +1269,7 @@ void memory_failure_queue(unsigned long pfn, int trapno, int flags)
 
 	mf_cpu = &get_cpu_var(memory_failure_cpu);
 	spin_lock_irqsave(&mf_cpu->lock, proc_flags);
-	if (kfifo_put(&mf_cpu->fifo, &entry))
+	if (kfifo_put(&mf_cpu->fifo, entry))
 		schedule_work_on(smp_processor_id(), &mf_cpu->work);
 	else
 		pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n",
diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c
index cfe40addda76..2fca916d9edf 100644
--- a/samples/kfifo/bytestream-example.c
+++ b/samples/kfifo/bytestream-example.c
@@ -64,7 +64,7 @@ static int __init testfunc(void)
 
 	/* put values into the fifo */
 	for (i = 0; i != 10; i++)
-		kfifo_put(&test, &i);
+		kfifo_put(&test, i);
 
 	/* show the number of used elements */
 	printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test));
@@ -85,7 +85,7 @@ static int __init testfunc(void)
 	kfifo_skip(&test);
 
 	/* put values into the fifo until is full */
-	for (i = 20; kfifo_put(&test, &i); i++)
+	for (i = 20; kfifo_put(&test, i); i++)
 		;
 
 	printk(KERN_INFO "queue len: %u\n", kfifo_len(&test));
diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c
index 06473791c08a..aa243db93f01 100644
--- a/samples/kfifo/dma-example.c
+++ b/samples/kfifo/dma-example.c
@@ -39,7 +39,7 @@ static int __init example_init(void)
 	kfifo_in(&fifo, "test", 4);
 
 	for (i = 0; i != 9; i++)
-		kfifo_put(&fifo, &i);
+		kfifo_put(&fifo, i);
 
 	/* kick away first byte */
 	kfifo_skip(&fifo);
diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
index 6f8e79e76c9e..8dc3c2e7105a 100644
--- a/samples/kfifo/inttype-example.c
+++ b/samples/kfifo/inttype-example.c
@@ -61,7 +61,7 @@ static int __init testfunc(void)
 
 	/* put values into the fifo */
 	for (i = 0; i != 10; i++)
-		kfifo_put(&test, &i);
+		kfifo_put(&test, i);
 
 	/* show the number of used elements */
 	printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test));
@@ -78,7 +78,7 @@ static int __init testfunc(void)
 	kfifo_skip(&test);
 
 	/* put values into the fifo until is full */
-	for (i = 20; kfifo_put(&test, &i); i++)
+	for (i = 20; kfifo_put(&test, i); i++)
 		;
 
 	printk(KERN_INFO "queue len: %u\n", kfifo_len(&test));
-- 
cgit v1.2.3


From 8d3ef556aba2b5b7d8b7144f7be1814d75ea3cc6 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Thu, 14 Nov 2013 14:32:19 -0800
Subject: cmdline-parser: fix build

Fix following errors:

  include/linux/cmdline-parser.h:17:12: error: 'BDEVNAME_SIZE' undeclared here
  block/cmdline-parser.c:17:2: error: implicit declaration of function 'kzalloc'

Signed-off-by: Alexander Beregalov <alexander.beregalov@intel.com>
Cc: CaiZhiyong <caizhiyong@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cmdline-parser.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h
index 98e892ef6d5a..a0f9280421ec 100644
--- a/include/linux/cmdline-parser.h
+++ b/include/linux/cmdline-parser.h
@@ -8,6 +8,8 @@
 #define CMDLINEPARSEH
 
 #include <linux/blkdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
 
 /* partition flags */
 #define PF_RDONLY                   0x01 /* Device is read only */
-- 
cgit v1.2.3


From be9eac48274a2d9b142d6dd8567b9b2362939346 Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Fri, 15 Nov 2013 06:18:40 +0100
Subject: macvlan: introduce macvlan_dev_real_dev() helper function

Introduce helper function macvlan_dev_real_dev which returns the
underlying device of a macvlan device, similar to vlan_dev_real_dev()
for 802.1q VLAN devices.

v2: IFF_MACVLAN flag and equivalent of is_macvlan_dev() were
introduced in the meantime

v3: do BUG() if compiled without macvlan support

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_macvlan.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index c2702856295e..84ba5ac39e03 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -119,4 +119,21 @@ extern int macvlan_link_register(struct rtnl_link_ops *ops);
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
+#if IS_ENABLED(CONFIG_MACVLAN)
+static inline struct net_device *
+macvlan_dev_real_dev(const struct net_device *dev)
+{
+	struct macvlan_dev *macvlan = netdev_priv(dev);
+
+	return macvlan->lowerdev;
+}
+#else
+static inline struct net_device *
+macvlan_dev_real_dev(const struct net_device *dev)
+{
+	BUG();
+	return NULL;
+}
+#endif
+
 #endif /* _LINUX_IF_MACVLAN_H */
-- 
cgit v1.2.3


From 018c5bba052b3a383d83cf0c756da0e7bc748397 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 15 Nov 2013 21:11:16 -0500
Subject: net: Handle CHECKSUM_COMPLETE more adequately in pskb_trim_rcsum().

Currently pskb_trim_rcsum() just balks on CHECKSUM_COMPLETE packets
and remarks them as CHECKSUM_NONE, forcing a software checksum
validation later.

We have all of the mechanics available to fixup the skb->csum value,
even for complicated fragmented packets, via the helpers
skb_checksum() and csum_sub().

So just use them.

Based upon a suggestion by Herbert Xu.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 215b5ea1cb30..bec1cc7d5e3c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2263,24 +2263,6 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb,
 
 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
 
-/**
- *	pskb_trim_rcsum - trim received skb and update checksum
- *	@skb: buffer to trim
- *	@len: new length
- *
- *	This is exactly the same as pskb_trim except that it ensures the
- *	checksum of received packets are still valid after the operation.
- */
-
-static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
-{
-	if (likely(len >= skb->len))
-		return 0;
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->ip_summed = CHECKSUM_NONE;
-	return __pskb_trim(skb, len);
-}
-
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next;					\
 		     skb != (struct sk_buff *)(queue);				\
@@ -2378,6 +2360,27 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 
+/**
+ *	pskb_trim_rcsum - trim received skb and update checksum
+ *	@skb: buffer to trim
+ *	@len: new length
+ *
+ *	This is exactly the same as pskb_trim except that it ensures the
+ *	checksum of received packets are still valid after the operation.
+ */
+
+static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	if (likely(len >= skb->len))
+		return 0;
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		__wsum adj = skb_checksum(skb, len, skb->len - len, 0);
+
+		skb->csum = csum_sub(skb->csum, adj);
+	}
+	return __pskb_trim(skb, len);
+}
+
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
 {
-- 
cgit v1.2.3


From b26d4cd385fc51e8844e2cdf9ba2051f5bba11a5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Oct 2013 18:47:37 -0400
Subject: consolidate simple ->d_delete() instances

Rename simple_delete_dentry() to always_delete_dentry() and export it.
Export simple_dentry_operations, while we are at it, and get rid of
their duplicates

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/kernel/perfmon.c |  8 +-------
 fs/9p/vfs_dentry.c         | 19 +------------------
 fs/configfs/dir.c          | 12 +-----------
 fs/efivarfs/super.c        | 11 +----------
 fs/hostfs/hostfs_kern.c    | 11 +----------
 fs/libfs.c                 | 12 +++++++-----
 fs/proc/generic.c          | 18 +-----------------
 fs/proc/namespaces.c       |  8 +-------
 include/linux/fs.h         |  2 ++
 kernel/cgroup.c            |  7 +------
 net/sunrpc/rpc_pipe.c      | 11 +----------
 11 files changed, 18 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5a9ff1c3c3e9..cb592773c78b 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2166,12 +2166,6 @@ static const struct file_operations pfm_file_ops = {
 	.flush		= pfm_flush
 };
 
-static int
-pfmfs_delete_dentry(const struct dentry *dentry)
-{
-	return 1;
-}
-
 static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
 	return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
@@ -2179,7 +2173,7 @@ static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
 }
 
 static const struct dentry_operations pfmfs_dentry_operations = {
-	.d_delete = pfmfs_delete_dentry,
+	.d_delete = always_delete_dentry,
 	.d_dname = pfmfs_dname,
 };
 
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index f039b104a98e..b03dd23feda8 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -42,23 +42,6 @@
 #include "v9fs_vfs.h"
 #include "fid.h"
 
-/**
- * v9fs_dentry_delete - called when dentry refcount equals 0
- * @dentry:  dentry in question
- *
- * By returning 1 here we should remove cacheing of unused
- * dentry components.
- *
- */
-
-static int v9fs_dentry_delete(const struct dentry *dentry)
-{
-	p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
-		 dentry->d_name.name, dentry);
-
-	return 1;
-}
-
 /**
  * v9fs_cached_dentry_delete - called when dentry refcount equals 0
  * @dentry:  dentry in question
@@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = {
 };
 
 const struct dentry_operations v9fs_dentry_operations = {
-	.d_delete = v9fs_dentry_delete,
+	.d_delete = always_delete_dentry,
 	.d_release = v9fs_dentry_release,
 };
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 277bd1be21fd..4522e0755773 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -66,19 +66,9 @@ static void configfs_d_iput(struct dentry * dentry,
 	iput(inode);
 }
 
-/*
- * We _must_ delete our dentries on last dput, as the chain-to-parent
- * behavior is required to clear the parents of default_groups.
- */
-static int configfs_d_delete(const struct dentry *dentry)
-{
-	return 1;
-}
-
 const struct dentry_operations configfs_dentry_ops = {
 	.d_iput		= configfs_d_iput,
-	/* simple_delete_dentry() isn't exported */
-	.d_delete	= configfs_d_delete,
+	.d_delete	= always_delete_dentry,
 };
 
 #ifdef CONFIG_LOCKDEP
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index a8766b880c07..becc725a1953 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr)
 	return 0;
 }
 
-/*
- * Retaining negative dentries for an in-memory filesystem just wastes
- * memory and lookup time: arrange for them to be deleted immediately.
- */
-static int efivarfs_delete_dentry(const struct dentry *dentry)
-{
-	return 1;
-}
-
 static struct dentry_operations efivarfs_d_ops = {
 	.d_compare = efivarfs_d_compare,
 	.d_hash = efivarfs_d_hash,
-	.d_delete = efivarfs_delete_dentry,
+	.d_delete = always_delete_dentry,
 };
 
 static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 25437280a207..db23ce1bd903 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
 
 #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
 
-static int hostfs_d_delete(const struct dentry *dentry)
-{
-	return 1;
-}
-
-static const struct dentry_operations hostfs_dentry_ops = {
-	.d_delete		= hostfs_d_delete,
-};
-
 /* Changed in hostfs_args before the kernel starts running */
 static char *root_ino = "";
 static int append = 0;
@@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	sb->s_blocksize_bits = 10;
 	sb->s_magic = HOSTFS_SUPER_MAGIC;
 	sb->s_op = &hostfs_sbops;
-	sb->s_d_op = &hostfs_dentry_ops;
+	sb->s_d_op = &simple_dentry_operations;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 
 	/* NULL is printed as <NULL> by sprintf: avoid that. */
diff --git a/fs/libfs.c b/fs/libfs.c
index 5de06947ba5e..a1844244246f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs);
  * Retaining negative dentries for an in-memory filesystem just wastes
  * memory and lookup time: arrange for them to be deleted immediately.
  */
-static int simple_delete_dentry(const struct dentry *dentry)
+int always_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
+EXPORT_SYMBOL(always_delete_dentry);
+
+const struct dentry_operations simple_dentry_operations = {
+	.d_delete = always_delete_dentry,
+};
+EXPORT_SYMBOL(simple_dentry_operations);
 
 /*
  * Lookup the data. This is trivial - if the dentry didn't already
@@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry)
  */
 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
-	static const struct dentry_operations simple_dentry_operations = {
-		.d_delete = simple_delete_dentry,
-	};
-
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
 	if (!dentry->d_sb->s_d_op)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 737e15615b04..cca93b6fb9a9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -174,22 +174,6 @@ static const struct inode_operations proc_link_inode_operations = {
 	.follow_link	= proc_follow_link,
 };
 
-/*
- * As some entries in /proc are volatile, we want to 
- * get rid of unused dentries.  This could be made 
- * smarter: we could keep a "volatile" flag in the 
- * inode to indicate which ones to keep.
- */
-static int proc_delete_dentry(const struct dentry * dentry)
-{
-	return 1;
-}
-
-static const struct dentry_operations proc_dentry_operations =
-{
-	.d_delete	= proc_delete_dentry,
-};
-
 /*
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
@@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
 			inode = proc_get_inode(dir->i_sb, de);
 			if (!inode)
 				return ERR_PTR(-ENOMEM);
-			d_set_d_op(dentry, &proc_dentry_operations);
+			d_set_d_op(dentry, &simple_dentry_operations);
 			d_add(dentry, inode);
 			return NULL;
 		}
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 49a7fff2e83a..9ae46b87470d 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = {
 	.setattr	= proc_setattr,
 };
 
-static int ns_delete_dentry(const struct dentry *dentry)
-{
-	/* Don't cache namespace inodes when not in use */
-	return 1;
-}
-
 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
 {
 	struct inode *inode = dentry->d_inode;
@@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
 
 const struct dentry_operations ns_dentry_operations =
 {
-	.d_delete	= ns_delete_dentry,
+	.d_delete	= always_delete_dentry,
 	.d_dname	= ns_dname,
 };
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bf5d574ebdf4..121f11f001c0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2622,7 +2622,9 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
 extern int simple_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata);
+extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
+extern const struct dentry_operations simple_dentry_operations;
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e0839bcd48c8..4c62513fe19f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -895,11 +895,6 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 	iput(inode);
 }
 
-static int cgroup_delete(const struct dentry *d)
-{
-	return 1;
-}
-
 static void remove_dir(struct dentry *d)
 {
 	struct dentry *parent = dget(d->d_parent);
@@ -1486,7 +1481,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
 {
 	static const struct dentry_operations cgroup_dops = {
 		.d_iput = cgroup_diput,
-		.d_delete = cgroup_delete,
+		.d_delete = always_delete_dentry,
 	};
 
 	struct inode *inode =
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index d0d14a04dce1..bf04b30a788a 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -471,15 +471,6 @@ struct rpc_filelist {
 	umode_t mode;
 };
 
-static int rpc_delete_dentry(const struct dentry *dentry)
-{
-	return 1;
-}
-
-static const struct dentry_operations rpc_dentry_operations = {
-	.d_delete = rpc_delete_dentry,
-};
-
 static struct inode *
 rpc_get_inode(struct super_block *sb, umode_t mode)
 {
@@ -1266,7 +1257,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = RPCAUTH_GSSMAGIC;
 	sb->s_op = &s_ops;
-	sb->s_d_op = &rpc_dentry_operations;
+	sb->s_d_op = &simple_dentry_operations;
 	sb->s_time_gran = 1;
 
 	inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
-- 
cgit v1.2.3


From 2bc74feba12fbf052ec97aee8624c9f13593a9ac Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Oct 2013 16:39:14 -0400
Subject: take read_seqbegin_or_lock() and friends to seqlock.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c             | 29 -----------------------------
 include/linux/seqlock.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0a38ef8d7f00..667e23ab0b4c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock);
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
-/**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
- *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
- */
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
-	if (!(*seq & 1))	/* Even */
-		*seq = read_seqbegin(lock);
-	else			/* Odd */
-		read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
-	return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
-	if (seq & 1)
-		read_sequnlock_excl(lock);
-}
-
 /*
  * This is the single most critical data structure when it comes
  * to the dcache: the hashtable for lookups. Somebody should try
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 1e8a8b6e837d..cf87a24c0f92 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -354,6 +354,35 @@ static inline void read_sequnlock_excl(seqlock_t *sl)
 	spin_unlock(&sl->lock);
 }
 
+/**
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
+ * @lock: sequence lock
+ * @seq : sequence number to be checked
+ *
+ * First try it once optimistically without taking the lock. If that fails,
+ * take the lock. The sequence number is also used as a marker for deciding
+ * whether to be a reader (even) or writer (odd).
+ * N.B. seq must be initialized to an even number to begin with.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+	if (!(*seq & 1))	/* Even */
+		*seq = read_seqbegin(lock);
+	else			/* Odd */
+		read_seqlock_excl(lock);
+}
+
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+	return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+	if (seq & 1)
+		read_sequnlock_excl(lock);
+}
+
 static inline void read_seqlock_excl_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-- 
cgit v1.2.3


From d5b5f391d434c5cc8bcb1ab2d759738797b85f52 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 14 Nov 2013 16:23:04 +0100
Subject: ftrace, perf: Avoid infinite event generation loop

Vince's perf-trinity fuzzer found yet another 'interesting' problem.

When we sample the irq_work_exit tracepoint with period==1 (or
PERF_SAMPLE_PERIOD) and we add an fasync SIGNAL handler we create an
infinite event generation loop:

  ,-> <IPI>
  |     irq_work_exit() ->
  |       trace_irq_work_exit() ->
  |         ...
  |           __perf_event_overflow() -> (due to fasync)
  |             irq_work_queue() -> (irq_work_list must be empty)
  '---------      arch_irq_work_raise()

Similar things can happen due to regular poll() wakeups if we exceed
the ring-buffer wakeup watermark, or have an event_limit.

To avoid this, dis-allow sampling this particular tracepoint.

In order to achieve this, create a special perf_perm function pointer
for each event and call this (when set) on trying to create a
tracepoint perf event.

[ roasted: use expr... to allow for ',' in your expression ]

Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20131114152304.GC5364@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/trace/irq_vectors.h | 11 +++++++++++
 include/linux/ftrace_event.h             | 16 ++++++++++++++++
 include/linux/tracepoint.h               |  4 ++++
 include/trace/ftrace.h                   |  7 +++++++
 kernel/trace/trace_event_perf.c          |  6 ++++++
 5 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 2874df24e7a4..4cab890007a7 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -71,6 +71,17 @@ DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);
  */
 DEFINE_IRQ_VECTOR_EVENT(irq_work);
 
+/*
+ * We must dis-allow sampling irq_work_exit() because perf event sampling
+ * itself can cause irq_work, which would lead to an infinite loop;
+ *
+ *  1) irq_work_exit happens
+ *  2) generates perf sample
+ *  3) generates irq_work
+ *  4) goto 1
+ */
+TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0);
+
 /*
  * call_function - called when entering/exiting a call function interrupt
  * vector handler
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 9abbe630c456..8c9b7a1c4138 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -248,6 +248,9 @@ struct ftrace_event_call {
 #ifdef CONFIG_PERF_EVENTS
 	int				perf_refcount;
 	struct hlist_head __percpu	*perf_events;
+
+	int	(*perf_perm)(struct ftrace_event_call *,
+			     struct perf_event *);
 #endif
 };
 
@@ -317,6 +320,19 @@ struct ftrace_event_file {
 	}								\
 	early_initcall(trace_init_flags_##name);
 
+#define __TRACE_EVENT_PERF_PERM(name, expr...)				\
+	static int perf_perm_##name(struct ftrace_event_call *tp_event, \
+				    struct perf_event *p_event)		\
+	{								\
+		return ({ expr; });					\
+	}								\
+	static int __init trace_init_perf_perm_##name(void)		\
+	{								\
+		event_##name.perf_perm = &perf_perm_##name;		\
+		return 0;						\
+	}								\
+	early_initcall(trace_init_perf_perm_##name);
+
 #define PERF_MAX_TRACE_SIZE	2048
 
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index ebeab360d851..f16dc0a40049 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -267,6 +267,8 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define TRACE_EVENT_FLAGS(event, flag)
 
+#define TRACE_EVENT_PERF_PERM(event, expr...)
+
 #endif /* DECLARE_TRACE */
 
 #ifndef TRACE_EVENT
@@ -399,4 +401,6 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define TRACE_EVENT_FLAGS(event, flag)
 
+#define TRACE_EVENT_PERF_PERM(event, expr...)
+
 #endif /* ifdef TRACE_EVENT (see note above) */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 52594b20179e..6b852f60f8ae 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -90,6 +90,10 @@
 #define TRACE_EVENT_FLAGS(name, value)					\
 	__TRACE_EVENT_FLAGS(name, value)
 
+#undef TRACE_EVENT_PERF_PERM
+#define TRACE_EVENT_PERF_PERM(name, expr...)				\
+	__TRACE_EVENT_PERF_PERM(name, expr)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 
@@ -140,6 +144,9 @@
 #undef TRACE_EVENT_FLAGS
 #define TRACE_EVENT_FLAGS(event, flag)
 
+#undef TRACE_EVENT_PERF_PERM
+#define TRACE_EVENT_PERF_PERM(event, expr...)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 78e27e3b52ac..630889f68b1d 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -24,6 +24,12 @@ static int	total_ref_count;
 static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
 				 struct perf_event *p_event)
 {
+	if (tp_event->perf_perm) {
+		int ret = tp_event->perf_perm(tp_event, p_event);
+		if (ret)
+			return ret;
+	}
+
 	/* The ftrace function trace is allowed only for root. */
 	if (ftrace_event_is_function(tp_event) &&
 	    perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
-- 
cgit v1.2.3


From b972fc308c2763096b61b62169f2167ee0ca5a19 Mon Sep 17 00:00:00 2001
From: Alex Shi <alex.shi@linaro.org>
Date: Tue, 19 Nov 2013 17:21:52 +0800
Subject: sched: Remove unused variable in 'struct sched_domain'

The 'u64 last_update' variable isn't used now, remove it to save a bit of space.

Signed-off-by: Alex Shi <alex.shi@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Morten.Rasmussen@arm.com
Cc: linaro-kernel@lists.linaro.org
Link: http://lkml.kernel.org/r/1384852912-24791-1-git-send-email-alex.shi@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f7efc8604652..b122395bf5f4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -823,8 +823,6 @@ struct sched_domain {
 	unsigned int balance_interval;	/* initialise to 1. units in ms. */
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
-	u64 last_update;
-
 	/* idle_balance() stats */
 	u64 max_newidle_lb_cost;
 	unsigned long next_decay_max_lb_cost;
-- 
cgit v1.2.3


From 94eddfbeaafa3e8040a2c47d370dea0e58e76941 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 19 Nov 2013 09:25:07 -0700
Subject: blk-mq: ensure that we set REQ_IO_STAT so diskstats work

If disk stats are enabled on the queue, a request needs to
be marked with REQ_IO_STAT for accounting to be active on
that request. This fixes an issue with virtio-blk not
showing up in /proc/diskstats after the conversion to
blk-mq.

Add QUEUE_FLAG_MQ_DEFAULT, setting stats and same cpu-group
completion on by default.

Reported-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 12 ++++++++----
 include/linux/blkdev.h |  3 +++
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 862f458d4760..32593dba4684 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -171,9 +171,12 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
-static void blk_mq_rq_ctx_init(struct blk_mq_ctx *ctx, struct request *rq,
-			       unsigned int rw_flags)
+static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
+			       struct request *rq, unsigned int rw_flags)
 {
+	if (blk_queue_io_stat(q))
+		rw_flags |= REQ_IO_STAT;
+
 	rq->mq_ctx = ctx;
 	rq->cmd_flags = rw_flags;
 	ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
@@ -197,7 +200,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 
 		rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
 		if (rq) {
-			blk_mq_rq_ctx_init(ctx, rq, rw);
+			blk_mq_rq_ctx_init(q, ctx, rq, rw);
 			break;
 		} else if (!(gfp & __GFP_WAIT))
 			break;
@@ -921,7 +924,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	trace_block_getrq(q, bio, rw);
 	rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
 	if (likely(rq))
-		blk_mq_rq_ctx_init(ctx, rq, rw);
+		blk_mq_rq_ctx_init(q, ctx, rq, rw);
 	else {
 		blk_mq_put_ctx(ctx);
 		trace_block_sleeprq(q, bio, rw);
@@ -1377,6 +1380,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 	q->queue_hw_ctx = hctxs;
 
 	q->mq_ops = reg->ops;
+	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
 	blk_queue_make_request(q, blk_mq_make_request);
 	blk_queue_rq_timed_out(q, reg->ops->timeout);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f26ec20f6354..1b135d49b279 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -505,6 +505,9 @@ struct request_queue {
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_ADD_RANDOM))
 
+#define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
+				 (1 << QUEUE_FLAG_SAME_COMP))
+
 static inline void queue_lockdep_assert_held(struct request_queue *q)
 {
 	if (q->queue_lock)
-- 
cgit v1.2.3


From 6bab2c613d7fa70bb8514f89ab7455ede717142b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 15 Nov 2013 14:15:07 -0800
Subject: genirq: Correct fuzzy and fragile IRQ_RETVAL() definition

commit bedd30d986a0 ("genirq: make irqreturn_t an enum") blindly replaced
"0" by "IRQ_NONE" in the "IRQ_RETVAL(x)" macro definition.

However, as "x" is a condition, "0" meant "boolean false", not an
irqreturn_t value.

All of this worked, and kept working after the addition of IRQ_WAKE_THREAD,
as
  - both "boolean false" and "IRQ_NONE" are "0" (for the comparison),
  - "boolean true" and "boolean false" nicely map to the correct values of
    "IRQ_HANDLED" and "IRQ_NONE" (for the return value).

Correct the macro definition for clarity and future-proofness.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqreturn.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h
index 714ba08dc092..e374e369fb2f 100644
--- a/include/linux/irqreturn.h
+++ b/include/linux/irqreturn.h
@@ -14,6 +14,6 @@ enum irqreturn {
 };
 
 typedef enum irqreturn irqreturn_t;
-#define IRQ_RETVAL(x)	((x) != IRQ_NONE)
+#define IRQ_RETVAL(x)	((x) ? IRQ_HANDLED : IRQ_NONE)
 
 #endif
-- 
cgit v1.2.3


From 694e096fd7c2a7d40760fc9c9dfc826bdd495ea4 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <bjschuma@netapp.com>
Date: Wed, 13 Nov 2013 12:29:08 -0500
Subject: NFS: Enabling v4.2 should not recompile nfsd and lockd

When CONFIG_NFS_V4_2 is toggled nfsd and lockd will be recompiled,
instead of only the nfs client.  This patch moves a small amount of code
into the client directory to avoid unnecessary recompiles.

Signed-off-by: Anna Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/blocklayout/blocklayout.h |  1 +
 fs/nfs/dns_resolve.c             |  2 ++
 fs/nfs/internal.h                | 15 +++++++++++++++
 fs/nfs/nfs4_fs.h                 |  8 ++++++++
 include/linux/nfs4.h             | 10 ----------
 include/linux/nfs_fs.h           | 18 ------------------
 6 files changed, 26 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 8485978993e8..9838fb020473 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -36,6 +36,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 
+#include "../nfs4_fs.h"
 #include "../pnfs.h"
 #include "../netns.h"
 
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index fc0f95ec7358..d25f10fb4926 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -46,7 +46,9 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/svcauth.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/nfs_fs.h>
 
+#include "nfs4_fs.h"
 #include "dns_resolve.h"
 #include "cache_lib.h"
 #include "netns.h"
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index bca6a3e3c49c..8b5cc04a8611 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -269,6 +269,21 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
 extern struct rpc_procinfo nfs4_procedures[];
 #endif
 
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags);
+static inline void nfs4_label_free(struct nfs4_label *label)
+{
+	if (label) {
+		kfree(label->label);
+		kfree(label);
+	}
+	return;
+}
+#else
+static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
+static inline void nfs4_label_free(void *label) {}
+#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
+
 /* proc.c */
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
 extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3ce79b04522e..5609edc742a0 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -9,6 +9,14 @@
 #ifndef __LINUX_FS_NFS_NFS4_FS_H
 #define __LINUX_FS_NFS_NFS4_FS_H
 
+#if defined(CONFIG_NFS_V4_2)
+#define NFS4_MAX_MINOR_VERSION 2
+#elif defined(CONFIG_NFS_V4_1)
+#define NFS4_MAX_MINOR_VERSION 1
+#else
+#define NFS4_MAX_MINOR_VERSION 0
+#endif
+
 #if IS_ENABLED(CONFIG_NFS_V4)
 
 #define NFS4_MAX_LOOP_ON_RECOVER (10)
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index c6f41b616965..ddd84871c0ad 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -410,16 +410,6 @@ enum lock_type4 {
 #define NFS4_VERSION 4
 #define NFS4_MINOR_VERSION 0
 
-#if defined(CONFIG_NFS_V4_2)
-#define NFS4_MAX_MINOR_VERSION 2
-#else
-#if defined(CONFIG_NFS_V4_1)
-#define NFS4_MAX_MINOR_VERSION 1
-#else
-#define NFS4_MAX_MINOR_VERSION 0
-#endif /* CONFIG_NFS_V4_1 */
-#endif /* CONFIG_NFS_V4_2 */
-
 #define NFS4_DEBUG 1
 
 /* Index of predefined Linux client operations */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 14a48207a304..48997374eaf0 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -506,24 +506,6 @@ extern const struct inode_operations nfs_referral_inode_operations;
 extern int nfs_mountpoint_expiry_timeout;
 extern void nfs_release_automount_timer(void);
 
-/*
- * linux/fs/nfs/nfs4proc.c
- */
-#ifdef CONFIG_NFS_V4_SECURITY_LABEL
-extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags);
-static inline void nfs4_label_free(struct nfs4_label *label)
-{
-	if (label) {
-		kfree(label->label);
-		kfree(label);
-	}
-	return;
-}
-#else
-static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
-static inline void nfs4_label_free(void *label) {}
-#endif
-
 /*
  * linux/fs/nfs/unlink.c
  */
-- 
cgit v1.2.3


From c53ed7423619b4e8108914a9f31b426dd58ad591 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 19 Nov 2013 15:19:31 +0100
Subject: genetlink: only pass array to genl_register_family_with_ops()

As suggested by David Miller, make genl_register_family_with_ops()
a macro and pass only the array, evaluating ARRAY_SIZE() in the
macro, this is a little safer.

The openvswitch has some indirection, assing ops/n_ops directly in
that code. This might ultimately just assign the pointers in the
family initializations, saving the struct genl_family_and_ops and
code (once mcast groups are handled differently.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c               |  3 +--
 drivers/net/wireless/mac80211_hwsim.c |  3 +--
 fs/dlm/netlink.c                      | 10 ++++++----
 include/linux/genl_magic_func.h       |  3 +--
 include/net/genetlink.h               |  8 ++++++--
 kernel/taskstats.c                    |  3 +--
 net/core/drop_monitor.c               |  3 +--
 net/hsr/hsr_netlink.c                 |  3 +--
 net/ieee802154/netlink.c              |  3 +--
 net/ipv4/tcp_metrics.c                |  3 +--
 net/irda/irnetlink.c                  |  3 +--
 net/l2tp/l2tp_netlink.c               |  7 +------
 net/netfilter/ipvs/ip_vs_ctl.c        |  2 +-
 net/netlabel/netlabel_cipso_v4.c      |  2 +-
 net/netlabel/netlabel_mgmt.c          |  2 +-
 net/netlabel/netlabel_unlabeled.c     |  2 +-
 net/netlink/genetlink.c               | 14 ++++++++------
 net/nfc/netlink.c                     |  3 +--
 net/openvswitch/datapath.c            |  5 +++--
 net/tipc/netlink.c                    | 11 ++++++-----
 net/wimax/stack.c                     |  4 ++--
 net/wireless/nl80211.c                |  3 +--
 22 files changed, 47 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 6390254beb7d..f55758b0840e 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2699,8 +2699,7 @@ static int team_nl_init(void)
 {
 	int err;
 
-	err = genl_register_family_with_ops(&team_nl_family, team_nl_ops,
-					    ARRAY_SIZE(team_nl_ops));
+	err = genl_register_family_with_ops(&team_nl_family, team_nl_ops);
 	if (err)
 		return err;
 
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index cfc3fda79a2d..9df7bc91a26f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2148,8 +2148,7 @@ static int hwsim_init_netlink(void)
 
 	printk(KERN_INFO "mac80211_hwsim: initializing netlink\n");
 
-	rc = genl_register_family_with_ops(&hwsim_genl_family,
-		hwsim_ops, ARRAY_SIZE(hwsim_ops));
+	rc = genl_register_family_with_ops(&hwsim_genl_family, hwsim_ops);
 	if (rc)
 		goto failure;
 
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 60a327863b11..e7cfbaf8d0e2 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -74,14 +74,16 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
-static struct genl_ops dlm_nl_ops = {
-	.cmd		= DLM_CMD_HELLO,
-	.doit		= user_cmd,
+static struct genl_ops dlm_nl_ops[] = {
+	{
+		.cmd	= DLM_CMD_HELLO,
+		.doit	= user_cmd,
+	},
 };
 
 int __init dlm_netlink_init(void)
 {
-	return genl_register_family_with_ops(&family, &dlm_nl_ops, 1);
+	return genl_register_family_with_ops(&family, dlm_nl_ops);
 }
 
 void dlm_netlink_exit(void)
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 023bc346b877..47086030ab31 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -293,8 +293,7 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\
 
 int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
 {
-	int err = genl_register_family_with_ops(&ZZZ_genl_family,
-		ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops));
+	int err = genl_register_family_with_ops(&ZZZ_genl_family, ZZZ_genl_ops);
 	if (err)
 		return err;
 #undef GENL_mc_group
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index e96385d46b48..9bd52a4c5e17 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -152,8 +152,9 @@ static inline int genl_register_family(struct genl_family *family)
  *
  * Return 0 on success or a negative error code.
  */
-static inline int genl_register_family_with_ops(struct genl_family *family,
-	const struct genl_ops *ops, size_t n_ops)
+static inline int _genl_register_family_with_ops(struct genl_family *family,
+						 const struct genl_ops *ops,
+						 size_t n_ops)
 {
 	family->module = THIS_MODULE;
 	family->ops = ops;
@@ -161,6 +162,9 @@ static inline int genl_register_family_with_ops(struct genl_family *family,
 	return __genl_register_family(family);
 }
 
+#define genl_register_family_with_ops(family, ops)	\
+	_genl_register_family_with_ops((family), (ops), ARRAY_SIZE(ops))
+
 int genl_unregister_family(struct genl_family *family);
 int genl_register_mc_group(struct genl_family *family,
 			   struct genl_multicast_group *grp);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 76595cd9d211..13d2f7cd65db 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -703,8 +703,7 @@ static int __init taskstats_init(void)
 {
 	int rc;
 
-	rc = genl_register_family_with_ops(&family, taskstats_ops,
-					   ARRAY_SIZE(taskstats_ops));
+	rc = genl_register_family_with_ops(&family, taskstats_ops);
 	if (rc)
 		return rc;
 
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index f9fe2f22d20b..0efc5028ba9d 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -365,8 +365,7 @@ static int __init init_net_drop_monitor(void)
 	}
 
 	rc = genl_register_family_with_ops(&net_drop_monitor_family,
-					   dropmon_ops,
-					   ARRAY_SIZE(dropmon_ops));
+					   dropmon_ops);
 	if (rc) {
 		pr_err("Could not create drop monitor netlink family\n");
 		return rc;
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 3b9205d2afc4..f182260be76d 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -414,8 +414,7 @@ int __init hsr_netlink_init(void)
 	if (rc)
 		goto fail_rtnl_link_register;
 
-	rc = genl_register_family_with_ops(&hsr_genl_family, hsr_ops,
-					   ARRAY_SIZE(hsr_ops));
+	rc = genl_register_family_with_ops(&hsr_genl_family, hsr_ops);
 	if (rc)
 		goto fail_genl_register_family;
 
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 3ffcdbb56aab..1a81709a4717 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -129,8 +129,7 @@ int __init ieee802154_nl_init(void)
 {
 	int rc;
 
-	rc = genl_register_family_with_ops(&nl802154_family, ieee8021154_ops,
-					   ARRAY_SIZE(ieee8021154_ops));
+	rc = genl_register_family_with_ops(&nl802154_family, ieee8021154_ops);
 	if (rc)
 		return rc;
 
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 8c121b523eee..06493736fbc8 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1082,8 +1082,7 @@ void __init tcp_metrics_init(void)
 	if (ret < 0)
 		goto cleanup;
 	ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
-					    tcp_metrics_nl_ops,
-					    ARRAY_SIZE(tcp_metrics_nl_ops));
+					    tcp_metrics_nl_ops);
 	if (ret < 0)
 		goto cleanup_subsys;
 	return;
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index bf5d7d476dae..a37b81fe0479 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -149,8 +149,7 @@ static const struct genl_ops irda_nl_ops[] = {
 
 int irda_nl_register(void)
 {
-	return genl_register_family_with_ops(&irda_nl_family,
-		irda_nl_ops, ARRAY_SIZE(irda_nl_ops));
+	return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops);
 }
 
 void irda_nl_unregister(void)
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 57db66e24f1f..4cfd722e9153 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -887,13 +887,8 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
 
 static int l2tp_nl_init(void)
 {
-	int err;
-
 	pr_info("L2TP netlink interface\n");
-	err = genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops,
-					    ARRAY_SIZE(l2tp_nl_ops));
-
-	return err;
+	return genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops);
 }
 
 static void l2tp_nl_cleanup(void)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fc8a04ed8854..393498704691 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3666,7 +3666,7 @@ static const struct genl_ops ip_vs_genl_ops[] __read_mostly = {
 static int __init ip_vs_genl_register(void)
 {
 	return genl_register_family_with_ops(&ip_vs_genl_family,
-		ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
+					     ip_vs_genl_ops);
 }
 
 static void ip_vs_genl_unregister(void)
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 706691739b99..69345cebe3a3 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -783,5 +783,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
 int __init netlbl_cipsov4_genl_init(void)
 {
 	return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family,
-		netlbl_cipsov4_ops, ARRAY_SIZE(netlbl_cipsov4_ops));
+					     netlbl_cipsov4_ops);
 }
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 7de6f660b80a..8ef83ee97c6a 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -779,5 +779,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
 int __init netlbl_mgmt_genl_init(void)
 {
 	return genl_register_family_with_ops(&netlbl_mgmt_gnl_family,
-		netlbl_mgmt_genl_ops, ARRAY_SIZE(netlbl_mgmt_genl_ops));
+					     netlbl_mgmt_genl_ops);
 }
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 76ee9252daa7..43817d73ccf9 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1397,7 +1397,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
 int __init netlbl_unlabel_genl_init(void)
 {
 	return genl_register_family_with_ops(&netlbl_unlabel_gnl_family,
-		netlbl_unlabel_genl_ops, ARRAY_SIZE(netlbl_unlabel_genl_ops));
+					     netlbl_unlabel_genl_ops);
 }
 
 /*
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f54215d7b8f3..c68ce73619b5 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -906,11 +906,13 @@ static int genl_ctrl_event(int event, void *data)
 	return 0;
 }
 
-static struct genl_ops genl_ctrl_ops = {
-	.cmd		= CTRL_CMD_GETFAMILY,
-	.doit		= ctrl_getfamily,
-	.dumpit		= ctrl_dumpfamily,
-	.policy		= ctrl_policy,
+static struct genl_ops genl_ctrl_ops[] = {
+	{
+		.cmd		= CTRL_CMD_GETFAMILY,
+		.doit		= ctrl_getfamily,
+		.dumpit		= ctrl_dumpfamily,
+		.policy		= ctrl_policy,
+	},
 };
 
 static struct genl_multicast_group notify_grp = {
@@ -954,7 +956,7 @@ static int __init genl_init(void)
 	for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
 		INIT_LIST_HEAD(&family_ht[i]);
 
-	err = genl_register_family_with_ops(&genl_ctrl, &genl_ctrl_ops, 1);
+	err = genl_register_family_with_ops(&genl_ctrl, genl_ctrl_ops);
 	if (err < 0)
 		goto problem;
 
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f5585611c098..fe6760d328a0 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1536,8 +1536,7 @@ int __init nfc_genl_init(void)
 {
 	int rc;
 
-	rc = genl_register_family_with_ops(&nfc_genl_family, nfc_genl_ops,
-					   ARRAY_SIZE(nfc_genl_ops));
+	rc = genl_register_family_with_ops(&nfc_genl_family, nfc_genl_ops);
 	if (rc)
 		return rc;
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 91e1c927a465..8ec8b73033e0 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1817,8 +1817,9 @@ static int dp_register_genl(void)
 	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
 		const struct genl_family_and_ops *f = &dp_genl_families[i];
 
-		err = genl_register_family_with_ops(f->family, f->ops,
-						    f->n_ops);
+		f->family->ops = f->ops;
+		f->family->n_ops = f->n_ops;
+		err = genl_register_family(f->family);
 		if (err)
 			goto error;
 		n_registered++;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 8bcd4985d0fb..9f72a6376362 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -76,9 +76,11 @@ static struct genl_family tipc_genl_family = {
 	.maxattr	= 0,
 };
 
-static struct genl_ops tipc_genl_ops = {
-	.cmd		= TIPC_GENL_CMD,
-	.doit		= handle_cmd,
+static struct genl_ops tipc_genl_ops[] = {
+	{
+		.cmd		= TIPC_GENL_CMD,
+		.doit		= handle_cmd,
+	},
 };
 
 static int tipc_genl_family_registered;
@@ -87,8 +89,7 @@ int tipc_netlink_start(void)
 {
 	int res;
 
-	res = genl_register_family_with_ops(&tipc_genl_family,
-		&tipc_genl_ops, 1);
+	res = genl_register_family_with_ops(&tipc_genl_family, tipc_genl_ops);
 	if (res) {
 		pr_err("Failed to register netlink interface\n");
 		return res;
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 47170c9495f1..6328afe90319 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -597,8 +597,8 @@ int __init wimax_subsys_init(void)
 
 	snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name),
 		 "WiMAX");
-	result = genl_register_family_with_ops(&wimax_gnl_family, wimax_gnl_ops,
-					       ARRAY_SIZE(wimax_gnl_ops));
+	result = genl_register_family_with_ops(&wimax_gnl_family,
+					       wimax_gnl_ops);
 	if (unlikely(result < 0)) {
 		printk(KERN_ERR "cannot register generic netlink family: %d\n",
 		       result);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 58c43c8e149f..1b6c5dd4dccf 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -11329,8 +11329,7 @@ int nl80211_init(void)
 {
 	int err;
 
-	err = genl_register_family_with_ops(&nl80211_fam,
-		nl80211_ops, ARRAY_SIZE(nl80211_ops));
+	err = genl_register_family_with_ops(&nl80211_fam, nl80211_ops);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3


From 68eb55031da7c967d954e5f9415cd05f4abdb692 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 19 Nov 2013 15:19:38 +0100
Subject: genetlink: pass family to functions using groups

This doesn't really change anything, but prepares for the
next patch that will change the APIs to pass the group ID
within the family, rather than the global group ID.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/acpi/event.c            |  3 +-
 drivers/net/team/team.c         |  5 +--
 drivers/scsi/pmcraid.c          |  3 +-
 drivers/thermal/thermal_core.c  |  3 +-
 fs/quota/netlink.c              |  2 +-
 include/linux/genl_magic_func.h |  3 +-
 include/net/genetlink.h         | 22 +++++++++----
 net/core/drop_monitor.c         |  3 +-
 net/hsr/hsr_netlink.c           |  6 ++--
 net/ieee802154/netlink.c        |  2 +-
 net/netlink/genetlink.c         | 12 ++++---
 net/nfc/netlink.c               | 39 +++++++++++++++--------
 net/openvswitch/datapath.c      | 35 +++++++++++++--------
 net/openvswitch/datapath.h      |  1 +
 net/openvswitch/dp_notify.c     |  5 +--
 net/wimax/op-msg.c              |  3 +-
 net/wimax/stack.c               |  3 +-
 net/wireless/nl80211.c          | 70 ++++++++++++++++++++---------------------
 18 files changed, 133 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c
index 8247fcdde079..68a8755202ec 100644
--- a/drivers/acpi/event.c
+++ b/drivers/acpi/event.c
@@ -146,7 +146,8 @@ int acpi_bus_generate_netlink_event(const char *device_class,
 		return result;
 	}
 
-	genlmsg_multicast(skb, 0, acpi_event_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&acpi_event_genl_family,
+			  skb, 0, acpi_event_mcgrp.id, GFP_ATOMIC);
 	return 0;
 }
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index f55758b0840e..2721e29935a6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2677,8 +2677,9 @@ static struct genl_multicast_group team_change_event_mcgrp = {
 static int team_nl_send_multicast(struct sk_buff *skb,
 				  struct team *team, u32 portid)
 {
-	return genlmsg_multicast_netns(dev_net(team->dev), skb, 0,
-				       team_change_event_mcgrp.id, GFP_KERNEL);
+	return genlmsg_multicast_netns(&team_nl_family, dev_net(team->dev),
+				       skb, 0, team_change_event_mcgrp.id,
+				       GFP_KERNEL);
 }
 
 static int team_nl_send_event_options_get(struct team *team,
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index 1eb7b0280a45..2775441111ff 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -1512,7 +1512,8 @@ static int pmcraid_notify_aen(
 	}
 
 	result =
-		genlmsg_multicast(skb, 0, pmcraid_event_family.id, GFP_ATOMIC);
+		genlmsg_multicast(&pmcraid_event_family, skb, 0,
+				  pmcraid_event_family.id, GFP_ATOMIC);
 
 	/* If there are no listeners, genlmsg_multicast may return non-zero
 	 * value.
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 4962a6aaf295..2570a944fffc 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1675,7 +1675,8 @@ int thermal_generate_netlink_event(struct thermal_zone_device *tz,
 		return result;
 	}
 
-	result = genlmsg_multicast(skb, 0, thermal_event_mcgrp.id, GFP_ATOMIC);
+	result = genlmsg_multicast(&thermal_event_genl_family, skb, 0,
+				   thermal_event_mcgrp.id, GFP_ATOMIC);
 	if (result)
 		dev_err(&tz->device, "Failed to send netlink event:%d", result);
 
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index aa22fe03b76c..a5b5eddf6603 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -88,7 +88,7 @@ void quota_send_warning(struct kqid qid, dev_t dev,
 		goto attr_err_out;
 	genlmsg_end(skb, msg_head);
 
-	genlmsg_multicast(skb, 0, quota_mcgrp.id, GFP_NOFS);
+	genlmsg_multicast(&quota_genl_family, skb, 0, quota_mcgrp.id, GFP_NOFS);
 	return;
 attr_err_out:
 	printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 47086030ab31..5b9b8ae6748b 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -286,7 +286,8 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\
 		CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id;	\
 	if (!group_id)							\
 		return -EINVAL;						\
-	return genlmsg_multicast(skb, 0, group_id, flags);		\
+	return genlmsg_multicast(&ZZZ_genl_family, skb, 0,		\
+				 group_id, flags);			\
 }
 
 #include GENL_MAGIC_INCLUDE_FILE
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 11ac77f6180c..60aef0df386b 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -166,7 +166,8 @@ static inline int _genl_register_family_with_ops(struct genl_family *family,
 int genl_unregister_family(struct genl_family *family);
 int genl_register_mc_group(struct genl_family *family,
 			   struct genl_multicast_group *grp);
-void genl_notify(struct sk_buff *skb, struct net *net, u32 portid,
+void genl_notify(struct genl_family *family,
+		 struct sk_buff *skb, struct net *net, u32 portid,
 		 u32 group, struct nlmsghdr *nlh, gfp_t flags);
 
 void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
@@ -246,13 +247,15 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
 
 /**
  * genlmsg_multicast_netns - multicast a netlink message to a specific netns
+ * @family: the generic netlink family
  * @net: the net namespace
  * @skb: netlink message as socket buffer
  * @portid: own netlink portid to avoid sending to yourself
  * @group: multicast group id
  * @flags: allocation flags
  */
-static inline int genlmsg_multicast_netns(struct net *net, struct sk_buff *skb,
+static inline int genlmsg_multicast_netns(struct genl_family *family,
+					  struct net *net, struct sk_buff *skb,
 					  u32 portid, unsigned int group, gfp_t flags)
 {
 	return nlmsg_multicast(net->genl_sock, skb, portid, group, flags);
@@ -260,19 +263,23 @@ static inline int genlmsg_multicast_netns(struct net *net, struct sk_buff *skb,
 
 /**
  * genlmsg_multicast - multicast a netlink message to the default netns
+ * @family: the generic netlink family
  * @skb: netlink message as socket buffer
  * @portid: own netlink portid to avoid sending to yourself
  * @group: multicast group id
  * @flags: allocation flags
  */
-static inline int genlmsg_multicast(struct sk_buff *skb, u32 portid,
+static inline int genlmsg_multicast(struct genl_family *family,
+				    struct sk_buff *skb, u32 portid,
 				    unsigned int group, gfp_t flags)
 {
-	return genlmsg_multicast_netns(&init_net, skb, portid, group, flags);
+	return genlmsg_multicast_netns(family, &init_net, skb,
+				       portid, group, flags);
 }
 
 /**
  * genlmsg_multicast_allns - multicast a netlink message to all net namespaces
+ * @family: the generic netlink family
  * @skb: netlink message as socket buffer
  * @portid: own netlink portid to avoid sending to yourself
  * @group: multicast group id
@@ -280,7 +287,8 @@ static inline int genlmsg_multicast(struct sk_buff *skb, u32 portid,
  *
  * This function must hold the RTNL or rcu_read_lock().
  */
-int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid,
+int genlmsg_multicast_allns(struct genl_family *family,
+			    struct sk_buff *skb, u32 portid,
 			    unsigned int group, gfp_t flags);
 
 /**
@@ -353,6 +361,7 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags)
 
 /**
  * genl_set_err - report error to genetlink broadcast listeners
+ * @family: the generic netlink family
  * @net: the network namespace to report the error to
  * @portid: the PORTID of a process that we want to skip (if any)
  * @group: the broadcast group that will notice the error
@@ -361,7 +370,8 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags)
  * This function returns the number of broadcast listeners that have set the
  * NETLINK_RECV_NO_ENOBUFS socket option.
  */
-static inline int genl_set_err(struct net *net, u32 portid, u32 group, int code)
+static inline int genl_set_err(struct genl_family *family, struct net *net,
+			       u32 portid, u32 group, int code)
 {
 	return netlink_set_err(net->genl_sock, portid, group, code);
 }
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 46ee488c0015..1eab1dc48821 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -120,7 +120,8 @@ static void send_dm_alert(struct work_struct *work)
 	skb = reset_per_cpu_data(data);
 
 	if (skb)
-		genlmsg_multicast(skb, 0, dm_mcgrp.id, GFP_KERNEL);
+		genlmsg_multicast(&net_drop_monitor_family, skb, 0,
+				  dm_mcgrp.id, GFP_KERNEL);
 }
 
 /*
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 908e335dcdc9..0009416c08c2 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -129,7 +129,8 @@ void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN],
 		goto nla_put_failure;
 
 	genlmsg_end(skb, msg_head);
-	genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&hsr_genl_family, skb, 0,
+			  hsr_network_genl_mcgrp.id, GFP_ATOMIC);
 
 	return;
 
@@ -163,7 +164,8 @@ void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN])
 		goto nla_put_failure;
 
 	genlmsg_end(skb, msg_head);
-	genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&hsr_genl_family, skb, 0,
+			  hsr_network_genl_mcgrp.id, GFP_ATOMIC);
 
 	return;
 
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 1a81709a4717..5172f467a383 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -70,7 +70,7 @@ int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group)
 	if (genlmsg_end(msg, hdr) < 0)
 		goto out;
 
-	return genlmsg_multicast(msg, 0, group, GFP_ATOMIC);
+	return genlmsg_multicast(&nl802154_family, msg, 0, group, GFP_ATOMIC);
 out:
 	nlmsg_free(msg);
 	return -ENOBUFS;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index e9ef640200b4..36e3a86cacf6 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -883,11 +883,12 @@ static int genl_ctrl_event(int event, struct genl_family *family,
 		return PTR_ERR(msg);
 
 	if (!family->netnsok) {
-		genlmsg_multicast_netns(&init_net, msg, 0,
+		genlmsg_multicast_netns(&genl_ctrl, &init_net, msg, 0,
 					GENL_ID_CTRL, GFP_KERNEL);
 	} else {
 		rcu_read_lock();
-		genlmsg_multicast_allns(msg, 0, GENL_ID_CTRL, GFP_ATOMIC);
+		genlmsg_multicast_allns(&genl_ctrl, msg, 0,
+					GENL_ID_CTRL, GFP_ATOMIC);
 		rcu_read_unlock();
 	}
 
@@ -993,14 +994,15 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
 	return err;
 }
 
-int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group,
-			    gfp_t flags)
+int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
+			    u32 portid, unsigned int group, gfp_t flags)
 {
 	return genlmsg_mcast(skb, portid, group, flags);
 }
 EXPORT_SYMBOL(genlmsg_multicast_allns);
 
-void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group,
+void genl_notify(struct genl_family *family,
+		 struct sk_buff *skb, struct net *net, u32 portid, u32 group,
 		 struct nlmsghdr *nlh, gfp_t flags)
 {
 	struct sock *sk = net->genl_sock;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index fe6760d328a0..3092df313fb1 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -194,7 +194,8 @@ int nfc_genl_targets_found(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+	return genlmsg_multicast(&nfc_genl_family, msg, 0,
+				 nfc_genl_event_mcgrp.id, GFP_ATOMIC);
 
 nla_put_failure:
 	genlmsg_cancel(msg, hdr);
@@ -223,7 +224,8 @@ int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
 
 	return 0;
 
@@ -255,7 +257,8 @@ int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
 
 	return 0;
 
@@ -285,7 +288,8 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
 
 	return 0;
 
@@ -318,7 +322,8 @@ int nfc_genl_device_added(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
 
 	return 0;
 
@@ -348,7 +353,8 @@ int nfc_genl_device_removed(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
 
 	return 0;
 
@@ -414,7 +420,8 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list)
 
 	genlmsg_end(msg, hdr);
 
-	return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+	return genlmsg_multicast(&nfc_genl_family, msg, 0,
+				 nfc_genl_event_mcgrp.id, GFP_ATOMIC);
 
 nla_put_failure:
 	genlmsg_cancel(msg, hdr);
@@ -448,7 +455,8 @@ int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
 
 	return 0;
 
@@ -479,7 +487,8 @@ int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
 
 	return 0;
 
@@ -600,7 +609,8 @@ int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx,
 
 	dev->dep_link_up = true;
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_ATOMIC);
 
 	return 0;
 
@@ -632,7 +642,8 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_ATOMIC);
 
 	return 0;
 
@@ -1137,7 +1148,8 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
 
 	return 0;
 
@@ -1308,7 +1320,8 @@ static void se_io_cb(void *context, u8 *apdu, size_t apdu_len, int err)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0,
+			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
 
 	kfree(ctx);
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 3e2bb15fd717..5c19846b1d2a 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -61,10 +61,11 @@
 
 int ovs_net_id __read_mostly;
 
-static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
+static void ovs_notify(struct genl_family *family,
+		       struct sk_buff *skb, struct genl_info *info,
 		       struct genl_multicast_group *grp)
 {
-	genl_notify(skb, genl_info_net(info), info->snd_portid,
+	genl_notify(family, skb, genl_info_net(info), info->snd_portid,
 		    grp->id, info->nlhdr, GFP_KERNEL);
 }
 
@@ -877,9 +878,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 	ovs_unlock();
 
 	if (!IS_ERR(reply))
-		ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
+		ovs_notify(&dp_flow_genl_family, reply, info,
+			   &ovs_dp_flow_multicast_group);
 	else
-		genl_set_err(sock_net(skb->sk), 0,
+		genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
 			     ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
 	return 0;
 
@@ -990,7 +992,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	ovs_flow_free(flow, true);
 	ovs_unlock();
 
-	ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
+	ovs_notify(&dp_flow_genl_family, reply, info,
+		   &ovs_dp_flow_multicast_group);
 	return 0;
 unlock:
 	ovs_unlock();
@@ -1237,7 +1240,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	ovs_unlock();
 
-	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info,
+		   &ovs_dp_datapath_multicast_group);
 	return 0;
 
 err_destroy_local_port:
@@ -1302,7 +1306,8 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	__dp_destroy(dp);
 	ovs_unlock();
 
-	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info,
+		   &ovs_dp_datapath_multicast_group);
 
 	return 0;
 unlock:
@@ -1326,14 +1331,15 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 				      info->snd_seq, OVS_DP_CMD_NEW);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
-		genl_set_err(sock_net(skb->sk), 0,
+		genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0,
 			     ovs_dp_datapath_multicast_group.id, err);
 		err = 0;
 		goto unlock;
 	}
 
 	ovs_unlock();
-	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info,
+		   &ovs_dp_datapath_multicast_group);
 
 	return 0;
 unlock:
@@ -1425,7 +1431,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
 	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
 };
 
-static struct genl_family dp_vport_genl_family = {
+struct genl_family dp_vport_genl_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = sizeof(struct ovs_header),
 	.name = OVS_VPORT_FAMILY,
@@ -1595,7 +1601,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
+	ovs_notify(&dp_vport_genl_family, reply, info,
+		   &ovs_dp_vport_multicast_group);
 
 exit_unlock:
 	ovs_unlock();
@@ -1642,7 +1649,8 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	BUG_ON(err < 0);
 
 	ovs_unlock();
-	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
+	ovs_notify(&dp_vport_genl_family, reply, info,
+		   &ovs_dp_vport_multicast_group);
 	return 0;
 
 exit_free:
@@ -1679,7 +1687,8 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	err = 0;
 	ovs_dp_detach_port(vport);
 
-	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
+	ovs_notify(&dp_vport_genl_family, reply, info,
+		   &ovs_dp_vport_multicast_group);
 
 exit_unlock:
 	ovs_unlock();
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index d3d14a58aa91..4067ea41be28 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -177,6 +177,7 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
 }
 
 extern struct notifier_block ovs_dp_device_notifier;
+extern struct genl_family dp_vport_genl_family;
 extern struct genl_multicast_group ovs_dp_vport_multicast_group;
 
 void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 3d55ead6095c..f4b66c84ea0b 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -34,13 +34,14 @@ static void dp_detach_port_notify(struct vport *vport)
 					  OVS_VPORT_CMD_DEL);
 	ovs_dp_detach_port(vport);
 	if (IS_ERR(notify)) {
-		genl_set_err(ovs_dp_get_net(dp), 0,
+		genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
 			     ovs_dp_vport_multicast_group.id,
 			     PTR_ERR(notify));
 		return;
 	}
 
-	genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
+	genlmsg_multicast_netns(&dp_vport_genl_family,
+				ovs_dp_get_net(dp), notify, 0,
 				ovs_dp_vport_multicast_group.id,
 				GFP_KERNEL);
 }
diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c
index ff19cbeaf607..f37dd3c5576d 100644
--- a/net/wimax/op-msg.c
+++ b/net/wimax/op-msg.c
@@ -279,7 +279,8 @@ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb)
 
 	d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size);
 	d_dump(2, dev, msg, size);
-	genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL);
+	genlmsg_multicast(&wimax_gnl_family, skb, 0,
+			  wimax_gnl_mcg.id, GFP_KERNEL);
 	d_printf(1, dev, "CTX: genl multicast done\n");
 	return 0;
 }
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 6328afe90319..18888748e699 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -177,7 +177,8 @@ int wimax_gnl_re_state_change_send(
 		goto out;
 	}
 	genlmsg_end(report_skb, header);
-	genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL);
+	genlmsg_multicast(&wimax_gnl_family, report_skb, 0,
+			  wimax_gnl_mcg.id, GFP_KERNEL);
 out:
 	d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n",
 		wimax_dev, report_skb, result);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1b6c5dd4dccf..f20edfd2e1f0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6868,7 +6868,7 @@ void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
 
 	nla_nest_end(skb, data);
 	genlmsg_end(skb, hdr);
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), skb, 0,
 				nl80211_testmode_mcgrp.id, gfp);
 }
 EXPORT_SYMBOL(cfg80211_testmode_event);
@@ -9597,7 +9597,7 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
 		return;
 	}
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_config_mcgrp.id, GFP_KERNEL);
 }
 
@@ -9707,7 +9707,7 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
 		return;
 	}
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_scan_mcgrp.id, GFP_KERNEL);
 }
 
@@ -9726,7 +9726,7 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
 		return;
 	}
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_scan_mcgrp.id, GFP_KERNEL);
 }
 
@@ -9745,7 +9745,7 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
 		return;
 	}
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_scan_mcgrp.id, GFP_KERNEL);
 }
 
@@ -9764,7 +9764,7 @@ void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev,
 		return;
 	}
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_scan_mcgrp.id, GFP_KERNEL);
 }
 
@@ -9782,7 +9782,7 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
 		return;
 	}
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_scan_mcgrp.id, GFP_KERNEL);
 }
 
@@ -9837,8 +9837,8 @@ void nl80211_send_reg_change_event(struct regulatory_request *request)
 	genlmsg_end(msg, hdr);
 
 	rcu_read_lock();
-	genlmsg_multicast_allns(msg, 0, nl80211_regulatory_mcgrp.id,
-				GFP_ATOMIC);
+	genlmsg_multicast_allns(&nl80211_fam, msg, 0,
+				nl80211_regulatory_mcgrp.id, GFP_ATOMIC);
 	rcu_read_unlock();
 
 	return;
@@ -9873,7 +9873,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -9961,7 +9961,7 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10017,7 +10017,7 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10056,7 +10056,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10094,7 +10094,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, GFP_KERNEL);
 	return;
 
@@ -10128,7 +10128,7 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10169,7 +10169,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10208,7 +10208,7 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10261,8 +10261,8 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
 	genlmsg_end(msg, hdr);
 
 	rcu_read_lock();
-	genlmsg_multicast_allns(msg, 0, nl80211_regulatory_mcgrp.id,
-				GFP_ATOMIC);
+	genlmsg_multicast_allns(&nl80211_fam, msg, 0,
+				nl80211_regulatory_mcgrp.id, GFP_ATOMIC);
 	rcu_read_unlock();
 
 	return;
@@ -10307,7 +10307,7 @@ static void nl80211_send_remain_on_chan_event(
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10362,7 +10362,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
 		return;
 	}
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 }
 EXPORT_SYMBOL(cfg80211_new_sta);
@@ -10392,7 +10392,7 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10428,7 +10428,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10590,7 +10590,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10639,7 +10639,7 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10684,7 +10684,7 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10742,7 +10742,7 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10789,7 +10789,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10866,7 +10866,7 @@ void cfg80211_cqm_txe_notify(struct net_device *dev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10915,7 +10915,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -10962,7 +10962,7 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -11002,7 +11002,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -11154,7 +11154,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -11196,7 +11196,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
@@ -11279,7 +11279,7 @@ void cfg80211_ft_event(struct net_device *netdev,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, GFP_KERNEL);
 }
 EXPORT_SYMBOL(cfg80211_ft_event);
-- 
cgit v1.2.3


From 2a94fe48f32ccf7321450a2cc07f2b724a444e5b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 19 Nov 2013 15:19:39 +0100
Subject: genetlink: make multicast groups const, prevent abuse

Register generic netlink multicast groups as an array with
the family and give them contiguous group IDs. Then instead
of passing the global group ID to the various functions that
send messages, pass the ID relative to the family - for most
families that's just 0 because the only have one group.

This avoids the list_head and ID in each group, adding a new
field for the mcast group ID offset to the family.

At the same time, this allows us to prevent abusing groups
again like the quota and dropmon code did, since we can now
check that a family only uses a group it owns.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/acpi/event.c            |  26 ++--
 drivers/net/team/team.c         |  25 +---
 drivers/thermal/thermal_core.c  |  24 ++--
 fs/quota/netlink.c              |  15 +--
 include/linux/genl_magic_func.h |  49 +++----
 include/net/genetlink.h         |  48 ++++---
 net/core/drop_monitor.c         |  18 +--
 net/hsr/hsr_netlink.c           |  19 +--
 net/ieee802154/ieee802154.h     |   6 +-
 net/ieee802154/netlink.c        |  26 ++--
 net/ieee802154/nl-mac.c         |  22 +---
 net/netlink/genetlink.c         | 278 ++++++++++++++++++++++++----------------
 net/nfc/netlink.c               |  51 +++-----
 net/openvswitch/datapath.c      |  43 +++----
 net/openvswitch/dp_notify.c     |   6 +-
 net/wimax/op-msg.c              |   3 +-
 net/wimax/stack.c               |  21 ++-
 net/wimax/wimax-internal.h      |   1 -
 net/wireless/nl80211.c          | 129 ++++++++-----------
 19 files changed, 377 insertions(+), 433 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c
index 68a8755202ec..aeb5aa6ce068 100644
--- a/drivers/acpi/event.c
+++ b/drivers/acpi/event.c
@@ -78,15 +78,17 @@ enum {
 #define ACPI_GENL_VERSION		0x01
 #define ACPI_GENL_MCAST_GROUP_NAME 	"acpi_mc_group"
 
+static const struct genl_multicast_group acpi_event_mcgrps[] = {
+	{ .name = ACPI_GENL_MCAST_GROUP_NAME, },
+};
+
 static struct genl_family acpi_event_genl_family = {
 	.id = GENL_ID_GENERATE,
 	.name = ACPI_GENL_FAMILY_NAME,
 	.version = ACPI_GENL_VERSION,
 	.maxattr = ACPI_GENL_ATTR_MAX,
-};
-
-static struct genl_multicast_group acpi_event_mcgrp = {
-	.name = ACPI_GENL_MCAST_GROUP_NAME,
+	.mcgrps = acpi_event_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(acpi_event_mcgrps),
 };
 
 int acpi_bus_generate_netlink_event(const char *device_class,
@@ -146,8 +148,7 @@ int acpi_bus_generate_netlink_event(const char *device_class,
 		return result;
 	}
 
-	genlmsg_multicast(&acpi_event_genl_family,
-			  skb, 0, acpi_event_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&acpi_event_genl_family, skb, 0, 0, GFP_ATOMIC);
 	return 0;
 }
 
@@ -155,18 +156,7 @@ EXPORT_SYMBOL(acpi_bus_generate_netlink_event);
 
 static int acpi_event_genetlink_init(void)
 {
-	int result;
-
-	result = genl_register_family(&acpi_event_genl_family);
-	if (result)
-		return result;
-
-	result = genl_register_mc_group(&acpi_event_genl_family,
-					&acpi_event_mcgrp);
-	if (result)
-		genl_unregister_family(&acpi_event_genl_family);
-
-	return result;
+	return genl_register_family(&acpi_event_genl_family);
 }
 
 #else
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 2721e29935a6..0715de50b3dc 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2670,16 +2670,15 @@ static const struct genl_ops team_nl_ops[] = {
 	},
 };
 
-static struct genl_multicast_group team_change_event_mcgrp = {
-	.name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME,
+static const struct genl_multicast_group team_nl_mcgrps[] = {
+	{ .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, },
 };
 
 static int team_nl_send_multicast(struct sk_buff *skb,
 				  struct team *team, u32 portid)
 {
 	return genlmsg_multicast_netns(&team_nl_family, dev_net(team->dev),
-				       skb, 0, team_change_event_mcgrp.id,
-				       GFP_KERNEL);
+				       skb, 0, 0, GFP_KERNEL);
 }
 
 static int team_nl_send_event_options_get(struct team *team,
@@ -2698,22 +2697,8 @@ static int team_nl_send_event_port_get(struct team *team,
 
 static int team_nl_init(void)
 {
-	int err;
-
-	err = genl_register_family_with_ops(&team_nl_family, team_nl_ops);
-	if (err)
-		return err;
-
-	err = genl_register_mc_group(&team_nl_family, &team_change_event_mcgrp);
-	if (err)
-		goto err_change_event_grp_reg;
-
-	return 0;
-
-err_change_event_grp_reg:
-	genl_unregister_family(&team_nl_family);
-
-	return err;
+	return genl_register_family_with_ops_groups(&team_nl_family, team_nl_ops,
+						    team_nl_mcgrps);
 }
 
 static void team_nl_fini(void)
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 2570a944fffc..19edd6124ca3 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1606,15 +1606,17 @@ exit:
 EXPORT_SYMBOL_GPL(thermal_zone_get_zone_by_name);
 
 #ifdef CONFIG_NET
+static const struct genl_multicast_group thermal_event_mcgrps[] = {
+	{ .name = THERMAL_GENL_MCAST_GROUP_NAME, },
+};
+
 static struct genl_family thermal_event_genl_family = {
 	.id = GENL_ID_GENERATE,
 	.name = THERMAL_GENL_FAMILY_NAME,
 	.version = THERMAL_GENL_VERSION,
 	.maxattr = THERMAL_GENL_ATTR_MAX,
-};
-
-static struct genl_multicast_group thermal_event_mcgrp = {
-	.name = THERMAL_GENL_MCAST_GROUP_NAME,
+	.mcgrps = thermal_event_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(thermal_event_mcgrps),
 };
 
 int thermal_generate_netlink_event(struct thermal_zone_device *tz,
@@ -1676,7 +1678,7 @@ int thermal_generate_netlink_event(struct thermal_zone_device *tz,
 	}
 
 	result = genlmsg_multicast(&thermal_event_genl_family, skb, 0,
-				   thermal_event_mcgrp.id, GFP_ATOMIC);
+				   0, GFP_ATOMIC);
 	if (result)
 		dev_err(&tz->device, "Failed to send netlink event:%d", result);
 
@@ -1686,17 +1688,7 @@ EXPORT_SYMBOL_GPL(thermal_generate_netlink_event);
 
 static int genetlink_init(void)
 {
-	int result;
-
-	result = genl_register_family(&thermal_event_genl_family);
-	if (result)
-		return result;
-
-	result = genl_register_mc_group(&thermal_event_genl_family,
-					&thermal_event_mcgrp);
-	if (result)
-		genl_unregister_family(&thermal_event_genl_family);
-	return result;
+	return genl_register_family(&thermal_event_genl_family);
 }
 
 static void genetlink_exit(void)
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index a5b5eddf6603..72d29177998e 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -9,6 +9,10 @@
 #include <net/netlink.h>
 #include <net/genetlink.h>
 
+static const struct genl_multicast_group quota_mcgrps[] = {
+	{ .name = "events", },
+};
+
 /* Netlink family structure for quota */
 static struct genl_family quota_genl_family = {
 	/*
@@ -22,10 +26,8 @@ static struct genl_family quota_genl_family = {
 	.name = "VFS_DQUOT",
 	.version = 1,
 	.maxattr = QUOTA_NL_A_MAX,
-};
-
-static struct genl_multicast_group quota_mcgrp = {
-	.name = "events",
+	.mcgrps = quota_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(quota_mcgrps),
 };
 
 /**
@@ -88,7 +90,7 @@ void quota_send_warning(struct kqid qid, dev_t dev,
 		goto attr_err_out;
 	genlmsg_end(skb, msg_head);
 
-	genlmsg_multicast(&quota_genl_family, skb, 0, quota_mcgrp.id, GFP_NOFS);
+	genlmsg_multicast(&quota_genl_family, skb, 0, 0, GFP_NOFS);
 	return;
 attr_err_out:
 	printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
@@ -102,9 +104,6 @@ static int __init quota_init(void)
 	if (genl_register_family(&quota_genl_family) != 0)
 		printk(KERN_ERR
 		       "VFS: Failed to create quota netlink interface.\n");
-	if (genl_register_mc_group(&quota_genl_family, &quota_mcgrp))
-		printk(KERN_ERR
-		       "VFS: Failed to register quota mcast group.\n");
 	return 0;
 };
 
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 5b9b8ae6748b..c0894dd8827b 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -273,49 +273,40 @@ static struct genl_family ZZZ_genl_family __read_mostly = {
  * Magic: define multicast groups
  * Magic: define multicast group registration helper
  */
+#define ZZZ_genl_mcgrps		CONCAT_(GENL_MAGIC_FAMILY, _genl_mcgrps)
+static const struct genl_multicast_group ZZZ_genl_mcgrps[] = {
+#undef GENL_mc_group
+#define GENL_mc_group(group) { .name = #group, },
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+enum CONCAT_(GENL_MAGIC_FAMILY, group_ids) {
+#undef GENL_mc_group
+#define GENL_mc_group(group) CONCAT_(GENL_MAGIC_FAMILY, _group_ ## group),
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
 #undef GENL_mc_group
 #define GENL_mc_group(group)						\
-static struct genl_multicast_group					\
-CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = {		\
-	.name = #group,							\
-};									\
 static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\
 	struct sk_buff *skb, gfp_t flags)				\
 {									\
 	unsigned int group_id =						\
-		CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id;	\
-	if (!group_id)							\
-		return -EINVAL;						\
+		CONCAT_(GENL_MAGIC_FAMILY, _group_ ## group);		\
 	return genlmsg_multicast(&ZZZ_genl_family, skb, 0,		\
 				 group_id, flags);			\
 }
 
 #include GENL_MAGIC_INCLUDE_FILE
 
-int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
-{
-	int err = genl_register_family_with_ops(&ZZZ_genl_family, ZZZ_genl_ops);
-	if (err)
-		return err;
-#undef GENL_mc_group
-#define GENL_mc_group(group)						\
-	err = genl_register_mc_group(&ZZZ_genl_family,			\
-		&CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group));		\
-	if (err)							\
-		goto fail;						\
-	else								\
-		pr_info("%s: mcg %s: %u\n", #group,			\
-			__stringify(GENL_MAGIC_FAMILY),			\
-			CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id);
-
-#include GENL_MAGIC_INCLUDE_FILE
-
 #undef GENL_mc_group
 #define GENL_mc_group(group)
-	return 0;
-fail:
-	genl_unregister_family(&ZZZ_genl_family);
-	return err;
+
+int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
+{
+	return genl_register_family_with_ops_groups(&ZZZ_genl_family,	\
+						    ZZZ_genl_ops,	\
+						    ZZZ_genl_mcgrps);
 }
 
 void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 60aef0df386b..ace4abf118d7 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -10,14 +10,9 @@
 /**
  * struct genl_multicast_group - generic netlink multicast group
  * @name: name of the multicast group, names are per-family
- * @id: multicast group ID, assigned by the core, to use with
- *      genlmsg_multicast().
- * @list: list entry for linking
  */
 struct genl_multicast_group {
-	struct list_head	list;		/* private */
 	char			name[GENL_NAMSIZ];
-	u32			id;
 };
 
 struct genl_ops;
@@ -38,7 +33,9 @@ struct genl_info;
  *	undo operations done by pre_doit, for example release locks
  * @attrbuf: buffer to store parsed attributes
  * @family_list: family list
- * @mcast_groups: multicast groups list
+ * @mcgrps: multicast groups used by this family (private)
+ * @n_mcgrps: number of multicast groups (private)
+ * @mcgrp_offset: starting number of multicast group IDs in this family
  * @ops: the operations supported by this family (private)
  * @n_ops: number of operations supported by this family (private)
  */
@@ -58,9 +55,11 @@ struct genl_family {
 					     struct genl_info *info);
 	struct nlattr **	attrbuf;	/* private */
 	const struct genl_ops *	ops;		/* private */
+	const struct genl_multicast_group *mcgrps; /* private */
 	unsigned int		n_ops;		/* private */
+	unsigned int		n_mcgrps;	/* private */
+	unsigned int		mcgrp_offset;	/* private */
 	struct list_head	family_list;	/* private */
-	struct list_head	mcast_groups;	/* private */
 	struct module		*module;
 };
 
@@ -150,22 +149,30 @@ static inline int genl_register_family(struct genl_family *family)
  *
  * Return 0 on success or a negative error code.
  */
-static inline int _genl_register_family_with_ops(struct genl_family *family,
-						 const struct genl_ops *ops,
-						 size_t n_ops)
+static inline int
+_genl_register_family_with_ops_grps(struct genl_family *family,
+				    const struct genl_ops *ops, size_t n_ops,
+				    const struct genl_multicast_group *mcgrps,
+				    size_t n_mcgrps)
 {
 	family->module = THIS_MODULE;
 	family->ops = ops;
 	family->n_ops = n_ops;
+	family->mcgrps = mcgrps;
+	family->n_mcgrps = n_mcgrps;
 	return __genl_register_family(family);
 }
 
-#define genl_register_family_with_ops(family, ops)	\
-	_genl_register_family_with_ops((family), (ops), ARRAY_SIZE(ops))
+#define genl_register_family_with_ops(family, ops)			\
+	_genl_register_family_with_ops_grps((family),			\
+					    (ops), ARRAY_SIZE(ops),	\
+					    NULL, 0)
+#define genl_register_family_with_ops_groups(family, ops, grps)	\
+	_genl_register_family_with_ops_grps((family),			\
+					    (ops), ARRAY_SIZE(ops),	\
+					    (grps), ARRAY_SIZE(grps))
 
 int genl_unregister_family(struct genl_family *family);
-int genl_register_mc_group(struct genl_family *family,
-			   struct genl_multicast_group *grp);
 void genl_notify(struct genl_family *family,
 		 struct sk_buff *skb, struct net *net, u32 portid,
 		 u32 group, struct nlmsghdr *nlh, gfp_t flags);
@@ -251,13 +258,16 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
  * @net: the net namespace
  * @skb: netlink message as socket buffer
  * @portid: own netlink portid to avoid sending to yourself
- * @group: multicast group id
+ * @group: offset of multicast group in groups array
  * @flags: allocation flags
  */
 static inline int genlmsg_multicast_netns(struct genl_family *family,
 					  struct net *net, struct sk_buff *skb,
 					  u32 portid, unsigned int group, gfp_t flags)
 {
+	if (group >= family->n_mcgrps)
+		return -EINVAL;
+	group = family->mcgrp_offset + group;
 	return nlmsg_multicast(net->genl_sock, skb, portid, group, flags);
 }
 
@@ -266,13 +276,16 @@ static inline int genlmsg_multicast_netns(struct genl_family *family,
  * @family: the generic netlink family
  * @skb: netlink message as socket buffer
  * @portid: own netlink portid to avoid sending to yourself
- * @group: multicast group id
+ * @group: offset of multicast group in groups array
  * @flags: allocation flags
  */
 static inline int genlmsg_multicast(struct genl_family *family,
 				    struct sk_buff *skb, u32 portid,
 				    unsigned int group, gfp_t flags)
 {
+	if (group >= family->n_mcgrps)
+		return -EINVAL;
+	group = family->mcgrp_offset + group;
 	return genlmsg_multicast_netns(family, &init_net, skb,
 				       portid, group, flags);
 }
@@ -282,7 +295,7 @@ static inline int genlmsg_multicast(struct genl_family *family,
  * @family: the generic netlink family
  * @skb: netlink message as socket buffer
  * @portid: own netlink portid to avoid sending to yourself
- * @group: multicast group id
+ * @group: offset of multicast group in groups array
  * @flags: allocation flags
  *
  * This function must hold the RTNL or rcu_read_lock().
@@ -365,6 +378,7 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags)
  * @net: the network namespace to report the error to
  * @portid: the PORTID of a process that we want to skip (if any)
  * @group: the broadcast group that will notice the error
+ * 	(this is the offset of the multicast group in the groups array)
  * @code: error code, must be negative (as usual in kernelspace)
  *
  * This function returns the number of broadcast listeners that have set the
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 1eab1dc48821..95897183226e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -106,8 +106,8 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 	return skb;
 }
 
-static struct genl_multicast_group dm_mcgrp = {
-	.name = "events",
+static struct genl_multicast_group dropmon_mcgrps[] = {
+	{ .name = "events", },
 };
 
 static void send_dm_alert(struct work_struct *work)
@@ -121,7 +121,7 @@ static void send_dm_alert(struct work_struct *work)
 
 	if (skb)
 		genlmsg_multicast(&net_drop_monitor_family, skb, 0,
-				  dm_mcgrp.id, GFP_KERNEL);
+				  0, GFP_KERNEL);
 }
 
 /*
@@ -369,19 +369,13 @@ static int __init init_net_drop_monitor(void)
 		return -ENOSPC;
 	}
 
-	rc = genl_register_family_with_ops(&net_drop_monitor_family,
-					   dropmon_ops);
+	rc = genl_register_family_with_ops_groups(&net_drop_monitor_family,
+						  dropmon_ops, dropmon_mcgrps);
 	if (rc) {
 		pr_err("Could not create drop monitor netlink family\n");
 		return rc;
 	}
-
-	rc = genl_register_mc_group(&net_drop_monitor_family, &dm_mcgrp);
-	if (rc) {
-		pr_err("Failed to register drop monitor mcast group\n");
-		goto out_unreg;
-	}
-	WARN_ON(dm_mcgrp.id != NET_DM_GRP_ALERT);
+	WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT);
 
 	rc = register_netdevice_notifier(&dropmon_net_notifier);
 	if (rc < 0) {
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 0009416c08c2..5325af85eea6 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -90,8 +90,8 @@ static struct genl_family hsr_genl_family = {
 	.maxattr = HSR_A_MAX,
 };
 
-static struct genl_multicast_group hsr_network_genl_mcgrp = {
-	.name = "hsr-network",
+static const struct genl_multicast_group hsr_mcgrps[] = {
+	{ .name = "hsr-network", },
 };
 
 
@@ -129,8 +129,7 @@ void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN],
 		goto nla_put_failure;
 
 	genlmsg_end(skb, msg_head);
-	genlmsg_multicast(&hsr_genl_family, skb, 0,
-			  hsr_network_genl_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&hsr_genl_family, skb, 0, 0, GFP_ATOMIC);
 
 	return;
 
@@ -164,8 +163,7 @@ void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN])
 		goto nla_put_failure;
 
 	genlmsg_end(skb, msg_head);
-	genlmsg_multicast(&hsr_genl_family, skb, 0,
-			  hsr_network_genl_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&hsr_genl_family, skb, 0, 0, GFP_ATOMIC);
 
 	return;
 
@@ -416,18 +414,13 @@ int __init hsr_netlink_init(void)
 	if (rc)
 		goto fail_rtnl_link_register;
 
-	rc = genl_register_family_with_ops(&hsr_genl_family, hsr_ops);
+	rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops,
+						  hsr_mcgrps);
 	if (rc)
 		goto fail_genl_register_family;
 
-	rc = genl_register_mc_group(&hsr_genl_family, &hsr_network_genl_mcgrp);
-	if (rc)
-		goto fail_genl_register_mc_group;
-
 	return 0;
 
-fail_genl_register_mc_group:
-	genl_unregister_family(&hsr_genl_family);
 fail_genl_register_family:
 	rtnl_link_unregister(&hsr_link_ops);
 fail_rtnl_link_register:
diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h
index 14d5dab4436f..cee4425b9956 100644
--- a/net/ieee802154/ieee802154.h
+++ b/net/ieee802154/ieee802154.h
@@ -54,8 +54,10 @@ int ieee802154_dump_phy(struct sk_buff *skb, struct netlink_callback *cb);
 int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info);
 int ieee802154_del_iface(struct sk_buff *skb, struct genl_info *info);
 
-extern struct genl_multicast_group ieee802154_coord_mcgrp;
-extern struct genl_multicast_group ieee802154_beacon_mcgrp;
+enum ieee802154_mcgrp_ids {
+	IEEE802154_COORD_MCGRP,
+	IEEE802154_BEACON_MCGRP,
+};
 
 int ieee802154_associate_req(struct sk_buff *skb, struct genl_info *info);
 int ieee802154_associate_resp(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 5172f467a383..43f1b2bf469f 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -125,25 +125,17 @@ static const struct genl_ops ieee8021154_ops[] = {
 			ieee802154_dump_iface),
 };
 
-int __init ieee802154_nl_init(void)
-{
-	int rc;
-
-	rc = genl_register_family_with_ops(&nl802154_family, ieee8021154_ops);
-	if (rc)
-		return rc;
+static const struct genl_multicast_group ieee802154_mcgrps[] = {
+	[IEEE802154_COORD_MCGRP] = { .name = IEEE802154_MCAST_COORD_NAME, },
+	[IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, },
+};
 
-	rc = genl_register_mc_group(&nl802154_family, &ieee802154_coord_mcgrp);
-	if (rc)
-		goto fail;
 
-	rc = genl_register_mc_group(&nl802154_family, &ieee802154_beacon_mcgrp);
-	if (rc)
-		goto fail;
-	return 0;
-fail:
-	genl_unregister_family(&nl802154_family);
-	return rc;
+int __init ieee802154_nl_init(void)
+{
+	return genl_register_family_with_ops_groups(&nl802154_family,
+						    ieee8021154_ops,
+						    ieee802154_mcgrps);
 }
 
 void __exit ieee802154_nl_exit(void)
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 28d493032132..ba5c1e002f37 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -39,14 +39,6 @@
 
 #include "ieee802154.h"
 
-struct genl_multicast_group ieee802154_coord_mcgrp = {
-	.name		= IEEE802154_MCAST_COORD_NAME,
-};
-
-struct genl_multicast_group ieee802154_beacon_mcgrp = {
-	.name		= IEEE802154_MCAST_BEACON_NAME,
-};
-
 int ieee802154_nl_assoc_indic(struct net_device *dev,
 		struct ieee802154_addr *addr, u8 cap)
 {
@@ -72,7 +64,7 @@ int ieee802154_nl_assoc_indic(struct net_device *dev,
 	    nla_put_u8(msg, IEEE802154_ATTR_CAPABILITY, cap))
 		goto nla_put_failure;
 
-	return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id);
+	return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
 
 nla_put_failure:
 	nlmsg_free(msg);
@@ -98,7 +90,7 @@ int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr,
 	    nla_put_u16(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) ||
 	    nla_put_u8(msg, IEEE802154_ATTR_STATUS, status))
 		goto nla_put_failure;
-	return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id);
+	return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
 
 nla_put_failure:
 	nlmsg_free(msg);
@@ -133,7 +125,7 @@ int ieee802154_nl_disassoc_indic(struct net_device *dev,
 	}
 	if (nla_put_u8(msg, IEEE802154_ATTR_REASON, reason))
 		goto nla_put_failure;
-	return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id);
+	return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
 
 nla_put_failure:
 	nlmsg_free(msg);
@@ -157,7 +149,7 @@ int ieee802154_nl_disassoc_confirm(struct net_device *dev, u8 status)
 		    dev->dev_addr) ||
 	    nla_put_u8(msg, IEEE802154_ATTR_STATUS, status))
 		goto nla_put_failure;
-	return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id);
+	return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
 
 nla_put_failure:
 	nlmsg_free(msg);
@@ -183,7 +175,7 @@ int ieee802154_nl_beacon_indic(struct net_device *dev,
 	    nla_put_u16(msg, IEEE802154_ATTR_COORD_SHORT_ADDR, coord_addr) ||
 	    nla_put_u16(msg, IEEE802154_ATTR_COORD_PAN_ID, panid))
 		goto nla_put_failure;
-	return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id);
+	return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
 
 nla_put_failure:
 	nlmsg_free(msg);
@@ -214,7 +206,7 @@ int ieee802154_nl_scan_confirm(struct net_device *dev,
 	    (edl &&
 	     nla_put(msg, IEEE802154_ATTR_ED_LIST, 27, edl)))
 		goto nla_put_failure;
-	return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id);
+	return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
 
 nla_put_failure:
 	nlmsg_free(msg);
@@ -238,7 +230,7 @@ int ieee802154_nl_start_confirm(struct net_device *dev, u8 status)
 		    dev->dev_addr) ||
 	    nla_put_u8(msg, IEEE802154_ATTR_STATUS, status))
 		goto nla_put_failure;
-	return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id);
+	return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
 
 nla_put_failure:
 	nlmsg_free(msg);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 36e3a86cacf6..7dbc4f732c75 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -69,16 +69,20 @@ static struct list_head family_ht[GENL_FAM_TAB_SIZE];
  * abuses the API and thinks it can statically use group 1.
  * That group will typically conflict with other groups that
  * any proper users use.
+ * Bit 16 is marked as used since it's used for generic netlink
+ * and the code no longer marks pre-reserved IDs as used.
  * Bit 17 is marked as already used since the VFS quota code
  * also abused this API and relied on family == group ID, we
  * cater to that by giving it a static family and group ID.
  */
-static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_VFS_DQUOT);
+static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) |
+				      BIT(GENL_ID_VFS_DQUOT);
 static unsigned long *mc_groups = &mc_group_start;
 static unsigned long mc_groups_longs = 1;
 
 static int genl_ctrl_event(int event, struct genl_family *family,
-			   struct genl_multicast_group *grp);
+			   const struct genl_multicast_group *grp,
+			   int grp_id);
 
 static inline unsigned int genl_family_hash(unsigned int id)
 {
@@ -144,66 +148,110 @@ static u16 genl_generate_id(void)
 	return 0;
 }
 
-static struct genl_multicast_group notify_grp;
-
-/**
- * genl_register_mc_group - register a multicast group
- *
- * Registers the specified multicast group and notifies userspace
- * about the new group.
- *
- * Returns 0 on success or a negative error code.
- *
- * @family: The generic netlink family the group shall be registered for.
- * @grp: The group to register, must have a name.
- */
-int genl_register_mc_group(struct genl_family *family,
-			   struct genl_multicast_group *grp)
+static int genl_allocate_reserve_groups(int n_groups, int *first_id)
 {
-	int id;
 	unsigned long *new_groups;
-	int err = 0;
+	int start = 0;
+	int i;
+	int id;
+	bool fits;
+
+	do {
+		if (start == 0)
+			id = find_first_zero_bit(mc_groups,
+						 mc_groups_longs *
+						 BITS_PER_LONG);
+		else
+			id = find_next_zero_bit(mc_groups,
+						mc_groups_longs * BITS_PER_LONG,
+						start);
+
+		fits = true;
+		for (i = id;
+		     i < min_t(int, id + n_groups,
+			       mc_groups_longs * BITS_PER_LONG);
+		     i++) {
+			if (test_bit(i, mc_groups)) {
+				start = i;
+				fits = false;
+				break;
+			}
+		}
 
-	BUG_ON(grp->name[0] == '\0');
-	BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL);
+		if (id >= mc_groups_longs * BITS_PER_LONG) {
+			unsigned long new_longs = mc_groups_longs +
+						  BITS_TO_LONGS(n_groups);
+			size_t nlen = new_longs * sizeof(unsigned long);
+
+			if (mc_groups == &mc_group_start) {
+				new_groups = kzalloc(nlen, GFP_KERNEL);
+				if (!new_groups)
+					return -ENOMEM;
+				mc_groups = new_groups;
+				*mc_groups = mc_group_start;
+			} else {
+				new_groups = krealloc(mc_groups, nlen,
+						      GFP_KERNEL);
+				if (!new_groups)
+					return -ENOMEM;
+				mc_groups = new_groups;
+				for (i = 0; i < BITS_TO_LONGS(n_groups); i++)
+					mc_groups[mc_groups_longs + i] = 0;
+			}
+			mc_groups_longs = new_longs;
+		}
+	} while (!fits);
 
-	genl_lock_all();
+	for (i = id; i < id + n_groups; i++)
+		set_bit(i, mc_groups);
+	*first_id = id;
+	return 0;
+}
+
+static struct genl_family genl_ctrl;
+
+static int genl_validate_assign_mc_groups(struct genl_family *family)
+{
+	int first_id;
+	int n_groups = family->n_mcgrps;
+	int err, i;
+	bool groups_allocated = false;
+
+	if (!n_groups)
+		return 0;
+
+	for (i = 0; i < n_groups; i++) {
+		const struct genl_multicast_group *grp = &family->mcgrps[i];
+
+		if (WARN_ON(grp->name[0] == '\0'))
+			return -EINVAL;
+		if (WARN_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL))
+			return -EINVAL;
+	}
 
 	/* special-case our own group and hacks */
-	if (grp == &notify_grp)
-		id = GENL_ID_CTRL;
-	else if (strcmp(family->name, "NET_DM") == 0)
-		id = 1;
-	else if (strcmp(family->name, "VFS_DQUOT") == 0)
-		id = GENL_ID_VFS_DQUOT;
-	else
-		id = find_first_zero_bit(mc_groups,
-					 mc_groups_longs * BITS_PER_LONG);
-
-
-	if (id >= mc_groups_longs * BITS_PER_LONG) {
-		size_t nlen = (mc_groups_longs + 1) * sizeof(unsigned long);
-
-		if (mc_groups == &mc_group_start) {
-			new_groups = kzalloc(nlen, GFP_KERNEL);
-			if (!new_groups) {
-				err = -ENOMEM;
-				goto out;
-			}
-			mc_groups = new_groups;
-			*mc_groups = mc_group_start;
-		} else {
-			new_groups = krealloc(mc_groups, nlen, GFP_KERNEL);
-			if (!new_groups) {
-				err = -ENOMEM;
-				goto out;
-			}
-			mc_groups = new_groups;
-			mc_groups[mc_groups_longs] = 0;
-		}
-		mc_groups_longs++;
+	if (family == &genl_ctrl) {
+		first_id = GENL_ID_CTRL;
+		BUG_ON(n_groups != 1);
+	} else if (strcmp(family->name, "NET_DM") == 0) {
+		first_id = 1;
+		BUG_ON(n_groups != 1);
+	} else if (strcmp(family->name, "VFS_DQUOT") == 0) {
+		first_id = GENL_ID_VFS_DQUOT;
+		BUG_ON(n_groups != 1);
+	} else {
+		groups_allocated = true;
+		err = genl_allocate_reserve_groups(n_groups, &first_id);
+		if (err)
+			return err;
 	}
 
+	family->mcgrp_offset = first_id;
+
+	/* if still initializing, can't and don't need to to realloc bitmaps */
+	if (!init_net.genl_sock)
+		return 0;
+
 	if (family->netnsok) {
 		struct net *net;
 
@@ -219,9 +267,7 @@ int genl_register_mc_group(struct genl_family *family,
 				 * number of _possible_ groups has been
 				 * increased on some sockets which is ok.
 				 */
-				rcu_read_unlock();
-				netlink_table_ungrab();
-				goto out;
+				break;
 			}
 		}
 		rcu_read_unlock();
@@ -229,46 +275,39 @@ int genl_register_mc_group(struct genl_family *family,
 	} else {
 		err = netlink_change_ngroups(init_net.genl_sock,
 					     mc_groups_longs * BITS_PER_LONG);
-		if (err)
-			goto out;
 	}
 
-	grp->id = id;
-	set_bit(id, mc_groups);
-	list_add_tail(&grp->list, &family->mcast_groups);
+	if (groups_allocated && err) {
+		for (i = 0; i < family->n_mcgrps; i++)
+			clear_bit(family->mcgrp_offset + i, mc_groups);
+	}
 
-	genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, family, grp);
- out:
-	genl_unlock_all();
 	return err;
 }
-EXPORT_SYMBOL(genl_register_mc_group);
 
-static void __genl_unregister_mc_group(struct genl_family *family,
-				       struct genl_multicast_group *grp)
+static void genl_unregister_mc_groups(struct genl_family *family)
 {
 	struct net *net;
+	int i;
 
 	netlink_table_grab();
 	rcu_read_lock();
-	for_each_net_rcu(net)
-		__netlink_clear_multicast_users(net->genl_sock, grp->id);
+	for_each_net_rcu(net) {
+		for (i = 0; i < family->n_mcgrps; i++)
+			__netlink_clear_multicast_users(
+				net->genl_sock, family->mcgrp_offset + i);
+	}
 	rcu_read_unlock();
 	netlink_table_ungrab();
 
-	if (grp->id != 1)
-		clear_bit(grp->id, mc_groups);
-	list_del(&grp->list);
-	genl_ctrl_event(CTRL_CMD_DELMCAST_GRP, family, grp);
-	grp->id = 0;
-}
-
-static void genl_unregister_mc_groups(struct genl_family *family)
-{
-	struct genl_multicast_group *grp, *tmp;
+	for (i = 0; i < family->n_mcgrps; i++) {
+		int grp_id = family->mcgrp_offset + i;
 
-	list_for_each_entry_safe(grp, tmp, &family->mcast_groups, list)
-		__genl_unregister_mc_group(family, grp);
+		if (grp_id != 1)
+			clear_bit(grp_id, mc_groups);
+		genl_ctrl_event(CTRL_CMD_DELMCAST_GRP, family,
+				&family->mcgrps[i], grp_id);
+	}
 }
 
 static int genl_validate_ops(struct genl_family *family)
@@ -314,7 +353,7 @@ static int genl_validate_ops(struct genl_family *family)
  */
 int __genl_register_family(struct genl_family *family)
 {
-	int err = -EINVAL;
+	int err = -EINVAL, i;
 
 	if (family->id && family->id < GENL_MIN_ID)
 		goto errout;
@@ -326,8 +365,6 @@ int __genl_register_family(struct genl_family *family)
 	if (err)
 		return err;
 
-	INIT_LIST_HEAD(&family->mcast_groups);
-
 	genl_lock_all();
 
 	if (genl_family_find_byname(family->name)) {
@@ -359,10 +396,18 @@ int __genl_register_family(struct genl_family *family)
 	} else
 		family->attrbuf = NULL;
 
+	err = genl_validate_assign_mc_groups(family);
+	if (err)
+		goto errout_locked;
+
 	list_add_tail(&family->family_list, genl_family_chain(family->id));
 	genl_unlock_all();
 
-	genl_ctrl_event(CTRL_CMD_NEWFAMILY, family, NULL);
+	/* send all events */
+	genl_ctrl_event(CTRL_CMD_NEWFAMILY, family, NULL, 0);
+	for (i = 0; i < family->n_mcgrps; i++)
+		genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, family,
+				&family->mcgrps[i], family->mcgrp_offset + i);
 
 	return 0;
 
@@ -398,7 +443,7 @@ int genl_unregister_family(struct genl_family *family)
 		genl_unlock_all();
 
 		kfree(family->attrbuf);
-		genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL);
+		genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
 		return 0;
 	}
 
@@ -658,23 +703,26 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq,
 		nla_nest_end(skb, nla_ops);
 	}
 
-	if (!list_empty(&family->mcast_groups)) {
-		struct genl_multicast_group *grp;
+	if (family->n_mcgrps) {
 		struct nlattr *nla_grps;
-		int idx = 1;
+		int i;
 
 		nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS);
 		if (nla_grps == NULL)
 			goto nla_put_failure;
 
-		list_for_each_entry(grp, &family->mcast_groups, list) {
+		for (i = 0; i < family->n_mcgrps; i++) {
 			struct nlattr *nest;
+			const struct genl_multicast_group *grp;
 
-			nest = nla_nest_start(skb, idx++);
+			grp = &family->mcgrps[i];
+
+			nest = nla_nest_start(skb, i + 1);
 			if (nest == NULL)
 				goto nla_put_failure;
 
-			if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID, grp->id) ||
+			if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID,
+					family->mcgrp_offset + i) ||
 			    nla_put_string(skb, CTRL_ATTR_MCAST_GRP_NAME,
 					   grp->name))
 				goto nla_put_failure;
@@ -692,9 +740,9 @@ nla_put_failure:
 }
 
 static int ctrl_fill_mcgrp_info(struct genl_family *family,
-				struct genl_multicast_group *grp, u32 portid,
-				u32 seq, u32 flags, struct sk_buff *skb,
-				u8 cmd)
+				const struct genl_multicast_group *grp,
+				int grp_id, u32 portid, u32 seq, u32 flags,
+				struct sk_buff *skb, u8 cmd)
 {
 	void *hdr;
 	struct nlattr *nla_grps;
@@ -716,7 +764,7 @@ static int ctrl_fill_mcgrp_info(struct genl_family *family,
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID, grp->id) ||
+	if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID, grp_id) ||
 	    nla_put_string(skb, CTRL_ATTR_MCAST_GRP_NAME,
 			   grp->name))
 		goto nla_put_failure;
@@ -782,9 +830,10 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
 	return skb;
 }
 
-static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_family *family,
-					    struct genl_multicast_group *grp,
-					    u32 portid, int seq, u8 cmd)
+static struct sk_buff *
+ctrl_build_mcgrp_msg(struct genl_family *family,
+		     const struct genl_multicast_group *grp,
+		     int grp_id, u32 portid, int seq, u8 cmd)
 {
 	struct sk_buff *skb;
 	int err;
@@ -793,7 +842,8 @@ static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_family *family,
 	if (skb == NULL)
 		return ERR_PTR(-ENOBUFS);
 
-	err = ctrl_fill_mcgrp_info(family, grp, portid, seq, 0, skb, cmd);
+	err = ctrl_fill_mcgrp_info(family, grp, grp_id, portid,
+				   seq, 0, skb, cmd);
 	if (err < 0) {
 		nlmsg_free(skb);
 		return ERR_PTR(err);
@@ -856,7 +906,8 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
 }
 
 static int genl_ctrl_event(int event, struct genl_family *family,
-			   struct genl_multicast_group *grp)
+			   const struct genl_multicast_group *grp,
+			   int grp_id)
 {
 	struct sk_buff *msg;
 
@@ -873,7 +924,7 @@ static int genl_ctrl_event(int event, struct genl_family *family,
 	case CTRL_CMD_NEWMCAST_GRP:
 	case CTRL_CMD_DELMCAST_GRP:
 		BUG_ON(!grp);
-		msg = ctrl_build_mcgrp_msg(family, grp, 0, 0, event);
+		msg = ctrl_build_mcgrp_msg(family, grp, grp_id, 0, 0, event);
 		break;
 	default:
 		return -EINVAL;
@@ -884,11 +935,11 @@ static int genl_ctrl_event(int event, struct genl_family *family,
 
 	if (!family->netnsok) {
 		genlmsg_multicast_netns(&genl_ctrl, &init_net, msg, 0,
-					GENL_ID_CTRL, GFP_KERNEL);
+					0, GFP_KERNEL);
 	} else {
 		rcu_read_lock();
 		genlmsg_multicast_allns(&genl_ctrl, msg, 0,
-					GENL_ID_CTRL, GFP_ATOMIC);
+					0, GFP_ATOMIC);
 		rcu_read_unlock();
 	}
 
@@ -904,8 +955,8 @@ static struct genl_ops genl_ctrl_ops[] = {
 	},
 };
 
-static struct genl_multicast_group notify_grp = {
-	.name		= "notify",
+static struct genl_multicast_group genl_ctrl_groups[] = {
+	{ .name = "notify", },
 };
 
 static int __net_init genl_pernet_init(struct net *net)
@@ -945,7 +996,8 @@ static int __init genl_init(void)
 	for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
 		INIT_LIST_HEAD(&family_ht[i]);
 
-	err = genl_register_family_with_ops(&genl_ctrl, genl_ctrl_ops);
+	err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops,
+						   genl_ctrl_groups);
 	if (err < 0)
 		goto problem;
 
@@ -953,10 +1005,6 @@ static int __init genl_init(void)
 	if (err)
 		goto problem;
 
-	err = genl_register_mc_group(&genl_ctrl, &notify_grp);
-	if (err < 0)
-		goto problem;
-
 	return 0;
 
 problem:
@@ -997,6 +1045,9 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
 int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
 			    u32 portid, unsigned int group, gfp_t flags)
 {
+	if (group >= family->n_mcgrps)
+		return -EINVAL;
+	group = family->mcgrp_offset + group;
 	return genlmsg_mcast(skb, portid, group, flags);
 }
 EXPORT_SYMBOL(genlmsg_multicast_allns);
@@ -1011,6 +1062,9 @@ void genl_notify(struct genl_family *family,
 	if (nlh)
 		report = nlmsg_report(nlh);
 
+	if (group >= family->n_mcgrps)
+		return;
+	group = family->mcgrp_offset + group;
 	nlmsg_notify(sk, skb, portid, group, report, flags);
 }
 EXPORT_SYMBOL(genl_notify);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 3092df313fb1..a9b2342d5253 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -30,8 +30,8 @@
 #include "nfc.h"
 #include "llcp.h"
 
-static struct genl_multicast_group nfc_genl_event_mcgrp = {
-	.name = NFC_GENL_MCAST_EVENT_NAME,
+static const struct genl_multicast_group nfc_genl_mcgrps[] = {
+	{ .name = NFC_GENL_MCAST_EVENT_NAME, },
 };
 
 static struct genl_family nfc_genl_family = {
@@ -194,8 +194,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	return genlmsg_multicast(&nfc_genl_family, msg, 0,
-				 nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+	return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
 
 nla_put_failure:
 	genlmsg_cancel(msg, hdr);
@@ -224,8 +223,7 @@ int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
 
 	return 0;
 
@@ -257,8 +255,7 @@ int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
 
 	return 0;
 
@@ -288,8 +285,7 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
 
 	return 0;
 
@@ -322,8 +318,7 @@ int nfc_genl_device_added(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
 
 	return 0;
 
@@ -353,8 +348,7 @@ int nfc_genl_device_removed(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
 
 	return 0;
 
@@ -420,8 +414,7 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list)
 
 	genlmsg_end(msg, hdr);
 
-	return genlmsg_multicast(&nfc_genl_family, msg, 0,
-				 nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+	return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
 
 nla_put_failure:
 	genlmsg_cancel(msg, hdr);
@@ -455,8 +448,7 @@ int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
 
 	return 0;
 
@@ -487,8 +479,7 @@ int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
 
 	return 0;
 
@@ -609,8 +600,7 @@ int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx,
 
 	dev->dep_link_up = true;
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
 
 	return 0;
 
@@ -642,8 +632,7 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
 
 	return 0;
 
@@ -1148,8 +1137,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
 
 	return 0;
 
@@ -1320,8 +1308,7 @@ static void se_io_cb(void *context, u8 *apdu, size_t apdu_len, int err)
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(&nfc_genl_family, msg, 0,
-			  nfc_genl_event_mcgrp.id, GFP_KERNEL);
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
 
 	kfree(ctx);
 
@@ -1549,15 +1536,15 @@ int __init nfc_genl_init(void)
 {
 	int rc;
 
-	rc = genl_register_family_with_ops(&nfc_genl_family, nfc_genl_ops);
+	rc = genl_register_family_with_ops_groups(&nfc_genl_family,
+						  nfc_genl_ops,
+						  nfc_genl_mcgrps);
 	if (rc)
 		return rc;
 
-	rc = genl_register_mc_group(&nfc_genl_family, &nfc_genl_event_mcgrp);
-
 	netlink_register_notifier(&nl_notifier);
 
-	return rc;
+	return 0;
 }
 
 /**
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 5c19846b1d2a..1de4d281e3f1 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -62,11 +62,10 @@
 int ovs_net_id __read_mostly;
 
 static void ovs_notify(struct genl_family *family,
-		       struct sk_buff *skb, struct genl_info *info,
-		       struct genl_multicast_group *grp)
+		       struct sk_buff *skb, struct genl_info *info)
 {
 	genl_notify(family, skb, genl_info_net(info), info->snd_portid,
-		    grp->id, info->nlhdr, GFP_KERNEL);
+		    0, info->nlhdr, GFP_KERNEL);
 }
 
 /**
@@ -878,11 +877,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 	ovs_unlock();
 
 	if (!IS_ERR(reply))
-		ovs_notify(&dp_flow_genl_family, reply, info,
-			   &ovs_dp_flow_multicast_group);
+		ovs_notify(&dp_flow_genl_family, reply, info);
 	else
 		genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
-			     ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
+			     0, PTR_ERR(reply));
 	return 0;
 
 err_flow_free:
@@ -992,8 +990,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	ovs_flow_free(flow, true);
 	ovs_unlock();
 
-	ovs_notify(&dp_flow_genl_family, reply, info,
-		   &ovs_dp_flow_multicast_group);
+	ovs_notify(&dp_flow_genl_family, reply, info);
 	return 0;
 unlock:
 	ovs_unlock();
@@ -1240,8 +1237,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	ovs_unlock();
 
-	ovs_notify(&dp_datapath_genl_family, reply, info,
-		   &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info);
 	return 0;
 
 err_destroy_local_port:
@@ -1306,8 +1302,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	__dp_destroy(dp);
 	ovs_unlock();
 
-	ovs_notify(&dp_datapath_genl_family, reply, info,
-		   &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info);
 
 	return 0;
 unlock:
@@ -1332,14 +1327,13 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0,
-			     ovs_dp_datapath_multicast_group.id, err);
+			     0, err);
 		err = 0;
 		goto unlock;
 	}
 
 	ovs_unlock();
-	ovs_notify(&dp_datapath_genl_family, reply, info,
-		   &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info);
 
 	return 0;
 unlock:
@@ -1601,8 +1595,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	ovs_notify(&dp_vport_genl_family, reply, info,
-		   &ovs_dp_vport_multicast_group);
+	ovs_notify(&dp_vport_genl_family, reply, info);
 
 exit_unlock:
 	ovs_unlock();
@@ -1649,8 +1642,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	BUG_ON(err < 0);
 
 	ovs_unlock();
-	ovs_notify(&dp_vport_genl_family, reply, info,
-		   &ovs_dp_vport_multicast_group);
+	ovs_notify(&dp_vport_genl_family, reply, info);
 	return 0;
 
 exit_free:
@@ -1687,8 +1679,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	err = 0;
 	ovs_dp_detach_port(vport);
 
-	ovs_notify(&dp_vport_genl_family, reply, info,
-		   &ovs_dp_vport_multicast_group);
+	ovs_notify(&dp_vport_genl_family, reply, info);
 
 exit_unlock:
 	ovs_unlock();
@@ -1790,7 +1781,7 @@ struct genl_family_and_ops {
 	struct genl_family *family;
 	const struct genl_ops *ops;
 	int n_ops;
-	struct genl_multicast_group *group;
+	const struct genl_multicast_group *group;
 };
 
 static const struct genl_family_and_ops dp_genl_families[] = {
@@ -1828,16 +1819,12 @@ static int dp_register_genl(void)
 
 		f->family->ops = f->ops;
 		f->family->n_ops = f->n_ops;
+		f->family->mcgrps = f->group;
+		f->family->n_mcgrps = f->group ? 1 : 0;
 		err = genl_register_family(f->family);
 		if (err)
 			goto error;
 		n_registered++;
-
-		if (f->group) {
-			err = genl_register_mc_group(f->family, f->group);
-			if (err)
-				goto error;
-		}
 	}
 
 	return 0;
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index f4b66c84ea0b..2c631fe76be1 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -35,15 +35,13 @@ static void dp_detach_port_notify(struct vport *vport)
 	ovs_dp_detach_port(vport);
 	if (IS_ERR(notify)) {
 		genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
-			     ovs_dp_vport_multicast_group.id,
-			     PTR_ERR(notify));
+			     0, PTR_ERR(notify));
 		return;
 	}
 
 	genlmsg_multicast_netns(&dp_vport_genl_family,
 				ovs_dp_get_net(dp), notify, 0,
-				ovs_dp_vport_multicast_group.id,
-				GFP_KERNEL);
+				0, GFP_KERNEL);
 }
 
 void ovs_dp_notify_wq(struct work_struct *work)
diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c
index f37dd3c5576d..c278b3356f75 100644
--- a/net/wimax/op-msg.c
+++ b/net/wimax/op-msg.c
@@ -279,8 +279,7 @@ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb)
 
 	d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size);
 	d_dump(2, dev, msg, size);
-	genlmsg_multicast(&wimax_gnl_family, skb, 0,
-			  wimax_gnl_mcg.id, GFP_KERNEL);
+	genlmsg_multicast(&wimax_gnl_family, skb, 0, 0, GFP_KERNEL);
 	d_printf(1, dev, "CTX: genl multicast done\n");
 	return 0;
 }
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 18888748e699..ef2191b969a7 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -116,8 +116,9 @@ struct sk_buff *wimax_gnl_re_state_change_alloc(
 		dev_err(dev, "RE_STCH: can't create message\n");
 		goto error_new;
 	}
-	data = genlmsg_put(report_skb, 0, wimax_gnl_mcg.id, &wimax_gnl_family,
-			   0, WIMAX_GNL_RE_STATE_CHANGE);
+	/* FIXME: sending a group ID as the seq is wrong */
+	data = genlmsg_put(report_skb, 0, wimax_gnl_family.mcgrp_offset,
+			   &wimax_gnl_family, 0, WIMAX_GNL_RE_STATE_CHANGE);
 	if (data == NULL) {
 		dev_err(dev, "RE_STCH: can't put data into message\n");
 		goto error_put;
@@ -177,8 +178,7 @@ int wimax_gnl_re_state_change_send(
 		goto out;
 	}
 	genlmsg_end(report_skb, header);
-	genlmsg_multicast(&wimax_gnl_family, report_skb, 0,
-			  wimax_gnl_mcg.id, GFP_KERNEL);
+	genlmsg_multicast(&wimax_gnl_family, report_skb, 0, 0, GFP_KERNEL);
 out:
 	d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n",
 		wimax_dev, report_skb, result);
@@ -580,8 +580,8 @@ struct genl_family wimax_gnl_family = {
 	.maxattr = WIMAX_GNL_ATTR_MAX,
 };
 
-struct genl_multicast_group wimax_gnl_mcg = {
-	.name = "msg",
+static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
+	{ .name = "msg", },
 };
 
 
@@ -598,21 +598,18 @@ int __init wimax_subsys_init(void)
 
 	snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name),
 		 "WiMAX");
-	result = genl_register_family_with_ops(&wimax_gnl_family,
-					       wimax_gnl_ops);
+	result = genl_register_family_with_ops_groups(&wimax_gnl_family,
+						      wimax_gnl_ops,
+						      wimax_gnl_mcgrps);
 	if (unlikely(result < 0)) {
 		printk(KERN_ERR "cannot register generic netlink family: %d\n",
 		       result);
 		goto error_register_family;
 	}
 
-	result = genl_register_mc_group(&wimax_gnl_family, &wimax_gnl_mcg);
-	if (result < 0)
-		goto error_mc_group;
 	d_fnend(4, NULL, "() = 0\n");
 	return 0;
 
-error_mc_group:
 	genl_unregister_family(&wimax_gnl_family);
 error_register_family:
 	d_fnend(4, NULL, "() = %d\n", result);
diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h
index 8567d3079a83..b445b82020a8 100644
--- a/net/wimax/wimax-internal.h
+++ b/net/wimax/wimax-internal.h
@@ -86,7 +86,6 @@ void wimax_rfkill_rm(struct wimax_dev *);
 
 /* generic netlink */
 extern struct genl_family wimax_gnl_family;
-extern struct genl_multicast_group wimax_gnl_mcg;
 
 /* ops */
 int wimax_gnl_doit_msg_from_user(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f20edfd2e1f0..a1eb21073176 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -47,6 +47,25 @@ static struct genl_family nl80211_fam = {
 	.post_doit = nl80211_post_doit,
 };
 
+/* multicast groups */
+enum nl80211_multicast_groups {
+	NL80211_MCGRP_CONFIG,
+	NL80211_MCGRP_SCAN,
+	NL80211_MCGRP_REGULATORY,
+	NL80211_MCGRP_MLME,
+	NL80211_MCGRP_TESTMODE /* keep last - ifdef! */
+};
+
+static const struct genl_multicast_group nl80211_mcgrps[] = {
+	[NL80211_MCGRP_CONFIG] = { .name = "config", },
+	[NL80211_MCGRP_SCAN] = { .name = "scan", },
+	[NL80211_MCGRP_REGULATORY] = { .name = "regulatory", },
+	[NL80211_MCGRP_MLME] = { .name = "mlme", },
+#ifdef CONFIG_NL80211_TESTMODE
+	[NL80211_MCGRP_TESTMODE] = { .name = "testmode", }
+#endif
+};
+
 /* returns ERR_PTR values */
 static struct wireless_dev *
 __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs)
@@ -6656,10 +6675,6 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
 
 
 #ifdef CONFIG_NL80211_TESTMODE
-static struct genl_multicast_group nl80211_testmode_mcgrp = {
-	.name = "testmode",
-};
-
 static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6869,7 +6884,7 @@ void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
 	nla_nest_end(skb, data);
 	genlmsg_end(skb, hdr);
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), skb, 0,
-				nl80211_testmode_mcgrp.id, gfp);
+				NL80211_MCGRP_TESTMODE, gfp);
 }
 EXPORT_SYMBOL(cfg80211_testmode_event);
 #endif
@@ -9566,21 +9581,6 @@ static const struct genl_ops nl80211_ops[] = {
 	},
 };
 
-static struct genl_multicast_group nl80211_mlme_mcgrp = {
-	.name = "mlme",
-};
-
-/* multicast groups */
-static struct genl_multicast_group nl80211_config_mcgrp = {
-	.name = "config",
-};
-static struct genl_multicast_group nl80211_scan_mcgrp = {
-	.name = "scan",
-};
-static struct genl_multicast_group nl80211_regulatory_mcgrp = {
-	.name = "regulatory",
-};
-
 /* notification functions */
 
 void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
@@ -9598,7 +9598,7 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
 	}
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_config_mcgrp.id, GFP_KERNEL);
+				NL80211_MCGRP_CONFIG, GFP_KERNEL);
 }
 
 static int nl80211_add_scan_req(struct sk_buff *msg,
@@ -9708,7 +9708,7 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
 	}
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_scan_mcgrp.id, GFP_KERNEL);
+				NL80211_MCGRP_SCAN, GFP_KERNEL);
 }
 
 void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
@@ -9727,7 +9727,7 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
 	}
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_scan_mcgrp.id, GFP_KERNEL);
+				NL80211_MCGRP_SCAN, GFP_KERNEL);
 }
 
 void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
@@ -9746,7 +9746,7 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
 	}
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_scan_mcgrp.id, GFP_KERNEL);
+				NL80211_MCGRP_SCAN, GFP_KERNEL);
 }
 
 void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev,
@@ -9765,7 +9765,7 @@ void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev,
 	}
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_scan_mcgrp.id, GFP_KERNEL);
+				NL80211_MCGRP_SCAN, GFP_KERNEL);
 }
 
 void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
@@ -9783,7 +9783,7 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
 	}
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_scan_mcgrp.id, GFP_KERNEL);
+				NL80211_MCGRP_SCAN, GFP_KERNEL);
 }
 
 /*
@@ -9838,7 +9838,7 @@ void nl80211_send_reg_change_event(struct regulatory_request *request)
 
 	rcu_read_lock();
 	genlmsg_multicast_allns(&nl80211_fam, msg, 0,
-				nl80211_regulatory_mcgrp.id, GFP_ATOMIC);
+				NL80211_MCGRP_REGULATORY, GFP_ATOMIC);
 	rcu_read_unlock();
 
 	return;
@@ -9874,7 +9874,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -9962,7 +9962,7 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10018,7 +10018,7 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10057,7 +10057,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10095,7 +10095,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, GFP_KERNEL);
+				NL80211_MCGRP_MLME, GFP_KERNEL);
 	return;
 
  nla_put_failure:
@@ -10129,7 +10129,7 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10170,7 +10170,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10209,7 +10209,7 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10262,7 +10262,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
 
 	rcu_read_lock();
 	genlmsg_multicast_allns(&nl80211_fam, msg, 0,
-				nl80211_regulatory_mcgrp.id, GFP_ATOMIC);
+				NL80211_MCGRP_REGULATORY, GFP_ATOMIC);
 	rcu_read_unlock();
 
 	return;
@@ -10308,7 +10308,7 @@ static void nl80211_send_remain_on_chan_event(
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10363,7 +10363,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
 	}
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 }
 EXPORT_SYMBOL(cfg80211_new_sta);
 
@@ -10393,7 +10393,7 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10429,7 +10429,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10591,7 +10591,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10640,7 +10640,7 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10685,7 +10685,7 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10743,7 +10743,7 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10790,7 +10790,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10867,7 +10867,7 @@ void cfg80211_cqm_txe_notify(struct net_device *dev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10916,7 +10916,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -10963,7 +10963,7 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -11003,7 +11003,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -11155,7 +11155,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  free_msg:
@@ -11197,7 +11197,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, gfp);
+				NL80211_MCGRP_MLME, gfp);
 	return;
 
  nla_put_failure:
@@ -11280,7 +11280,7 @@ void cfg80211_ft_event(struct net_device *netdev,
 	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				nl80211_mlme_mcgrp.id, GFP_KERNEL);
+				NL80211_MCGRP_MLME, GFP_KERNEL);
 }
 EXPORT_SYMBOL(cfg80211_ft_event);
 
@@ -11329,32 +11329,11 @@ int nl80211_init(void)
 {
 	int err;
 
-	err = genl_register_family_with_ops(&nl80211_fam, nl80211_ops);
+	err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops,
+						   nl80211_mcgrps);
 	if (err)
 		return err;
 
-	err = genl_register_mc_group(&nl80211_fam, &nl80211_config_mcgrp);
-	if (err)
-		goto err_out;
-
-	err = genl_register_mc_group(&nl80211_fam, &nl80211_scan_mcgrp);
-	if (err)
-		goto err_out;
-
-	err = genl_register_mc_group(&nl80211_fam, &nl80211_regulatory_mcgrp);
-	if (err)
-		goto err_out;
-
-	err = genl_register_mc_group(&nl80211_fam, &nl80211_mlme_mcgrp);
-	if (err)
-		goto err_out;
-
-#ifdef CONFIG_NL80211_TESTMODE
-	err = genl_register_mc_group(&nl80211_fam, &nl80211_testmode_mcgrp);
-	if (err)
-		goto err_out;
-#endif
-
 	err = netlink_register_notifier(&nl80211_netlink_notifier);
 	if (err)
 		goto err_out;
-- 
cgit v1.2.3


From 8b2e9b712f6139df9c754af0d67fecc4bbc88545 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 20 Nov 2013 14:41:47 -0800
Subject: Revert "mm: create a separate slab for page->ptl allocation"

This reverts commit ea1e7ed33708c7a760419ff9ded0a6cb90586a50.

Al points out that while the commit *does* actually create a separate
slab for the page->ptl allocation, that slab is never actually used, and
the code continues to use kmalloc/kfree.

Damien Wyart points out that the original patch did have the conversion
to use kmem_cache_alloc/free, so it got lost somewhere on its way to me.

Revert the half-arsed attempt that didn't do anything.  If we really do
want the special slab (remember: this is all relevant just for debug
builds, so it's not necessarily all that critical) we might as well redo
the patch fully.

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Kirill A Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 9 ---------
 init/main.c        | 2 +-
 mm/memory.c        | 7 -------
 3 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0548eb201e05..1cedd000cf29 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1318,7 +1318,6 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 
 #if USE_SPLIT_PTE_PTLOCKS
 #if BLOATED_SPINLOCKS
-void __init ptlock_cache_init(void);
 extern bool ptlock_alloc(struct page *page);
 extern void ptlock_free(struct page *page);
 
@@ -1327,7 +1326,6 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
 	return page->ptl;
 }
 #else /* BLOATED_SPINLOCKS */
-static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_alloc(struct page *page)
 {
 	return true;
@@ -1380,17 +1378,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return &mm->page_table_lock;
 }
-static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct page *page) { return true; }
 static inline void pte_lock_deinit(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
-static inline void pgtable_init(void)
-{
-	ptlock_cache_init();
-	pgtable_cache_init();
-}
-
 static inline bool pgtable_page_ctor(struct page *page)
 {
 	inc_zone_page_state(page, NR_PAGETABLE);
diff --git a/init/main.c b/init/main.c
index 01573fdfa186..febc511e078a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -476,7 +476,7 @@ static void __init mm_init(void)
 	mem_init();
 	kmem_cache_init();
 	percpu_init_late();
-	pgtable_init();
+	pgtable_cache_init();
 	vmalloc_init();
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 0409e8f43fa0..5d9025f3b3e1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4272,13 +4272,6 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 #if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
-static struct kmem_cache *page_ptl_cachep;
-void __init ptlock_cache_init(void)
-{
-	page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0,
-			SLAB_PANIC, NULL);
-}
-
 bool ptlock_alloc(struct page *page)
 {
 	spinlock_t *ptl;
-- 
cgit v1.2.3


From f3d3342602f8bcbf37d7c46641cb9bca7618eb1c Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 21 Nov 2013 03:14:22 +0100
Subject: net: rework recvmsg handler msg_name and msg_namelen logic

This patch now always passes msg->msg_namelen as 0. recvmsg handlers must
set msg_namelen to the proper size <= sizeof(struct sockaddr_storage)
to return msg_name to the user.

This prevents numerous uninitialized memory leaks we had in the
recvmsg handlers and makes it harder for new code to accidentally leak
uninitialized memory.

Optimize for the case recvfrom is called with NULL as address. We don't
need to copy the address at all, so set it to NULL before invoking the
recvmsg handler. We can do so, because all the recvmsg handlers must
cope with the case a plain read() is called on them. read() also sets
msg_name to NULL.

Also document these changes in include/linux/net.h as suggested by David
Miller.

Changes since RFC:

Set msg->msg_name = NULL if user specified a NULL in msg_name but had a
non-null msg_namelen in verify_iovec/verify_compat_iovec. This doesn't
affect sendto as it would bail out earlier while trying to copy-in the
address. It also more naturally reflects the logic by the callers of
verify_iovec.

With this change in place I could remove "
if (!uaddr || msg_sys->msg_namelen == 0)
	msg->msg_name = NULL
".

This change does not alter the user visible error logic as we ignore
msg_namelen as long as msg_name is NULL.

Also remove two unnecessary curly brackets in ___sys_recvmsg and change
comments to netdev style.

Cc: David Miller <davem@davemloft.net>
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/algif_hash.c            |  2 --
 crypto/algif_skcipher.c        |  1 -
 drivers/isdn/mISDN/socket.c    | 13 ++++---------
 drivers/net/ppp/pppoe.c        |  2 --
 include/linux/net.h            |  8 ++++++++
 net/appletalk/ddp.c            | 16 +++++++---------
 net/atm/common.c               |  2 --
 net/ax25/af_ax25.c             |  4 ++--
 net/bluetooth/af_bluetooth.c   |  9 ++-------
 net/bluetooth/hci_sock.c       |  2 --
 net/bluetooth/rfcomm/sock.c    |  1 -
 net/bluetooth/sco.c            |  1 -
 net/caif/caif_socket.c         |  4 ----
 net/compat.c                   |  3 ++-
 net/core/iovec.c               |  3 ++-
 net/ipx/af_ipx.c               |  3 +--
 net/irda/af_irda.c             |  4 ----
 net/iucv/af_iucv.c             |  2 --
 net/key/af_key.c               |  1 -
 net/l2tp/l2tp_ppp.c            |  2 --
 net/llc/af_llc.c               |  2 --
 net/netlink/af_netlink.c       |  2 --
 net/netrom/af_netrom.c         |  3 +--
 net/nfc/llcp_sock.c            |  2 --
 net/nfc/rawsock.c              |  2 --
 net/packet/af_packet.c         | 32 +++++++++++++++-----------------
 net/rds/recv.c                 |  2 --
 net/rose/af_rose.c             |  8 +++++---
 net/rxrpc/ar-recvmsg.c         |  9 ++++++---
 net/socket.c                   | 19 +++++++++++--------
 net/tipc/socket.c              |  6 ------
 net/unix/af_unix.c             |  5 -----
 net/vmw_vsock/af_vsock.c       |  2 --
 net/vmw_vsock/vmci_transport.c |  2 --
 net/x25/af_x25.c               |  3 +--
 35 files changed, 67 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 0262210cad38..ef5356cd280a 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -161,8 +161,6 @@ static int hash_recvmsg(struct kiocb *unused, struct socket *sock,
 	else if (len < ds)
 		msg->msg_flags |= MSG_TRUNC;
 
-	msg->msg_namelen = 0;
-
 	lock_sock(sk);
 	if (ctx->more) {
 		ctx->more = 0;
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index a1c4f0a55583..6a6dfc062d2a 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -432,7 +432,6 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock,
 	long copied = 0;
 
 	lock_sock(sk);
-	msg->msg_namelen = 0;
 	for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0;
 	     iovlen--, iov++) {
 		unsigned long seglen = iov->iov_len;
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index e47dcb9d1e91..5cefb479c707 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -117,7 +117,6 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 {
 	struct sk_buff		*skb;
 	struct sock		*sk = sock->sk;
-	struct sockaddr_mISDN	*maddr;
 
 	int		copied, err;
 
@@ -135,9 +134,9 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (!skb)
 		return err;
 
-	if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) {
-		msg->msg_namelen = sizeof(struct sockaddr_mISDN);
-		maddr = (struct sockaddr_mISDN *)msg->msg_name;
+	if (msg->msg_name) {
+		struct sockaddr_mISDN *maddr = msg->msg_name;
+
 		maddr->family = AF_ISDN;
 		maddr->dev = _pms(sk)->dev->id;
 		if ((sk->sk_protocol == ISDN_P_LAPD_TE) ||
@@ -150,11 +149,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 			maddr->sapi = _pms(sk)->ch.addr & 0xFF;
 			maddr->tei =  (_pms(sk)->ch.addr >> 8) & 0xFF;
 		}
-	} else {
-		if (msg->msg_namelen)
-			printk(KERN_WARNING "%s: too small namelen %d\n",
-			       __func__, msg->msg_namelen);
-		msg->msg_namelen = 0;
+		msg->msg_namelen = sizeof(*maddr);
 	}
 
 	copied = skb->len + MISDN_HEADER_LEN;
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 5f66e30d9823..82ee6ed954cb 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -979,8 +979,6 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (error < 0)
 		goto end;
 
-	m->msg_namelen = 0;
-
 	if (skb) {
 		total_len = min_t(size_t, total_len, skb->len);
 		error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len);
diff --git a/include/linux/net.h b/include/linux/net.h
index b292a0435571..4bcee94cef93 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -164,6 +164,14 @@ struct proto_ops {
 #endif
 	int		(*sendmsg)   (struct kiocb *iocb, struct socket *sock,
 				      struct msghdr *m, size_t total_len);
+	/* Notes for implementing recvmsg:
+	 * ===============================
+	 * msg->msg_namelen should get updated by the recvmsg handlers
+	 * iff msg_name != NULL. It is by default 0 to prevent
+	 * returning uninitialized memory to user space.  The recvfrom
+	 * handlers can assume that msg.msg_name is either NULL or has
+	 * a minimum size of sizeof(struct sockaddr_storage).
+	 */
 	int		(*recvmsg)   (struct kiocb *iocb, struct socket *sock,
 				      struct msghdr *m, size_t total_len,
 				      int flags);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 7fee50d637f9..7d424ac6e760 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1735,7 +1735,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 			 size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
-	struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name;
 	struct ddpehdr *ddp;
 	int copied = 0;
 	int offset = 0;
@@ -1764,14 +1763,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 	}
 	err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied);
 
-	if (!err) {
-		if (sat) {
-			sat->sat_family      = AF_APPLETALK;
-			sat->sat_port        = ddp->deh_sport;
-			sat->sat_addr.s_node = ddp->deh_snode;
-			sat->sat_addr.s_net  = ddp->deh_snet;
-		}
-		msg->msg_namelen = sizeof(*sat);
+	if (!err && msg->msg_name) {
+		struct sockaddr_at *sat = msg->msg_name;
+		sat->sat_family      = AF_APPLETALK;
+		sat->sat_port        = ddp->deh_sport;
+		sat->sat_addr.s_node = ddp->deh_snode;
+		sat->sat_addr.s_net  = ddp->deh_snet;
+		msg->msg_namelen     = sizeof(*sat);
 	}
 
 	skb_free_datagram(sk, skb);	/* Free the datagram. */
diff --git a/net/atm/common.c b/net/atm/common.c
index 737bef59ce89..7b491006eaf4 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -531,8 +531,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	struct sk_buff *skb;
 	int copied, error = -EINVAL;
 
-	msg->msg_namelen = 0;
-
 	if (sock->state != SS_CONNECTED)
 		return -ENOTCONN;
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index a00123ebb0ae..7bb1605bdfd9 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1636,11 +1636,11 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
-	if (msg->msg_namelen != 0) {
-		struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
+	if (msg->msg_name) {
 		ax25_digi digi;
 		ax25_address src;
 		const unsigned char *mac = skb_mac_header(skb);
+		struct sockaddr_ax25 *sax = msg->msg_name;
 
 		memset(sax, 0, sizeof(struct full_sockaddr_ax25));
 		ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index f6a1671ea2ff..56ca494621c6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -224,10 +224,9 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	if (!skb) {
-		if (sk->sk_shutdown & RCV_SHUTDOWN) {
-			msg->msg_namelen = 0;
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
 			return 0;
-		}
+
 		return err;
 	}
 
@@ -245,8 +244,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		if (bt_sk(sk)->skb_msg_name)
 			bt_sk(sk)->skb_msg_name(skb, msg->msg_name,
 						&msg->msg_namelen);
-		else
-			msg->msg_namelen = 0;
 	}
 
 	skb_free_datagram(sk, skb);
@@ -295,8 +292,6 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
-	msg->msg_namelen = 0;
-
 	BT_DBG("sk %p size %zu", sk, size);
 
 	lock_sock(sk);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 71f0be173080..6a6c8bb4fd72 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -856,8 +856,6 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (!skb)
 		return err;
 
-	msg->msg_namelen = 0;
-
 	copied = skb->len;
 	if (len < copied) {
 		msg->msg_flags |= MSG_TRUNC;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c4d3d423f89b..c80766f892c3 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -615,7 +615,6 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
 		rfcomm_dlc_accept(d);
-		msg->msg_namelen = 0;
 		return 0;
 	}
 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 12a0e51e21e1..24fa3964b3c8 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -711,7 +711,6 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	    test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
 		sco_conn_defer_accept(pi->conn->hcon, pi->setting);
 		sk->sk_state = BT_CONFIG;
-		msg->msg_namelen = 0;
 
 		release_sock(sk);
 		return 0;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 05a41c7ec304..d6be3edb7a43 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -286,8 +286,6 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (m->msg_flags&MSG_OOB)
 		goto read_error;
 
-	m->msg_namelen = 0;
-
 	skb = skb_recv_datagram(sk, flags, 0 , &ret);
 	if (!skb)
 		goto read_error;
@@ -361,8 +359,6 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (flags&MSG_OOB)
 		goto out;
 
-	msg->msg_namelen = 0;
-
 	/*
 	 * Lock the socket to prevent queue disordering
 	 * while sleeps in memcpy_tomsg
diff --git a/net/compat.c b/net/compat.c
index 89032580bd1d..618c6a8a911b 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -93,7 +93,8 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
 			if (err < 0)
 				return err;
 		}
-		kern_msg->msg_name = kern_address;
+		if (kern_msg->msg_name)
+			kern_msg->msg_name = kern_address;
 	} else
 		kern_msg->msg_name = NULL;
 
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 4cdb7c48dad6..b61869429f4c 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
 			if (err < 0)
 				return err;
 		}
-		m->msg_name = address;
+		if (m->msg_name)
+			m->msg_name = address;
 	} else {
 		m->msg_name = NULL;
 	}
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 7a1e0fc1bd4d..e096025b477f 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1823,8 +1823,6 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (skb->tstamp.tv64)
 		sk->sk_stamp = skb->tstamp;
 
-	msg->msg_namelen = sizeof(*sipx);
-
 	if (sipx) {
 		sipx->sipx_family	= AF_IPX;
 		sipx->sipx_port		= ipx->ipx_source.sock;
@@ -1832,6 +1830,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
 		sipx->sipx_network	= IPX_SKB_CB(skb)->ipx_source_net;
 		sipx->sipx_type 	= ipx->ipx_type;
 		sipx->sipx_zero		= 0;
+		msg->msg_namelen	= sizeof(*sipx);
 	}
 	rc = copied;
 
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 0f676908d15b..de7db23049f1 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1385,8 +1385,6 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
 
 	IRDA_DEBUG(4, "%s()\n", __func__);
 
-	msg->msg_namelen = 0;
-
 	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
 				flags & MSG_DONTWAIT, &err);
 	if (!skb)
@@ -1451,8 +1449,6 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
 	timeo = sock_rcvtimeo(sk, noblock);
 
-	msg->msg_namelen = 0;
-
 	do {
 		int chunk;
 		struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 168aff5e60de..c4b7218058b6 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1324,8 +1324,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int err = 0;
 	u32 offset;
 
-	msg->msg_namelen = 0;
-
 	if ((sk->sk_state == IUCV_DISCONN) &&
 	    skb_queue_empty(&iucv->backlog_skb_q) &&
 	    skb_queue_empty(&sk->sk_receive_queue) &&
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 911ef03bf8fb..545f047868ad 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3616,7 +3616,6 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
 	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
 		goto out;
 
-	msg->msg_namelen = 0;
 	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto out;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index ffda81ef1a70..be5fadf34739 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -197,8 +197,6 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (sk->sk_state & PPPOX_BOUND)
 		goto end;
 
-	msg->msg_namelen = 0;
-
 	err = 0;
 	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
 				flags & MSG_DONTWAIT, &err);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 6cba486353e8..7b01b9f5846c 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -720,8 +720,6 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int target;	/* Read at least this many bytes */
 	long timeo;
 
-	msg->msg_namelen = 0;
-
 	lock_sock(sk);
 	copied = -ENOTCONN;
 	if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN))
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f0176e1a5a81..bca50b95c182 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2335,8 +2335,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	}
 #endif
 
-	msg->msg_namelen = 0;
-
 	copied = data_skb->len;
 	if (len < copied) {
 		msg->msg_flags |= MSG_TRUNC;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 698814bfa7ad..53c19a35fc6d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1179,10 +1179,9 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
 		sax->sax25_family = AF_NETROM;
 		skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
 			      AX25_ADDR_LEN);
+		msg->msg_namelen = sizeof(*sax);
 	}
 
-	msg->msg_namelen = sizeof(*sax);
-
 	skb_free_datagram(sk, skb);
 
 	release_sock(sk);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index d308402b67d8..824c6056bf82 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -807,8 +807,6 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	pr_debug("%p %zu\n", sk, len);
 
-	msg->msg_namelen = 0;
-
 	lock_sock(sk);
 
 	if (sk->sk_state == LLCP_CLOSED &&
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index cd958b381f96..66bcd2eb5773 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -244,8 +244,6 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (!skb)
 		return rc;
 
-	msg->msg_namelen = 0;
-
 	copied = skb->len;
 	if (len < copied) {
 		msg->msg_flags |= MSG_TRUNC;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2e8286b47c28..61bd50adead1 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2660,7 +2660,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
 	int copied, err;
-	struct sockaddr_ll *sll;
 	int vnet_hdr_len = 0;
 
 	err = -EINVAL;
@@ -2744,22 +2743,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 			goto out_free;
 	}
 
-	/*
-	 *	If the address length field is there to be filled in, we fill
-	 *	it in now.
-	 */
-
-	sll = &PACKET_SKB_CB(skb)->sa.ll;
-	if (sock->type == SOCK_PACKET)
-		msg->msg_namelen = sizeof(struct sockaddr_pkt);
-	else
-		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
-
-	/*
-	 *	You lose any data beyond the buffer you gave. If it worries a
-	 *	user program they can ask the device for its MTU anyway.
+	/* You lose any data beyond the buffer you gave. If it worries
+	 * a user program they can ask the device for its MTU
+	 * anyway.
 	 */
-
 	copied = skb->len;
 	if (copied > len) {
 		copied = len;
@@ -2772,9 +2759,20 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	sock_recv_ts_and_drops(msg, sk, skb);
 
-	if (msg->msg_name)
+	if (msg->msg_name) {
+		/* If the address length field is there to be filled
+		 * in, we fill it in now.
+		 */
+		if (sock->type == SOCK_PACKET) {
+			msg->msg_namelen = sizeof(struct sockaddr_pkt);
+		} else {
+			struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+			msg->msg_namelen = sll->sll_halen +
+				offsetof(struct sockaddr_ll, sll_addr);
+		}
 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
 		       msg->msg_namelen);
+	}
 
 	if (pkt_sk(sk)->auxdata) {
 		struct tpacket_auxdata aux;
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 9f0f17cf6bf9..de339b24ca14 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -410,8 +410,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 
 	rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo);
 
-	msg->msg_namelen = 0;
-
 	if (msg_flags & MSG_OOB)
 		goto out;
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index e98fcfbe6007..33af77246bfe 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1216,7 +1216,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
 {
 	struct sock *sk = sock->sk;
 	struct rose_sock *rose = rose_sk(sk);
-	struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name;
 	size_t copied;
 	unsigned char *asmptr;
 	struct sk_buff *skb;
@@ -1252,8 +1251,11 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
-	if (srose != NULL) {
-		memset(srose, 0, msg->msg_namelen);
+	if (msg->msg_name) {
+		struct sockaddr_rose *srose;
+
+		memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose));
+		srose = msg->msg_name;
 		srose->srose_family = AF_ROSE;
 		srose->srose_addr   = rose->dest_addr;
 		srose->srose_call   = rose->dest_call;
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 4b48687c3890..898492a8d61b 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -143,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		/* copy the peer address and timestamp */
 		if (!continue_call) {
-			if (msg->msg_name && msg->msg_namelen > 0)
+			if (msg->msg_name) {
+				size_t len =
+					sizeof(call->conn->trans->peer->srx);
 				memcpy(msg->msg_name,
-				       &call->conn->trans->peer->srx,
-				       sizeof(call->conn->trans->peer->srx));
+				       &call->conn->trans->peer->srx, len);
+				msg->msg_namelen = len;
+			}
 			sock_recv_ts_and_drops(msg, &rx->sk, skb);
 		}
 
diff --git a/net/socket.c b/net/socket.c
index c226aceee65b..fc285564e49e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1840,8 +1840,10 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
 	msg.msg_iov = &iov;
 	iov.iov_len = size;
 	iov.iov_base = ubuf;
-	msg.msg_name = (struct sockaddr *)&address;
-	msg.msg_namelen = sizeof(address);
+	/* Save some cycles and don't copy the address if not needed */
+	msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
+	/* We assume all kernel code knows the size of sockaddr_storage */
+	msg.msg_namelen = 0;
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	err = sock_recvmsg(sock, &msg, size, flags);
@@ -2221,16 +2223,14 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
 			goto out;
 	}
 
-	/*
-	 *      Save the user-mode address (verify_iovec will change the
-	 *      kernel msghdr to use the kernel address space)
+	/* Save the user-mode address (verify_iovec will change the
+	 * kernel msghdr to use the kernel address space)
 	 */
-
 	uaddr = (__force void __user *)msg_sys->msg_name;
 	uaddr_len = COMPAT_NAMELEN(msg);
-	if (MSG_CMSG_COMPAT & flags) {
+	if (MSG_CMSG_COMPAT & flags)
 		err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
-	} else
+	else
 		err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
 	if (err < 0)
 		goto out_freeiov;
@@ -2239,6 +2239,9 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
 	cmsg_ptr = (unsigned long)msg_sys->msg_control;
 	msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
 
+	/* We assume all kernel code knows the size of sockaddr_storage */
+	msg_sys->msg_namelen = 0;
+
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3906527259d1..3b61851bb927 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -980,9 +980,6 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock,
 		goto exit;
 	}
 
-	/* will be updated in set_orig_addr() if needed */
-	m->msg_namelen = 0;
-
 	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 restart:
 
@@ -1091,9 +1088,6 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
 		goto exit;
 	}
 
-	/* will be updated in set_orig_addr() if needed */
-	m->msg_namelen = 0;
-
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
 	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c1f403bed683..01625ccc3ae6 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1754,7 +1754,6 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
 {
 	struct unix_sock *u = unix_sk(sk);
 
-	msg->msg_namelen = 0;
 	if (u->addr) {
 		msg->msg_namelen = u->addr->len;
 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
@@ -1778,8 +1777,6 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (flags&MSG_OOB)
 		goto out;
 
-	msg->msg_namelen = 0;
-
 	err = mutex_lock_interruptible(&u->readlock);
 	if (err) {
 		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
@@ -1924,8 +1921,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
 
-	msg->msg_namelen = 0;
-
 	/* Lock the socket to prevent queue disordering
 	 * while sleeps in memcpy_tomsg
 	 */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 545c08b8a1d4..5adfd94c5b85 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1662,8 +1662,6 @@ vsock_stream_recvmsg(struct kiocb *kiocb,
 	vsk = vsock_sk(sk);
 	err = 0;
 
-	msg->msg_namelen = 0;
-
 	lock_sock(sk);
 
 	if (sk->sk_state != SS_CONNECTED) {
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 9d6986634e0b..687360da62d9 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1746,8 +1746,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
 	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
 		return -EOPNOTSUPP;
 
-	msg->msg_namelen = 0;
-
 	/* Retrieve the head sk_buff from the socket's receive queue. */
 	err = 0;
 	skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 45a3ab5612c1..7622789d3750 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1340,10 +1340,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (sx25) {
 		sx25->sx25_family = AF_X25;
 		sx25->sx25_addr   = x25->dest_addr;
+		msg->msg_namelen = sizeof(*sx25);
 	}
 
-	msg->msg_namelen = sizeof(struct sockaddr_x25);
-
 	x25_check_rbuf(sk);
 	rc = copied;
 out_free_dgram:
-- 
cgit v1.2.3


From 3fb69bcadda6263b92dbf8dd74717cba27c77ed9 Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@freescale.com>
Date: Wed, 20 Nov 2013 16:38:19 -0600
Subject: net/phy: Add the autocross feature for forced links on VSC82x4

Add auto-MDI/MDI-X capability for forced (autonegotiation disabled)
10/100 Mbps speeds on Vitesse VSC82x4 PHYs. Exported previously static
function genphy_setup_forced() required by the new config_aneg handler
in the Vitesse PHY module.

Signed-off-by: Madalin Bucur <madalin.bucur@freescale.com>
Signed-off-by: Shruti Kanetkar <Shruti@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c |  4 +--
 drivers/net/phy/vitesse.c    | 75 +++++++++++++++++++++++++++++++++++++++++---
 include/linux/phy.h          |  1 +
 3 files changed, 73 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 74630e94fa3b..d6447b3f7409 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -697,7 +697,7 @@ static int genphy_config_advert(struct phy_device *phydev)
  *   to the values in phydev. Assumes that the values are valid.
  *   Please see phy_sanitize_settings().
  */
-static int genphy_setup_forced(struct phy_device *phydev)
+int genphy_setup_forced(struct phy_device *phydev)
 {
 	int err;
 	int ctl = 0;
@@ -716,7 +716,7 @@ static int genphy_setup_forced(struct phy_device *phydev)
 
 	return err;
 }
-
+EXPORT_SYMBOL(genphy_setup_forced);
 
 /**
  * genphy_restart_aneg - Enable and Restart Autonegotiation
diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index 40406bc2f8af..508e4359338b 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c
@@ -3,7 +3,7 @@
  *
  * Author: Kriston Carson
  *
- * Copyright (c) 2005, 2009 Freescale Semiconductor, Inc.
+ * Copyright (c) 2005, 2009, 2011 Freescale Semiconductor, Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -18,6 +18,11 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 
+/* Vitesse Extended Page Magic Register(s) */
+#define MII_VSC82X4_EXT_PAGE_16E	0x10
+#define MII_VSC82X4_EXT_PAGE_17E	0x11
+#define MII_VSC82X4_EXT_PAGE_18E	0x12
+
 /* Vitesse Extended Control Register 1 */
 #define MII_VSC8244_EXT_CON1           0x17
 #define MII_VSC8244_EXTCON1_INIT       0x0000
@@ -54,6 +59,9 @@
 #define MII_VSC8221_AUXCONSTAT_INIT	0x0004 /* need to set this bit? */
 #define MII_VSC8221_AUXCONSTAT_RESERVED	0x0004
 
+/* Vitesse Extended Page Access Register */
+#define MII_VSC82X4_EXT_PAGE_ACCESS	0x1f
+
 #define PHY_ID_VSC8234			0x000fc620
 #define PHY_ID_VSC8244			0x000fc6c0
 #define PHY_ID_VSC8574			0x000704a0
@@ -154,6 +162,63 @@ static int vsc8221_config_init(struct phy_device *phydev)
 	 */
 }
 
+/* vsc82x4_config_autocross_enable - Enable auto MDI/MDI-X for forced links
+ * @phydev: target phy_device struct
+ *
+ * Enable auto MDI/MDI-X when in 10/100 forced link speeds by writing
+ * special values in the VSC8234/VSC8244 extended reserved registers
+ */
+static int vsc82x4_config_autocross_enable(struct phy_device *phydev)
+{
+	int ret;
+
+	if (phydev->autoneg == AUTONEG_ENABLE || phydev->speed > SPEED_100)
+		return 0;
+
+	/* map extended registers set 0x10 - 0x1e */
+	ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_ACCESS, 0x52b5);
+	if (ret >= 0)
+		ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_18E, 0x0012);
+	if (ret >= 0)
+		ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_17E, 0x2803);
+	if (ret >= 0)
+		ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_16E, 0x87fa);
+	/* map standard registers set 0x10 - 0x1e */
+	if (ret >= 0)
+		ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_ACCESS, 0x0000);
+	else
+		phy_write(phydev, MII_VSC82X4_EXT_PAGE_ACCESS, 0x0000);
+
+	return ret;
+}
+
+/* vsc82x4_config_aneg - restart auto-negotiation or write BMCR
+ * @phydev: target phy_device struct
+ *
+ * Description: If auto-negotiation is enabled, we configure the
+ *   advertising, and then restart auto-negotiation.  If it is not
+ *   enabled, then we write the BMCR and also start the auto
+ *   MDI/MDI-X feature
+ */
+static int vsc82x4_config_aneg(struct phy_device *phydev)
+{
+	int ret;
+
+	/* Enable auto MDI/MDI-X when in 10/100 forced link speeds by
+	 * writing special values in the VSC8234 extended reserved registers
+	 */
+	if (phydev->autoneg != AUTONEG_ENABLE && phydev->speed <= SPEED_100) {
+		ret = genphy_setup_forced(phydev);
+
+		if (ret < 0) /* error */
+			return ret;
+
+		return vsc82x4_config_autocross_enable(phydev);
+	}
+
+	return genphy_config_aneg(phydev);
+}
+
 /* Vitesse 82xx */
 static struct phy_driver vsc82xx_driver[] = {
 {
@@ -163,7 +228,7 @@ static struct phy_driver vsc82xx_driver[] = {
 	.features       = PHY_GBIT_FEATURES,
 	.flags          = PHY_HAS_INTERRUPT,
 	.config_init    = &vsc824x_config_init,
-	.config_aneg    = &genphy_config_aneg,
+	.config_aneg    = &vsc82x4_config_aneg,
 	.read_status    = &genphy_read_status,
 	.ack_interrupt  = &vsc824x_ack_interrupt,
 	.config_intr    = &vsc82xx_config_intr,
@@ -175,7 +240,7 @@ static struct phy_driver vsc82xx_driver[] = {
 	.features	= PHY_GBIT_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= &vsc824x_config_init,
-	.config_aneg	= &genphy_config_aneg,
+	.config_aneg	= &vsc82x4_config_aneg,
 	.read_status	= &genphy_read_status,
 	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
@@ -187,7 +252,7 @@ static struct phy_driver vsc82xx_driver[] = {
 	.features       = PHY_GBIT_FEATURES,
 	.flags          = PHY_HAS_INTERRUPT,
 	.config_init    = &vsc824x_config_init,
-	.config_aneg    = &genphy_config_aneg,
+	.config_aneg    = &vsc82x4_config_aneg,
 	.read_status    = &genphy_read_status,
 	.ack_interrupt  = &vsc824x_ack_interrupt,
 	.config_intr    = &vsc82xx_config_intr,
@@ -199,7 +264,7 @@ static struct phy_driver vsc82xx_driver[] = {
 	.features       = PHY_GBIT_FEATURES,
 	.flags          = PHY_HAS_INTERRUPT,
 	.config_init    = &vsc824x_config_init,
-	.config_aneg    = &genphy_config_aneg,
+	.config_aneg    = &vsc82x4_config_aneg,
 	.read_status    = &genphy_read_status,
 	.ack_interrupt  = &vsc824x_ack_interrupt,
 	.config_intr    = &vsc82xx_config_intr,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 64ab823f7b74..48a4dc3cb8cf 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -559,6 +559,7 @@ static inline int phy_read_status(struct phy_device *phydev) {
 	return phydev->drv->read_status(phydev);
 }
 
+int genphy_setup_forced(struct phy_device *phydev);
 int genphy_restart_aneg(struct phy_device *phydev);
 int genphy_config_aneg(struct phy_device *phydev);
 int genphy_update_link(struct phy_device *phydev);
-- 
cgit v1.2.3


From 30b0a105d9f7141e4cbf72ae5511832457d89788 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 21 Nov 2013 14:31:58 -0800
Subject: mm: thp: give transparent hugepage code a separate copy_page

Right now, the migration code in migrate_page_copy() uses copy_huge_page()
for hugetlbfs and thp pages:

       if (PageHuge(page) || PageTransHuge(page))
                copy_huge_page(newpage, page);

So, yay for code reuse.  But:

  void copy_huge_page(struct page *dst, struct page *src)
  {
        struct hstate *h = page_hstate(src);

and a non-hugetlbfs page has no page_hstate().  This works 99% of the
time because page_hstate() determines the hstate from the page order
alone.  Since the page order of a THP page matches the default hugetlbfs
page order, it works.

But, if you change the default huge page size on the boot command-line
(say default_hugepagesz=1G), then we might not even *have* a 2MB hstate
so page_hstate() returns null and copy_huge_page() oopses pretty fast
since copy_huge_page() dereferences the hstate:

  void copy_huge_page(struct page *dst, struct page *src)
  {
        struct hstate *h = page_hstate(src);
        if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
  ...

Mel noticed that the migration code is really the only user of these
functions.  This moves all the copy code over to migrate.c and makes
copy_huge_page() work for THP by checking for it explicitly.

I believe the bug was introduced in commit b32967ff101a ("mm: numa: Add
THP migration for the NUMA working set scanning fault case")

[akpm@linux-foundation.org: fix coding-style and comment text, per Naoya Horiguchi]
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Tested-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  4 ----
 mm/hugetlb.c            | 34 ----------------------------------
 mm/migrate.c            | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index acd2010328f3..85e0c58bdfdf 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -69,7 +69,6 @@ int dequeue_hwpoisoned_huge_page(struct page *page);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 void putback_active_hugepage(struct page *page);
 bool is_hugepage_active(struct page *page);
-void copy_huge_page(struct page *dst, struct page *src);
 
 #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
@@ -140,9 +139,6 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page)
 #define isolate_huge_page(p, l) false
 #define putback_active_hugepage(p)	do {} while (0)
 #define is_hugepage_active(x)	false
-static inline void copy_huge_page(struct page *dst, struct page *src)
-{
-}
 
 static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7d57af21f49e..2130365d387d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -476,40 +476,6 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg)
 	return 0;
 }
 
-static void copy_gigantic_page(struct page *dst, struct page *src)
-{
-	int i;
-	struct hstate *h = page_hstate(src);
-	struct page *dst_base = dst;
-	struct page *src_base = src;
-
-	for (i = 0; i < pages_per_huge_page(h); ) {
-		cond_resched();
-		copy_highpage(dst, src);
-
-		i++;
-		dst = mem_map_next(dst, dst_base, i);
-		src = mem_map_next(src, src_base, i);
-	}
-}
-
-void copy_huge_page(struct page *dst, struct page *src)
-{
-	int i;
-	struct hstate *h = page_hstate(src);
-
-	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
-		copy_gigantic_page(dst, src);
-		return;
-	}
-
-	might_sleep();
-	for (i = 0; i < pages_per_huge_page(h); i++) {
-		cond_resched();
-		copy_highpage(dst + i, src + i);
-	}
-}
-
 static void enqueue_huge_page(struct hstate *h, struct page *page)
 {
 	int nid = page_to_nid(page);
diff --git a/mm/migrate.c b/mm/migrate.c
index 316e720a2023..bb940045fe85 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -441,6 +441,54 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 	return MIGRATEPAGE_SUCCESS;
 }
 
+/*
+ * Gigantic pages are so large that we do not guarantee that page++ pointer
+ * arithmetic will work across the entire page.  We need something more
+ * specialized.
+ */
+static void __copy_gigantic_page(struct page *dst, struct page *src,
+				int nr_pages)
+{
+	int i;
+	struct page *dst_base = dst;
+	struct page *src_base = src;
+
+	for (i = 0; i < nr_pages; ) {
+		cond_resched();
+		copy_highpage(dst, src);
+
+		i++;
+		dst = mem_map_next(dst, dst_base, i);
+		src = mem_map_next(src, src_base, i);
+	}
+}
+
+static void copy_huge_page(struct page *dst, struct page *src)
+{
+	int i;
+	int nr_pages;
+
+	if (PageHuge(src)) {
+		/* hugetlbfs page */
+		struct hstate *h = page_hstate(src);
+		nr_pages = pages_per_huge_page(h);
+
+		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+			__copy_gigantic_page(dst, src, nr_pages);
+			return;
+		}
+	} else {
+		/* thp page */
+		BUG_ON(!PageTransHuge(src));
+		nr_pages = hpage_nr_pages(src);
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		copy_highpage(dst + i, src + i);
+	}
+}
+
 /*
  * Copy the page to its new location
  */
-- 
cgit v1.2.3


From 27c73ae759774e63313c1fbfeb17ba076cea64c5 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 21 Nov 2013 14:32:02 -0800
Subject: mm: hugetlbfs: fix hugetlbfs optimization

Commit 7cb2ef56e6a8 ("mm: fix aio performance regression for database
caused by THP") can cause dereference of a dangling pointer if
split_huge_page runs during PageHuge() if there are updates to the
tail_page->private field.

Also it is repeating compound_head twice for hugetlbfs and it is running
compound_head+compound_trans_head for THP when a single one is needed in
both cases.

The new code within the PageSlab() check doesn't need to verify that the
THP page size is never bigger than the smallest hugetlbfs page size, to
avoid memory corruption.

A longstanding theoretical race condition was found while fixing the
above (see the change right after the skip_unlock label, that is
relevant for the compound_lock path too).

By re-establishing the _mapcount tail refcounting for all compound
pages, this also fixes the below problem:

  echo 0 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages

  BUG: Bad page state in process bash  pfn:59a01
  page:ffffea000139b038 count:0 mapcount:10 mapping:          (null) index:0x0
  page flags: 0x1c00000000008000(tail)
  Modules linked in:
  CPU: 6 PID: 2018 Comm: bash Not tainted 3.12.0+ #25
  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  Call Trace:
    dump_stack+0x55/0x76
    bad_page+0xd5/0x130
    free_pages_prepare+0x213/0x280
    __free_pages+0x36/0x80
    update_and_free_page+0xc1/0xd0
    free_pool_huge_page+0xc2/0xe0
    set_max_huge_pages.part.58+0x14c/0x220
    nr_hugepages_store_common.isra.60+0xd0/0xf0
    nr_hugepages_store+0x13/0x20
    kobj_attr_store+0xf/0x20
    sysfs_write_file+0x189/0x1e0
    vfs_write+0xc5/0x1f0
    SyS_write+0x55/0xb0
    system_call_fastpath+0x16/0x1b

Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Tested-by: Khalid Aziz <khalid.aziz@oracle.com>
Cc: Pravin Shelar <pshelar@nicira.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |   6 ++
 mm/hugetlb.c            |  17 ++++++
 mm/swap.c               | 143 ++++++++++++++++++++++++++++--------------------
 3 files changed, 106 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 85e0c58bdfdf..9649ff0c63f8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -31,6 +31,7 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
 int PageHuge(struct page *page);
+int PageHeadHuge(struct page *page_head);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -103,6 +104,11 @@ static inline int PageHuge(struct page *page)
 	return 0;
 }
 
+static inline int PageHeadHuge(struct page *page_head)
+{
+	return 0;
+}
+
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2130365d387d..dee6cf4e6d34 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -702,6 +702,23 @@ int PageHuge(struct page *page)
 }
 EXPORT_SYMBOL_GPL(PageHuge);
 
+/*
+ * PageHeadHuge() only returns true for hugetlbfs head page, but not for
+ * normal or transparent huge pages.
+ */
+int PageHeadHuge(struct page *page_head)
+{
+	compound_page_dtor *dtor;
+
+	if (!PageHead(page_head))
+		return 0;
+
+	dtor = get_compound_page_dtor(page_head);
+
+	return dtor == free_huge_page;
+}
+EXPORT_SYMBOL_GPL(PageHeadHuge);
+
 pgoff_t __basepage_index(struct page *page)
 {
 	struct page *page_head = compound_head(page);
diff --git a/mm/swap.c b/mm/swap.c
index 7a9f80d451f5..84b26aaabd03 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -82,19 +82,6 @@ static void __put_compound_page(struct page *page)
 
 static void put_compound_page(struct page *page)
 {
-	/*
-	 * hugetlbfs pages cannot be split from under us.  If this is a
-	 * hugetlbfs page, check refcount on head page and release the page if
-	 * the refcount becomes zero.
-	 */
-	if (PageHuge(page)) {
-		page = compound_head(page);
-		if (put_page_testzero(page))
-			__put_compound_page(page);
-
-		return;
-	}
-
 	if (unlikely(PageTail(page))) {
 		/* __split_huge_page_refcount can run under us */
 		struct page *page_head = compound_trans_head(page);
@@ -111,14 +98,31 @@ static void put_compound_page(struct page *page)
 			 * still hot on arches that do not support
 			 * this_cpu_cmpxchg_double().
 			 */
-			if (PageSlab(page_head)) {
-				if (PageTail(page)) {
+			if (PageSlab(page_head) || PageHeadHuge(page_head)) {
+				if (likely(PageTail(page))) {
+					/*
+					 * __split_huge_page_refcount
+					 * cannot race here.
+					 */
+					VM_BUG_ON(!PageHead(page_head));
+					atomic_dec(&page->_mapcount);
 					if (put_page_testzero(page_head))
 						VM_BUG_ON(1);
-
-					atomic_dec(&page->_mapcount);
-					goto skip_lock_tail;
+					if (put_page_testzero(page_head))
+						__put_compound_page(page_head);
+					return;
 				} else
+					/*
+					 * __split_huge_page_refcount
+					 * run before us, "page" was a
+					 * THP tail. The split
+					 * page_head has been freed
+					 * and reallocated as slab or
+					 * hugetlbfs page of smaller
+					 * order (only possible if
+					 * reallocated as slab on
+					 * x86).
+					 */
 					goto skip_lock;
 			}
 			/*
@@ -132,8 +136,27 @@ static void put_compound_page(struct page *page)
 				/* __split_huge_page_refcount run before us */
 				compound_unlock_irqrestore(page_head, flags);
 skip_lock:
-				if (put_page_testzero(page_head))
-					__put_single_page(page_head);
+				if (put_page_testzero(page_head)) {
+					/*
+					 * The head page may have been
+					 * freed and reallocated as a
+					 * compound page of smaller
+					 * order and then freed again.
+					 * All we know is that it
+					 * cannot have become: a THP
+					 * page, a compound page of
+					 * higher order, a tail page.
+					 * That is because we still
+					 * hold the refcount of the
+					 * split THP tail and
+					 * page_head was the THP head
+					 * before the split.
+					 */
+					if (PageHead(page_head))
+						__put_compound_page(page_head);
+					else
+						__put_single_page(page_head);
+				}
 out_put_single:
 				if (put_page_testzero(page))
 					__put_single_page(page);
@@ -155,7 +178,6 @@ out_put_single:
 			VM_BUG_ON(atomic_read(&page->_count) != 0);
 			compound_unlock_irqrestore(page_head, flags);
 
-skip_lock_tail:
 			if (put_page_testzero(page_head)) {
 				if (PageHead(page_head))
 					__put_compound_page(page_head);
@@ -198,51 +220,52 @@ bool __get_page_tail(struct page *page)
 	 * proper PT lock that already serializes against
 	 * split_huge_page().
 	 */
+	unsigned long flags;
 	bool got = false;
-	struct page *page_head;
-
-	/*
-	 * If this is a hugetlbfs page it cannot be split under us.  Simply
-	 * increment refcount for the head page.
-	 */
-	if (PageHuge(page)) {
-		page_head = compound_head(page);
-		atomic_inc(&page_head->_count);
-		got = true;
-	} else {
-		unsigned long flags;
+	struct page *page_head = compound_trans_head(page);
 
-		page_head = compound_trans_head(page);
-		if (likely(page != page_head &&
-					get_page_unless_zero(page_head))) {
-
-			/* Ref to put_compound_page() comment. */
-			if (PageSlab(page_head)) {
-				if (likely(PageTail(page))) {
-					__get_page_tail_foll(page, false);
-					return true;
-				} else {
-					put_page(page_head);
-					return false;
-				}
-			}
-
-			/*
-			 * page_head wasn't a dangling pointer but it
-			 * may not be a head page anymore by the time
-			 * we obtain the lock. That is ok as long as it
-			 * can't be freed from under us.
-			 */
-			flags = compound_lock_irqsave(page_head);
-			/* here __split_huge_page_refcount won't run anymore */
+	if (likely(page != page_head && get_page_unless_zero(page_head))) {
+		/* Ref to put_compound_page() comment. */
+		if (PageSlab(page_head) || PageHeadHuge(page_head)) {
 			if (likely(PageTail(page))) {
+				/*
+				 * This is a hugetlbfs page or a slab
+				 * page. __split_huge_page_refcount
+				 * cannot race here.
+				 */
+				VM_BUG_ON(!PageHead(page_head));
 				__get_page_tail_foll(page, false);
-				got = true;
-			}
-			compound_unlock_irqrestore(page_head, flags);
-			if (unlikely(!got))
+				return true;
+			} else {
+				/*
+				 * __split_huge_page_refcount run
+				 * before us, "page" was a THP
+				 * tail. The split page_head has been
+				 * freed and reallocated as slab or
+				 * hugetlbfs page of smaller order
+				 * (only possible if reallocated as
+				 * slab on x86).
+				 */
 				put_page(page_head);
+				return false;
+			}
+		}
+
+		/*
+		 * page_head wasn't a dangling pointer but it
+		 * may not be a head page anymore by the time
+		 * we obtain the lock. That is ok as long as it
+		 * can't be freed from under us.
+		 */
+		flags = compound_lock_irqsave(page_head);
+		/* here __split_huge_page_refcount won't run anymore */
+		if (likely(PageTail(page))) {
+			__get_page_tail_foll(page, false);
+			got = true;
 		}
+		compound_unlock_irqrestore(page_head, flags);
+		if (unlikely(!got))
+			put_page(page_head);
 	}
 	return got;
 }
-- 
cgit v1.2.3


From 7aa555bf26763b86332c7a3689701c999834b87a Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 21 Nov 2013 14:32:11 -0800
Subject: mm: place page->pmd_huge_pte to right union

I don't know what went wrong, mis-merge or something, but ->pmd_huge_pte
placed in wrong union within struct page.

In original patch[1] it's placed to union with ->lru and ->slab, but in
commit e009bb30c8df ("mm: implement split page table lock for PMD
level") it's in union with ->index and ->freelist.

That union seems also unused for pages with table tables and safe to
re-use, but it's not what I've tested.

Let's move it to original place.  It fixes indentation at least.  :)

[1] https://lkml.org/lkml/2013/10/7/288

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 10f5a7272b80..011eb85d7b0f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -65,9 +65,6 @@ struct page {
 						 * this page is only used to
 						 * free other pages.
 						 */
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
-		pgtable_t pmd_huge_pte; /* protected by page->ptl */
-#endif
 		};
 
 		union {
@@ -135,6 +132,9 @@ struct page {
 
 		struct list_head list;	/* slobs list of pages */
 		struct slab *slab_page; /* slab fields */
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
+		pgtable_t pmd_huge_pte; /* protected by page->ptl */
+#endif
 	};
 
 	/* Remainder is not double word aligned */
-- 
cgit v1.2.3


From 7e3528c3660a2e8602abc7858b0994d611f74bc3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 22 Nov 2013 18:14:38 -0800
Subject: slab.h: remove duplicate kmalloc declaration and fix kernel-doc
 warnings

Fix kernel-doc warning for duplicate definition of 'kmalloc':

  Documentation/DocBook/kernel-api.xml:9483: element refentry: validity error : ID API-kmalloc already defined
  <refentry id="API-kmalloc">

Also combine the kernel-doc info from the 2 kmalloc definitions into one
block and remove the "see kcalloc" comment since kmalloc now contains the
@flags info.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 102 +++++++++++++++++++++++----------------------------
 1 file changed, 46 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index c2bba248fa63..1e2f4fe12773 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -388,10 +388,55 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 /**
  * kmalloc - allocate memory
  * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kcalloc).
+ * @flags: the type of memory to allocate.
  *
  * kmalloc is the normal method of allocating memory
  * for objects smaller than page size in the kernel.
+ *
+ * The @flags argument may be one of:
+ *
+ * %GFP_USER - Allocate memory on behalf of user.  May sleep.
+ *
+ * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
+ *
+ * %GFP_ATOMIC - Allocation will not sleep.  May use emergency pools.
+ *   For example, use this inside interrupt handlers.
+ *
+ * %GFP_HIGHUSER - Allocate pages from high memory.
+ *
+ * %GFP_NOIO - Do not do any I/O at all while trying to get memory.
+ *
+ * %GFP_NOFS - Do not make any fs calls while trying to get memory.
+ *
+ * %GFP_NOWAIT - Allocation will not sleep.
+ *
+ * %GFP_THISNODE - Allocate node-local memory only.
+ *
+ * %GFP_DMA - Allocation suitable for DMA.
+ *   Should only be used for kmalloc() caches. Otherwise, use a
+ *   slab created with SLAB_DMA.
+ *
+ * Also it is possible to set different flags by OR'ing
+ * in one or more of the following additional @flags:
+ *
+ * %__GFP_COLD - Request cache-cold pages instead of
+ *   trying to return cache-warm pages.
+ *
+ * %__GFP_HIGH - This allocation has high priority and may use emergency pools.
+ *
+ * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail
+ *   (think twice before using).
+ *
+ * %__GFP_NORETRY - If memory is not immediately available,
+ *   then give up at once.
+ *
+ * %__GFP_NOWARN - If allocation fails, don't issue any warnings.
+ *
+ * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
+ *
+ * There are other flags available as well, but these are not intended
+ * for general use, and so are not documented here. For a full list of
+ * potential flags, always refer to linux/gfp.h.
  */
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
@@ -501,61 +546,6 @@ struct seq_file;
 int cache_show(struct kmem_cache *s, struct seq_file *m);
 void print_slabinfo_header(struct seq_file *m);
 
-/**
- * kmalloc - allocate memory
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * The @flags argument may be one of:
- *
- * %GFP_USER - Allocate memory on behalf of user.  May sleep.
- *
- * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
- *
- * %GFP_ATOMIC - Allocation will not sleep.  May use emergency pools.
- *   For example, use this inside interrupt handlers.
- *
- * %GFP_HIGHUSER - Allocate pages from high memory.
- *
- * %GFP_NOIO - Do not do any I/O at all while trying to get memory.
- *
- * %GFP_NOFS - Do not make any fs calls while trying to get memory.
- *
- * %GFP_NOWAIT - Allocation will not sleep.
- *
- * %GFP_THISNODE - Allocate node-local memory only.
- *
- * %GFP_DMA - Allocation suitable for DMA.
- *   Should only be used for kmalloc() caches. Otherwise, use a
- *   slab created with SLAB_DMA.
- *
- * Also it is possible to set different flags by OR'ing
- * in one or more of the following additional @flags:
- *
- * %__GFP_COLD - Request cache-cold pages instead of
- *   trying to return cache-warm pages.
- *
- * %__GFP_HIGH - This allocation has high priority and may use emergency pools.
- *
- * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail
- *   (think twice before using).
- *
- * %__GFP_NORETRY - If memory is not immediately available,
- *   then give up at once.
- *
- * %__GFP_NOWARN - If allocation fails, don't issue any warnings.
- *
- * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
- *
- * There are other flags available as well, but these are not intended
- * for general use, and so are not documented here. For a full list of
- * potential flags, always refer to linux/gfp.h.
- *
- * kmalloc is the normal method of allocating memory
- * in the kernel.
- */
-static __always_inline void *kmalloc(size_t size, gfp_t flags);
-
 /**
  * kmalloc_array - allocate memory for an array.
  * @n: number of elements.
-- 
cgit v1.2.3


From 53e7cac35db5941f42221314c33693e71ffa496b Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Sat, 16 Nov 2013 21:44:52 +0900
Subject: gpiolib: use dedicated flags for GPIO properties

GPIO mapping properties were defined using the GPIOF_* flags, which are
declared in linux/gpio.h. This file is not included when using the
GPIO descriptor interface.

This patch declares the flags that can be used as GPIO mappings
properties in linux/gpio/driver.h, and uses them in gpiolib, so that no
deprecated declarations are used by the GPIO descriptor interface.

This patch also allows GPIO_OPEN_DRAIN and GPIO_OPEN_SOURCE to be
specified as GPIO mapping properties.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 22 +++++++++++++++-------
 include/linux/gpio/driver.h | 11 +++++++++--
 2 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index d8938b9b794a..490198365ce4 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -14,6 +14,7 @@
 #include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
+#include <linux/gpio/driver.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/gpio.h>
@@ -2274,7 +2275,8 @@ void gpiod_add_table(struct gpiod_lookup *table, size_t size)
 
 #ifdef CONFIG_OF
 static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
-				      unsigned int idx, unsigned long *flags)
+				      unsigned int idx,
+				      enum gpio_lookup_flags *flags)
 {
 	char prop_name[32]; /* 32 is max size of property name */
 	enum of_gpio_flags of_flags;
@@ -2292,7 +2294,7 @@ static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
 		return desc;
 
 	if (of_flags & OF_GPIO_ACTIVE_LOW)
-		*flags |= GPIOF_ACTIVE_LOW;
+		*flags |= GPIO_ACTIVE_LOW;
 
 	return desc;
 }
@@ -2305,7 +2307,8 @@ static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
 #endif
 
 static struct gpio_desc *acpi_find_gpio(struct device *dev, const char *con_id,
-					unsigned int idx, unsigned long *flags)
+					unsigned int idx,
+					enum gpio_lookup_flags *flags)
 {
 	struct acpi_gpio_info info;
 	struct gpio_desc *desc;
@@ -2315,13 +2318,14 @@ static struct gpio_desc *acpi_find_gpio(struct device *dev, const char *con_id,
 		return desc;
 
 	if (info.gpioint && info.active_low)
-		*flags |= GPIOF_ACTIVE_LOW;
+		*flags |= GPIO_ACTIVE_LOW;
 
 	return desc;
 }
 
 static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id,
-				    unsigned int idx, unsigned long *flags)
+				    unsigned int idx,
+				    enum gpio_lookup_flags *flags)
 {
 	const char *dev_id = dev ? dev_name(dev) : NULL;
 	struct gpio_desc *desc = ERR_PTR(-ENODEV);
@@ -2413,7 +2417,7 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 {
 	struct gpio_desc *desc;
 	int status;
-	unsigned long flags = 0;
+	enum gpio_lookup_flags flags = 0;
 
 	dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id);
 
@@ -2439,8 +2443,12 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 	if (status < 0)
 		return ERR_PTR(status);
 
-	if (flags & GPIOF_ACTIVE_LOW)
+	if (flags & GPIO_ACTIVE_LOW)
 		set_bit(FLAG_ACTIVE_LOW, &desc->flags);
+	if (flags & GPIO_OPEN_DRAIN)
+		set_bit(FLAG_OPEN_DRAIN, &desc->flags);
+	if (flags & GPIO_OPEN_SOURCE)
+		set_bit(FLAG_OPEN_SOURCE, &desc->flags);
 
 	return desc;
 }
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 656a27efb2c8..82eac610ce1a 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -125,6 +125,13 @@ extern struct gpio_chip *gpiochip_find(void *data,
 int gpiod_lock_as_irq(struct gpio_desc *desc);
 void gpiod_unlock_as_irq(struct gpio_desc *desc);
 
+enum gpio_lookup_flags {
+	GPIO_ACTIVE_HIGH = (0 << 0),
+	GPIO_ACTIVE_LOW = (1 << 0),
+	GPIO_OPEN_DRAIN = (1 << 1),
+	GPIO_OPEN_SOURCE = (1 << 2),
+};
+
 /**
  * Lookup table for associating GPIOs to specific devices and functions using
  * platform data.
@@ -152,9 +159,9 @@ struct gpiod_lookup {
 	 */
 	unsigned int idx;
 	/*
-	 * mask of GPIOF_* values
+	 * mask of GPIO_* values
 	 */
-	unsigned long flags;
+	enum gpio_lookup_flags flags;
 };
 
 /*
-- 
cgit v1.2.3


From 9886e1fd096b8004a25bb8b7e5690ad1408ebc6f Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@gmail.com>
Date: Mon, 25 Nov 2013 11:49:47 -0700
Subject: ARM: tegra: Provide dummy powergate implementation

In order to support increased build test coverage for drivers, implement
dummies for the powergate implementation. This will allow the drivers to
be built without requiring support for Tegra to be selected.

This patch solves the following build errors, which can be triggered in
v3.13-rc1 by selecting DRM_TEGRA without ARCH_TEGRA:

drivers/built-in.o: In function `gr3d_remove':
drivers/gpu/drm/tegra/gr3d.c:321: undefined reference to `tegra_powergate_power_off'
drivers/gpu/drm/tegra/gr3d.c:325: undefined reference to `tegra_powergate_power_off'
drivers/built-in.o: In function `gr3d_probe':
drivers/gpu/drm/tegra/gr3d.c:266: undefined reference to `tegra_powergate_sequence_power_up'
drivers/gpu/drm/tegra/gr3d.c:273: undefined reference to `tegra_powergate_sequence_power_up'

Signed-off-by: Thierry Reding <treding@nvidia.com>
[swarren, updated commit description]
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 include/linux/tegra-powergate.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tegra-powergate.h b/include/linux/tegra-powergate.h
index c98cfa406952..fd4498329c7c 100644
--- a/include/linux/tegra-powergate.h
+++ b/include/linux/tegra-powergate.h
@@ -45,6 +45,7 @@ struct clk;
 
 #define TEGRA_POWERGATE_3D0	TEGRA_POWERGATE_3D
 
+#ifdef CONFIG_ARCH_TEGRA
 int tegra_powergate_is_powered(int id);
 int tegra_powergate_power_on(int id);
 int tegra_powergate_power_off(int id);
@@ -52,5 +53,31 @@ int tegra_powergate_remove_clamping(int id);
 
 /* Must be called with clk disabled, and returns with clk enabled */
 int tegra_powergate_sequence_power_up(int id, struct clk *clk);
+#else
+static inline int tegra_powergate_is_powered(int id)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_powergate_power_on(int id)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_powergate_power_off(int id)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_powergate_remove_clamping(int id)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_powergate_sequence_power_up(int id, struct clk *clk)
+{
+	return -ENOSYS;
+}
+#endif
 
 #endif /* _MACH_TEGRA_POWERGATE_H_ */
-- 
cgit v1.2.3


From ecf61c78bd787b9dfb4a6b1dfb7561c2a95c5c17 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Mon, 11 Nov 2013 14:40:35 +0100
Subject: PCI: Omit PCI ID macro strings to shorten quirk names

Pasting the verbatim PCI_(VENDOR|DEVICE)_* macros in the __pci_fixup_*
symbol names results in insanely long names such as

__pci_fixup_resumePCI_VENDOR_ID_SERVERWORKSPCI_DEVICE_ID_SERVERWORKS_HT1000SBquirk_disable_broadcom_boot_interrupt

When Link-Time Optimization adds its numeric suffix to such symbol, it
overflows the namebuf[KSYM_NAME_LEN] array in kernel/kallsyms.c.  Use the
line number instead to create (nearly) unique symbol names.

Reported-by: Joe Mario <jmario@redhat.com>
Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
---
 include/linux/pci.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1084a15175e0..eb8078aeadc8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1567,65 +1567,65 @@ enum pci_fixup_pass {
 /* Anonymous variables would be nice... */
 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,	\
 				  class_shift, hook)			\
-	static const struct pci_fixup __pci_fixup_##name __used		\
+	static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) __used	\
 	__attribute__((__section__(#section), aligned((sizeof(void *)))))    \
 		= { vendor, device, class, class_shift, hook };
 
 #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early,			\
-		vendor##device##hook, vendor, device, class, class_shift, hook)
+		hook, vendor, device, class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_HEADER(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_header,			\
-		vendor##device##hook, vendor, device, class, class_shift, hook)
+		hook, vendor, device, class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_FINAL(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_final,			\
-		vendor##device##hook, vendor, device, class, class_shift, hook)
+		hook, vendor, device, class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_ENABLE(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_enable,			\
-		vendor##device##hook, vendor, device, class, class_shift, hook)
+		hook, vendor, device, class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_RESUME(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,			\
-		resume##vendor##device##hook, vendor, device, class,	\
+		resume##hook, vendor, device, class,	\
 		class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(vendor, device, class,	\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,		\
-		resume_early##vendor##device##hook, vendor, device,	\
+		resume_early##hook, vendor, device,	\
 		class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_SUSPEND(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,			\
-		suspend##vendor##device##hook, vendor, device, class,	\
+		suspend##hook, vendor, device, class,	\
 		class_shift, hook)
 
 #define DECLARE_PCI_FIXUP_EARLY(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early,			\
-		vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
+		hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_HEADER(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_header,			\
-		vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
+		hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_FINAL(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_final,			\
-		vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
+		hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_ENABLE(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_enable,			\
-		vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
+		hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,			\
-		resume##vendor##device##hook, vendor, device,		\
+		resume##hook, vendor, device,		\
 		PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,		\
-		resume_early##vendor##device##hook, vendor, device,	\
+		resume_early##hook, vendor, device,	\
 		PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,			\
-		suspend##vendor##device##hook, vendor, device,		\
+		suspend##hook, vendor, device,		\
 		PCI_ANY_ID, 0, hook)
 
 #ifdef CONFIG_PCI_QUIRKS
-- 
cgit v1.2.3


From 5a87182aa21d6d5d306840feab9321818dd3e2a3 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 27 Nov 2013 09:09:42 +0530
Subject: cpufreq: suspend governors on system suspend/hibernate

This patch adds cpufreq suspend/resume calls to dpm_{suspend|resume}_noirq()
for handling suspend/resume of cpufreq governors.

Lan Tianyu (Intel) & Jinhyuk Choi (Broadcom) found anr issue where
tunables configuration for clusters/sockets with non-boot CPUs was
getting lost after suspend/resume, as we were notifying governors
with CPUFREQ_GOV_POLICY_EXIT on removal of the last cpu for that
policy and so deallocating memory for tunables. This is fixed by
this patch as we don't allow any operation on governors after
device suspend and before device resume now.

Reported-and-tested-by: Lan Tianyu <tianyu.lan@intel.com>
Reported-by: Jinhyuk Choi <jinchoi@broadcom.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[rjw: Changelog, minor cleanups]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/main.c |  3 +++
 drivers/cpufreq/cpufreq.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/cpufreq.h   |  8 ++++++++
 3 files changed, 54 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 9f098a82cf04..10c3510d72a9 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -29,6 +29,7 @@
 #include <linux/async.h>
 #include <linux/suspend.h>
 #include <trace/events/power.h>
+#include <linux/cpufreq.h>
 #include <linux/cpuidle.h>
 #include "../base.h"
 #include "power.h"
@@ -473,6 +474,7 @@ static void dpm_resume_noirq(pm_message_t state)
 	dpm_show_time(starttime, state, "noirq");
 	resume_device_irqs();
 	cpuidle_resume();
+	cpufreq_resume();
 }
 
 /**
@@ -885,6 +887,7 @@ static int dpm_suspend_noirq(pm_message_t state)
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	cpufreq_suspend();
 	cpuidle_pause();
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 02d534da22dd..606224a8abc2 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -26,6 +26,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/suspend.h>
 #include <linux/syscore_ops.h>
 #include <linux/tick.h>
 #include <trace/events/power.h>
@@ -47,6 +48,9 @@ static LIST_HEAD(cpufreq_policy_list);
 static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
 #endif
 
+/* Flag to suspend/resume CPUFreq governors */
+static bool cpufreq_suspended;
+
 static inline bool has_target(void)
 {
 	return cpufreq_driver->target_index || cpufreq_driver->target;
@@ -1462,6 +1466,41 @@ static struct subsys_interface cpufreq_interface = {
 	.remove_dev	= cpufreq_remove_dev,
 };
 
+void cpufreq_suspend(void)
+{
+	struct cpufreq_policy *policy;
+
+	if (!has_target())
+		return;
+
+	pr_debug("%s: Suspending Governors\n", __func__);
+
+	list_for_each_entry(policy, &cpufreq_policy_list, policy_list)
+		if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP))
+			pr_err("%s: Failed to stop governor for policy: %p\n",
+				__func__, policy);
+
+	cpufreq_suspended = true;
+}
+
+void cpufreq_resume(void)
+{
+	struct cpufreq_policy *policy;
+
+	if (!has_target())
+		return;
+
+	pr_debug("%s: Resuming Governors\n", __func__);
+
+	cpufreq_suspended = false;
+
+	list_for_each_entry(policy, &cpufreq_policy_list, policy_list)
+		if (__cpufreq_governor(policy, CPUFREQ_GOV_START)
+		    || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
+			pr_err("%s: Failed to start governor for policy: %p\n",
+				__func__, policy);
+}
+
 /**
  * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
  *
@@ -1764,6 +1803,10 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 	struct cpufreq_governor *gov = NULL;
 #endif
 
+	/* Don't start any governor operations if we are entering suspend */
+	if (cpufreq_suspended)
+		return 0;
+
 	if (policy->governor->max_transition_latency &&
 	    policy->cpuinfo.transition_latency >
 	    policy->governor->max_transition_latency) {
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 5bd6ab9b0c27..91716658e9a1 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -272,6 +272,14 @@ cpufreq_verify_within_cpu_limits(struct cpufreq_policy *policy)
 			policy->cpuinfo.max_freq);
 }
 
+#ifdef CONFIG_CPU_FREQ
+void cpufreq_suspend(void);
+void cpufreq_resume(void);
+#else
+static inline void cpufreq_suspend(void) {}
+static inline void cpufreq_resume(void) {}
+#endif
+
 /*********************************************************************
  *                     CPUFREQ NOTIFIER INTERFACE                    *
  *********************************************************************/
-- 
cgit v1.2.3


From e0d59733f6b1796b8d6692642c87d7dd862c3e3a Mon Sep 17 00:00:00 2001
From: Seiji Aguchi <seiji.aguchi@hds.com>
Date: Wed, 30 Oct 2013 15:27:26 -0400
Subject: efivars, efi-pstore: Hold off deletion of sysfs entry until the scan
 is completed

Currently, when mounting pstore file system, a read callback of
efi_pstore driver runs mutiple times as below.

- In the first read callback, scan efivar_sysfs_list from head and pass
  a kmsg buffer of a entry to an upper pstore layer.
- In the second read callback, rescan efivar_sysfs_list from the entry
  and pass another kmsg buffer to it.
- Repeat the scan and pass until the end of efivar_sysfs_list.

In this process, an entry is read across the multiple read function
calls. To avoid race between the read and erasion, the whole process
above is protected by a spinlock, holding in open() and releasing in
close().

At the same time, kmemdup() is called to pass the buffer to pstore
filesystem during it. And then, it causes a following lockdep warning.

To make the dynamic memory allocation runnable without taking spinlock,
holding off a deletion of sysfs entry if it happens while scanning it
via efi_pstore, and deleting it after the scan is completed.

To implement it, this patch introduces two flags, scanning and deleting,
to efivar_entry.

On the code basis, it seems that all the scanning and deleting logic is
not needed because __efivars->lock are not dropped when reading from the
EFI variable store.

But, the scanning and deleting logic is still needed because an
efi-pstore and a pstore filesystem works as follows.

In case an entry(A) is found, the pointer is saved to psi->data.  And
efi_pstore_read() passes the entry(A) to a pstore filesystem by
releasing  __efivars->lock.

And then, the pstore filesystem calls efi_pstore_read() again and the
same entry(A), which is saved to psi->data, is used for resuming to scan
a sysfs-list.

So, to protect the entry(A), the logic is needed.

[    1.143710] ------------[ cut here ]------------
[    1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740 lockdep_trace_alloc+0x104/0x110()
[    1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
[    1.144058] Modules linked in:
[    1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
[    1.144058]  0000000000000009 ffff8800797e9ae0 ffffffff816614a5 ffff8800797e9b28
[    1.144058]  ffff8800797e9b18 ffffffff8105510d 0000000000000080 0000000000000046
[    1.144058]  00000000000000d0 00000000000003af ffffffff81ccd0c0 ffff8800797e9b78
[    1.144058] Call Trace:
[    1.144058]  [<ffffffff816614a5>] dump_stack+0x54/0x74
[    1.144058]  [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
[    1.144058]  [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
[    1.144058]  [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
[    1.144058]  [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
[    1.144058]  [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
[    1.144058]  [<ffffffff815147bb>] ? efi_pstore_read_func.part.1+0x12b/0x170
[    1.144058]  [<ffffffff8115b260>] kmemdup+0x20/0x50
[    1.144058]  [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
[    1.144058]  [<ffffffff81514800>] ? efi_pstore_read_func.part.1+0x170/0x170
[    1.144058]  [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
[    1.144058]  [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
[    1.144058]  [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
[    1.144058]  [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
[    1.158207]  [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
[    1.158207]  [<ffffffff8128ce30>] ? parse_options+0x80/0x80
[    1.158207]  [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
[    1.158207]  [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
[    1.158207]  [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
[    1.158207]  [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
[    1.158207]  [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
[    1.158207]  [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
[    1.158207]  [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
[    1.158207]  [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
[    1.158207]  [<ffffffff811cc373>] SyS_mount+0x83/0xc0
[    1.158207]  [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
[    1.158207] ---[ end trace 61981bc62de9f6f4 ]---

Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Tested-by: Madper Xie <cxie@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efi/efi-pstore.c | 143 +++++++++++++++++++++++++++++++++++---
 drivers/firmware/efi/efivars.c    |  12 ++--
 drivers/firmware/efi/vars.c       |  12 +++-
 include/linux/efi.h               |   4 ++
 4 files changed, 155 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 5002d50e3781..6ce31e93da55 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
 
 static int efi_pstore_open(struct pstore_info *psi)
 {
-	efivar_entry_iter_begin();
 	psi->data = NULL;
 	return 0;
 }
 
 static int efi_pstore_close(struct pstore_info *psi)
 {
-	efivar_entry_iter_end();
 	psi->data = NULL;
 	return 0;
 }
@@ -91,19 +89,125 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
 	__efivar_entry_get(entry, &entry->var.Attributes,
 			   &entry->var.DataSize, entry->var.Data);
 	size = entry->var.DataSize;
+	memcpy(*cb_data->buf, entry->var.Data,
+	       (size_t)min_t(unsigned long, EFIVARS_DATA_SIZE_MAX, size));
 
-	*cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
-	if (*cb_data->buf == NULL)
-		return -ENOMEM;
 	return size;
 }
 
+/**
+ * efi_pstore_scan_sysfs_enter
+ * @entry: scanning entry
+ * @next: next entry
+ * @head: list head
+ */
+static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
+					struct efivar_entry *next,
+					struct list_head *head)
+{
+	pos->scanning = true;
+	if (&next->list != head)
+		next->scanning = true;
+}
+
+/**
+ * __efi_pstore_scan_sysfs_exit
+ * @entry: deleting entry
+ * @turn_off_scanning: Check if a scanning flag should be turned off
+ */
+static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
+						bool turn_off_scanning)
+{
+	if (entry->deleting) {
+		list_del(&entry->list);
+		efivar_entry_iter_end();
+		efivar_unregister(entry);
+		efivar_entry_iter_begin();
+	} else if (turn_off_scanning)
+		entry->scanning = false;
+}
+
+/**
+ * efi_pstore_scan_sysfs_exit
+ * @pos: scanning entry
+ * @next: next entry
+ * @head: list head
+ * @stop: a flag checking if scanning will stop
+ */
+static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
+				       struct efivar_entry *next,
+				       struct list_head *head, bool stop)
+{
+	__efi_pstore_scan_sysfs_exit(pos, true);
+	if (stop)
+		__efi_pstore_scan_sysfs_exit(next, &next->list != head);
+}
+
+/**
+ * efi_pstore_sysfs_entry_iter
+ *
+ * @data: function-specific data to pass to callback
+ * @pos: entry to begin iterating from
+ *
+ * You MUST call efivar_enter_iter_begin() before this function, and
+ * efivar_entry_iter_end() afterwards.
+ *
+ * It is possible to begin iteration from an arbitrary entry within
+ * the list by passing @pos. @pos is updated on return to point to
+ * the next entry of the last one passed to efi_pstore_read_func().
+ * To begin iterating from the beginning of the list @pos must be %NULL.
+ */
+static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
+{
+	struct efivar_entry *entry, *n;
+	struct list_head *head = &efivar_sysfs_list;
+	int size = 0;
+
+	if (!*pos) {
+		list_for_each_entry_safe(entry, n, head, list) {
+			efi_pstore_scan_sysfs_enter(entry, n, head);
+
+			size = efi_pstore_read_func(entry, data);
+			efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
+			if (size)
+				break;
+		}
+		*pos = n;
+		return size;
+	}
+
+	list_for_each_entry_safe_from((*pos), n, head, list) {
+		efi_pstore_scan_sysfs_enter((*pos), n, head);
+
+		size = efi_pstore_read_func((*pos), data);
+		efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
+		if (size)
+			break;
+	}
+	*pos = n;
+	return size;
+}
+
+/**
+ * efi_pstore_read
+ *
+ * This function returns a size of NVRAM entry logged via efi_pstore_write().
+ * The meaning and behavior of efi_pstore/pstore are as below.
+ *
+ * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
+ *           and pstore filesystem will continue reading subsequent entries.
+ * size == 0: Entry was not logged via efi_pstore_write(),
+ *            and efi_pstore driver will continue reading subsequent entries.
+ * size < 0: Failed to get data of entry logging via efi_pstore_write(),
+ *           and pstore will stop reading entry.
+ */
 static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 			       int *count, struct timespec *timespec,
 			       char **buf, bool *compressed,
 			       struct pstore_info *psi)
 {
 	struct pstore_read_data data;
+	ssize_t size;
 
 	data.id = id;
 	data.type = type;
@@ -112,8 +216,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 	data.compressed = compressed;
 	data.buf = buf;
 
-	return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
-				   (struct efivar_entry **)&psi->data);
+	*data.buf = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL);
+	if (!*data.buf)
+		return -ENOMEM;
+
+	efivar_entry_iter_begin();
+	size = efi_pstore_sysfs_entry_iter(&data,
+					   (struct efivar_entry **)&psi->data);
+	efivar_entry_iter_end();
+	if (size <= 0)
+		kfree(*data.buf);
+	return size;
 }
 
 static int efi_pstore_write(enum pstore_type_id type,
@@ -184,9 +297,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
 			return 0;
 	}
 
+	if (entry->scanning) {
+		/*
+		 * Skip deletion because this entry will be deleted
+		 * after scanning is completed.
+		 */
+		entry->deleting = true;
+	} else
+		list_del(&entry->list);
+
 	/* found */
 	__efivar_entry_delete(entry);
-	list_del(&entry->list);
 
 	return 1;
 }
@@ -214,10 +335,12 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
 
 	efivar_entry_iter_begin();
 	found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
-	efivar_entry_iter_end();
 
-	if (found)
+	if (found && !entry->scanning) {
+		efivar_entry_iter_end();
 		efivar_unregister(entry);
+	} else
+		efivar_entry_iter_end();
 
 	return 0;
 }
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 933eb027d527..3dc248239197 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -383,12 +383,16 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
 	else if (__efivar_entry_delete(entry))
 		err = -EIO;
 
-	efivar_entry_iter_end();
-
-	if (err)
+	if (err) {
+		efivar_entry_iter_end();
 		return err;
+	}
 
-	efivar_unregister(entry);
+	if (!entry->scanning) {
+		efivar_entry_iter_end();
+		efivar_unregister(entry);
+	} else
+		efivar_entry_iter_end();
 
 	/* It's dead Jim.... */
 	return count;
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 391c67b182d9..b22659cccca4 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
 	if (!found)
 		return NULL;
 
-	if (remove)
-		list_del(&entry->list);
+	if (remove) {
+		if (entry->scanning) {
+			/*
+			 * The entry will be deleted
+			 * after scanning is completed.
+			 */
+			entry->deleting = true;
+		} else
+			list_del(&entry->list);
+	}
 
 	return entry;
 }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index bc5687d0f315..11ce6784a196 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -801,6 +801,8 @@ struct efivar_entry {
 	struct efi_variable var;
 	struct list_head list;
 	struct kobject kobj;
+	bool scanning;
+	bool deleting;
 };
 
 
@@ -866,6 +868,8 @@ void efivar_run_worker(void);
 #if defined(CONFIG_EFI_VARS) || defined(CONFIG_EFI_VARS_MODULE)
 int efivars_sysfs_init(void);
 
+#define EFIVARS_DATA_SIZE_MAX 1024
+
 #endif /* CONFIG_EFI_VARS */
 
 #endif /* _LINUX_EFI_H */
-- 
cgit v1.2.3


From 23fd78d76415729b338ff1802a0066b4a62f7fb8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 2 Dec 2013 11:24:18 +0000
Subject: KEYS: Fix multiple key add into associative array

If sufficient keys (or keyrings) are added into a keyring such that a node in
the associative array's tree overflows (each node has a capacity N, currently
16) and such that all N+1 keys have the same index key segment for that level
of the tree (the level'th nibble of the index key), then assoc_array_insert()
calls ops->diff_objects() to indicate at which bit position the two index keys
vary.

However, __key_link_begin() passes a NULL object to assoc_array_insert() with
the intention of supplying the correct pointer later before we commit the
change.  This means that keyring_diff_objects() is given a NULL pointer as one
of its arguments which it does not expect.  This results in an oops like the
attached.

With the previous patch to fix the keyring hash function, this can be forced
much more easily by creating a keyring and only adding keyrings to it.  Add any
other sort of key and a different insertion path is taken - all 16+1 objects
must want to cluster in the same node slot.

This can be tested by:

	r=`keyctl newring sandbox @s`
	for ((i=0; i<=16; i++)); do keyctl newring ring$i $r; done

This should work fine, but oopses when the 17th keyring is added.

Since ops->diff_objects() is always called with the first pointer pointing to
the object to be inserted (ie. the NULL pointer), we can fix the problem by
changing the to-be-inserted object pointer to point to the index key passed
into assoc_array_insert() instead.

Whilst we're at it, we also switch the arguments so that they are the same as
for ->compare_object().

BUG: unable to handle kernel NULL pointer dereference at 0000000000000088
IP: [<ffffffff81191ee4>] hash_key_type_and_desc+0x18/0xb0
...
RIP: 0010:[<ffffffff81191ee4>] hash_key_type_and_desc+0x18/0xb0
...
Call Trace:
 [<ffffffff81191f9d>] keyring_diff_objects+0x21/0xd2
 [<ffffffff811f09ef>] assoc_array_insert+0x3b6/0x908
 [<ffffffff811929a7>] __key_link_begin+0x78/0xe5
 [<ffffffff81191a2e>] key_create_or_update+0x17d/0x36a
 [<ffffffff81192e0a>] SyS_add_key+0x123/0x183
 [<ffffffff81400ddb>] tracesys+0xdd/0xe2

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Stephen Gallagher <sgallagh@redhat.com>
---
 Documentation/assoc_array.txt | 6 +++---
 include/linux/assoc_array.h   | 6 +++---
 lib/assoc_array.c             | 4 ++--
 security/keys/keyring.c       | 7 +++----
 4 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
index f4faec0f66e4..2f2c6cdd73c0 100644
--- a/Documentation/assoc_array.txt
+++ b/Documentation/assoc_array.txt
@@ -164,10 +164,10 @@ This points to a number of methods, all of which need to be provided:
 
  (4) Diff the index keys of two objects.
 
-	int (*diff_objects)(const void *a, const void *b);
+	int (*diff_objects)(const void *object, const void *index_key);
 
-     Return the bit position at which the index keys of two objects differ or
-     -1 if they are the same.
+     Return the bit position at which the index key of the specified object
+     differs from the given index key or -1 if they are the same.
 
 
  (5) Free an object.
diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
index 9a193b84238a..a89df3be1686 100644
--- a/include/linux/assoc_array.h
+++ b/include/linux/assoc_array.h
@@ -41,10 +41,10 @@ struct assoc_array_ops {
 	/* Is this the object we're looking for? */
 	bool (*compare_object)(const void *object, const void *index_key);
 
-	/* How different are two objects, to a bit position in their keys? (or
-	 * -1 if they're the same)
+	/* How different is an object from an index key, to a bit position in
+	 * their keys? (or -1 if they're the same)
 	 */
-	int (*diff_objects)(const void *a, const void *b);
+	int (*diff_objects)(const void *object, const void *index_key);
 
 	/* Method to free an object. */
 	void (*free_object)(void *object);
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
index 17edeaf19180..1b6a44f1ec3e 100644
--- a/lib/assoc_array.c
+++ b/lib/assoc_array.c
@@ -759,8 +759,8 @@ all_leaves_cluster_together:
 	pr_devel("all leaves cluster together\n");
 	diff = INT_MAX;
 	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
-		int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
-					  assoc_array_ptr_to_leaf(node->slots[i]));
+		int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]),
+					  index_key);
 		if (x < diff) {
 			BUG_ON(x < 0);
 			diff = x;
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 0adbc77a59b9..3dd8445cd489 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -279,12 +279,11 @@ static bool keyring_compare_object(const void *object, const void *data)
  * Compare the index keys of a pair of objects and determine the bit position
  * at which they differ - if they differ.
  */
-static int keyring_diff_objects(const void *_a, const void *_b)
+static int keyring_diff_objects(const void *object, const void *data)
 {
-	const struct key *key_a = keyring_ptr_to_key(_a);
-	const struct key *key_b = keyring_ptr_to_key(_b);
+	const struct key *key_a = keyring_ptr_to_key(object);
 	const struct keyring_index_key *a = &key_a->index_key;
-	const struct keyring_index_key *b = &key_b->index_key;
+	const struct keyring_index_key *b = data;
 	unsigned long seg_a, seg_b;
 	int level, i;
 
-- 
cgit v1.2.3


From c7277090927a5e71871e799a355ed2940f6c8fc6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 2 Dec 2013 11:24:19 +0000
Subject: security: shmem: implement kernel private shmem inodes

We have a problem where the big_key key storage implementation uses a
shmem backed inode to hold the key contents.  Because of this detail of
implementation LSM checks are being done between processes trying to
read the keys and the tmpfs backed inode.  The LSM checks are already
being handled on the key interface level and should not be enforced at
the inode level (since the inode is an implementation detail, not a
part of the security model)

This patch implements a new function shmem_kernel_file_setup() which
returns the equivalent to shmem_file_setup() only the underlying inode
has S_PRIVATE set.  This means that all LSM checks for the inode in
question are skipped.  It should only be used for kernel internal
operations where the inode is not exposed to userspace without proper
LSM checking.  It is possible that some other users of
shmem_file_setup() should use the new interface, but this has not been
explored.

Reproducing this bug is a little bit difficult.  The steps I used on
Fedora are:

 (1) Turn off selinux enforcing:

	setenforce 0

 (2) Create a huge key

	k=`dd if=/dev/zero bs=8192 count=1 | keyctl padd big_key test-key @s`

 (3) Access the key in another context:

	runcon system_u:system_r:httpd_t:s0-s0:c0.c1023 keyctl print $k >/dev/null

 (4) Examine the audit logs:

	ausearch -m AVC -i --subject httpd_t | audit2allow

If the last command's output includes a line that looks like:

	allow httpd_t user_tmpfs_t:file { open read };

There was an inode check between httpd and the tmpfs filesystem.  With
this patch no such denial will be seen.  (NOTE! you should clear your
audit log if you have tested for this previously)

(Please return you box to enforcing)

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Hugh Dickins <hughd@google.com>
cc: linux-mm@kvack.org
---
 include/linux/shmem_fs.h |  2 ++
 mm/shmem.c               | 36 +++++++++++++++++++++++++++++-------
 security/keys/big_key.c  |  2 +-
 3 files changed, 32 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 30aa0dc60d75..9d55438bc4ad 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -47,6 +47,8 @@ extern int shmem_init(void);
 extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
 extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
+extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
+					    unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 extern void shmem_unlock_mapping(struct address_space *mapping);
diff --git a/mm/shmem.c b/mm/shmem.c
index 8297623fcaed..902a14842b74 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2918,13 +2918,8 @@ static struct dentry_operations anon_ops = {
 	.d_dname = simple_dname
 };
 
-/**
- * shmem_file_setup - get an unlinked file living in tmpfs
- * @name: name for dentry (to be seen in /proc/<pid>/maps
- * @size: size to be set for the file
- * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
- */
-struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+static struct file *__shmem_file_setup(const char *name, loff_t size,
+				       unsigned long flags, unsigned int i_flags)
 {
 	struct file *res;
 	struct inode *inode;
@@ -2957,6 +2952,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 	if (!inode)
 		goto put_dentry;
 
+	inode->i_flags |= i_flags;
 	d_instantiate(path.dentry, inode);
 	inode->i_size = size;
 	clear_nlink(inode);	/* It is unlinked */
@@ -2977,6 +2973,32 @@ put_memory:
 	shmem_unacct_size(flags, size);
 	return res;
 }
+
+/**
+ * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
+ * 	kernel internal.  There will be NO LSM permission checks against the
+ * 	underlying inode.  So users of this interface must do LSM checks at a
+ * 	higher layer.  The one user is the big_key implementation.  LSM checks
+ * 	are provided at the key level rather than the inode level.
+ * @name: name for dentry (to be seen in /proc/<pid>/maps
+ * @size: size to be set for the file
+ * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
+ */
+struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
+{
+	return __shmem_file_setup(name, size, flags, S_PRIVATE);
+}
+
+/**
+ * shmem_file_setup - get an unlinked file living in tmpfs
+ * @name: name for dentry (to be seen in /proc/<pid>/maps
+ * @size: size to be set for the file
+ * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
+ */
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+{
+	return __shmem_file_setup(name, size, flags, 0);
+}
 EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /**
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 7f44c3207a9b..8137b27d641d 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -70,7 +70,7 @@ int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 		 *
 		 * TODO: Encrypt the stored data with a temporary key.
 		 */
-		file = shmem_file_setup("", datalen, 0);
+		file = shmem_kernel_file_setup("", datalen, 0);
 		if (IS_ERR(file)) {
 			ret = PTR_ERR(file);
 			goto err_quota;
-- 
cgit v1.2.3


From 35773dac5f862cb1c82ea151eba3e2f6de51ec3e Mon Sep 17 00:00:00 2001
From: David Laight <David.Laight@ACULAB.COM>
Date: Mon, 11 Nov 2013 12:26:54 +0000
Subject: usb: xhci: Link TRB must not occur within a USB payload burst

Section 4.11.7.1 of rev 1.0 of the xhci specification states that a link TRB
can only occur at a boundary between underlying USB frames (512 bytes for
high speed devices).

If this isn't done the USB frames aren't formatted correctly and, for example,
the USB3 ethernet ax88179_178a card will stop sending (while still receiving)
when running a netperf tcp transmit test with (say) and 8k buffer.

This should be a candidate for stable, the ax88179_178a driver defaults to
gso and tso enabled so it passes a lot of fragmented skb to the USB stack.

Notes from Sarah:

Discussion: http://marc.info/?l=linux-usb&m=138384509604981&w=2

This patch fixes a long-standing xHCI driver bug that was revealed by a
change in 3.12 in the usb-net driver.  Commit
638c5115a794981441246fa8fa5d95c1875af5ba "USBNET: support DMA SG" added
support to use bulk endpoint scatter-gather (urb->sg).  Only the USB
ethernet drivers trigger this bug, because the mass storage driver sends
sg list entries in page-sized chunks.

This patch only fixes the issue for bulk endpoint scatter-gather.  The
problem will still occur for periodic endpoints, because hosts will
interpret no-op transfers as a request to skip a service interval, which
is not what we want.

Luckily, the USB core isn't set up for scatter-gather on isochronous
endpoints, and no USB drivers use scatter-gather for interrupt
endpoints.  Document this known limitation so that developers won't try
to use urb->sg for interrupt endpoints until this issue is fixed.  The
more comprehensive fix would be to allow link TRBs in the middle of the
endpoint ring and revert this patch, but that fix would touch too much
code to be allowed in for stable.

This patch should be backported to kernels as old as 3.12, that contain
the commit 638c5115a794981441246fa8fa5d95c1875af5ba "USBNET: support DMA
SG".  Without this patch, the USB network device gets wedged, and stops
sending packets.  Mark Lord confirms this patch fixes the regression:

http://marc.info/?l=linux-netdev&m=138487107625966&w=2

Signed-off-by: David Laight <david.laight@aculab.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Tested-by: Mark Lord <mlord@pobox.com>
Cc: stable@vger.kernel.org
---
 drivers/usb/host/xhci-ring.c | 54 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/usb.h          |  2 ++
 2 files changed, 54 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 1e2f3f495843..53c2e296467f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2973,8 +2973,58 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
 	}
 
 	while (1) {
-		if (room_on_ring(xhci, ep_ring, num_trbs))
-			break;
+		if (room_on_ring(xhci, ep_ring, num_trbs)) {
+			union xhci_trb *trb = ep_ring->enqueue;
+			unsigned int usable = ep_ring->enq_seg->trbs +
+					TRBS_PER_SEGMENT - 1 - trb;
+			u32 nop_cmd;
+
+			/*
+			 * Section 4.11.7.1 TD Fragments states that a link
+			 * TRB must only occur at the boundary between
+			 * data bursts (eg 512 bytes for 480M).
+			 * While it is possible to split a large fragment
+			 * we don't know the size yet.
+			 * Simplest solution is to fill the trb before the
+			 * LINK with nop commands.
+			 */
+			if (num_trbs == 1 || num_trbs <= usable || usable == 0)
+				break;
+
+			if (ep_ring->type != TYPE_BULK)
+				/*
+				 * While isoc transfers might have a buffer that
+				 * crosses a 64k boundary it is unlikely.
+				 * Since we can't add NOPs without generating
+				 * gaps in the traffic just hope it never
+				 * happens at the end of the ring.
+				 * This could be fixed by writing a LINK TRB
+				 * instead of the first NOP - however the
+				 * TRB_TYPE_LINK_LE32() calls would all need
+				 * changing to check the ring length.
+				 */
+				break;
+
+			if (num_trbs >= TRBS_PER_SEGMENT) {
+				xhci_err(xhci, "Too many fragments %d, max %d\n",
+						num_trbs, TRBS_PER_SEGMENT - 1);
+				return -ENOMEM;
+			}
+
+			nop_cmd = cpu_to_le32(TRB_TYPE(TRB_TR_NOOP) |
+					ep_ring->cycle_state);
+			ep_ring->num_trbs_free -= usable;
+			do {
+				trb->generic.field[0] = 0;
+				trb->generic.field[1] = 0;
+				trb->generic.field[2] = 0;
+				trb->generic.field[3] = nop_cmd;
+				trb++;
+			} while (--usable);
+			ep_ring->enqueue = trb;
+			if (room_on_ring(xhci, ep_ring, num_trbs))
+				break;
+		}
 
 		if (ep_ring == xhci->cmd_ring) {
 			xhci_err(xhci, "Do not support expand command ring\n");
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 7454865ad148..512ab162832c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1264,6 +1264,8 @@ typedef void (*usb_complete_t)(struct urb *);
  * @sg: scatter gather buffer list, the buffer size of each element in
  * 	the list (except the last) must be divisible by the endpoint's
  * 	max packet size if no_sg_constraint isn't set in 'struct usb_bus'
+ * 	(FIXME: scatter-gather under xHCI is broken for periodic transfers.
+ * 	Do not use urb->sg for interrupt endpoints for now, only bulk.)
  * @num_mapped_sgs: (internal) number of mapped sg entries
  * @num_sgs: number of entries in the sg list
  * @transfer_buffer_length: How big is transfer_buffer.  The transfer may
-- 
cgit v1.2.3


From 8496e85c20e7836b3dec97780e40f420a3ae2801 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 1 Dec 2013 02:34:37 +0100
Subject: PCI / tg3: Give up chip reset and carrier loss handling if PCI device
 is not present

Modify tg3_chip_reset() and tg3_close() to check if the PCI network
adapter device is accessible at all in order to skip poking it or
trying to handle a carrier loss in vain when that's not the case.
Introduce a special PCI helper function pci_device_is_present()
for this purpose.

Of course, this uncovers the lack of the appropriate RTNL locking
in tg3_suspend() and tg3_resume(), so add that locking in there
too.

These changes prevent tg3 from burning a CPU at 100% load level for
solid several seconds after the Thunderbolt link is disconnected from
a Matrox DS1 docking station.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/tg3.c | 26 +++++++++++++++++++-------
 drivers/pci/pci.c                   |  8 ++++++++
 include/linux/pci.h                 |  1 +
 3 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 369b736dde05..472305cdff4f 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -8932,6 +8932,9 @@ static int tg3_chip_reset(struct tg3 *tp)
 	void (*write_op)(struct tg3 *, u32, u32);
 	int i, err;
 
+	if (!pci_device_is_present(tp->pdev))
+		return -ENODEV;
+
 	tg3_nvram_lock(tp);
 
 	tg3_ape_lock(tp, TG3_APE_LOCK_GRC);
@@ -11581,10 +11584,11 @@ static int tg3_close(struct net_device *dev)
 	memset(&tp->net_stats_prev, 0, sizeof(tp->net_stats_prev));
 	memset(&tp->estats_prev, 0, sizeof(tp->estats_prev));
 
-	tg3_power_down_prepare(tp);
-
-	tg3_carrier_off(tp);
+	if (pci_device_is_present(tp->pdev)) {
+		tg3_power_down_prepare(tp);
 
+		tg3_carrier_off(tp);
+	}
 	return 0;
 }
 
@@ -17726,10 +17730,12 @@ static int tg3_suspend(struct device *device)
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct tg3 *tp = netdev_priv(dev);
-	int err;
+	int err = 0;
+
+	rtnl_lock();
 
 	if (!netif_running(dev))
-		return 0;
+		goto unlock;
 
 	tg3_reset_task_cancel(tp);
 	tg3_phy_stop(tp);
@@ -17771,6 +17777,8 @@ out:
 			tg3_phy_start(tp);
 	}
 
+unlock:
+	rtnl_unlock();
 	return err;
 }
 
@@ -17779,10 +17787,12 @@ static int tg3_resume(struct device *device)
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct tg3 *tp = netdev_priv(dev);
-	int err;
+	int err = 0;
+
+	rtnl_lock();
 
 	if (!netif_running(dev))
-		return 0;
+		goto unlock;
 
 	netif_device_attach(dev);
 
@@ -17806,6 +17816,8 @@ out:
 	if (!err)
 		tg3_phy_start(tp);
 
+unlock:
+	rtnl_unlock();
 	return err;
 }
 #endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 33120d156668..07369f32e8bb 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4165,6 +4165,14 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode,
 	return 0;
 }
 
+bool pci_device_is_present(struct pci_dev *pdev)
+{
+	u32 v;
+
+	return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0);
+}
+EXPORT_SYMBOL_GPL(pci_device_is_present);
+
 #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
 static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
 static DEFINE_SPINLOCK(resource_alignment_lock);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1084a15175e0..25a389566e47 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -960,6 +960,7 @@ void pci_update_resource(struct pci_dev *dev, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
+bool pci_device_is_present(struct pci_dev *pdev);
 
 /* ROM control related routines */
 int pci_enable_rom(struct pci_dev *pdev);
-- 
cgit v1.2.3


From 9f740ffa8134aaef770f964485dac3ed6780d8b7 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 27 Nov 2013 22:19:00 +0000
Subject: HID: hid-sensor-hub: Add logical min and max

Exporting logical minimum and maximum of HID fields as part of the
hid sensor attribute info. This can be used for range checking and
to calculate enumeration base for NAry fields of HID sensor hub.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/hid/hid-sensor-hub.c   | 20 ++++++++------------
 include/linux/hid-sensor-hub.h |  2 ++
 2 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index a184e1921c11..d87f7cb4bee5 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -112,13 +112,15 @@ static int sensor_hub_get_physical_device_count(
 
 static void sensor_hub_fill_attr_info(
 		struct hid_sensor_hub_attribute_info *info,
-		s32 index, s32 report_id, s32 units, s32 unit_expo, s32 size)
+		s32 index, s32 report_id, struct hid_field *field)
 {
 	info->index = index;
 	info->report_id = report_id;
-	info->units = units;
-	info->unit_expo = unit_expo;
-	info->size = size/8;
+	info->units = field->unit;
+	info->unit_expo = field->unit_exponent;
+	info->size = (field->report_size * field->report_count)/8;
+	info->logical_minimum = field->logical_minimum;
+	info->logical_maximum = field->logical_maximum;
 }
 
 static struct hid_sensor_hub_callbacks *sensor_hub_get_callback(
@@ -325,9 +327,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
 			if (field->physical == usage_id &&
 				field->logical == attr_usage_id) {
 				sensor_hub_fill_attr_info(info, i, report->id,
-					field->unit, field->unit_exponent,
-					field->report_size *
-							field->report_count);
+							  field);
 				ret = 0;
 			} else {
 				for (j = 0; j < field->maxusage; ++j) {
@@ -336,11 +336,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
 					field->usage[j].collection_index ==
 					collection_index) {
 						sensor_hub_fill_attr_info(info,
-							i, report->id,
-							field->unit,
-							field->unit_exponent,
-							field->report_size *
-							field->report_count);
+							  i, report->id, field);
 						ret = 0;
 						break;
 					}
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 206a2af6b62b..b914ca3f57ba 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -42,6 +42,8 @@ struct hid_sensor_hub_attribute_info {
 	s32 units;
 	s32 unit_expo;
 	s32 size;
+	s32 logical_minimum;
+	s32 logical_maximum;
 };
 
 /**
-- 
cgit v1.2.3


From 751d17e23a9f7c8e0bca5c0b2e8d39af655ecd2a Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 27 Nov 2013 22:19:00 +0000
Subject: iio: hid-sensors: Fix power and report state

In the original HID sensor hub firmwares all Named array enums were
to 0-based. But the most recent hub implemented as 1-based,
because of the implementation by one of the major OS vendor.
Using logical minimum for the field as the base of enum. So we add
logical minimum to the selector values before setting those fields.
Some sensor hub FWs already changed logical minimum from 0 to 1
to reflect this and hope every other vendor will follow.
There is no easy way to add a common HID quirk for NAry elements,
even if the standard specifies these field as NAry, the collection
used to describe selectors is still just "logical".

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/common/hid-sensors/Kconfig              |  9 ---------
 drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 20 +++++++++++++++-----
 include/linux/hid-sensor-ids.h                      | 12 ++++++++++++
 3 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/common/hid-sensors/Kconfig b/drivers/iio/common/hid-sensors/Kconfig
index 1178121b55b0..39188b72cd3b 100644
--- a/drivers/iio/common/hid-sensors/Kconfig
+++ b/drivers/iio/common/hid-sensors/Kconfig
@@ -25,13 +25,4 @@ config HID_SENSOR_IIO_TRIGGER
 	  If this driver is compiled as a module, it will be named
 	  hid-sensor-trigger.
 
-config HID_SENSOR_ENUM_BASE_QUIRKS
-	bool "ENUM base quirks for HID Sensor IIO drivers"
-	depends on HID_SENSOR_IIO_COMMON
-	help
-	  Say yes here to build support for sensor hub FW using
-	  enumeration, which is using 1 as base instead of 0.
-	  Since logical minimum is still set 0 instead of 1,
-	  there is no easy way to differentiate.
-
 endmenu
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index bbd6426c9726..7dcf83998e6f 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -33,24 +33,34 @@ static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig,
 {
 	struct hid_sensor_common *st = iio_trigger_get_drvdata(trig);
 	int state_val;
+	int report_val;
 
 	if (state) {
 		if (sensor_hub_device_open(st->hsdev))
 			return -EIO;
-	} else
+		state_val =
+		HID_USAGE_SENSOR_PROP_POWER_STATE_D0_FULL_POWER_ENUM;
+		report_val =
+		HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM;
+
+	} else {
 		sensor_hub_device_close(st->hsdev);
+		state_val =
+		HID_USAGE_SENSOR_PROP_POWER_STATE_D4_POWER_OFF_ENUM;
+		report_val =
+		HID_USAGE_SENSOR_PROP_REPORTING_STATE_NO_EVENTS_ENUM;
+	}
 
-	state_val = state ? 1 : 0;
-	if (IS_ENABLED(CONFIG_HID_SENSOR_ENUM_BASE_QUIRKS))
-		++state_val;
 	st->data_ready = state;
+	state_val += st->power_state.logical_minimum;
+	report_val += st->report_state.logical_minimum;
 	sensor_hub_set_feature(st->hsdev, st->power_state.report_id,
 					st->power_state.index,
 					(s32)state_val);
 
 	sensor_hub_set_feature(st->hsdev, st->report_state.report_id,
 					st->report_state.index,
-					(s32)state_val);
+					(s32)report_val);
 
 	return 0;
 }
diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index 4f945d3ed49f..8323775ac21d 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -117,4 +117,16 @@
 #define HID_USAGE_SENSOR_PROP_REPORT_STATE			0x200316
 #define HID_USAGE_SENSOR_PROY_POWER_STATE			0x200319
 
+/* Power state enumerations */
+#define HID_USAGE_SENSOR_PROP_POWER_STATE_UNDEFINED_ENUM		0x00
+#define HID_USAGE_SENSOR_PROP_POWER_STATE_D0_FULL_POWER_ENUM		0x01
+#define HID_USAGE_SENSOR_PROP_POWER_STATE_D1_LOW_POWER_ENUM		0x02
+#define HID_USAGE_SENSOR_PROP_POWER_STATE_D2_STANDBY_WITH_WAKE_ENUM	0x03
+#define HID_USAGE_SENSOR_PROP_POWER_STATE_D3_SLEEP_WITH_WAKE_ENUM	0x04
+#define HID_USAGE_SENSOR_PROP_POWER_STATE_D4_POWER_OFF_ENUM		0x05
+
+/* Report State enumerations */
+#define HID_USAGE_SENSOR_PROP_REPORTING_STATE_NO_EVENTS_ENUM		0x00
+#define HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM		0x01
+
 #endif
-- 
cgit v1.2.3


From 7ce5a27f2ef8ed4f8f892f46391f933c0e2ac0f1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 2 Dec 2013 17:26:05 -0500
Subject: Revert "net: Handle CHECKSUM_COMPLETE more adequately in
 pskb_trim_rcsum()."

This reverts commit 018c5bba052b3a383d83cf0c756da0e7bc748397.

It causes regressions for people using chips driven by the sungem
driver.  Suspicion is that the skb->csum value isn't being adjusted
properly.

The change also has a bug in that if __pskb_trim() fails, we'll leave
a corruped skb->csum value in there.  We would really need to revert
it to it's original value in that case.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bec1cc7d5e3c..215b5ea1cb30 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2263,6 +2263,24 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb,
 
 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
 
+/**
+ *	pskb_trim_rcsum - trim received skb and update checksum
+ *	@skb: buffer to trim
+ *	@len: new length
+ *
+ *	This is exactly the same as pskb_trim except that it ensures the
+ *	checksum of received packets are still valid after the operation.
+ */
+
+static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	if (likely(len >= skb->len))
+		return 0;
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+	return __pskb_trim(skb, len);
+}
+
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next;					\
 		     skb != (struct sk_buff *)(queue);				\
@@ -2360,27 +2378,6 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 
-/**
- *	pskb_trim_rcsum - trim received skb and update checksum
- *	@skb: buffer to trim
- *	@len: new length
- *
- *	This is exactly the same as pskb_trim except that it ensures the
- *	checksum of received packets are still valid after the operation.
- */
-
-static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
-{
-	if (likely(len >= skb->len))
-		return 0;
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		__wsum adj = skb_checksum(skb, len, skb->len - len, 0);
-
-		skb->csum = csum_sub(skb->csum, adj);
-	}
-	return __pskb_trim(skb, len);
-}
-
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
 {
-- 
cgit v1.2.3


From 471e42ad148c05d091219096726d751684ebf918 Mon Sep 17 00:00:00 2001
From: Thomas Pugliese <thomas.pugliese@gmail.com>
Date: Mon, 2 Dec 2013 15:39:45 -0600
Subject: usb: wusbcore: fix deadlock in wusbhc_gtk_rekey

When multiple wireless USB devices are connected and one of the devices
disconnects, the host will distribute a new group key to the remaining
devicese using wusbhc_gtk_rekey.  wusbhc_gtk_rekey takes the
wusbhc->mutex and holds it while it submits a URB to set the new key.
This causes a deadlock in wa_urb_enqueue when it calls a device lookup
helper function that takes the same lock.

This patch changes wusbhc_gtk_rekey to submit a work item to set the GTK
so that the URB is submitted without holding wusbhc->mutex.

Signed-off-by: Thomas Pugliese <thomas.pugliese@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/wusbcore/devconnect.c | 24 +---------
 drivers/usb/wusbcore/security.c   | 98 +++++++++++++++++++++++----------------
 drivers/usb/wusbcore/wusbhc.h     |  6 +--
 include/linux/usb/wusb.h          |  2 +
 4 files changed, 62 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
index 5107ca999d4c..f14e7929ba22 100644
--- a/drivers/usb/wusbcore/devconnect.c
+++ b/drivers/usb/wusbcore/devconnect.c
@@ -97,18 +97,12 @@ static void wusbhc_devconnect_acked_work(struct work_struct *work);
 
 static void wusb_dev_free(struct wusb_dev *wusb_dev)
 {
-	if (wusb_dev) {
-		kfree(wusb_dev->set_gtk_req);
-		usb_free_urb(wusb_dev->set_gtk_urb);
-		kfree(wusb_dev);
-	}
+	kfree(wusb_dev);
 }
 
 static struct wusb_dev *wusb_dev_alloc(struct wusbhc *wusbhc)
 {
 	struct wusb_dev *wusb_dev;
-	struct urb *urb;
-	struct usb_ctrlrequest *req;
 
 	wusb_dev = kzalloc(sizeof(*wusb_dev), GFP_KERNEL);
 	if (wusb_dev == NULL)
@@ -118,22 +112,6 @@ static struct wusb_dev *wusb_dev_alloc(struct wusbhc *wusbhc)
 
 	INIT_WORK(&wusb_dev->devconnect_acked_work, wusbhc_devconnect_acked_work);
 
-	urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (urb == NULL)
-		goto err;
-	wusb_dev->set_gtk_urb = urb;
-
-	req = kmalloc(sizeof(*req), GFP_KERNEL);
-	if (req == NULL)
-		goto err;
-	wusb_dev->set_gtk_req = req;
-
-	req->bRequestType = USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE;
-	req->bRequest = USB_REQ_SET_DESCRIPTOR;
-	req->wValue = cpu_to_le16(USB_DT_KEY << 8 | wusbhc->gtk_index);
-	req->wIndex = 0;
-	req->wLength = cpu_to_le16(wusbhc->gtk.descr.bLength);
-
 	return wusb_dev;
 err:
 	wusb_dev_free(wusb_dev);
diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c
index dd88441c8f78..4c40d0dbf53d 100644
--- a/drivers/usb/wusbcore/security.c
+++ b/drivers/usb/wusbcore/security.c
@@ -29,19 +29,16 @@
 #include <linux/export.h>
 #include "wusbhc.h"
 
-static void wusbhc_set_gtk_callback(struct urb *urb);
-static void wusbhc_gtk_rekey_done_work(struct work_struct *work);
+static void wusbhc_gtk_rekey_work(struct work_struct *work);
 
 int wusbhc_sec_create(struct wusbhc *wusbhc)
 {
 	wusbhc->gtk.descr.bLength = sizeof(wusbhc->gtk.descr) + sizeof(wusbhc->gtk.data);
 	wusbhc->gtk.descr.bDescriptorType = USB_DT_KEY;
 	wusbhc->gtk.descr.bReserved = 0;
+	wusbhc->gtk_index = 0;
 
-	wusbhc->gtk_index = wusb_key_index(0, WUSB_KEY_INDEX_TYPE_GTK,
-					   WUSB_KEY_INDEX_ORIGINATOR_HOST);
-
-	INIT_WORK(&wusbhc->gtk_rekey_done_work, wusbhc_gtk_rekey_done_work);
+	INIT_WORK(&wusbhc->gtk_rekey_work, wusbhc_gtk_rekey_work);
 
 	return 0;
 }
@@ -113,7 +110,7 @@ int wusbhc_sec_start(struct wusbhc *wusbhc)
 	wusbhc_generate_gtk(wusbhc);
 
 	result = wusbhc->set_gtk(wusbhc, wusbhc->gtk_tkid,
-				 &wusbhc->gtk.descr.bKeyData, key_size);
+				&wusbhc->gtk.descr.bKeyData, key_size);
 	if (result < 0)
 		dev_err(wusbhc->dev, "cannot set GTK for the host: %d\n",
 			result);
@@ -129,7 +126,7 @@ int wusbhc_sec_start(struct wusbhc *wusbhc)
  */
 void wusbhc_sec_stop(struct wusbhc *wusbhc)
 {
-	cancel_work_sync(&wusbhc->gtk_rekey_done_work);
+	cancel_work_sync(&wusbhc->gtk_rekey_work);
 }
 
 
@@ -185,12 +182,14 @@ static int wusb_dev_set_encryption(struct usb_device *usb_dev, int value)
 static int wusb_dev_set_gtk(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 {
 	struct usb_device *usb_dev = wusb_dev->usb_dev;
+	u8 key_index = wusb_key_index(wusbhc->gtk_index,
+		WUSB_KEY_INDEX_TYPE_GTK, WUSB_KEY_INDEX_ORIGINATOR_HOST);
 
 	return usb_control_msg(
 		usb_dev, usb_sndctrlpipe(usb_dev, 0),
 		USB_REQ_SET_DESCRIPTOR,
 		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-		USB_DT_KEY << 8 | wusbhc->gtk_index, 0,
+		USB_DT_KEY << 8 | key_index, 0,
 		&wusbhc->gtk.descr, wusbhc->gtk.descr.bLength,
 		1000);
 }
@@ -520,24 +519,55 @@ error_kzalloc:
  * Once all connected and authenticated devices have received the new
  * GTK, switch the host to using it.
  */
-static void wusbhc_gtk_rekey_done_work(struct work_struct *work)
+static void wusbhc_gtk_rekey_work(struct work_struct *work)
 {
-	struct wusbhc *wusbhc = container_of(work, struct wusbhc, gtk_rekey_done_work);
+	struct wusbhc *wusbhc = container_of(work,
+					struct wusbhc, gtk_rekey_work);
 	size_t key_size = sizeof(wusbhc->gtk.data);
+	int port_idx;
+	struct wusb_dev *wusb_dev, *wusb_dev_next;
+	LIST_HEAD(rekey_list);
 
 	mutex_lock(&wusbhc->mutex);
+	/* generate the new key */
+	wusbhc_generate_gtk(wusbhc);
+	/* roll the gtk index. */
+	wusbhc->gtk_index = (wusbhc->gtk_index + 1) % (WUSB_KEY_INDEX_MAX + 1);
+	/*
+	 * Save all connected devices on a list while holding wusbhc->mutex and
+	 * take a reference to each one.  Then submit the set key request to
+	 * them after releasing the lock in order to avoid a deadlock.
+	 */
+	for (port_idx = 0; port_idx < wusbhc->ports_max; port_idx++) {
+		wusb_dev = wusbhc->port[port_idx].wusb_dev;
+		if (!wusb_dev || !wusb_dev->usb_dev
+			|| !wusb_dev->usb_dev->authenticated)
+			continue;
 
-	if (--wusbhc->pending_set_gtks == 0)
-		wusbhc->set_gtk(wusbhc, wusbhc->gtk_tkid, &wusbhc->gtk.descr.bKeyData, key_size);
-
+		wusb_dev_get(wusb_dev);
+		list_add_tail(&wusb_dev->rekey_node, &rekey_list);
+	}
 	mutex_unlock(&wusbhc->mutex);
-}
 
-static void wusbhc_set_gtk_callback(struct urb *urb)
-{
-	struct wusbhc *wusbhc = urb->context;
+	/* Submit the rekey requests without holding wusbhc->mutex. */
+	list_for_each_entry_safe(wusb_dev, wusb_dev_next, &rekey_list,
+		rekey_node) {
+		list_del_init(&wusb_dev->rekey_node);
+		dev_dbg(&wusb_dev->usb_dev->dev, "%s: rekey device at port %d\n",
+			__func__, wusb_dev->port_idx);
+
+		if (wusb_dev_set_gtk(wusbhc, wusb_dev) < 0) {
+			dev_err(&wusb_dev->usb_dev->dev, "%s: rekey device at port %d failed\n",
+				__func__, wusb_dev->port_idx);
+		}
+		wusb_dev_put(wusb_dev);
+	}
 
-	queue_work(wusbd, &wusbhc->gtk_rekey_done_work);
+	/* Switch the host controller to use the new GTK. */
+	mutex_lock(&wusbhc->mutex);
+	wusbhc->set_gtk(wusbhc, wusbhc->gtk_tkid,
+		&wusbhc->gtk.descr.bKeyData, key_size);
+	mutex_unlock(&wusbhc->mutex);
 }
 
 /**
@@ -553,26 +583,12 @@ static void wusbhc_set_gtk_callback(struct urb *urb)
  */
 void wusbhc_gtk_rekey(struct wusbhc *wusbhc)
 {
-	static const size_t key_size = sizeof(wusbhc->gtk.data);
-	int p;
-
-	wusbhc_generate_gtk(wusbhc);
-
-	for (p = 0; p < wusbhc->ports_max; p++) {
-		struct wusb_dev *wusb_dev;
-
-		wusb_dev = wusbhc->port[p].wusb_dev;
-		if (!wusb_dev || !wusb_dev->usb_dev || !wusb_dev->usb_dev->authenticated)
-			continue;
-
-		usb_fill_control_urb(wusb_dev->set_gtk_urb, wusb_dev->usb_dev,
-				     usb_sndctrlpipe(wusb_dev->usb_dev, 0),
-				     (void *)wusb_dev->set_gtk_req,
-				     &wusbhc->gtk.descr, wusbhc->gtk.descr.bLength,
-				     wusbhc_set_gtk_callback, wusbhc);
-		if (usb_submit_urb(wusb_dev->set_gtk_urb, GFP_KERNEL) == 0)
-			wusbhc->pending_set_gtks++;
-	}
-	if (wusbhc->pending_set_gtks == 0)
-		wusbhc->set_gtk(wusbhc, wusbhc->gtk_tkid, &wusbhc->gtk.descr.bKeyData, key_size);
+	/*
+	 * We need to submit a URB to the downstream WUSB devices in order to
+	 * change the group key.  This can't be done while holding the
+	 * wusbhc->mutex since that is also taken in the urb_enqueue routine
+	 * and will cause a deadlock.  Instead, queue a work item to do
+	 * it when the lock is not held
+	 */
+	queue_work(wusbd, &wusbhc->gtk_rekey_work);
 }
diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h
index 711b1952b114..6bd3b819a6b5 100644
--- a/drivers/usb/wusbcore/wusbhc.h
+++ b/drivers/usb/wusbcore/wusbhc.h
@@ -97,6 +97,7 @@ struct wusb_dev {
 	struct kref refcnt;
 	struct wusbhc *wusbhc;
 	struct list_head cack_node;	/* Connect-Ack list */
+	struct list_head rekey_node;	/* GTK rekey list */
 	u8 port_idx;
 	u8 addr;
 	u8 beacon_type:4;
@@ -107,8 +108,6 @@ struct wusb_dev {
 	struct usb_wireless_cap_descriptor *wusb_cap_descr;
 	struct uwb_mas_bm availability;
 	struct work_struct devconnect_acked_work;
-	struct urb *set_gtk_urb;
-	struct usb_ctrlrequest *set_gtk_req;
 	struct usb_device *usb_dev;
 };
 
@@ -296,8 +295,7 @@ struct wusbhc {
 	} __attribute__((packed)) gtk;
 	u8 gtk_index;
 	u32 gtk_tkid;
-	struct work_struct gtk_rekey_done_work;
-	int pending_set_gtks;
+	struct work_struct gtk_rekey_work;
 
 	struct usb_encryption_descriptor *ccm1_etd;
 };
diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h
index 0c4d4ca370ec..eeb28329fa3c 100644
--- a/include/linux/usb/wusb.h
+++ b/include/linux/usb/wusb.h
@@ -271,6 +271,8 @@ static inline u8 wusb_key_index(int index, int type, int originator)
 #define WUSB_KEY_INDEX_TYPE_GTK			2
 #define WUSB_KEY_INDEX_ORIGINATOR_HOST		0
 #define WUSB_KEY_INDEX_ORIGINATOR_DEVICE	1
+/* bits 0-3 used for the key index. */
+#define WUSB_KEY_INDEX_MAX			15
 
 /* A CCM Nonce, defined in WUSB1.0[6.4.1] */
 struct aes_ccm_nonce {
-- 
cgit v1.2.3


From c9a9972b6f093e4e2f81f58892a7523df894144d Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Mon, 25 Nov 2013 18:34:24 +0900
Subject: gpiolib: add missing declarations

Add declaration of 'struct of_phandle_args' to avoid the following
warning:

  In file included from arch/arm/mach-tegra/board-paz00.c:21:0:
  include/linux/gpio/driver.h:102:17: warning: 'struct of_phandle_args' declared inside parameter list
  include/linux/gpio/driver.h:102:17: warning: its scope is only this definition or declaration, which is probably not what you want

Also proactively add other definitions/includes that could be missing
in other contexts.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Reported-by: Stephen Warren <swarren@wwwdotorg.org>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 82eac610ce1a..3ea2cf6b0e6c 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -2,9 +2,12 @@
 #define __LINUX_GPIO_DRIVER_H
 
 #include <linux/types.h>
+#include <linux/module.h>
 
 struct device;
 struct gpio_desc;
+struct of_phandle_args;
+struct device_node;
 struct seq_file;
 
 /**
-- 
cgit v1.2.3


From 1431fb31ecbaba1b5718006128f0f2ed0b94e1c3 Mon Sep 17 00:00:00 2001
From: Paul Durrant <Paul.Durrant@citrix.com>
Date: Tue, 3 Dec 2013 17:39:29 +0000
Subject: xen-netback: fix fragment detection in checksum setup

The code to detect fragments in checksum_setup() was missing for IPv4 and
too eager for IPv6. (It transpires that Windows seems to send IPv6 packets
with a fragment header even if they are not a fragment - i.e. offset is zero,
and M bit is not set).

This patch also incorporates a fix to callers of maybe_pull_tail() where
skb->network_header was being erroneously added to the length argument.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
cc: David Miller <davem@davemloft.net>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 236 ++++++++++++++++++++++----------------
 include/linux/ipv6.h              |   1 +
 include/net/ipv6.h                |   3 +-
 3 files changed, 140 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 64f0e0d18b81..acf13920e6d1 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1149,49 +1149,72 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 	return 0;
 }
 
-static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
+static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len,
+				  unsigned int max)
 {
-	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
-		/* If we need to pullup then pullup to the max, so we
-		 * won't need to do it again.
-		 */
-		int target = min_t(int, skb->len, MAX_TCP_HEADER);
-		__pskb_pull_tail(skb, target - skb_headlen(skb));
-	}
+	if (skb_headlen(skb) >= len)
+		return 0;
+
+	/* If we need to pullup then pullup to the max, so we
+	 * won't need to do it again.
+	 */
+	if (max > skb->len)
+		max = skb->len;
+
+	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	if (skb_headlen(skb) < len)
+		return -EPROTO;
+
+	return 0;
 }
 
+/* This value should be large enough to cover a tagged ethernet header plus
+ * maximally sized IP and TCP or UDP headers.
+ */
+#define MAX_IP_HDR_LEN 128
+
 static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 			     int recalculate_partial_csum)
 {
-	struct iphdr *iph = (void *)skb->data;
-	unsigned int header_size;
 	unsigned int off;
-	int err = -EPROTO;
+	bool fragment;
+	int err;
+
+	fragment = false;
+
+	err = maybe_pull_tail(skb,
+			      sizeof(struct iphdr),
+			      MAX_IP_HDR_LEN);
+	if (err < 0)
+		goto out;
 
-	off = sizeof(struct iphdr);
+	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
+		fragment = true;
 
-	header_size = skb->network_header + off + MAX_IPOPTLEN;
-	maybe_pull_tail(skb, header_size);
+	off = ip_hdrlen(skb);
 
-	off = iph->ihl * 4;
+	err = -EPROTO;
 
-	switch (iph->protocol) {
+	switch (ip_hdr(skb)->protocol) {
 	case IPPROTO_TCP:
 		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct tcphdr *tcph = tcp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct tcphdr);
-			maybe_pull_tail(skb, header_size);
-
-			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - off,
-							 IPPROTO_TCP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct tcphdr),
+					      MAX_IP_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			tcp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr,
+						   skb->len - off,
+						   IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
@@ -1200,24 +1223,20 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct udphdr *udph = udp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct udphdr);
-			maybe_pull_tail(skb, header_size);
-
-			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - off,
-							 IPPROTO_UDP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct udphdr),
+					      MAX_IP_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			udp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr,
+						   skb->len - off,
+						   IPPROTO_UDP, 0);
 		}
 		break;
 	default:
-		if (net_ratelimit())
-			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, "
-				   "dropping a protocol %d packet\n",
-				   iph->protocol);
 		goto out;
 	}
 
@@ -1227,75 +1246,99 @@ out:
 	return err;
 }
 
+/* This value should be large enough to cover a tagged ethernet header plus
+ * an IPv6 header, all options, and a maximal TCP or UDP header.
+ */
+#define MAX_IPV6_HDR_LEN 256
+
+#define OPT_HDR(type, skb, off) \
+	(type *)(skb_network_header(skb) + (off))
+
 static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			       int recalculate_partial_csum)
 {
-	int err = -EPROTO;
-	struct ipv6hdr *ipv6h = (void *)skb->data;
+	int err;
 	u8 nexthdr;
-	unsigned int header_size;
 	unsigned int off;
+	unsigned int len;
 	bool fragment;
 	bool done;
 
+	fragment = false;
 	done = false;
 
 	off = sizeof(struct ipv6hdr);
 
-	header_size = skb->network_header + off;
-	maybe_pull_tail(skb, header_size);
+	err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
+	if (err < 0)
+		goto out;
 
-	nexthdr = ipv6h->nexthdr;
+	nexthdr = ipv6_hdr(skb)->nexthdr;
 
-	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
-	       !done) {
+	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
+	while (off <= len && !done) {
 		switch (nexthdr) {
 		case IPPROTO_DSTOPTS:
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_ROUTING: {
-			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+			struct ipv6_opt_hdr *hp;
 
-			header_size = skb->network_header +
-				off +
-				sizeof(struct ipv6_opt_hdr);
-			maybe_pull_tail(skb, header_size);
+			err = maybe_pull_tail(skb,
+					      off +
+					      sizeof(struct ipv6_opt_hdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
 
+			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
 			nexthdr = hp->nexthdr;
 			off += ipv6_optlen(hp);
 			break;
 		}
 		case IPPROTO_AH: {
-			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+			struct ip_auth_hdr *hp;
+
+			err = maybe_pull_tail(skb,
+					      off +
+					      sizeof(struct ip_auth_hdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			hp = OPT_HDR(struct ip_auth_hdr, skb, off);
+			nexthdr = hp->nexthdr;
+			off += ipv6_authlen(hp);
+			break;
+		}
+		case IPPROTO_FRAGMENT: {
+			struct frag_hdr *hp;
 
-			header_size = skb->network_header +
-				off +
-				sizeof(struct ip_auth_hdr);
-			maybe_pull_tail(skb, header_size);
+			err = maybe_pull_tail(skb,
+					      off +
+					      sizeof(struct frag_hdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			hp = OPT_HDR(struct frag_hdr, skb, off);
+
+			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
+				fragment = true;
 
 			nexthdr = hp->nexthdr;
-			off += (hp->hdrlen+2)<<2;
+			off += sizeof(struct frag_hdr);
 			break;
 		}
-		case IPPROTO_FRAGMENT:
-			fragment = true;
-			/* fall through */
 		default:
 			done = true;
 			break;
 		}
 	}
 
-	if (!done) {
-		if (net_ratelimit())
-			netdev_err(vif->dev, "Failed to parse packet header\n");
-		goto out;
-	}
+	err = -EPROTO;
 
-	if (fragment) {
-		if (net_ratelimit())
-			netdev_err(vif->dev, "Packet is a fragment!\n");
+	if (!done || fragment)
 		goto out;
-	}
 
 	switch (nexthdr) {
 	case IPPROTO_TCP:
@@ -1304,17 +1347,17 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct tcphdr *tcph = tcp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct tcphdr);
-			maybe_pull_tail(skb, header_size);
-
-			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
-						       &ipv6h->daddr,
-						       skb->len - off,
-						       IPPROTO_TCP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct tcphdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			tcp_hdr(skb)->check =
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr,
+						 skb->len - off,
+						 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
@@ -1323,25 +1366,20 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct udphdr *udph = udp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct udphdr);
-			maybe_pull_tail(skb, header_size);
-
-			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
-						       &ipv6h->daddr,
-						       skb->len - off,
-						       IPPROTO_UDP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct udphdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			udp_hdr(skb)->check =
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr,
+						 skb->len - off,
+						 IPPROTO_UDP, 0);
 		}
 		break;
 	default:
-		if (net_ratelimit())
-			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, "
-				   "dropping a protocol %d packet\n",
-				   nexthdr);
 		goto out;
 	}
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5d89d1b808a6..c56c350324e4 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -4,6 +4,7 @@
 #include <uapi/linux/ipv6.h>
 
 #define ipv6_optlen(p)  (((p)->hdrlen+1) << 3)
+#define ipv6_authlen(p) (((p)->hdrlen+2) << 2)
 /*
  * This structure contains configuration options per IPv6 link.
  */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index eb198acaac1d..488316e339a1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -110,7 +110,8 @@ struct frag_hdr {
 	__be32	identification;
 };
 
-#define	IP6_MF	0x0001
+#define	IP6_MF		0x0001
+#define	IP6_OFFSET	0xFFF8
 
 #include <net/sock.h>
 
-- 
cgit v1.2.3


From 4fc9bbf98fd66f879e628d8537ba7c240be2b58e Mon Sep 17 00:00:00 2001
From: Khalid Aziz <khalid.aziz@oracle.com>
Date: Wed, 27 Nov 2013 15:19:25 -0700
Subject: PCI: Disable Bus Master only on kexec reboot

Add a flag to tell the PCI subsystem that kernel is shutting down in
preparation to kexec a kernel.  Add code in PCI subsystem to use this flag
to clear Bus Master bit on PCI devices only in case of kexec reboot.

This fixes a power-off problem on Acer Aspire V5-573G and likely other
machines and avoids any other issues caused by clearing Bus Master bit on
PCI devices in normal shutdown path.  The problem was introduced by
b566a22c2332 ("PCI: disable Bus Master on PCI device shutdown").

This patch is based on discussion at
http://marc.info/?l=linux-pci&m=138425645204355&w=2

Link: https://bugzilla.kernel.org/show_bug.cgi?id=63861
Reported-by: Chang Liu <cl91tp@gmail.com>
Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: stable@vger.kernel.org	# v3.5+
---
 drivers/pci/pci-driver.c | 12 +++++++++---
 include/linux/kexec.h    |  3 +++
 kernel/kexec.c           |  4 ++++
 3 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 7edd5c307446..25f0bc659164 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -19,6 +19,7 @@
 #include <linux/cpu.h>
 #include <linux/pm_runtime.h>
 #include <linux/suspend.h>
+#include <linux/kexec.h>
 #include "pci.h"
 
 struct pci_dynid {
@@ -415,12 +416,17 @@ static void pci_device_shutdown(struct device *dev)
 	pci_msi_shutdown(pci_dev);
 	pci_msix_shutdown(pci_dev);
 
+#ifdef CONFIG_KEXEC
 	/*
-	 * Turn off Bus Master bit on the device to tell it to not
-	 * continue to do DMA. Don't touch devices in D3cold or unknown states.
+	 * If this is a kexec reboot, turn off Bus Master bit on the
+	 * device to tell it to not continue to do DMA. Don't touch
+	 * devices in D3cold or unknown states.
+	 * If it is not a kexec reboot, firmware will hit the PCI
+	 * devices with big hammer and stop their DMA any way.
 	 */
-	if (pci_dev->current_state <= PCI_D3hot)
+	if (kexec_in_progress && (pci_dev->current_state <= PCI_D3hot))
 		pci_clear_master(pci_dev);
+#endif
 }
 
 #ifdef CONFIG_PM
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d78d28a733b1..5fd33dc1fe3a 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -198,6 +198,9 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
+/* flag to track if kexec reboot is in progress */
+extern bool kexec_in_progress;
+
 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
 		unsigned long long *crash_size, unsigned long long *crash_base);
 int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 490afc03627e..d0d8fca54065 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -47,6 +47,9 @@ u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 size_t vmcoreinfo_size;
 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 
+/* Flag to indicate we are going to kexec a new kernel */
+bool kexec_in_progress = false;
+
 /* Location of the reserved area for the crash kernel */
 struct resource crashk_res = {
 	.name  = "Crash kernel",
@@ -1675,6 +1678,7 @@ int kernel_kexec(void)
 	} else
 #endif
 	{
+		kexec_in_progress = true;
 		kernel_restart_prepare(NULL);
 		printk(KERN_EMERG "Starting new kernel\n");
 		machine_shutdown();
-- 
cgit v1.2.3


From 12205a4b79bef56ef618a4b7caa840c5c971cff2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 8 Dec 2013 01:04:17 +0100
Subject: Revert "cpufreq: suspend governors on system suspend/hibernate"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 5a87182aa21d (cpufreq: suspend governors on system
suspend/hibernate) causes hibernation problems to happen on
Bjørn Mork's and Paul Bolle's systems, so revert it.

Fixes: 5a87182aa21d (cpufreq: suspend governors on system suspend/hibernate)
Reported-by: Bjørn Mork <bjorn@mork.no>
Reported-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/main.c |  3 ---
 drivers/cpufreq/cpufreq.c | 43 -------------------------------------------
 include/linux/cpufreq.h   |  8 --------
 3 files changed, 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e3219dfd736c..1b41fca3d65a 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -29,7 +29,6 @@
 #include <linux/async.h>
 #include <linux/suspend.h>
 #include <trace/events/power.h>
-#include <linux/cpufreq.h>
 #include <linux/cpuidle.h>
 #include <linux/timer.h>
 
@@ -541,7 +540,6 @@ static void dpm_resume_noirq(pm_message_t state)
 	dpm_show_time(starttime, state, "noirq");
 	resume_device_irqs();
 	cpuidle_resume();
-	cpufreq_resume();
 }
 
 /**
@@ -957,7 +955,6 @@ static int dpm_suspend_noirq(pm_message_t state)
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
-	cpufreq_suspend();
 	cpuidle_pause();
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 81e9d4412db8..b7c3b877da44 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -26,7 +26,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <linux/suspend.h>
 #include <linux/syscore_ops.h>
 #include <linux/tick.h>
 #include <trace/events/power.h>
@@ -48,9 +47,6 @@ static LIST_HEAD(cpufreq_policy_list);
 static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
 #endif
 
-/* Flag to suspend/resume CPUFreq governors */
-static bool cpufreq_suspended;
-
 static inline bool has_target(void)
 {
 	return cpufreq_driver->target_index || cpufreq_driver->target;
@@ -1466,41 +1462,6 @@ static struct subsys_interface cpufreq_interface = {
 	.remove_dev	= cpufreq_remove_dev,
 };
 
-void cpufreq_suspend(void)
-{
-	struct cpufreq_policy *policy;
-
-	if (!has_target())
-		return;
-
-	pr_debug("%s: Suspending Governors\n", __func__);
-
-	list_for_each_entry(policy, &cpufreq_policy_list, policy_list)
-		if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP))
-			pr_err("%s: Failed to stop governor for policy: %p\n",
-				__func__, policy);
-
-	cpufreq_suspended = true;
-}
-
-void cpufreq_resume(void)
-{
-	struct cpufreq_policy *policy;
-
-	if (!has_target())
-		return;
-
-	pr_debug("%s: Resuming Governors\n", __func__);
-
-	cpufreq_suspended = false;
-
-	list_for_each_entry(policy, &cpufreq_policy_list, policy_list)
-		if (__cpufreq_governor(policy, CPUFREQ_GOV_START)
-		    || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
-			pr_err("%s: Failed to start governor for policy: %p\n",
-				__func__, policy);
-}
-
 /**
  * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
  *
@@ -1803,10 +1764,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 	struct cpufreq_governor *gov = NULL;
 #endif
 
-	/* Don't start any governor operations if we are entering suspend */
-	if (cpufreq_suspended)
-		return 0;
-
 	if (policy->governor->max_transition_latency &&
 	    policy->cpuinfo.transition_latency >
 	    policy->governor->max_transition_latency) {
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index ee5fe9d77ae8..dc196bbcf227 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -280,14 +280,6 @@ cpufreq_verify_within_cpu_limits(struct cpufreq_policy *policy)
 			policy->cpuinfo.max_freq);
 }
 
-#ifdef CONFIG_CPU_FREQ
-void cpufreq_suspend(void);
-void cpufreq_resume(void);
-#else
-static inline void cpufreq_suspend(void) {}
-static inline void cpufreq_resume(void) {}
-#endif
-
 /*********************************************************************
  *                     CPUFREQ NOTIFIER INTERFACE                    *
  *********************************************************************/
-- 
cgit v1.2.3


From 503cf95c061a0551eb684da364509297efbe55d9 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Tue, 10 Dec 2013 14:56:06 -0800
Subject: x86, build, icc: Remove uninitialized_var() from compiler-intel.h

When compiling with icc, <linux/compiler-gcc.h> ends up included
because the icc environment defines __GNUC__.  Thus, we neither need
nor want to have this macro defined in both compiler-gcc.h and
compiler-intel.h, and the fact that they are inconsistent just makes
the compiler spew warnings.

Reported-by: Sunil K. Pandey <sunil.k.pandey@intel.com>
Cc: Kevin B. Smith <kevin.b.smith@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/n/tip-0mbwou1zt7pafij09b897lg3@git.kernel.org
Cc: <stable@vger.kernel.org>
---
 include/linux/compiler-intel.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 973ce10c40b6..dc1bd3dcf11f 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -28,8 +28,6 @@
 
 #endif
 
-#define uninitialized_var(x) x
-
 #ifndef __HAVE_BUILTIN_BSWAP16__
 /* icc has this, but it's called _bswap16 */
 #define __HAVE_BUILTIN_BSWAP16__
-- 
cgit v1.2.3


From 12663bfc97c8b3fdb292428105dd92d563164050 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Sat, 7 Dec 2013 17:26:27 -0500
Subject: net: unix: allow set_peek_off to fail

unix_dgram_recvmsg() will hold the readlock of the socket until recv
is complete.

In the same time, we may try to setsockopt(SO_PEEK_OFF) which will hang until
unix_dgram_recvmsg() will complete (which can take a while) without allowing
us to break out of it, triggering a hung task spew.

Instead, allow set_peek_off to fail, this way userspace will not hang.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 2 +-
 net/core/sock.c     | 2 +-
 net/unix/af_unix.c  | 8 ++++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 4bcee94cef93..69be3e6079c8 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -181,7 +181,7 @@ struct proto_ops {
 				      int offset, size_t size, int flags);
 	ssize_t 	(*splice_read)(struct socket *sock,  loff_t *ppos,
 				       struct pipe_inode_info *pipe, size_t len, unsigned int flags);
-	void		(*set_peek_off)(struct sock *sk, int val);
+	int		(*set_peek_off)(struct sock *sk, int val);
 };
 
 #define DECLARE_SOCKADDR(type, dst, src)	\
diff --git a/net/core/sock.c b/net/core/sock.c
index ab20ed9b0f31..5393b4b719d7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -882,7 +882,7 @@ set_rcvbuf:
 
 	case SO_PEEK_OFF:
 		if (sock->ops->set_peek_off)
-			sock->ops->set_peek_off(sk, val);
+			ret = sock->ops->set_peek_off(sk, val);
 		else
 			ret = -EOPNOTSUPP;
 		break;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 01625ccc3ae6..a0ca162e5bd5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -530,13 +530,17 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
 				  struct msghdr *, size_t, int);
 
-static void unix_set_peek_off(struct sock *sk, int val)
+static int unix_set_peek_off(struct sock *sk, int val)
 {
 	struct unix_sock *u = unix_sk(sk);
 
-	mutex_lock(&u->readlock);
+	if (mutex_lock_interruptible(&u->readlock))
+		return -EINTR;
+
 	sk->sk_peek_off = val;
 	mutex_unlock(&u->readlock);
+
+	return 0;
 }
 
 
-- 
cgit v1.2.3


From 4bd7b5127bd02c12c1cc837a7a0b6ce295eb2505 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Tue, 10 Dec 2013 02:20:41 +0300
Subject: micrel: add support for KSZ8041RNLI

Renesas R-Car development boards use KSZ8041RNLI PHY which for some reason has
ID of 0x00221537 that is not documented for KSZ8041-family PHYs and does not
match the  documented ID of  0x0022151x (where 'x' is the revision).  We have
to add the new #define PHY_ID_* and new ksphy_driver[] entry, almost the same
as KSZ8041 one, differing only in the 'phy_id' and 'name' fields.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c   | 15 +++++++++++++++
 include/linux/micrel_phy.h |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 3ae28f420868..26fa05a472b4 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -335,6 +335,21 @@ static struct phy_driver ksphy_driver[] = {
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 	.driver		= { .owner = THIS_MODULE,},
+}, {
+	.phy_id		= PHY_ID_KSZ8041RNLI,
+	.phy_id_mask	= 0x00fffff0,
+	.name		= "Micrel KSZ8041RNLI",
+	.features	= PHY_BASIC_FEATURES |
+			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= kszphy_ack_interrupt,
+	.config_intr	= kszphy_config_intr,
+	.suspend	= genphy_suspend,
+	.resume		= genphy_resume,
+	.driver		= { .owner = THIS_MODULE,},
 }, {
 	.phy_id		= PHY_ID_KSZ8051,
 	.phy_id_mask	= 0x00fffff0,
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index ad05ce60c1c9..2e5b194b9b19 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -22,6 +22,8 @@
 #define PHY_ID_KSZ8021		0x00221555
 #define PHY_ID_KSZ8031		0x00221556
 #define PHY_ID_KSZ8041		0x00221510
+/* undocumented */
+#define PHY_ID_KSZ8041RNLI	0x00221537
 #define PHY_ID_KSZ8051		0x00221550
 /* same id: ks8001 Rev. A/B, and ks8721 Rev 3. */
 #define PHY_ID_KSZ8001		0x0022161A
-- 
cgit v1.2.3


From a5c21dcefa1c3d759457a604b3cfc4af29c8713f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 12 Dec 2013 17:40:21 +0000
Subject: dcache: allow word-at-a-time name hashing with big-endian CPUs

When explicitly hashing the end of a string with the word-at-a-time
interface, we have to be careful which end of the word we pick up.

On big-endian CPUs, the upper-bits will contain the data we're after, so
ensure we generate our masks accordingly (and avoid hashing whatever
random junk may have been sitting after the string).

This patch adds a new dcache helper, bytemask_from_count, which creates
a mask appropriate for the CPU endianness.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c            | 2 +-
 fs/namei.c             | 7 +------
 include/linux/dcache.h | 2 ++
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 4bdb300b16e2..6055d61811d3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -192,7 +192,7 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
 		if (!tcount)
 			return 0;
 	}
-	mask = ~(~0ul << tcount*8);
+	mask = bytemask_from_count(tcount);
 	return unlikely(!!((a ^ b) & mask));
 }
 
diff --git a/fs/namei.c b/fs/namei.c
index c53d3a9547f9..3531deebad30 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1598,11 +1598,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
  *   do a "get_unaligned()" if this helps and is sufficiently
  *   fast.
  *
- * - Little-endian machines (so that we can generate the mask
- *   of low bytes efficiently). Again, we *could* do a byte
- *   swapping load on big-endian architectures if that is not
- *   expensive enough to make the optimization worthless.
- *
  * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
  *   do not trap on the (extremely unlikely) case of a page
  *   crossing operation.
@@ -1646,7 +1641,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 		if (!len)
 			goto done;
 	}
-	mask = ~(~0ul << len*8);
+	mask = bytemask_from_count(len);
 	hash += mask & a;
 done:
 	return fold_hash(hash);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 57e87e749a48..bf72e9ac6de0 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -29,8 +29,10 @@ struct vfsmount;
 /* The hash is always the low bits of hash_len */
 #ifdef __LITTLE_ENDIAN
  #define HASH_LEN_DECLARE u32 hash; u32 len;
+ #define bytemask_from_count(cnt)	(~(~0ul << (cnt)*8))
 #else
  #define HASH_LEN_DECLARE u32 len; u32 hash;
+ #define bytemask_from_count(cnt)	(~(~0ul >> (cnt)*8))
 #endif
 
 /*
-- 
cgit v1.2.3


From a0a9663dd2146d54339237764e1bfc60e8a39328 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Thu, 12 Dec 2013 10:15:59 +0100
Subject: net: make neigh_priv_len in struct net_device 16bit instead of 8bit

neigh_priv_len is defined as u8. With all debug enabled struct
ipoib_neigh has 200 bytes. The largest part is sk_buff_head with 96
bytes and here the spinlock with 72 bytes.
The size value still fits in this u8 leaving some room for more.

On -RT struct ipoib_neigh put on weight and has 392 bytes. The main
reason is sk_buff_head with 288 and the fatty here is spinlock with 192
bytes. This does no longer fit into into neigh_priv_len and gcc
complains.

This patch changes neigh_priv_len from being 8bit to 16bit. Since the
following element (dev_id) is 16bit followed by a spinlock which is
aligned, the struct remains with a total size of 3200 (allmodconfig) /
2048 (with as much debug off as possible) bytes on x86-64.
On x86-32 the struct is 1856 (allmodconfig) / 1216 (with as much debug
off as possible) bytes long. The numbers were gained with and without
the patch to prove that this change does not increase the size of the
struct.

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7f0ed423a360..d9a550bf3e8e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1255,7 +1255,7 @@ struct net_device {
 	unsigned char		perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
 	unsigned char		addr_assign_type; /* hw address assignment type */
 	unsigned char		addr_len;	/* hardware address length	*/
-	unsigned char		neigh_priv_len;
+	unsigned short		neigh_priv_len;
 	unsigned short          dev_id;		/* Used to differentiate devices
 						 * that share the same link
 						 * layer address
-- 
cgit v1.2.3


From f40386a4e976acb2bd3e0f9ead11e8e769acbe98 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Thu, 12 Dec 2013 17:12:19 -0800
Subject: include/linux/hugetlb.h: make isolate_huge_page() an inline

With CONFIG_HUGETLBFS=n:

  mm/migrate.c: In function `do_move_page_to_node_array':
  include/linux/hugetlb.h:140:33: warning: statement with no effect [-Wunused-value]
   #define isolate_huge_page(p, l) false
                                   ^
  mm/migrate.c:1170:4: note: in expansion of macro `isolate_huge_page'
      isolate_huge_page(page, &pagelist);

Reported-by: Borislav Petkov <bp@alien8.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 9649ff0c63f8..bd7e98752222 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -142,7 +142,10 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page)
 	return 0;
 }
 
-#define isolate_huge_page(p, l) false
+static inline bool isolate_huge_page(struct page *page, struct list_head *list)
+{
+	return false;
+}
 #define putback_active_hugepage(p)	do {} while (0)
 #define is_hugepage_active(x)	false
 
-- 
cgit v1.2.3


From 386e79066f04850352f144e67edfd5b636c0f95c Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 12 Dec 2013 17:12:24 -0800
Subject: include/linux/kernel.h: make might_fault() a nop for !MMU

The machine cannot fault if !MUU, so make might_fault() a nop for !MMU.

This fixes below build error if
!CONFIG_MMU && (CONFIG_PROVE_LOCKING=y || CONFIG_DEBUG_ATOMIC_SLEEP=y):

  arch/arm/kernel/built-in.o: In function `arch_ptrace':
  arch/arm/kernel/ptrace.c:852: undefined reference to `might_fault'
  arch/arm/kernel/built-in.o: In function `restore_sigframe':
  arch/arm/kernel/signal.c:173: undefined reference to `might_fault'
  ...
  arch/arm/kernel/built-in.o:arch/arm/kernel/signal.c:177: more undefined references to `might_fault' follow
  make: *** [vmlinux] Error 1

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d4e98d13eff4..ecb87544cc5d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -193,7 +193,8 @@ extern int _cond_resched(void);
 		(__x < 0) ? -__x : __x;		\
 	})
 
-#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
+#if defined(CONFIG_MMU) && \
+	(defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP))
 void might_fault(void);
 #else
 static inline void might_fault(void) { }
-- 
cgit v1.2.3


From 3e1e4a5f3a324502c27c4e8808e06ac2ea842360 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Thu, 12 Dec 2013 17:12:31 -0800
Subject: mfd/rtc: s5m: fix register updating by adding regmap for RTC

Rename old regmap field of "struct sec_pmic_dev" to "regmap_pmic" and
add new regmap for RTC.

On S5M8767A registers were not properly updated and read due to usage of
the same regmap as the PMIC.  This could be observed in various hangs,
e.g.  in infinite loop during waiting for UDR field change.

On this chip family the RTC has different I2C address than PMIC so
additional regmap is needed.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Reviewed-by: Mark Brown <broonie@linaro.org>
Acked-by: Sangbeom Kim <sbkim73@samsung.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sec-core.c           | 30 ++++++++++++++++++++++--------
 drivers/mfd/sec-irq.c            |  6 +++---
 drivers/regulator/s5m8767.c      |  2 +-
 drivers/rtc/rtc-s5m.c            |  2 +-
 include/linux/mfd/samsung/core.h |  3 ++-
 5 files changed, 29 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c
index 34c18fb8c089..54cc25546592 100644
--- a/drivers/mfd/sec-core.c
+++ b/drivers/mfd/sec-core.c
@@ -81,31 +81,31 @@ static struct of_device_id sec_dt_match[] = {
 
 int sec_reg_read(struct sec_pmic_dev *sec_pmic, u8 reg, void *dest)
 {
-	return regmap_read(sec_pmic->regmap, reg, dest);
+	return regmap_read(sec_pmic->regmap_pmic, reg, dest);
 }
 EXPORT_SYMBOL_GPL(sec_reg_read);
 
 int sec_bulk_read(struct sec_pmic_dev *sec_pmic, u8 reg, int count, u8 *buf)
 {
-	return regmap_bulk_read(sec_pmic->regmap, reg, buf, count);
+	return regmap_bulk_read(sec_pmic->regmap_pmic, reg, buf, count);
 }
 EXPORT_SYMBOL_GPL(sec_bulk_read);
 
 int sec_reg_write(struct sec_pmic_dev *sec_pmic, u8 reg, u8 value)
 {
-	return regmap_write(sec_pmic->regmap, reg, value);
+	return regmap_write(sec_pmic->regmap_pmic, reg, value);
 }
 EXPORT_SYMBOL_GPL(sec_reg_write);
 
 int sec_bulk_write(struct sec_pmic_dev *sec_pmic, u8 reg, int count, u8 *buf)
 {
-	return regmap_raw_write(sec_pmic->regmap, reg, buf, count);
+	return regmap_raw_write(sec_pmic->regmap_pmic, reg, buf, count);
 }
 EXPORT_SYMBOL_GPL(sec_bulk_write);
 
 int sec_reg_update(struct sec_pmic_dev *sec_pmic, u8 reg, u8 val, u8 mask)
 {
-	return regmap_update_bits(sec_pmic->regmap, reg, mask, val);
+	return regmap_update_bits(sec_pmic->regmap_pmic, reg, mask, val);
 }
 EXPORT_SYMBOL_GPL(sec_reg_update);
 
@@ -166,6 +166,11 @@ static struct regmap_config s5m8767_regmap_config = {
 	.cache_type = REGCACHE_FLAT,
 };
 
+static const struct regmap_config sec_rtc_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
 #ifdef CONFIG_OF
 /*
  * Only the common platform data elements for s5m8767 are parsed here from the
@@ -266,9 +271,9 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 		break;
 	}
 
-	sec_pmic->regmap = devm_regmap_init_i2c(i2c, regmap);
-	if (IS_ERR(sec_pmic->regmap)) {
-		ret = PTR_ERR(sec_pmic->regmap);
+	sec_pmic->regmap_pmic = devm_regmap_init_i2c(i2c, regmap);
+	if (IS_ERR(sec_pmic->regmap_pmic)) {
+		ret = PTR_ERR(sec_pmic->regmap_pmic);
 		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
 			ret);
 		return ret;
@@ -277,6 +282,15 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 	sec_pmic->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
 	i2c_set_clientdata(sec_pmic->rtc, sec_pmic);
 
+	sec_pmic->regmap_rtc = devm_regmap_init_i2c(sec_pmic->rtc,
+			&sec_rtc_regmap_config);
+	if (IS_ERR(sec_pmic->regmap_rtc)) {
+		ret = PTR_ERR(sec_pmic->regmap_rtc);
+		dev_err(&i2c->dev, "Failed to allocate RTC register map: %d\n",
+			ret);
+		return ret;
+	}
+
 	if (pdata && pdata->cfg_pmic_irq)
 		pdata->cfg_pmic_irq();
 
diff --git a/drivers/mfd/sec-irq.c b/drivers/mfd/sec-irq.c
index 0dd84e99081e..b441b1be27cb 100644
--- a/drivers/mfd/sec-irq.c
+++ b/drivers/mfd/sec-irq.c
@@ -280,19 +280,19 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic)
 
 	switch (type) {
 	case S5M8763X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap, sec_pmic->irq,
+		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
 				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				  sec_pmic->irq_base, &s5m8763_irq_chip,
 				  &sec_pmic->irq_data);
 		break;
 	case S5M8767X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap, sec_pmic->irq,
+		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
 				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				  sec_pmic->irq_base, &s5m8767_irq_chip,
 				  &sec_pmic->irq_data);
 		break;
 	case S2MPS11X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap, sec_pmic->irq,
+		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
 				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				  sec_pmic->irq_base, &s2mps11_irq_chip,
 				  &sec_pmic->irq_data);
diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index cbf91e25cf7f..aeb40aad0ae7 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -925,7 +925,7 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
 		config.dev = s5m8767->dev;
 		config.init_data = pdata->regulators[i].initdata;
 		config.driver_data = s5m8767;
-		config.regmap = iodev->regmap;
+		config.regmap = iodev->regmap_pmic;
 		config.of_node = pdata->regulators[i].reg_node;
 
 		rdev[i] = devm_regulator_register(&pdev->dev, &regulators[id],
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 0ba56b7cb382..ae8119dc2846 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -567,7 +567,7 @@ static int s5m_rtc_probe(struct platform_device *pdev)
 
 	info->dev = &pdev->dev;
 	info->s5m87xx = s5m87xx;
-	info->regmap = s5m87xx->regmap;
+	info->regmap = s5m87xx->regmap_rtc;
 	info->device_type = s5m87xx->device_type;
 	info->wtsr_smpl = s5m87xx->wtsr_smpl;
 
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 2d0c9071bcfb..cab2dd279076 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -39,7 +39,8 @@ enum sec_device_type {
 struct sec_pmic_dev {
 	struct device *dev;
 	struct sec_platform_data *pdata;
-	struct regmap *regmap;
+	struct regmap *regmap_pmic;
+	struct regmap *regmap_rtc;
 	struct i2c_client *i2c;
 	struct i2c_client *rtc;
 
-- 
cgit v1.2.3